diff options
Diffstat (limited to 'pkg/sentry/fs')
80 files changed, 12400 insertions, 8780 deletions
diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD deleted file mode 100644 index ea85ab33c..000000000 --- a/pkg/sentry/fs/BUILD +++ /dev/null @@ -1,135 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_library( - name = "fs", - srcs = [ - "attr.go", - "context.go", - "copy_up.go", - "dentry.go", - "dirent.go", - "dirent_cache.go", - "dirent_cache_limiter.go", - "dirent_list.go", - "dirent_state.go", - "event_list.go", - "file.go", - "file_operations.go", - "file_overlay.go", - "file_state.go", - "filesystems.go", - "flags.go", - "fs.go", - "inode.go", - "inode_inotify.go", - "inode_operations.go", - "inode_overlay.go", - "inotify.go", - "inotify_event.go", - "inotify_watch.go", - "mock.go", - "mount.go", - "mount_overlay.go", - "mounts.go", - "offset.go", - "overlay.go", - "path.go", - "restore.go", - "save.go", - "seek.go", - "splice.go", - "sync.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/amutex", - "//pkg/context", - "//pkg/log", - "//pkg/metric", - "//pkg/p9", - "//pkg/refs", - "//pkg/secio", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs/lock", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/memmap", - "//pkg/sentry/platform", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/uniqueid", - "//pkg/sentry/usage", - "//pkg/state", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_template_instance( - name = "dirent_list", - out = "dirent_list.go", - package = "fs", - prefix = "dirent", - template = "//pkg/ilist:generic_list", - types = { - "Linker": "*Dirent", - "Element": "*Dirent", - }, -) - -go_template_instance( - name = "event_list", - out = "event_list.go", - package = "fs", - prefix = "event", - template = "//pkg/ilist:generic_list", - types = { - "Linker": "*Event", - "Element": "*Event", - }, -) - -go_test( - name = "fs_x_test", - size = "small", - srcs = [ - "copy_up_test.go", - "file_overlay_test.go", - "inode_overlay_test.go", - "mounts_test.go", - ], - deps = [ - ":fs", - "//pkg/context", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel/contexttest", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - ], -) - -go_test( - name = "fs_test", - size = "small", - srcs = [ - "dirent_cache_test.go", - "dirent_refs_test.go", - "mount_test.go", - "path_test.go", - ], - library = ":fs", - deps = [ - "//pkg/context", - "//pkg/sentry/contexttest", - ], -) diff --git a/pkg/sentry/fs/README.md b/pkg/sentry/fs/README.md deleted file mode 100644 index db4a1b730..000000000 --- a/pkg/sentry/fs/README.md +++ /dev/null @@ -1,229 +0,0 @@ -This package provides an implementation of the Linux virtual filesystem. - -[TOC] - -## Overview - -- An `fs.Dirent` caches an `fs.Inode` in memory at a path in the VFS, giving - the `fs.Inode` a relative position with respect to other `fs.Inode`s. - -- If an `fs.Dirent` is referenced by two file descriptors, then those file - descriptors are coherent with each other: they depend on the same - `fs.Inode`. - -- A mount point is an `fs.Dirent` for which `fs.Dirent.mounted` is true. It - exposes the root of a mounted filesystem. - -- The `fs.Inode` produced by a registered filesystem on mount(2) owns an - `fs.MountedFilesystem` from which other `fs.Inode`s will be looked up. For a - remote filesystem, the `fs.MountedFilesystem` owns the connection to that - remote filesystem. - -- In general: - -``` -fs.Inode <------------------------------ -| | -| | -produced by | -exactly one | -| responsible for the -| virtual identity of -v | -fs.MountedFilesystem ------------------- -``` - -Glossary: - -- VFS: virtual filesystem. - -- inode: a virtual file object holding a cached view of a file on a backing - filesystem (includes metadata and page caches). - -- superblock: the virtual state of a mounted filesystem (e.g. the virtual - inode number set). - -- mount namespace: a view of the mounts under a root (during path traversal, - the VFS makes visible/follows the mount point that is in the current task's - mount namespace). - -## Save and restore - -An application's hard dependencies on filesystem state can be broken down into -two categories: - -- The state necessary to execute a traversal on or view the *virtual* - filesystem hierarchy, regardless of what files an application has open. - -- The state necessary to represent open files. - -The first is always necessary to save and restore. An application may never have -any open file descriptors, but across save and restore it should see a coherent -view of any mount namespace. NOTE(b/63601033): Currently only one "initial" -mount namespace is supported. - -The second is so that system calls across save and restore are coherent with -each other (e.g. so that unintended re-reads or overwrites do not occur). - -Specifically this state is: - -- An `fs.MountManager` containing mount points. - -- A `kernel.FDTable` containing pointers to open files. - -Anything else managed by the VFS that can be easily loaded into memory from a -filesystem is synced back to those filesystems and is not saved. Examples are -pages in page caches used for optimizations (i.e. readahead and writeback), and -directory entries used to accelerate path lookups. - -### Mount points - -Saving and restoring a mount point means saving and restoring: - -- The root of the mounted filesystem. - -- Mount flags, which control how the VFS interacts with the mounted - filesystem. - -- Any relevant metadata about the mounted filesystem. - -- All `fs.Inode`s referenced by the application that reside under the mount - point. - -`fs.MountedFilesystem` is metadata about a filesystem that is mounted. It is -referenced by every `fs.Inode` loaded into memory under the mount point -including the `fs.Inode` of the mount point itself. The `fs.MountedFilesystem` -maps file objects on the filesystem to a virtualized `fs.Inode` number and vice -versa. - -To restore all `fs.Inode`s under a given mount point, each `fs.Inode` leverages -its dependency on an `fs.MountedFilesystem`. Since the `fs.MountedFilesystem` -knows how an `fs.Inode` maps to a file object on a backing filesystem, this -mapping can be trivially consulted by each `fs.Inode` when the `fs.Inode` is -restored. - -In detail, a mount point is saved in two steps: - -- First, after the kernel is paused but before state.Save, we walk all mount - namespaces and install a mapping from `fs.Inode` numbers to file paths - relative to the root of the mounted filesystem in each - `fs.MountedFilesystem`. This is subsequently called the set of `fs.Inode` - mappings. - -- Second, during state.Save, each `fs.MountedFilesystem` decides whether to - save the set of `fs.Inode` mappings. In-memory filesystems, like tmpfs, have - no need to save a set of `fs.Inode` mappings, since the `fs.Inode`s can be - entirely encoded in state file. Each `fs.MountedFilesystem` also optionally - saves the device name from when the filesystem was originally mounted. Each - `fs.Inode` saves its virtual identifier and a reference to a - `fs.MountedFilesystem`. - -A mount point is restored in two steps: - -- First, before state.Load, all mount configurations are stored in a global - `fs.RestoreEnvironment`. This tells us what mount points the user wants to - restore and how to re-establish pointers to backing filesystems. - -- Second, during state.Load, each `fs.MountedFilesystem` optionally searches - for a mount in the `fs.RestoreEnvironment` that matches its saved device - name. The `fs.MountedFilesystem` then reestablishes a pointer to the root of - the mounted filesystem. For example, the mount specification provides the - network connection for a mounted remote filesystem client to communicate - with its remote file server. The `fs.MountedFilesystem` also trivially loads - its set of `fs.Inode` mappings. When an `fs.Inode` is encountered, the - `fs.Inode` loads its virtual identifier and its reference a - `fs.MountedFilesystem`. It uses the `fs.MountedFilesystem` to obtain the - root of the mounted filesystem and the `fs.Inode` mappings to obtain the - relative file path to its data. With these, the `fs.Inode` re-establishes a - pointer to its file object. - -A mount point can trivially restore its `fs.Inode`s in parallel since -`fs.Inode`s have a restore dependency on their `fs.MountedFilesystem` and not on -each other. - -### Open files - -An `fs.File` references the following filesystem objects: - -```go -fs.File -> fs.Dirent -> fs.Inode -> fs.MountedFilesystem -``` - -The `fs.Inode` is restored using its `fs.MountedFilesystem`. The -[Mount points](#mount-points) section above describes how this happens in -detail. The `fs.Dirent` restores its pointer to an `fs.Inode`, pointers to -parent and children `fs.Dirents`, and the basename of the file. - -Otherwise an `fs.File` restores flags, an offset, and a unique identifier (only -used internally). - -It may use the `fs.Inode`, which it indirectly holds a reference on through the -`fs.Dirent`, to reestablish an open file handle on the backing filesystem (e.g. -to continue reading and writing). - -## Overlay - -The overlay implementation in the fs package takes Linux overlayfs as a frame of -reference but corrects for several POSIX consistency errors. - -In Linux overlayfs, the `struct inode` used for reading and writing to the same -file may be different. This is because the `struct inode` is dissociated with -the process of copying up the file from the upper to the lower directory. Since -flock(2) and fcntl(2) locks, inotify(7) watches, page caches, and a file's -identity are all stored directly or indirectly off the `struct inode`, these -properties of the `struct inode` may be stale after the first modification. This -can lead to file locking bugs, missed inotify events, and inconsistent data in -shared memory mappings of files, to name a few problems. - -The fs package maintains a single `fs.Inode` to represent a directory entry in -an overlay and defines operations on this `fs.Inode` which synchronize with the -copy up process. This achieves several things: - -+ File locks, inotify watches, and the identity of the file need not be copied - at all. - -+ Memory mappings of files coordinate with the copy up process so that if a - file in the lower directory is memory mapped, all references to it are - invalidated, forcing the application to re-fault on memory mappings of the - file under the upper directory. - -The `fs.Inode` holds metadata about files in the upper and/or lower directories -via an `fs.overlayEntry`. The `fs.overlayEntry` implements the `fs.Mappable` -interface. It multiplexes between upper and lower directory memory mappings and -stores a copy of memory references so they can be transferred to the upper -directory `fs.Mappable` when the file is copied up. - -The lower filesystem in an overlay may contain another (nested) overlay, but the -upper filesystem may not contain another overlay. In other words, nested -overlays form a tree structure that only allows branching in the lower -filesystem. - -Caching decisions in the overlay are delegated to the upper filesystem, meaning -that the Keep and Revalidate methods on the overlay return the same values as -the upper filesystem. A small wrinkle is that the lower filesystem is not -allowed to return `true` from Revalidate, as the overlay can not reload inodes -from the lower filesystem. A lower filesystem that does return `true` from -Revalidate will trigger a panic. - -The `fs.Inode` also holds a reference to a `fs.MountedFilesystem` that -normalizes across the mounted filesystem state of the upper and lower -directories. - -When a file is copied from the lower to the upper directory, attempts to -interact with the file block until the copy completes. All copying synchronizes -with rename(2). - -## Future Work - -### Overlay - -When a file is copied from a lower directory to an upper directory, several -locks are taken: the global renamuMu and the copyMu of the `fs.Inode` being -copied. This blocks operations on the file, including fault handling of memory -mappings. Performance could be improved by copying files into a temporary -directory that resides on the same filesystem as the upper directory and doing -an atomic rename, holding locks only during the rename operation. - -Additionally files are copied up synchronously. For large files, this causes a -noticeable latency. Performance could be improved by pipelining copies at -non-overlapping file offsets. diff --git a/pkg/sentry/fs/anon/BUILD b/pkg/sentry/fs/anon/BUILD deleted file mode 100644 index aedcecfa1..000000000 --- a/pkg/sentry/fs/anon/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "anon", - srcs = [ - "anon.go", - "device.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/anon/anon_state_autogen.go b/pkg/sentry/fs/anon/anon_state_autogen.go new file mode 100644 index 000000000..b2b1a466e --- /dev/null +++ b/pkg/sentry/fs/anon/anon_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package anon diff --git a/pkg/sentry/fs/copy_up_test.go b/pkg/sentry/fs/copy_up_test.go deleted file mode 100644 index c7a11eec1..000000000 --- a/pkg/sentry/fs/copy_up_test.go +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "bytes" - "crypto/rand" - "fmt" - "io" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/fs" - _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/usermem" -) - -const ( - // origFileSize is the original file size. This many bytes should be - // copied up before the test file is modified. - origFileSize = 4096 - - // truncatedFileSize is the size to truncate all test files. - truncateFileSize = 10 -) - -// TestConcurrentCopyUp is a copy up stress test for an overlay. -// -// It creates a 64-level deep directory tree in the lower filesystem and -// populates the last subdirectory with 64 files containing random content: -// -// /lower -// /sudir0/.../subdir63/ -// /file0 -// ... -// /file63 -// -// The files are truncated concurrently by 4 goroutines per file. -// These goroutines contend with copying up all parent 64 subdirectories -// as well as the final file content. -// -// At the end of the test, we assert that the files respect the new truncated -// size and contain the content we expect. -func TestConcurrentCopyUp(t *testing.T) { - ctx := contexttest.Context(t) - files := makeOverlayTestFiles(t) - - var wg sync.WaitGroup - for _, file := range files { - for i := 0; i < 4; i++ { - wg.Add(1) - go func(o *overlayTestFile) { - if err := o.File.Dirent.Inode.Truncate(ctx, o.File.Dirent, truncateFileSize); err != nil { - t.Fatalf("failed to copy up: %v", err) - } - wg.Done() - }(file) - } - } - wg.Wait() - - for _, file := range files { - got := make([]byte, origFileSize) - n, err := file.File.Readv(ctx, usermem.BytesIOSequence(got)) - if int(n) != truncateFileSize { - t.Fatalf("read %d bytes from file, want %d", n, truncateFileSize) - } - if err != nil && err != io.EOF { - t.Fatalf("read got error %v, want nil", err) - } - if !bytes.Equal(got[:n], file.content[:truncateFileSize]) { - t.Fatalf("file content is %v, want %v", got[:n], file.content[:truncateFileSize]) - } - } -} - -type overlayTestFile struct { - File *fs.File - name string - content []byte -} - -func makeOverlayTestFiles(t *testing.T) []*overlayTestFile { - ctx := contexttest.Context(t) - - // Create a lower tmpfs mount. - fsys, _ := fs.FindFilesystem("tmpfs") - lower, err := fsys.Mount(contexttest.Context(t), "", fs.MountSourceFlags{}, "", nil) - if err != nil { - t.Fatalf("failed to mount tmpfs: %v", err) - } - lowerRoot := fs.NewDirent(ctx, lower, "") - - // Make a deep set of subdirectories that everyone shares. - next := lowerRoot - for i := 0; i < 64; i++ { - name := fmt.Sprintf("subdir%d", i) - err := next.CreateDirectory(ctx, lowerRoot, name, fs.FilePermsFromMode(0777)) - if err != nil { - t.Fatalf("failed to create dir %q: %v", name, err) - } - next, err = next.Walk(ctx, lowerRoot, name) - if err != nil { - t.Fatalf("failed to walk to %q: %v", name, err) - } - } - - // Make a bunch of files in the last directory. - var files []*overlayTestFile - for i := 0; i < 64; i++ { - name := fmt.Sprintf("file%d", i) - f, err := next.Create(ctx, next, name, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - t.Fatalf("failed to create file %q: %v", name, err) - } - defer f.DecRef(ctx) - - relname, _ := f.Dirent.FullName(lowerRoot) - - o := &overlayTestFile{ - name: relname, - content: make([]byte, origFileSize), - } - - if _, err := rand.Read(o.content); err != nil { - t.Fatalf("failed to read from /dev/urandom: %v", err) - } - - if _, err := f.Writev(ctx, usermem.BytesIOSequence(o.content)); err != nil { - t.Fatalf("failed to write content to file %q: %v", name, err) - } - - files = append(files, o) - } - - // Create an empty upper tmpfs mount which we will copy up into. - upper, err := fsys.Mount(ctx, "", fs.MountSourceFlags{}, "", nil) - if err != nil { - t.Fatalf("failed to mount tmpfs: %v", err) - } - - // Construct an overlay root. - overlay, err := fs.NewOverlayRoot(ctx, upper, lower, fs.MountSourceFlags{}) - if err != nil { - t.Fatalf("failed to construct overlay root: %v", err) - } - - // Create a MountNamespace to traverse the file system. - mns, err := fs.NewMountNamespace(ctx, overlay) - if err != nil { - t.Fatalf("failed to construct mount manager: %v", err) - } - - // Walk to all of the files in the overlay, open them readable. - for _, f := range files { - maxTraversals := uint(0) - d, err := mns.FindInode(ctx, mns.Root(), mns.Root(), f.name, &maxTraversals) - if err != nil { - t.Fatalf("failed to find %q: %v", f.name, err) - } - defer d.DecRef(ctx) - - f.File, err = d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("failed to open file %q readable: %v", f.name, err) - } - } - - return files -} diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD deleted file mode 100644 index 6b7b451b8..000000000 --- a/pkg/sentry/fs/dev/BUILD +++ /dev/null @@ -1,41 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "dev", - srcs = [ - "dev.go", - "device.go", - "fs.go", - "full.go", - "net_tun.go", - "null.go", - "random.go", - "tty.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/rand", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/memmap", - "//pkg/sentry/mm", - "//pkg/sentry/pgalloc", - "//pkg/sentry/socket/netstack", - "//pkg/syserror", - "//pkg/tcpip/link/tun", - "//pkg/tcpip/network/arp", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/dev/dev_state_autogen.go b/pkg/sentry/fs/dev/dev_state_autogen.go new file mode 100644 index 000000000..958ddf698 --- /dev/null +++ b/pkg/sentry/fs/dev/dev_state_autogen.go @@ -0,0 +1,298 @@ +// automatically generated by stateify. + +package dev + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/dev.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (f *fullDevice) StateTypeName() string { + return "pkg/sentry/fs/dev.fullDevice" +} + +func (f *fullDevice) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (f *fullDevice) beforeSave() {} + +func (f *fullDevice) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.InodeSimpleAttributes) +} + +func (f *fullDevice) afterLoad() {} + +func (f *fullDevice) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.InodeSimpleAttributes) +} + +func (f *fullFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.fullFileOperations" +} + +func (f *fullFileOperations) StateFields() []string { + return []string{} +} + +func (f *fullFileOperations) beforeSave() {} + +func (f *fullFileOperations) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *fullFileOperations) afterLoad() {} + +func (f *fullFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func (n *netTunInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.netTunInodeOperations" +} + +func (n *netTunInodeOperations) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (n *netTunInodeOperations) beforeSave() {} + +func (n *netTunInodeOperations) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.InodeSimpleAttributes) +} + +func (n *netTunInodeOperations) afterLoad() {} + +func (n *netTunInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.InodeSimpleAttributes) +} + +func (n *netTunFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.netTunFileOperations" +} + +func (n *netTunFileOperations) StateFields() []string { + return []string{ + "device", + } +} + +func (n *netTunFileOperations) beforeSave() {} + +func (n *netTunFileOperations) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.device) +} + +func (n *netTunFileOperations) afterLoad() {} + +func (n *netTunFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.device) +} + +func (n *nullDevice) StateTypeName() string { + return "pkg/sentry/fs/dev.nullDevice" +} + +func (n *nullDevice) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (n *nullDevice) beforeSave() {} + +func (n *nullDevice) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.InodeSimpleAttributes) +} + +func (n *nullDevice) afterLoad() {} + +func (n *nullDevice) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.InodeSimpleAttributes) +} + +func (n *nullFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.nullFileOperations" +} + +func (n *nullFileOperations) StateFields() []string { + return []string{} +} + +func (n *nullFileOperations) beforeSave() {} + +func (n *nullFileOperations) StateSave(stateSinkObject state.Sink) { + n.beforeSave() +} + +func (n *nullFileOperations) afterLoad() {} + +func (n *nullFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func (zd *zeroDevice) StateTypeName() string { + return "pkg/sentry/fs/dev.zeroDevice" +} + +func (zd *zeroDevice) StateFields() []string { + return []string{ + "nullDevice", + } +} + +func (zd *zeroDevice) beforeSave() {} + +func (zd *zeroDevice) StateSave(stateSinkObject state.Sink) { + zd.beforeSave() + stateSinkObject.Save(0, &zd.nullDevice) +} + +func (zd *zeroDevice) afterLoad() {} + +func (zd *zeroDevice) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &zd.nullDevice) +} + +func (z *zeroFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.zeroFileOperations" +} + +func (z *zeroFileOperations) StateFields() []string { + return []string{} +} + +func (z *zeroFileOperations) beforeSave() {} + +func (z *zeroFileOperations) StateSave(stateSinkObject state.Sink) { + z.beforeSave() +} + +func (z *zeroFileOperations) afterLoad() {} + +func (z *zeroFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func (r *randomDevice) StateTypeName() string { + return "pkg/sentry/fs/dev.randomDevice" +} + +func (r *randomDevice) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (r *randomDevice) beforeSave() {} + +func (r *randomDevice) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.InodeSimpleAttributes) +} + +func (r *randomDevice) afterLoad() {} + +func (r *randomDevice) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.InodeSimpleAttributes) +} + +func (r *randomFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.randomFileOperations" +} + +func (r *randomFileOperations) StateFields() []string { + return []string{} +} + +func (r *randomFileOperations) beforeSave() {} + +func (r *randomFileOperations) StateSave(stateSinkObject state.Sink) { + r.beforeSave() +} + +func (r *randomFileOperations) afterLoad() {} + +func (r *randomFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func (t *ttyInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.ttyInodeOperations" +} + +func (t *ttyInodeOperations) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (t *ttyInodeOperations) beforeSave() {} + +func (t *ttyInodeOperations) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.InodeSimpleAttributes) +} + +func (t *ttyInodeOperations) afterLoad() {} + +func (t *ttyInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.InodeSimpleAttributes) +} + +func (t *ttyFileOperations) StateTypeName() string { + return "pkg/sentry/fs/dev.ttyFileOperations" +} + +func (t *ttyFileOperations) StateFields() []string { + return []string{} +} + +func (t *ttyFileOperations) beforeSave() {} + +func (t *ttyFileOperations) StateSave(stateSinkObject state.Sink) { + t.beforeSave() +} + +func (t *ttyFileOperations) afterLoad() {} + +func (t *ttyFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*filesystem)(nil)) + state.Register((*fullDevice)(nil)) + state.Register((*fullFileOperations)(nil)) + state.Register((*netTunInodeOperations)(nil)) + state.Register((*netTunFileOperations)(nil)) + state.Register((*nullDevice)(nil)) + state.Register((*nullFileOperations)(nil)) + state.Register((*zeroDevice)(nil)) + state.Register((*zeroFileOperations)(nil)) + state.Register((*randomDevice)(nil)) + state.Register((*randomFileOperations)(nil)) + state.Register((*ttyInodeOperations)(nil)) + state.Register((*ttyFileOperations)(nil)) +} diff --git a/pkg/sentry/fs/dirent_cache_test.go b/pkg/sentry/fs/dirent_cache_test.go deleted file mode 100644 index 395c879f5..000000000 --- a/pkg/sentry/fs/dirent_cache_test.go +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "testing" -) - -func TestDirentCache(t *testing.T) { - const maxSize = 5 - - c := NewDirentCache(maxSize) - - // Size starts at 0. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Create a Dirent d. - d := NewNegativeDirent("") - - // c does not contain d. - if got, want := c.contains(d), false; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add d to the cache. - c.Add(d) - - // Size is now 1. - if got, want := c.Size(), uint64(1); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add maxSize-1 more elements. d should be oldest element. - for i := 0; i < maxSize-1; i++ { - c.Add(NewNegativeDirent("")) - } - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // "Bump" d to the front by re-adding it. - c.Add(d) - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add maxSize-1 more elements. d should again be oldest element. - for i := 0; i < maxSize-1; i++ { - c.Add(NewNegativeDirent("")) - } - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c contains d. - if got, want := c.contains(d), true; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Add one more element, which will bump d from the cache. - c.Add(NewNegativeDirent("")) - - // Size is maxSize. - if got, want := c.Size(), uint64(maxSize); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // c does not contain d. - if got, want := c.contains(d), false; got != want { - t.Errorf("c.contains(d) got %v want %v", got, want) - } - - // Invalidating causes size to be 0 and list to be empty. - c.Invalidate() - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - if got, want := c.list.Empty(), true; got != want { - t.Errorf("c.list.Empty() got %v, want %v", got, want) - } - - // Fill cache with maxSize dirents. - for i := 0; i < maxSize; i++ { - c.Add(NewNegativeDirent("")) - } -} - -func TestDirentCacheLimiter(t *testing.T) { - const ( - globalMaxSize = 5 - maxSize = 3 - ) - - limit := NewDirentCacheLimiter(globalMaxSize) - c1 := NewDirentCache(maxSize) - c1.limit = limit - c2 := NewDirentCache(maxSize) - c2.limit = limit - - // Create a Dirent d. - d := NewNegativeDirent("") - - // Add d to the cache. - c1.Add(d) - if got, want := c1.Size(), uint64(1); got != want { - t.Errorf("c1.Size() got %v, want %v", got, want) - } - - // Add maxSize-1 more elements. d should be oldest element. - for i := 0; i < maxSize-1; i++ { - c1.Add(NewNegativeDirent("")) - } - if got, want := c1.Size(), uint64(maxSize); got != want { - t.Errorf("c1.Size() got %v, want %v", got, want) - } - - // Check that d is still there. - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - - // Fill up the other cache, it will start dropping old entries from the cache - // when the global limit is reached. - for i := 0; i < maxSize; i++ { - c2.Add(NewNegativeDirent("")) - } - - // Check is what's remaining from global max. - if got, want := c2.Size(), globalMaxSize-maxSize; int(got) != want { - t.Errorf("c2.Size() got %v, want %v", got, want) - } - - // Check that d was not dropped. - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - - // Add an entry that will eventually be dropped. Check is done later... - drop := NewNegativeDirent("") - c1.Add(drop) - - // Check that d is bumped to front even when global limit is reached. - c1.Add(d) - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - - // Add 2 more element and check that: - // - d is still in the list: to verify that d was bumped - // - d2/d3 are in the list: older entries are dropped when global limit is - // reached. - // - drop is not in the list: indeed older elements are dropped. - d2 := NewNegativeDirent("") - c1.Add(d2) - d3 := NewNegativeDirent("") - c1.Add(d3) - if got, want := c1.contains(d), true; got != want { - t.Errorf("c1.contains(d) got %v want %v", got, want) - } - if got, want := c1.contains(d2), true; got != want { - t.Errorf("c1.contains(d2) got %v want %v", got, want) - } - if got, want := c1.contains(d3), true; got != want { - t.Errorf("c1.contains(d3) got %v want %v", got, want) - } - if got, want := c1.contains(drop), false; got != want { - t.Errorf("c1.contains(drop) got %v want %v", got, want) - } - - // Drop all entries from one cache. The other will be allowed to grow. - c1.Invalidate() - c2.Add(NewNegativeDirent("")) - if got, want := c2.Size(), uint64(maxSize); got != want { - t.Errorf("c2.Size() got %v, want %v", got, want) - } -} - -// TestNilDirentCache tests that a nil cache supports all cache operations, but -// treats them as noop. -func TestNilDirentCache(t *testing.T) { - // Create a nil cache. - var c *DirentCache - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Call Add. - c.Add(NewNegativeDirent("")) - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Call Remove. - c.Remove(NewNegativeDirent("")) - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } - - // Call Invalidate. - c.Invalidate() - - // Size is zero. - if got, want := c.Size(), uint64(0); got != want { - t.Errorf("c.Size() got %v, want %v", got, want) - } -} diff --git a/pkg/sentry/fs/dirent_list.go b/pkg/sentry/fs/dirent_list.go new file mode 100644 index 000000000..357ea925d --- /dev/null +++ b/pkg/sentry/fs/dirent_list.go @@ -0,0 +1,193 @@ +package fs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type direntElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (direntElementMapper) linkerFor(elem *Dirent) *Dirent { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type direntList struct { + head *Dirent + tail *Dirent +} + +// Reset resets list l to the empty state. +func (l *direntList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *direntList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *direntList) Front() *Dirent { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *direntList) Back() *Dirent { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +func (l *direntList) Len() (count int) { + for e := l.Front(); e != nil; e = (direntElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +func (l *direntList) PushFront(e *Dirent) { + linker := direntElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + direntElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *direntList) PushBack(e *Dirent) { + linker := direntElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + direntElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *direntList) PushBackList(m *direntList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + direntElementMapper{}.linkerFor(l.tail).SetNext(m.head) + direntElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *direntList) InsertAfter(b, e *Dirent) { + bLinker := direntElementMapper{}.linkerFor(b) + eLinker := direntElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + direntElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *direntList) InsertBefore(a, e *Dirent) { + aLinker := direntElementMapper{}.linkerFor(a) + eLinker := direntElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + direntElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *direntList) Remove(e *Dirent) { + linker := direntElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + direntElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + direntElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type direntEntry struct { + next *Dirent + prev *Dirent +} + +// Next returns the entry that follows e in the list. +func (e *direntEntry) Next() *Dirent { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *direntEntry) Prev() *Dirent { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *direntEntry) SetNext(elem *Dirent) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *direntEntry) SetPrev(elem *Dirent) { + e.prev = elem +} diff --git a/pkg/sentry/fs/dirent_refs_test.go b/pkg/sentry/fs/dirent_refs_test.go deleted file mode 100644 index 176b894ba..000000000 --- a/pkg/sentry/fs/dirent_refs_test.go +++ /dev/null @@ -1,418 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" -) - -func newMockDirInode(ctx context.Context, cache *DirentCache) *Inode { - return NewMockInode(ctx, NewMockMountSource(cache), StableAttr{Type: Directory}) -} - -func TestWalkPositive(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - root := NewDirent(ctx, newMockDirInode(ctx, nil), "root") - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - name := "d" - d, err := root.walk(ctx, root, name, false) - if err != nil { - t.Fatalf("root.walk(root, %q) got %v, want nil", name, err) - } - - if got := root.ReadRefs(); got != 2 { - t.Fatalf("root has a ref count of %d, want %d", got, 2) - } - - if got := d.ReadRefs(); got != 1 { - t.Fatalf("child name = %q has a ref count of %d, want %d", d.name, got, 1) - } - - d.DecRef(ctx) - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - if got := d.ReadRefs(); got != 0 { - t.Fatalf("child name = %q has a ref count of %d, want %d", d.name, got, 0) - } - - root.flush(ctx) - - if got := len(root.children); got != 0 { - t.Fatalf("root has %d children, want %d", got, 0) - } -} - -func TestWalkNegative(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - root := NewDirent(ctx, NewEmptyDir(ctx, nil), "root") - mn := root.Inode.InodeOperations.(*mockInodeOperationsLookupNegative) - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - name := "d" - for i := 0; i < 100; i++ { - _, err := root.walk(ctx, root, name, false) - if err != syscall.ENOENT { - t.Fatalf("root.walk(root, %q) got %v, want %v", name, err, syscall.ENOENT) - } - } - - if got := root.ReadRefs(); got != 1 { - t.Fatalf("root has a ref count of %d, want %d", got, 1) - } - - if got := len(root.children); got != 1 { - t.Fatalf("root has %d children, want %d", got, 1) - } - - w, ok := root.children[name] - if !ok { - t.Fatalf("root wants child at %q", name) - } - - child := w.Get() - if child == nil { - t.Fatalf("root wants to resolve weak reference") - } - - if !child.(*Dirent).IsNegative() { - t.Fatalf("root found positive child at %q, want negative", name) - } - - if got := child.(*Dirent).ReadRefs(); got != 2 { - t.Fatalf("child has a ref count of %d, want %d", got, 2) - } - - child.DecRef(ctx) - - if got := child.(*Dirent).ReadRefs(); got != 1 { - t.Fatalf("child has a ref count of %d, want %d", got, 1) - } - - if got := len(root.children); got != 1 { - t.Fatalf("root has %d children, want %d", got, 1) - } - - root.DecRef(ctx) - - if got := root.ReadRefs(); got != 0 { - t.Fatalf("root has a ref count of %d, want %d", got, 0) - } - - AsyncBarrier() - - if got := mn.releaseCalled; got != true { - t.Fatalf("root.Close was called %v, want true", got) - } -} - -type mockInodeOperationsLookupNegative struct { - *MockInodeOperations - releaseCalled bool -} - -func NewEmptyDir(ctx context.Context, cache *DirentCache) *Inode { - m := NewMockMountSource(cache) - return NewInode(ctx, &mockInodeOperationsLookupNegative{ - MockInodeOperations: NewMockInodeOperations(ctx), - }, m, StableAttr{Type: Directory}) -} - -func (m *mockInodeOperationsLookupNegative) Lookup(ctx context.Context, dir *Inode, p string) (*Dirent, error) { - return NewNegativeDirent(p), nil -} - -func (m *mockInodeOperationsLookupNegative) Release(context.Context) { - m.releaseCalled = true -} - -func TestHashNegativeToPositive(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - root := NewDirent(ctx, NewEmptyDir(ctx, nil), "root") - - name := "d" - _, err := root.walk(ctx, root, name, false) - if err != syscall.ENOENT { - t.Fatalf("root.walk(root, %q) got %v, want %v", name, err, syscall.ENOENT) - } - - if got := root.exists(ctx, root, name); got != false { - t.Fatalf("got %q exists, want does not exist", name) - } - - f, err := root.Create(ctx, root, name, FileFlags{}, FilePermissions{}) - if err != nil { - t.Fatalf("root.Create(%q, _), got error %v, want nil", name, err) - } - d := f.Dirent - - if d.IsNegative() { - t.Fatalf("got negative Dirent, want positive") - } - - if got := d.ReadRefs(); got != 1 { - t.Fatalf("child %q has a ref count of %d, want %d", name, got, 1) - } - - if got := root.ReadRefs(); got != 2 { - t.Fatalf("root has a ref count of %d, want %d", got, 2) - } - - if got := len(root.children); got != 1 { - t.Fatalf("got %d children, want %d", got, 1) - } - - w, ok := root.children[name] - if !ok { - t.Fatalf("failed to find weak reference to %q", name) - } - - child := w.Get() - if child == nil { - t.Fatalf("want to resolve weak reference") - } - - if child.(*Dirent) != d { - t.Fatalf("got foreign child") - } -} - -func TestRevalidate(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - for _, test := range []struct { - // desc is the test's description. - desc string - - // Whether to make negative Dirents. - makeNegative bool - }{ - { - desc: "Revalidate negative Dirent", - makeNegative: true, - }, - { - desc: "Revalidate positive Dirent", - makeNegative: false, - }, - } { - t.Run(test.desc, func(t *testing.T) { - ctx := contexttest.Context(t) - root := NewDirent(ctx, NewMockInodeRevalidate(ctx, test.makeNegative), "root") - - name := "d" - d1, err := root.walk(ctx, root, name, false) - if !test.makeNegative && err != nil { - t.Fatalf("root.walk(root, %q) got %v, want nil", name, err) - } - d2, err := root.walk(ctx, root, name, false) - if !test.makeNegative && err != nil { - t.Fatalf("root.walk(root, %q) got %v, want nil", name, err) - } - if !test.makeNegative && d1 == d2 { - t.Fatalf("revalidating walk got same *Dirent, want different") - } - if got := len(root.children); got != 1 { - t.Errorf("revalidating walk got %d children, want %d", got, 1) - } - }) - } -} - -type MockInodeOperationsRevalidate struct { - *MockInodeOperations - makeNegative bool -} - -func NewMockInodeRevalidate(ctx context.Context, makeNegative bool) *Inode { - mn := NewMockInodeOperations(ctx) - m := NewMockMountSource(nil) - m.MountSourceOperations.(*MockMountSourceOps).revalidate = true - return NewInode(ctx, &MockInodeOperationsRevalidate{MockInodeOperations: mn, makeNegative: makeNegative}, m, StableAttr{Type: Directory}) -} - -func (m *MockInodeOperationsRevalidate) Lookup(ctx context.Context, dir *Inode, p string) (*Dirent, error) { - if !m.makeNegative { - return m.MockInodeOperations.Lookup(ctx, dir, p) - } - return NewNegativeDirent(p), nil -} - -func TestCreateExtraRefs(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - for _, test := range []struct { - // desc is the test's description. - desc string - - // root is the Dirent to create from. - root *Dirent - - // expected references on walked Dirent. - refs int64 - }{ - { - desc: "Create caching", - root: NewDirent(ctx, NewEmptyDir(ctx, NewDirentCache(1)), "root"), - refs: 2, - }, - { - desc: "Create not caching", - root: NewDirent(ctx, NewEmptyDir(ctx, nil), "root"), - refs: 1, - }, - } { - t.Run(test.desc, func(t *testing.T) { - name := "d" - f, err := test.root.Create(ctx, test.root, name, FileFlags{}, FilePermissions{}) - if err != nil { - t.Fatalf("root.Create(root, %q) failed: %v", name, err) - } - d := f.Dirent - - if got := d.ReadRefs(); got != test.refs { - t.Errorf("dirent has a ref count of %d, want %d", got, test.refs) - } - }) - } -} - -func TestRemoveExtraRefs(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - ctx := contexttest.Context(t) - for _, test := range []struct { - // desc is the test's description. - desc string - - // root is the Dirent to make and remove from. - root *Dirent - }{ - { - desc: "Remove caching", - root: NewDirent(ctx, NewEmptyDir(ctx, NewDirentCache(1)), "root"), - }, - { - desc: "Remove not caching", - root: NewDirent(ctx, NewEmptyDir(ctx, nil), "root"), - }, - } { - t.Run(test.desc, func(t *testing.T) { - name := "d" - f, err := test.root.Create(ctx, test.root, name, FileFlags{}, FilePermissions{}) - if err != nil { - t.Fatalf("root.Create(%q, _) failed: %v", name, err) - } - d := f.Dirent - - if err := test.root.Remove(contexttest.Context(t), test.root, name, false /* dirPath */); err != nil { - t.Fatalf("root.Remove(root, %q) failed: %v", name, err) - } - - if got := d.ReadRefs(); got != 1 { - t.Fatalf("dirent has a ref count of %d, want %d", got, 1) - } - - d.DecRef(ctx) - - test.root.flush(ctx) - - if got := len(test.root.children); got != 0 { - t.Errorf("root has %d children, want %d", got, 0) - } - }) - } -} - -func TestRenameExtraRefs(t *testing.T) { - // refs == 0 -> one reference. - // refs == -1 -> has been destroyed. - - for _, test := range []struct { - // desc is the test's description. - desc string - - // cache of extra Dirent references, may be nil. - cache *DirentCache - }{ - { - desc: "Rename no caching", - cache: nil, - }, - { - desc: "Rename caching", - cache: NewDirentCache(5), - }, - } { - t.Run(test.desc, func(t *testing.T) { - ctx := contexttest.Context(t) - - dirAttr := StableAttr{Type: Directory} - - oldParent := NewDirent(ctx, NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "old_parent") - newParent := NewDirent(ctx, NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "new_parent") - - renamed, err := oldParent.Walk(ctx, oldParent, "old_child") - if err != nil { - t.Fatalf("Walk(oldParent, %q) got error %v, want nil", "old_child", err) - } - replaced, err := newParent.Walk(ctx, oldParent, "new_child") - if err != nil { - t.Fatalf("Walk(newParent, %q) got error %v, want nil", "new_child", err) - } - - if err := Rename(contexttest.RootContext(t), oldParent /*root */, oldParent, "old_child", newParent, "new_child"); err != nil { - t.Fatalf("Rename got error %v, want nil", err) - } - - oldParent.flush(ctx) - newParent.flush(ctx) - - // Expect to have only active references. - if got := renamed.ReadRefs(); got != 1 { - t.Errorf("renamed has ref count %d, want only active references %d", got, 1) - } - if got := replaced.ReadRefs(); got != 1 { - t.Errorf("replaced has ref count %d, want only active references %d", got, 1) - } - }) - } -} diff --git a/pkg/sentry/fs/event_list.go b/pkg/sentry/fs/event_list.go new file mode 100644 index 000000000..277d8458e --- /dev/null +++ b/pkg/sentry/fs/event_list.go @@ -0,0 +1,193 @@ +package fs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type eventElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (eventElementMapper) linkerFor(elem *Event) *Event { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type eventList struct { + head *Event + tail *Event +} + +// Reset resets list l to the empty state. +func (l *eventList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *eventList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *eventList) Front() *Event { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *eventList) Back() *Event { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +func (l *eventList) Len() (count int) { + for e := l.Front(); e != nil; e = (eventElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +func (l *eventList) PushFront(e *Event) { + linker := eventElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + eventElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *eventList) PushBack(e *Event) { + linker := eventElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + eventElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *eventList) PushBackList(m *eventList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + eventElementMapper{}.linkerFor(l.tail).SetNext(m.head) + eventElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *eventList) InsertAfter(b, e *Event) { + bLinker := eventElementMapper{}.linkerFor(b) + eLinker := eventElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + eventElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *eventList) InsertBefore(a, e *Event) { + aLinker := eventElementMapper{}.linkerFor(a) + eLinker := eventElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + eventElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *eventList) Remove(e *Event) { + linker := eventElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + eventElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + eventElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type eventEntry struct { + next *Event + prev *Event +} + +// Next returns the entry that follows e in the list. +func (e *eventEntry) Next() *Event { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *eventEntry) Prev() *Event { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *eventEntry) SetNext(elem *Event) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *eventEntry) SetPrev(elem *Event) { + e.prev = elem +} diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD deleted file mode 100644 index 1d09e983c..000000000 --- a/pkg/sentry/fs/fdpipe/BUILD +++ /dev/null @@ -1,48 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "fdpipe", - srcs = [ - "pipe.go", - "pipe_opener.go", - "pipe_state.go", - ], - imports = ["gvisor.dev/gvisor/pkg/sentry/fs"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/log", - "//pkg/safemem", - "//pkg/secio", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "fdpipe_test", - size = "small", - srcs = [ - "pipe_opener_test.go", - "pipe_test.go", - ], - library = ":fdpipe", - deps = [ - "//pkg/context", - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/syserror", - "//pkg/usermem", - "@com_github_google_uuid//:go_default_library", - ], -) diff --git a/pkg/sentry/fs/fdpipe/fdpipe_state_autogen.go b/pkg/sentry/fs/fdpipe/fdpipe_state_autogen.go new file mode 100644 index 000000000..f946f2ba5 --- /dev/null +++ b/pkg/sentry/fs/fdpipe/fdpipe_state_autogen.go @@ -0,0 +1,39 @@ +// automatically generated by stateify. + +package fdpipe + +import ( + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/state" +) + +func (p *pipeOperations) StateTypeName() string { + return "pkg/sentry/fs/fdpipe.pipeOperations" +} + +func (p *pipeOperations) StateFields() []string { + return []string{ + "flags", + "opener", + "readAheadBuffer", + } +} + +func (p *pipeOperations) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + var flagsValue fs.FileFlags = p.saveFlags() + stateSinkObject.SaveValue(0, flagsValue) + stateSinkObject.Save(1, &p.opener) + stateSinkObject.Save(2, &p.readAheadBuffer) +} + +func (p *pipeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(1, &p.opener) + stateSourceObject.Load(2, &p.readAheadBuffer) + stateSourceObject.LoadValue(0, new(fs.FileFlags), func(y interface{}) { p.loadFlags(y.(fs.FileFlags)) }) + stateSourceObject.AfterLoad(p.afterLoad) +} + +func init() { + state.Register((*pipeOperations)(nil)) +} diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go deleted file mode 100644 index b9cec4b13..000000000 --- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go +++ /dev/null @@ -1,523 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fdpipe - -import ( - "bytes" - "fmt" - "io" - "os" - "path" - "syscall" - "testing" - "time" - - "github.com/google/uuid" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -type hostOpener struct { - name string -} - -func (h *hostOpener) NonBlockingOpen(_ context.Context, p fs.PermMask) (*fd.FD, error) { - var flags int - switch { - case p.Read && p.Write: - flags = syscall.O_RDWR - case p.Write: - flags = syscall.O_WRONLY - case p.Read: - flags = syscall.O_RDONLY - default: - return nil, syscall.EINVAL - } - f, err := syscall.Open(h.name, flags|syscall.O_NONBLOCK, 0666) - if err != nil { - return nil, err - } - return fd.New(f), nil -} - -func pipename() string { - return fmt.Sprintf(path.Join(os.TempDir(), "test-named-pipe-%s"), uuid.New()) -} - -func mkpipe(name string) error { - return syscall.Mknod(name, syscall.S_IFIFO|0666, 0) -} - -func TestTryOpen(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // makePipe is true if the test case should create the pipe. - makePipe bool - - // flags are the fs.FileFlags used to open the pipe. - flags fs.FileFlags - - // expectFile is true if a fs.File is expected. - expectFile bool - - // err is the expected error - err error - }{ - { - desc: "FileFlags lacking Read and Write are invalid", - makePipe: false, - flags: fs.FileFlags{}, /* bogus */ - expectFile: false, - err: syscall.EINVAL, - }, - { - desc: "NonBlocking Read only error returns immediately", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Read: true, NonBlocking: true}, - expectFile: false, - err: syscall.ENOENT, - }, - { - desc: "NonBlocking Read only success returns immediately", - makePipe: true, - flags: fs.FileFlags{Read: true, NonBlocking: true}, - expectFile: true, - err: nil, - }, - { - desc: "NonBlocking Write only error returns immediately", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Write: true, NonBlocking: true}, - expectFile: false, - err: syscall.ENOENT, - }, - { - desc: "NonBlocking Write only no reader error returns immediately", - makePipe: true, - flags: fs.FileFlags{Write: true, NonBlocking: true}, - expectFile: false, - err: syscall.ENXIO, - }, - { - desc: "ReadWrite error returns immediately", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Read: true, Write: true}, - expectFile: false, - err: syscall.ENOENT, - }, - { - desc: "ReadWrite returns immediately", - makePipe: true, - flags: fs.FileFlags{Read: true, Write: true}, - expectFile: true, - err: nil, - }, - { - desc: "Blocking Write only returns open error", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Write: true}, - expectFile: false, - err: syscall.ENOENT, /* from bogus perms */ - }, - { - desc: "Blocking Read only returns open error", - makePipe: false, /* causes the error */ - flags: fs.FileFlags{Read: true}, - expectFile: false, - err: syscall.ENOENT, - }, - { - desc: "Blocking Write only returns with syserror.ErrWouldBlock", - makePipe: true, - flags: fs.FileFlags{Write: true}, - expectFile: false, - err: syserror.ErrWouldBlock, - }, - { - desc: "Blocking Read only returns with syserror.ErrWouldBlock", - makePipe: true, - flags: fs.FileFlags{Read: true}, - expectFile: false, - err: syserror.ErrWouldBlock, - }, - } { - name := pipename() - if test.makePipe { - // Create the pipe. We do this per-test case to keep tests independent. - if err := mkpipe(name); err != nil { - t.Errorf("%s: failed to make host pipe: %v", test.desc, err) - continue - } - defer syscall.Unlink(name) - } - - // Use a host opener to keep things simple. - opener := &hostOpener{name: name} - - pipeOpenState := &pipeOpenState{} - ctx := contexttest.Context(t) - pipeOps, err := pipeOpenState.TryOpen(ctx, opener, test.flags) - if unwrapError(err) != test.err { - t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) - if pipeOps != nil { - // Cleanup the state of the pipe, and remove the fd from the - // fdnotifier. Sadly this needed to maintain the correctness - // of other tests because the fdnotifier is global. - pipeOps.Release(ctx) - } - continue - } - if (pipeOps != nil) != test.expectFile { - t.Errorf("%s: got non-nil file %v, want %v", test.desc, pipeOps != nil, test.expectFile) - } - if pipeOps != nil { - // Same as above. - pipeOps.Release(ctx) - } - } -} - -func TestPipeOpenUnblocksEventually(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // partnerIsReader is true if the goroutine opening the same pipe as the test case - // should open the pipe read only. Otherwise write only. This also means that the - // test case will open the pipe in the opposite way. - partnerIsReader bool - - // partnerIsBlocking is true if the goroutine opening the same pipe as the test case - // should do so without the O_NONBLOCK flag, otherwise opens the pipe with O_NONBLOCK - // until ENXIO is not returned. - partnerIsBlocking bool - }{ - { - desc: "Blocking Read with blocking writer partner opens eventually", - partnerIsReader: false, - partnerIsBlocking: true, - }, - { - desc: "Blocking Write with blocking reader partner opens eventually", - partnerIsReader: true, - partnerIsBlocking: true, - }, - { - desc: "Blocking Read with non-blocking writer partner opens eventually", - partnerIsReader: false, - partnerIsBlocking: false, - }, - { - desc: "Blocking Write with non-blocking reader partner opens eventually", - partnerIsReader: true, - partnerIsBlocking: false, - }, - } { - // Create the pipe. We do this per-test case to keep tests independent. - name := pipename() - if err := mkpipe(name); err != nil { - t.Errorf("%s: failed to make host pipe: %v", test.desc, err) - continue - } - defer syscall.Unlink(name) - - // Spawn the partner. - type fderr struct { - fd int - err error - } - errch := make(chan fderr, 1) - go func() { - var flags int - if test.partnerIsReader { - flags = syscall.O_RDONLY - } else { - flags = syscall.O_WRONLY - } - if test.partnerIsBlocking { - fd, err := syscall.Open(name, flags, 0666) - errch <- fderr{fd: fd, err: err} - } else { - var fd int - err := error(syscall.ENXIO) - for err == syscall.ENXIO { - fd, err = syscall.Open(name, flags|syscall.O_NONBLOCK, 0666) - time.Sleep(1 * time.Second) - } - errch <- fderr{fd: fd, err: err} - } - }() - - // Setup file flags for either a read only or write only open. - flags := fs.FileFlags{ - Read: !test.partnerIsReader, - Write: test.partnerIsReader, - } - - // Open the pipe in a blocking way, which should succeed eventually. - opener := &hostOpener{name: name} - ctx := contexttest.Context(t) - pipeOps, err := Open(ctx, opener, flags) - if pipeOps != nil { - // Same as TestTryOpen. - pipeOps.Release(ctx) - } - - // Check that the partner opened the file successfully. - e := <-errch - if e.err != nil { - t.Errorf("%s: partner got error %v, wanted nil", test.desc, e.err) - continue - } - // If so, then close the partner fd to avoid leaking an fd. - syscall.Close(e.fd) - - // Check that our blocking open was successful. - if err != nil { - t.Errorf("%s: blocking open got error %v, wanted nil", test.desc, err) - continue - } - if pipeOps == nil { - t.Errorf("%s: blocking open got nil file, wanted non-nil", test.desc) - continue - } - } -} - -func TestCopiedReadAheadBuffer(t *testing.T) { - // Create the pipe. - name := pipename() - if err := mkpipe(name); err != nil { - t.Fatalf("failed to make host pipe: %v", err) - } - defer syscall.Unlink(name) - - // We're taking advantage of the fact that pipes opened read only always return - // success, but internally they are not deemed "opened" until we're sure that - // another writer comes along. This means we can open the same pipe write only - // with no problems + write to it, given that opener.Open already tried to open - // the pipe RDONLY and succeeded, which we know happened if TryOpen returns - // syserror.ErrwouldBlock. - // - // This simulates the open(RDONLY) <-> open(WRONLY)+write race we care about, but - // does not cause our test to be racy (which would be terrible). - opener := &hostOpener{name: name} - pipeOpenState := &pipeOpenState{} - ctx := contexttest.Context(t) - pipeOps, err := pipeOpenState.TryOpen(ctx, opener, fs.FileFlags{Read: true}) - if pipeOps != nil { - pipeOps.Release(ctx) - t.Fatalf("open(%s, %o) got file, want nil", name, syscall.O_RDONLY) - } - if err != syserror.ErrWouldBlock { - t.Fatalf("open(%s, %o) got error %v, want %v", name, syscall.O_RDONLY, err, syserror.ErrWouldBlock) - } - - // Then open the same pipe write only and write some bytes to it. The next - // time we try to open the pipe read only again via the pipeOpenState, we should - // succeed and buffer some of the bytes written. - fd, err := syscall.Open(name, syscall.O_WRONLY, 0666) - if err != nil { - t.Fatalf("open(%s, %o) got error %v, want nil", name, syscall.O_WRONLY, err) - } - defer syscall.Close(fd) - - data := []byte("hello") - if n, err := syscall.Write(fd, data); n != len(data) || err != nil { - t.Fatalf("write(%v) got (%d, %v), want (%d, nil)", data, n, err, len(data)) - } - - // Try the read again, knowing that it should succeed this time. - pipeOps, err = pipeOpenState.TryOpen(ctx, opener, fs.FileFlags{Read: true}) - if pipeOps == nil { - t.Fatalf("open(%s, %o) got nil file, want not nil", name, syscall.O_RDONLY) - } - defer pipeOps.Release(ctx) - - if err != nil { - t.Fatalf("open(%s, %o) got error %v, want nil", name, syscall.O_RDONLY, err) - } - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, pipeOps) - - // Check that the file we opened points to a pipe with a non-empty read ahead buffer. - bufsize := len(pipeOps.readAheadBuffer) - if bufsize != 1 { - t.Fatalf("read ahead buffer got %d bytes, want %d", bufsize, 1) - } - - // Now for the final test, try to read everything in, expecting to get back all of - // the bytes that were written at once. Note that in the wild there is no atomic - // read size so expecting to get all bytes from a single writer when there are - // multiple readers is a bad expectation. - buf := make([]byte, len(data)) - ioseq := usermem.BytesIOSequence(buf) - n, err := pipeOps.Read(ctx, file, ioseq, 0) - if err != nil { - t.Fatalf("read request got error %v, want nil", err) - } - if n != int64(len(data)) { - t.Fatalf("read request got %d bytes, want %d", n, len(data)) - } - if !bytes.Equal(buf, data) { - t.Errorf("read request got bytes [%v], want [%v]", buf, data) - } -} - -func TestPipeHangup(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // flags control how we open our end of the pipe and must be read - // only or write only. They also dicate how a coordinating partner - // fd is opened, which is their inverse (read only -> write only, etc). - flags fs.FileFlags - - // hangupSelf if true causes the test case to close our end of the pipe - // and causes hangup errors to be asserted on our coordinating partner's - // fd. If hangupSelf is false, then our partner's fd is closed and the - // hangup errors are expected on our end of the pipe. - hangupSelf bool - }{ - { - desc: "Read only gets hangup error", - flags: fs.FileFlags{Read: true}, - }, - { - desc: "Write only gets hangup error", - flags: fs.FileFlags{Write: true}, - }, - { - desc: "Read only generates hangup error", - flags: fs.FileFlags{Read: true}, - hangupSelf: true, - }, - { - desc: "Write only generates hangup error", - flags: fs.FileFlags{Write: true}, - hangupSelf: true, - }, - } { - if test.flags.Read == test.flags.Write { - t.Errorf("%s: test requires a single reader or writer", test.desc) - continue - } - - // Create the pipe. We do this per-test case to keep tests independent. - name := pipename() - if err := mkpipe(name); err != nil { - t.Errorf("%s: failed to make host pipe: %v", test.desc, err) - continue - } - defer syscall.Unlink(name) - - // Fire off a partner routine which tries to open the same pipe blocking, - // which will synchronize with us. The channel allows us to get back the - // fd once we expect this partner routine to succeed, so we can manifest - // hangup events more directly. - fdchan := make(chan int, 1) - go func() { - // Be explicit about the flags to protect the test from - // misconfiguration. - var flags int - if test.flags.Read { - flags = syscall.O_WRONLY - } else { - flags = syscall.O_RDONLY - } - fd, err := syscall.Open(name, flags, 0666) - if err != nil { - t.Logf("Open(%q, %o, 0666) partner failed: %v", name, flags, err) - } - fdchan <- fd - }() - - // Open our end in a blocking way to ensure that we coordinate. - opener := &hostOpener{name: name} - ctx := contexttest.Context(t) - pipeOps, err := Open(ctx, opener, test.flags) - if err != nil { - t.Errorf("%s: Open got error %v, want nil", test.desc, err) - continue - } - // Don't defer file.DecRef here because that causes the hangup we're - // trying to test for. - - // Expect the partner routine to have coordinated with us and get back - // its open fd. - f := <-fdchan - if f < 0 { - t.Errorf("%s: partner routine got fd %d, want > 0", test.desc, f) - pipeOps.Release(ctx) - continue - } - - if test.hangupSelf { - // Hangup self and assert that our partner got the expected hangup - // error. - pipeOps.Release(ctx) - - if test.flags.Read { - // Partner is writer. - assertWriterHungup(t, test.desc, fd.NewReadWriter(f)) - } else { - // Partner is reader. - assertReaderHungup(t, test.desc, fd.NewReadWriter(f)) - } - } else { - // Hangup our partner and expect us to get the hangup error. - syscall.Close(f) - defer pipeOps.Release(ctx) - - if test.flags.Read { - assertReaderHungup(t, test.desc, pipeOps.(*pipeOperations).file) - } else { - assertWriterHungup(t, test.desc, pipeOps.(*pipeOperations).file) - } - } - } -} - -func assertReaderHungup(t *testing.T, desc string, reader io.Reader) bool { - // Drain the pipe completely, it might have crap in it, but expect EOF eventually. - var err error - for err == nil { - _, err = reader.Read(make([]byte, 10)) - } - if err != io.EOF { - t.Errorf("%s: read from self after hangup got error %v, want %v", desc, err, io.EOF) - return false - } - return true -} - -func assertWriterHungup(t *testing.T, desc string, writer io.Writer) bool { - if _, err := writer.Write([]byte("hello")); unwrapError(err) != syscall.EPIPE { - t.Errorf("%s: write to self after hangup got error %v, want %v", desc, err, syscall.EPIPE) - return false - } - return true -} diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go deleted file mode 100644 index 1c9e82562..000000000 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ /dev/null @@ -1,507 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fdpipe - -import ( - "bytes" - "io" - "os" - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -func singlePipeFD() (int, error) { - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - return -1, err - } - syscall.Close(fds[1]) - return fds[0], nil -} - -func singleDirFD() (int, error) { - return syscall.Open(os.TempDir(), syscall.O_RDONLY, 0666) -} - -func mockPipeDirent(t *testing.T) *fs.Dirent { - ctx := contexttest.Context(t) - node := fs.NewMockInodeOperations(ctx) - node.UAttr = fs.UnstableAttr{ - Perms: fs.FilePermissions{ - User: fs.PermMask{Read: true, Write: true}, - }, - } - inode := fs.NewInode(ctx, node, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - BlockSize: usermem.PageSize, - }) - return fs.NewDirent(ctx, inode, "") -} - -func TestNewPipe(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // getfd generates the fd to pass to newPipeOperations. - getfd func() (int, error) - - // flags are the fs.FileFlags passed to newPipeOperations. - flags fs.FileFlags - - // readAheadBuffer is the buffer passed to newPipeOperations. - readAheadBuffer []byte - - // err is the expected error. - err error - }{ - { - desc: "Cannot make new pipe from bad fd", - getfd: func() (int, error) { return -1, nil }, - err: syscall.EINVAL, - }, - { - desc: "Cannot make new pipe from non-pipe fd", - getfd: singleDirFD, - err: syscall.EINVAL, - }, - { - desc: "Can make new pipe from pipe fd", - getfd: singlePipeFD, - flags: fs.FileFlags{Read: true}, - readAheadBuffer: []byte("hello"), - }, - } { - gfd, err := test.getfd() - if err != nil { - t.Errorf("%s: getfd got (%d, %v), want (fd, nil)", test.desc, gfd, err) - continue - } - f := fd.New(gfd) - - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, test.flags, f, test.readAheadBuffer) - if p != nil { - // This is necessary to remove the fd from the global fd notifier. - defer p.Release(ctx) - } else { - // If there is no p to DecRef on, because newPipeOperations failed, then the - // file still needs to be closed. - defer f.Close() - } - - if err != test.err { - t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) - continue - } - // Check the state of the pipe given that it was successfully opened. - if err == nil { - if p == nil { - t.Errorf("%s: got nil pipe and nil error, want (pipe, nil)", test.desc) - continue - } - if flags := p.flags; test.flags != flags { - t.Errorf("%s: got file flags %v, want %v", test.desc, flags, test.flags) - continue - } - if len(test.readAheadBuffer) != len(p.readAheadBuffer) { - t.Errorf("%s: got read ahead buffer length %d, want %d", test.desc, len(p.readAheadBuffer), len(test.readAheadBuffer)) - continue - } - fileFlags, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(p.file.FD()), syscall.F_GETFL, 0) - if errno != 0 { - t.Errorf("%s: failed to get file flags for fd %d, got %v, want 0", test.desc, p.file.FD(), errno) - continue - } - if fileFlags&syscall.O_NONBLOCK == 0 { - t.Errorf("%s: pipe is blocking, expected non-blocking", test.desc) - continue - } - if !fdnotifier.HasFD(int32(f.FD())) { - t.Errorf("%s: pipe fd %d is not registered for events", test.desc, f.FD()) - } - } - } -} - -func TestPipeDestruction(t *testing.T) { - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - f := fd.New(fds[0]) - - // We don't care about the other end, just use the read end. - syscall.Close(fds[1]) - - // Test the read end, but it doesn't really matter which. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Read: true}, f, nil) - if err != nil { - f.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - // Drop our only reference, which should trigger the destructor. - p.Release(ctx) - - if fdnotifier.HasFD(int32(fds[0])) { - t.Fatalf("after DecRef fdnotifier has fd %d, want no longer registered", fds[0]) - } - if p.file != nil { - t.Errorf("after DecRef got file, want nil") - } -} - -type Seek struct{} - -type ReadDir struct{} - -type Writev struct { - Src usermem.IOSequence -} - -type Readv struct { - Dst usermem.IOSequence -} - -type Fsync struct{} - -func TestPipeRequest(t *testing.T) { - for _, test := range []struct { - // desc is the test's description. - desc string - - // request to execute. - context interface{} - - // flags determines whether to use the read or write end - // of the pipe, for this test it can only be Read or Write. - flags fs.FileFlags - - // keepOpenPartner if false closes the other end of the pipe, - // otherwise this is delayed until the end of the test. - keepOpenPartner bool - - // expected error - err error - }{ - { - desc: "ReadDir on pipe returns ENOTDIR", - context: &ReadDir{}, - err: syscall.ENOTDIR, - }, - { - desc: "Fsync on pipe returns EINVAL", - context: &Fsync{}, - err: syscall.EINVAL, - }, - { - desc: "Seek on pipe returns ESPIPE", - context: &Seek{}, - err: syscall.ESPIPE, - }, - { - desc: "Readv on pipe from empty buffer returns nil", - context: &Readv{Dst: usermem.BytesIOSequence(nil)}, - flags: fs.FileFlags{Read: true}, - }, - { - desc: "Readv on pipe from non-empty buffer and closed partner returns EOF", - context: &Readv{Dst: usermem.BytesIOSequence(make([]byte, 10))}, - flags: fs.FileFlags{Read: true}, - err: io.EOF, - }, - { - desc: "Readv on pipe from non-empty buffer and open partner returns EWOULDBLOCK", - context: &Readv{Dst: usermem.BytesIOSequence(make([]byte, 10))}, - flags: fs.FileFlags{Read: true}, - keepOpenPartner: true, - err: syserror.ErrWouldBlock, - }, - { - desc: "Writev on pipe from empty buffer returns nil", - context: &Writev{Src: usermem.BytesIOSequence(nil)}, - flags: fs.FileFlags{Write: true}, - }, - { - desc: "Writev on pipe from non-empty buffer and closed partner returns EPIPE", - context: &Writev{Src: usermem.BytesIOSequence([]byte("hello"))}, - flags: fs.FileFlags{Write: true}, - err: syscall.EPIPE, - }, - { - desc: "Writev on pipe from non-empty buffer and open partner succeeds", - context: &Writev{Src: usermem.BytesIOSequence([]byte("hello"))}, - flags: fs.FileFlags{Write: true}, - keepOpenPartner: true, - }, - } { - if test.flags.Read && test.flags.Write { - panic("both read and write not supported for this test") - } - - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - t.Errorf("%s: failed to create pipes: got %v, want nil", test.desc, err) - continue - } - - // Configure the fd and partner fd based on the file flags. - testFd, partnerFd := fds[0], fds[1] - if test.flags.Write { - testFd, partnerFd = fds[1], fds[0] - } - - // Configure closing the fds. - if test.keepOpenPartner { - defer syscall.Close(partnerFd) - } else { - syscall.Close(partnerFd) - } - - // Create the pipe. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, test.flags, fd.New(testFd), nil) - if err != nil { - t.Fatalf("%s: newPipeOperations got error %v, want nil", test.desc, err) - } - defer p.Release(ctx) - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - // Issue request via the appropriate function. - switch c := test.context.(type) { - case *Seek: - _, err = p.Seek(ctx, file, 0, 0) - case *ReadDir: - _, err = p.Readdir(ctx, file, nil) - case *Readv: - _, err = p.Read(ctx, file, c.Dst, 0) - case *Writev: - _, err = p.Write(ctx, file, c.Src, 0) - case *Fsync: - err = p.Fsync(ctx, file, 0, fs.FileMaxOffset, fs.SyncAll) - default: - t.Errorf("%s: unknown request type %T", test.desc, test.context) - } - - if unwrapError(err) != test.err { - t.Errorf("%s: got error %v, want %v", test.desc, err, test.err) - } - } -} - -func TestPipeReadAheadBuffer(t *testing.T) { - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - rfile := fd.New(fds[0]) - - // Eventually close the write end, which is not wrapped in a pipe object. - defer syscall.Close(fds[1]) - - // Write some bytes to this end. - data := []byte("world") - if n, err := syscall.Write(fds[1], data); n != len(data) || err != nil { - rfile.Close() - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(data)) - } - // Close the write end immediately, we don't care about it. - - buffered := []byte("hello ") - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Read: true}, rfile, buffered) - if err != nil { - rfile.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - defer p.Release(ctx) - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - // In total we expect to read data + buffered. - total := append(buffered, data...) - - buf := make([]byte, len(total)) - iov := usermem.BytesIOSequence(buf) - n, err := p.Read(contexttest.Context(t), file, iov, 0) - if err != nil { - t.Fatalf("read request got error %v, want nil", err) - } - if n != int64(len(total)) { - t.Fatalf("read request got %d bytes, want %d", n, len(total)) - } - if !bytes.Equal(buf, total) { - t.Errorf("read request got bytes [%v], want [%v]", buf, total) - } -} - -// This is very important for pipes in general because they can return -// EWOULDBLOCK and for those that block they must continue until they have read -// all of the data (and report it as such). -func TestPipeReadsAccumulate(t *testing.T) { - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - rfile := fd.New(fds[0]) - - // Eventually close the write end, it doesn't depend on a pipe object. - defer syscall.Close(fds[1]) - - // Get a new read only pipe reference. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Read: true}, rfile, nil) - if err != nil { - rfile.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - // Don't forget to remove the fd from the fd notifier. Otherwise other tests will - // likely be borked, because it's global :( - defer p.Release(ctx) - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - // Write some some bytes to the pipe. - data := []byte("some message") - if n, err := syscall.Write(fds[1], data); n != len(data) || err != nil { - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(data)) - } - - // Construct a segment vec that is a bit more than we have written so we - // trigger an EWOULDBLOCK. - wantBytes := len(data) + 1 - readBuffer := make([]byte, wantBytes) - iov := usermem.BytesIOSequence(readBuffer) - n, err := p.Read(ctx, file, iov, 0) - total := n - iov = iov.DropFirst64(n) - if err != syserror.ErrWouldBlock { - t.Fatalf("Readv got error %v, want %v", err, syserror.ErrWouldBlock) - } - - // Write a few more bytes to allow us to read more/accumulate. - extra := []byte("extra") - if n, err := syscall.Write(fds[1], extra); n != len(extra) || err != nil { - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(extra)) - } - - // This time, using the same request, we should not block. - n, err = p.Read(ctx, file, iov, 0) - total += n - if err != nil { - t.Fatalf("Readv got error %v, want nil", err) - } - - // Assert that the result we got back is cumulative. - if total != int64(wantBytes) { - t.Fatalf("Readv sequence got %d bytes, want %d", total, wantBytes) - } - - if want := append(data, extra[0]); !bytes.Equal(readBuffer, want) { - t.Errorf("Readv sequence got %v, want %v", readBuffer, want) - } -} - -// Same as TestReadsAccumulate. -func TestPipeWritesAccumulate(t *testing.T) { - fds := make([]int, 2) - if err := syscall.Pipe(fds); err != nil { - t.Fatalf("failed to create pipes: got %v, want nil", err) - } - wfile := fd.New(fds[1]) - - // Eventually close the read end, it doesn't depend on a pipe object. - defer syscall.Close(fds[0]) - - // Get a new write only pipe reference. - ctx := contexttest.Context(t) - p, err := newPipeOperations(ctx, nil, fs.FileFlags{Write: true}, wfile, nil) - if err != nil { - wfile.Close() - t.Fatalf("newPipeOperations got error %v, want nil", err) - } - // Don't forget to remove the fd from the fd notifier. Otherwise other tests - // will likely be borked, because it's global :( - defer p.Release(ctx) - - inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ - Type: fs.Pipe, - }) - file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) - - pipeSize, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(wfile.FD()), syscall.F_GETPIPE_SZ, 0) - if errno != 0 { - t.Fatalf("fcntl(F_GETPIPE_SZ) failed: %v", errno) - } - t.Logf("Pipe buffer size: %d", pipeSize) - - // Construct a segment vec that is larger than the pipe size to trigger an - // EWOULDBLOCK. - wantBytes := int(pipeSize) * 2 - writeBuffer := make([]byte, wantBytes) - for i := 0; i < wantBytes; i++ { - writeBuffer[i] = 'a' - } - iov := usermem.BytesIOSequence(writeBuffer) - n, err := p.Write(ctx, file, iov, 0) - if err != syserror.ErrWouldBlock { - t.Fatalf("Writev got error %v, want %v", err, syserror.ErrWouldBlock) - } - if n != int64(pipeSize) { - t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize) - } - total := n - iov = iov.DropFirst64(n) - - // Read the entire pipe buf size to make space for the second half. - readBuffer := make([]byte, n) - if n, err := syscall.Read(fds[0], readBuffer); n != len(readBuffer) || err != nil { - t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(readBuffer)) - } - if !bytes.Equal(readBuffer, writeBuffer[:len(readBuffer)]) { - t.Fatalf("wrong data read from pipe, got: %v, want: %v", readBuffer, writeBuffer) - } - - // This time we should not block. - n, err = p.Write(ctx, file, iov, 0) - if err != nil { - t.Fatalf("Writev got error %v, want nil", err) - } - if n != int64(pipeSize) { - t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize) - } - total += n - - // Assert that the result we got back is cumulative. - if total != int64(wantBytes) { - t.Fatalf("Writev sequence got %d bytes, want %d", total, wantBytes) - } -} diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go deleted file mode 100644 index 1971cc680..000000000 --- a/pkg/sentry/fs/file_overlay_test.go +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" -) - -func TestReaddir(t *testing.T) { - ctx := contexttest.Context(t) - ctx = &rootContext{ - Context: ctx, - root: fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root"), - } - for _, test := range []struct { - // Test description. - desc string - - // Lookup parameters. - dir *fs.Inode - - // Want from lookup. - err error - names []string - }{ - { - desc: "no upper, lower has entries", - dir: fs.NewTestOverlayDir(ctx, - nil, /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - {name: "b"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "b"}, - }, - { - desc: "upper has entries, no lower", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - {name: "b"}, - }, nil), /* upper */ - nil, /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "b"}, - }, - { - desc: "upper and lower, entries combine", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "b"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "b"}, - }, - { - desc: "upper and lower, entries combine, none are masked", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - }, []string{"b"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "c"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "c"}, - }, - { - desc: "upper and lower, entries combine, upper masks some of lower", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - {name: "a"}, - }, []string{"b"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - {name: "b"}, /* will be masked */ - {name: "c"}, - }, nil), /* lower */ - false /* revalidate */), - names: []string{".", "..", "a", "c"}, - }, - } { - t.Run(test.desc, func(t *testing.T) { - openDir, err := test.dir.GetFile(ctx, fs.NewDirent(ctx, test.dir, "stub"), fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("GetFile got error %v, want nil", err) - } - stubSerializer := &fs.CollectEntriesSerializer{} - err = openDir.Readdir(ctx, stubSerializer) - if err != test.err { - t.Fatalf("Readdir got error %v, want nil", err) - } - if err != nil { - return - } - if !reflect.DeepEqual(stubSerializer.Order, test.names) { - t.Errorf("Readdir got names %v, want %v", stubSerializer.Order, test.names) - } - }) - } -} - -func TestReaddirRevalidation(t *testing.T) { - ctx := contexttest.Context(t) - ctx = &rootContext{ - Context: ctx, - root: fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root"), - } - - // Create an overlay with two directories, each with one file. - upper := newTestRamfsDir(ctx, []dirContent{{name: "a"}}, nil) - lower := newTestRamfsDir(ctx, []dirContent{{name: "b"}}, nil) - overlay := fs.NewTestOverlayDir(ctx, upper, lower, true /* revalidate */) - - // Get a handle to the dirent in the upper filesystem so that we can - // modify it without going through the dirent. - upperDir := upper.InodeOperations.(*dir).InodeOperations.(*ramfs.Dir) - - // Check that overlay returns the files from both upper and lower. - openDir, err := overlay.GetFile(ctx, fs.NewDirent(ctx, overlay, "stub"), fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("GetFile got error %v, want nil", err) - } - ser := &fs.CollectEntriesSerializer{} - if err := openDir.Readdir(ctx, ser); err != nil { - t.Fatalf("Readdir got error %v, want nil", err) - } - got, want := ser.Order, []string{".", "..", "a", "b"} - if !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } - - // Remove "a" from the upper and add "c". - if err := upperDir.Remove(ctx, upper, "a"); err != nil { - t.Fatalf("error removing child: %v", err) - } - upperDir.AddChild(ctx, "c", fs.NewInode(ctx, fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermissions{}, 0), - upper.MountSource, fs.StableAttr{Type: fs.RegularFile})) - - // Seek to beginning of the directory and do the readdir again. - if _, err := openDir.Seek(ctx, fs.SeekSet, 0); err != nil { - t.Fatalf("error seeking to beginning of dir: %v", err) - } - ser = &fs.CollectEntriesSerializer{} - if err := openDir.Readdir(ctx, ser); err != nil { - t.Fatalf("Readdir got error %v, want nil", err) - } - - // Readdir should return the updated children. - got, want = ser.Order, []string{".", "..", "b", "c"} - if !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } -} - -type rootContext struct { - context.Context - root *fs.Dirent -} - -// Value implements context.Context. -func (r *rootContext) Value(key interface{}) interface{} { - switch key { - case fs.CtxRoot: - r.root.IncRef() - return r.root - default: - return r.Context.Value(key) - } -} diff --git a/pkg/sentry/fs/filetest/BUILD b/pkg/sentry/fs/filetest/BUILD deleted file mode 100644 index a8000e010..000000000 --- a/pkg/sentry/fs/filetest/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "filetest", - testonly = 1, - srcs = ["filetest.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/filetest/filetest.go b/pkg/sentry/fs/filetest/filetest.go deleted file mode 100644 index 8049538f2..000000000 --- a/pkg/sentry/fs/filetest/filetest.go +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package filetest provides a test implementation of an fs.File. -package filetest - -import ( - "fmt" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/anon" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -// TestFileOperations is an implementation of the File interface. It provides all -// required methods. -type TestFileOperations struct { - fsutil.FileNoopRelease `state:"nosave"` - fsutil.FilePipeSeek `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileNoFsync `state:"nosave"` - fsutil.FileNoopFlush `state:"nosave"` - fsutil.FileNoMMap `state:"nosave"` - fsutil.FileNoIoctl `state:"nosave"` - fsutil.FileNoSplice `state:"nosave"` - fsutil.FileUseInodeUnstableAttr `state:"nosave"` - waiter.AlwaysReady `state:"nosave"` -} - -// NewTestFile creates and initializes a new test file. -func NewTestFile(tb testing.TB) *fs.File { - ctx := contexttest.Context(tb) - dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "test") - return fs.NewFile(ctx, dirent, fs.FileFlags{}, &TestFileOperations{}) -} - -// Read just fails the request. -func (*TestFileOperations) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, fmt.Errorf("Readv not implemented") -} - -// Write just fails the request. -func (*TestFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { - return 0, fmt.Errorf("Writev not implemented") -} diff --git a/pkg/sentry/fs/fs_state_autogen.go b/pkg/sentry/fs/fs_state_autogen.go new file mode 100644 index 000000000..667495740 --- /dev/null +++ b/pkg/sentry/fs/fs_state_autogen.go @@ -0,0 +1,1141 @@ +// automatically generated by stateify. + +package fs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *StableAttr) StateTypeName() string { + return "pkg/sentry/fs.StableAttr" +} + +func (s *StableAttr) StateFields() []string { + return []string{ + "Type", + "DeviceID", + "InodeID", + "BlockSize", + "DeviceFileMajor", + "DeviceFileMinor", + } +} + +func (s *StableAttr) beforeSave() {} + +func (s *StableAttr) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Type) + stateSinkObject.Save(1, &s.DeviceID) + stateSinkObject.Save(2, &s.InodeID) + stateSinkObject.Save(3, &s.BlockSize) + stateSinkObject.Save(4, &s.DeviceFileMajor) + stateSinkObject.Save(5, &s.DeviceFileMinor) +} + +func (s *StableAttr) afterLoad() {} + +func (s *StableAttr) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Type) + stateSourceObject.Load(1, &s.DeviceID) + stateSourceObject.Load(2, &s.InodeID) + stateSourceObject.Load(3, &s.BlockSize) + stateSourceObject.Load(4, &s.DeviceFileMajor) + stateSourceObject.Load(5, &s.DeviceFileMinor) +} + +func (ua *UnstableAttr) StateTypeName() string { + return "pkg/sentry/fs.UnstableAttr" +} + +func (ua *UnstableAttr) StateFields() []string { + return []string{ + "Size", + "Usage", + "Perms", + "Owner", + "AccessTime", + "ModificationTime", + "StatusChangeTime", + "Links", + } +} + +func (ua *UnstableAttr) beforeSave() {} + +func (ua *UnstableAttr) StateSave(stateSinkObject state.Sink) { + ua.beforeSave() + stateSinkObject.Save(0, &ua.Size) + stateSinkObject.Save(1, &ua.Usage) + stateSinkObject.Save(2, &ua.Perms) + stateSinkObject.Save(3, &ua.Owner) + stateSinkObject.Save(4, &ua.AccessTime) + stateSinkObject.Save(5, &ua.ModificationTime) + stateSinkObject.Save(6, &ua.StatusChangeTime) + stateSinkObject.Save(7, &ua.Links) +} + +func (ua *UnstableAttr) afterLoad() {} + +func (ua *UnstableAttr) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ua.Size) + stateSourceObject.Load(1, &ua.Usage) + stateSourceObject.Load(2, &ua.Perms) + stateSourceObject.Load(3, &ua.Owner) + stateSourceObject.Load(4, &ua.AccessTime) + stateSourceObject.Load(5, &ua.ModificationTime) + stateSourceObject.Load(6, &ua.StatusChangeTime) + stateSourceObject.Load(7, &ua.Links) +} + +func (a *AttrMask) StateTypeName() string { + return "pkg/sentry/fs.AttrMask" +} + +func (a *AttrMask) StateFields() []string { + return []string{ + "Type", + "DeviceID", + "InodeID", + "BlockSize", + "Size", + "Usage", + "Perms", + "UID", + "GID", + "AccessTime", + "ModificationTime", + "StatusChangeTime", + "Links", + } +} + +func (a *AttrMask) beforeSave() {} + +func (a *AttrMask) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.Type) + stateSinkObject.Save(1, &a.DeviceID) + stateSinkObject.Save(2, &a.InodeID) + stateSinkObject.Save(3, &a.BlockSize) + stateSinkObject.Save(4, &a.Size) + stateSinkObject.Save(5, &a.Usage) + stateSinkObject.Save(6, &a.Perms) + stateSinkObject.Save(7, &a.UID) + stateSinkObject.Save(8, &a.GID) + stateSinkObject.Save(9, &a.AccessTime) + stateSinkObject.Save(10, &a.ModificationTime) + stateSinkObject.Save(11, &a.StatusChangeTime) + stateSinkObject.Save(12, &a.Links) +} + +func (a *AttrMask) afterLoad() {} + +func (a *AttrMask) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.Type) + stateSourceObject.Load(1, &a.DeviceID) + stateSourceObject.Load(2, &a.InodeID) + stateSourceObject.Load(3, &a.BlockSize) + stateSourceObject.Load(4, &a.Size) + stateSourceObject.Load(5, &a.Usage) + stateSourceObject.Load(6, &a.Perms) + stateSourceObject.Load(7, &a.UID) + stateSourceObject.Load(8, &a.GID) + stateSourceObject.Load(9, &a.AccessTime) + stateSourceObject.Load(10, &a.ModificationTime) + stateSourceObject.Load(11, &a.StatusChangeTime) + stateSourceObject.Load(12, &a.Links) +} + +func (p *PermMask) StateTypeName() string { + return "pkg/sentry/fs.PermMask" +} + +func (p *PermMask) StateFields() []string { + return []string{ + "Read", + "Write", + "Execute", + } +} + +func (p *PermMask) beforeSave() {} + +func (p *PermMask) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.Read) + stateSinkObject.Save(1, &p.Write) + stateSinkObject.Save(2, &p.Execute) +} + +func (p *PermMask) afterLoad() {} + +func (p *PermMask) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.Read) + stateSourceObject.Load(1, &p.Write) + stateSourceObject.Load(2, &p.Execute) +} + +func (f *FilePermissions) StateTypeName() string { + return "pkg/sentry/fs.FilePermissions" +} + +func (f *FilePermissions) StateFields() []string { + return []string{ + "User", + "Group", + "Other", + "Sticky", + "SetUID", + "SetGID", + } +} + +func (f *FilePermissions) beforeSave() {} + +func (f *FilePermissions) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.User) + stateSinkObject.Save(1, &f.Group) + stateSinkObject.Save(2, &f.Other) + stateSinkObject.Save(3, &f.Sticky) + stateSinkObject.Save(4, &f.SetUID) + stateSinkObject.Save(5, &f.SetGID) +} + +func (f *FilePermissions) afterLoad() {} + +func (f *FilePermissions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.User) + stateSourceObject.Load(1, &f.Group) + stateSourceObject.Load(2, &f.Other) + stateSourceObject.Load(3, &f.Sticky) + stateSourceObject.Load(4, &f.SetUID) + stateSourceObject.Load(5, &f.SetGID) +} + +func (f *FileOwner) StateTypeName() string { + return "pkg/sentry/fs.FileOwner" +} + +func (f *FileOwner) StateFields() []string { + return []string{ + "UID", + "GID", + } +} + +func (f *FileOwner) beforeSave() {} + +func (f *FileOwner) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.UID) + stateSinkObject.Save(1, &f.GID) +} + +func (f *FileOwner) afterLoad() {} + +func (f *FileOwner) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.UID) + stateSourceObject.Load(1, &f.GID) +} + +func (d *DentAttr) StateTypeName() string { + return "pkg/sentry/fs.DentAttr" +} + +func (d *DentAttr) StateFields() []string { + return []string{ + "Type", + "InodeID", + } +} + +func (d *DentAttr) beforeSave() {} + +func (d *DentAttr) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.Type) + stateSinkObject.Save(1, &d.InodeID) +} + +func (d *DentAttr) afterLoad() {} + +func (d *DentAttr) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.Type) + stateSourceObject.Load(1, &d.InodeID) +} + +func (s *SortedDentryMap) StateTypeName() string { + return "pkg/sentry/fs.SortedDentryMap" +} + +func (s *SortedDentryMap) StateFields() []string { + return []string{ + "names", + "entries", + } +} + +func (s *SortedDentryMap) beforeSave() {} + +func (s *SortedDentryMap) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.names) + stateSinkObject.Save(1, &s.entries) +} + +func (s *SortedDentryMap) afterLoad() {} + +func (s *SortedDentryMap) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.names) + stateSourceObject.Load(1, &s.entries) +} + +func (d *Dirent) StateTypeName() string { + return "pkg/sentry/fs.Dirent" +} + +func (d *Dirent) StateFields() []string { + return []string{ + "AtomicRefCount", + "userVisible", + "Inode", + "name", + "parent", + "deleted", + "mounted", + "children", + } +} + +func (d *Dirent) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + var childrenValue map[string]*Dirent = d.saveChildren() + stateSinkObject.SaveValue(7, childrenValue) + stateSinkObject.Save(0, &d.AtomicRefCount) + stateSinkObject.Save(1, &d.userVisible) + stateSinkObject.Save(2, &d.Inode) + stateSinkObject.Save(3, &d.name) + stateSinkObject.Save(4, &d.parent) + stateSinkObject.Save(5, &d.deleted) + stateSinkObject.Save(6, &d.mounted) +} + +func (d *Dirent) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.AtomicRefCount) + stateSourceObject.Load(1, &d.userVisible) + stateSourceObject.Load(2, &d.Inode) + stateSourceObject.Load(3, &d.name) + stateSourceObject.Load(4, &d.parent) + stateSourceObject.Load(5, &d.deleted) + stateSourceObject.Load(6, &d.mounted) + stateSourceObject.LoadValue(7, new(map[string]*Dirent), func(y interface{}) { d.loadChildren(y.(map[string]*Dirent)) }) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (c *DirentCache) StateTypeName() string { + return "pkg/sentry/fs.DirentCache" +} + +func (c *DirentCache) StateFields() []string { + return []string{ + "maxSize", + "limit", + } +} + +func (c *DirentCache) beforeSave() {} + +func (c *DirentCache) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + if !state.IsZeroValue(&c.currentSize) { + state.Failf("currentSize is %#v, expected zero", &c.currentSize) + } + if !state.IsZeroValue(&c.list) { + state.Failf("list is %#v, expected zero", &c.list) + } + stateSinkObject.Save(0, &c.maxSize) + stateSinkObject.Save(1, &c.limit) +} + +func (c *DirentCache) afterLoad() {} + +func (c *DirentCache) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.maxSize) + stateSourceObject.Load(1, &c.limit) +} + +func (d *DirentCacheLimiter) StateTypeName() string { + return "pkg/sentry/fs.DirentCacheLimiter" +} + +func (d *DirentCacheLimiter) StateFields() []string { + return []string{ + "max", + } +} + +func (d *DirentCacheLimiter) beforeSave() {} + +func (d *DirentCacheLimiter) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + if !state.IsZeroValue(&d.count) { + state.Failf("count is %#v, expected zero", &d.count) + } + stateSinkObject.Save(0, &d.max) +} + +func (d *DirentCacheLimiter) afterLoad() {} + +func (d *DirentCacheLimiter) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.max) +} + +func (l *direntList) StateTypeName() string { + return "pkg/sentry/fs.direntList" +} + +func (l *direntList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *direntList) beforeSave() {} + +func (l *direntList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *direntList) afterLoad() {} + +func (l *direntList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *direntEntry) StateTypeName() string { + return "pkg/sentry/fs.direntEntry" +} + +func (e *direntEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *direntEntry) beforeSave() {} + +func (e *direntEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *direntEntry) afterLoad() {} + +func (e *direntEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (l *eventList) StateTypeName() string { + return "pkg/sentry/fs.eventList" +} + +func (l *eventList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *eventList) beforeSave() {} + +func (l *eventList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *eventList) afterLoad() {} + +func (l *eventList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *eventEntry) StateTypeName() string { + return "pkg/sentry/fs.eventEntry" +} + +func (e *eventEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *eventEntry) beforeSave() {} + +func (e *eventEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *eventEntry) afterLoad() {} + +func (e *eventEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (f *File) StateTypeName() string { + return "pkg/sentry/fs.File" +} + +func (f *File) StateFields() []string { + return []string{ + "AtomicRefCount", + "UniqueID", + "Dirent", + "flags", + "async", + "FileOperations", + "offset", + } +} + +func (f *File) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.AtomicRefCount) + stateSinkObject.Save(1, &f.UniqueID) + stateSinkObject.Save(2, &f.Dirent) + stateSinkObject.Save(3, &f.flags) + stateSinkObject.Save(4, &f.async) + stateSinkObject.Save(5, &f.FileOperations) + stateSinkObject.Save(6, &f.offset) +} + +func (f *File) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.AtomicRefCount) + stateSourceObject.Load(1, &f.UniqueID) + stateSourceObject.Load(2, &f.Dirent) + stateSourceObject.Load(3, &f.flags) + stateSourceObject.Load(4, &f.async) + stateSourceObject.LoadWait(5, &f.FileOperations) + stateSourceObject.Load(6, &f.offset) + stateSourceObject.AfterLoad(f.afterLoad) +} + +func (f *overlayFileOperations) StateTypeName() string { + return "pkg/sentry/fs.overlayFileOperations" +} + +func (f *overlayFileOperations) StateFields() []string { + return []string{ + "upper", + "lower", + "dirCursor", + } +} + +func (f *overlayFileOperations) beforeSave() {} + +func (f *overlayFileOperations) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.upper) + stateSinkObject.Save(1, &f.lower) + stateSinkObject.Save(2, &f.dirCursor) +} + +func (f *overlayFileOperations) afterLoad() {} + +func (f *overlayFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.upper) + stateSourceObject.Load(1, &f.lower) + stateSourceObject.Load(2, &f.dirCursor) +} + +func (omi *overlayMappingIdentity) StateTypeName() string { + return "pkg/sentry/fs.overlayMappingIdentity" +} + +func (omi *overlayMappingIdentity) StateFields() []string { + return []string{ + "AtomicRefCount", + "id", + "overlayFile", + } +} + +func (omi *overlayMappingIdentity) beforeSave() {} + +func (omi *overlayMappingIdentity) StateSave(stateSinkObject state.Sink) { + omi.beforeSave() + stateSinkObject.Save(0, &omi.AtomicRefCount) + stateSinkObject.Save(1, &omi.id) + stateSinkObject.Save(2, &omi.overlayFile) +} + +func (omi *overlayMappingIdentity) afterLoad() {} + +func (omi *overlayMappingIdentity) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &omi.AtomicRefCount) + stateSourceObject.Load(1, &omi.id) + stateSourceObject.Load(2, &omi.overlayFile) +} + +func (m *MountSourceFlags) StateTypeName() string { + return "pkg/sentry/fs.MountSourceFlags" +} + +func (m *MountSourceFlags) StateFields() []string { + return []string{ + "ReadOnly", + "NoAtime", + "ForcePageCache", + "NoExec", + } +} + +func (m *MountSourceFlags) beforeSave() {} + +func (m *MountSourceFlags) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.ReadOnly) + stateSinkObject.Save(1, &m.NoAtime) + stateSinkObject.Save(2, &m.ForcePageCache) + stateSinkObject.Save(3, &m.NoExec) +} + +func (m *MountSourceFlags) afterLoad() {} + +func (m *MountSourceFlags) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.ReadOnly) + stateSourceObject.Load(1, &m.NoAtime) + stateSourceObject.Load(2, &m.ForcePageCache) + stateSourceObject.Load(3, &m.NoExec) +} + +func (f *FileFlags) StateTypeName() string { + return "pkg/sentry/fs.FileFlags" +} + +func (f *FileFlags) StateFields() []string { + return []string{ + "Direct", + "NonBlocking", + "DSync", + "Sync", + "Append", + "Read", + "Write", + "Pread", + "Pwrite", + "Directory", + "Async", + "LargeFile", + "NonSeekable", + "Truncate", + } +} + +func (f *FileFlags) beforeSave() {} + +func (f *FileFlags) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.Direct) + stateSinkObject.Save(1, &f.NonBlocking) + stateSinkObject.Save(2, &f.DSync) + stateSinkObject.Save(3, &f.Sync) + stateSinkObject.Save(4, &f.Append) + stateSinkObject.Save(5, &f.Read) + stateSinkObject.Save(6, &f.Write) + stateSinkObject.Save(7, &f.Pread) + stateSinkObject.Save(8, &f.Pwrite) + stateSinkObject.Save(9, &f.Directory) + stateSinkObject.Save(10, &f.Async) + stateSinkObject.Save(11, &f.LargeFile) + stateSinkObject.Save(12, &f.NonSeekable) + stateSinkObject.Save(13, &f.Truncate) +} + +func (f *FileFlags) afterLoad() {} + +func (f *FileFlags) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.Direct) + stateSourceObject.Load(1, &f.NonBlocking) + stateSourceObject.Load(2, &f.DSync) + stateSourceObject.Load(3, &f.Sync) + stateSourceObject.Load(4, &f.Append) + stateSourceObject.Load(5, &f.Read) + stateSourceObject.Load(6, &f.Write) + stateSourceObject.Load(7, &f.Pread) + stateSourceObject.Load(8, &f.Pwrite) + stateSourceObject.Load(9, &f.Directory) + stateSourceObject.Load(10, &f.Async) + stateSourceObject.Load(11, &f.LargeFile) + stateSourceObject.Load(12, &f.NonSeekable) + stateSourceObject.Load(13, &f.Truncate) +} + +func (i *Inode) StateTypeName() string { + return "pkg/sentry/fs.Inode" +} + +func (i *Inode) StateFields() []string { + return []string{ + "AtomicRefCount", + "InodeOperations", + "StableAttr", + "LockCtx", + "Watches", + "MountSource", + "overlay", + } +} + +func (i *Inode) beforeSave() {} + +func (i *Inode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.AtomicRefCount) + stateSinkObject.Save(1, &i.InodeOperations) + stateSinkObject.Save(2, &i.StableAttr) + stateSinkObject.Save(3, &i.LockCtx) + stateSinkObject.Save(4, &i.Watches) + stateSinkObject.Save(5, &i.MountSource) + stateSinkObject.Save(6, &i.overlay) +} + +func (i *Inode) afterLoad() {} + +func (i *Inode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.AtomicRefCount) + stateSourceObject.Load(1, &i.InodeOperations) + stateSourceObject.Load(2, &i.StableAttr) + stateSourceObject.Load(3, &i.LockCtx) + stateSourceObject.Load(4, &i.Watches) + stateSourceObject.Load(5, &i.MountSource) + stateSourceObject.Load(6, &i.overlay) +} + +func (l *LockCtx) StateTypeName() string { + return "pkg/sentry/fs.LockCtx" +} + +func (l *LockCtx) StateFields() []string { + return []string{ + "Posix", + "BSD", + } +} + +func (l *LockCtx) beforeSave() {} + +func (l *LockCtx) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.Posix) + stateSinkObject.Save(1, &l.BSD) +} + +func (l *LockCtx) afterLoad() {} + +func (l *LockCtx) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.Posix) + stateSourceObject.Load(1, &l.BSD) +} + +func (w *Watches) StateTypeName() string { + return "pkg/sentry/fs.Watches" +} + +func (w *Watches) StateFields() []string { + return []string{ + "ws", + "unlinked", + } +} + +func (w *Watches) beforeSave() {} + +func (w *Watches) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.ws) + stateSinkObject.Save(1, &w.unlinked) +} + +func (w *Watches) afterLoad() {} + +func (w *Watches) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.ws) + stateSourceObject.Load(1, &w.unlinked) +} + +func (i *Inotify) StateTypeName() string { + return "pkg/sentry/fs.Inotify" +} + +func (i *Inotify) StateFields() []string { + return []string{ + "id", + "events", + "scratch", + "nextWatch", + "watches", + } +} + +func (i *Inotify) beforeSave() {} + +func (i *Inotify) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.id) + stateSinkObject.Save(1, &i.events) + stateSinkObject.Save(2, &i.scratch) + stateSinkObject.Save(3, &i.nextWatch) + stateSinkObject.Save(4, &i.watches) +} + +func (i *Inotify) afterLoad() {} + +func (i *Inotify) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.id) + stateSourceObject.Load(1, &i.events) + stateSourceObject.Load(2, &i.scratch) + stateSourceObject.Load(3, &i.nextWatch) + stateSourceObject.Load(4, &i.watches) +} + +func (e *Event) StateTypeName() string { + return "pkg/sentry/fs.Event" +} + +func (e *Event) StateFields() []string { + return []string{ + "eventEntry", + "wd", + "mask", + "cookie", + "len", + "name", + } +} + +func (e *Event) beforeSave() {} + +func (e *Event) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.eventEntry) + stateSinkObject.Save(1, &e.wd) + stateSinkObject.Save(2, &e.mask) + stateSinkObject.Save(3, &e.cookie) + stateSinkObject.Save(4, &e.len) + stateSinkObject.Save(5, &e.name) +} + +func (e *Event) afterLoad() {} + +func (e *Event) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.eventEntry) + stateSourceObject.Load(1, &e.wd) + stateSourceObject.Load(2, &e.mask) + stateSourceObject.Load(3, &e.cookie) + stateSourceObject.Load(4, &e.len) + stateSourceObject.Load(5, &e.name) +} + +func (w *Watch) StateTypeName() string { + return "pkg/sentry/fs.Watch" +} + +func (w *Watch) StateFields() []string { + return []string{ + "owner", + "wd", + "target", + "unpinned", + "mask", + "pins", + } +} + +func (w *Watch) beforeSave() {} + +func (w *Watch) StateSave(stateSinkObject state.Sink) { + w.beforeSave() + stateSinkObject.Save(0, &w.owner) + stateSinkObject.Save(1, &w.wd) + stateSinkObject.Save(2, &w.target) + stateSinkObject.Save(3, &w.unpinned) + stateSinkObject.Save(4, &w.mask) + stateSinkObject.Save(5, &w.pins) +} + +func (w *Watch) afterLoad() {} + +func (w *Watch) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &w.owner) + stateSourceObject.Load(1, &w.wd) + stateSourceObject.Load(2, &w.target) + stateSourceObject.Load(3, &w.unpinned) + stateSourceObject.Load(4, &w.mask) + stateSourceObject.Load(5, &w.pins) +} + +func (msrc *MountSource) StateTypeName() string { + return "pkg/sentry/fs.MountSource" +} + +func (msrc *MountSource) StateFields() []string { + return []string{ + "AtomicRefCount", + "MountSourceOperations", + "FilesystemType", + "Flags", + "fscache", + "direntRefs", + } +} + +func (msrc *MountSource) beforeSave() {} + +func (msrc *MountSource) StateSave(stateSinkObject state.Sink) { + msrc.beforeSave() + stateSinkObject.Save(0, &msrc.AtomicRefCount) + stateSinkObject.Save(1, &msrc.MountSourceOperations) + stateSinkObject.Save(2, &msrc.FilesystemType) + stateSinkObject.Save(3, &msrc.Flags) + stateSinkObject.Save(4, &msrc.fscache) + stateSinkObject.Save(5, &msrc.direntRefs) +} + +func (msrc *MountSource) afterLoad() {} + +func (msrc *MountSource) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &msrc.AtomicRefCount) + stateSourceObject.Load(1, &msrc.MountSourceOperations) + stateSourceObject.Load(2, &msrc.FilesystemType) + stateSourceObject.Load(3, &msrc.Flags) + stateSourceObject.Load(4, &msrc.fscache) + stateSourceObject.Load(5, &msrc.direntRefs) +} + +func (smo *SimpleMountSourceOperations) StateTypeName() string { + return "pkg/sentry/fs.SimpleMountSourceOperations" +} + +func (smo *SimpleMountSourceOperations) StateFields() []string { + return []string{ + "keep", + "revalidate", + "cacheReaddir", + } +} + +func (smo *SimpleMountSourceOperations) beforeSave() {} + +func (smo *SimpleMountSourceOperations) StateSave(stateSinkObject state.Sink) { + smo.beforeSave() + stateSinkObject.Save(0, &smo.keep) + stateSinkObject.Save(1, &smo.revalidate) + stateSinkObject.Save(2, &smo.cacheReaddir) +} + +func (smo *SimpleMountSourceOperations) afterLoad() {} + +func (smo *SimpleMountSourceOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &smo.keep) + stateSourceObject.Load(1, &smo.revalidate) + stateSourceObject.Load(2, &smo.cacheReaddir) +} + +func (o *overlayMountSourceOperations) StateTypeName() string { + return "pkg/sentry/fs.overlayMountSourceOperations" +} + +func (o *overlayMountSourceOperations) StateFields() []string { + return []string{ + "upper", + "lower", + } +} + +func (o *overlayMountSourceOperations) beforeSave() {} + +func (o *overlayMountSourceOperations) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.upper) + stateSinkObject.Save(1, &o.lower) +} + +func (o *overlayMountSourceOperations) afterLoad() {} + +func (o *overlayMountSourceOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.upper) + stateSourceObject.Load(1, &o.lower) +} + +func (ofs *overlayFilesystem) StateTypeName() string { + return "pkg/sentry/fs.overlayFilesystem" +} + +func (ofs *overlayFilesystem) StateFields() []string { + return []string{} +} + +func (ofs *overlayFilesystem) beforeSave() {} + +func (ofs *overlayFilesystem) StateSave(stateSinkObject state.Sink) { + ofs.beforeSave() +} + +func (ofs *overlayFilesystem) afterLoad() {} + +func (ofs *overlayFilesystem) StateLoad(stateSourceObject state.Source) { +} + +func (m *Mount) StateTypeName() string { + return "pkg/sentry/fs.Mount" +} + +func (m *Mount) StateFields() []string { + return []string{ + "ID", + "ParentID", + "root", + "previous", + } +} + +func (m *Mount) beforeSave() {} + +func (m *Mount) StateSave(stateSinkObject state.Sink) { + m.beforeSave() + stateSinkObject.Save(0, &m.ID) + stateSinkObject.Save(1, &m.ParentID) + stateSinkObject.Save(2, &m.root) + stateSinkObject.Save(3, &m.previous) +} + +func (m *Mount) afterLoad() {} + +func (m *Mount) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &m.ID) + stateSourceObject.Load(1, &m.ParentID) + stateSourceObject.Load(2, &m.root) + stateSourceObject.Load(3, &m.previous) +} + +func (mns *MountNamespace) StateTypeName() string { + return "pkg/sentry/fs.MountNamespace" +} + +func (mns *MountNamespace) StateFields() []string { + return []string{ + "AtomicRefCount", + "userns", + "root", + "mounts", + "mountID", + } +} + +func (mns *MountNamespace) beforeSave() {} + +func (mns *MountNamespace) StateSave(stateSinkObject state.Sink) { + mns.beforeSave() + stateSinkObject.Save(0, &mns.AtomicRefCount) + stateSinkObject.Save(1, &mns.userns) + stateSinkObject.Save(2, &mns.root) + stateSinkObject.Save(3, &mns.mounts) + stateSinkObject.Save(4, &mns.mountID) +} + +func (mns *MountNamespace) afterLoad() {} + +func (mns *MountNamespace) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mns.AtomicRefCount) + stateSourceObject.Load(1, &mns.userns) + stateSourceObject.Load(2, &mns.root) + stateSourceObject.Load(3, &mns.mounts) + stateSourceObject.Load(4, &mns.mountID) +} + +func (o *overlayEntry) StateTypeName() string { + return "pkg/sentry/fs.overlayEntry" +} + +func (o *overlayEntry) StateFields() []string { + return []string{ + "lowerExists", + "lower", + "mappings", + "upper", + "dirCache", + } +} + +func (o *overlayEntry) beforeSave() {} + +func (o *overlayEntry) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.lowerExists) + stateSinkObject.Save(1, &o.lower) + stateSinkObject.Save(2, &o.mappings) + stateSinkObject.Save(3, &o.upper) + stateSinkObject.Save(4, &o.dirCache) +} + +func (o *overlayEntry) afterLoad() {} + +func (o *overlayEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.lowerExists) + stateSourceObject.Load(1, &o.lower) + stateSourceObject.Load(2, &o.mappings) + stateSourceObject.Load(3, &o.upper) + stateSourceObject.Load(4, &o.dirCache) +} + +func init() { + state.Register((*StableAttr)(nil)) + state.Register((*UnstableAttr)(nil)) + state.Register((*AttrMask)(nil)) + state.Register((*PermMask)(nil)) + state.Register((*FilePermissions)(nil)) + state.Register((*FileOwner)(nil)) + state.Register((*DentAttr)(nil)) + state.Register((*SortedDentryMap)(nil)) + state.Register((*Dirent)(nil)) + state.Register((*DirentCache)(nil)) + state.Register((*DirentCacheLimiter)(nil)) + state.Register((*direntList)(nil)) + state.Register((*direntEntry)(nil)) + state.Register((*eventList)(nil)) + state.Register((*eventEntry)(nil)) + state.Register((*File)(nil)) + state.Register((*overlayFileOperations)(nil)) + state.Register((*overlayMappingIdentity)(nil)) + state.Register((*MountSourceFlags)(nil)) + state.Register((*FileFlags)(nil)) + state.Register((*Inode)(nil)) + state.Register((*LockCtx)(nil)) + state.Register((*Watches)(nil)) + state.Register((*Inotify)(nil)) + state.Register((*Event)(nil)) + state.Register((*Watch)(nil)) + state.Register((*MountSource)(nil)) + state.Register((*SimpleMountSourceOperations)(nil)) + state.Register((*overlayMountSourceOperations)(nil)) + state.Register((*overlayFilesystem)(nil)) + state.Register((*Mount)(nil)) + state.Register((*MountNamespace)(nil)) + state.Register((*overlayEntry)(nil)) +} diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD deleted file mode 100644 index 5fb419bcd..000000000 --- a/pkg/sentry/fs/fsutil/BUILD +++ /dev/null @@ -1,115 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "dirty_set_impl", - out = "dirty_set_impl.go", - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - }, - package = "fsutil", - prefix = "Dirty", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.MappableRange", - "Value": "DirtyInfo", - "Functions": "dirtySetFunctions", - }, -) - -go_template_instance( - name = "frame_ref_set_impl", - out = "frame_ref_set_impl.go", - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - }, - package = "fsutil", - prefix = "FrameRef", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.FileRange", - "Value": "uint64", - "Functions": "FrameRefSetFunctions", - }, -) - -go_template_instance( - name = "file_range_set_impl", - out = "file_range_set_impl.go", - imports = { - "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - }, - package = "fsutil", - prefix = "FileRange", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "memmap.MappableRange", - "Value": "uint64", - "Functions": "FileRangeSetFunctions", - }, -) - -go_library( - name = "fsutil", - srcs = [ - "dirty_set.go", - "dirty_set_impl.go", - "file.go", - "file_range_set.go", - "file_range_set_impl.go", - "frame_ref_set.go", - "frame_ref_set_impl.go", - "fsutil.go", - "host_file_mapper.go", - "host_file_mapper_state.go", - "host_file_mapper_unsafe.go", - "host_mappable.go", - "inode.go", - "inode_cached.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/log", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/state", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "fsutil_test", - size = "small", - srcs = [ - "dirty_set_test.go", - "inode_cached_test.go", - ], - library = ":fsutil", - deps = [ - "//pkg/context", - "//pkg/safemem", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/syserror", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/fsutil/README.md b/pkg/sentry/fs/fsutil/README.md deleted file mode 100644 index 8be367334..000000000 --- a/pkg/sentry/fs/fsutil/README.md +++ /dev/null @@ -1,207 +0,0 @@ -This package provides utilities for implementing virtual filesystem objects. - -[TOC] - -## Page cache - -`CachingInodeOperations` implements a page cache for files that cannot use the -host page cache. Normally these are files that store their data in a remote -filesystem. This also applies to files that are accessed on a platform that does -not support directly memory mapping host file descriptors (e.g. the ptrace -platform). - -An `CachingInodeOperations` buffers regions of a single file into memory. It is -owned by an `fs.Inode`, the in-memory representation of a file (all open file -descriptors are backed by an `fs.Inode`). The `fs.Inode` provides operations for -reading memory into an `CachingInodeOperations`, to represent the contents of -the file in-memory, and for writing memory out, to relieve memory pressure on -the kernel and to synchronize in-memory changes to filesystems. - -An `CachingInodeOperations` enables readable and/or writable memory access to -file content. Files can be mapped shared or private, see mmap(2). When a file is -mapped shared, changes to the file via write(2) and truncate(2) are reflected in -the shared memory region. Conversely, when the shared memory region is modified, -changes to the file are visible via read(2). Multiple shared mappings of the -same file are coherent with each other. This is consistent with Linux. - -When a file is mapped private, updates to the mapped memory are not visible to -other memory mappings. Updates to the mapped memory are also not reflected in -the file content as seen by read(2). If the file is changed after a private -mapping is created, for instance by write(2), the change to the file may or may -not be reflected in the private mapping. This is consistent with Linux. - -An `CachingInodeOperations` keeps track of ranges of memory that were modified -(or "dirtied"). When the file is explicitly synced via fsync(2), only the dirty -ranges are written out to the filesystem. Any error returned indicates a failure -to write all dirty memory of an `CachingInodeOperations` to the filesystem. In -this case the filesystem may be in an inconsistent state. The same operation can -be performed on the shared memory itself using msync(2). If neither fsync(2) nor -msync(2) is performed, then the dirty memory is written out in accordance with -the `CachingInodeOperations` eviction strategy (see below) and there is no -guarantee that memory will be written out successfully in full. - -### Memory allocation and eviction - -An `CachingInodeOperations` implements the following allocation and eviction -strategy: - -- Memory is allocated and brought up to date with the contents of a file when - a region of mapped memory is accessed (or "faulted on"). - -- Dirty memory is written out to filesystems when an fsync(2) or msync(2) - operation is performed on a memory mapped file, for all memory mapped files - when saved, and/or when there are no longer any memory mappings of a range - of a file, see munmap(2). As the latter implies, in the absence of a panic - or SIGKILL, dirty memory is written out for all memory mapped files when an - application exits. - -- Memory is freed when there are no longer any memory mappings of a range of a - file (e.g. when an application exits). This behavior is consistent with - Linux for shared memory that has been locked via mlock(2). - -Notably, memory is not allocated for read(2) or write(2) operations. This means -that reads and writes to the file are only accelerated by an -`CachingInodeOperations` if the file being read or written has been memory -mapped *and* if the shared memory has been accessed at the region being read or -written. This diverges from Linux which buffers memory into a page cache on -read(2) proactively (i.e. readahead) and delays writing it out to filesystems on -write(2) (i.e. writeback). The absence of these optimizations is not visible to -applications beyond less than optimal performance when repeatedly reading and/or -writing to same region of a file. See [Future Work](#future-work) for plans to -implement these optimizations. - -Additionally, memory held by `CachingInodeOperationss` is currently unbounded in -size. An `CachingInodeOperations` does not write out dirty memory and free it -under system memory pressure. This can cause pathological memory usage. - -When memory is written back, an `CachingInodeOperations` may write regions of -shared memory that were never modified. This is due to the strategy of -minimizing page faults (see below) and handling only a subset of memory write -faults. In the absence of an application or sentry crash, it is guaranteed that -if a region of shared memory was written to, it is written back to a filesystem. - -### Life of a shared memory mapping - -A file is memory mapped via mmap(2). For example, if `A` is an address, an -application may execute: - -``` -mmap(A, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); -``` - -This creates a shared mapping of fd that reflects 4k of the contents of fd -starting at offset 0, accessible at address `A`. This in turn creates a virtual -memory area region ("vma") which indicates that [`A`, `A`+0x1000) is now a valid -address range for this application to access. - -At this point, memory has not been allocated in the file's -`CachingInodeOperations`. It is also the case that the address range [`A`, -`A`+0x1000) has not been mapped on the host on behalf of the application. If the -application then tries to modify 8 bytes of the shared memory: - -``` -char buffer[] = "aaaaaaaa"; -memcpy(A, buffer, 8); -``` - -The host then sends a `SIGSEGV` to the sentry because the address range [`A`, -`A`+8) is not mapped on the host. The `SIGSEGV` indicates that the memory was -accessed writable. The sentry looks up the vma associated with [`A`, `A`+8), -finds the file that was mapped and its `CachingInodeOperations`. It then calls -`CachingInodeOperations.Translate` which allocates memory to back [`A`, `A`+8). -It may choose to allocate more memory (i.e. do "readahead") to minimize -subsequent faults. - -Memory that is allocated comes from a host tmpfs file (see -`pgalloc.MemoryFile`). The host tmpfs file memory is brought up to date with the -contents of the mapped file on its filesystem. The region of the host tmpfs file -that reflects the mapped file is then mapped into the host address space of the -application so that subsequent memory accesses do not repeatedly generate a -`SIGSEGV`. - -The range that was allocated, including any extra memory allocation to minimize -faults, is marked dirty due to the write fault. This overcounts dirty memory if -the extra memory allocated is never modified. - -To make the scenario more interesting, imagine that this application spawns -another process and maps the same file in the exact same way: - -``` -mmap(A, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); -``` - -Imagine that this process then tries to modify the file again but with only 4 -bytes: - -``` -char buffer[] = "bbbb"; -memcpy(A, buffer, 4); -``` - -Since the first process has already mapped and accessed the same region of the -file writable, `CachingInodeOperations.Translate` is called but returns the -memory that has already been allocated rather than allocating new memory. The -address range [`A`, `A`+0x1000) reflects the same cached view of the file as the -first process sees. For example, reading 8 bytes from the file from either -process via read(2) starting at offset 0 returns a consistent "bbbbaaaa". - -When this process no longer needs the shared memory, it may do: - -``` -munmap(A, 0x1000); -``` - -At this point, the modified memory cached by the `CachingInodeOperations` is not -written back to the file because it is still in use by the first process that -mapped it. When the first process also does: - -``` -munmap(A, 0x1000); -``` - -Then the last memory mapping of the file at the range [0, 0x1000) is gone. The -file's `CachingInodeOperations` then starts writing back memory marked dirty to -the file on its filesystem. Once writing completes, regardless of whether it was -successful, the `CachingInodeOperations` frees the memory cached at the range -[0, 0x1000). - -Subsequent read(2) or write(2) operations on the file go directly to the -filesystem since there no longer exists memory for it in its -`CachingInodeOperations`. - -## Future Work - -### Page cache - -The sentry does not yet implement the readahead and writeback optimizations for -read(2) and write(2) respectively. To do so, on read(2) and/or write(2) the -sentry must ensure that memory is allocated in a page cache to read or write -into. However, the sentry cannot boundlessly allocate memory. If it did, the -host would eventually OOM-kill the sentry+application process. This means that -the sentry must implement a page cache memory allocation strategy that is -bounded by a global user or container imposed limit. When this limit is -approached, the sentry must decide from which page cache memory should be freed -so that it can allocate more memory. If it makes a poor decision, the sentry may -end up freeing and re-allocating memory to back regions of files that are -frequently used, nullifying the optimization (and in some cases causing worse -performance due to the overhead of memory allocation and general management). -This is a form of "cache thrashing". - -In Linux, much research has been done to select and implement a lightweight but -optimal page cache eviction algorithm. Linux makes use of hardware page bits to -keep track of whether memory has been accessed. The sentry does not have direct -access to hardware. Implementing a similarly lightweight and optimal page cache -eviction algorithm will need to either introduce a kernel interface to obtain -these page bits or find a suitable alternative proxy for access events. - -In Linux, readahead happens by default but is not always ideal. For instance, -for files that are not read sequentially, it would be more ideal to simply read -from only those regions of the file rather than to optimistically cache some -number of bytes ahead of the read (up to 2MB in Linux) if the bytes cached won't -be accessed. Linux implements the fadvise64(2) system call for applications to -specify that a range of a file will not be accessed sequentially. The advice bit -FADV_RANDOM turns off the readahead optimization for the given range in the -given file. However fadvise64 is rarely used by applications so Linux implements -a readahead backoff strategy if reads are not sequential. To ensure that -application performance is not degraded, the sentry must implement a similar -backoff strategy. diff --git a/pkg/sentry/fs/fsutil/dirty_set_impl.go b/pkg/sentry/fs/fsutil/dirty_set_impl.go new file mode 100644 index 000000000..9f1463389 --- /dev/null +++ b/pkg/sentry/fs/fsutil/dirty_set_impl.go @@ -0,0 +1,1647 @@ +package fsutil + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const DirtytrackGaps = 0 + +var _ = uint8(DirtytrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type DirtydynamicGap [DirtytrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *DirtydynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *DirtydynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + DirtyminDegree = 3 + + DirtymaxDegree = 2 * DirtyminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type DirtySet struct { + root Dirtynode `state:".(*DirtySegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *DirtySet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *DirtySet) IsEmptyRange(r __generics_imported0.MappableRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *DirtySet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *DirtySet) SpanRange(r __generics_imported0.MappableRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *DirtySet) FirstSegment() DirtyIterator { + if s.root.nrSegments == 0 { + return DirtyIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *DirtySet) LastSegment() DirtyIterator { + if s.root.nrSegments == 0 { + return DirtyIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *DirtySet) FirstGap() DirtyGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return DirtyGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *DirtySet) LastGap() DirtyGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return DirtyGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *DirtySet) Find(key uint64) (DirtyIterator, DirtyGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return DirtyIterator{n, i}, DirtyGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return DirtyIterator{}, DirtyGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *DirtySet) FindSegment(key uint64) DirtyIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *DirtySet) LowerBoundSegment(min uint64) DirtyIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *DirtySet) UpperBoundSegment(max uint64) DirtyIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *DirtySet) FindGap(key uint64) DirtyGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *DirtySet) LowerBoundGap(min uint64) DirtyGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *DirtySet) UpperBoundGap(max uint64) DirtyGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *DirtySet) Add(r __generics_imported0.MappableRange, val DirtyInfo) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *DirtySet) AddWithoutMerging(r __generics_imported0.MappableRange, val DirtyInfo) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *DirtySet) Insert(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (dirtySetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := DirtytrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (dirtySetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (dirtySetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := DirtytrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *DirtySet) InsertWithoutMerging(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *DirtySet) InsertWithoutMergingUnchecked(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := DirtytrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return DirtyIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *DirtySet) Remove(seg DirtyIterator) DirtyGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if DirtytrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + dirtySetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if DirtytrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(DirtyGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *DirtySet) RemoveAll() { + s.root = Dirtynode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *DirtySet) RemoveRange(r __generics_imported0.MappableRange) DirtyGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *DirtySet) Merge(first, second DirtyIterator) DirtyIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *DirtySet) MergeUnchecked(first, second DirtyIterator) DirtyIterator { + if first.End() == second.Start() { + if mval, ok := (dirtySetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return DirtyIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *DirtySet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *DirtySet) MergeRange(r __generics_imported0.MappableRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *DirtySet) MergeAdjacent(r __generics_imported0.MappableRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *DirtySet) Split(seg DirtyIterator, split uint64) (DirtyIterator, DirtyIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *DirtySet) SplitUnchecked(seg DirtyIterator, split uint64) (DirtyIterator, DirtyIterator) { + val1, val2 := (dirtySetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.MappableRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *DirtySet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *DirtySet) Isolate(seg DirtyIterator, r __generics_imported0.MappableRange) DirtyIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *DirtySet) ApplyContiguous(r __generics_imported0.MappableRange, fn func(seg DirtyIterator)) DirtyGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return DirtyGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return DirtyGapIterator{} + } + } +} + +// +stateify savable +type Dirtynode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *Dirtynode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap DirtydynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [DirtymaxDegree - 1]__generics_imported0.MappableRange + values [DirtymaxDegree - 1]DirtyInfo + children [DirtymaxDegree]*Dirtynode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Dirtynode) firstSegment() DirtyIterator { + for n.hasChildren { + n = n.children[0] + } + return DirtyIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Dirtynode) lastSegment() DirtyIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return DirtyIterator{n, n.nrSegments - 1} +} + +func (n *Dirtynode) prevSibling() *Dirtynode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *Dirtynode) nextSibling() *Dirtynode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *Dirtynode) rebalanceBeforeInsert(gap DirtyGapIterator) DirtyGapIterator { + if n.nrSegments < DirtymaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &Dirtynode{ + nrSegments: DirtyminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &Dirtynode{ + nrSegments: DirtyminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:DirtyminDegree-1], n.keys[:DirtyminDegree-1]) + copy(left.values[:DirtyminDegree-1], n.values[:DirtyminDegree-1]) + copy(right.keys[:DirtyminDegree-1], n.keys[DirtyminDegree:]) + copy(right.values[:DirtyminDegree-1], n.values[DirtyminDegree:]) + n.keys[0], n.values[0] = n.keys[DirtyminDegree-1], n.values[DirtyminDegree-1] + DirtyzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:DirtyminDegree], n.children[:DirtyminDegree]) + copy(right.children[:DirtyminDegree], n.children[DirtyminDegree:]) + DirtyzeroNodeSlice(n.children[2:]) + for i := 0; i < DirtyminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if DirtytrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < DirtyminDegree { + return DirtyGapIterator{left, gap.index} + } + return DirtyGapIterator{right, gap.index - DirtyminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[DirtyminDegree-1], n.values[DirtyminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &Dirtynode{ + nrSegments: DirtyminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:DirtyminDegree-1], n.keys[DirtyminDegree:]) + copy(sibling.values[:DirtyminDegree-1], n.values[DirtyminDegree:]) + DirtyzeroValueSlice(n.values[DirtyminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:DirtyminDegree], n.children[DirtyminDegree:]) + DirtyzeroNodeSlice(n.children[DirtyminDegree:]) + for i := 0; i < DirtyminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = DirtyminDegree - 1 + + if DirtytrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < DirtyminDegree { + return gap + } + return DirtyGapIterator{sibling, gap.index - DirtyminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *Dirtynode) rebalanceAfterRemove(gap DirtyGapIterator) DirtyGapIterator { + for { + if n.nrSegments >= DirtyminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= DirtyminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + dirtySetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if DirtytrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return DirtyGapIterator{n, 0} + } + if gap.node == n { + return DirtyGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= DirtyminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + dirtySetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if DirtytrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return DirtyGapIterator{n, n.nrSegments} + } + return DirtyGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return DirtyGapIterator{p, gap.index} + } + if gap.node == right { + return DirtyGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *Dirtynode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = DirtyGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + dirtySetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if DirtytrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *Dirtynode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *Dirtynode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *Dirtynode) calculateMaxGapLeaf() uint64 { + max := DirtyGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (DirtyGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *Dirtynode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *Dirtynode) searchFirstLargeEnoughGap(minSize uint64) DirtyGapIterator { + if n.maxGap.Get() < minSize { + return DirtyGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := DirtyGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *Dirtynode) searchLastLargeEnoughGap(minSize uint64) DirtyGapIterator { + if n.maxGap.Get() < minSize { + return DirtyGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := DirtyGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type DirtyIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *Dirtynode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg DirtyIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg DirtyIterator) Range() __generics_imported0.MappableRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg DirtyIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg DirtyIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg DirtyIterator) SetRangeUnchecked(r __generics_imported0.MappableRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg DirtyIterator) SetRange(r __generics_imported0.MappableRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg DirtyIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg DirtyIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg DirtyIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg DirtyIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg DirtyIterator) Value() DirtyInfo { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg DirtyIterator) ValuePtr() *DirtyInfo { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg DirtyIterator) SetValue(val DirtyInfo) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg DirtyIterator) PrevSegment() DirtyIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return DirtyIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return DirtyIterator{} + } + return DirtysegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg DirtyIterator) NextSegment() DirtyIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return DirtyIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return DirtyIterator{} + } + return DirtysegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg DirtyIterator) PrevGap() DirtyGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return DirtyGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg DirtyIterator) NextGap() DirtyGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return DirtyGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg DirtyIterator) PrevNonEmpty() (DirtyIterator, DirtyGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return DirtyIterator{}, gap + } + return gap.PrevSegment(), DirtyGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg DirtyIterator) NextNonEmpty() (DirtyIterator, DirtyGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return DirtyIterator{}, gap + } + return gap.NextSegment(), DirtyGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type DirtyGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *Dirtynode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap DirtyGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap DirtyGapIterator) Range() __generics_imported0.MappableRange { + return __generics_imported0.MappableRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap DirtyGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return dirtySetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap DirtyGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return dirtySetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap DirtyGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap DirtyGapIterator) PrevSegment() DirtyIterator { + return DirtysegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap DirtyGapIterator) NextSegment() DirtyIterator { + return DirtysegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap DirtyGapIterator) PrevGap() DirtyGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return DirtyGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap DirtyGapIterator) NextGap() DirtyGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return DirtyGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap DirtyGapIterator) NextLargeEnoughGap(minSize uint64) DirtyGapIterator { + if DirtytrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap DirtyGapIterator) nextLargeEnoughGapHelper(minSize uint64) DirtyGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return DirtyGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap DirtyGapIterator) PrevLargeEnoughGap(minSize uint64) DirtyGapIterator { + if DirtytrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap DirtyGapIterator) prevLargeEnoughGapHelper(minSize uint64) DirtyGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return DirtyGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func DirtysegmentBeforePosition(n *Dirtynode, i int) DirtyIterator { + for i == 0 { + if n.parent == nil { + return DirtyIterator{} + } + n, i = n.parent, n.parentIndex + } + return DirtyIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func DirtysegmentAfterPosition(n *Dirtynode, i int) DirtyIterator { + for i == n.nrSegments { + if n.parent == nil { + return DirtyIterator{} + } + n, i = n.parent, n.parentIndex + } + return DirtyIterator{n, i} +} + +func DirtyzeroValueSlice(slice []DirtyInfo) { + + for i := range slice { + dirtySetFunctions{}.ClearValue(&slice[i]) + } +} + +func DirtyzeroNodeSlice(slice []*Dirtynode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *DirtySet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *Dirtynode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *Dirtynode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if DirtytrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type DirtySegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []DirtyInfo +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *DirtySet) ExportSortedSlices() *DirtySegmentDataSlices { + var sds DirtySegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *DirtySet) ImportSortedSlices(sds *DirtySegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.MappableRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *DirtySet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.MappableRange, DirtyInfo) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *DirtySet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *DirtySet) saveRoot() *DirtySegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *DirtySet) loadRoot(sds *DirtySegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/fsutil/dirty_set_test.go b/pkg/sentry/fs/fsutil/dirty_set_test.go deleted file mode 100644 index e3579c23c..000000000 --- a/pkg/sentry/fs/fsutil/dirty_set_test.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fsutil - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/usermem" -) - -func TestDirtySet(t *testing.T) { - var set DirtySet - set.MarkDirty(memmap.MappableRange{0, 2 * usermem.PageSize}) - set.KeepDirty(memmap.MappableRange{usermem.PageSize, 2 * usermem.PageSize}) - set.MarkClean(memmap.MappableRange{0, 2 * usermem.PageSize}) - want := &DirtySegmentDataSlices{ - Start: []uint64{usermem.PageSize}, - End: []uint64{2 * usermem.PageSize}, - Values: []DirtyInfo{{Keep: true}}, - } - if got := set.ExportSortedSlices(); !reflect.DeepEqual(got, want) { - t.Errorf("set:\n\tgot %v,\n\twant %v", got, want) - } -} diff --git a/pkg/sentry/fs/fsutil/file_range_set_impl.go b/pkg/sentry/fs/fsutil/file_range_set_impl.go new file mode 100644 index 000000000..374ed79b7 --- /dev/null +++ b/pkg/sentry/fs/fsutil/file_range_set_impl.go @@ -0,0 +1,1647 @@ +package fsutil + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const FileRangetrackGaps = 0 + +var _ = uint8(FileRangetrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type FileRangedynamicGap [FileRangetrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *FileRangedynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *FileRangedynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + FileRangeminDegree = 3 + + FileRangemaxDegree = 2 * FileRangeminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type FileRangeSet struct { + root FileRangenode `state:".(*FileRangeSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *FileRangeSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *FileRangeSet) IsEmptyRange(r __generics_imported0.MappableRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *FileRangeSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *FileRangeSet) SpanRange(r __generics_imported0.MappableRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *FileRangeSet) FirstSegment() FileRangeIterator { + if s.root.nrSegments == 0 { + return FileRangeIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *FileRangeSet) LastSegment() FileRangeIterator { + if s.root.nrSegments == 0 { + return FileRangeIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *FileRangeSet) FirstGap() FileRangeGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return FileRangeGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *FileRangeSet) LastGap() FileRangeGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return FileRangeGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *FileRangeSet) Find(key uint64) (FileRangeIterator, FileRangeGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return FileRangeIterator{n, i}, FileRangeGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return FileRangeIterator{}, FileRangeGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *FileRangeSet) FindSegment(key uint64) FileRangeIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *FileRangeSet) LowerBoundSegment(min uint64) FileRangeIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *FileRangeSet) UpperBoundSegment(max uint64) FileRangeIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *FileRangeSet) FindGap(key uint64) FileRangeGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *FileRangeSet) LowerBoundGap(min uint64) FileRangeGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *FileRangeSet) UpperBoundGap(max uint64) FileRangeGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *FileRangeSet) Add(r __generics_imported0.MappableRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *FileRangeSet) AddWithoutMerging(r __generics_imported0.MappableRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *FileRangeSet) Insert(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (FileRangeSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := FileRangetrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (FileRangeSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (FileRangeSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := FileRangetrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *FileRangeSet) InsertWithoutMerging(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *FileRangeSet) InsertWithoutMergingUnchecked(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := FileRangetrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return FileRangeIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *FileRangeSet) Remove(seg FileRangeIterator) FileRangeGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if FileRangetrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + FileRangeSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if FileRangetrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(FileRangeGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *FileRangeSet) RemoveAll() { + s.root = FileRangenode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *FileRangeSet) RemoveRange(r __generics_imported0.MappableRange) FileRangeGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *FileRangeSet) Merge(first, second FileRangeIterator) FileRangeIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *FileRangeSet) MergeUnchecked(first, second FileRangeIterator) FileRangeIterator { + if first.End() == second.Start() { + if mval, ok := (FileRangeSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return FileRangeIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *FileRangeSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *FileRangeSet) MergeRange(r __generics_imported0.MappableRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *FileRangeSet) MergeAdjacent(r __generics_imported0.MappableRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *FileRangeSet) Split(seg FileRangeIterator, split uint64) (FileRangeIterator, FileRangeIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *FileRangeSet) SplitUnchecked(seg FileRangeIterator, split uint64) (FileRangeIterator, FileRangeIterator) { + val1, val2 := (FileRangeSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.MappableRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *FileRangeSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *FileRangeSet) Isolate(seg FileRangeIterator, r __generics_imported0.MappableRange) FileRangeIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *FileRangeSet) ApplyContiguous(r __generics_imported0.MappableRange, fn func(seg FileRangeIterator)) FileRangeGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return FileRangeGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return FileRangeGapIterator{} + } + } +} + +// +stateify savable +type FileRangenode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *FileRangenode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap FileRangedynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [FileRangemaxDegree - 1]__generics_imported0.MappableRange + values [FileRangemaxDegree - 1]uint64 + children [FileRangemaxDegree]*FileRangenode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *FileRangenode) firstSegment() FileRangeIterator { + for n.hasChildren { + n = n.children[0] + } + return FileRangeIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *FileRangenode) lastSegment() FileRangeIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return FileRangeIterator{n, n.nrSegments - 1} +} + +func (n *FileRangenode) prevSibling() *FileRangenode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *FileRangenode) nextSibling() *FileRangenode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *FileRangenode) rebalanceBeforeInsert(gap FileRangeGapIterator) FileRangeGapIterator { + if n.nrSegments < FileRangemaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &FileRangenode{ + nrSegments: FileRangeminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &FileRangenode{ + nrSegments: FileRangeminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:FileRangeminDegree-1], n.keys[:FileRangeminDegree-1]) + copy(left.values[:FileRangeminDegree-1], n.values[:FileRangeminDegree-1]) + copy(right.keys[:FileRangeminDegree-1], n.keys[FileRangeminDegree:]) + copy(right.values[:FileRangeminDegree-1], n.values[FileRangeminDegree:]) + n.keys[0], n.values[0] = n.keys[FileRangeminDegree-1], n.values[FileRangeminDegree-1] + FileRangezeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:FileRangeminDegree], n.children[:FileRangeminDegree]) + copy(right.children[:FileRangeminDegree], n.children[FileRangeminDegree:]) + FileRangezeroNodeSlice(n.children[2:]) + for i := 0; i < FileRangeminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if FileRangetrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < FileRangeminDegree { + return FileRangeGapIterator{left, gap.index} + } + return FileRangeGapIterator{right, gap.index - FileRangeminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[FileRangeminDegree-1], n.values[FileRangeminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &FileRangenode{ + nrSegments: FileRangeminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:FileRangeminDegree-1], n.keys[FileRangeminDegree:]) + copy(sibling.values[:FileRangeminDegree-1], n.values[FileRangeminDegree:]) + FileRangezeroValueSlice(n.values[FileRangeminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:FileRangeminDegree], n.children[FileRangeminDegree:]) + FileRangezeroNodeSlice(n.children[FileRangeminDegree:]) + for i := 0; i < FileRangeminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = FileRangeminDegree - 1 + + if FileRangetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < FileRangeminDegree { + return gap + } + return FileRangeGapIterator{sibling, gap.index - FileRangeminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *FileRangenode) rebalanceAfterRemove(gap FileRangeGapIterator) FileRangeGapIterator { + for { + if n.nrSegments >= FileRangeminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= FileRangeminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + FileRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if FileRangetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return FileRangeGapIterator{n, 0} + } + if gap.node == n { + return FileRangeGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= FileRangeminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + FileRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if FileRangetrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return FileRangeGapIterator{n, n.nrSegments} + } + return FileRangeGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return FileRangeGapIterator{p, gap.index} + } + if gap.node == right { + return FileRangeGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *FileRangenode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = FileRangeGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + FileRangeSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if FileRangetrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *FileRangenode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *FileRangenode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *FileRangenode) calculateMaxGapLeaf() uint64 { + max := FileRangeGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (FileRangeGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *FileRangenode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *FileRangenode) searchFirstLargeEnoughGap(minSize uint64) FileRangeGapIterator { + if n.maxGap.Get() < minSize { + return FileRangeGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := FileRangeGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *FileRangenode) searchLastLargeEnoughGap(minSize uint64) FileRangeGapIterator { + if n.maxGap.Get() < minSize { + return FileRangeGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := FileRangeGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type FileRangeIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *FileRangenode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg FileRangeIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg FileRangeIterator) Range() __generics_imported0.MappableRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg FileRangeIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg FileRangeIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg FileRangeIterator) SetRangeUnchecked(r __generics_imported0.MappableRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg FileRangeIterator) SetRange(r __generics_imported0.MappableRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg FileRangeIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg FileRangeIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg FileRangeIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg FileRangeIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg FileRangeIterator) Value() uint64 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg FileRangeIterator) ValuePtr() *uint64 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg FileRangeIterator) SetValue(val uint64) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg FileRangeIterator) PrevSegment() FileRangeIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return FileRangeIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return FileRangeIterator{} + } + return FileRangesegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg FileRangeIterator) NextSegment() FileRangeIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return FileRangeIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return FileRangeIterator{} + } + return FileRangesegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg FileRangeIterator) PrevGap() FileRangeGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return FileRangeGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg FileRangeIterator) NextGap() FileRangeGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return FileRangeGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg FileRangeIterator) PrevNonEmpty() (FileRangeIterator, FileRangeGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return FileRangeIterator{}, gap + } + return gap.PrevSegment(), FileRangeGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg FileRangeIterator) NextNonEmpty() (FileRangeIterator, FileRangeGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return FileRangeIterator{}, gap + } + return gap.NextSegment(), FileRangeGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type FileRangeGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *FileRangenode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap FileRangeGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap FileRangeGapIterator) Range() __generics_imported0.MappableRange { + return __generics_imported0.MappableRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap FileRangeGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return FileRangeSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap FileRangeGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return FileRangeSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap FileRangeGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap FileRangeGapIterator) PrevSegment() FileRangeIterator { + return FileRangesegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap FileRangeGapIterator) NextSegment() FileRangeIterator { + return FileRangesegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap FileRangeGapIterator) PrevGap() FileRangeGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return FileRangeGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap FileRangeGapIterator) NextGap() FileRangeGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return FileRangeGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap FileRangeGapIterator) NextLargeEnoughGap(minSize uint64) FileRangeGapIterator { + if FileRangetrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap FileRangeGapIterator) nextLargeEnoughGapHelper(minSize uint64) FileRangeGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return FileRangeGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap FileRangeGapIterator) PrevLargeEnoughGap(minSize uint64) FileRangeGapIterator { + if FileRangetrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap FileRangeGapIterator) prevLargeEnoughGapHelper(minSize uint64) FileRangeGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return FileRangeGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func FileRangesegmentBeforePosition(n *FileRangenode, i int) FileRangeIterator { + for i == 0 { + if n.parent == nil { + return FileRangeIterator{} + } + n, i = n.parent, n.parentIndex + } + return FileRangeIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func FileRangesegmentAfterPosition(n *FileRangenode, i int) FileRangeIterator { + for i == n.nrSegments { + if n.parent == nil { + return FileRangeIterator{} + } + n, i = n.parent, n.parentIndex + } + return FileRangeIterator{n, i} +} + +func FileRangezeroValueSlice(slice []uint64) { + + for i := range slice { + FileRangeSetFunctions{}.ClearValue(&slice[i]) + } +} + +func FileRangezeroNodeSlice(slice []*FileRangenode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *FileRangeSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *FileRangenode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *FileRangenode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if FileRangetrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type FileRangeSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []uint64 +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *FileRangeSet) ExportSortedSlices() *FileRangeSegmentDataSlices { + var sds FileRangeSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *FileRangeSet) ImportSortedSlices(sds *FileRangeSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.MappableRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *FileRangeSet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.MappableRange, uint64) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *FileRangeSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *FileRangeSet) saveRoot() *FileRangeSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *FileRangeSet) loadRoot(sds *FileRangeSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/fsutil/frame_ref_set_impl.go b/pkg/sentry/fs/fsutil/frame_ref_set_impl.go new file mode 100644 index 000000000..619246875 --- /dev/null +++ b/pkg/sentry/fs/fsutil/frame_ref_set_impl.go @@ -0,0 +1,1647 @@ +package fsutil + +import ( + __generics_imported0 "gvisor.dev/gvisor/pkg/sentry/memmap" +) + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const FrameReftrackGaps = 0 + +var _ = uint8(FrameReftrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type FrameRefdynamicGap [FrameReftrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *FrameRefdynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *FrameRefdynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + FrameRefminDegree = 3 + + FrameRefmaxDegree = 2 * FrameRefminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type FrameRefSet struct { + root FrameRefnode `state:".(*FrameRefSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *FrameRefSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *FrameRefSet) IsEmptyRange(r __generics_imported0.FileRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *FrameRefSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *FrameRefSet) SpanRange(r __generics_imported0.FileRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *FrameRefSet) FirstSegment() FrameRefIterator { + if s.root.nrSegments == 0 { + return FrameRefIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *FrameRefSet) LastSegment() FrameRefIterator { + if s.root.nrSegments == 0 { + return FrameRefIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *FrameRefSet) FirstGap() FrameRefGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return FrameRefGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *FrameRefSet) LastGap() FrameRefGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return FrameRefGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *FrameRefSet) Find(key uint64) (FrameRefIterator, FrameRefGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return FrameRefIterator{n, i}, FrameRefGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return FrameRefIterator{}, FrameRefGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *FrameRefSet) FindSegment(key uint64) FrameRefIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *FrameRefSet) LowerBoundSegment(min uint64) FrameRefIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *FrameRefSet) UpperBoundSegment(max uint64) FrameRefIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *FrameRefSet) FindGap(key uint64) FrameRefGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *FrameRefSet) LowerBoundGap(min uint64) FrameRefGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *FrameRefSet) UpperBoundGap(max uint64) FrameRefGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *FrameRefSet) Add(r __generics_imported0.FileRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *FrameRefSet) AddWithoutMerging(r __generics_imported0.FileRange, val uint64) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *FrameRefSet) Insert(gap FrameRefGapIterator, r __generics_imported0.FileRange, val uint64) FrameRefIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (FrameRefSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := FrameReftrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (FrameRefSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (FrameRefSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := FrameReftrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *FrameRefSet) InsertWithoutMerging(gap FrameRefGapIterator, r __generics_imported0.FileRange, val uint64) FrameRefIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *FrameRefSet) InsertWithoutMergingUnchecked(gap FrameRefGapIterator, r __generics_imported0.FileRange, val uint64) FrameRefIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := FrameReftrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return FrameRefIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *FrameRefSet) Remove(seg FrameRefIterator) FrameRefGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if FrameReftrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + FrameRefSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if FrameReftrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(FrameRefGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *FrameRefSet) RemoveAll() { + s.root = FrameRefnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *FrameRefSet) RemoveRange(r __generics_imported0.FileRange) FrameRefGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *FrameRefSet) Merge(first, second FrameRefIterator) FrameRefIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *FrameRefSet) MergeUnchecked(first, second FrameRefIterator) FrameRefIterator { + if first.End() == second.Start() { + if mval, ok := (FrameRefSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return FrameRefIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *FrameRefSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *FrameRefSet) MergeRange(r __generics_imported0.FileRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *FrameRefSet) MergeAdjacent(r __generics_imported0.FileRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *FrameRefSet) Split(seg FrameRefIterator, split uint64) (FrameRefIterator, FrameRefIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *FrameRefSet) SplitUnchecked(seg FrameRefIterator, split uint64) (FrameRefIterator, FrameRefIterator) { + val1, val2 := (FrameRefSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.FileRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *FrameRefSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *FrameRefSet) Isolate(seg FrameRefIterator, r __generics_imported0.FileRange) FrameRefIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *FrameRefSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg FrameRefIterator)) FrameRefGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return FrameRefGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return FrameRefGapIterator{} + } + } +} + +// +stateify savable +type FrameRefnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *FrameRefnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap FrameRefdynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [FrameRefmaxDegree - 1]__generics_imported0.FileRange + values [FrameRefmaxDegree - 1]uint64 + children [FrameRefmaxDegree]*FrameRefnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *FrameRefnode) firstSegment() FrameRefIterator { + for n.hasChildren { + n = n.children[0] + } + return FrameRefIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *FrameRefnode) lastSegment() FrameRefIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return FrameRefIterator{n, n.nrSegments - 1} +} + +func (n *FrameRefnode) prevSibling() *FrameRefnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *FrameRefnode) nextSibling() *FrameRefnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *FrameRefnode) rebalanceBeforeInsert(gap FrameRefGapIterator) FrameRefGapIterator { + if n.nrSegments < FrameRefmaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &FrameRefnode{ + nrSegments: FrameRefminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &FrameRefnode{ + nrSegments: FrameRefminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:FrameRefminDegree-1], n.keys[:FrameRefminDegree-1]) + copy(left.values[:FrameRefminDegree-1], n.values[:FrameRefminDegree-1]) + copy(right.keys[:FrameRefminDegree-1], n.keys[FrameRefminDegree:]) + copy(right.values[:FrameRefminDegree-1], n.values[FrameRefminDegree:]) + n.keys[0], n.values[0] = n.keys[FrameRefminDegree-1], n.values[FrameRefminDegree-1] + FrameRefzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:FrameRefminDegree], n.children[:FrameRefminDegree]) + copy(right.children[:FrameRefminDegree], n.children[FrameRefminDegree:]) + FrameRefzeroNodeSlice(n.children[2:]) + for i := 0; i < FrameRefminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if FrameReftrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < FrameRefminDegree { + return FrameRefGapIterator{left, gap.index} + } + return FrameRefGapIterator{right, gap.index - FrameRefminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[FrameRefminDegree-1], n.values[FrameRefminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &FrameRefnode{ + nrSegments: FrameRefminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:FrameRefminDegree-1], n.keys[FrameRefminDegree:]) + copy(sibling.values[:FrameRefminDegree-1], n.values[FrameRefminDegree:]) + FrameRefzeroValueSlice(n.values[FrameRefminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:FrameRefminDegree], n.children[FrameRefminDegree:]) + FrameRefzeroNodeSlice(n.children[FrameRefminDegree:]) + for i := 0; i < FrameRefminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = FrameRefminDegree - 1 + + if FrameReftrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < FrameRefminDegree { + return gap + } + return FrameRefGapIterator{sibling, gap.index - FrameRefminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *FrameRefnode) rebalanceAfterRemove(gap FrameRefGapIterator) FrameRefGapIterator { + for { + if n.nrSegments >= FrameRefminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= FrameRefminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + FrameRefSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if FrameReftrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return FrameRefGapIterator{n, 0} + } + if gap.node == n { + return FrameRefGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= FrameRefminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + FrameRefSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if FrameReftrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return FrameRefGapIterator{n, n.nrSegments} + } + return FrameRefGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return FrameRefGapIterator{p, gap.index} + } + if gap.node == right { + return FrameRefGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *FrameRefnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = FrameRefGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + FrameRefSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if FrameReftrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *FrameRefnode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *FrameRefnode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *FrameRefnode) calculateMaxGapLeaf() uint64 { + max := FrameRefGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (FrameRefGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *FrameRefnode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *FrameRefnode) searchFirstLargeEnoughGap(minSize uint64) FrameRefGapIterator { + if n.maxGap.Get() < minSize { + return FrameRefGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := FrameRefGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *FrameRefnode) searchLastLargeEnoughGap(minSize uint64) FrameRefGapIterator { + if n.maxGap.Get() < minSize { + return FrameRefGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := FrameRefGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type FrameRefIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *FrameRefnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg FrameRefIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg FrameRefIterator) Range() __generics_imported0.FileRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg FrameRefIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg FrameRefIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg FrameRefIterator) SetRangeUnchecked(r __generics_imported0.FileRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg FrameRefIterator) SetRange(r __generics_imported0.FileRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg FrameRefIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg FrameRefIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg FrameRefIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg FrameRefIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg FrameRefIterator) Value() uint64 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg FrameRefIterator) ValuePtr() *uint64 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg FrameRefIterator) SetValue(val uint64) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg FrameRefIterator) PrevSegment() FrameRefIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return FrameRefIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return FrameRefIterator{} + } + return FrameRefsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg FrameRefIterator) NextSegment() FrameRefIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return FrameRefIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return FrameRefIterator{} + } + return FrameRefsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg FrameRefIterator) PrevGap() FrameRefGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return FrameRefGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg FrameRefIterator) NextGap() FrameRefGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return FrameRefGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg FrameRefIterator) PrevNonEmpty() (FrameRefIterator, FrameRefGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return FrameRefIterator{}, gap + } + return gap.PrevSegment(), FrameRefGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg FrameRefIterator) NextNonEmpty() (FrameRefIterator, FrameRefGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return FrameRefIterator{}, gap + } + return gap.NextSegment(), FrameRefGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type FrameRefGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *FrameRefnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap FrameRefGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap FrameRefGapIterator) Range() __generics_imported0.FileRange { + return __generics_imported0.FileRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap FrameRefGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return FrameRefSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap FrameRefGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return FrameRefSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap FrameRefGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap FrameRefGapIterator) PrevSegment() FrameRefIterator { + return FrameRefsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap FrameRefGapIterator) NextSegment() FrameRefIterator { + return FrameRefsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap FrameRefGapIterator) PrevGap() FrameRefGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return FrameRefGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap FrameRefGapIterator) NextGap() FrameRefGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return FrameRefGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap FrameRefGapIterator) NextLargeEnoughGap(minSize uint64) FrameRefGapIterator { + if FrameReftrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap FrameRefGapIterator) nextLargeEnoughGapHelper(minSize uint64) FrameRefGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return FrameRefGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap FrameRefGapIterator) PrevLargeEnoughGap(minSize uint64) FrameRefGapIterator { + if FrameReftrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap FrameRefGapIterator) prevLargeEnoughGapHelper(minSize uint64) FrameRefGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return FrameRefGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func FrameRefsegmentBeforePosition(n *FrameRefnode, i int) FrameRefIterator { + for i == 0 { + if n.parent == nil { + return FrameRefIterator{} + } + n, i = n.parent, n.parentIndex + } + return FrameRefIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func FrameRefsegmentAfterPosition(n *FrameRefnode, i int) FrameRefIterator { + for i == n.nrSegments { + if n.parent == nil { + return FrameRefIterator{} + } + n, i = n.parent, n.parentIndex + } + return FrameRefIterator{n, i} +} + +func FrameRefzeroValueSlice(slice []uint64) { + + for i := range slice { + FrameRefSetFunctions{}.ClearValue(&slice[i]) + } +} + +func FrameRefzeroNodeSlice(slice []*FrameRefnode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *FrameRefSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *FrameRefnode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *FrameRefnode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if FrameReftrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type FrameRefSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []uint64 +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *FrameRefSet) ExportSortedSlices() *FrameRefSegmentDataSlices { + var sds FrameRefSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *FrameRefSet) ImportSortedSlices(sds *FrameRefSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.FileRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *FrameRefSet) segmentTestCheck(expectedSegments int, segFunc func(int, __generics_imported0.FileRange, uint64) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *FrameRefSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *FrameRefSet) saveRoot() *FrameRefSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *FrameRefSet) loadRoot(sds *FrameRefSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/fsutil/fsutil_impl_state_autogen.go b/pkg/sentry/fs/fsutil/fsutil_impl_state_autogen.go new file mode 100644 index 000000000..cb6b2ca81 --- /dev/null +++ b/pkg/sentry/fs/fsutil/fsutil_impl_state_autogen.go @@ -0,0 +1,310 @@ +// automatically generated by stateify. + +package fsutil + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *DirtySet) StateTypeName() string { + return "pkg/sentry/fs/fsutil.DirtySet" +} + +func (s *DirtySet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *DirtySet) beforeSave() {} + +func (s *DirtySet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *DirtySegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *DirtySet) afterLoad() {} + +func (s *DirtySet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*DirtySegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*DirtySegmentDataSlices)) }) +} + +func (n *Dirtynode) StateTypeName() string { + return "pkg/sentry/fs/fsutil.Dirtynode" +} + +func (n *Dirtynode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *Dirtynode) beforeSave() {} + +func (n *Dirtynode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *Dirtynode) afterLoad() {} + +func (n *Dirtynode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (d *DirtySegmentDataSlices) StateTypeName() string { + return "pkg/sentry/fs/fsutil.DirtySegmentDataSlices" +} + +func (d *DirtySegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (d *DirtySegmentDataSlices) beforeSave() {} + +func (d *DirtySegmentDataSlices) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.Start) + stateSinkObject.Save(1, &d.End) + stateSinkObject.Save(2, &d.Values) +} + +func (d *DirtySegmentDataSlices) afterLoad() {} + +func (d *DirtySegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.Start) + stateSourceObject.Load(1, &d.End) + stateSourceObject.Load(2, &d.Values) +} + +func (s *FileRangeSet) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FileRangeSet" +} + +func (s *FileRangeSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *FileRangeSet) beforeSave() {} + +func (s *FileRangeSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *FileRangeSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *FileRangeSet) afterLoad() {} + +func (s *FileRangeSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*FileRangeSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*FileRangeSegmentDataSlices)) }) +} + +func (n *FileRangenode) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FileRangenode" +} + +func (n *FileRangenode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *FileRangenode) beforeSave() {} + +func (n *FileRangenode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *FileRangenode) afterLoad() {} + +func (n *FileRangenode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (f *FileRangeSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FileRangeSegmentDataSlices" +} + +func (f *FileRangeSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (f *FileRangeSegmentDataSlices) beforeSave() {} + +func (f *FileRangeSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.Start) + stateSinkObject.Save(1, &f.End) + stateSinkObject.Save(2, &f.Values) +} + +func (f *FileRangeSegmentDataSlices) afterLoad() {} + +func (f *FileRangeSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.Start) + stateSourceObject.Load(1, &f.End) + stateSourceObject.Load(2, &f.Values) +} + +func (s *FrameRefSet) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FrameRefSet" +} + +func (s *FrameRefSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *FrameRefSet) beforeSave() {} + +func (s *FrameRefSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *FrameRefSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *FrameRefSet) afterLoad() {} + +func (s *FrameRefSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*FrameRefSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*FrameRefSegmentDataSlices)) }) +} + +func (n *FrameRefnode) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FrameRefnode" +} + +func (n *FrameRefnode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *FrameRefnode) beforeSave() {} + +func (n *FrameRefnode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *FrameRefnode) afterLoad() {} + +func (n *FrameRefnode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (f *FrameRefSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FrameRefSegmentDataSlices" +} + +func (f *FrameRefSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (f *FrameRefSegmentDataSlices) beforeSave() {} + +func (f *FrameRefSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.Start) + stateSinkObject.Save(1, &f.End) + stateSinkObject.Save(2, &f.Values) +} + +func (f *FrameRefSegmentDataSlices) afterLoad() {} + +func (f *FrameRefSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.Start) + stateSourceObject.Load(1, &f.End) + stateSourceObject.Load(2, &f.Values) +} + +func init() { + state.Register((*DirtySet)(nil)) + state.Register((*Dirtynode)(nil)) + state.Register((*DirtySegmentDataSlices)(nil)) + state.Register((*FileRangeSet)(nil)) + state.Register((*FileRangenode)(nil)) + state.Register((*FileRangeSegmentDataSlices)(nil)) + state.Register((*FrameRefSet)(nil)) + state.Register((*FrameRefnode)(nil)) + state.Register((*FrameRefSegmentDataSlices)(nil)) +} diff --git a/pkg/sentry/fs/fsutil/fsutil_state_autogen.go b/pkg/sentry/fs/fsutil/fsutil_state_autogen.go new file mode 100644 index 000000000..439ab9a65 --- /dev/null +++ b/pkg/sentry/fs/fsutil/fsutil_state_autogen.go @@ -0,0 +1,383 @@ +// automatically generated by stateify. + +package fsutil + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (d *DirtyInfo) StateTypeName() string { + return "pkg/sentry/fs/fsutil.DirtyInfo" +} + +func (d *DirtyInfo) StateFields() []string { + return []string{ + "Keep", + } +} + +func (d *DirtyInfo) beforeSave() {} + +func (d *DirtyInfo) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.Keep) +} + +func (d *DirtyInfo) afterLoad() {} + +func (d *DirtyInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.Keep) +} + +func (sdfo *StaticDirFileOperations) StateTypeName() string { + return "pkg/sentry/fs/fsutil.StaticDirFileOperations" +} + +func (sdfo *StaticDirFileOperations) StateFields() []string { + return []string{ + "dentryMap", + "dirCursor", + } +} + +func (sdfo *StaticDirFileOperations) beforeSave() {} + +func (sdfo *StaticDirFileOperations) StateSave(stateSinkObject state.Sink) { + sdfo.beforeSave() + stateSinkObject.Save(0, &sdfo.dentryMap) + stateSinkObject.Save(1, &sdfo.dirCursor) +} + +func (sdfo *StaticDirFileOperations) afterLoad() {} + +func (sdfo *StaticDirFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sdfo.dentryMap) + stateSourceObject.Load(1, &sdfo.dirCursor) +} + +func (n *NoReadWriteFile) StateTypeName() string { + return "pkg/sentry/fs/fsutil.NoReadWriteFile" +} + +func (n *NoReadWriteFile) StateFields() []string { + return []string{} +} + +func (n *NoReadWriteFile) beforeSave() {} + +func (n *NoReadWriteFile) StateSave(stateSinkObject state.Sink) { + n.beforeSave() +} + +func (n *NoReadWriteFile) afterLoad() {} + +func (n *NoReadWriteFile) StateLoad(stateSourceObject state.Source) { +} + +func (scr *FileStaticContentReader) StateTypeName() string { + return "pkg/sentry/fs/fsutil.FileStaticContentReader" +} + +func (scr *FileStaticContentReader) StateFields() []string { + return []string{ + "content", + } +} + +func (scr *FileStaticContentReader) beforeSave() {} + +func (scr *FileStaticContentReader) StateSave(stateSinkObject state.Sink) { + scr.beforeSave() + stateSinkObject.Save(0, &scr.content) +} + +func (scr *FileStaticContentReader) afterLoad() {} + +func (scr *FileStaticContentReader) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &scr.content) +} + +func (f *HostFileMapper) StateTypeName() string { + return "pkg/sentry/fs/fsutil.HostFileMapper" +} + +func (f *HostFileMapper) StateFields() []string { + return []string{ + "refs", + } +} + +func (f *HostFileMapper) beforeSave() {} + +func (f *HostFileMapper) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.refs) +} + +func (f *HostFileMapper) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.refs) + stateSourceObject.AfterLoad(f.afterLoad) +} + +func (h *HostMappable) StateTypeName() string { + return "pkg/sentry/fs/fsutil.HostMappable" +} + +func (h *HostMappable) StateFields() []string { + return []string{ + "hostFileMapper", + "backingFile", + "mappings", + } +} + +func (h *HostMappable) beforeSave() {} + +func (h *HostMappable) StateSave(stateSinkObject state.Sink) { + h.beforeSave() + stateSinkObject.Save(0, &h.hostFileMapper) + stateSinkObject.Save(1, &h.backingFile) + stateSinkObject.Save(2, &h.mappings) +} + +func (h *HostMappable) afterLoad() {} + +func (h *HostMappable) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &h.hostFileMapper) + stateSourceObject.Load(1, &h.backingFile) + stateSourceObject.Load(2, &h.mappings) +} + +func (s *SimpleFileInode) StateTypeName() string { + return "pkg/sentry/fs/fsutil.SimpleFileInode" +} + +func (s *SimpleFileInode) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (s *SimpleFileInode) beforeSave() {} + +func (s *SimpleFileInode) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeSimpleAttributes) +} + +func (s *SimpleFileInode) afterLoad() {} + +func (s *SimpleFileInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeSimpleAttributes) +} + +func (n *NoReadWriteFileInode) StateTypeName() string { + return "pkg/sentry/fs/fsutil.NoReadWriteFileInode" +} + +func (n *NoReadWriteFileInode) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + } +} + +func (n *NoReadWriteFileInode) beforeSave() {} + +func (n *NoReadWriteFileInode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.InodeSimpleAttributes) +} + +func (n *NoReadWriteFileInode) afterLoad() {} + +func (n *NoReadWriteFileInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.InodeSimpleAttributes) +} + +func (i *InodeSimpleAttributes) StateTypeName() string { + return "pkg/sentry/fs/fsutil.InodeSimpleAttributes" +} + +func (i *InodeSimpleAttributes) StateFields() []string { + return []string{ + "fsType", + "unstable", + } +} + +func (i *InodeSimpleAttributes) beforeSave() {} + +func (i *InodeSimpleAttributes) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.fsType) + stateSinkObject.Save(1, &i.unstable) +} + +func (i *InodeSimpleAttributes) afterLoad() {} + +func (i *InodeSimpleAttributes) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.fsType) + stateSourceObject.Load(1, &i.unstable) +} + +func (i *InodeSimpleExtendedAttributes) StateTypeName() string { + return "pkg/sentry/fs/fsutil.InodeSimpleExtendedAttributes" +} + +func (i *InodeSimpleExtendedAttributes) StateFields() []string { + return []string{ + "xattrs", + } +} + +func (i *InodeSimpleExtendedAttributes) beforeSave() {} + +func (i *InodeSimpleExtendedAttributes) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.xattrs) +} + +func (i *InodeSimpleExtendedAttributes) afterLoad() {} + +func (i *InodeSimpleExtendedAttributes) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.xattrs) +} + +func (s *staticFile) StateTypeName() string { + return "pkg/sentry/fs/fsutil.staticFile" +} + +func (s *staticFile) StateFields() []string { + return []string{ + "FileStaticContentReader", + } +} + +func (s *staticFile) beforeSave() {} + +func (s *staticFile) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.FileStaticContentReader) +} + +func (s *staticFile) afterLoad() {} + +func (s *staticFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.FileStaticContentReader) +} + +func (i *InodeStaticFileGetter) StateTypeName() string { + return "pkg/sentry/fs/fsutil.InodeStaticFileGetter" +} + +func (i *InodeStaticFileGetter) StateFields() []string { + return []string{ + "Contents", + } +} + +func (i *InodeStaticFileGetter) beforeSave() {} + +func (i *InodeStaticFileGetter) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.Contents) +} + +func (i *InodeStaticFileGetter) afterLoad() {} + +func (i *InodeStaticFileGetter) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.Contents) +} + +func (c *CachingInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/fsutil.CachingInodeOperations" +} + +func (c *CachingInodeOperations) StateFields() []string { + return []string{ + "backingFile", + "mfp", + "opts", + "attr", + "dirtyAttr", + "mappings", + "cache", + "dirty", + "hostFileMapper", + "refs", + } +} + +func (c *CachingInodeOperations) beforeSave() {} + +func (c *CachingInodeOperations) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.backingFile) + stateSinkObject.Save(1, &c.mfp) + stateSinkObject.Save(2, &c.opts) + stateSinkObject.Save(3, &c.attr) + stateSinkObject.Save(4, &c.dirtyAttr) + stateSinkObject.Save(5, &c.mappings) + stateSinkObject.Save(6, &c.cache) + stateSinkObject.Save(7, &c.dirty) + stateSinkObject.Save(8, &c.hostFileMapper) + stateSinkObject.Save(9, &c.refs) +} + +func (c *CachingInodeOperations) afterLoad() {} + +func (c *CachingInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.backingFile) + stateSourceObject.Load(1, &c.mfp) + stateSourceObject.Load(2, &c.opts) + stateSourceObject.Load(3, &c.attr) + stateSourceObject.Load(4, &c.dirtyAttr) + stateSourceObject.Load(5, &c.mappings) + stateSourceObject.Load(6, &c.cache) + stateSourceObject.Load(7, &c.dirty) + stateSourceObject.Load(8, &c.hostFileMapper) + stateSourceObject.Load(9, &c.refs) +} + +func (c *CachingInodeOperationsOptions) StateTypeName() string { + return "pkg/sentry/fs/fsutil.CachingInodeOperationsOptions" +} + +func (c *CachingInodeOperationsOptions) StateFields() []string { + return []string{ + "ForcePageCache", + "LimitHostFDTranslation", + } +} + +func (c *CachingInodeOperationsOptions) beforeSave() {} + +func (c *CachingInodeOperationsOptions) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.ForcePageCache) + stateSinkObject.Save(1, &c.LimitHostFDTranslation) +} + +func (c *CachingInodeOperationsOptions) afterLoad() {} + +func (c *CachingInodeOperationsOptions) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.ForcePageCache) + stateSourceObject.Load(1, &c.LimitHostFDTranslation) +} + +func init() { + state.Register((*DirtyInfo)(nil)) + state.Register((*StaticDirFileOperations)(nil)) + state.Register((*NoReadWriteFile)(nil)) + state.Register((*FileStaticContentReader)(nil)) + state.Register((*HostFileMapper)(nil)) + state.Register((*HostMappable)(nil)) + state.Register((*SimpleFileInode)(nil)) + state.Register((*NoReadWriteFileInode)(nil)) + state.Register((*InodeSimpleAttributes)(nil)) + state.Register((*InodeSimpleExtendedAttributes)(nil)) + state.Register((*staticFile)(nil)) + state.Register((*InodeStaticFileGetter)(nil)) + state.Register((*CachingInodeOperations)(nil)) + state.Register((*CachingInodeOperationsOptions)(nil)) +} diff --git a/pkg/sentry/fs/fsutil/fsutil_unsafe_state_autogen.go b/pkg/sentry/fs/fsutil/fsutil_unsafe_state_autogen.go new file mode 100644 index 000000000..00b0994f6 --- /dev/null +++ b/pkg/sentry/fs/fsutil/fsutil_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package fsutil diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go deleted file mode 100644 index 1547584c5..000000000 --- a/pkg/sentry/fs/fsutil/inode_cached_test.go +++ /dev/null @@ -1,389 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fsutil - -import ( - "bytes" - "io" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -type noopBackingFile struct{} - -func (noopBackingFile) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) { - return dsts.NumBytes(), nil -} - -func (noopBackingFile) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) { - return srcs.NumBytes(), nil -} - -func (noopBackingFile) SetMaskedAttributes(context.Context, fs.AttrMask, fs.UnstableAttr, bool) error { - return nil -} - -func (noopBackingFile) Sync(context.Context) error { - return nil -} - -func (noopBackingFile) FD() int { - return -1 -} - -func (noopBackingFile) Allocate(ctx context.Context, offset int64, length int64) error { - return nil -} - -func TestSetPermissions(t *testing.T) { - ctx := contexttest.Context(t) - - uattr := fs.WithCurrentTime(ctx, fs.UnstableAttr{ - Perms: fs.FilePermsFromMode(0444), - }) - iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, CachingInodeOperationsOptions{}) - defer iops.Release() - - perms := fs.FilePermsFromMode(0777) - if !iops.SetPermissions(ctx, nil, perms) { - t.Fatalf("SetPermissions failed, want success") - } - - // Did permissions change? - if iops.attr.Perms != perms { - t.Fatalf("got perms +%v, want +%v", iops.attr.Perms, perms) - } - - // Did status change time change? - if !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("got status change time not dirty, want dirty") - } - if iops.attr.StatusChangeTime.Equal(uattr.StatusChangeTime) { - t.Fatalf("got status change time unchanged") - } -} - -func TestSetTimestamps(t *testing.T) { - ctx := contexttest.Context(t) - for _, test := range []struct { - desc string - ts fs.TimeSpec - wantChanged fs.AttrMask - }{ - { - desc: "noop", - ts: fs.TimeSpec{ - ATimeOmit: true, - MTimeOmit: true, - }, - wantChanged: fs.AttrMask{}, - }, - { - desc: "access time only", - ts: fs.TimeSpec{ - ATime: ktime.NowFromContext(ctx), - MTimeOmit: true, - }, - wantChanged: fs.AttrMask{ - AccessTime: true, - }, - }, - { - desc: "modification time only", - ts: fs.TimeSpec{ - ATimeOmit: true, - MTime: ktime.NowFromContext(ctx), - }, - wantChanged: fs.AttrMask{ - ModificationTime: true, - }, - }, - { - desc: "access and modification time", - ts: fs.TimeSpec{ - ATime: ktime.NowFromContext(ctx), - MTime: ktime.NowFromContext(ctx), - }, - wantChanged: fs.AttrMask{ - AccessTime: true, - ModificationTime: true, - }, - }, - { - desc: "system time access and modification time", - ts: fs.TimeSpec{ - ATimeSetSystemTime: true, - MTimeSetSystemTime: true, - }, - wantChanged: fs.AttrMask{ - AccessTime: true, - ModificationTime: true, - }, - }, - } { - t.Run(test.desc, func(t *testing.T) { - ctx := contexttest.Context(t) - - epoch := ktime.ZeroTime - uattr := fs.UnstableAttr{ - AccessTime: epoch, - ModificationTime: epoch, - StatusChangeTime: epoch, - } - iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, CachingInodeOperationsOptions{}) - defer iops.Release() - - if err := iops.SetTimestamps(ctx, nil, test.ts); err != nil { - t.Fatalf("SetTimestamps got error %v, want nil", err) - } - if test.wantChanged.AccessTime { - if !iops.attr.AccessTime.After(uattr.AccessTime) { - t.Fatalf("diritied access time did not advance, want %v > %v", iops.attr.AccessTime, uattr.AccessTime) - } - if !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("dirty access time requires dirty status change time") - } - if !iops.attr.StatusChangeTime.After(uattr.StatusChangeTime) { - t.Fatalf("dirtied status change time did not advance") - } - } - if test.wantChanged.ModificationTime { - if !iops.attr.ModificationTime.After(uattr.ModificationTime) { - t.Fatalf("diritied modification time did not advance") - } - if !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("dirty modification time requires dirty status change time") - } - if !iops.attr.StatusChangeTime.After(uattr.StatusChangeTime) { - t.Fatalf("dirtied status change time did not advance") - } - } - }) - } -} - -func TestTruncate(t *testing.T) { - ctx := contexttest.Context(t) - - uattr := fs.UnstableAttr{ - Size: 0, - } - iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, CachingInodeOperationsOptions{}) - defer iops.Release() - - if err := iops.Truncate(ctx, nil, uattr.Size); err != nil { - t.Fatalf("Truncate got error %v, want nil", err) - } - var size int64 = 4096 - if err := iops.Truncate(ctx, nil, size); err != nil { - t.Fatalf("Truncate got error %v, want nil", err) - } - if iops.attr.Size != size { - t.Fatalf("Truncate got %d, want %d", iops.attr.Size, size) - } - if !iops.dirtyAttr.ModificationTime || !iops.dirtyAttr.StatusChangeTime { - t.Fatalf("Truncate did not dirty modification and status change time") - } - if !iops.attr.ModificationTime.After(uattr.ModificationTime) { - t.Fatalf("dirtied modification time did not change") - } - if !iops.attr.StatusChangeTime.After(uattr.StatusChangeTime) { - t.Fatalf("dirtied status change time did not change") - } -} - -type sliceBackingFile struct { - data []byte -} - -func newSliceBackingFile(data []byte) *sliceBackingFile { - return &sliceBackingFile{data} -} - -func (f *sliceBackingFile) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) { - r := safemem.BlockSeqReader{safemem.BlockSeqOf(safemem.BlockFromSafeSlice(f.data)).DropFirst64(offset)} - return r.ReadToBlocks(dsts) -} - -func (f *sliceBackingFile) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) { - w := safemem.BlockSeqWriter{safemem.BlockSeqOf(safemem.BlockFromSafeSlice(f.data)).DropFirst64(offset)} - return w.WriteFromBlocks(srcs) -} - -func (*sliceBackingFile) SetMaskedAttributes(context.Context, fs.AttrMask, fs.UnstableAttr, bool) error { - return nil -} - -func (*sliceBackingFile) Sync(context.Context) error { - return nil -} - -func (*sliceBackingFile) FD() int { - return -1 -} - -func (f *sliceBackingFile) Allocate(ctx context.Context, offset int64, length int64) error { - return syserror.EOPNOTSUPP -} - -type noopMappingSpace struct{} - -// Invalidate implements memmap.MappingSpace.Invalidate. -func (noopMappingSpace) Invalidate(ar usermem.AddrRange, opts memmap.InvalidateOpts) { -} - -func anonInode(ctx context.Context) *fs.Inode { - return fs.NewInode(ctx, &SimpleFileInode{ - InodeSimpleAttributes: NewInodeSimpleAttributes(ctx, fs.FileOwnerFromContext(ctx), fs.FilePermissions{ - User: fs.PermMask{Read: true, Write: true}, - }, 0), - }, fs.NewPseudoMountSource(ctx), fs.StableAttr{ - Type: fs.Anonymous, - BlockSize: usermem.PageSize, - }) -} - -func pagesOf(bs ...byte) []byte { - buf := make([]byte, 0, len(bs)*usermem.PageSize) - for _, b := range bs { - buf = append(buf, bytes.Repeat([]byte{b}, usermem.PageSize)...) - } - return buf -} - -func TestRead(t *testing.T) { - ctx := contexttest.Context(t) - - // Construct a 3-page file. - buf := pagesOf('a', 'b', 'c') - file := fs.NewFile(ctx, fs.NewDirent(ctx, anonInode(ctx), "anon"), fs.FileFlags{}, nil) - uattr := fs.UnstableAttr{ - Size: int64(len(buf)), - } - iops := NewCachingInodeOperations(ctx, newSliceBackingFile(buf), uattr, CachingInodeOperationsOptions{}) - defer iops.Release() - - // Expect the cache to be initially empty. - if cached := iops.cache.Span(); cached != 0 { - t.Errorf("Span got %d, want 0", cached) - } - - // Create a memory mapping of the second page (as CachingInodeOperations - // expects to only cache mapped pages), then call Translate to force it to - // be cached. - var ms noopMappingSpace - ar := usermem.AddrRange{usermem.PageSize, 2 * usermem.PageSize} - if err := iops.AddMapping(ctx, ms, ar, usermem.PageSize, true); err != nil { - t.Fatalf("AddMapping got %v, want nil", err) - } - mr := memmap.MappableRange{usermem.PageSize, 2 * usermem.PageSize} - if _, err := iops.Translate(ctx, mr, mr, usermem.Read); err != nil { - t.Fatalf("Translate got %v, want nil", err) - } - if cached := iops.cache.Span(); cached != usermem.PageSize { - t.Errorf("SpanRange got %d, want %d", cached, usermem.PageSize) - } - - // Try to read 4 pages. The first and third pages should be read directly - // from the "file", the second page should be read from the cache, and only - // 3 pages (the size of the file) should be readable. - rbuf := make([]byte, 4*usermem.PageSize) - dst := usermem.BytesIOSequence(rbuf) - n, err := iops.Read(ctx, file, dst, 0) - if n != 3*usermem.PageSize || (err != nil && err != io.EOF) { - t.Fatalf("Read got (%d, %v), want (%d, nil or EOF)", n, err, 3*usermem.PageSize) - } - rbuf = rbuf[:3*usermem.PageSize] - - // Did we get the bytes we expect? - if !bytes.Equal(rbuf, buf) { - t.Errorf("Read back bytes %v, want %v", rbuf, buf) - } - - // Delete the memory mapping before iops.Release(). The cached page will - // either be evicted by ctx's pgalloc.MemoryFile, or dropped by - // iops.Release(). - iops.RemoveMapping(ctx, ms, ar, usermem.PageSize, true) -} - -func TestWrite(t *testing.T) { - ctx := contexttest.Context(t) - - // Construct a 4-page file. - buf := pagesOf('a', 'b', 'c', 'd') - orig := append([]byte(nil), buf...) - inode := anonInode(ctx) - uattr := fs.UnstableAttr{ - Size: int64(len(buf)), - } - iops := NewCachingInodeOperations(ctx, newSliceBackingFile(buf), uattr, CachingInodeOperationsOptions{}) - defer iops.Release() - - // Expect the cache to be initially empty. - if cached := iops.cache.Span(); cached != 0 { - t.Errorf("Span got %d, want 0", cached) - } - - // Create a memory mapping of the second and third pages (as - // CachingInodeOperations expects to only cache mapped pages), then call - // Translate to force them to be cached. - var ms noopMappingSpace - ar := usermem.AddrRange{usermem.PageSize, 3 * usermem.PageSize} - if err := iops.AddMapping(ctx, ms, ar, usermem.PageSize, true); err != nil { - t.Fatalf("AddMapping got %v, want nil", err) - } - defer iops.RemoveMapping(ctx, ms, ar, usermem.PageSize, true) - mr := memmap.MappableRange{usermem.PageSize, 3 * usermem.PageSize} - if _, err := iops.Translate(ctx, mr, mr, usermem.Read); err != nil { - t.Fatalf("Translate got %v, want nil", err) - } - if cached := iops.cache.Span(); cached != 2*usermem.PageSize { - t.Errorf("SpanRange got %d, want %d", cached, 2*usermem.PageSize) - } - - // Write to the first 2 pages. - wbuf := pagesOf('e', 'f') - src := usermem.BytesIOSequence(wbuf) - n, err := iops.Write(ctx, src, 0) - if n != 2*usermem.PageSize || err != nil { - t.Fatalf("Write got (%d, %v), want (%d, nil)", n, err, 2*usermem.PageSize) - } - - // The first page should have been written directly, since it was not cached. - want := append([]byte(nil), orig...) - copy(want, pagesOf('e')) - if !bytes.Equal(buf, want) { - t.Errorf("File contents are %v, want %v", buf, want) - } - - // Sync back to the "backing file". - if err := iops.WriteOut(ctx, inode); err != nil { - t.Errorf("Sync got %v, want nil", err) - } - - // Now the second page should have been written as well. - copy(want[usermem.PageSize:], pagesOf('f')) - if !bytes.Equal(buf, want) { - t.Errorf("File contents are %v, want %v", buf, want) - } -} diff --git a/pkg/sentry/fs/g3doc/.gitignore b/pkg/sentry/fs/g3doc/.gitignore deleted file mode 100644 index 2d19fc766..000000000 --- a/pkg/sentry/fs/g3doc/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.html diff --git a/pkg/sentry/fs/g3doc/fuse.md b/pkg/sentry/fs/g3doc/fuse.md deleted file mode 100644 index 05e043583..000000000 --- a/pkg/sentry/fs/g3doc/fuse.md +++ /dev/null @@ -1,360 +0,0 @@ -# Foreword - -This document describes an on-going project to support FUSE filesystems within -the sentry. This is intended to become the final documentation for this -subsystem, and is therefore written in the past tense. However FUSE support is -currently incomplete and the document will be updated as things progress. - -# FUSE: Filesystem in Userspace - -The sentry supports dispatching filesystem operations to a FUSE server, allowing -FUSE filesystem to be used with a sandbox. - -## Overview - -FUSE has two main components: - -1. A client kernel driver (canonically `fuse.ko` in Linux), which forwards - filesystem operations (usually initiated by syscalls) to the server. - -2. A server, which is a userspace daemon that implements the actual filesystem. - -The sentry implements the client component, which allows a server daemon running -within the sandbox to implement a filesystem within the sandbox. - -A FUSE filesystem is initialized with `mount(2)`, typically with the help of a -utility like `fusermount(1)`. Various mount options exist for establishing -ownership and access permissions on the filesystem, but the most important mount -option is a file descriptor used to establish communication between the client -and server. - -The FUSE device FD is obtained by opening `/dev/fuse`. During regular operation, -the client and server use the FUSE protocol described in `fuse(4)` to service -filesystem operations. See the "Protocol" section below for more information -about this protocol. The core of the sentry support for FUSE is the client-side -implementation of this protocol. - -## FUSE in the Sentry - -The sentry's FUSE client targets VFS2 and has the following components: - -- An implementation of `/dev/fuse`. - -- A VFS2 filesystem for mapping syscalls to FUSE ops. Since we're targeting - VFS2, one point of contention may be the lack of inodes in VFS2. We can - tentatively implement a kernfs-based filesystem to bridge the gap in APIs. - The kernfs base functionality can serve the role of the Linux inode cache - and, the filesystem can map VFS2 syscalls to kernfs inode operations; see - the `kernfs.Inode` interface. - -The FUSE protocol lends itself well to marshaling with `go_marshal`. The various -request and response packets can be defined in the ABI package and converted to -and from the wire format using `go_marshal`. - -### Design Goals - -- While filesystem performance is always important, the sentry's FUSE support - is primarily concerned with compatibility, with performance as a secondary - concern. - -- Avoiding deadlocks from a hung server daemon. - -- Consider the potential for denial of service from a malicious server daemon. - Protecting itself from userspace is already a design goal for the sentry, - but needs additional consideration for FUSE. Normally, an operating system - doesn't rely on userspace to make progress with filesystem operations. Since - this changes with FUSE, it opens up the possibility of creating a chain of - dependencies controlled by userspace, which could affect an entire sandbox. - For example: a FUSE op can block a syscall, which could be holding a - subsystem lock, which can then block another task goroutine. - -### Milestones - -Below are some broad goals to aim for while implementing FUSE in the sentry. -Many FUSE ops can be grouped into broad categories of functionality, and most -ops can be implemented in parallel. - -#### Minimal client that can mount a trivial FUSE filesystem. - -- Implement `/dev/fuse` - a character device used to establish an FD for - communication between the sentry and the server daemon. - -- Implement basic FUSE ops like `FUSE_INIT`. - -#### Read-only mount with basic file operations - -- Implement the majority of file, directory and file descriptor FUSE ops. For - this milestone, we can skip uncommon or complex operations like mmap, mknod, - file locking, poll, and extended attributes. We can stub these out along - with any ops that modify the filesystem. The exact list of required ops are - to be determined, but the goal is to mount a real filesystem as read-only, - and be able to read contents from the filesystem in the sentry. - -#### Full read-write support - -- Implement the remaining FUSE ops and decide if we can omit rarely used - operations like ioctl. - -### Design Details - -#### Lifecycle for a FUSE Request - -- User invokes a syscall -- Sentry prepares corresponding request - - If FUSE device is available - - Write the request in binary - - If FUSE device is full - - Kernel task blocked until available -- Sentry notifies the readers of fuse device that it's ready for read -- FUSE daemon reads the request and processes it -- Sentry waits until a reply is written to the FUSE device - - but returns directly for async requests -- FUSE daemon writes to the fuse device -- Sentry processes the reply - - For sync requests, unblock blocked kernel task - - For async requests, execute pre-specified callback if any -- Sentry returns the syscall to the user - -#### Channels and Queues for Requests in Different Stages - -`connection.initializedChan` - -- a channel that the requests issued before connection initialization blocks - on. - -`fd.queue` - -- a queue of requests that haven’t been read by the FUSE daemon yet. - -`fd.completions` - -- a map of the requests that have been prepared but not yet received a - response, including the ones on the `fd.queue`. - -`fd.waitQueue` - -- a queue of waiters that is waiting for the fuse device fd to be available, - such as the FUSE daemon. - -`fd.fullQueueCh` - -- a channel that the kernel task will be blocked on when the fd is not - available. - -#### Basic I/O Implementation - -Currently we have implemented basic functionalities of read and write for our -FUSE. We describe the design and ways to improve it here: - -##### Basic FUSE Read - -The vfs2 expects implementations of `vfs.FileDescriptionImpl.Read()` and -`vfs.FileDescriptionImpl.PRead()`. When a syscall is made, it will eventually -reach our implementation of those interface functions located at -`pkg/sentry/fsimpl/fuse/regular_file.go` for regular files. - -After validation checks of the input, sentry sends `FUSE_READ` requests to the -FUSE daemon. The FUSE daemon returns data after the `fuse_out_header` as the -responses. For the first version, we create a copy in kernel memory of those -data. They are represented as a byte slice in the marshalled struct. This -happens as a common process for all the FUSE responses at this moment at -`pkg/sentry/fsimpl/fuse/dev.go:writeLocked()`. We then directly copy from this -intermediate buffer to the input buffer provided by the read syscall. - -There is an extra requirement for FUSE: When mounting the FUSE fs, the mounter -or the FUSE daemon can specify a `max_read` or a `max_pages` parameter. They are -the upperbound of the bytes to read in each `FUSE_READ` request. We implemented -the code to handle the fragmented reads. - -To improve the performance: ideally we should have buffer cache to copy those -data from the responses of FUSE daemon into, as is also the design of several -other existing file system implementations for sentry, instead of a single-use -temporary buffer. Directly mapping the memory of one process to another could -also boost the performance, but to keep them isolated, we did not choose to do -so. - -##### Basic FUSE Write - -The vfs2 invokes implementations of `vfs.FileDescriptionImpl.Write()` and -`vfs.FileDescriptionImpl.PWrite()` on the regular file descriptor of FUSE when a -user makes write(2) and pwrite(2) syscall. - -For valid writes, sentry sends the bytes to write after a `FUSE_WRITE` header -(can be regarded as a request with 2 payloads) to the FUSE daemon. For the first -version, we allocate a buffer inside kernel memory to store the bytes from the -user, and copy directly from that buffer to the memory of FUSE daemon. This -happens at `pkg/sentry/fsimpl/fuse/dev.go:readLocked()` - -The parameters `max_write` and `max_pages` restrict the number of bytes in one -`FUSE_WRITE`. There are code handling fragmented writes in current -implementation. - -To have better performance: the extra copy created to store the bytes to write -can be replaced by the buffer cache as well. - -# Appendix - -## FUSE Protocol - -The FUSE protocol is a request-response protocol. All requests are initiated by -the client. The wire-format for the protocol is raw C structs serialized to -memory. - -All FUSE requests begin with the following request header: - -```c -struct fuse_in_header { - uint32_t len; // Length of the request, including this header. - uint32_t opcode; // Requested operation. - uint64_t unique; // A unique identifier for this request. - uint64_t nodeid; // ID of the filesystem object being operated on. - uint32_t uid; // UID of the requesting process. - uint32_t gid; // GID of the requesting process. - uint32_t pid; // PID of the requesting process. - uint32_t padding; -}; -``` - -The request is then followed by a payload specific to the `opcode`. - -All responses begin with this response header: - -```c -struct fuse_out_header { - uint32_t len; // Length of the response, including this header. - int32_t error; // Status of the request, 0 if success. - uint64_t unique; // The unique identifier from the corresponding request. -}; -``` - -The response payload also depends on the request `opcode`. If `error != 0`, the -response payload must be empty. - -### Operations - -The following is a list of all FUSE operations used in `fuse_in_header.opcode` -as of Linux v4.4, and a brief description of their purpose. These are defined in -`uapi/linux/fuse.h`. Many of these have a corresponding request and response -payload struct; `fuse(4)` has details for some of these. We also note how these -operations map to the sentry virtual filesystem. - -#### FUSE meta-operations - -These operations are specific to FUSE and don't have a corresponding action in a -generic filesystem. - -- `FUSE_INIT`: This operation initializes a new FUSE filesystem, and is the - first message sent by the client after mount. This is used for version and - feature negotiation. This is related to `mount(2)`. -- `FUSE_DESTROY`: Teardown a FUSE filesystem, related to `unmount(2)`. -- `FUSE_INTERRUPT`: Interrupts an in-flight operation, specified by the - `fuse_in_header.unique` value provided in the corresponding request header. - The client can send at most one of these per request, and will enter an - uninterruptible wait for a reply. The server is expected to reply promptly. -- `FUSE_FORGET`: A hint to the server that server should evict the indicate - node from any caches. This is wired up to `(struct - super_operations).evict_inode` in Linux, which is in turned hooked as the - inode cache shrinker which is typically triggered by system memory pressure. -- `FUSE_BATCH_FORGET`: Batch version of `FUSE_FORGET`. - -#### Filesystem Syscalls - -These FUSE ops map directly to an equivalent filesystem syscall, or family of -syscalls. The relevant syscalls have a similar name to the operation, unless -otherwise noted. - -Node creation: - -- `FUSE_MKNOD` -- `FUSE_MKDIR` -- `FUSE_CREATE`: This is equivalent to `open(2)` and `creat(2)`, which - atomically creates and opens a node. - -Node attributes and extended attributes: - -- `FUSE_GETATTR` -- `FUSE_SETATTR` -- `FUSE_SETXATTR` -- `FUSE_GETXATTR` -- `FUSE_LISTXATTR` -- `FUSE_REMOVEXATTR` - -Node link manipulation: - -- `FUSE_READLINK` -- `FUSE_LINK` -- `FUSE_SYMLINK` -- `FUSE_UNLINK` - -Directory operations: - -- `FUSE_RMDIR` -- `FUSE_RENAME` -- `FUSE_RENAME2` -- `FUSE_OPENDIR`: `open(2)` for directories. -- `FUSE_RELEASEDIR`: `close(2)` for directories. -- `FUSE_READDIR` -- `FUSE_READDIRPLUS` -- `FUSE_FSYNCDIR`: `fsync(2)` for directories. -- `FUSE_LOOKUP`: Establishes a unique identifier for a FS node. This is - reminiscent of `VirtualFilesystem.GetDentryAt` in that it resolves a path - component to a node. However the returned identifier is opaque to the - client. The server must remember this mapping, as this is how the client - will reference the node in the future. - -File operations: - -- `FUSE_OPEN`: `open(2)` for files. -- `FUSE_RELEASE`: `close(2)` for files. -- `FUSE_FSYNC` -- `FUSE_FALLOCATE` -- `FUSE_SETUPMAPPING`: Creates a memory map on a file for `mmap(2)`. -- `FUSE_REMOVEMAPPING`: Removes a memory map for `munmap(2)`. - -File locking: - -- `FUSE_GETLK` -- `FUSE_SETLK` -- `FUSE_SETLKW` -- `FUSE_COPY_FILE_RANGE` - -File descriptor operations: - -- `FUSE_IOCTL` -- `FUSE_POLL` -- `FUSE_LSEEK` - -Filesystem operations: - -- `FUSE_STATFS` - -#### Permissions - -- `FUSE_ACCESS` is used to check if a node is accessible, as part of many - syscall implementations. Maps to `vfs.FilesystemImpl.AccessAt` in the - sentry. - -#### I/O Operations - -These ops are used to read and write file pages. They're used to implement both -I/O syscalls like `read(2)`, `write(2)` and `mmap(2)`. - -- `FUSE_READ` -- `FUSE_WRITE` - -#### Miscellaneous - -- `FUSE_FLUSH`: Used by the client to indicate when a file descriptor is - closed. Distinct from `FUSE_FSYNC`, which corresponds to an `fsync(2)` - syscall from the user. Maps to `vfs.FileDescriptorImpl.Release` in the - sentry. -- `FUSE_BMAP`: Old address space API for block defrag. Probably not needed. -- `FUSE_NOTIFY_REPLY`: [TODO: what does this do?] - -# References - -- [fuse(4) Linux manual page](https://www.man7.org/linux/man-pages/man4/fuse.4.html) -- [Linux kernel FUSE documentation](https://www.kernel.org/doc/html/latest/filesystems/fuse.html) -- [The reference implementation of the Linux FUSE (Filesystem in Userspace) - interface](https://github.com/libfuse/libfuse) -- [The kernel interface of FUSE](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fuse.h) diff --git a/pkg/sentry/fs/g3doc/inotify.md b/pkg/sentry/fs/g3doc/inotify.md deleted file mode 100644 index 85063d4e6..000000000 --- a/pkg/sentry/fs/g3doc/inotify.md +++ /dev/null @@ -1,122 +0,0 @@ -# Inotify - -Inotify implements the like-named filesystem event notification system for the -sentry, see `inotify(7)`. - -## Architecture - -For the most part, the sentry implementation of inotify mirrors the Linux -architecture. Inotify instances (i.e. the fd returned by inotify_init(2)) are -backed by a pseudo-filesystem. Events are generated from various places in the -sentry, including the [syscall layer][syscall_dir], the [vfs layer][dirent] and -the [process fd table][fd_table]. Watches are stored in inodes and generated -events are queued to the inotify instance owning the watches for delivery to the -user. - -## Objects - -Here is a brief description of the existing and new objects involved in the -sentry inotify mechanism, and how they interact: - -### [`fs.Inotify`][inotify] - -- An inotify instances, created by inotify_init(2)/inotify_init1(2). -- The inotify fd has a `fs.Dirent`, supports filesystem syscalls to read - events. -- Has multiple `fs.Watch`es, with at most one watch per target inode, per - inotify instance. -- Has an instance `id` which is globally unique. This is *not* the fd number - for this instance, since the fd can be duped. This `id` is not externally - visible. - -### [`fs.Watch`][watch] - -- An inotify watch, created/deleted by - inotify_add_watch(2)/inotify_rm_watch(2). -- Owned by an `fs.Inotify` instance, each watch keeps a pointer to the - `owner`. -- Associated with a single `fs.Inode`, which is the watch `target`. While the - watch is active, it indirectly pins `target` to memory. See the "Reference - Model" section for a detailed explanation. -- Filesystem operations on `target` generate `fs.Event`s. - -### [`fs.Event`][event] - -- A simple struct encapsulating all the fields for an inotify event. -- Generated by `fs.Watch`es and forwarded to the watches' `owner`s. -- Serialized to the user during read(2) syscalls on the associated - `fs.Inotify`'s fd. - -### [`fs.Dirent`][dirent] - -- Many inotify events are generated inside dirent methods. Events are - generated in the dirent methods rather than `fs.Inode` methods because some - events carry the name of the subject node, and node names are generally - unavailable in an `fs.Inode`. -- Dirents do not directly contain state for any watches. Instead, they forward - notifications to the underlying `fs.Inode`. - -### [`fs.Inode`][inode] - -- Interacts with inotify through `fs.Watch`es. -- Inodes contain a map of all active `fs.Watch`es on them. -- An `fs.Inotify` instance can have at most one `fs.Watch` per inode. - `fs.Watch`es on an inode are indexed by their `owner`'s `id`. -- All inotify logic is encapsulated in the [`Watches`][inode_watches] struct - in an inode. Logically, `Watches` is the set of inotify watches on the - inode. - -## Reference Model - -The sentry inotify implementation has a complex reference model. An inotify -watch observes a single inode. For efficient lookup, the state for a watch is -stored directly on the target inode. This state needs to be persistent for the -lifetime of watch. Unlike usual filesystem metadata, the watch state has no -"on-disk" representation, so they cannot be reconstructed by the filesystem if -the inode is flushed from memory. This effectively means we need to keep any -inodes with actives watches pinned to memory. - -We can't just hold an extra ref on the inode to pin it to memory because some -filesystems (such as gofer-based filesystems) don't have persistent inodes. In -such a filesystem, if we just pin the inode, nothing prevents the enclosing -dirent from being GCed. Once the dirent is GCed, the pinned inode is -unreachable -- these filesystems generate a new inode by re-reading the node -state on the next walk. Incidentally, hardlinks also don't work on these -filesystems for this reason. - -To prevent the above scenario, when a new watch is added on an inode, we *pin* -the dirent we used to reach the inode. Note that due to hardlinks, this dirent -may not be the only dirent pointing to the inode. Attempting to set an inotify -watch via multiple hardlinks to the same file results in the same watch being -returned for both links. However, for each new dirent we use to reach the same -inode, we add a new pin. We need a new pin for each new dirent used to reach the -inode because we have no guarantees about the deletion order of the different -links to the inode. - -## Lock Ordering - -There are 4 locks related to the inotify implementation: - -- `Inotify.mu`: the inotify instance lock. -- `Inotify.evMu`: the inotify event queue lock. -- `Watch.mu`: the watch lock, used to protect pins. -- `fs.Watches.mu`: the inode watch set mu, used to protect the collection of - watches on the inode. - -The correct lock ordering for inotify code is: - -`Inotify.mu` -> `fs.Watches.mu` -> `Watch.mu` -> `Inotify.evMu`. - -We need a distinct lock for the event queue because by the time a goroutine -attempts to queue a new event, it is already holding `fs.Watches.mu`. If we used -`Inotify.mu` to also protect the event queue, this would violate the above lock -ordering. - -[dirent]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/dirent.go -[event]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify_event.go -[fd_table]: https://github.com/google/gvisor/blob/master/pkg/sentry/kernel/fd_table.go -[inode]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inode.go -[inode_watches]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inode_inotify.go -[inotify]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify.go -[syscall_dir]: https://github.com/google/gvisor/blob/master/pkg/sentry/syscalls/linux/ -[watch]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify_watch.go diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD deleted file mode 100644 index fea135eea..000000000 --- a/pkg/sentry/fs/gofer/BUILD +++ /dev/null @@ -1,67 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "gofer", - srcs = [ - "attr.go", - "cache_policy.go", - "context_file.go", - "device.go", - "fifo.go", - "file.go", - "file_state.go", - "fs.go", - "handles.go", - "inode.go", - "inode_state.go", - "path.go", - "session.go", - "session_state.go", - "socket.go", - "util.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/fd", - "//pkg/log", - "//pkg/metric", - "//pkg/p9", - "//pkg/refs", - "//pkg/safemem", - "//pkg/secio", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fdpipe", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/host", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/unix/transport", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/unet", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "gofer_test", - size = "small", - srcs = ["gofer_test.go"], - library = ":gofer", - deps = [ - "//pkg/context", - "//pkg/p9", - "//pkg/p9/p9test", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - ], -) diff --git a/pkg/sentry/fs/gofer/gofer_state_autogen.go b/pkg/sentry/fs/gofer/gofer_state_autogen.go new file mode 100644 index 000000000..d2eb67d58 --- /dev/null +++ b/pkg/sentry/fs/gofer/gofer_state_autogen.go @@ -0,0 +1,255 @@ +// automatically generated by stateify. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (i *fifo) StateTypeName() string { + return "pkg/sentry/fs/gofer.fifo" +} + +func (i *fifo) StateFields() []string { + return []string{ + "InodeOperations", + "fileIops", + } +} + +func (i *fifo) beforeSave() {} + +func (i *fifo) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeOperations) + stateSinkObject.Save(1, &i.fileIops) +} + +func (i *fifo) afterLoad() {} + +func (i *fifo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeOperations) + stateSourceObject.Load(1, &i.fileIops) +} + +func (f *fileOperations) StateTypeName() string { + return "pkg/sentry/fs/gofer.fileOperations" +} + +func (f *fileOperations) StateFields() []string { + return []string{ + "inodeOperations", + "dirCursor", + "flags", + } +} + +func (f *fileOperations) beforeSave() {} + +func (f *fileOperations) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.inodeOperations) + stateSinkObject.Save(1, &f.dirCursor) + stateSinkObject.Save(2, &f.flags) +} + +func (f *fileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &f.inodeOperations) + stateSourceObject.Load(1, &f.dirCursor) + stateSourceObject.LoadWait(2, &f.flags) + stateSourceObject.AfterLoad(f.afterLoad) +} + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/gofer.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (i *inodeOperations) StateTypeName() string { + return "pkg/sentry/fs/gofer.inodeOperations" +} + +func (i *inodeOperations) StateFields() []string { + return []string{ + "fileState", + "cachingInodeOps", + } +} + +func (i *inodeOperations) beforeSave() {} + +func (i *inodeOperations) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.fileState) + stateSinkObject.Save(1, &i.cachingInodeOps) +} + +func (i *inodeOperations) afterLoad() {} + +func (i *inodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &i.fileState) + stateSourceObject.Load(1, &i.cachingInodeOps) +} + +func (i *inodeFileState) StateTypeName() string { + return "pkg/sentry/fs/gofer.inodeFileState" +} + +func (i *inodeFileState) StateFields() []string { + return []string{ + "s", + "sattr", + "loading", + "savedUAttr", + "hostMappable", + } +} + +func (i *inodeFileState) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + var loadingValue struct{} = i.saveLoading() + stateSinkObject.SaveValue(2, loadingValue) + stateSinkObject.Save(0, &i.s) + stateSinkObject.Save(1, &i.sattr) + stateSinkObject.Save(3, &i.savedUAttr) + stateSinkObject.Save(4, &i.hostMappable) +} + +func (i *inodeFileState) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &i.s) + stateSourceObject.LoadWait(1, &i.sattr) + stateSourceObject.Load(3, &i.savedUAttr) + stateSourceObject.Load(4, &i.hostMappable) + stateSourceObject.LoadValue(2, new(struct{}), func(y interface{}) { i.loadLoading(y.(struct{})) }) + stateSourceObject.AfterLoad(i.afterLoad) +} + +func (l *overrideInfo) StateTypeName() string { + return "pkg/sentry/fs/gofer.overrideInfo" +} + +func (l *overrideInfo) StateFields() []string { + return []string{ + "dirent", + "endpoint", + "inode", + } +} + +func (l *overrideInfo) beforeSave() {} + +func (l *overrideInfo) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.dirent) + stateSinkObject.Save(1, &l.endpoint) + stateSinkObject.Save(2, &l.inode) +} + +func (l *overrideInfo) afterLoad() {} + +func (l *overrideInfo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.dirent) + stateSourceObject.Load(1, &l.endpoint) + stateSourceObject.Load(2, &l.inode) +} + +func (e *overrideMaps) StateTypeName() string { + return "pkg/sentry/fs/gofer.overrideMaps" +} + +func (e *overrideMaps) StateFields() []string { + return []string{ + "pathMap", + } +} + +func (e *overrideMaps) beforeSave() {} + +func (e *overrideMaps) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.pathMap) +} + +func (e *overrideMaps) afterLoad() {} + +func (e *overrideMaps) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.pathMap) +} + +func (s *session) StateTypeName() string { + return "pkg/sentry/fs/gofer.session" +} + +func (s *session) StateFields() []string { + return []string{ + "AtomicRefCount", + "msize", + "version", + "cachePolicy", + "aname", + "superBlockFlags", + "limitHostFDTranslation", + "overlayfsStaleRead", + "connID", + "inodeMappings", + "mounter", + "overrides", + } +} + +func (s *session) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.AtomicRefCount) + stateSinkObject.Save(1, &s.msize) + stateSinkObject.Save(2, &s.version) + stateSinkObject.Save(3, &s.cachePolicy) + stateSinkObject.Save(4, &s.aname) + stateSinkObject.Save(5, &s.superBlockFlags) + stateSinkObject.Save(6, &s.limitHostFDTranslation) + stateSinkObject.Save(7, &s.overlayfsStaleRead) + stateSinkObject.Save(8, &s.connID) + stateSinkObject.Save(9, &s.inodeMappings) + stateSinkObject.Save(10, &s.mounter) + stateSinkObject.Save(11, &s.overrides) +} + +func (s *session) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.AtomicRefCount) + stateSourceObject.LoadWait(1, &s.msize) + stateSourceObject.LoadWait(2, &s.version) + stateSourceObject.LoadWait(3, &s.cachePolicy) + stateSourceObject.LoadWait(4, &s.aname) + stateSourceObject.LoadWait(5, &s.superBlockFlags) + stateSourceObject.Load(6, &s.limitHostFDTranslation) + stateSourceObject.Load(7, &s.overlayfsStaleRead) + stateSourceObject.LoadWait(8, &s.connID) + stateSourceObject.LoadWait(9, &s.inodeMappings) + stateSourceObject.LoadWait(10, &s.mounter) + stateSourceObject.LoadWait(11, &s.overrides) + stateSourceObject.AfterLoad(s.afterLoad) +} + +func init() { + state.Register((*fifo)(nil)) + state.Register((*fileOperations)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*inodeOperations)(nil)) + state.Register((*inodeFileState)(nil)) + state.Register((*overrideInfo)(nil)) + state.Register((*overrideMaps)(nil)) + state.Register((*session)(nil)) +} diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go deleted file mode 100644 index 326fed954..000000000 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gofer - -import ( - "fmt" - "syscall" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/p9/p9test" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// rootTest runs a test with a p9 mock and an fs.InodeOperations created from -// the attached root directory. The root file will be closed and client -// disconnected, but additional files must be closed manually. -func rootTest(t *testing.T, name string, cp cachePolicy, fn func(context.Context, *p9test.Harness, *p9test.Mock, *fs.Inode)) { - t.Run(name, func(t *testing.T) { - h, c := p9test.NewHarness(t) - defer h.Finish() - - // Create a new root. Note that we pass an empty, but non-nil - // map here. This allows tests to extend the root children - // dynamically. - root := h.NewDirectory(map[string]p9test.Generator{})(nil) - - // Return this as the root. - h.Attacher.EXPECT().Attach().Return(root, nil).Times(1) - - // ... and open via the client. - rootFile, err := c.Attach("/") - if err != nil { - t.Fatalf("unable to attach: %v", err) - } - defer rootFile.Close() - - // Wrap an a session. - s := &session{ - mounter: fs.RootOwner, - cachePolicy: cp, - client: c, - } - - // ... and an INode, with only the mode being explicitly valid for now. - ctx := contexttest.Context(t) - sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{ - file: rootFile, - }, root.QID, p9.AttrMaskAll(), root.Attr) - m := fs.NewMountSource(ctx, s, &filesystem{}, fs.MountSourceFlags{}) - rootInode := fs.NewInode(ctx, rootInodeOperations, m, sattr) - - // Ensure that the cache is fully invalidated, so that any - // close actions actually take place before the full harness is - // torn down. - defer func() { - m.FlushDirentRefs() - - // Wait for all resources to be released, otherwise the - // operations may fail after we close the rootFile. - fs.AsyncBarrier() - }() - - // Execute the test. - fn(ctx, h, root, rootInode) - }) -} - -func TestLookup(t *testing.T) { - type lookupTest struct { - // Name of the test. - name string - - // Expected return value. - want error - } - - tests := []lookupTest{ - { - name: "mock Walk passes (function succeeds)", - want: nil, - }, - { - name: "mock Walk fails (function fails)", - want: syscall.ENOENT, - }, - } - - const file = "file" // The walked target file. - - for _, test := range tests { - rootTest(t, test.name, cacheNone, func(ctx context.Context, h *p9test.Harness, rootFile *p9test.Mock, rootInode *fs.Inode) { - // Setup the appropriate result. - rootFile.WalkCallback = func() error { - return test.want - } - if test.want == nil { - // Set the contents of the root. We expect a - // normal file generator for ppp above. This is - // overriden by setting WalkErr in the mock. - rootFile.AddChild(file, h.NewFile()) - } - - // Call function. - dirent, err := rootInode.Lookup(ctx, file) - - // Unwrap the InodeOperations. - var newInodeOperations fs.InodeOperations - if dirent != nil { - if dirent.IsNegative() { - err = syscall.ENOENT - } else { - newInodeOperations = dirent.Inode.InodeOperations - } - } - - // Check return values. - if err != test.want { - t.Errorf("Lookup got err %v, want %v", err, test.want) - } - if err == nil && newInodeOperations == nil { - t.Errorf("Lookup got non-nil err and non-nil node, wanted at least one non-nil") - } - }) - } -} - -func TestRevalidation(t *testing.T) { - type revalidationTest struct { - cachePolicy cachePolicy - - // Whether dirent should be reloaded before any modifications. - preModificationWantReload bool - - // Whether dirent should be reloaded after updating an unstable - // attribute on the remote fs. - postModificationWantReload bool - - // Whether dirent unstable attributes should be updated after - // updating an attribute on the remote fs. - postModificationWantUpdatedAttrs bool - - // Whether dirent should be reloaded after the remote has - // removed the file. - postRemovalWantReload bool - } - - tests := []revalidationTest{ - { - // Policy cacheNone causes Revalidate to always return - // true. - cachePolicy: cacheNone, - preModificationWantReload: true, - postModificationWantReload: true, - postModificationWantUpdatedAttrs: true, - postRemovalWantReload: true, - }, - { - // Policy cacheAll causes Revalidate to always return - // false. - cachePolicy: cacheAll, - preModificationWantReload: false, - postModificationWantReload: false, - postModificationWantUpdatedAttrs: false, - postRemovalWantReload: false, - }, - { - // Policy cacheAllWritethrough causes Revalidate to - // always return false. - cachePolicy: cacheAllWritethrough, - preModificationWantReload: false, - postModificationWantReload: false, - postModificationWantUpdatedAttrs: false, - postRemovalWantReload: false, - }, - { - // Policy cacheRemoteRevalidating causes Revalidate to - // return update cached unstable attrs, and returns - // true only when the remote inode itself has been - // removed or replaced. - cachePolicy: cacheRemoteRevalidating, - preModificationWantReload: false, - postModificationWantReload: false, - postModificationWantUpdatedAttrs: true, - postRemovalWantReload: true, - }, - } - - const file = "file" // The file walked below. - - for _, test := range tests { - name := fmt.Sprintf("cachepolicy=%s", test.cachePolicy) - rootTest(t, name, test.cachePolicy, func(ctx context.Context, h *p9test.Harness, rootFile *p9test.Mock, rootInode *fs.Inode) { - // Wrap in a dirent object. - rootDir := fs.NewDirent(ctx, rootInode, "root") - - // Create a mock file a child of the root. We save when - // this is generated, so that when the time changed, we - // can update the original entry. - var origMocks []*p9test.Mock - rootFile.AddChild(file, func(parent *p9test.Mock) *p9test.Mock { - // Regular a regular file that has a consistent - // path number. This might be used by - // validation so we don't change it. - m := h.NewMock(parent, 0, p9.Attr{ - Mode: p9.ModeRegular, - }) - origMocks = append(origMocks, m) - return m - }) - - // Do the walk. - dirent, err := rootDir.Walk(ctx, rootDir, file) - if err != nil { - t.Fatalf("Lookup failed: %v", err) - } - - // We must release the dirent, of the test will fail - // with a reference leak. This is tracked by p9test. - defer dirent.DecRef(ctx) - - // Walk again. Depending on the cache policy, we may - // get a new dirent. - newDirent, err := rootDir.Walk(ctx, rootDir, file) - if err != nil { - t.Fatalf("Lookup failed: %v", err) - } - if test.preModificationWantReload && dirent == newDirent { - t.Errorf("Lookup with cachePolicy=%s got old dirent %+v, wanted a new dirent", test.cachePolicy, dirent) - } - if !test.preModificationWantReload && dirent != newDirent { - t.Errorf("Lookup with cachePolicy=%s got new dirent %+v, wanted old dirent %+v", test.cachePolicy, newDirent, dirent) - } - newDirent.DecRef(ctx) // See above. - - // Modify the underlying mocked file's modification - // time for the next walk that occurs. - nowSeconds := time.Now().Unix() - rootFile.AddChild(file, func(parent *p9test.Mock) *p9test.Mock { - // Ensure that the path is the same as above, - // but we change only the modification time of - // the file. - return h.NewMock(parent, 0, p9.Attr{ - Mode: p9.ModeRegular, - MTimeSeconds: uint64(nowSeconds), - }) - }) - - // We also modify the original time, so that GetAttr - // behaves as expected for the caching case. - for _, m := range origMocks { - m.Attr.MTimeSeconds = uint64(nowSeconds) - } - - // Walk again. Depending on the cache policy, we may - // get a new dirent. - newDirent, err = rootDir.Walk(ctx, rootDir, file) - if err != nil { - t.Fatalf("Lookup failed: %v", err) - } - if test.postModificationWantReload && dirent == newDirent { - t.Errorf("Lookup with cachePolicy=%s got old dirent, wanted a new dirent", test.cachePolicy) - } - if !test.postModificationWantReload && dirent != newDirent { - t.Errorf("Lookup with cachePolicy=%s got new dirent, wanted old dirent", test.cachePolicy) - } - uattrs, err := newDirent.Inode.UnstableAttr(ctx) - if err != nil { - t.Fatalf("Error getting unstable attrs: %v", err) - } - gotModTimeSeconds := uattrs.ModificationTime.Seconds() - if test.postModificationWantUpdatedAttrs && gotModTimeSeconds != nowSeconds { - t.Fatalf("Lookup with cachePolicy=%s got new modification time %v, wanted %v", test.cachePolicy, gotModTimeSeconds, nowSeconds) - } - newDirent.DecRef(ctx) // See above. - - // Remove the file from the remote fs, subsequent walks - // should now fail to find anything. - rootFile.RemoveChild(file) - - // Walk again. Depending on the cache policy, we may - // get ENOENT. - newDirent, err = rootDir.Walk(ctx, rootDir, file) - if test.postRemovalWantReload && err == nil { - t.Errorf("Lookup with cachePolicy=%s got nil error, wanted ENOENT", test.cachePolicy) - } - if !test.postRemovalWantReload && (err != nil || dirent != newDirent) { - t.Errorf("Lookup with cachePolicy=%s got new dirent and error %v, wanted old dirent and nil error", test.cachePolicy, err) - } - if err == nil { - newDirent.DecRef(ctx) // See above. - } - }) - } -} diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD deleted file mode 100644 index 1368014c4..000000000 --- a/pkg/sentry/fs/host/BUILD +++ /dev/null @@ -1,84 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "host", - srcs = [ - "control.go", - "descriptor.go", - "descriptor_state.go", - "device.go", - "file.go", - "host.go", - "inode.go", - "inode_state.go", - "ioctl_unsafe.go", - "socket.go", - "socket_iovec.go", - "socket_state.go", - "socket_unsafe.go", - "tty.go", - "util.go", - "util_amd64_unsafe.go", - "util_arm64_unsafe.go", - "util_unsafe.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/iovec", - "//pkg/log", - "//pkg/marshal/primitive", - "//pkg/refs", - "//pkg/safemem", - "//pkg/secio", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/control", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/unimpl", - "//pkg/sentry/uniqueid", - "//pkg/sync", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/unet", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "host_test", - size = "small", - srcs = [ - "descriptor_test.go", - "inode_test.go", - "socket_test.go", - "wait_test.go", - ], - library = ":host", - deps = [ - "//pkg/fd", - "//pkg/fdnotifier", - "//pkg/sentry/contexttest", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/unix/transport", - "//pkg/syserr", - "//pkg/tcpip", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/host/descriptor_test.go b/pkg/sentry/fs/host/descriptor_test.go deleted file mode 100644 index d8e4605b6..000000000 --- a/pkg/sentry/fs/host/descriptor_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "io/ioutil" - "path/filepath" - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestDescriptorRelease(t *testing.T) { - for _, tc := range []struct { - name string - saveable bool - wouldBlock bool - }{ - {name: "all false"}, - {name: "saveable", saveable: true}, - {name: "wouldBlock", wouldBlock: true}, - } { - t.Run(tc.name, func(t *testing.T) { - dir, err := ioutil.TempDir("", "descriptor_test") - if err != nil { - t.Fatal("ioutil.TempDir() failed:", err) - } - - fd, err := syscall.Open(filepath.Join(dir, "file"), syscall.O_RDWR|syscall.O_CREAT, 0666) - if err != nil { - t.Fatal("failed to open temp file:", err) - } - - // FD ownership is transferred to the descritor. - queue := &waiter.Queue{} - d, err := newDescriptor(fd, tc.saveable, tc.wouldBlock, queue) - if err != nil { - syscall.Close(fd) - t.Fatalf("newDescriptor(%d, %t, %t, queue) failed, err: %v", fd, tc.saveable, tc.wouldBlock, err) - } - if tc.saveable { - if d.origFD < 0 { - t.Errorf("saveable descriptor must preserve origFD, desc: %+v", d) - } - } - if tc.wouldBlock { - if !fdnotifier.HasFD(int32(d.value)) { - t.Errorf("FD not registered with notifier, desc: %+v", d) - } - } - - oldVal := d.value - d.Release() - if d.value != -1 { - t.Errorf("d.value want: -1, got: %d", d.value) - } - if tc.wouldBlock { - if fdnotifier.HasFD(int32(oldVal)) { - t.Errorf("FD not unregistered with notifier, desc: %+v", d) - } - } - }) - } -} diff --git a/pkg/sentry/fs/host/host_amd64_unsafe_state_autogen.go b/pkg/sentry/fs/host/host_amd64_unsafe_state_autogen.go new file mode 100644 index 000000000..488cbdfcf --- /dev/null +++ b/pkg/sentry/fs/host/host_amd64_unsafe_state_autogen.go @@ -0,0 +1,5 @@ +// automatically generated by stateify. + +// +build amd64 + +package host diff --git a/pkg/sentry/fs/host/host_arm64_unsafe_state_autogen.go b/pkg/sentry/fs/host/host_arm64_unsafe_state_autogen.go new file mode 100644 index 000000000..7371b44db --- /dev/null +++ b/pkg/sentry/fs/host/host_arm64_unsafe_state_autogen.go @@ -0,0 +1,5 @@ +// automatically generated by stateify. + +// +build arm64 + +package host diff --git a/pkg/sentry/fs/host/host_state_autogen.go b/pkg/sentry/fs/host/host_state_autogen.go new file mode 100644 index 000000000..baae8fffa --- /dev/null +++ b/pkg/sentry/fs/host/host_state_autogen.go @@ -0,0 +1,206 @@ +// automatically generated by stateify. + +package host + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (d *descriptor) StateTypeName() string { + return "pkg/sentry/fs/host.descriptor" +} + +func (d *descriptor) StateFields() []string { + return []string{ + "origFD", + "wouldBlock", + } +} + +func (d *descriptor) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.origFD) + stateSinkObject.Save(1, &d.wouldBlock) +} + +func (d *descriptor) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.origFD) + stateSourceObject.Load(1, &d.wouldBlock) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (f *fileOperations) StateTypeName() string { + return "pkg/sentry/fs/host.fileOperations" +} + +func (f *fileOperations) StateFields() []string { + return []string{ + "iops", + "dirCursor", + } +} + +func (f *fileOperations) beforeSave() {} + +func (f *fileOperations) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.iops) + stateSinkObject.Save(1, &f.dirCursor) +} + +func (f *fileOperations) afterLoad() {} + +func (f *fileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &f.iops) + stateSourceObject.Load(1, &f.dirCursor) +} + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/host.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (i *inodeOperations) StateTypeName() string { + return "pkg/sentry/fs/host.inodeOperations" +} + +func (i *inodeOperations) StateFields() []string { + return []string{ + "fileState", + "cachingInodeOps", + } +} + +func (i *inodeOperations) beforeSave() {} + +func (i *inodeOperations) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.fileState) + stateSinkObject.Save(1, &i.cachingInodeOps) +} + +func (i *inodeOperations) afterLoad() {} + +func (i *inodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &i.fileState) + stateSourceObject.Load(1, &i.cachingInodeOps) +} + +func (i *inodeFileState) StateTypeName() string { + return "pkg/sentry/fs/host.inodeFileState" +} + +func (i *inodeFileState) StateFields() []string { + return []string{ + "descriptor", + "sattr", + "savedUAttr", + } +} + +func (i *inodeFileState) beforeSave() {} + +func (i *inodeFileState) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + if !state.IsZeroValue(&i.queue) { + state.Failf("queue is %#v, expected zero", &i.queue) + } + stateSinkObject.Save(0, &i.descriptor) + stateSinkObject.Save(1, &i.sattr) + stateSinkObject.Save(2, &i.savedUAttr) +} + +func (i *inodeFileState) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadWait(0, &i.descriptor) + stateSourceObject.LoadWait(1, &i.sattr) + stateSourceObject.Load(2, &i.savedUAttr) + stateSourceObject.AfterLoad(i.afterLoad) +} + +func (c *ConnectedEndpoint) StateTypeName() string { + return "pkg/sentry/fs/host.ConnectedEndpoint" +} + +func (c *ConnectedEndpoint) StateFields() []string { + return []string{ + "ref", + "queue", + "path", + "srfd", + "stype", + } +} + +func (c *ConnectedEndpoint) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.ref) + stateSinkObject.Save(1, &c.queue) + stateSinkObject.Save(2, &c.path) + stateSinkObject.Save(3, &c.srfd) + stateSinkObject.Save(4, &c.stype) +} + +func (c *ConnectedEndpoint) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.ref) + stateSourceObject.Load(1, &c.queue) + stateSourceObject.Load(2, &c.path) + stateSourceObject.LoadWait(3, &c.srfd) + stateSourceObject.Load(4, &c.stype) + stateSourceObject.AfterLoad(c.afterLoad) +} + +func (t *TTYFileOperations) StateTypeName() string { + return "pkg/sentry/fs/host.TTYFileOperations" +} + +func (t *TTYFileOperations) StateFields() []string { + return []string{ + "fileOperations", + "session", + "fgProcessGroup", + "termios", + } +} + +func (t *TTYFileOperations) beforeSave() {} + +func (t *TTYFileOperations) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.fileOperations) + stateSinkObject.Save(1, &t.session) + stateSinkObject.Save(2, &t.fgProcessGroup) + stateSinkObject.Save(3, &t.termios) +} + +func (t *TTYFileOperations) afterLoad() {} + +func (t *TTYFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.fileOperations) + stateSourceObject.Load(1, &t.session) + stateSourceObject.Load(2, &t.fgProcessGroup) + stateSourceObject.Load(3, &t.termios) +} + +func init() { + state.Register((*descriptor)(nil)) + state.Register((*fileOperations)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*inodeOperations)(nil)) + state.Register((*inodeFileState)(nil)) + state.Register((*ConnectedEndpoint)(nil)) + state.Register((*TTYFileOperations)(nil)) +} diff --git a/pkg/sentry/fs/host/host_unsafe_state_autogen.go b/pkg/sentry/fs/host/host_unsafe_state_autogen.go new file mode 100644 index 000000000..b2d8c661f --- /dev/null +++ b/pkg/sentry/fs/host/host_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package host diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go deleted file mode 100644 index 41a23b5da..000000000 --- a/pkg/sentry/fs/host/inode_test.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/contexttest" -) - -// TestCloseFD verifies fds will be closed. -func TestCloseFD(t *testing.T) { - var p [2]int - if err := syscall.Pipe(p[0:]); err != nil { - t.Fatalf("Failed to create pipe %v", err) - } - defer syscall.Close(p[0]) - defer syscall.Close(p[1]) - - // Use the write-end because we will detect if it's closed on the read end. - ctx := contexttest.Context(t) - file, err := NewFile(ctx, p[1]) - if err != nil { - t.Fatalf("Failed to create File: %v", err) - } - file.DecRef(ctx) - - s := make([]byte, 10) - if c, err := syscall.Read(p[0], s); c != 0 || err != nil { - t.Errorf("want 0, nil (EOF) from read end, got %v, %v", c, err) - } -} diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go deleted file mode 100644 index 9d58ea448..000000000 --- a/pkg/sentry/fs/host/socket_test.go +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "reflect" - "syscall" - "testing" - - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/pkg/waiter" -) - -var ( - // Make sure that ConnectedEndpoint implements transport.ConnectedEndpoint. - _ = transport.ConnectedEndpoint(new(ConnectedEndpoint)) - - // Make sure that ConnectedEndpoint implements transport.Receiver. - _ = transport.Receiver(new(ConnectedEndpoint)) -) - -func getFl(fd int) (uint32, error) { - fl, _, err := syscall.RawSyscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_GETFL, 0) - if err == 0 { - return uint32(fl), nil - } - return 0, err -} - -func TestSocketIsBlocking(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - - fl, err := getFl(pair[0]) - if err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[0], err) - } - if fl&syscall.O_NONBLOCK == syscall.O_NONBLOCK { - t.Fatalf("Expected socket %v to be blocking", pair[0]) - } - if fl, err = getFl(pair[1]); err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[1], err) - } - if fl&syscall.O_NONBLOCK == syscall.O_NONBLOCK { - t.Fatalf("Expected socket %v to be blocking", pair[1]) - } - ctx := contexttest.Context(t) - sock, err := newSocket(ctx, pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) failed => %v", pair[0], err) - } - defer sock.DecRef(ctx) - // Test that the socket now is non-blocking. - if fl, err = getFl(pair[0]); err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[0], err) - } - if fl&syscall.O_NONBLOCK != syscall.O_NONBLOCK { - t.Errorf("Expected socket %v to have become non-blocking", pair[0]) - } - if fl, err = getFl(pair[1]); err != nil { - t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[1], err) - } - if fl&syscall.O_NONBLOCK == syscall.O_NONBLOCK { - t.Errorf("Did not expect socket %v to become non-blocking", pair[1]) - } -} - -func TestSocketWritev(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - ctx := contexttest.Context(t) - socket, err := newSocket(ctx, pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer socket.DecRef(ctx) - buf := []byte("hello world\n") - n, err := socket.Writev(contexttest.Context(t), usermem.BytesIOSequence(buf)) - if err != nil { - t.Fatalf("socket writev failed: %v", err) - } - - if n != int64(len(buf)) { - t.Fatalf("socket writev wrote incorrect bytes: %d", n) - } -} - -func TestSocketWritevLen0(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - ctx := contexttest.Context(t) - socket, err := newSocket(ctx, pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer socket.DecRef(ctx) - n, err := socket.Writev(contexttest.Context(t), usermem.BytesIOSequence(nil)) - if err != nil { - t.Fatalf("socket writev failed: %v", err) - } - - if n != 0 { - t.Fatalf("socket writev wrote incorrect bytes: %d", n) - } -} - -func TestSocketSendMsgLen0(t *testing.T) { - // Using socketpair here because it's already connected. - pair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("host socket creation failed: %v", err) - } - ctx := contexttest.Context(t) - sfile, err := newSocket(ctx, pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer sfile.DecRef(ctx) - - s := sfile.FileOperations.(socket.Socket) - n, terr := s.SendMsg(nil, usermem.BytesIOSequence(nil), []byte{}, 0, false, ktime.Time{}, socket.ControlMessages{}) - if n != 0 { - t.Fatalf("socket sendmsg() failed: %v wrote: %d", terr, n) - } - - if terr != nil { - t.Fatalf("socket sendmsg() failed: %v", terr) - } -} - -func TestListen(t *testing.T) { - pair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) => %v", err) - } - ctx := contexttest.Context(t) - sfile1, err := newSocket(ctx, pair[0], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[0], err) - } - defer sfile1.DecRef(ctx) - socket1 := sfile1.FileOperations.(socket.Socket) - - sfile2, err := newSocket(ctx, pair[1], false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", pair[1], err) - } - defer sfile2.DecRef(ctx) - socket2 := sfile2.FileOperations.(socket.Socket) - - // Socketpairs can not be listened to. - if err := socket1.Listen(nil, 64); err != syserr.ErrInvalidEndpointState { - t.Fatalf("socket1.Listen(nil, 64) => %v, want syserr.ErrInvalidEndpointState", err) - } - if err := socket2.Listen(nil, 64); err != syserr.ErrInvalidEndpointState { - t.Fatalf("socket2.Listen(nil, 64) => %v, want syserr.ErrInvalidEndpointState", err) - } - - // Create a Unix socket, do not bind it. - sock, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) - if err != nil { - t.Fatalf("syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) => %v", err) - } - sfile3, err := newSocket(ctx, sock, false) - if err != nil { - t.Fatalf("newSocket(%v) => %v", sock, err) - } - defer sfile3.DecRef(ctx) - socket3 := sfile3.FileOperations.(socket.Socket) - - // This socket is not bound so we can't listen on it. - if err := socket3.Listen(nil, 64); err != syserr.ErrInvalidEndpointState { - t.Fatalf("socket3.Listen(nil, 64) => %v, want syserr.ErrInvalidEndpointState", err) - } -} - -func TestPasscred(t *testing.T) { - e := &ConnectedEndpoint{} - if got, want := e.Passcred(), false; got != want { - t.Errorf("Got %#v.Passcred() = %t, want = %t", e, got, want) - } -} - -func TestGetLocalAddress(t *testing.T) { - e := &ConnectedEndpoint{path: "foo"} - want := tcpip.FullAddress{Addr: tcpip.Address("foo")} - if got, err := e.GetLocalAddress(); err != nil || got != want { - t.Errorf("Got %#v.GetLocalAddress() = %#v, %v, want = %#v, %v", e, got, err, want, nil) - } -} - -func TestQueuedSize(t *testing.T) { - e := &ConnectedEndpoint{} - tests := []struct { - name string - f func() int64 - }{ - {"SendQueuedSize", e.SendQueuedSize}, - {"RecvQueuedSize", e.RecvQueuedSize}, - } - - for _, test := range tests { - if got, want := test.f(), int64(-1); got != want { - t.Errorf("Got %#v.%s() = %d, want = %d", e, test.name, got, want) - } - } -} - -func TestRelease(t *testing.T) { - f, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, 0) - if err != nil { - t.Fatal("Creating socket:", err) - } - c := &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f)} - want := &ConnectedEndpoint{queue: c.queue} - ctx := contexttest.Context(t) - want.ref.DecRef(ctx) - fdnotifier.AddFD(int32(c.file.FD()), nil) - c.Release(ctx) - if !reflect.DeepEqual(c, want) { - t.Errorf("got = %#v, want = %#v", c, want) - } -} diff --git a/pkg/sentry/fs/host/wait_test.go b/pkg/sentry/fs/host/wait_test.go deleted file mode 100644 index c143f4ce2..000000000 --- a/pkg/sentry/fs/host/wait_test.go +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "syscall" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/waiter" -) - -func TestWait(t *testing.T) { - var fds [2]int - err := syscall.Pipe(fds[:]) - if err != nil { - t.Fatalf("Unable to create pipe: %v", err) - } - - defer syscall.Close(fds[1]) - - ctx := contexttest.Context(t) - file, err := NewFile(ctx, fds[0]) - if err != nil { - syscall.Close(fds[0]) - t.Fatalf("NewFile failed: %v", err) - } - - defer file.DecRef(ctx) - - r := file.Readiness(waiter.EventIn) - if r != 0 { - t.Fatalf("File is ready for read when it shouldn't be.") - } - - e, ch := waiter.NewChannelEntry(nil) - file.EventRegister(&e, waiter.EventIn) - defer file.EventUnregister(&e) - - // Check that there are no notifications yet. - if len(ch) != 0 { - t.Fatalf("Channel is non-empty") - } - - // Write to the pipe, so it should be writable now. - syscall.Write(fds[1], []byte{1}) - - // Check that we get a notification. We need to yield the current thread - // so that the fdnotifier can deliver notifications, so we use a - // 1-second timeout instead of just checking the length of the channel. - select { - case <-ch: - case <-time.After(1 * time.Second): - t.Fatalf("Channel not notified") - } -} diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go deleted file mode 100644 index aa9851b26..000000000 --- a/pkg/sentry/fs/inode_overlay_test.go +++ /dev/null @@ -1,470 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/syserror" -) - -func TestLookup(t *testing.T) { - ctx := contexttest.Context(t) - for _, test := range []struct { - // Test description. - desc string - - // Lookup parameters. - dir *fs.Inode - name string - - // Want from lookup. - found bool - hasUpper bool - hasLower bool - }{ - { - desc: "no upper, lower has name", - dir: fs.NewTestOverlayDir(ctx, - nil, /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: false, - hasLower: true, - }, - { - desc: "no lower, upper has name", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* upper */ - nil, /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: false, - }, - { - desc: "upper and lower, only lower has name", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "b", - dir: false, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: false, - hasLower: true, - }, - { - desc: "upper and lower, only upper has name", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "b", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: false, - }, - { - desc: "upper and lower, both have file", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: false, - }, - { - desc: "upper and lower, both have directory", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: true, - }, - }, nil), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: true, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: true, - hasLower: true, - }, - { - desc: "upper and lower, upper negative masks lower file", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, nil, []string{"a"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: false, - hasUpper: false, - hasLower: false, - }, - { - desc: "upper and lower, upper negative does not mask lower file", - dir: fs.NewTestOverlayDir(ctx, - newTestRamfsDir(ctx, nil, []string{"b"}), /* upper */ - newTestRamfsDir(ctx, []dirContent{ - { - name: "a", - dir: false, - }, - }, nil), /* lower */ - false /* revalidate */), - name: "a", - found: true, - hasUpper: false, - hasLower: true, - }, - } { - t.Run(test.desc, func(t *testing.T) { - dirent, err := test.dir.Lookup(ctx, test.name) - if test.found && (err == syserror.ENOENT || dirent.IsNegative()) { - t.Fatalf("lookup %q expected to find positive dirent, got dirent %v err %v", test.name, dirent, err) - } - if !test.found { - if err != syserror.ENOENT && !dirent.IsNegative() { - t.Errorf("lookup %q expected to return ENOENT or negative dirent, got dirent %v err %v", test.name, dirent, err) - } - // Nothing more to check. - return - } - if hasUpper := dirent.Inode.TestHasUpperFS(); hasUpper != test.hasUpper { - t.Fatalf("lookup got upper filesystem %v, want %v", hasUpper, test.hasUpper) - } - if hasLower := dirent.Inode.TestHasLowerFS(); hasLower != test.hasLower { - t.Errorf("lookup got lower filesystem %v, want %v", hasLower, test.hasLower) - } - }) - } -} - -func TestLookupRevalidation(t *testing.T) { - // File name used in the tests. - fileName := "foofile" - ctx := contexttest.Context(t) - for _, tc := range []struct { - // Test description. - desc string - - // Upper and lower fs for the overlay. - upper *fs.Inode - lower *fs.Inode - - // Whether the upper requires revalidation. - revalidate bool - - // Whether we should get the same dirent on second lookup. - wantSame bool - }{ - { - desc: "file from upper with no revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, nil, nil), - revalidate: false, - wantSame: true, - }, - { - desc: "file from upper with revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, nil, nil), - revalidate: true, - wantSame: false, - }, - { - desc: "file from lower with no revalidation", - upper: newTestRamfsDir(ctx, nil, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: false, - wantSame: true, - }, - { - desc: "file from lower with revalidation", - upper: newTestRamfsDir(ctx, nil, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: true, - // The file does not exist in the upper, so we do not - // need to revalidate it. - wantSame: true, - }, - { - desc: "file from upper and lower with no revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: false, - wantSame: true, - }, - { - desc: "file from upper and lower with revalidation", - upper: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - lower: newTestRamfsDir(ctx, []dirContent{{name: fileName}}, nil), - revalidate: true, - wantSame: false, - }, - } { - t.Run(tc.desc, func(t *testing.T) { - root := fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root") - ctx = &rootContext{ - Context: ctx, - root: root, - } - overlay := fs.NewDirent(ctx, fs.NewTestOverlayDir(ctx, tc.upper, tc.lower, tc.revalidate), "overlay") - // Lookup the file twice through the overlay. - first, err := overlay.Walk(ctx, root, fileName) - if err != nil { - t.Fatalf("overlay.Walk(%q) failed: %v", fileName, err) - } - second, err := overlay.Walk(ctx, root, fileName) - if err != nil { - t.Fatalf("overlay.Walk(%q) failed: %v", fileName, err) - } - - if tc.wantSame && first != second { - t.Errorf("dirent lookup got different dirents, wanted same\nfirst=%+v\nsecond=%+v", first, second) - } else if !tc.wantSame && first == second { - t.Errorf("dirent lookup got the same dirent, wanted different: %+v", first) - } - }) - } -} - -func TestCacheFlush(t *testing.T) { - ctx := contexttest.Context(t) - - // Upper and lower each have a file. - upperFileName := "file-from-upper" - lowerFileName := "file-from-lower" - upper := newTestRamfsDir(ctx, []dirContent{{name: upperFileName}}, nil) - lower := newTestRamfsDir(ctx, []dirContent{{name: lowerFileName}}, nil) - - overlay := fs.NewTestOverlayDir(ctx, upper, lower, true /* revalidate */) - - mns, err := fs.NewMountNamespace(ctx, overlay) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - root := mns.Root() - defer root.DecRef(ctx) - - ctx = &rootContext{ - Context: ctx, - root: root, - } - - for _, fileName := range []string{upperFileName, lowerFileName} { - // Walk to the file. - maxTraversals := uint(0) - dirent, err := mns.FindInode(ctx, root, nil, fileName, &maxTraversals) - if err != nil { - t.Fatalf("FindInode(%q) failed: %v", fileName, err) - } - - // Get a file from the dirent. - file, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("GetFile() failed: %v", err) - } - - // The dirent should have 3 refs, one from us, one from the - // file, and one from the dirent cache. - // dirent cache. - if got, want := dirent.ReadRefs(), 3; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - - // Drop the file reference. - file.DecRef(ctx) - - // Dirent should have 2 refs left. - if got, want := dirent.ReadRefs(), 2; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - - // Flush the dirent cache. - mns.FlushMountSourceRefs() - - // Dirent should have 1 ref left from the dirent cache. - if got, want := dirent.ReadRefs(), 1; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - - // Drop our ref. - dirent.DecRef(ctx) - - // We should be back to zero refs. - if got, want := dirent.ReadRefs(), 0; int(got) != want { - t.Errorf("dirent.ReadRefs() got %d want %d", got, want) - } - } - -} - -type dir struct { - fs.InodeOperations - - // List of negative child names. - negative []string - - // ReaddirCalled records whether Readdir was called on a file - // corresponding to this inode. - ReaddirCalled bool -} - -// GetXattr implements InodeOperations.GetXattr. -func (d *dir) GetXattr(_ context.Context, _ *fs.Inode, name string, _ uint64) (string, error) { - for _, n := range d.negative { - if name == fs.XattrOverlayWhiteout(n) { - return "y", nil - } - } - return "", syserror.ENOATTR -} - -// GetFile implements InodeOperations.GetFile. -func (d *dir) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - file, err := d.InodeOperations.GetFile(ctx, dirent, flags) - if err != nil { - return nil, err - } - defer file.DecRef(ctx) - // Wrap the file's FileOperations in a dirFile. - fops := &dirFile{ - FileOperations: file.FileOperations, - inode: d, - } - return fs.NewFile(ctx, dirent, flags, fops), nil -} - -type dirContent struct { - name string - dir bool -} - -type dirFile struct { - fs.FileOperations - inode *dir -} - -type inode struct { - fsutil.InodeGenericChecker `state:"nosave"` - fsutil.InodeNoExtendedAttributes `state:"nosave"` - fsutil.InodeNoopRelease `state:"nosave"` - fsutil.InodeNoopWriteOut `state:"nosave"` - fsutil.InodeNotAllocatable `state:"nosave"` - fsutil.InodeNotDirectory `state:"nosave"` - fsutil.InodeNotMappable `state:"nosave"` - fsutil.InodeNotSocket `state:"nosave"` - fsutil.InodeNotSymlink `state:"nosave"` - fsutil.InodeNotTruncatable `state:"nosave"` - fsutil.InodeNotVirtual `state:"nosave"` - - fsutil.InodeSimpleAttributes - fsutil.InodeStaticFileGetter -} - -// Readdir implements fs.FileOperations.Readdir. It sets the ReaddirCalled -// field on the inode. -func (f *dirFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySerializer) (int64, error) { - f.inode.ReaddirCalled = true - return f.FileOperations.Readdir(ctx, file, ser) -} - -func newTestRamfsInode(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - inode := fs.NewInode(ctx, &inode{ - InodeStaticFileGetter: fsutil.InodeStaticFileGetter{ - Contents: []byte("foobar"), - }, - }, msrc, fs.StableAttr{Type: fs.RegularFile}) - return inode -} - -func newTestRamfsDir(ctx context.Context, contains []dirContent, negative []string) *fs.Inode { - msrc := fs.NewPseudoMountSource(ctx) - contents := make(map[string]*fs.Inode) - for _, c := range contains { - if c.dir { - contents[c.name] = newTestRamfsDir(ctx, nil, nil) - } else { - contents[c.name] = newTestRamfsInode(ctx, msrc) - } - } - dops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermissions{ - User: fs.PermMask{Read: true, Execute: true}, - }) - return fs.NewInode(ctx, &dir{ - InodeOperations: dops, - negative: negative, - }, msrc, fs.StableAttr{Type: fs.Directory}) -} diff --git a/pkg/sentry/fs/lock/BUILD b/pkg/sentry/fs/lock/BUILD deleted file mode 100644 index ae3331737..000000000 --- a/pkg/sentry/fs/lock/BUILD +++ /dev/null @@ -1,58 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "lock_range", - out = "lock_range.go", - package = "lock", - prefix = "Lock", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - -go_template_instance( - name = "lock_set", - out = "lock_set.go", - consts = { - "minDegree": "3", - }, - package = "lock", - prefix = "Lock", - template = "//pkg/segment:generic_set", - types = { - "Key": "uint64", - "Range": "LockRange", - "Value": "Lock", - "Functions": "lockSetFunctions", - }, -) - -go_library( - name = "lock", - srcs = [ - "lock.go", - "lock_range.go", - "lock_set.go", - "lock_set_functions.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/log", - "//pkg/sync", - "//pkg/waiter", - ], -) - -go_test( - name = "lock_test", - size = "small", - srcs = [ - "lock_range_test.go", - "lock_test.go", - ], - library = ":lock", -) diff --git a/pkg/sentry/fs/lock/lock_range.go b/pkg/sentry/fs/lock/lock_range.go new file mode 100644 index 000000000..7a6f77640 --- /dev/null +++ b/pkg/sentry/fs/lock/lock_range.go @@ -0,0 +1,62 @@ +package lock + +// A Range represents a contiguous range of T. +// +// +stateify savable +type LockRange struct { + // Start is the inclusive start of the range. + Start uint64 + + // End is the exclusive end of the range. + End uint64 +} + +// WellFormed returns true if r.Start <= r.End. All other methods on a Range +// require that the Range is well-formed. +func (r LockRange) WellFormed() bool { + return r.Start <= r.End +} + +// Length returns the length of the range. +func (r LockRange) Length() uint64 { + return r.End - r.Start +} + +// Contains returns true if r contains x. +func (r LockRange) Contains(x uint64) bool { + return r.Start <= x && x < r.End +} + +// Overlaps returns true if r and r2 overlap. +func (r LockRange) Overlaps(r2 LockRange) bool { + return r.Start < r2.End && r2.Start < r.End +} + +// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is +// contained within r. +func (r LockRange) IsSupersetOf(r2 LockRange) bool { + return r.Start <= r2.Start && r.End >= r2.End +} + +// Intersect returns a range consisting of the intersection between r and r2. +// If r and r2 do not overlap, Intersect returns a range with unspecified +// bounds, but for which Length() == 0. +func (r LockRange) Intersect(r2 LockRange) LockRange { + if r.Start < r2.Start { + r.Start = r2.Start + } + if r.End > r2.End { + r.End = r2.End + } + if r.End < r.Start { + r.End = r.Start + } + return r +} + +// CanSplitAt returns true if it is legal to split a segment spanning the range +// r at x; that is, splitting at x would produce two ranges, both of which have +// non-zero length. +func (r LockRange) CanSplitAt(x uint64) bool { + return r.Contains(x) && r.Start < x +} diff --git a/pkg/sentry/fs/lock/lock_range_test.go b/pkg/sentry/fs/lock/lock_range_test.go deleted file mode 100644 index 6221199d1..000000000 --- a/pkg/sentry/fs/lock/lock_range_test.go +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package lock - -import ( - "syscall" - "testing" -) - -func TestComputeRange(t *testing.T) { - tests := []struct { - // Description of test. - name string - - // Requested start of the lock range. - start int64 - - // Requested length of the lock range, - // can be negative :( - length int64 - - // Pre-computed file offset based on whence. - // Will be added to start. - offset int64 - - // Expected error. - err error - - // If error is nil, the expected LockRange. - LockRange - }{ - { - name: "offset, start, and length all zero", - LockRange: LockRange{Start: 0, End: LockEOF}, - }, - { - name: "zero offset, zero start, positive length", - start: 0, - length: 4096, - offset: 0, - LockRange: LockRange{Start: 0, End: 4096}, - }, - { - name: "zero offset, negative start", - start: -4096, - offset: 0, - err: syscall.EINVAL, - }, - { - name: "large offset, negative start, positive length", - start: -2048, - length: 2048, - offset: 4096, - LockRange: LockRange{Start: 2048, End: 4096}, - }, - { - name: "large offset, negative start, zero length", - start: -2048, - length: 0, - offset: 4096, - LockRange: LockRange{Start: 2048, End: LockEOF}, - }, - { - name: "zero offset, zero start, negative length", - start: 0, - length: -4096, - offset: 0, - err: syscall.EINVAL, - }, - { - name: "large offset, zero start, negative length", - start: 0, - length: -4096, - offset: 4096, - LockRange: LockRange{Start: 0, End: 4096}, - }, - { - name: "offset, start, and length equal, length is negative", - start: 1024, - length: -1024, - offset: 1024, - LockRange: LockRange{Start: 1024, End: 2048}, - }, - { - name: "offset, start, and length equal, start is negative", - start: -1024, - length: 1024, - offset: 1024, - LockRange: LockRange{Start: 0, End: 1024}, - }, - { - name: "offset, start, and length equal, offset is negative", - start: 1024, - length: 1024, - offset: -1024, - LockRange: LockRange{Start: 0, End: 1024}, - }, - { - name: "offset, start, and length equal, all negative", - start: -1024, - length: -1024, - offset: -1024, - err: syscall.EINVAL, - }, - { - name: "offset, start, and length equal, all positive", - start: 1024, - length: 1024, - offset: 1024, - LockRange: LockRange{Start: 2048, End: 3072}, - }, - } - - for _, test := range tests { - rng, err := ComputeRange(test.start, test.length, test.offset) - if err != test.err { - t.Errorf("%s: lockRange(%d, %d, %d) got error %v, want %v", test.name, test.start, test.length, test.offset, err, test.err) - continue - } - if err == nil && rng != test.LockRange { - t.Errorf("%s: lockRange(%d, %d, %d) got LockRange %v, want %v", test.name, test.start, test.length, test.offset, rng, test.LockRange) - } - } -} diff --git a/pkg/sentry/fs/lock/lock_set.go b/pkg/sentry/fs/lock/lock_set.go new file mode 100644 index 000000000..37c216b95 --- /dev/null +++ b/pkg/sentry/fs/lock/lock_set.go @@ -0,0 +1,1643 @@ +package lock + +import ( + "bytes" + "fmt" +) + +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const LocktrackGaps = 0 + +var _ = uint8(LocktrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type LockdynamicGap [LocktrackGaps]uint64 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *LockdynamicGap) Get() uint64 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *LockdynamicGap) Set(v uint64) { + d[:][0] = v +} + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + LockminDegree = 3 + + LockmaxDegree = 2 * LockminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type LockSet struct { + root Locknode `state:".(*LockSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *LockSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *LockSet) IsEmptyRange(r LockRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *LockSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *LockSet) SpanRange(r LockRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *LockSet) FirstSegment() LockIterator { + if s.root.nrSegments == 0 { + return LockIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *LockSet) LastSegment() LockIterator { + if s.root.nrSegments == 0 { + return LockIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *LockSet) FirstGap() LockGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return LockGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *LockSet) LastGap() LockGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return LockGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *LockSet) Find(key uint64) (LockIterator, LockGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return LockIterator{n, i}, LockGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return LockIterator{}, LockGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *LockSet) FindSegment(key uint64) LockIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *LockSet) LowerBoundSegment(min uint64) LockIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *LockSet) UpperBoundSegment(max uint64) LockIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *LockSet) FindGap(key uint64) LockGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *LockSet) LowerBoundGap(min uint64) LockGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *LockSet) UpperBoundGap(max uint64) LockGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *LockSet) Add(r LockRange, val Lock) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *LockSet) AddWithoutMerging(r LockRange, val Lock) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. +func (s *LockSet) Insert(gap LockGapIterator, r LockRange, val Lock) LockIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (lockSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := LocktrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (lockSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (lockSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := LocktrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } + return next + } + } + + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *LockSet) InsertWithoutMerging(gap LockGapIterator, r LockRange, val Lock) LockIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: +// * r.Start >= gap.Start(). +// * r.End <= gap.End(). +func (s *LockSet) InsertWithoutMergingUnchecked(gap LockGapIterator, r LockRange, val Lock) LockIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := LocktrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } + return LockIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. +func (s *LockSet) Remove(seg LockIterator) LockGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if LocktrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + lockSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + if LocktrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } + return seg.node.rebalanceAfterRemove(LockGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *LockSet) RemoveAll() { + s.root = Locknode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *LockSet) RemoveRange(r LockRange) LockGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *LockSet) Merge(first, second LockIterator) LockIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *LockSet) MergeUnchecked(first, second LockIterator) LockIterator { + if first.End() == second.Start() { + if mval, ok := (lockSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + + return s.Remove(second).PrevSegment() + } + } + return LockIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *LockSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. +func (s *LockSet) MergeRange(r LockRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *LockSet) MergeAdjacent(r LockRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *LockSet) Split(seg LockIterator, split uint64) (LockIterator, LockIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *LockSet) SplitUnchecked(seg LockIterator, split uint64) (LockIterator, LockIterator) { + val1, val2 := (lockSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), LockRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *LockSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *LockSet) Isolate(seg LockIterator, r LockRange) LockIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *LockSet) ApplyContiguous(r LockRange, fn func(seg LockIterator)) LockGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return LockGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return LockGapIterator{} + } + } +} + +// +stateify savable +type Locknode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *Locknode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap LockdynamicGap + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [LockmaxDegree - 1]LockRange + values [LockmaxDegree - 1]Lock + children [LockmaxDegree]*Locknode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Locknode) firstSegment() LockIterator { + for n.hasChildren { + n = n.children[0] + } + return LockIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *Locknode) lastSegment() LockIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return LockIterator{n, n.nrSegments - 1} +} + +func (n *Locknode) prevSibling() *Locknode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *Locknode) nextSibling() *Locknode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. +func (n *Locknode) rebalanceBeforeInsert(gap LockGapIterator) LockGapIterator { + if n.nrSegments < LockmaxDegree-1 { + return gap + } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.parent == nil { + + left := &Locknode{ + nrSegments: LockminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &Locknode{ + nrSegments: LockminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:LockminDegree-1], n.keys[:LockminDegree-1]) + copy(left.values[:LockminDegree-1], n.values[:LockminDegree-1]) + copy(right.keys[:LockminDegree-1], n.keys[LockminDegree:]) + copy(right.values[:LockminDegree-1], n.values[LockminDegree:]) + n.keys[0], n.values[0] = n.keys[LockminDegree-1], n.values[LockminDegree-1] + LockzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:LockminDegree], n.children[:LockminDegree]) + copy(right.children[:LockminDegree], n.children[LockminDegree:]) + LockzeroNodeSlice(n.children[2:]) + for i := 0; i < LockminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + + if LocktrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } + if gap.node != n { + return gap + } + if gap.index < LockminDegree { + return LockGapIterator{left, gap.index} + } + return LockGapIterator{right, gap.index - LockminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[LockminDegree-1], n.values[LockminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &Locknode{ + nrSegments: LockminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:LockminDegree-1], n.keys[LockminDegree:]) + copy(sibling.values[:LockminDegree-1], n.values[LockminDegree:]) + LockzeroValueSlice(n.values[LockminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:LockminDegree], n.children[LockminDegree:]) + LockzeroNodeSlice(n.children[LockminDegree:]) + for i := 0; i < LockminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = LockminDegree - 1 + + if LocktrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + + if gap.node != n { + return gap + } + if gap.index < LockminDegree { + return gap + } + return LockGapIterator{sibling, gap.index - LockminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *Locknode) rebalanceAfterRemove(gap LockGapIterator) LockGapIterator { + for { + if n.nrSegments >= LockminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= LockminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + lockSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if LocktrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling && gap.index == sibling.nrSegments { + return LockGapIterator{n, 0} + } + if gap.node == n { + return LockGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= LockminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + lockSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + + if LocktrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node == sibling { + if gap.index == 0 { + return LockGapIterator{n, n.nrSegments} + } + return LockGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + + if gap.node == left { + return LockGapIterator{p, gap.index} + } + if gap.node == right { + return LockGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *Locknode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = LockGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + lockSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + if LocktrackGaps != 0 { + left.updateMaxGapLocal() + } + + n = p + } +} + +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *Locknode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *Locknode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *Locknode) calculateMaxGapLeaf() uint64 { + max := LockGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (LockGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *Locknode) calculateMaxGapInternal() uint64 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *Locknode) searchFirstLargeEnoughGap(minSize uint64) LockGapIterator { + if n.maxGap.Get() < minSize { + return LockGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := LockGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *Locknode) searchLastLargeEnoughGap(minSize uint64) LockGapIterator { + if n.maxGap.Get() < minSize { + return LockGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := LockGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type LockIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *Locknode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg LockIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg LockIterator) Range() LockRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg LockIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg LockIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// * r.Length() > 0. +// * The new range must not overlap an existing one: +// * If seg.NextSegment().Ok(), then r.end <= seg.NextSegment().Start(). +// * If seg.PrevSegment().Ok(), then r.start >= seg.PrevSegment().End(). +func (seg LockIterator) SetRangeUnchecked(r LockRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg LockIterator) SetRange(r LockRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: +// * start < seg.End() +// * If seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg LockIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg LockIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: +// * end > seg.Start(). +// * If seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg LockIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg LockIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg LockIterator) Value() Lock { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg LockIterator) ValuePtr() *Lock { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg LockIterator) SetValue(val Lock) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. +func (seg LockIterator) PrevSegment() LockIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return LockIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return LockIterator{} + } + return LocksegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg LockIterator) NextSegment() LockIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return LockIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return LockIterator{} + } + return LocksegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg LockIterator) PrevGap() LockGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return LockGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg LockIterator) NextGap() LockGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return LockGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg LockIterator) PrevNonEmpty() (LockIterator, LockGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return LockIterator{}, gap + } + return gap.PrevSegment(), LockGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg LockIterator) NextNonEmpty() (LockIterator, LockGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return LockIterator{}, gap + } + return gap.NextSegment(), LockGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type LockGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *Locknode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap LockGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap LockGapIterator) Range() LockRange { + return LockRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap LockGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return lockSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap LockGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return lockSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap LockGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap LockGapIterator) PrevSegment() LockIterator { + return LocksegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap LockGapIterator) NextSegment() LockIterator { + return LocksegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap LockGapIterator) PrevGap() LockGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return LockGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap LockGapIterator) NextGap() LockGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return LockGapIterator{} + } + return seg.NextGap() +} + +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap LockGapIterator) NextLargeEnoughGap(minSize uint64) LockGapIterator { + if LocktrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap LockGapIterator) nextLargeEnoughGapHelper(minSize uint64) LockGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return LockGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap LockGapIterator) PrevLargeEnoughGap(minSize uint64) LockGapIterator { + if LocktrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap LockGapIterator) prevLargeEnoughGapHelper(minSize uint64) LockGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return LockGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func LocksegmentBeforePosition(n *Locknode, i int) LockIterator { + for i == 0 { + if n.parent == nil { + return LockIterator{} + } + n, i = n.parent, n.parentIndex + } + return LockIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. +func LocksegmentAfterPosition(n *Locknode, i int) LockIterator { + for i == n.nrSegments { + if n.parent == nil { + return LockIterator{} + } + n, i = n.parent, n.parentIndex + } + return LockIterator{n, i} +} + +func LockzeroValueSlice(slice []Lock) { + + for i := range slice { + lockSetFunctions{}.ClearValue(&slice[i]) + } +} + +func LockzeroNodeSlice(slice []*Locknode) { + for i := range slice { + slice[i] = nil + } +} + +// String stringifies a Set for debugging. +func (s *LockSet) String() string { + return s.root.String() +} + +// String stringifies a node (and all of its children) for debugging. +func (n *Locknode) String() string { + var buf bytes.Buffer + n.writeDebugString(&buf, "") + return buf.String() +} + +func (n *Locknode) writeDebugString(buf *bytes.Buffer, prefix string) { + if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) { + buf.WriteString(prefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)) + } + for i := 0; i < n.nrSegments; i++ { + if child := n.children[i]; child != nil { + cprefix := fmt.Sprintf("%s- % 3d ", prefix, i) + if child.parent != n || child.parentIndex != i { + buf.WriteString(cprefix) + buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)) + } + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) + } + buf.WriteString(prefix) + if n.hasChildren { + if LocktrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } + if child := n.children[n.nrSegments]; child != nil { + child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) + } +} + +// SegmentDataSlices represents segments from a set as slices of start, end, and +// values. SegmentDataSlices is primarily used as an intermediate representation +// for save/restore and the layout here is optimized for that. +// +// +stateify savable +type LockSegmentDataSlices struct { + Start []uint64 + End []uint64 + Values []Lock +} + +// ExportSortedSlice returns a copy of all segments in the given set, in ascending +// key order. +func (s *LockSet) ExportSortedSlices() *LockSegmentDataSlices { + var sds LockSegmentDataSlices + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sds.Start = append(sds.Start, seg.Start()) + sds.End = append(sds.End, seg.End()) + sds.Values = append(sds.Values, seg.Value()) + } + sds.Start = sds.Start[:len(sds.Start):len(sds.Start)] + sds.End = sds.End[:len(sds.End):len(sds.End)] + sds.Values = sds.Values[:len(sds.Values):len(sds.Values)] + return &sds +} + +// ImportSortedSlice initializes the given set from the given slice. +// +// Preconditions: +// * s must be empty. +// * sds must represent a valid set (the segments in sds must have valid +// lengths that do not overlap). +// * The segments in sds must be sorted in ascending key order. +func (s *LockSet) ImportSortedSlices(sds *LockSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := LockRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *LockSet) segmentTestCheck(expectedSegments int, segFunc func(int, LockRange, Lock) error) error { + havePrev := false + prev := uint64(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *LockSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} +func (s *LockSet) saveRoot() *LockSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *LockSet) loadRoot(sds *LockSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/fs/lock/lock_state_autogen.go b/pkg/sentry/fs/lock/lock_state_autogen.go new file mode 100644 index 000000000..707a7518f --- /dev/null +++ b/pkg/sentry/fs/lock/lock_state_autogen.go @@ -0,0 +1,191 @@ +// automatically generated by stateify. + +package lock + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (l *Lock) StateTypeName() string { + return "pkg/sentry/fs/lock.Lock" +} + +func (l *Lock) StateFields() []string { + return []string{ + "Readers", + "Writer", + } +} + +func (l *Lock) beforeSave() {} + +func (l *Lock) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.Readers) + stateSinkObject.Save(1, &l.Writer) +} + +func (l *Lock) afterLoad() {} + +func (l *Lock) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.Readers) + stateSourceObject.Load(1, &l.Writer) +} + +func (l *Locks) StateTypeName() string { + return "pkg/sentry/fs/lock.Locks" +} + +func (l *Locks) StateFields() []string { + return []string{ + "locks", + } +} + +func (l *Locks) beforeSave() {} + +func (l *Locks) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + if !state.IsZeroValue(&l.blockedQueue) { + state.Failf("blockedQueue is %#v, expected zero", &l.blockedQueue) + } + stateSinkObject.Save(0, &l.locks) +} + +func (l *Locks) afterLoad() {} + +func (l *Locks) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.locks) +} + +func (r *LockRange) StateTypeName() string { + return "pkg/sentry/fs/lock.LockRange" +} + +func (r *LockRange) StateFields() []string { + return []string{ + "Start", + "End", + } +} + +func (r *LockRange) beforeSave() {} + +func (r *LockRange) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.Start) + stateSinkObject.Save(1, &r.End) +} + +func (r *LockRange) afterLoad() {} + +func (r *LockRange) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.Start) + stateSourceObject.Load(1, &r.End) +} + +func (s *LockSet) StateTypeName() string { + return "pkg/sentry/fs/lock.LockSet" +} + +func (s *LockSet) StateFields() []string { + return []string{ + "root", + } +} + +func (s *LockSet) beforeSave() {} + +func (s *LockSet) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + var rootValue *LockSegmentDataSlices = s.saveRoot() + stateSinkObject.SaveValue(0, rootValue) +} + +func (s *LockSet) afterLoad() {} + +func (s *LockSet) StateLoad(stateSourceObject state.Source) { + stateSourceObject.LoadValue(0, new(*LockSegmentDataSlices), func(y interface{}) { s.loadRoot(y.(*LockSegmentDataSlices)) }) +} + +func (n *Locknode) StateTypeName() string { + return "pkg/sentry/fs/lock.Locknode" +} + +func (n *Locknode) StateFields() []string { + return []string{ + "nrSegments", + "parent", + "parentIndex", + "hasChildren", + "maxGap", + "keys", + "values", + "children", + } +} + +func (n *Locknode) beforeSave() {} + +func (n *Locknode) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.nrSegments) + stateSinkObject.Save(1, &n.parent) + stateSinkObject.Save(2, &n.parentIndex) + stateSinkObject.Save(3, &n.hasChildren) + stateSinkObject.Save(4, &n.maxGap) + stateSinkObject.Save(5, &n.keys) + stateSinkObject.Save(6, &n.values) + stateSinkObject.Save(7, &n.children) +} + +func (n *Locknode) afterLoad() {} + +func (n *Locknode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.nrSegments) + stateSourceObject.Load(1, &n.parent) + stateSourceObject.Load(2, &n.parentIndex) + stateSourceObject.Load(3, &n.hasChildren) + stateSourceObject.Load(4, &n.maxGap) + stateSourceObject.Load(5, &n.keys) + stateSourceObject.Load(6, &n.values) + stateSourceObject.Load(7, &n.children) +} + +func (l *LockSegmentDataSlices) StateTypeName() string { + return "pkg/sentry/fs/lock.LockSegmentDataSlices" +} + +func (l *LockSegmentDataSlices) StateFields() []string { + return []string{ + "Start", + "End", + "Values", + } +} + +func (l *LockSegmentDataSlices) beforeSave() {} + +func (l *LockSegmentDataSlices) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.Start) + stateSinkObject.Save(1, &l.End) + stateSinkObject.Save(2, &l.Values) +} + +func (l *LockSegmentDataSlices) afterLoad() {} + +func (l *LockSegmentDataSlices) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.Start) + stateSourceObject.Load(1, &l.End) + stateSourceObject.Load(2, &l.Values) +} + +func init() { + state.Register((*Lock)(nil)) + state.Register((*Locks)(nil)) + state.Register((*LockRange)(nil)) + state.Register((*LockSet)(nil)) + state.Register((*Locknode)(nil)) + state.Register((*LockSegmentDataSlices)(nil)) +} diff --git a/pkg/sentry/fs/lock/lock_test.go b/pkg/sentry/fs/lock/lock_test.go deleted file mode 100644 index fad90984b..000000000 --- a/pkg/sentry/fs/lock/lock_test.go +++ /dev/null @@ -1,1060 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package lock - -import ( - "reflect" - "testing" -) - -type entry struct { - Lock - LockRange -} - -func equals(e0, e1 []entry) bool { - if len(e0) != len(e1) { - return false - } - for i := range e0 { - for k := range e0[i].Lock.Readers { - if !e1[i].Lock.Readers[k] { - return false - } - } - for k := range e1[i].Lock.Readers { - if !e0[i].Lock.Readers[k] { - return false - } - } - if !reflect.DeepEqual(e0[i].LockRange, e1[i].LockRange) { - return false - } - if e0[i].Lock.Writer != e1[i].Lock.Writer { - return false - } - } - return true -} - -// fill a LockSet with consecutive region locks. Will panic if -// LockRanges are not consecutive. -func fill(entries []entry) LockSet { - l := LockSet{} - for _, e := range entries { - gap := l.FindGap(e.LockRange.Start) - if !gap.Ok() { - panic("cannot insert into existing segment") - } - l.Insert(gap, e.LockRange, e.Lock) - } - return l -} - -func TestCanLockEmpty(t *testing.T) { - l := LockSet{} - - // Expect to be able to take any locks given that the set is empty. - eof := l.FirstGap().End() - r := LockRange{0, eof} - if !l.canLock(1, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 1) - } - if !l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 2) - } - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } - if !l.canLock(2, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 2) - } -} - -func TestCanLock(t *testing.T) { - // + -------------- + ---------- + -------------- + --------- + - // | Readers 1 & 2 | Readers 1 | Readers 1 & 3 | Writer 1 | - // + ------------- + ---------- + -------------- + --------- + - // 0 1024 2048 3072 4096 - l := fill([]entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{1: true}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 3: true}}, - LockRange: LockRange{2048, 3072}, - }, - { - Lock: Lock{Writer: 1}, - LockRange: LockRange{3072, 4096}, - }, - }) - - // Now that we have a mildly interesting layout, try some checks on different - // ranges, uids, and lock types. - // - // Expect to be able to extend the read lock, despite the writer lock, because - // the writer has the same uid as the requested read lock. - r := LockRange{0, 8192} - if !l.canLock(1, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 1) - } - // Expect to *not* be able to extend the read lock since there is an overlapping - // writer region locked by someone other than the uid. - if l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got true, want false", ReadLock, r, 2) - } - // Expect to be able to extend the read lock if there are only other readers in - // the way. - r = LockRange{64, 3072} - if !l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 2) - } - // Expect to be able to set a read lock beyond the range of any existing locks. - r = LockRange{4096, 10240} - if !l.canLock(2, ReadLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", ReadLock, r, 2) - } - - // Expect to not be able to take a write lock with other readers in the way. - r = LockRange{0, 8192} - if l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got true, want false", WriteLock, r, 1) - } - // Expect to be able to extend the write lock for the same uid. - r = LockRange{3072, 8192} - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } - // Expect to not be able to overlap a write lock for two different uids. - if l.canLock(2, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got true, want false", WriteLock, r, 2) - } - // Expect to be able to set a write lock that is beyond the range of any - // existing locks. - r = LockRange{8192, 10240} - if !l.canLock(2, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 2) - } - // Expect to be able to upgrade a read lock (any portion of it). - r = LockRange{1024, 2048} - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } - r = LockRange{1080, 2000} - if !l.canLock(1, WriteLock, r) { - t.Fatalf("canLock type %d for range %v and uid %d got false, want true", WriteLock, r, 1) - } -} - -func TestSetLock(t *testing.T) { - tests := []struct { - // description of test. - name string - - // LockSet entries to pre-fill. - before []entry - - // Description of region to lock: - // - // start is the file offset of the lock. - start uint64 - // end is the end file offset of the lock. - end uint64 - // uid of lock attempter. - uid UniqueID - // lock type requested. - lockType LockType - - // success is true if taking the above - // lock should succeed. - success bool - - // Expected layout of the set after locking - // if success is true. - after []entry - }{ - { - name: "set zero length ReadLock on empty set", - start: 0, - end: 0, - uid: 0, - lockType: ReadLock, - success: true, - }, - { - name: "set zero length WriteLock on empty set", - start: 0, - end: 0, - uid: 0, - lockType: WriteLock, - success: true, - }, - { - name: "set ReadLock on empty set", - start: 0, - end: LockEOF, - uid: 0, - lockType: ReadLock, - success: true, - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "set WriteLock on empty set", - start: 0, - end: LockEOF, - uid: 0, - lockType: WriteLock, - success: true, - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "set ReadLock on WriteLock same uid", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - lockType: ReadLock, - success: true, - // + ----------- + --------------------------- + - // | Readers 0 | Writer 0 | - // + ----------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "set WriteLock on ReadLock same uid", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - lockType: WriteLock, - success: true, - // + ----------- + --------------------------- + - // | Writer 0 | Readers 0 | - // + ----------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "set ReadLock on WriteLock different uid", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: ReadLock, - success: false, - }, - { - name: "set WriteLock on ReadLock different uid", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: WriteLock, - success: false, - }, - { - name: "split ReadLock for overlapping lock at start 0", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: ReadLock, - success: true, - // + -------------- + --------------------------- + - // | Readers 0 & 1 | Readers 0 | - // + -------------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "split ReadLock for overlapping lock at non-zero start", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 4096, - end: 8192, - uid: 1, - lockType: ReadLock, - success: true, - // + ---------- + -------------- + ----------- + - // | Readers 0 | Readers 0 & 1 | Readers 0 | - // + ---------- + -------------- + ----------- + - // 0 4096 8192 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{4096, 8192}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{8192, LockEOF}, - }, - }, - }, - { - name: "fill front gap with ReadLock", - // + --------- + ---------------------------- + - // | gap | Readers 0 | - // + --------- + ---------------------------- + - // 0 1024 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - start: 0, - end: 8192, - uid: 0, - lockType: ReadLock, - success: true, - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "fill end gap with ReadLock", - // + ---------------------------- + - // | Readers 0 | - // + ---------------------------- + - // 0 4096 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, 4096}, - }, - }, - start: 1024, - end: LockEOF, - uid: 0, - lockType: ReadLock, - success: true, - // Note that this is not merged after lock does a Split. This is - // fine because the two locks will still *behave* as one. In other - // words we can fragment any lock all we want and semantically it - // makes no difference. - // - // + ----------- + --------------------------- + - // | Readers 0 | Readers 0 | - // + ----------- + --------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - }, - { - name: "fill gap with ReadLock and split", - // + --------- + ---------------------------- + - // | gap | Readers 0 | - // + --------- + ---------------------------- + - // 0 1024 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 1, - lockType: ReadLock, - success: true, - // + --------- + ------------- + ------------- + - // | Reader 1 | Readers 0 & 1 | Reader 0 | - // + ----------+ ------------- + ------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "upgrade ReadLock to WriteLock for single uid fill gap", - // + ------------- + --------- + --- + ------------- + - // | Readers 0 & 1 | Readers 0 | gap | Readers 0 & 2 | - // + ------------- + --------- + --- + ------------- + - // 0 1024 2048 4096 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 1024, - end: 4096, - uid: 0, - lockType: WriteLock, - success: true, - // + ------------- + -------- + ------------- + - // | Readers 0 & 1 | Writer 0 | Readers 0 & 2 | - // + ------------- + -------- + ------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "upgrade ReadLock to WriteLock for single uid keep gap", - // + ------------- + --------- + --- + ------------- + - // | Readers 0 & 1 | Readers 0 | gap | Readers 0 & 2 | - // + ------------- + --------- + --- + ------------- + - // 0 1024 2048 4096 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 1024, - end: 3072, - uid: 0, - lockType: WriteLock, - success: true, - // + ------------- + -------- + --- + ------------- + - // | Readers 0 & 1 | Writer 0 | gap | Readers 0 & 2 | - // + ------------- + -------- + --- + ------------- + - // 0 1024 3072 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{1024, 3072}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "fail to upgrade ReadLock to WriteLock with conflicting Reader", - // + ------------- + --------- + - // | Readers 0 & 1 | Readers 0 | - // + ------------- + --------- + - // 0 1024 2048 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, 2048}, - }, - }, - start: 0, - end: 2048, - uid: 0, - lockType: WriteLock, - success: false, - }, - { - name: "take WriteLock on whole file if all uids are the same", - // + ------------- + --------- + --------- + ---------- + - // | Writer 0 | Readers 0 | Readers 0 | Readers 0 | - // + ------------- + --------- + --------- + ---------- + - // 0 1024 2048 4096 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{1024, 2048}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{2048, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - lockType: WriteLock, - success: true, - // We do not manually merge locks. Semantically a fragmented lock - // held by the same uid will behave as one lock so it makes no difference. - // - // + ------------- + ---------------------------- + - // | Writer 0 | Writer 0 | - // + ------------- + ---------------------------- + - // 0 1024 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{1024, LockEOF}, - }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - l := fill(test.before) - - r := LockRange{Start: test.start, End: test.end} - success := l.lock(test.uid, test.lockType, r) - var got []entry - for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - got = append(got, entry{ - Lock: seg.Value(), - LockRange: seg.Range(), - }) - } - - if success != test.success { - t.Errorf("setlock(%v, %+v, %d, %d) got success %v, want %v", test.before, r, test.uid, test.lockType, success, test.success) - return - } - - if success { - if !equals(got, test.after) { - t.Errorf("got set %+v, want %+v", got, test.after) - } - } - }) - } -} - -func TestUnlock(t *testing.T) { - tests := []struct { - // description of test. - name string - - // LockSet entries to pre-fill. - before []entry - - // Description of region to unlock: - // - // start is the file start of the lock. - start uint64 - // end is the end file start of the lock. - end uint64 - // uid of lock holder. - uid UniqueID - - // Expected layout of the set after unlocking. - after []entry - }{ - { - name: "unlock zero length on empty set", - start: 0, - end: 0, - uid: 0, - }, - { - name: "unlock on empty set (no-op)", - start: 0, - end: LockEOF, - uid: 0, - }, - { - name: "unlock uid not locked (no-op)", - // + --------------------------- + - // | Readers 1 & 2 | - // + --------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 1024, - end: 4096, - uid: 0, - // + --------------------------- + - // | Readers 1 & 2 | - // + --------------------------- + - // 0 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - }, - { - name: "unlock ReadLock over entire file", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - }, - { - name: "unlock WriteLock over entire file", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - }, - { - name: "unlock partial ReadLock (start)", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - // + ------ + --------------------------- + - // | gap | Readers 0 | - // +------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock partial WriteLock (start)", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 0, - end: 4096, - uid: 0, - // + ------ + --------------------------- + - // | gap | Writer 0 | - // +------- + --------------------------- + - // 0 4096 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock partial ReadLock (end)", - // + ----------------------------------------- + - // | Readers 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 4096, - end: LockEOF, - uid: 0, - // + --------------------------- + - // | Readers 0 | - // +---------------------------- + - // 0 4096 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true}}, - LockRange: LockRange{0, 4096}, - }, - }, - }, - { - name: "unlock partial WriteLock (end)", - // + ----------------------------------------- + - // | Writer 0 | - // + ----------------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 4096, - end: LockEOF, - uid: 0, - // + --------------------------- + - // | Writer 0 | - // +---------------------------- + - // 0 4096 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 4096}, - }, - }, - }, - { - name: "unlock for single uid", - // + ------------- + --------- + ------------------- + - // | Readers 0 & 1 | Writer 0 | Readers 0 & 1 & 2 | - // + ------------- + --------- + ------------------- + - // 0 1024 4096 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 0, - end: LockEOF, - uid: 0, - // + --------- + --- + --------------- + - // | Readers 1 | gap | Readers 1 & 2 | - // + --------- + --- + --------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{1: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock subsection locked", - // + ------------------------------- + - // | Readers 0 & 1 & 2 | - // + ------------------------------- + - // 0 max uint64 - before: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}}, - LockRange: LockRange{0, LockEOF}, - }, - }, - start: 1024, - end: 4096, - uid: 0, - // + ----------------- + ------------- + ----------------- + - // | Readers 0 & 1 & 2 | Readers 1 & 2 | Readers 0 & 1 & 2 | - // + ----------------- + ------------- + ----------------- + - // 0 1024 4096 max uint64 - after: []entry{ - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{1: true, 2: true}}, - LockRange: LockRange{1024, 4096}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true, 2: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock mid-gap to increase gap", - // + --------- + ----- + ------------------- + - // | Writer 0 | gap | Readers 0 & 1 | - // + --------- + ----- + ------------------- + - // 0 1024 4096 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 8, - end: 2048, - uid: 0, - // + --------- + ----- + ------------------- + - // | Writer 0 | gap | Readers 0 & 1 | - // + --------- + ----- + ------------------- + - // 0 8 4096 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 8}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - }, - { - name: "unlock split region on uid mid-gap", - // + --------- + ----- + ------------------- + - // | Writer 0 | gap | Readers 0 & 1 | - // + --------- + ----- + ------------------- + - // 0 1024 4096 max uint64 - before: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{4096, LockEOF}, - }, - }, - start: 2048, - end: 8192, - uid: 0, - // + --------- + ----- + --------- + ------------- + - // | Writer 0 | gap | Readers 1 | Readers 0 & 1 | - // + --------- + ----- + --------- + ------------- + - // 0 1024 4096 8192 max uint64 - after: []entry{ - { - Lock: Lock{Writer: 0}, - LockRange: LockRange{0, 1024}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{1: true}}, - LockRange: LockRange{4096, 8192}, - }, - { - Lock: Lock{Readers: map[UniqueID]bool{0: true, 1: true}}, - LockRange: LockRange{8192, LockEOF}, - }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - l := fill(test.before) - - r := LockRange{Start: test.start, End: test.end} - l.unlock(test.uid, r) - var got []entry - for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - got = append(got, entry{ - Lock: seg.Value(), - LockRange: seg.Range(), - }) - } - if !equals(got, test.after) { - t.Errorf("got set %+v, want %+v", got, test.after) - } - }) - } -} diff --git a/pkg/sentry/fs/mount_test.go b/pkg/sentry/fs/mount_test.go deleted file mode 100644 index 6c296f5d0..000000000 --- a/pkg/sentry/fs/mount_test.go +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "fmt" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" -) - -// cacheReallyContains iterates through the dirent cache to determine whether -// it contains the given dirent. -func cacheReallyContains(cache *DirentCache, d *Dirent) bool { - for i := cache.list.Front(); i != nil; i = i.Next() { - if i == d { - return true - } - } - return false -} - -func mountPathsAre(ctx context.Context, root *Dirent, got []*Mount, want ...string) error { - gotPaths := make(map[string]struct{}, len(got)) - gotStr := make([]string, len(got)) - for i, g := range got { - if groot := g.Root(); groot != nil { - name, _ := groot.FullName(root) - groot.DecRef(ctx) - gotStr[i] = name - gotPaths[name] = struct{}{} - } - } - if len(got) != len(want) { - return fmt.Errorf("mount paths are different, got: %q, want: %q", gotStr, want) - } - for _, w := range want { - if _, ok := gotPaths[w]; !ok { - return fmt.Errorf("no mount with path %q found", w) - } - } - return nil -} - -// TestMountSourceOnlyCachedOnce tests that a Dirent that is mounted over only ends -// up in a single Dirent Cache. NOTE(b/63848693): Having a dirent in multiple -// caches causes major consistency issues. -func TestMountSourceOnlyCachedOnce(t *testing.T) { - ctx := contexttest.Context(t) - - rootCache := NewDirentCache(100) - rootInode := NewMockInode(ctx, NewMockMountSource(rootCache), StableAttr{ - Type: Directory, - }) - mm, err := NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - rootDirent := mm.Root() - defer rootDirent.DecRef(ctx) - - // Get a child of the root which we will mount over. Note that the - // MockInodeOperations causes Walk to always succeed. - child, err := rootDirent.Walk(ctx, rootDirent, "child") - if err != nil { - t.Fatalf("failed to walk to child dirent: %v", err) - } - child.maybeExtendReference() // Cache. - - // Ensure that the root cache contains the child. - if !cacheReallyContains(rootCache, child) { - t.Errorf("wanted rootCache to contain child dirent, but it did not") - } - - // Create a new cache and inode, and mount it over child. - submountCache := NewDirentCache(100) - submountInode := NewMockInode(ctx, NewMockMountSource(submountCache), StableAttr{ - Type: Directory, - }) - if err := mm.Mount(ctx, child, submountInode); err != nil { - t.Fatalf("failed to mount over child: %v", err) - } - - // Walk to the child again. - child2, err := rootDirent.Walk(ctx, rootDirent, "child") - if err != nil { - t.Fatalf("failed to walk to child dirent: %v", err) - } - - // Should have a different Dirent than before. - if child == child2 { - t.Fatalf("expected %v not equal to %v, but they are the same", child, child2) - } - - // Neither of the caches should no contain the child. - if cacheReallyContains(rootCache, child) { - t.Errorf("wanted rootCache not to contain child dirent, but it did") - } - if cacheReallyContains(submountCache, child) { - t.Errorf("wanted submountCache not to contain child dirent, but it did") - } -} - -func TestAllMountsUnder(t *testing.T) { - ctx := contexttest.Context(t) - - rootCache := NewDirentCache(100) - rootInode := NewMockInode(ctx, NewMockMountSource(rootCache), StableAttr{ - Type: Directory, - }) - mm, err := NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - rootDirent := mm.Root() - defer rootDirent.DecRef(ctx) - - // Add mounts at the following paths: - paths := []string{ - "/foo", - "/foo/bar", - "/foo/bar/baz", - "/foo/qux", - "/waldo", - } - - var maxTraversals uint - for _, p := range paths { - maxTraversals = 0 - d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", p, err) - } - - submountInode := NewMockInode(ctx, NewMockMountSource(nil), StableAttr{ - Type: Directory, - }) - if err := mm.Mount(ctx, d, submountInode); err != nil { - t.Fatalf("could not mount at %q: %v", p, err) - } - d.DecRef(ctx) - } - - // mm root should contain all submounts (and does not include the root mount). - rootMnt := mm.FindMount(rootDirent) - submounts := mm.AllMountsUnder(rootMnt) - allPaths := append(paths, "/") - if err := mountPathsAre(ctx, rootDirent, submounts, allPaths...); err != nil { - t.Error(err) - } - - // Each mount should have a unique ID. - foundIDs := make(map[uint64]struct{}) - for _, m := range submounts { - if _, ok := foundIDs[m.ID]; ok { - t.Errorf("got multiple mounts with id %d", m.ID) - } - foundIDs[m.ID] = struct{}{} - } - - // Root mount should have no parent. - if p := rootMnt.ParentID; p != invalidMountID { - t.Errorf("root.Parent got %v wanted nil", p) - } - - // Check that "foo" mount has 3 children. - maxTraversals = 0 - d, err := mm.FindLink(ctx, rootDirent, nil, "/foo", &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", "/foo", err) - } - defer d.DecRef(ctx) - submounts = mm.AllMountsUnder(mm.FindMount(d)) - if err := mountPathsAre(ctx, rootDirent, submounts, "/foo", "/foo/bar", "/foo/qux", "/foo/bar/baz"); err != nil { - t.Error(err) - } - - // "waldo" mount should have no children. - maxTraversals = 0 - waldo, err := mm.FindLink(ctx, rootDirent, nil, "/waldo", &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", "/waldo", err) - } - defer waldo.DecRef(ctx) - submounts = mm.AllMountsUnder(mm.FindMount(waldo)) - if err := mountPathsAre(ctx, rootDirent, submounts, "/waldo"); err != nil { - t.Error(err) - } -} - -func TestUnmount(t *testing.T) { - ctx := contexttest.Context(t) - - rootCache := NewDirentCache(100) - rootInode := NewMockInode(ctx, NewMockMountSource(rootCache), StableAttr{ - Type: Directory, - }) - mm, err := NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - rootDirent := mm.Root() - defer rootDirent.DecRef(ctx) - - // Add mounts at the following paths: - paths := []string{ - "/foo", - "/foo/bar", - "/foo/bar/goo", - "/foo/bar/goo/abc", - "/foo/abc", - "/foo/def", - "/waldo", - "/wally", - } - - var maxTraversals uint - for _, p := range paths { - maxTraversals = 0 - d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", p, err) - } - - submountInode := NewMockInode(ctx, NewMockMountSource(nil), StableAttr{ - Type: Directory, - }) - if err := mm.Mount(ctx, d, submountInode); err != nil { - t.Fatalf("could not mount at %q: %v", p, err) - } - d.DecRef(ctx) - } - - allPaths := make([]string, len(paths)+1) - allPaths[0] = "/" - copy(allPaths[1:], paths) - - rootMnt := mm.FindMount(rootDirent) - for i := len(paths) - 1; i >= 0; i-- { - maxTraversals = 0 - p := paths[i] - d, err := mm.FindLink(ctx, rootDirent, nil, p, &maxTraversals) - if err != nil { - t.Fatalf("could not find path %q in mount manager: %v", p, err) - } - - if err := mm.Unmount(ctx, d, false); err != nil { - t.Fatalf("could not unmount at %q: %v", p, err) - } - d.DecRef(ctx) - - // Remove the path that has been unmounted and the check that the remaining - // mounts are still there. - allPaths = allPaths[:len(allPaths)-1] - submounts := mm.AllMountsUnder(rootMnt) - if err := mountPathsAre(ctx, rootDirent, submounts, allPaths...); err != nil { - t.Error(err) - } - } -} diff --git a/pkg/sentry/fs/mounts_test.go b/pkg/sentry/fs/mounts_test.go deleted file mode 100644 index 975d6cbc9..000000000 --- a/pkg/sentry/fs/mounts_test.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs_test - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" -) - -// Creates a new MountNamespace with filesystem: -// / (root dir) -// |-foo (dir) -// |-bar (file) -func createMountNamespace(ctx context.Context) (*fs.MountNamespace, error) { - perms := fs.FilePermsFromMode(0777) - m := fs.NewPseudoMountSource(ctx) - - barFile := fsutil.NewSimpleFileInode(ctx, fs.RootOwner, perms, 0) - fooDir := ramfs.NewDir(ctx, map[string]*fs.Inode{ - "bar": fs.NewInode(ctx, barFile, m, fs.StableAttr{Type: fs.RegularFile}), - }, fs.RootOwner, perms) - rootDir := ramfs.NewDir(ctx, map[string]*fs.Inode{ - "foo": fs.NewInode(ctx, fooDir, m, fs.StableAttr{Type: fs.Directory}), - }, fs.RootOwner, perms) - - return fs.NewMountNamespace(ctx, fs.NewInode(ctx, rootDir, m, fs.StableAttr{Type: fs.Directory})) -} - -func TestFindLink(t *testing.T) { - ctx := contexttest.Context(t) - mm, err := createMountNamespace(ctx) - if err != nil { - t.Fatalf("createMountNamespace failed: %v", err) - } - - root := mm.Root() - defer root.DecRef(ctx) - foo, err := root.Walk(ctx, root, "foo") - if err != nil { - t.Fatalf("Error walking to foo: %v", err) - } - - // Positive cases. - for _, tc := range []struct { - findPath string - wd *fs.Dirent - wantPath string - }{ - {".", root, "/"}, - {".", foo, "/foo"}, - {"..", foo, "/"}, - {"../../..", foo, "/"}, - {"///foo", foo, "/foo"}, - {"/foo", foo, "/foo"}, - {"/foo/bar", foo, "/foo/bar"}, - {"/foo/.///./bar", foo, "/foo/bar"}, - {"/foo///bar", foo, "/foo/bar"}, - {"/foo/../foo/bar", foo, "/foo/bar"}, - {"foo/bar", root, "/foo/bar"}, - {"foo////bar", root, "/foo/bar"}, - {"bar", foo, "/foo/bar"}, - } { - wdPath, _ := tc.wd.FullName(root) - maxTraversals := uint(0) - if d, err := mm.FindLink(ctx, root, tc.wd, tc.findPath, &maxTraversals); err != nil { - t.Errorf("FindLink(%q, wd=%q) failed: %v", tc.findPath, wdPath, err) - } else if got, _ := d.FullName(root); got != tc.wantPath { - t.Errorf("FindLink(%q, wd=%q) got dirent %q, want %q", tc.findPath, wdPath, got, tc.wantPath) - } - } - - // Negative cases. - for _, tc := range []struct { - findPath string - wd *fs.Dirent - }{ - {"bar", root}, - {"/bar", root}, - {"/foo/../../bar", root}, - {"foo", foo}, - } { - wdPath, _ := tc.wd.FullName(root) - maxTraversals := uint(0) - if _, err := mm.FindLink(ctx, root, tc.wd, tc.findPath, &maxTraversals); err == nil { - t.Errorf("FindLink(%q, wd=%q) did not return error", tc.findPath, wdPath) - } - } -} diff --git a/pkg/sentry/fs/path_test.go b/pkg/sentry/fs/path_test.go deleted file mode 100644 index e6f57ebba..000000000 --- a/pkg/sentry/fs/path_test.go +++ /dev/null @@ -1,289 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package fs - -import ( - "testing" -) - -// TestSplitLast tests variants of path splitting. -func TestSplitLast(t *testing.T) { - cases := []struct { - path string - dir string - file string - }{ - {path: "/", dir: "/", file: "."}, - {path: "/.", dir: "/", file: "."}, - {path: "/./", dir: "/", file: "."}, - {path: "/./.", dir: "/.", file: "."}, - {path: "/././", dir: "/.", file: "."}, - {path: "/./..", dir: "/.", file: ".."}, - {path: "/./../", dir: "/.", file: ".."}, - {path: "/..", dir: "/", file: ".."}, - {path: "/../", dir: "/", file: ".."}, - {path: "/../.", dir: "/..", file: "."}, - {path: "/.././", dir: "/..", file: "."}, - {path: "/../..", dir: "/..", file: ".."}, - {path: "/../../", dir: "/..", file: ".."}, - - {path: "", dir: ".", file: "."}, - {path: ".", dir: ".", file: "."}, - {path: "./", dir: ".", file: "."}, - {path: "./.", dir: ".", file: "."}, - {path: "././", dir: ".", file: "."}, - {path: "./..", dir: ".", file: ".."}, - {path: "./../", dir: ".", file: ".."}, - {path: "..", dir: ".", file: ".."}, - {path: "../", dir: ".", file: ".."}, - {path: "../.", dir: "..", file: "."}, - {path: ".././", dir: "..", file: "."}, - {path: "../..", dir: "..", file: ".."}, - {path: "../../", dir: "..", file: ".."}, - - {path: "/foo", dir: "/", file: "foo"}, - {path: "/foo/", dir: "/", file: "foo"}, - {path: "/foo/.", dir: "/foo", file: "."}, - {path: "/foo/./", dir: "/foo", file: "."}, - {path: "/foo/./.", dir: "/foo/.", file: "."}, - {path: "/foo/./..", dir: "/foo/.", file: ".."}, - {path: "/foo/..", dir: "/foo", file: ".."}, - {path: "/foo/../", dir: "/foo", file: ".."}, - {path: "/foo/../.", dir: "/foo/..", file: "."}, - {path: "/foo/../..", dir: "/foo/..", file: ".."}, - - {path: "/foo/bar", dir: "/foo", file: "bar"}, - {path: "/foo/bar/", dir: "/foo", file: "bar"}, - {path: "/foo/bar/.", dir: "/foo/bar", file: "."}, - {path: "/foo/bar/./", dir: "/foo/bar", file: "."}, - {path: "/foo/bar/./.", dir: "/foo/bar/.", file: "."}, - {path: "/foo/bar/./..", dir: "/foo/bar/.", file: ".."}, - {path: "/foo/bar/..", dir: "/foo/bar", file: ".."}, - {path: "/foo/bar/../", dir: "/foo/bar", file: ".."}, - {path: "/foo/bar/../.", dir: "/foo/bar/..", file: "."}, - {path: "/foo/bar/../..", dir: "/foo/bar/..", file: ".."}, - - {path: "foo", dir: ".", file: "foo"}, - {path: "foo", dir: ".", file: "foo"}, - {path: "foo/", dir: ".", file: "foo"}, - {path: "foo/.", dir: "foo", file: "."}, - {path: "foo/./", dir: "foo", file: "."}, - {path: "foo/./.", dir: "foo/.", file: "."}, - {path: "foo/./..", dir: "foo/.", file: ".."}, - {path: "foo/..", dir: "foo", file: ".."}, - {path: "foo/../", dir: "foo", file: ".."}, - {path: "foo/../.", dir: "foo/..", file: "."}, - {path: "foo/../..", dir: "foo/..", file: ".."}, - {path: "foo/", dir: ".", file: "foo"}, - {path: "foo/.", dir: "foo", file: "."}, - - {path: "foo/bar", dir: "foo", file: "bar"}, - {path: "foo/bar/", dir: "foo", file: "bar"}, - {path: "foo/bar/.", dir: "foo/bar", file: "."}, - {path: "foo/bar/./", dir: "foo/bar", file: "."}, - {path: "foo/bar/./.", dir: "foo/bar/.", file: "."}, - {path: "foo/bar/./..", dir: "foo/bar/.", file: ".."}, - {path: "foo/bar/..", dir: "foo/bar", file: ".."}, - {path: "foo/bar/../", dir: "foo/bar", file: ".."}, - {path: "foo/bar/../.", dir: "foo/bar/..", file: "."}, - {path: "foo/bar/../..", dir: "foo/bar/..", file: ".."}, - {path: "foo/bar/", dir: "foo", file: "bar"}, - {path: "foo/bar/.", dir: "foo/bar", file: "."}, - } - - for _, c := range cases { - dir, file := SplitLast(c.path) - if dir != c.dir || file != c.file { - t.Errorf("SplitLast(%q) got (%q, %q), expected (%q, %q)", c.path, dir, file, c.dir, c.file) - } - } -} - -// TestSplitFirst tests variants of path splitting. -func TestSplitFirst(t *testing.T) { - cases := []struct { - path string - first string - remainder string - }{ - {path: "/", first: "/", remainder: ""}, - {path: "/.", first: "/", remainder: "."}, - {path: "///.", first: "/", remainder: "//."}, - {path: "/.///", first: "/", remainder: "."}, - {path: "/./.", first: "/", remainder: "./."}, - {path: "/././", first: "/", remainder: "./."}, - {path: "/./..", first: "/", remainder: "./.."}, - {path: "/./../", first: "/", remainder: "./.."}, - {path: "/..", first: "/", remainder: ".."}, - {path: "/../", first: "/", remainder: ".."}, - {path: "/../.", first: "/", remainder: "../."}, - {path: "/.././", first: "/", remainder: "../."}, - {path: "/../..", first: "/", remainder: "../.."}, - {path: "/../../", first: "/", remainder: "../.."}, - - {path: "", first: ".", remainder: ""}, - {path: ".", first: ".", remainder: ""}, - {path: "./", first: ".", remainder: ""}, - {path: ".///", first: ".", remainder: ""}, - {path: "./.", first: ".", remainder: "."}, - {path: "././", first: ".", remainder: "."}, - {path: "./..", first: ".", remainder: ".."}, - {path: "./../", first: ".", remainder: ".."}, - {path: "..", first: "..", remainder: ""}, - {path: "../", first: "..", remainder: ""}, - {path: "../.", first: "..", remainder: "."}, - {path: ".././", first: "..", remainder: "."}, - {path: "../..", first: "..", remainder: ".."}, - {path: "../../", first: "..", remainder: ".."}, - - {path: "/foo", first: "/", remainder: "foo"}, - {path: "/foo/", first: "/", remainder: "foo"}, - {path: "/foo///", first: "/", remainder: "foo"}, - {path: "/foo/.", first: "/", remainder: "foo/."}, - {path: "/foo/./", first: "/", remainder: "foo/."}, - {path: "/foo/./.", first: "/", remainder: "foo/./."}, - {path: "/foo/./..", first: "/", remainder: "foo/./.."}, - {path: "/foo/..", first: "/", remainder: "foo/.."}, - {path: "/foo/../", first: "/", remainder: "foo/.."}, - {path: "/foo/../.", first: "/", remainder: "foo/../."}, - {path: "/foo/../..", first: "/", remainder: "foo/../.."}, - - {path: "/foo/bar", first: "/", remainder: "foo/bar"}, - {path: "///foo/bar", first: "/", remainder: "//foo/bar"}, - {path: "/foo///bar", first: "/", remainder: "foo///bar"}, - {path: "/foo/bar/.", first: "/", remainder: "foo/bar/."}, - {path: "/foo/bar/./", first: "/", remainder: "foo/bar/."}, - {path: "/foo/bar/./.", first: "/", remainder: "foo/bar/./."}, - {path: "/foo/bar/./..", first: "/", remainder: "foo/bar/./.."}, - {path: "/foo/bar/..", first: "/", remainder: "foo/bar/.."}, - {path: "/foo/bar/../", first: "/", remainder: "foo/bar/.."}, - {path: "/foo/bar/../.", first: "/", remainder: "foo/bar/../."}, - {path: "/foo/bar/../..", first: "/", remainder: "foo/bar/../.."}, - - {path: "foo", first: "foo", remainder: ""}, - {path: "foo", first: "foo", remainder: ""}, - {path: "foo/", first: "foo", remainder: ""}, - {path: "foo///", first: "foo", remainder: ""}, - {path: "foo/.", first: "foo", remainder: "."}, - {path: "foo/./", first: "foo", remainder: "."}, - {path: "foo/./.", first: "foo", remainder: "./."}, - {path: "foo/./..", first: "foo", remainder: "./.."}, - {path: "foo/..", first: "foo", remainder: ".."}, - {path: "foo/../", first: "foo", remainder: ".."}, - {path: "foo/../.", first: "foo", remainder: "../."}, - {path: "foo/../..", first: "foo", remainder: "../.."}, - {path: "foo/", first: "foo", remainder: ""}, - {path: "foo/.", first: "foo", remainder: "."}, - - {path: "foo/bar", first: "foo", remainder: "bar"}, - {path: "foo///bar", first: "foo", remainder: "bar"}, - {path: "foo/bar/", first: "foo", remainder: "bar"}, - {path: "foo/bar/.", first: "foo", remainder: "bar/."}, - {path: "foo/bar/./", first: "foo", remainder: "bar/."}, - {path: "foo/bar/./.", first: "foo", remainder: "bar/./."}, - {path: "foo/bar/./..", first: "foo", remainder: "bar/./.."}, - {path: "foo/bar/..", first: "foo", remainder: "bar/.."}, - {path: "foo/bar/../", first: "foo", remainder: "bar/.."}, - {path: "foo/bar/../.", first: "foo", remainder: "bar/../."}, - {path: "foo/bar/../..", first: "foo", remainder: "bar/../.."}, - {path: "foo/bar/", first: "foo", remainder: "bar"}, - {path: "foo/bar/.", first: "foo", remainder: "bar/."}, - } - - for _, c := range cases { - first, remainder := SplitFirst(c.path) - if first != c.first || remainder != c.remainder { - t.Errorf("SplitFirst(%q) got (%q, %q), expected (%q, %q)", c.path, first, remainder, c.first, c.remainder) - } - } -} - -// TestIsSubpath tests the IsSubpath method. -func TestIsSubpath(t *testing.T) { - tcs := []struct { - // Two absolute paths. - pathA string - pathB string - - // Whether pathA is a subpath of pathB. - wantIsSubpath bool - - // Relative path from pathA to pathB. Only checked if - // wantIsSubpath is true. - wantRelpath string - }{ - { - pathA: "/foo/bar/baz", - pathB: "/foo", - wantIsSubpath: true, - wantRelpath: "bar/baz", - }, - { - pathA: "/foo", - pathB: "/foo/bar/baz", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/foo", - wantIsSubpath: false, - }, - { - pathA: "/foobar", - pathB: "/foo", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/foobar", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/foobar", - wantIsSubpath: false, - }, - { - pathA: "/", - pathB: "/foo", - wantIsSubpath: false, - }, - { - pathA: "/foo", - pathB: "/", - wantIsSubpath: true, - wantRelpath: "foo", - }, - { - pathA: "/foo/bar/../bar", - pathB: "/foo", - wantIsSubpath: true, - wantRelpath: "bar", - }, - { - pathA: "/foo/bar", - pathB: "/foo/../foo", - wantIsSubpath: true, - wantRelpath: "bar", - }, - } - - for _, tc := range tcs { - gotRelpath, gotIsSubpath := IsSubpath(tc.pathA, tc.pathB) - if gotRelpath != tc.wantRelpath || gotIsSubpath != tc.wantIsSubpath { - t.Errorf("IsSubpath(%q, %q) got %q %t, want %q %t", tc.pathA, tc.pathB, gotRelpath, gotIsSubpath, tc.wantRelpath, tc.wantIsSubpath) - } - } -} diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD deleted file mode 100644 index b8b2281a8..000000000 --- a/pkg/sentry/fs/proc/BUILD +++ /dev/null @@ -1,73 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "proc", - srcs = [ - "cgroup.go", - "cpuinfo.go", - "exec_args.go", - "fds.go", - "filesystems.go", - "fs.go", - "inode.go", - "loadavg.go", - "meminfo.go", - "mounts.go", - "net.go", - "proc.go", - "stat.go", - "sys.go", - "sys_net.go", - "sys_net_state.go", - "task.go", - "uid_gid_map.go", - "uptime.go", - "version.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/log", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/proc/device", - "//pkg/sentry/fs/proc/seqfile", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/fsbridge", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/mm", - "//pkg/sentry/socket", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/sync", - "//pkg/syserror", - "//pkg/tcpip/header", - "//pkg/tcpip/network/ipv4", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "proc_test", - size = "small", - srcs = [ - "net_test.go", - "sys_net_test.go", - ], - library = ":proc", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/inet", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/proc/README.md b/pkg/sentry/fs/proc/README.md deleted file mode 100644 index 6667a0916..000000000 --- a/pkg/sentry/fs/proc/README.md +++ /dev/null @@ -1,336 +0,0 @@ -This document tracks what is implemented in procfs. Refer to -Documentation/filesystems/proc.txt in the Linux project for information about -procfs generally. - -**NOTE**: This document is not guaranteed to be up to date. If you find an -inconsistency, please file a bug. - -[TOC] - -## Kernel data - -The following files are implemented: - -<!-- mdformat off(don't wrap the table) --> - -| File /proc/ | Content | -| :------------------------ | :---------------------------------------------------- | -| [cpuinfo](#cpuinfo) | Info about the CPU | -| [filesystems](#filesystems) | Supported filesystems | -| [loadavg](#loadavg) | Load average of last 1, 5 & 15 minutes | -| [meminfo](#meminfo) | Overall memory info | -| [stat](#stat) | Overall kernel statistics | -| [sys](#sys) | Change parameters within the kernel | -| [uptime](#uptime) | Wall clock since boot, combined idle time of all cpus | -| [version](#version) | Kernel version | - -<!-- mdformat on --> - -### cpuinfo - -```bash -$ cat /proc/cpuinfo -processor : 0 -vendor_id : GenuineIntel -cpu family : 6 -model : 45 -model name : unknown -stepping : unknown -cpu MHz : 1234.588 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx xsaveopt -bogomips : 1234.59 -clflush size : 64 -cache_alignment : 64 -address sizes : 46 bits physical, 48 bits virtual -power management: - -... -``` - -Notable divergences: - -Field name | Notes -:--------------- | :--------------------------------------- -model name | Always unknown -stepping | Always unknown -fpu | Always yes -fpu_exception | Always yes -wp | Always yes -bogomips | Bogus value (matches cpu MHz) -clflush size | Always 64 -cache_alignment | Always 64 -address sizes | Always 46 bits physical, 48 bits virtual -power management | Always blank - -Otherwise fields are derived from the sentry configuration. - -### filesystems - -```bash -$ cat /proc/filesystems -nodev 9p -nodev devpts -nodev devtmpfs -nodev proc -nodev sysfs -nodev tmpfs -``` - -### loadavg - -```bash -$ cat /proc/loadavg -0.00 0.00 0.00 0/0 0 -``` - -Column | Notes -:------------------------------------ | :---------- -CPU.IO utilization in last 1 minute | Always zero -CPU.IO utilization in last 5 minutes | Always zero -CPU.IO utilization in last 10 minutes | Always zero -Num currently running processes | Always zero -Total num processes | Always zero - -TODO(b/62345059): Populate the columns with accurate statistics. - -### meminfo - -```bash -$ cat /proc/meminfo -MemTotal: 2097152 kB -MemFree: 2083540 kB -MemAvailable: 2083540 kB -Buffers: 0 kB -Cached: 4428 kB -SwapCache: 0 kB -Active: 10812 kB -Inactive: 2216 kB -Active(anon): 8600 kB -Inactive(anon): 0 kB -Active(file): 2212 kB -Inactive(file): 2216 kB -Unevictable: 0 kB -Mlocked: 0 kB -SwapTotal: 0 kB -SwapFree: 0 kB -Dirty: 0 kB -Writeback: 0 kB -AnonPages: 8600 kB -Mapped: 4428 kB -Shmem: 0 kB - -``` - -Notable divergences: - -Field name | Notes -:---------------- | :----------------------------------------------------- -Buffers | Always zero, no block devices -SwapCache | Always zero, no swap -Inactive(anon) | Always zero, see SwapCache -Unevictable | Always zero TODO(b/31823263) -Mlocked | Always zero TODO(b/31823263) -SwapTotal | Always zero, no swap -SwapFree | Always zero, no swap -Dirty | Always zero TODO(b/31823263) -Writeback | Always zero TODO(b/31823263) -MemAvailable | Uses the same value as MemFree since there is no swap. -Slab | Missing -SReclaimable | Missing -SUnreclaim | Missing -KernelStack | Missing -PageTables | Missing -NFS_Unstable | Missing -Bounce | Missing -WritebackTmp | Missing -CommitLimit | Missing -Committed_AS | Missing -VmallocTotal | Missing -VmallocUsed | Missing -VmallocChunk | Missing -HardwareCorrupted | Missing -AnonHugePages | Missing -ShmemHugePages | Missing -ShmemPmdMapped | Missing -HugePages_Total | Missing -HugePages_Free | Missing -HugePages_Rsvd | Missing -HugePages_Surp | Missing -Hugepagesize | Missing -DirectMap4k | Missing -DirectMap2M | Missing -DirectMap1G | Missing - -### stat - -```bash -$ cat /proc/stat -cpu 0 0 0 0 0 0 0 0 0 0 -cpu0 0 0 0 0 0 0 0 0 0 0 -cpu1 0 0 0 0 0 0 0 0 0 0 -cpu2 0 0 0 0 0 0 0 0 0 0 -cpu3 0 0 0 0 0 0 0 0 0 0 -cpu4 0 0 0 0 0 0 0 0 0 0 -cpu5 0 0 0 0 0 0 0 0 0 0 -cpu6 0 0 0 0 0 0 0 0 0 0 -cpu7 0 0 0 0 0 0 0 0 0 0 -intr 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -ctxt 0 -btime 1504040968 -processes 0 -procs_running 0 -procs_blokkcked 0 -softirq 0 0 0 0 0 0 0 0 0 0 0 -``` - -All fields except for `btime` are always zero. - -TODO(b/37226836): Populate with accurate fields. - -### sys - -```bash -$ ls /proc/sys -kernel vm -``` - -Directory | Notes -:-------- | :---------------------------- -abi | Missing -debug | Missing -dev | Missing -fs | Missing -kernel | Contains hostname (only) -net | Missing -user | Missing -vm | Contains mmap_min_addr (only) - -### uptime - -```bash -$ cat /proc/uptime -3204.62 0.00 -``` - -Column | Notes -:------------------------------- | :---------------------------- -Total num seconds system running | Time since procfs was mounted -Number of seconds idle | Always zero - -### version - -```bash -$ cat /proc/version -Linux version 4.4 #1 SMP Sun Jan 10 15:06:54 PST 2016 -``` - -## Process-specific data - -The following files are implemented: - -File /proc/PID | Content -:---------------------- | :--------------------------------------------------- -[auxv](#auxv) | Copy of auxiliary vector for the process -[cmdline](#cmdline) | Command line arguments -[comm](#comm) | Command name associated with the process -[environ](#environ) | Process environment -[exe](#exe) | Symlink to the process's executable -[fd](#fd) | Directory containing links to open file descriptors -[fdinfo](#fdinfo) | Information associated with open file descriptors -[gid_map](#gid_map) | Mappings for group IDs inside the user namespace -[io](#io) | IO statistics -[maps](#maps) | Memory mappings (anon, executables, library files) -[mounts](#mounts) | Mounted filesystems -[mountinfo](#mountinfo) | Information about mounts -[ns](#ns) | Directory containing info about supported namespaces -[stat](#stat) | Process statistics -[statm](#statm) | Process memory statistics -[status](#status) | Process status in human readable format -[task](#task) | Directory containing info about running threads -[uid_map](#uid_map) | Mappings for user IDs inside the user namespace - -### auxv - -TODO - -### cmdline - -TODO - -### comm - -TODO - -### environment - -TODO - -### exe - -TODO - -### fd - -TODO - -### fdinfo - -TODO - -### gid_map - -TODO - -### io - -Only has data for rchar, wchar, syscr, and syscw. - -TODO: add more detail. - -### maps - -TODO - -### mounts - -TODO - -### mountinfo - -TODO - -### ns - -TODO - -### stat - -Only has data for pid, comm, state, ppid, utime, stime, cutime, cstime, -num_threads, and exit_signal. - -TODO: add more detail. - -### statm - -Only has data for vss and rss. - -TODO: add more detail. - -### status - -Contains data for Name, State, Tgid, Pid, Ppid, TracerPid, FDSize, VmSize, -VmRSS, Threads, CapInh, CapPrm, CapEff, CapBnd, Seccomp. - -TODO: add more detail. - -### task - -TODO - -### uid_map - -TODO diff --git a/pkg/sentry/fs/proc/device/BUILD b/pkg/sentry/fs/proc/device/BUILD deleted file mode 100644 index 52c9aa93d..000000000 --- a/pkg/sentry/fs/proc/device/BUILD +++ /dev/null @@ -1,10 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "device", - srcs = ["device.go"], - visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/sentry/device"], -) diff --git a/pkg/sentry/fs/proc/device/device_state_autogen.go b/pkg/sentry/fs/proc/device/device_state_autogen.go new file mode 100644 index 000000000..4a5e3cc88 --- /dev/null +++ b/pkg/sentry/fs/proc/device/device_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package device diff --git a/pkg/sentry/fs/proc/net_test.go b/pkg/sentry/fs/proc/net_test.go deleted file mode 100644 index f18681405..000000000 --- a/pkg/sentry/fs/proc/net_test.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/inet" -) - -func newIPv6TestStack() *inet.TestStack { - s := inet.NewTestStack() - s.SupportsIPv6Flag = true - return s -} - -func TestIfinet6NoAddresses(t *testing.T) { - n := &ifinet6{s: newIPv6TestStack()} - if got := n.contents(); got != nil { - t.Errorf("Got n.contents() = %v, want = %v", got, nil) - } -} - -func TestIfinet6(t *testing.T) { - s := newIPv6TestStack() - s.InterfacesMap[1] = inet.Interface{Name: "eth0"} - s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - }, - } - s.InterfacesMap[2] = inet.Interface{Name: "eth1"} - s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"), - }, - } - want := map[string]struct{}{ - "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {}, - "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {}, - } - - n := &ifinet6{s: s} - contents := n.contents() - if len(contents) != len(want) { - t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want)) - } - got := map[string]struct{}{} - for _, l := range contents { - got[l] = struct{}{} - } - - if !reflect.DeepEqual(got, want) { - t.Errorf("Got n.contents() = %v, want = %v", got, want) - } -} diff --git a/pkg/sentry/fs/proc/proc_state_autogen.go b/pkg/sentry/fs/proc/proc_state_autogen.go new file mode 100644 index 000000000..d39cd1d2e --- /dev/null +++ b/pkg/sentry/fs/proc/proc_state_autogen.go @@ -0,0 +1,1575 @@ +// automatically generated by stateify. + +package proc + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (i *execArgInode) StateTypeName() string { + return "pkg/sentry/fs/proc.execArgInode" +} + +func (i *execArgInode) StateFields() []string { + return []string{ + "SimpleFileInode", + "arg", + "t", + } +} + +func (i *execArgInode) beforeSave() {} + +func (i *execArgInode) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.SimpleFileInode) + stateSinkObject.Save(1, &i.arg) + stateSinkObject.Save(2, &i.t) +} + +func (i *execArgInode) afterLoad() {} + +func (i *execArgInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.SimpleFileInode) + stateSourceObject.Load(1, &i.arg) + stateSourceObject.Load(2, &i.t) +} + +func (f *execArgFile) StateTypeName() string { + return "pkg/sentry/fs/proc.execArgFile" +} + +func (f *execArgFile) StateFields() []string { + return []string{ + "arg", + "t", + } +} + +func (f *execArgFile) beforeSave() {} + +func (f *execArgFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.arg) + stateSinkObject.Save(1, &f.t) +} + +func (f *execArgFile) afterLoad() {} + +func (f *execArgFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.arg) + stateSourceObject.Load(1, &f.t) +} + +func (f *fdDir) StateTypeName() string { + return "pkg/sentry/fs/proc.fdDir" +} + +func (f *fdDir) StateFields() []string { + return []string{ + "Dir", + "t", + } +} + +func (f *fdDir) beforeSave() {} + +func (f *fdDir) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.Dir) + stateSinkObject.Save(1, &f.t) +} + +func (f *fdDir) afterLoad() {} + +func (f *fdDir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.Dir) + stateSourceObject.Load(1, &f.t) +} + +func (f *fdDirFile) StateTypeName() string { + return "pkg/sentry/fs/proc.fdDirFile" +} + +func (f *fdDirFile) StateFields() []string { + return []string{ + "isInfoFile", + "t", + } +} + +func (f *fdDirFile) beforeSave() {} + +func (f *fdDirFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.isInfoFile) + stateSinkObject.Save(1, &f.t) +} + +func (f *fdDirFile) afterLoad() {} + +func (f *fdDirFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.isInfoFile) + stateSourceObject.Load(1, &f.t) +} + +func (fdid *fdInfoDir) StateTypeName() string { + return "pkg/sentry/fs/proc.fdInfoDir" +} + +func (fdid *fdInfoDir) StateFields() []string { + return []string{ + "Dir", + "t", + } +} + +func (fdid *fdInfoDir) beforeSave() {} + +func (fdid *fdInfoDir) StateSave(stateSinkObject state.Sink) { + fdid.beforeSave() + stateSinkObject.Save(0, &fdid.Dir) + stateSinkObject.Save(1, &fdid.t) +} + +func (fdid *fdInfoDir) afterLoad() {} + +func (fdid *fdInfoDir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &fdid.Dir) + stateSourceObject.Load(1, &fdid.t) +} + +func (f *filesystemsData) StateTypeName() string { + return "pkg/sentry/fs/proc.filesystemsData" +} + +func (f *filesystemsData) StateFields() []string { + return []string{} +} + +func (f *filesystemsData) beforeSave() {} + +func (f *filesystemsData) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystemsData) afterLoad() {} + +func (f *filesystemsData) StateLoad(stateSourceObject state.Source) { +} + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/proc.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (i *taskOwnedInodeOps) StateTypeName() string { + return "pkg/sentry/fs/proc.taskOwnedInodeOps" +} + +func (i *taskOwnedInodeOps) StateFields() []string { + return []string{ + "InodeOperations", + "t", + } +} + +func (i *taskOwnedInodeOps) beforeSave() {} + +func (i *taskOwnedInodeOps) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.InodeOperations) + stateSinkObject.Save(1, &i.t) +} + +func (i *taskOwnedInodeOps) afterLoad() {} + +func (i *taskOwnedInodeOps) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeOperations) + stateSourceObject.Load(1, &i.t) +} + +func (s *staticFileInodeOps) StateTypeName() string { + return "pkg/sentry/fs/proc.staticFileInodeOps" +} + +func (s *staticFileInodeOps) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeStaticFileGetter", + } +} + +func (s *staticFileInodeOps) beforeSave() {} + +func (s *staticFileInodeOps) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeSimpleAttributes) + stateSinkObject.Save(1, &s.InodeStaticFileGetter) +} + +func (s *staticFileInodeOps) afterLoad() {} + +func (s *staticFileInodeOps) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeSimpleAttributes) + stateSourceObject.Load(1, &s.InodeStaticFileGetter) +} + +func (d *loadavgData) StateTypeName() string { + return "pkg/sentry/fs/proc.loadavgData" +} + +func (d *loadavgData) StateFields() []string { + return []string{} +} + +func (d *loadavgData) beforeSave() {} + +func (d *loadavgData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() +} + +func (d *loadavgData) afterLoad() {} + +func (d *loadavgData) StateLoad(stateSourceObject state.Source) { +} + +func (d *meminfoData) StateTypeName() string { + return "pkg/sentry/fs/proc.meminfoData" +} + +func (d *meminfoData) StateFields() []string { + return []string{ + "k", + } +} + +func (d *meminfoData) beforeSave() {} + +func (d *meminfoData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.k) +} + +func (d *meminfoData) afterLoad() {} + +func (d *meminfoData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.k) +} + +func (mif *mountInfoFile) StateTypeName() string { + return "pkg/sentry/fs/proc.mountInfoFile" +} + +func (mif *mountInfoFile) StateFields() []string { + return []string{ + "t", + } +} + +func (mif *mountInfoFile) beforeSave() {} + +func (mif *mountInfoFile) StateSave(stateSinkObject state.Sink) { + mif.beforeSave() + stateSinkObject.Save(0, &mif.t) +} + +func (mif *mountInfoFile) afterLoad() {} + +func (mif *mountInfoFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mif.t) +} + +func (mf *mountsFile) StateTypeName() string { + return "pkg/sentry/fs/proc.mountsFile" +} + +func (mf *mountsFile) StateFields() []string { + return []string{ + "t", + } +} + +func (mf *mountsFile) beforeSave() {} + +func (mf *mountsFile) StateSave(stateSinkObject state.Sink) { + mf.beforeSave() + stateSinkObject.Save(0, &mf.t) +} + +func (mf *mountsFile) afterLoad() {} + +func (mf *mountsFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mf.t) +} + +func (n *ifinet6) StateTypeName() string { + return "pkg/sentry/fs/proc.ifinet6" +} + +func (n *ifinet6) StateFields() []string { + return []string{ + "s", + } +} + +func (n *ifinet6) beforeSave() {} + +func (n *ifinet6) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.s) +} + +func (n *ifinet6) afterLoad() {} + +func (n *ifinet6) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.s) +} + +func (n *netDev) StateTypeName() string { + return "pkg/sentry/fs/proc.netDev" +} + +func (n *netDev) StateFields() []string { + return []string{ + "s", + } +} + +func (n *netDev) beforeSave() {} + +func (n *netDev) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.s) +} + +func (n *netDev) afterLoad() {} + +func (n *netDev) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.s) +} + +func (n *netSnmp) StateTypeName() string { + return "pkg/sentry/fs/proc.netSnmp" +} + +func (n *netSnmp) StateFields() []string { + return []string{ + "s", + } +} + +func (n *netSnmp) beforeSave() {} + +func (n *netSnmp) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.s) +} + +func (n *netSnmp) afterLoad() {} + +func (n *netSnmp) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.s) +} + +func (n *netRoute) StateTypeName() string { + return "pkg/sentry/fs/proc.netRoute" +} + +func (n *netRoute) StateFields() []string { + return []string{ + "s", + } +} + +func (n *netRoute) beforeSave() {} + +func (n *netRoute) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.s) +} + +func (n *netRoute) afterLoad() {} + +func (n *netRoute) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.s) +} + +func (n *netUnix) StateTypeName() string { + return "pkg/sentry/fs/proc.netUnix" +} + +func (n *netUnix) StateFields() []string { + return []string{ + "k", + } +} + +func (n *netUnix) beforeSave() {} + +func (n *netUnix) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.k) +} + +func (n *netUnix) afterLoad() {} + +func (n *netUnix) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.k) +} + +func (n *netTCP) StateTypeName() string { + return "pkg/sentry/fs/proc.netTCP" +} + +func (n *netTCP) StateFields() []string { + return []string{ + "k", + } +} + +func (n *netTCP) beforeSave() {} + +func (n *netTCP) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.k) +} + +func (n *netTCP) afterLoad() {} + +func (n *netTCP) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.k) +} + +func (n *netTCP6) StateTypeName() string { + return "pkg/sentry/fs/proc.netTCP6" +} + +func (n *netTCP6) StateFields() []string { + return []string{ + "k", + } +} + +func (n *netTCP6) beforeSave() {} + +func (n *netTCP6) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.k) +} + +func (n *netTCP6) afterLoad() {} + +func (n *netTCP6) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.k) +} + +func (n *netUDP) StateTypeName() string { + return "pkg/sentry/fs/proc.netUDP" +} + +func (n *netUDP) StateFields() []string { + return []string{ + "k", + } +} + +func (n *netUDP) beforeSave() {} + +func (n *netUDP) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.k) +} + +func (n *netUDP) afterLoad() {} + +func (n *netUDP) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.k) +} + +func (p *proc) StateTypeName() string { + return "pkg/sentry/fs/proc.proc" +} + +func (p *proc) StateFields() []string { + return []string{ + "Dir", + "k", + "pidns", + "cgroupControllers", + } +} + +func (p *proc) beforeSave() {} + +func (p *proc) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.Dir) + stateSinkObject.Save(1, &p.k) + stateSinkObject.Save(2, &p.pidns) + stateSinkObject.Save(3, &p.cgroupControllers) +} + +func (p *proc) afterLoad() {} + +func (p *proc) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.Dir) + stateSourceObject.Load(1, &p.k) + stateSourceObject.Load(2, &p.pidns) + stateSourceObject.Load(3, &p.cgroupControllers) +} + +func (s *self) StateTypeName() string { + return "pkg/sentry/fs/proc.self" +} + +func (s *self) StateFields() []string { + return []string{ + "Symlink", + "pidns", + } +} + +func (s *self) beforeSave() {} + +func (s *self) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Symlink) + stateSinkObject.Save(1, &s.pidns) +} + +func (s *self) afterLoad() {} + +func (s *self) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Symlink) + stateSourceObject.Load(1, &s.pidns) +} + +func (s *threadSelf) StateTypeName() string { + return "pkg/sentry/fs/proc.threadSelf" +} + +func (s *threadSelf) StateFields() []string { + return []string{ + "Symlink", + "pidns", + } +} + +func (s *threadSelf) beforeSave() {} + +func (s *threadSelf) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Symlink) + stateSinkObject.Save(1, &s.pidns) +} + +func (s *threadSelf) afterLoad() {} + +func (s *threadSelf) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Symlink) + stateSourceObject.Load(1, &s.pidns) +} + +func (rpf *rootProcFile) StateTypeName() string { + return "pkg/sentry/fs/proc.rootProcFile" +} + +func (rpf *rootProcFile) StateFields() []string { + return []string{ + "iops", + } +} + +func (rpf *rootProcFile) beforeSave() {} + +func (rpf *rootProcFile) StateSave(stateSinkObject state.Sink) { + rpf.beforeSave() + stateSinkObject.Save(0, &rpf.iops) +} + +func (rpf *rootProcFile) afterLoad() {} + +func (rpf *rootProcFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &rpf.iops) +} + +func (s *statData) StateTypeName() string { + return "pkg/sentry/fs/proc.statData" +} + +func (s *statData) StateFields() []string { + return []string{ + "k", + } +} + +func (s *statData) beforeSave() {} + +func (s *statData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.k) +} + +func (s *statData) afterLoad() {} + +func (s *statData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.k) +} + +func (d *mmapMinAddrData) StateTypeName() string { + return "pkg/sentry/fs/proc.mmapMinAddrData" +} + +func (d *mmapMinAddrData) StateFields() []string { + return []string{ + "k", + } +} + +func (d *mmapMinAddrData) beforeSave() {} + +func (d *mmapMinAddrData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.k) +} + +func (d *mmapMinAddrData) afterLoad() {} + +func (d *mmapMinAddrData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.k) +} + +func (o *overcommitMemory) StateTypeName() string { + return "pkg/sentry/fs/proc.overcommitMemory" +} + +func (o *overcommitMemory) StateFields() []string { + return []string{} +} + +func (o *overcommitMemory) beforeSave() {} + +func (o *overcommitMemory) StateSave(stateSinkObject state.Sink) { + o.beforeSave() +} + +func (o *overcommitMemory) afterLoad() {} + +func (o *overcommitMemory) StateLoad(stateSourceObject state.Source) { +} + +func (h *hostname) StateTypeName() string { + return "pkg/sentry/fs/proc.hostname" +} + +func (h *hostname) StateFields() []string { + return []string{ + "SimpleFileInode", + } +} + +func (h *hostname) beforeSave() {} + +func (h *hostname) StateSave(stateSinkObject state.Sink) { + h.beforeSave() + stateSinkObject.Save(0, &h.SimpleFileInode) +} + +func (h *hostname) afterLoad() {} + +func (h *hostname) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &h.SimpleFileInode) +} + +func (hf *hostnameFile) StateTypeName() string { + return "pkg/sentry/fs/proc.hostnameFile" +} + +func (hf *hostnameFile) StateFields() []string { + return []string{} +} + +func (hf *hostnameFile) beforeSave() {} + +func (hf *hostnameFile) StateSave(stateSinkObject state.Sink) { + hf.beforeSave() +} + +func (hf *hostnameFile) afterLoad() {} + +func (hf *hostnameFile) StateLoad(stateSourceObject state.Source) { +} + +func (t *tcpMemInode) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpMemInode" +} + +func (t *tcpMemInode) StateFields() []string { + return []string{ + "SimpleFileInode", + "dir", + "s", + "size", + } +} + +func (t *tcpMemInode) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.SimpleFileInode) + stateSinkObject.Save(1, &t.dir) + stateSinkObject.Save(2, &t.s) + stateSinkObject.Save(3, &t.size) +} + +func (t *tcpMemInode) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.SimpleFileInode) + stateSourceObject.Load(1, &t.dir) + stateSourceObject.LoadWait(2, &t.s) + stateSourceObject.Load(3, &t.size) + stateSourceObject.AfterLoad(t.afterLoad) +} + +func (f *tcpMemFile) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpMemFile" +} + +func (f *tcpMemFile) StateFields() []string { + return []string{ + "tcpMemInode", + } +} + +func (f *tcpMemFile) beforeSave() {} + +func (f *tcpMemFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.tcpMemInode) +} + +func (f *tcpMemFile) afterLoad() {} + +func (f *tcpMemFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.tcpMemInode) +} + +func (s *tcpSack) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpSack" +} + +func (s *tcpSack) StateFields() []string { + return []string{ + "SimpleFileInode", + "stack", + "enabled", + } +} + +func (s *tcpSack) beforeSave() {} + +func (s *tcpSack) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.SimpleFileInode) + stateSinkObject.Save(1, &s.stack) + stateSinkObject.Save(2, &s.enabled) +} + +func (s *tcpSack) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.SimpleFileInode) + stateSourceObject.LoadWait(1, &s.stack) + stateSourceObject.Load(2, &s.enabled) + stateSourceObject.AfterLoad(s.afterLoad) +} + +func (f *tcpSackFile) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpSackFile" +} + +func (f *tcpSackFile) StateFields() []string { + return []string{ + "tcpSack", + "stack", + } +} + +func (f *tcpSackFile) beforeSave() {} + +func (f *tcpSackFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.tcpSack) + stateSinkObject.Save(1, &f.stack) +} + +func (f *tcpSackFile) afterLoad() {} + +func (f *tcpSackFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.tcpSack) + stateSourceObject.LoadWait(1, &f.stack) +} + +func (r *tcpRecovery) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpRecovery" +} + +func (r *tcpRecovery) StateFields() []string { + return []string{ + "SimpleFileInode", + "stack", + "recovery", + } +} + +func (r *tcpRecovery) beforeSave() {} + +func (r *tcpRecovery) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.SimpleFileInode) + stateSinkObject.Save(1, &r.stack) + stateSinkObject.Save(2, &r.recovery) +} + +func (r *tcpRecovery) afterLoad() {} + +func (r *tcpRecovery) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.SimpleFileInode) + stateSourceObject.LoadWait(1, &r.stack) + stateSourceObject.Load(2, &r.recovery) +} + +func (f *tcpRecoveryFile) StateTypeName() string { + return "pkg/sentry/fs/proc.tcpRecoveryFile" +} + +func (f *tcpRecoveryFile) StateFields() []string { + return []string{ + "tcpRecovery", + "stack", + } +} + +func (f *tcpRecoveryFile) beforeSave() {} + +func (f *tcpRecoveryFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.tcpRecovery) + stateSinkObject.Save(1, &f.stack) +} + +func (f *tcpRecoveryFile) afterLoad() {} + +func (f *tcpRecoveryFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.tcpRecovery) + stateSourceObject.LoadWait(1, &f.stack) +} + +func (ipf *ipForwarding) StateTypeName() string { + return "pkg/sentry/fs/proc.ipForwarding" +} + +func (ipf *ipForwarding) StateFields() []string { + return []string{ + "SimpleFileInode", + "stack", + "enabled", + } +} + +func (ipf *ipForwarding) beforeSave() {} + +func (ipf *ipForwarding) StateSave(stateSinkObject state.Sink) { + ipf.beforeSave() + stateSinkObject.Save(0, &ipf.SimpleFileInode) + stateSinkObject.Save(1, &ipf.stack) + stateSinkObject.Save(2, &ipf.enabled) +} + +func (ipf *ipForwarding) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &ipf.SimpleFileInode) + stateSourceObject.LoadWait(1, &ipf.stack) + stateSourceObject.Load(2, &ipf.enabled) + stateSourceObject.AfterLoad(ipf.afterLoad) +} + +func (f *ipForwardingFile) StateTypeName() string { + return "pkg/sentry/fs/proc.ipForwardingFile" +} + +func (f *ipForwardingFile) StateFields() []string { + return []string{ + "ipf", + "stack", + } +} + +func (f *ipForwardingFile) beforeSave() {} + +func (f *ipForwardingFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.ipf) + stateSinkObject.Save(1, &f.stack) +} + +func (f *ipForwardingFile) afterLoad() {} + +func (f *ipForwardingFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.ipf) + stateSourceObject.LoadWait(1, &f.stack) +} + +func (t *taskDir) StateTypeName() string { + return "pkg/sentry/fs/proc.taskDir" +} + +func (t *taskDir) StateFields() []string { + return []string{ + "Dir", + "t", + } +} + +func (t *taskDir) beforeSave() {} + +func (t *taskDir) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + stateSinkObject.Save(0, &t.Dir) + stateSinkObject.Save(1, &t.t) +} + +func (t *taskDir) afterLoad() {} + +func (t *taskDir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.Dir) + stateSourceObject.Load(1, &t.t) +} + +func (s *subtasks) StateTypeName() string { + return "pkg/sentry/fs/proc.subtasks" +} + +func (s *subtasks) StateFields() []string { + return []string{ + "Dir", + "t", + "p", + } +} + +func (s *subtasks) beforeSave() {} + +func (s *subtasks) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Dir) + stateSinkObject.Save(1, &s.t) + stateSinkObject.Save(2, &s.p) +} + +func (s *subtasks) afterLoad() {} + +func (s *subtasks) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Dir) + stateSourceObject.Load(1, &s.t) + stateSourceObject.Load(2, &s.p) +} + +func (f *subtasksFile) StateTypeName() string { + return "pkg/sentry/fs/proc.subtasksFile" +} + +func (f *subtasksFile) StateFields() []string { + return []string{ + "t", + "pidns", + } +} + +func (f *subtasksFile) beforeSave() {} + +func (f *subtasksFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.t) + stateSinkObject.Save(1, &f.pidns) +} + +func (f *subtasksFile) afterLoad() {} + +func (f *subtasksFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.t) + stateSourceObject.Load(1, &f.pidns) +} + +func (e *exe) StateTypeName() string { + return "pkg/sentry/fs/proc.exe" +} + +func (e *exe) StateFields() []string { + return []string{ + "Symlink", + "t", + } +} + +func (e *exe) beforeSave() {} + +func (e *exe) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.Symlink) + stateSinkObject.Save(1, &e.t) +} + +func (e *exe) afterLoad() {} + +func (e *exe) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.Symlink) + stateSourceObject.Load(1, &e.t) +} + +func (e *cwd) StateTypeName() string { + return "pkg/sentry/fs/proc.cwd" +} + +func (e *cwd) StateFields() []string { + return []string{ + "Symlink", + "t", + } +} + +func (e *cwd) beforeSave() {} + +func (e *cwd) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.Symlink) + stateSinkObject.Save(1, &e.t) +} + +func (e *cwd) afterLoad() {} + +func (e *cwd) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.Symlink) + stateSourceObject.Load(1, &e.t) +} + +func (n *namespaceSymlink) StateTypeName() string { + return "pkg/sentry/fs/proc.namespaceSymlink" +} + +func (n *namespaceSymlink) StateFields() []string { + return []string{ + "Symlink", + "t", + } +} + +func (n *namespaceSymlink) beforeSave() {} + +func (n *namespaceSymlink) StateSave(stateSinkObject state.Sink) { + n.beforeSave() + stateSinkObject.Save(0, &n.Symlink) + stateSinkObject.Save(1, &n.t) +} + +func (n *namespaceSymlink) afterLoad() {} + +func (n *namespaceSymlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &n.Symlink) + stateSourceObject.Load(1, &n.t) +} + +func (md *mapsData) StateTypeName() string { + return "pkg/sentry/fs/proc.mapsData" +} + +func (md *mapsData) StateFields() []string { + return []string{ + "t", + } +} + +func (md *mapsData) beforeSave() {} + +func (md *mapsData) StateSave(stateSinkObject state.Sink) { + md.beforeSave() + stateSinkObject.Save(0, &md.t) +} + +func (md *mapsData) afterLoad() {} + +func (md *mapsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &md.t) +} + +func (sd *smapsData) StateTypeName() string { + return "pkg/sentry/fs/proc.smapsData" +} + +func (sd *smapsData) StateFields() []string { + return []string{ + "t", + } +} + +func (sd *smapsData) beforeSave() {} + +func (sd *smapsData) StateSave(stateSinkObject state.Sink) { + sd.beforeSave() + stateSinkObject.Save(0, &sd.t) +} + +func (sd *smapsData) afterLoad() {} + +func (sd *smapsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sd.t) +} + +func (s *taskStatData) StateTypeName() string { + return "pkg/sentry/fs/proc.taskStatData" +} + +func (s *taskStatData) StateFields() []string { + return []string{ + "t", + "tgstats", + "pidns", + } +} + +func (s *taskStatData) beforeSave() {} + +func (s *taskStatData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.t) + stateSinkObject.Save(1, &s.tgstats) + stateSinkObject.Save(2, &s.pidns) +} + +func (s *taskStatData) afterLoad() {} + +func (s *taskStatData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.t) + stateSourceObject.Load(1, &s.tgstats) + stateSourceObject.Load(2, &s.pidns) +} + +func (s *statmData) StateTypeName() string { + return "pkg/sentry/fs/proc.statmData" +} + +func (s *statmData) StateFields() []string { + return []string{ + "t", + } +} + +func (s *statmData) beforeSave() {} + +func (s *statmData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.t) +} + +func (s *statmData) afterLoad() {} + +func (s *statmData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.t) +} + +func (s *statusData) StateTypeName() string { + return "pkg/sentry/fs/proc.statusData" +} + +func (s *statusData) StateFields() []string { + return []string{ + "t", + "pidns", + } +} + +func (s *statusData) beforeSave() {} + +func (s *statusData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.t) + stateSinkObject.Save(1, &s.pidns) +} + +func (s *statusData) afterLoad() {} + +func (s *statusData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.t) + stateSourceObject.Load(1, &s.pidns) +} + +func (i *ioData) StateTypeName() string { + return "pkg/sentry/fs/proc.ioData" +} + +func (i *ioData) StateFields() []string { + return []string{ + "ioUsage", + } +} + +func (i *ioData) beforeSave() {} + +func (i *ioData) StateSave(stateSinkObject state.Sink) { + i.beforeSave() + stateSinkObject.Save(0, &i.ioUsage) +} + +func (i *ioData) afterLoad() {} + +func (i *ioData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.ioUsage) +} + +func (c *comm) StateTypeName() string { + return "pkg/sentry/fs/proc.comm" +} + +func (c *comm) StateFields() []string { + return []string{ + "SimpleFileInode", + "t", + } +} + +func (c *comm) beforeSave() {} + +func (c *comm) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.SimpleFileInode) + stateSinkObject.Save(1, &c.t) +} + +func (c *comm) afterLoad() {} + +func (c *comm) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.SimpleFileInode) + stateSourceObject.Load(1, &c.t) +} + +func (f *commFile) StateTypeName() string { + return "pkg/sentry/fs/proc.commFile" +} + +func (f *commFile) StateFields() []string { + return []string{ + "t", + } +} + +func (f *commFile) beforeSave() {} + +func (f *commFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.t) +} + +func (f *commFile) afterLoad() {} + +func (f *commFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.t) +} + +func (a *auxvec) StateTypeName() string { + return "pkg/sentry/fs/proc.auxvec" +} + +func (a *auxvec) StateFields() []string { + return []string{ + "SimpleFileInode", + "t", + } +} + +func (a *auxvec) beforeSave() {} + +func (a *auxvec) StateSave(stateSinkObject state.Sink) { + a.beforeSave() + stateSinkObject.Save(0, &a.SimpleFileInode) + stateSinkObject.Save(1, &a.t) +} + +func (a *auxvec) afterLoad() {} + +func (a *auxvec) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &a.SimpleFileInode) + stateSourceObject.Load(1, &a.t) +} + +func (f *auxvecFile) StateTypeName() string { + return "pkg/sentry/fs/proc.auxvecFile" +} + +func (f *auxvecFile) StateFields() []string { + return []string{ + "t", + } +} + +func (f *auxvecFile) beforeSave() {} + +func (f *auxvecFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.t) +} + +func (f *auxvecFile) afterLoad() {} + +func (f *auxvecFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.t) +} + +func (o *oomScoreAdj) StateTypeName() string { + return "pkg/sentry/fs/proc.oomScoreAdj" +} + +func (o *oomScoreAdj) StateFields() []string { + return []string{ + "SimpleFileInode", + "t", + } +} + +func (o *oomScoreAdj) beforeSave() {} + +func (o *oomScoreAdj) StateSave(stateSinkObject state.Sink) { + o.beforeSave() + stateSinkObject.Save(0, &o.SimpleFileInode) + stateSinkObject.Save(1, &o.t) +} + +func (o *oomScoreAdj) afterLoad() {} + +func (o *oomScoreAdj) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &o.SimpleFileInode) + stateSourceObject.Load(1, &o.t) +} + +func (f *oomScoreAdjFile) StateTypeName() string { + return "pkg/sentry/fs/proc.oomScoreAdjFile" +} + +func (f *oomScoreAdjFile) StateFields() []string { + return []string{ + "t", + } +} + +func (f *oomScoreAdjFile) beforeSave() {} + +func (f *oomScoreAdjFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.t) +} + +func (f *oomScoreAdjFile) afterLoad() {} + +func (f *oomScoreAdjFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.t) +} + +func (imio *idMapInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/proc.idMapInodeOperations" +} + +func (imio *idMapInodeOperations) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeSimpleExtendedAttributes", + "t", + "gids", + } +} + +func (imio *idMapInodeOperations) beforeSave() {} + +func (imio *idMapInodeOperations) StateSave(stateSinkObject state.Sink) { + imio.beforeSave() + stateSinkObject.Save(0, &imio.InodeSimpleAttributes) + stateSinkObject.Save(1, &imio.InodeSimpleExtendedAttributes) + stateSinkObject.Save(2, &imio.t) + stateSinkObject.Save(3, &imio.gids) +} + +func (imio *idMapInodeOperations) afterLoad() {} + +func (imio *idMapInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &imio.InodeSimpleAttributes) + stateSourceObject.Load(1, &imio.InodeSimpleExtendedAttributes) + stateSourceObject.Load(2, &imio.t) + stateSourceObject.Load(3, &imio.gids) +} + +func (imfo *idMapFileOperations) StateTypeName() string { + return "pkg/sentry/fs/proc.idMapFileOperations" +} + +func (imfo *idMapFileOperations) StateFields() []string { + return []string{ + "iops", + } +} + +func (imfo *idMapFileOperations) beforeSave() {} + +func (imfo *idMapFileOperations) StateSave(stateSinkObject state.Sink) { + imfo.beforeSave() + stateSinkObject.Save(0, &imfo.iops) +} + +func (imfo *idMapFileOperations) afterLoad() {} + +func (imfo *idMapFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &imfo.iops) +} + +func (u *uptime) StateTypeName() string { + return "pkg/sentry/fs/proc.uptime" +} + +func (u *uptime) StateFields() []string { + return []string{ + "SimpleFileInode", + "startTime", + } +} + +func (u *uptime) beforeSave() {} + +func (u *uptime) StateSave(stateSinkObject state.Sink) { + u.beforeSave() + stateSinkObject.Save(0, &u.SimpleFileInode) + stateSinkObject.Save(1, &u.startTime) +} + +func (u *uptime) afterLoad() {} + +func (u *uptime) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &u.SimpleFileInode) + stateSourceObject.Load(1, &u.startTime) +} + +func (f *uptimeFile) StateTypeName() string { + return "pkg/sentry/fs/proc.uptimeFile" +} + +func (f *uptimeFile) StateFields() []string { + return []string{ + "startTime", + } +} + +func (f *uptimeFile) beforeSave() {} + +func (f *uptimeFile) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.startTime) +} + +func (f *uptimeFile) afterLoad() {} + +func (f *uptimeFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.startTime) +} + +func (v *versionData) StateTypeName() string { + return "pkg/sentry/fs/proc.versionData" +} + +func (v *versionData) StateFields() []string { + return []string{ + "k", + } +} + +func (v *versionData) beforeSave() {} + +func (v *versionData) StateSave(stateSinkObject state.Sink) { + v.beforeSave() + stateSinkObject.Save(0, &v.k) +} + +func (v *versionData) afterLoad() {} + +func (v *versionData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &v.k) +} + +func init() { + state.Register((*execArgInode)(nil)) + state.Register((*execArgFile)(nil)) + state.Register((*fdDir)(nil)) + state.Register((*fdDirFile)(nil)) + state.Register((*fdInfoDir)(nil)) + state.Register((*filesystemsData)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*taskOwnedInodeOps)(nil)) + state.Register((*staticFileInodeOps)(nil)) + state.Register((*loadavgData)(nil)) + state.Register((*meminfoData)(nil)) + state.Register((*mountInfoFile)(nil)) + state.Register((*mountsFile)(nil)) + state.Register((*ifinet6)(nil)) + state.Register((*netDev)(nil)) + state.Register((*netSnmp)(nil)) + state.Register((*netRoute)(nil)) + state.Register((*netUnix)(nil)) + state.Register((*netTCP)(nil)) + state.Register((*netTCP6)(nil)) + state.Register((*netUDP)(nil)) + state.Register((*proc)(nil)) + state.Register((*self)(nil)) + state.Register((*threadSelf)(nil)) + state.Register((*rootProcFile)(nil)) + state.Register((*statData)(nil)) + state.Register((*mmapMinAddrData)(nil)) + state.Register((*overcommitMemory)(nil)) + state.Register((*hostname)(nil)) + state.Register((*hostnameFile)(nil)) + state.Register((*tcpMemInode)(nil)) + state.Register((*tcpMemFile)(nil)) + state.Register((*tcpSack)(nil)) + state.Register((*tcpSackFile)(nil)) + state.Register((*tcpRecovery)(nil)) + state.Register((*tcpRecoveryFile)(nil)) + state.Register((*ipForwarding)(nil)) + state.Register((*ipForwardingFile)(nil)) + state.Register((*taskDir)(nil)) + state.Register((*subtasks)(nil)) + state.Register((*subtasksFile)(nil)) + state.Register((*exe)(nil)) + state.Register((*cwd)(nil)) + state.Register((*namespaceSymlink)(nil)) + state.Register((*mapsData)(nil)) + state.Register((*smapsData)(nil)) + state.Register((*taskStatData)(nil)) + state.Register((*statmData)(nil)) + state.Register((*statusData)(nil)) + state.Register((*ioData)(nil)) + state.Register((*comm)(nil)) + state.Register((*commFile)(nil)) + state.Register((*auxvec)(nil)) + state.Register((*auxvecFile)(nil)) + state.Register((*oomScoreAdj)(nil)) + state.Register((*oomScoreAdjFile)(nil)) + state.Register((*idMapInodeOperations)(nil)) + state.Register((*idMapFileOperations)(nil)) + state.Register((*uptime)(nil)) + state.Register((*uptimeFile)(nil)) + state.Register((*versionData)(nil)) +} diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD deleted file mode 100644 index 21338d912..000000000 --- a/pkg/sentry/fs/proc/seqfile/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "seqfile", - srcs = ["seqfile.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/proc/device", - "//pkg/sentry/kernel/time", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "seqfile_test", - size = "small", - srcs = ["seqfile_test.go"], - library = ":seqfile", - deps = [ - "//pkg/context", - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/ramfs", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go b/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go new file mode 100644 index 000000000..ab08bbaa3 --- /dev/null +++ b/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go @@ -0,0 +1,100 @@ +// automatically generated by stateify. + +package seqfile + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (s *SeqData) StateTypeName() string { + return "pkg/sentry/fs/proc/seqfile.SeqData" +} + +func (s *SeqData) StateFields() []string { + return []string{ + "Buf", + "Handle", + } +} + +func (s *SeqData) beforeSave() {} + +func (s *SeqData) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Buf) + stateSinkObject.Save(1, &s.Handle) +} + +func (s *SeqData) afterLoad() {} + +func (s *SeqData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Buf) + stateSourceObject.Load(1, &s.Handle) +} + +func (s *SeqFile) StateTypeName() string { + return "pkg/sentry/fs/proc/seqfile.SeqFile" +} + +func (s *SeqFile) StateFields() []string { + return []string{ + "InodeSimpleExtendedAttributes", + "InodeSimpleAttributes", + "SeqSource", + "source", + "generation", + "lastRead", + } +} + +func (s *SeqFile) beforeSave() {} + +func (s *SeqFile) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeSimpleExtendedAttributes) + stateSinkObject.Save(1, &s.InodeSimpleAttributes) + stateSinkObject.Save(2, &s.SeqSource) + stateSinkObject.Save(3, &s.source) + stateSinkObject.Save(4, &s.generation) + stateSinkObject.Save(5, &s.lastRead) +} + +func (s *SeqFile) afterLoad() {} + +func (s *SeqFile) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeSimpleExtendedAttributes) + stateSourceObject.Load(1, &s.InodeSimpleAttributes) + stateSourceObject.Load(2, &s.SeqSource) + stateSourceObject.Load(3, &s.source) + stateSourceObject.Load(4, &s.generation) + stateSourceObject.Load(5, &s.lastRead) +} + +func (sfo *seqFileOperations) StateTypeName() string { + return "pkg/sentry/fs/proc/seqfile.seqFileOperations" +} + +func (sfo *seqFileOperations) StateFields() []string { + return []string{ + "seqFile", + } +} + +func (sfo *seqFileOperations) beforeSave() {} + +func (sfo *seqFileOperations) StateSave(stateSinkObject state.Sink) { + sfo.beforeSave() + stateSinkObject.Save(0, &sfo.seqFile) +} + +func (sfo *seqFileOperations) afterLoad() {} + +func (sfo *seqFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sfo.seqFile) +} + +func init() { + state.Register((*SeqData)(nil)) + state.Register((*SeqFile)(nil)) + state.Register((*seqFileOperations)(nil)) +} diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_test.go b/pkg/sentry/fs/proc/seqfile/seqfile_test.go deleted file mode 100644 index 98e394569..000000000 --- a/pkg/sentry/fs/proc/seqfile/seqfile_test.go +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package seqfile - -import ( - "bytes" - "fmt" - "io" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/usermem" -) - -type seqTest struct { - actual []SeqData - update bool -} - -func (s *seqTest) Init() { - var sq []SeqData - // Create some SeqData. - for i := 0; i < 10; i++ { - var b []byte - for j := 0; j < 10; j++ { - b = append(b, byte(i)) - } - sq = append(sq, SeqData{ - Buf: b, - Handle: &testHandle{i: i}, - }) - } - s.actual = sq -} - -// NeedsUpdate reports whether we need to update the data we've previously read. -func (s *seqTest) NeedsUpdate(int64) bool { - return s.update -} - -// ReadSeqFiledata returns a slice of SeqData which contains elements -// greater than the handle. -func (s *seqTest) ReadSeqFileData(ctx context.Context, handle SeqHandle) ([]SeqData, int64) { - if handle == nil { - return s.actual, 0 - } - h := *handle.(*testHandle) - var ret []SeqData - for _, b := range s.actual { - // We want the next one. - h2 := *b.Handle.(*testHandle) - if h2.i > h.i { - ret = append(ret, b) - } - } - return ret, 0 -} - -// Flatten a slice of slices into one slice. -func flatten(buf ...[]byte) []byte { - var flat []byte - for _, b := range buf { - flat = append(flat, b...) - } - return flat -} - -type testHandle struct { - i int -} - -type testTable struct { - offset int64 - readBufferSize int - expectedData []byte - expectedError error -} - -func runTableTests(ctx context.Context, table []testTable, dirent *fs.Dirent) error { - for _, tt := range table { - file, err := dirent.Inode.InodeOperations.GetFile(ctx, dirent, fs.FileFlags{Read: true}) - if err != nil { - return fmt.Errorf("GetFile returned error: %v", err) - } - - data := make([]byte, tt.readBufferSize) - resultLen, err := file.Preadv(ctx, usermem.BytesIOSequence(data), tt.offset) - if err != tt.expectedError { - return fmt.Errorf("t.Preadv(len: %v, offset: %v) (error) => %v expected %v", tt.readBufferSize, tt.offset, err, tt.expectedError) - } - expectedLen := int64(len(tt.expectedData)) - if resultLen != expectedLen { - // We make this just an error so we wall through and print the data below. - return fmt.Errorf("t.Preadv(len: %v, offset: %v) (size) => %v expected %v", tt.readBufferSize, tt.offset, resultLen, expectedLen) - } - if !bytes.Equal(data[:expectedLen], tt.expectedData) { - return fmt.Errorf("t.Preadv(len: %v, offset: %v) (data) => %v expected %v", tt.readBufferSize, tt.offset, data[:expectedLen], tt.expectedData) - } - } - return nil -} - -func TestSeqFile(t *testing.T) { - testSource := &seqTest{} - testSource.Init() - - // Create a file that can be R/W. - ctx := contexttest.Context(t) - m := fs.NewPseudoMountSource(ctx) - contents := map[string]*fs.Inode{ - "foo": NewSeqFileInode(ctx, testSource, m), - } - root := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0777)) - - // How about opening it? - inode := fs.NewInode(ctx, root, m, fs.StableAttr{Type: fs.Directory}) - dirent2, err := root.Lookup(ctx, inode, "foo") - if err != nil { - t.Fatalf("failed to walk to foo for n2: %v", err) - } - n2 := dirent2.Inode.InodeOperations - file2, err := n2.GetFile(ctx, dirent2, fs.FileFlags{Read: true, Write: true}) - if err != nil { - t.Fatalf("GetFile returned error: %v", err) - } - - // Writing? - if _, err := file2.Writev(ctx, usermem.BytesIOSequence([]byte("test"))); err == nil { - t.Fatalf("managed to write to n2: %v", err) - } - - // How about reading? - dirent3, err := root.Lookup(ctx, inode, "foo") - if err != nil { - t.Fatalf("failed to walk to foo: %v", err) - } - n3 := dirent3.Inode.InodeOperations - if n2 != n3 { - t.Error("got n2 != n3, want same") - } - - testSource.update = true - - table := []testTable{ - // Read past the end. - {100, 4, []byte{}, io.EOF}, - {110, 4, []byte{}, io.EOF}, - {200, 4, []byte{}, io.EOF}, - // Read a truncated first line. - {0, 4, testSource.actual[0].Buf[:4], nil}, - // Read the whole first line. - {0, 10, testSource.actual[0].Buf, nil}, - // Read the whole first line + 5 bytes of second line. - {0, 15, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf[:5]), nil}, - // First 4 bytes of the second line. - {10, 4, testSource.actual[1].Buf[:4], nil}, - // Read the two first lines. - {0, 20, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf), nil}, - // Read three lines. - {0, 30, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf, testSource.actual[2].Buf), nil}, - // Read everything, but use a bigger buffer than necessary. - {0, 150, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf, testSource.actual[2].Buf, testSource.actual[3].Buf, testSource.actual[4].Buf, testSource.actual[5].Buf, testSource.actual[6].Buf, testSource.actual[7].Buf, testSource.actual[8].Buf, testSource.actual[9].Buf), nil}, - // Read the last 3 bytes. - {97, 10, testSource.actual[9].Buf[7:], nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed with testSource.update = %v : %v", testSource.update, err) - } - - // Disable updates and do it again. - testSource.update = false - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed with testSource.update = %v: %v", testSource.update, err) - } -} - -// Test that we behave correctly when the file is updated. -func TestSeqFileFileUpdated(t *testing.T) { - testSource := &seqTest{} - testSource.Init() - testSource.update = true - - // Create a file that can be R/W. - ctx := contexttest.Context(t) - m := fs.NewPseudoMountSource(ctx) - contents := map[string]*fs.Inode{ - "foo": NewSeqFileInode(ctx, testSource, m), - } - root := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0777)) - - // How about opening it? - inode := fs.NewInode(ctx, root, m, fs.StableAttr{Type: fs.Directory}) - dirent2, err := root.Lookup(ctx, inode, "foo") - if err != nil { - t.Fatalf("failed to walk to foo for dirent2: %v", err) - } - - table := []testTable{ - {0, 16, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf[:6]), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed: %v", err) - } - // Delete the first entry. - cut := testSource.actual[0].Buf - testSource.actual = testSource.actual[1:] - - table = []testTable{ - // Try reading buffer 0 with an offset. This will not delete the old data. - {1, 5, cut[1:6], nil}, - // Reset our file by reading at offset 0. - {0, 10, testSource.actual[0].Buf, nil}, - {16, 14, flatten(testSource.actual[1].Buf[6:], testSource.actual[2].Buf), nil}, - // Read the same data a second time. - {16, 14, flatten(testSource.actual[1].Buf[6:], testSource.actual[2].Buf), nil}, - // Read the following two lines. - {30, 20, flatten(testSource.actual[3].Buf, testSource.actual[4].Buf), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after removing first entry: %v", err) - } - - // Add a new duplicate line in the middle (6666...) - after := testSource.actual[5:] - testSource.actual = testSource.actual[:4] - // Note the list must be sorted. - testSource.actual = append(testSource.actual, after[0]) - testSource.actual = append(testSource.actual, after...) - - table = []testTable{ - {50, 20, flatten(testSource.actual[4].Buf, testSource.actual[5].Buf), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after adding middle entry: %v", err) - } - // This will be used in a later test. - oldTestData := testSource.actual - - // Delete everything. - testSource.actual = testSource.actual[:0] - table = []testTable{ - {20, 20, []byte{}, io.EOF}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after removing all entries: %v", err) - } - // Restore some of the data. - testSource.actual = oldTestData[:1] - table = []testTable{ - {6, 20, testSource.actual[0].Buf[6:], nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after adding first entry back: %v", err) - } - - // Re-extend the data - testSource.actual = oldTestData - table = []testTable{ - {30, 20, flatten(testSource.actual[3].Buf, testSource.actual[4].Buf), nil}, - } - if err := runTableTests(ctx, table, dirent2); err != nil { - t.Errorf("runTableTest failed after extending testSource: %v", err) - } -} diff --git a/pkg/sentry/fs/proc/sys_net_test.go b/pkg/sentry/fs/proc/sys_net_test.go deleted file mode 100644 index 6ef5738e7..000000000 --- a/pkg/sentry/fs/proc/sys_net_test.go +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/usermem" -) - -func TestQuerySendBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - s.TCPSendBufSize = inet.TCPBufferSize{100, 200, 300} - tmi := &tcpMemInode{s: s, dir: tcpWMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - buf := make([]byte, 100) - dst := usermem.BytesIOSequence(buf) - n, err := tmf.Read(ctx, nil, dst, 0) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - - if got, want := string(buf[:n]), "100\t200\t300\n"; got != want { - t.Fatalf("Bad string: got %v, want %v", got, want) - } -} - -func TestQueryRecvBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - s.TCPRecvBufSize = inet.TCPBufferSize{100, 200, 300} - tmi := &tcpMemInode{s: s, dir: tcpRMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - buf := make([]byte, 100) - dst := usermem.BytesIOSequence(buf) - n, err := tmf.Read(ctx, nil, dst, 0) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - - if got, want := string(buf[:n]), "100\t200\t300\n"; got != want { - t.Fatalf("Bad string: got %v, want %v", got, want) - } -} - -var cases = []struct { - str string - initial inet.TCPBufferSize - final inet.TCPBufferSize -}{ - { - str: "", - initial: inet.TCPBufferSize{1, 2, 3}, - final: inet.TCPBufferSize{1, 2, 3}, - }, - { - str: "100\n", - initial: inet.TCPBufferSize{1, 100, 200}, - final: inet.TCPBufferSize{100, 100, 200}, - }, - { - str: "100 200 300\n", - initial: inet.TCPBufferSize{1, 2, 3}, - final: inet.TCPBufferSize{100, 200, 300}, - }, -} - -func TestConfigureSendBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - for _, c := range cases { - s.TCPSendBufSize = c.initial - tmi := &tcpMemInode{s: s, dir: tcpWMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - // Write the values. - src := usermem.BytesIOSequence([]byte(c.str)) - if n, err := tmf.Write(ctx, nil, src, 0); n != int64(len(c.str)) || err != nil { - t.Errorf("Write, case = %q: got (%d, %v), wanted (%d, nil)", c.str, n, err, len(c.str)) - } - - // Read the values from the stack and check them. - if s.TCPSendBufSize != c.final { - t.Errorf("TCPSendBufferSize, case = %q: got %v, wanted %v", c.str, s.TCPSendBufSize, c.final) - } - } -} - -func TestConfigureRecvBufferSize(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - for _, c := range cases { - s.TCPRecvBufSize = c.initial - tmi := &tcpMemInode{s: s, dir: tcpRMem} - tmf := &tcpMemFile{tcpMemInode: tmi} - - // Write the values. - src := usermem.BytesIOSequence([]byte(c.str)) - if n, err := tmf.Write(ctx, nil, src, 0); n != int64(len(c.str)) || err != nil { - t.Errorf("Write, case = %q: got (%d, %v), wanted (%d, nil)", c.str, n, err, len(c.str)) - } - - // Read the values from the stack and check them. - if s.TCPRecvBufSize != c.final { - t.Errorf("TCPRecvBufferSize, case = %q: got %v, wanted %v", c.str, s.TCPRecvBufSize, c.final) - } - } -} - -// TestIPForwarding tests the implementation of -// /proc/sys/net/ipv4/ip_forwarding -func TestIPForwarding(t *testing.T) { - ctx := context.Background() - s := inet.NewTestStack() - - var cases = []struct { - comment string - initial bool - str string - final bool - }{ - { - comment: `Forwarding is disabled; write 1 and enable forwarding`, - initial: false, - str: "1", - final: true, - }, - { - comment: `Forwarding is disabled; write 0 and disable forwarding`, - initial: false, - str: "0", - final: false, - }, - { - comment: `Forwarding is enabled; write 1 and enable forwarding`, - initial: true, - str: "1", - final: true, - }, - { - comment: `Forwarding is enabled; write 0 and disable forwarding`, - initial: true, - str: "0", - final: false, - }, - { - comment: `Forwarding is disabled; write 2404 and enable forwarding`, - initial: false, - str: "2404", - final: true, - }, - { - comment: `Forwarding is enabled; write 2404 and enable forwarding`, - initial: true, - str: "2404", - final: true, - }, - } - for _, c := range cases { - t.Run(c.comment, func(t *testing.T) { - s.IPForwarding = c.initial - ipf := &ipForwarding{stack: s} - file := &ipForwardingFile{ - stack: s, - ipf: ipf, - } - - // Write the values. - src := usermem.BytesIOSequence([]byte(c.str)) - if n, err := file.Write(ctx, nil, src, 0); n != int64(len(c.str)) || err != nil { - t.Errorf("file.Write(ctx, nil, %q, 0) = (%d, %v); want (%d, nil)", c.str, n, err, len(c.str)) - } - - // Read the values from the stack and check them. - if got, want := s.IPForwarding, c.final; got != want { - t.Errorf("s.IPForwarding incorrect; got: %v, want: %v", got, want) - } - - }) - } -} diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD deleted file mode 100644 index 8ca823fb3..000000000 --- a/pkg/sentry/fs/ramfs/BUILD +++ /dev/null @@ -1,37 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "ramfs", - srcs = [ - "dir.go", - "socket.go", - "symlink.go", - "tree.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/socket/unix/transport", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "ramfs_test", - size = "small", - srcs = ["tree_test.go"], - library = ":ramfs", - deps = [ - "//pkg/sentry/contexttest", - "//pkg/sentry/fs", - ], -) diff --git a/pkg/sentry/fs/ramfs/ramfs_state_autogen.go b/pkg/sentry/fs/ramfs/ramfs_state_autogen.go new file mode 100644 index 000000000..482d5d391 --- /dev/null +++ b/pkg/sentry/fs/ramfs/ramfs_state_autogen.go @@ -0,0 +1,170 @@ +// automatically generated by stateify. + +package ramfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (d *Dir) StateTypeName() string { + return "pkg/sentry/fs/ramfs.Dir" +} + +func (d *Dir) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeSimpleExtendedAttributes", + "children", + "dentryMap", + } +} + +func (d *Dir) beforeSave() {} + +func (d *Dir) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.InodeSimpleAttributes) + stateSinkObject.Save(1, &d.InodeSimpleExtendedAttributes) + stateSinkObject.Save(2, &d.children) + stateSinkObject.Save(3, &d.dentryMap) +} + +func (d *Dir) afterLoad() {} + +func (d *Dir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.InodeSimpleAttributes) + stateSourceObject.Load(1, &d.InodeSimpleExtendedAttributes) + stateSourceObject.Load(2, &d.children) + stateSourceObject.Load(3, &d.dentryMap) +} + +func (dfo *dirFileOperations) StateTypeName() string { + return "pkg/sentry/fs/ramfs.dirFileOperations" +} + +func (dfo *dirFileOperations) StateFields() []string { + return []string{ + "dirCursor", + "dir", + } +} + +func (dfo *dirFileOperations) beforeSave() {} + +func (dfo *dirFileOperations) StateSave(stateSinkObject state.Sink) { + dfo.beforeSave() + stateSinkObject.Save(0, &dfo.dirCursor) + stateSinkObject.Save(1, &dfo.dir) +} + +func (dfo *dirFileOperations) afterLoad() {} + +func (dfo *dirFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &dfo.dirCursor) + stateSourceObject.Load(1, &dfo.dir) +} + +func (s *Socket) StateTypeName() string { + return "pkg/sentry/fs/ramfs.Socket" +} + +func (s *Socket) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeSimpleExtendedAttributes", + "ep", + } +} + +func (s *Socket) beforeSave() {} + +func (s *Socket) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeSimpleAttributes) + stateSinkObject.Save(1, &s.InodeSimpleExtendedAttributes) + stateSinkObject.Save(2, &s.ep) +} + +func (s *Socket) afterLoad() {} + +func (s *Socket) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeSimpleAttributes) + stateSourceObject.Load(1, &s.InodeSimpleExtendedAttributes) + stateSourceObject.Load(2, &s.ep) +} + +func (s *socketFileOperations) StateTypeName() string { + return "pkg/sentry/fs/ramfs.socketFileOperations" +} + +func (s *socketFileOperations) StateFields() []string { + return []string{} +} + +func (s *socketFileOperations) beforeSave() {} + +func (s *socketFileOperations) StateSave(stateSinkObject state.Sink) { + s.beforeSave() +} + +func (s *socketFileOperations) afterLoad() {} + +func (s *socketFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func (s *Symlink) StateTypeName() string { + return "pkg/sentry/fs/ramfs.Symlink" +} + +func (s *Symlink) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeSimpleExtendedAttributes", + "Target", + } +} + +func (s *Symlink) beforeSave() {} + +func (s *Symlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.InodeSimpleAttributes) + stateSinkObject.Save(1, &s.InodeSimpleExtendedAttributes) + stateSinkObject.Save(2, &s.Target) +} + +func (s *Symlink) afterLoad() {} + +func (s *Symlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.InodeSimpleAttributes) + stateSourceObject.Load(1, &s.InodeSimpleExtendedAttributes) + stateSourceObject.Load(2, &s.Target) +} + +func (s *symlinkFileOperations) StateTypeName() string { + return "pkg/sentry/fs/ramfs.symlinkFileOperations" +} + +func (s *symlinkFileOperations) StateFields() []string { + return []string{} +} + +func (s *symlinkFileOperations) beforeSave() {} + +func (s *symlinkFileOperations) StateSave(stateSinkObject state.Sink) { + s.beforeSave() +} + +func (s *symlinkFileOperations) afterLoad() {} + +func (s *symlinkFileOperations) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*Dir)(nil)) + state.Register((*dirFileOperations)(nil)) + state.Register((*Socket)(nil)) + state.Register((*socketFileOperations)(nil)) + state.Register((*Symlink)(nil)) + state.Register((*symlinkFileOperations)(nil)) +} diff --git a/pkg/sentry/fs/ramfs/tree_test.go b/pkg/sentry/fs/ramfs/tree_test.go deleted file mode 100644 index 3e0d1e07e..000000000 --- a/pkg/sentry/fs/ramfs/tree_test.go +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ramfs - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -func TestMakeDirectoryTree(t *testing.T) { - - for _, test := range []struct { - name string - subdirs []string - }{ - { - name: "abs paths", - subdirs: []string{ - "/tmp", - "/tmp/a/b", - "/tmp/a/c/d", - "/tmp/c", - "/proc", - "/dev/a/b", - "/tmp", - }, - }, - { - name: "rel paths", - subdirs: []string{ - "tmp", - "tmp/a/b", - "tmp/a/c/d", - "tmp/c", - "proc", - "dev/a/b", - "tmp", - }, - }, - } { - ctx := contexttest.Context(t) - mount := fs.NewPseudoMountSource(ctx) - tree, err := MakeDirectoryTree(ctx, mount, test.subdirs) - if err != nil { - t.Errorf("%s: failed to make ramfs tree, got error %v, want nil", test.name, err) - continue - } - - // Expect to be able to find each of the paths. - mm, err := fs.NewMountNamespace(ctx, tree) - if err != nil { - t.Errorf("%s: failed to create mount manager: %v", test.name, err) - continue - } - root := mm.Root() - defer mm.DecRef(ctx) - - for _, p := range test.subdirs { - maxTraversals := uint(0) - if _, err := mm.FindInode(ctx, root, nil, p, &maxTraversals); err != nil { - t.Errorf("%s: failed to find node %s: %v", test.name, p, err) - break - } - } - } -} diff --git a/pkg/sentry/fs/sys/BUILD b/pkg/sentry/fs/sys/BUILD deleted file mode 100644 index f2e8b9932..000000000 --- a/pkg/sentry/fs/sys/BUILD +++ /dev/null @@ -1,24 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "sys", - srcs = [ - "device.go", - "devices.go", - "fs.go", - "sys.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/kernel", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/sys/sys_state_autogen.go b/pkg/sentry/fs/sys/sys_state_autogen.go new file mode 100644 index 000000000..390f092cb --- /dev/null +++ b/pkg/sentry/fs/sys/sys_state_autogen.go @@ -0,0 +1,57 @@ +// automatically generated by stateify. + +package sys + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (c *cpunum) StateTypeName() string { + return "pkg/sentry/fs/sys.cpunum" +} + +func (c *cpunum) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "InodeStaticFileGetter", + } +} + +func (c *cpunum) beforeSave() {} + +func (c *cpunum) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.InodeSimpleAttributes) + stateSinkObject.Save(1, &c.InodeStaticFileGetter) +} + +func (c *cpunum) afterLoad() {} + +func (c *cpunum) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.InodeSimpleAttributes) + stateSourceObject.Load(1, &c.InodeStaticFileGetter) +} + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/sys.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func init() { + state.Register((*cpunum)(nil)) + state.Register((*filesystem)(nil)) +} diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD deleted file mode 100644 index d16cdb4df..000000000 --- a/pkg/sentry/fs/timerfd/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "timerfd", - srcs = ["timerfd.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/anon", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel/time", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) diff --git a/pkg/sentry/fs/timerfd/timerfd_state_autogen.go b/pkg/sentry/fs/timerfd/timerfd_state_autogen.go new file mode 100644 index 000000000..15f2d63f6 --- /dev/null +++ b/pkg/sentry/fs/timerfd/timerfd_state_autogen.go @@ -0,0 +1,40 @@ +// automatically generated by stateify. + +package timerfd + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (t *TimerOperations) StateTypeName() string { + return "pkg/sentry/fs/timerfd.TimerOperations" +} + +func (t *TimerOperations) StateFields() []string { + return []string{ + "timer", + "val", + } +} + +func (t *TimerOperations) beforeSave() {} + +func (t *TimerOperations) StateSave(stateSinkObject state.Sink) { + t.beforeSave() + if !state.IsZeroValue(&t.events) { + state.Failf("events is %#v, expected zero", &t.events) + } + stateSinkObject.Save(0, &t.timer) + stateSinkObject.Save(1, &t.val) +} + +func (t *TimerOperations) afterLoad() {} + +func (t *TimerOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &t.timer) + stateSourceObject.Load(1, &t.val) +} + +func init() { + state.Register((*TimerOperations)(nil)) +} diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD deleted file mode 100644 index aa7199014..000000000 --- a/pkg/sentry/fs/tmpfs/BUILD +++ /dev/null @@ -1,50 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "tmpfs", - srcs = [ - "device.go", - "file_regular.go", - "fs.go", - "inode_file.go", - "tmpfs.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/metric", - "//pkg/safemem", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/fs/ramfs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/kernel/time", - "//pkg/sentry/memmap", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "tmpfs_test", - size = "small", - srcs = ["file_test.go"], - library = ":tmpfs", - deps = [ - "//pkg/context", - "//pkg/sentry/fs", - "//pkg/sentry/kernel/contexttest", - "//pkg/sentry/usage", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/tmpfs/file_test.go b/pkg/sentry/fs/tmpfs/file_test.go deleted file mode 100644 index d4d613ea9..000000000 --- a/pkg/sentry/fs/tmpfs/file_test.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "bytes" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/usermem" -) - -func newFileInode(ctx context.Context) *fs.Inode { - m := fs.NewCachingMountSource(ctx, &Filesystem{}, fs.MountSourceFlags{}) - iops := NewInMemoryFile(ctx, usage.Tmpfs, fs.WithCurrentTime(ctx, fs.UnstableAttr{})) - return fs.NewInode(ctx, iops, m, fs.StableAttr{ - DeviceID: tmpfsDevice.DeviceID(), - InodeID: tmpfsDevice.NextIno(), - BlockSize: usermem.PageSize, - Type: fs.RegularFile, - }) -} - -func newFile(ctx context.Context) *fs.File { - inode := newFileInode(ctx) - f, _ := inode.GetFile(ctx, fs.NewDirent(ctx, inode, "stub"), fs.FileFlags{Read: true, Write: true}) - return f -} - -// Allocate once, write twice. -func TestGrow(t *testing.T) { - ctx := contexttest.Context(t) - f := newFile(ctx) - defer f.DecRef(ctx) - - abuf := bytes.Repeat([]byte{'a'}, 68) - n, err := f.Pwritev(ctx, usermem.BytesIOSequence(abuf), 0) - if n != int64(len(abuf)) || err != nil { - t.Fatalf("Pwritev got (%d, %v) want (%d, nil)", n, err, len(abuf)) - } - - bbuf := bytes.Repeat([]byte{'b'}, 856) - n, err = f.Pwritev(ctx, usermem.BytesIOSequence(bbuf), 68) - if n != int64(len(bbuf)) || err != nil { - t.Fatalf("Pwritev got (%d, %v) want (%d, nil)", n, err, len(bbuf)) - } - - rbuf := make([]byte, len(abuf)+len(bbuf)) - n, err = f.Preadv(ctx, usermem.BytesIOSequence(rbuf), 0) - if n != int64(len(rbuf)) || err != nil { - t.Fatalf("Preadv got (%d, %v) want (%d, nil)", n, err, len(rbuf)) - } - - if want := append(abuf, bbuf...); !bytes.Equal(rbuf, want) { - t.Fatalf("Read %v, want %v", rbuf, want) - } -} diff --git a/pkg/sentry/fs/tmpfs/tmpfs_state_autogen.go b/pkg/sentry/fs/tmpfs/tmpfs_state_autogen.go new file mode 100644 index 000000000..29c25b4d6 --- /dev/null +++ b/pkg/sentry/fs/tmpfs/tmpfs_state_autogen.go @@ -0,0 +1,197 @@ +// automatically generated by stateify. + +package tmpfs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (r *regularFileOperations) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.regularFileOperations" +} + +func (r *regularFileOperations) StateFields() []string { + return []string{ + "iops", + } +} + +func (r *regularFileOperations) beforeSave() {} + +func (r *regularFileOperations) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.iops) +} + +func (r *regularFileOperations) afterLoad() {} + +func (r *regularFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.iops) +} + +func (f *Filesystem) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.Filesystem" +} + +func (f *Filesystem) StateFields() []string { + return []string{} +} + +func (f *Filesystem) beforeSave() {} + +func (f *Filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *Filesystem) afterLoad() {} + +func (f *Filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (f *fileInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.fileInodeOperations" +} + +func (f *fileInodeOperations) StateFields() []string { + return []string{ + "InodeSimpleExtendedAttributes", + "kernel", + "memUsage", + "attr", + "mappings", + "writableMappingPages", + "data", + "seals", + } +} + +func (f *fileInodeOperations) beforeSave() {} + +func (f *fileInodeOperations) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.InodeSimpleExtendedAttributes) + stateSinkObject.Save(1, &f.kernel) + stateSinkObject.Save(2, &f.memUsage) + stateSinkObject.Save(3, &f.attr) + stateSinkObject.Save(4, &f.mappings) + stateSinkObject.Save(5, &f.writableMappingPages) + stateSinkObject.Save(6, &f.data) + stateSinkObject.Save(7, &f.seals) +} + +func (f *fileInodeOperations) afterLoad() {} + +func (f *fileInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.InodeSimpleExtendedAttributes) + stateSourceObject.Load(1, &f.kernel) + stateSourceObject.Load(2, &f.memUsage) + stateSourceObject.Load(3, &f.attr) + stateSourceObject.Load(4, &f.mappings) + stateSourceObject.Load(5, &f.writableMappingPages) + stateSourceObject.Load(6, &f.data) + stateSourceObject.Load(7, &f.seals) +} + +func (d *Dir) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.Dir" +} + +func (d *Dir) StateFields() []string { + return []string{ + "ramfsDir", + "kernel", + } +} + +func (d *Dir) beforeSave() {} + +func (d *Dir) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.ramfsDir) + stateSinkObject.Save(1, &d.kernel) +} + +func (d *Dir) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.ramfsDir) + stateSourceObject.Load(1, &d.kernel) + stateSourceObject.AfterLoad(d.afterLoad) +} + +func (s *Symlink) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.Symlink" +} + +func (s *Symlink) StateFields() []string { + return []string{ + "Symlink", + } +} + +func (s *Symlink) beforeSave() {} + +func (s *Symlink) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Symlink) +} + +func (s *Symlink) afterLoad() {} + +func (s *Symlink) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Symlink) +} + +func (s *Socket) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.Socket" +} + +func (s *Socket) StateFields() []string { + return []string{ + "Socket", + } +} + +func (s *Socket) beforeSave() {} + +func (s *Socket) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.Socket) +} + +func (s *Socket) afterLoad() {} + +func (s *Socket) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.Socket) +} + +func (f *Fifo) StateTypeName() string { + return "pkg/sentry/fs/tmpfs.Fifo" +} + +func (f *Fifo) StateFields() []string { + return []string{ + "InodeOperations", + } +} + +func (f *Fifo) beforeSave() {} + +func (f *Fifo) StateSave(stateSinkObject state.Sink) { + f.beforeSave() + stateSinkObject.Save(0, &f.InodeOperations) +} + +func (f *Fifo) afterLoad() {} + +func (f *Fifo) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &f.InodeOperations) +} + +func init() { + state.Register((*regularFileOperations)(nil)) + state.Register((*Filesystem)(nil)) + state.Register((*fileInodeOperations)(nil)) + state.Register((*Dir)(nil)) + state.Register((*Symlink)(nil)) + state.Register((*Socket)(nil)) + state.Register((*Fifo)(nil)) +} diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD deleted file mode 100644 index e6d0eb359..000000000 --- a/pkg/sentry/fs/tty/BUILD +++ /dev/null @@ -1,48 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "tty", - srcs = [ - "dir.go", - "fs.go", - "line_discipline.go", - "master.go", - "queue.go", - "replica.go", - "terminal.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/marshal/primitive", - "//pkg/refs", - "//pkg/safemem", - "//pkg/sentry/arch", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/unimpl", - "//pkg/sync", - "//pkg/syserror", - "//pkg/usermem", - "//pkg/waiter", - ], -) - -go_test( - name = "tty_test", - size = "small", - srcs = ["tty_test.go"], - library = ":tty", - deps = [ - "//pkg/abi/linux", - "//pkg/sentry/contexttest", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/tty/tty_state_autogen.go b/pkg/sentry/fs/tty/tty_state_autogen.go new file mode 100644 index 000000000..0d3785768 --- /dev/null +++ b/pkg/sentry/fs/tty/tty_state_autogen.go @@ -0,0 +1,381 @@ +// automatically generated by stateify. + +package tty + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (d *dirInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.dirInodeOperations" +} + +func (d *dirInodeOperations) StateFields() []string { + return []string{ + "InodeSimpleAttributes", + "msrc", + "master", + "replicas", + "dentryMap", + "next", + } +} + +func (d *dirInodeOperations) beforeSave() {} + +func (d *dirInodeOperations) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.InodeSimpleAttributes) + stateSinkObject.Save(1, &d.msrc) + stateSinkObject.Save(2, &d.master) + stateSinkObject.Save(3, &d.replicas) + stateSinkObject.Save(4, &d.dentryMap) + stateSinkObject.Save(5, &d.next) +} + +func (d *dirInodeOperations) afterLoad() {} + +func (d *dirInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.InodeSimpleAttributes) + stateSourceObject.Load(1, &d.msrc) + stateSourceObject.Load(2, &d.master) + stateSourceObject.Load(3, &d.replicas) + stateSourceObject.Load(4, &d.dentryMap) + stateSourceObject.Load(5, &d.next) +} + +func (df *dirFileOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.dirFileOperations" +} + +func (df *dirFileOperations) StateFields() []string { + return []string{ + "di", + "dirCursor", + } +} + +func (df *dirFileOperations) beforeSave() {} + +func (df *dirFileOperations) StateSave(stateSinkObject state.Sink) { + df.beforeSave() + stateSinkObject.Save(0, &df.di) + stateSinkObject.Save(1, &df.dirCursor) +} + +func (df *dirFileOperations) afterLoad() {} + +func (df *dirFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &df.di) + stateSourceObject.Load(1, &df.dirCursor) +} + +func (f *filesystem) StateTypeName() string { + return "pkg/sentry/fs/tty.filesystem" +} + +func (f *filesystem) StateFields() []string { + return []string{} +} + +func (f *filesystem) beforeSave() {} + +func (f *filesystem) StateSave(stateSinkObject state.Sink) { + f.beforeSave() +} + +func (f *filesystem) afterLoad() {} + +func (f *filesystem) StateLoad(stateSourceObject state.Source) { +} + +func (s *superOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.superOperations" +} + +func (s *superOperations) StateFields() []string { + return []string{} +} + +func (s *superOperations) beforeSave() {} + +func (s *superOperations) StateSave(stateSinkObject state.Sink) { + s.beforeSave() +} + +func (s *superOperations) afterLoad() {} + +func (s *superOperations) StateLoad(stateSourceObject state.Source) { +} + +func (l *lineDiscipline) StateTypeName() string { + return "pkg/sentry/fs/tty.lineDiscipline" +} + +func (l *lineDiscipline) StateFields() []string { + return []string{ + "size", + "inQueue", + "outQueue", + "termios", + "column", + } +} + +func (l *lineDiscipline) beforeSave() {} + +func (l *lineDiscipline) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + if !state.IsZeroValue(&l.masterWaiter) { + state.Failf("masterWaiter is %#v, expected zero", &l.masterWaiter) + } + if !state.IsZeroValue(&l.replicaWaiter) { + state.Failf("replicaWaiter is %#v, expected zero", &l.replicaWaiter) + } + stateSinkObject.Save(0, &l.size) + stateSinkObject.Save(1, &l.inQueue) + stateSinkObject.Save(2, &l.outQueue) + stateSinkObject.Save(3, &l.termios) + stateSinkObject.Save(4, &l.column) +} + +func (l *lineDiscipline) afterLoad() {} + +func (l *lineDiscipline) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.size) + stateSourceObject.Load(1, &l.inQueue) + stateSourceObject.Load(2, &l.outQueue) + stateSourceObject.Load(3, &l.termios) + stateSourceObject.Load(4, &l.column) +} + +func (o *outputQueueTransformer) StateTypeName() string { + return "pkg/sentry/fs/tty.outputQueueTransformer" +} + +func (o *outputQueueTransformer) StateFields() []string { + return []string{} +} + +func (o *outputQueueTransformer) beforeSave() {} + +func (o *outputQueueTransformer) StateSave(stateSinkObject state.Sink) { + o.beforeSave() +} + +func (o *outputQueueTransformer) afterLoad() {} + +func (o *outputQueueTransformer) StateLoad(stateSourceObject state.Source) { +} + +func (i *inputQueueTransformer) StateTypeName() string { + return "pkg/sentry/fs/tty.inputQueueTransformer" +} + +func (i *inputQueueTransformer) StateFields() []string { + return []string{} +} + +func (i *inputQueueTransformer) beforeSave() {} + +func (i *inputQueueTransformer) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *inputQueueTransformer) afterLoad() {} + +func (i *inputQueueTransformer) StateLoad(stateSourceObject state.Source) { +} + +func (mi *masterInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.masterInodeOperations" +} + +func (mi *masterInodeOperations) StateFields() []string { + return []string{ + "SimpleFileInode", + "d", + } +} + +func (mi *masterInodeOperations) beforeSave() {} + +func (mi *masterInodeOperations) StateSave(stateSinkObject state.Sink) { + mi.beforeSave() + stateSinkObject.Save(0, &mi.SimpleFileInode) + stateSinkObject.Save(1, &mi.d) +} + +func (mi *masterInodeOperations) afterLoad() {} + +func (mi *masterInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mi.SimpleFileInode) + stateSourceObject.Load(1, &mi.d) +} + +func (mf *masterFileOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.masterFileOperations" +} + +func (mf *masterFileOperations) StateFields() []string { + return []string{ + "d", + "t", + } +} + +func (mf *masterFileOperations) beforeSave() {} + +func (mf *masterFileOperations) StateSave(stateSinkObject state.Sink) { + mf.beforeSave() + stateSinkObject.Save(0, &mf.d) + stateSinkObject.Save(1, &mf.t) +} + +func (mf *masterFileOperations) afterLoad() {} + +func (mf *masterFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &mf.d) + stateSourceObject.Load(1, &mf.t) +} + +func (q *queue) StateTypeName() string { + return "pkg/sentry/fs/tty.queue" +} + +func (q *queue) StateFields() []string { + return []string{ + "readBuf", + "waitBuf", + "waitBufLen", + "readable", + "transformer", + } +} + +func (q *queue) beforeSave() {} + +func (q *queue) StateSave(stateSinkObject state.Sink) { + q.beforeSave() + stateSinkObject.Save(0, &q.readBuf) + stateSinkObject.Save(1, &q.waitBuf) + stateSinkObject.Save(2, &q.waitBufLen) + stateSinkObject.Save(3, &q.readable) + stateSinkObject.Save(4, &q.transformer) +} + +func (q *queue) afterLoad() {} + +func (q *queue) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &q.readBuf) + stateSourceObject.Load(1, &q.waitBuf) + stateSourceObject.Load(2, &q.waitBufLen) + stateSourceObject.Load(3, &q.readable) + stateSourceObject.Load(4, &q.transformer) +} + +func (si *replicaInodeOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.replicaInodeOperations" +} + +func (si *replicaInodeOperations) StateFields() []string { + return []string{ + "SimpleFileInode", + "d", + "t", + } +} + +func (si *replicaInodeOperations) beforeSave() {} + +func (si *replicaInodeOperations) StateSave(stateSinkObject state.Sink) { + si.beforeSave() + stateSinkObject.Save(0, &si.SimpleFileInode) + stateSinkObject.Save(1, &si.d) + stateSinkObject.Save(2, &si.t) +} + +func (si *replicaInodeOperations) afterLoad() {} + +func (si *replicaInodeOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &si.SimpleFileInode) + stateSourceObject.Load(1, &si.d) + stateSourceObject.Load(2, &si.t) +} + +func (sf *replicaFileOperations) StateTypeName() string { + return "pkg/sentry/fs/tty.replicaFileOperations" +} + +func (sf *replicaFileOperations) StateFields() []string { + return []string{ + "si", + } +} + +func (sf *replicaFileOperations) beforeSave() {} + +func (sf *replicaFileOperations) StateSave(stateSinkObject state.Sink) { + sf.beforeSave() + stateSinkObject.Save(0, &sf.si) +} + +func (sf *replicaFileOperations) afterLoad() {} + +func (sf *replicaFileOperations) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &sf.si) +} + +func (tm *Terminal) StateTypeName() string { + return "pkg/sentry/fs/tty.Terminal" +} + +func (tm *Terminal) StateFields() []string { + return []string{ + "AtomicRefCount", + "n", + "d", + "ld", + "masterKTTY", + "replicaKTTY", + } +} + +func (tm *Terminal) beforeSave() {} + +func (tm *Terminal) StateSave(stateSinkObject state.Sink) { + tm.beforeSave() + stateSinkObject.Save(0, &tm.AtomicRefCount) + stateSinkObject.Save(1, &tm.n) + stateSinkObject.Save(2, &tm.d) + stateSinkObject.Save(3, &tm.ld) + stateSinkObject.Save(4, &tm.masterKTTY) + stateSinkObject.Save(5, &tm.replicaKTTY) +} + +func (tm *Terminal) afterLoad() {} + +func (tm *Terminal) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &tm.AtomicRefCount) + stateSourceObject.Load(1, &tm.n) + stateSourceObject.Load(2, &tm.d) + stateSourceObject.Load(3, &tm.ld) + stateSourceObject.Load(4, &tm.masterKTTY) + stateSourceObject.Load(5, &tm.replicaKTTY) +} + +func init() { + state.Register((*dirInodeOperations)(nil)) + state.Register((*dirFileOperations)(nil)) + state.Register((*filesystem)(nil)) + state.Register((*superOperations)(nil)) + state.Register((*lineDiscipline)(nil)) + state.Register((*outputQueueTransformer)(nil)) + state.Register((*inputQueueTransformer)(nil)) + state.Register((*masterInodeOperations)(nil)) + state.Register((*masterFileOperations)(nil)) + state.Register((*queue)(nil)) + state.Register((*replicaInodeOperations)(nil)) + state.Register((*replicaFileOperations)(nil)) + state.Register((*Terminal)(nil)) +} diff --git a/pkg/sentry/fs/tty/tty_test.go b/pkg/sentry/fs/tty/tty_test.go deleted file mode 100644 index 49edee83d..000000000 --- a/pkg/sentry/fs/tty/tty_test.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tty - -import ( - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/usermem" -) - -func TestSimpleMasterToReplica(t *testing.T) { - ld := newLineDiscipline(linux.DefaultReplicaTermios) - ctx := contexttest.Context(t) - inBytes := []byte("hello, tty\n") - src := usermem.BytesIOSequence(inBytes) - outBytes := make([]byte, 32) - dst := usermem.BytesIOSequence(outBytes) - - // Write to the input queue. - nw, err := ld.inputQueueWrite(ctx, src) - if err != nil { - t.Fatalf("error writing to input queue: %v", err) - } - if nw != int64(len(inBytes)) { - t.Fatalf("wrote wrong length: got %d, want %d", nw, len(inBytes)) - } - - // Read from the input queue. - nr, err := ld.inputQueueRead(ctx, dst) - if err != nil { - t.Fatalf("error reading from input queue: %v", err) - } - if nr != int64(len(inBytes)) { - t.Fatalf("read wrong length: got %d, want %d", nr, len(inBytes)) - } - - outStr := string(outBytes[:nr]) - inStr := string(inBytes) - if outStr != inStr { - t.Fatalf("written and read strings do not match: got %q, want %q", outStr, inStr) - } -} diff --git a/pkg/sentry/fs/user/BUILD b/pkg/sentry/fs/user/BUILD deleted file mode 100644 index 66e949c95..000000000 --- a/pkg/sentry/fs/user/BUILD +++ /dev/null @@ -1,40 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "user", - srcs = [ - "path.go", - "user.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/fspath", - "//pkg/log", - "//pkg/sentry/fs", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - ], -) - -go_test( - name = "user_test", - size = "small", - srcs = ["user_test.go"], - library = ":user", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/contexttest", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fs/user/user_state_autogen.go b/pkg/sentry/fs/user/user_state_autogen.go new file mode 100644 index 000000000..8083e036c --- /dev/null +++ b/pkg/sentry/fs/user/user_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package user diff --git a/pkg/sentry/fs/user/user_test.go b/pkg/sentry/fs/user/user_test.go deleted file mode 100644 index 12b786224..000000000 --- a/pkg/sentry/fs/user/user_test.go +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package user - -import ( - "fmt" - "strings" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/usermem" -) - -// createEtcPasswd creates /etc/passwd with the given contents and mode. If -// mode is empty, then no file will be created. If mode is not a regular file -// mode, then contents is ignored. -func createEtcPasswd(ctx context.Context, root *fs.Dirent, contents string, mode linux.FileMode) error { - if err := root.CreateDirectory(ctx, root, "etc", fs.FilePermsFromMode(0755)); err != nil { - return err - } - etc, err := root.Walk(ctx, root, "etc") - if err != nil { - return err - } - defer etc.DecRef(ctx) - switch mode.FileType() { - case 0: - // Don't create anything. - return nil - case linux.S_IFREG: - passwd, err := etc.Create(ctx, root, "passwd", fs.FileFlags{Write: true}, fs.FilePermsFromMode(mode)) - if err != nil { - return err - } - defer passwd.DecRef(ctx) - if _, err := passwd.Writev(ctx, usermem.BytesIOSequence([]byte(contents))); err != nil { - return err - } - return nil - case linux.S_IFDIR: - return etc.CreateDirectory(ctx, root, "passwd", fs.FilePermsFromMode(mode)) - case linux.S_IFIFO: - return etc.CreateFifo(ctx, root, "passwd", fs.FilePermsFromMode(mode)) - default: - return fmt.Errorf("unknown file type %x", mode.FileType()) - } -} - -// TestGetExecUserHome tests the getExecUserHome function. -func TestGetExecUserHome(t *testing.T) { - tests := map[string]struct { - uid auth.KUID - passwdContents string - passwdMode linux.FileMode - expected string - }{ - "success": { - uid: 1000, - passwdContents: "adin::1000:1111::/home/adin:/bin/sh", - passwdMode: linux.S_IFREG | 0666, - expected: "/home/adin", - }, - "no_perms": { - uid: 1000, - passwdContents: "adin::1000:1111::/home/adin:/bin/sh", - passwdMode: linux.S_IFREG, - expected: "/", - }, - "no_passwd": { - uid: 1000, - expected: "/", - }, - "directory": { - uid: 1000, - passwdMode: linux.S_IFDIR | 0666, - expected: "/", - }, - // Currently we don't allow named pipes. - "named_pipe": { - uid: 1000, - passwdMode: linux.S_IFIFO | 0666, - expected: "/", - }, - } - - for name, tc := range tests { - t.Run(name, func(t *testing.T) { - ctx := contexttest.Context(t) - msrc := fs.NewPseudoMountSource(ctx) - rootInode := tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc) - - mns, err := fs.NewMountNamespace(ctx, rootInode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - defer mns.DecRef(ctx) - root := mns.Root() - defer root.DecRef(ctx) - ctx = fs.WithRoot(ctx, root) - - if err := createEtcPasswd(ctx, root, tc.passwdContents, tc.passwdMode); err != nil { - t.Fatalf("createEtcPasswd failed: %v", err) - } - - got, err := getExecUserHome(ctx, mns, tc.uid) - if err != nil { - t.Fatalf("failed to get user home: %v", err) - } - - if got != tc.expected { - t.Fatalf("expected %v, got: %v", tc.expected, got) - } - }) - } -} - -// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing. -func TestFindHomeInPasswd(t *testing.T) { - tests := map[string]struct { - uid uint32 - passwd string - expected string - def string - }{ - "empty": { - uid: 1000, - passwd: "", - expected: "/", - def: "/", - }, - "whitespace": { - uid: 1000, - passwd: " ", - expected: "/", - def: "/", - }, - "full": { - uid: 1000, - passwd: "adin::1000:1111::/home/adin:/bin/sh", - expected: "/home/adin", - def: "/", - }, - // For better or worse, this is how runc works. - "partial": { - uid: 1000, - passwd: "adin::1000:1111:", - expected: "", - def: "/", - }, - "multiple": { - uid: 1001, - passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh", - expected: "/home/ian", - def: "/", - }, - "duplicate": { - uid: 1000, - passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh", - expected: "/home/adin", - def: "/", - }, - "empty_lines": { - uid: 1001, - passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh", - expected: "/home/ian", - def: "/", - }, - } - - for name, tc := range tests { - t.Run(name, func(t *testing.T) { - got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def) - if err != nil { - t.Fatalf("error parsing passwd: %v", err) - } - if tc.expected != got { - t.Fatalf("expected %v, got: %v", tc.expected, got) - } - }) - } -} |