diff options
author | gVisor bot <gvisor-bot@google.com> | 2019-06-02 06:44:55 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2019-06-02 06:44:55 +0000 |
commit | ceb0d792f328d1fc0692197d8856a43c3936a571 (patch) | |
tree | 83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/sentry/fs/proc | |
parent | deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff) | |
parent | 216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff) |
Merge 216da0b7 (automated)
Diffstat (limited to 'pkg/sentry/fs/proc')
26 files changed, 4761 insertions, 0 deletions
diff --git a/pkg/sentry/fs/proc/cgroup.go b/pkg/sentry/fs/proc/cgroup.go new file mode 100644 index 000000000..1019f862a --- /dev/null +++ b/pkg/sentry/fs/proc/cgroup.go @@ -0,0 +1,41 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" +) + +func newCGroupInode(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string]string) *fs.Inode { + // From man 7 cgroups: "For each cgroup hierarchy of which the process + // is a member, there is one entry containing three colon-separated + // fields: hierarchy-ID:controller-list:cgroup-path" + + // The hierarchy ids must be positive integers (for cgroup v1), but the + // exact number does not matter, so long as they are unique. We can + // just use a counter, but since linux sorts this file in descending + // order, we must count down to preserve this behavior. + i := len(cgroupControllers) + var data string + for name, dir := range cgroupControllers { + data += fmt.Sprintf("%d:%s:%s\n", i, name, dir) + i-- + } + + return newStaticProcInode(ctx, msrc, []byte(data)) +} diff --git a/pkg/sentry/fs/proc/cpuinfo.go b/pkg/sentry/fs/proc/cpuinfo.go new file mode 100644 index 000000000..15031234e --- /dev/null +++ b/pkg/sentry/fs/proc/cpuinfo.go @@ -0,0 +1,35 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" +) + +func newCPUInfo(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + k := kernel.KernelFromContext(ctx) + features := k.FeatureSet() + if features == nil { + // Kernel is always initialized with a FeatureSet. + panic("cpuinfo read with nil FeatureSet") + } + contents := make([]byte, 0, 1024) + for i, max := uint(0), k.ApplicationCores(); i < max; i++ { + contents = append(contents, []byte(features.CPUInfo(i))...) + } + return newStaticProcInode(ctx, msrc, contents) +} diff --git a/pkg/sentry/fs/proc/device/device.go b/pkg/sentry/fs/proc/device/device.go new file mode 100644 index 000000000..0de466c73 --- /dev/null +++ b/pkg/sentry/fs/proc/device/device.go @@ -0,0 +1,23 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Package device contains the proc device to avoid dependency loops. +package device + +import ( + "gvisor.googlesource.com/gvisor/pkg/sentry/device" +) + +// ProcDevice is the kernel proc device. +var ProcDevice = device.NewAnonDevice() diff --git a/pkg/sentry/fs/proc/device/device_state_autogen.go b/pkg/sentry/fs/proc/device/device_state_autogen.go new file mode 100755 index 000000000..be407ac45 --- /dev/null +++ b/pkg/sentry/fs/proc/device/device_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package device + diff --git a/pkg/sentry/fs/proc/exec_args.go b/pkg/sentry/fs/proc/exec_args.go new file mode 100644 index 000000000..cb28f6bc3 --- /dev/null +++ b/pkg/sentry/fs/proc/exec_args.go @@ -0,0 +1,203 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package proc + +import ( + "bytes" + "fmt" + "io" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// execArgType enumerates the types of exec arguments that are exposed through +// proc. +type execArgType int + +const ( + cmdlineExecArg execArgType = iota + environExecArg +) + +// execArgInode is an inode containing the exec args (either cmdline or environ) +// for a given task. +// +// +stateify savable +type execArgInode struct { + fsutil.SimpleFileInode + + // arg is the type of exec argument this file contains. + arg execArgType + + // t is the Task to read the exec arg line from. + t *kernel.Task +} + +var _ fs.InodeOperations = (*execArgInode)(nil) + +// newExecArgInode creates a file containing the exec args of the given type. +func newExecArgInode(t *kernel.Task, msrc *fs.MountSource, arg execArgType) *fs.Inode { + if arg != cmdlineExecArg && arg != environExecArg { + panic(fmt.Sprintf("unknown exec arg type %v", arg)) + } + f := &execArgInode{ + SimpleFileInode: *fsutil.NewSimpleFileInode(t, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + arg: arg, + t: t, + } + return newProcInode(f, msrc, fs.SpecialFile, t) +} + +// GetFile implements fs.InodeOperations.GetFile. 
+func (i *execArgInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, dirent, flags, &execArgFile{ + arg: i.arg, + t: i.t, + }), nil +} + +// +stateify savable +type execArgFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopWrite `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + // arg is the type of exec argument this file contains. + arg execArgType + + // t is the Task to read the exec arg line from. + t *kernel.Task +} + +var _ fs.FileOperations = (*execArgFile)(nil) + +// Read reads the exec arg from the process's address space. +func (f *execArgFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + + m, err := getTaskMM(f.t) + if err != nil { + return 0, err + } + defer m.DecUsers(ctx) + + // Figure out the bounds of the exec arg we are trying to read. + var execArgStart, execArgEnd usermem.Addr + switch f.arg { + case cmdlineExecArg: + execArgStart, execArgEnd = m.ArgvStart(), m.ArgvEnd() + case environExecArg: + execArgStart, execArgEnd = m.EnvvStart(), m.EnvvEnd() + default: + panic(fmt.Sprintf("unknown exec arg type %v", f.arg)) + } + if execArgStart == 0 || execArgEnd == 0 { + // Don't attempt to read before the start/end are set up. 
+ return 0, io.EOF + } + + start, ok := execArgStart.AddLength(uint64(offset)) + if !ok { + return 0, io.EOF + } + if start >= execArgEnd { + return 0, io.EOF + } + + length := int(execArgEnd - start) + if dstlen := dst.NumBytes(); int64(length) > dstlen { + length = int(dstlen) + } + + buf := make([]byte, length) + // N.B. Technically this should be usermem.IOOpts.IgnorePermissions = true + // until Linux 4.9 (272ddc8b3735 "proc: don't use FOLL_FORCE for reading + // cmdline and environment"). + copyN, err := m.CopyIn(ctx, start, buf, usermem.IOOpts{}) + if copyN == 0 { + // Nothing to copy. + return 0, err + } + buf = buf[:copyN] + + // On Linux, if the NUL byte at the end of the argument vector has been + // overwritten, it continues reading the environment vector as part of + // the argument vector. + + if f.arg == cmdlineExecArg && buf[copyN-1] != 0 { + // Linux will limit the return up to and including the first null character in argv + + copyN = bytes.IndexByte(buf, 0) + if copyN == -1 { + copyN = len(buf) + } + // If we found a NUL character in argv, return upto and including that character. + if copyN < len(buf) { + buf = buf[:copyN] + } else { // Otherwise return into envp. + lengthEnvv := int(m.EnvvEnd() - m.EnvvStart()) + + // Upstream limits the returned amount to one page of slop. + // https://elixir.bootlin.com/linux/v4.20/source/fs/proc/base.c#L208 + // we'll return one page total between argv and envp because of the + // above page restrictions. + if lengthEnvv > usermem.PageSize-len(buf) { + lengthEnvv = usermem.PageSize - len(buf) + } + // Make a new buffer to fit the whole thing + tmp := make([]byte, length+lengthEnvv) + copyNE, err := m.CopyIn(ctx, m.EnvvStart(), tmp[copyN:], usermem.IOOpts{}) + if err != nil { + return 0, err + } + + // Linux will return envp up to and including the first NUL character, so find it. 
+ for i, c := range tmp[copyN:] { + if c == 0 { + copyNE = i + break + } + } + + copy(tmp, buf) + buf = tmp[:copyN+copyNE] + + } + + } + + n, dstErr := dst.CopyOut(ctx, buf) + if dstErr != nil { + return int64(n), dstErr + } + return int64(n), err +} diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go new file mode 100644 index 000000000..744b31c74 --- /dev/null +++ b/pkg/sentry/fs/proc/fds.go @@ -0,0 +1,285 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "fmt" + "sort" + "strconv" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// walkDescriptors finds the descriptor (file-flag pair) for the fd identified +// by p, and calls the toInodeOperations callback with that descriptor. This is a helper +// method for implementing fs.InodeOperations.Lookup. +func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDFlags) *fs.Inode) (*fs.Inode, error) { + n, err := strconv.ParseUint(p, 10, 64) + if err != nil { + // Not found. 
+ return nil, syserror.ENOENT + } + + var file *fs.File + var fdFlags kernel.FDFlags + t.WithMuLocked(func(t *kernel.Task) { + if fdm := t.FDMap(); fdm != nil { + file, fdFlags = fdm.GetDescriptor(kdefs.FD(n)) + } + }) + if file == nil { + return nil, syserror.ENOENT + } + return toInode(file, fdFlags), nil +} + +// readDescriptors reads fds in the task starting at offset, and calls the +// toDentAttr callback for each to get a DentAttr, which it then emits. This is +// a helper for implementing fs.InodeOperations.Readdir. +func readDescriptors(t *kernel.Task, c *fs.DirCtx, offset int64, toDentAttr func(int) fs.DentAttr) (int64, error) { + var fds kernel.FDs + t.WithMuLocked(func(t *kernel.Task) { + if fdm := t.FDMap(); fdm != nil { + fds = fdm.GetFDs() + } + }) + + fdInts := make([]int, 0, len(fds)) + for _, fd := range fds { + fdInts = append(fdInts, int(fd)) + } + + // Find the fd to start at. + idx := sort.SearchInts(fdInts, int(offset)) + if idx == len(fdInts) { + return offset, nil + } + fdInts = fdInts[idx:] + + var fd int + for _, fd = range fdInts { + name := strconv.FormatUint(uint64(fd), 10) + if err := c.DirEmit(name, toDentAttr(fd)); err != nil { + // Returned offset is the next fd to serialize. + return int64(fd), err + } + } + // We serialized them all. Next offset should be higher than last + // serialized fd. + return int64(fd + 1), nil +} + +// fd implements fs.InodeOperations for a file in /proc/TID/fd/. +type fd struct { + ramfs.Symlink + file *fs.File +} + +var _ fs.InodeOperations = (*fd)(nil) + +// newFd returns a new fd based on an existing file. +// +// This inherits one reference to the file. +func newFd(t *kernel.Task, f *fs.File, msrc *fs.MountSource) *fs.Inode { + fd := &fd{ + // RootOwner overridden by taskOwnedInodeOps.UnstableAttrs(). + Symlink: *ramfs.NewSymlink(t, fs.RootOwner, ""), + file: f, + } + return newProcInode(fd, msrc, fs.Symlink, t) +} + +// GetFile returns the fs.File backing this fd. 
The dirent and flags +// arguments are ignored. +func (f *fd) GetFile(context.Context, *fs.Dirent, fs.FileFlags) (*fs.File, error) { + // Take a reference on the fs.File. + f.file.IncRef() + return f.file, nil +} + +// Readlink returns the current target. +func (f *fd) Readlink(ctx context.Context, _ *fs.Inode) (string, error) { + root := fs.RootFromContext(ctx) + if root != nil { + defer root.DecRef() + } + n, _ := f.file.Dirent.FullName(root) + return n, nil +} + +// Getlink implements fs.InodeOperations.Getlink. +func (f *fd) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) { + f.file.Dirent.IncRef() + return f.file.Dirent, nil +} + +// Truncate is ignored. +func (f *fd) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + +func (f *fd) Release(ctx context.Context) { + f.Symlink.Release(ctx) + f.file.DecRef() +} + +// Close releases the reference on the file. +func (f *fd) Close() error { + f.file.DecRef() + return nil +} + +// fdDir is an InodeOperations for /proc/TID/fd. +// +// +stateify savable +type fdDir struct { + ramfs.Dir + + // We hold a reference on the task's fdmap but only keep an indirect + // task pointer to avoid Dirent loading circularity caused by fdmap's + // potential back pointers into the dirent tree. + t *kernel.Task +} + +var _ fs.InodeOperations = (*fdDir)(nil) + +// newFdDir creates a new fdDir. +func newFdDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + f := &fdDir{ + Dir: *ramfs.NewDir(t, nil, fs.RootOwner, fs.FilePermissions{User: fs.PermMask{Read: true, Execute: true}}), + t: t, + } + return newProcInode(f, msrc, fs.SpecialDirectory, t) +} + +// Check implements InodeOperations.Check. +// +// This is to match Linux, which uses a special permission handler to guarantee +// that a process can still access /proc/self/fd after it has executed +// setuid. See fs/proc/fd.c:proc_fd_permission. 
+func (f *fdDir) Check(ctx context.Context, inode *fs.Inode, req fs.PermMask) bool { + if fs.ContextCanAccessFile(ctx, inode, req) { + return true + } + if t := kernel.TaskFromContext(ctx); t != nil { + // Allow access if the task trying to access it is in the + // thread group corresponding to this directory. + if f.t.ThreadGroup() == t.ThreadGroup() { + return true + } + } + return false +} + +// Lookup loads an Inode in /proc/TID/fd into a Dirent. +func (f *fdDir) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent, error) { + n, err := walkDescriptors(f.t, p, func(file *fs.File, _ kernel.FDFlags) *fs.Inode { + return newFd(f.t, file, dir.MountSource) + }) + if err != nil { + return nil, err + } + return fs.NewDirent(n, p), nil +} + +// GetFile implements fs.InodeOperations.GetFile. +func (f *fdDir) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + fops := &fdDirFile{ + isInfoFile: false, + t: f.t, + } + return fs.NewFile(ctx, dirent, flags, fops), nil +} + +// +stateify savable +type fdDirFile struct { + fsutil.DirFileOperations `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + + isInfoFile bool + + t *kernel.Task +} + +var _ fs.FileOperations = (*fdDirFile)(nil) + +// Readdir implements fs.FileOperations.Readdir. +func (f *fdDirFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySerializer) (int64, error) { + dirCtx := &fs.DirCtx{ + Serializer: ser, + } + typ := fs.RegularFile + if f.isInfoFile { + typ = fs.Symlink + } + return readDescriptors(f.t, dirCtx, file.Offset(), func(fd int) fs.DentAttr { + return fs.GenericDentAttr(typ, device.ProcDevice) + }) +} + +// fdInfoDir implements /proc/TID/fdinfo. It embeds a ramfs.Dir and provides +// its own Lookup and GetFile. +// +// +stateify savable +type fdInfoDir struct { + ramfs.Dir + + t *kernel.Task +} + +// newFdInfoDir creates a new fdInfoDir. 
+func newFdInfoDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + fdid := &fdInfoDir{ + Dir: *ramfs.NewDir(t, nil, fs.RootOwner, fs.FilePermsFromMode(0500)), + t: t, + } + return newProcInode(fdid, msrc, fs.SpecialDirectory, t) +} + +// Lookup loads an fd in /proc/TID/fdinfo into a Dirent. +func (fdid *fdInfoDir) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent, error) { + inode, err := walkDescriptors(fdid.t, p, func(file *fs.File, fdFlags kernel.FDFlags) *fs.Inode { + // TODO(b/121266871): Using a static inode here means that the + // data can be out-of-date if, for instance, the flags on the + // FD change before we read this file. We should switch to + // generating the data on Read(). Also, we should include pos, + // locks, and other data. For now we only have flags. + // See https://www.kernel.org/doc/Documentation/filesystems/proc.txt + flags := file.Flags().ToLinux() | fdFlags.ToLinuxFileFlags() + file.DecRef() + contents := []byte(fmt.Sprintf("flags:\t0%o\n", flags)) + return newStaticProcInode(ctx, dir.MountSource, contents) + }) + if err != nil { + return nil, err + } + return fs.NewDirent(inode, p), nil +} + +// GetFile implements fs.FileOperations.GetFile. +func (fdid *fdInfoDir) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + fops := &fdDirFile{ + isInfoFile: true, + t: fdid.t, + } + return fs.NewFile(ctx, dirent, flags, fops), nil +} diff --git a/pkg/sentry/fs/proc/filesystems.go b/pkg/sentry/fs/proc/filesystems.go new file mode 100644 index 000000000..7bb081d0e --- /dev/null +++ b/pkg/sentry/fs/proc/filesystems.go @@ -0,0 +1,61 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" +) + +// filesystemsData backs /proc/filesystems. +// +// +stateify savable +type filesystemsData struct{} + +// NeedsUpdate returns true on the first generation. The set of registered file +// systems doesn't change so there's no need to generate SeqData more than once. +func (*filesystemsData) NeedsUpdate(generation int64) bool { + return generation == 0 +} + +// ReadSeqFileData returns data for the SeqFile reader. +// SeqData, the current generation and where in the file the handle corresponds to. +func (*filesystemsData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + // We don't ever expect to see a non-nil SeqHandle. + if h != nil { + return nil, 0 + } + + // Generate the file contents. + var buf bytes.Buffer + for _, sys := range fs.GetFilesystems() { + if !sys.AllowUserList() { + continue + } + nodev := "nodev" + if sys.Flags()&fs.FilesystemRequiresDev != 0 { + nodev = "" + } + // Matches the format of fs/filesystems.c:filesystems_proc_show. + fmt.Fprintf(&buf, "%s\t%s\n", nodev, sys.Name()) + } + + // Return the SeqData and advance the generation counter. 
+ return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*filesystemsData)(nil)}}, 1 +} diff --git a/pkg/sentry/fs/proc/fs.go b/pkg/sentry/fs/proc/fs.go new file mode 100644 index 000000000..d57d6cc5d --- /dev/null +++ b/pkg/sentry/fs/proc/fs.go @@ -0,0 +1,81 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" +) + +// filesystem is a procfs. +// +// +stateify savable +type filesystem struct{} + +func init() { + fs.RegisterFilesystem(&filesystem{}) +} + +// FilesystemName is the name under which the filesystem is registered. +// Name matches fs/proc/root.c:proc_fs_type.name. +const FilesystemName = "proc" + +// Name is the name of the file system. +func (*filesystem) Name() string { + return FilesystemName +} + +// AllowUserMount allows users to mount(2) this file system. +func (*filesystem) AllowUserMount() bool { + return true +} + +// AllowUserList allows this filesystem to be listed in /proc/filesystems. +func (*filesystem) AllowUserList() bool { + return true +} + +// Flags returns that there is nothing special about this file system. +// +// In Linux, proc returns FS_USERNS_VISIBLE | FS_USERNS_MOUNT, see fs/proc/root.c. +func (*filesystem) Flags() fs.FilesystemFlags { + return 0 +} + +// Mount returns the root of a procfs that can be positioned in the vfs. 
+func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSourceFlags, data string, cgroupsInt interface{}) (*fs.Inode, error) { + // device is always ignored. + + // Parse generic comma-separated key=value options, this file system expects them. + options := fs.GenericMountSourceOptions(data) + + // Proc options parsing checks for either a gid= or hidepid= and barfs on + // anything else, see fs/proc/root.c:proc_parse_options. Since we don't know + // what to do with gid= or hidepid=, we blow up if we get any options. + if len(options) > 0 { + return nil, fmt.Errorf("unsupported mount options: %v", options) + } + + var cgroups map[string]string + if cgroupsInt != nil { + cgroups = cgroupsInt.(map[string]string) + } + + // Construct the procfs root. Since procfs files are all virtual, we + // never want them cached. + return New(ctx, fs.NewNonCachingMountSource(f, flags), cgroups) +} diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go new file mode 100644 index 000000000..379569823 --- /dev/null +++ b/pkg/sentry/fs/proc/inode.go @@ -0,0 +1,97 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package proc + +import ( + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" +) + +// taskOwnedInodeOps wraps an fs.InodeOperations and overrides the UnstableAttr +// method to return the task as the owner. +// +// +stateify savable +type taskOwnedInodeOps struct { + fs.InodeOperations + + // t is the task that owns this file. + t *kernel.Task +} + +// UnstableAttr implements fs.InodeOperations.UnstableAttr. +func (i *taskOwnedInodeOps) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) { + uattr, err := i.InodeOperations.UnstableAttr(ctx, inode) + if err != nil { + return fs.UnstableAttr{}, err + } + // Set the task owner as the file owner. + creds := i.t.Credentials() + uattr.Owner = fs.FileOwner{creds.EffectiveKUID, creds.EffectiveKGID} + return uattr, nil +} + +// staticFileInodeOps is an InodeOperations implementation that can be used to +// return file contents which are constant. This file is not writable and will +// always have mode 0444. 
+// +// +stateify savable +type staticFileInodeOps struct { + fsutil.InodeDenyWriteChecker `state:"nosave"` + fsutil.InodeNoExtendedAttributes `state:"nosave"` + fsutil.InodeNoopAllocate `state:"nosave"` + fsutil.InodeNoopRelease `state:"nosave"` + fsutil.InodeNoopTruncate `state:"nosave"` + fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotDirectory `state:"nosave"` + fsutil.InodeNotMappable `state:"nosave"` + fsutil.InodeNotSocket `state:"nosave"` + fsutil.InodeNotSymlink `state:"nosave"` + fsutil.InodeVirtual `state:"nosave"` + + fsutil.InodeSimpleAttributes + fsutil.InodeStaticFileGetter +} + +var _ fs.InodeOperations = (*staticFileInodeOps)(nil) + +// newStaticProcInode returns a procfs Inode with static contents. +func newStaticProcInode(ctx context.Context, msrc *fs.MountSource, contents []byte) *fs.Inode { + iops := &staticFileInodeOps{ + InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + InodeStaticFileGetter: fsutil.InodeStaticFileGetter{ + Contents: contents, + }, + } + return newProcInode(iops, msrc, fs.SpecialFile, nil) +} + +// newProcInode creates a new inode from the given inode operations. +func newProcInode(iops fs.InodeOperations, msrc *fs.MountSource, typ fs.InodeType, t *kernel.Task) *fs.Inode { + sattr := fs.StableAttr{ + DeviceID: device.ProcDevice.DeviceID(), + InodeID: device.ProcDevice.NextIno(), + BlockSize: usermem.PageSize, + Type: typ, + } + if t != nil { + iops = &taskOwnedInodeOps{iops, t} + } + return fs.NewInode(iops, msrc, sattr) +} diff --git a/pkg/sentry/fs/proc/loadavg.go b/pkg/sentry/fs/proc/loadavg.go new file mode 100644 index 000000000..2dfe7089a --- /dev/null +++ b/pkg/sentry/fs/proc/loadavg.go @@ -0,0 +1,55 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" +) + +// loadavgData backs /proc/loadavg. +// +// +stateify savable +type loadavgData struct{} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (*loadavgData) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (d *loadavgData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + + var buf bytes.Buffer + + // TODO(b/62345059): Include real data in fields. + // Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods. + // Column 4-5: currently running processes and the total number of processes. + // Column 6: the last process ID used. + fmt.Fprintf(&buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0) + + return []seqfile.SeqData{ + { + Buf: buf.Bytes(), + Handle: (*loadavgData)(nil), + }, + }, 0 +} diff --git a/pkg/sentry/fs/proc/meminfo.go b/pkg/sentry/fs/proc/meminfo.go new file mode 100644 index 000000000..d2b9b92c7 --- /dev/null +++ b/pkg/sentry/fs/proc/meminfo.go @@ -0,0 +1,85 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" +) + +// meminfoData backs /proc/meminfo. +// +// +stateify savable +type meminfoData struct { + // k is the owning Kernel. + k *kernel.Kernel +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (*meminfoData) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (d *meminfoData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + + mf := d.k.MemoryFile() + mf.UpdateUsage() + snapshot, totalUsage := usage.MemoryAccounting.Copy() + totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage) + anon := snapshot.Anonymous + snapshot.Tmpfs + file := snapshot.PageCache + snapshot.Mapped + // We don't actually have active/inactive LRUs, so just make up numbers. + activeFile := (file / 2) &^ (usermem.PageSize - 1) + inactiveFile := file - activeFile + + var buf bytes.Buffer + fmt.Fprintf(&buf, "MemTotal: %8d kB\n", totalSize/1024) + memFree := (totalSize - totalUsage) / 1024 + // We use MemFree as MemAvailable because we don't swap. + // TODO(rahat): When reclaim is implemented the value of MemAvailable + // should change. 
+ fmt.Fprintf(&buf, "MemFree: %8d kB\n", memFree) + fmt.Fprintf(&buf, "MemAvailable: %8d kB\n", memFree) + fmt.Fprintf(&buf, "Buffers: 0 kB\n") // memory usage by block devices + fmt.Fprintf(&buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024) + // Emulate a system with no swap, which disables inactivation of anon pages. + fmt.Fprintf(&buf, "SwapCache: 0 kB\n") + fmt.Fprintf(&buf, "Active: %8d kB\n", (anon+activeFile)/1024) + fmt.Fprintf(&buf, "Inactive: %8d kB\n", inactiveFile/1024) + fmt.Fprintf(&buf, "Active(anon): %8d kB\n", anon/1024) + fmt.Fprintf(&buf, "Inactive(anon): 0 kB\n") + fmt.Fprintf(&buf, "Active(file): %8d kB\n", activeFile/1024) + fmt.Fprintf(&buf, "Inactive(file): %8d kB\n", inactiveFile/1024) + fmt.Fprintf(&buf, "Unevictable: 0 kB\n") // TODO(b/31823263) + fmt.Fprintf(&buf, "Mlocked: 0 kB\n") // TODO(b/31823263) + fmt.Fprintf(&buf, "SwapTotal: 0 kB\n") + fmt.Fprintf(&buf, "SwapFree: 0 kB\n") + fmt.Fprintf(&buf, "Dirty: 0 kB\n") + fmt.Fprintf(&buf, "Writeback: 0 kB\n") + fmt.Fprintf(&buf, "AnonPages: %8d kB\n", anon/1024) + fmt.Fprintf(&buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know + fmt.Fprintf(&buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024) + return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*meminfoData)(nil)}}, 0 +} diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go new file mode 100644 index 000000000..1f7817947 --- /dev/null +++ b/pkg/sentry/fs/proc/mounts.go @@ -0,0 +1,197 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + "sort" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" +) + +// forEachMountSource runs f for the process root mount and each mount that is a +// descendant of the root. +func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) { + var fsctx *kernel.FSContext + t.WithMuLocked(func(t *kernel.Task) { + fsctx = t.FSContext() + }) + if fsctx == nil { + // The task has been destroyed. Nothing to show here. + return + } + + // All mount points must be relative to the rootDir, and mounts outside + // will be excluded. + rootDir := fsctx.RootDirectory() + if rootDir == nil { + // The task has been destroyed. Nothing to show here. + return + } + defer rootDir.DecRef() + + mnt := t.MountNamespace().FindMount(rootDir) + if mnt == nil { + // Has it just been unmounted? + return + } + ms := t.MountNamespace().AllMountsUnder(mnt) + sort.Slice(ms, func(i, j int) bool { + return ms[i].ID < ms[j].ID + }) + for _, m := range ms { + mroot := m.Root() + mountPath, desc := mroot.FullName(rootDir) + mroot.DecRef() + if !desc { + // MountSources that are not descendants of the chroot jail are ignored. + continue + } + + fn(mountPath, m) + } +} + +// mountInfoFile is used to implement /proc/[pid]/mountinfo. +// +// +stateify savable +type mountInfoFile struct { + t *kernel.Task +} + +// NeedsUpdate implements SeqSource.NeedsUpdate. +func (mif *mountInfoFile) NeedsUpdate(_ int64) bool { + return true +} + +// ReadSeqFileData implements SeqSource.ReadSeqFileData. 
+func (mif *mountInfoFile) ReadSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if handle != nil { + return nil, 0 + } + + var buf bytes.Buffer + forEachMount(mif.t, func(mountPath string, m *fs.Mount) { + // Format: + // 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + // (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11) + + // (1) MountSource ID. + fmt.Fprintf(&buf, "%d ", m.ID) + + // (2) Parent ID (or this ID if there is no parent). + pID := m.ID + if !m.IsRoot() && !m.IsUndo() { + pID = m.ParentID + } + fmt.Fprintf(&buf, "%d ", pID) + + // (3) Major:Minor device ID. We don't have a superblock, so we + // just use the root inode device number. + mroot := m.Root() + defer mroot.DecRef() + + sa := mroot.Inode.StableAttr + fmt.Fprintf(&buf, "%d:%d ", sa.DeviceFileMajor, sa.DeviceFileMinor) + + // (4) Root: the pathname of the directory in the filesystem + // which forms the root of this mount. + // + // NOTE(b/78135857): This will always be "/" until we implement + // bind mounts. + fmt.Fprintf(&buf, "/ ") + + // (5) Mount point (relative to process root). + fmt.Fprintf(&buf, "%s ", mountPath) + + // (6) Mount options. + flags := mroot.Inode.MountSource.Flags + opts := "rw" + if flags.ReadOnly { + opts = "ro" + } + if flags.NoAtime { + opts += ",noatime" + } + if flags.NoExec { + opts += ",noexec" + } + fmt.Fprintf(&buf, "%s ", opts) + + // (7) Optional fields: zero or more fields of the form "tag[:value]". + // (8) Separator: the end of the optional fields is marked by a single hyphen. + fmt.Fprintf(&buf, "- ") + + // (9) Filesystem type. + fmt.Fprintf(&buf, "%s ", mroot.Inode.MountSource.FilesystemType) + + // (10) Mount source: filesystem-specific information or "none". + fmt.Fprintf(&buf, "none ") + + // (11) Superblock options. Only "ro/rw" is supported for now, + // and is the same as the filesystem option. 
+ fmt.Fprintf(&buf, "%s\n", opts) + }) + + return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*mountInfoFile)(nil)}}, 0 +} + +// mountsFile is used to implement /proc/[pid]/mounts. +// +// +stateify savable +type mountsFile struct { + t *kernel.Task +} + +// NeedsUpdate implements SeqSource.NeedsUpdate. +func (mf *mountsFile) NeedsUpdate(_ int64) bool { + return true +} + +// ReadSeqFileData implements SeqSource.ReadSeqFileData. +func (mf *mountsFile) ReadSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if handle != nil { + return nil, 0 + } + + var buf bytes.Buffer + forEachMount(mf.t, func(mountPath string, m *fs.Mount) { + // Format: + // <special device or remote filesystem> <mount point> <filesystem type> <mount options> <needs dump> <fsck order> + // + // We use the filesystem name as the first field, since there + // is no real block device we can point to, and we also should + // not expose anything about the remote filesystem. + // + // Only ro/rw option is supported for now. + // + // The "needs dump"and fsck flags are always 0, which is allowed. + root := m.Root() + defer root.DecRef() + + flags := root.Inode.MountSource.Flags + opts := "rw" + if flags.ReadOnly { + opts = "ro" + } + fmt.Fprintf(&buf, "%s %s %s %s %d %d\n", "none", mountPath, root.Inode.MountSource.FilesystemType, opts, 0, 0) + }) + + return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*mountsFile)(nil)}}, 0 +} diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go new file mode 100644 index 000000000..4a107c739 --- /dev/null +++ b/pkg/sentry/fs/proc/net.go @@ -0,0 +1,308 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + "time" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" + "gvisor.googlesource.com/gvisor/pkg/sentry/inet" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" +) + +// newNet creates a new proc net entry. +func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSource) *fs.Inode { + var contents map[string]*fs.Inode + if s := p.k.NetworkStack(); s != nil { + contents = map[string]*fs.Inode{ + "dev": seqfile.NewSeqFileInode(ctx, &netDev{s: s}, msrc), + + // The following files are simple stubs until they are + // implemented in netstack, if the file contains a + // header the stub is just the header otherwise it is + // an empty file. 
+ "arp": newStaticProcInode(ctx, msrc, []byte("IP address HW type Flags HW address Mask Device")), + + "netlink": newStaticProcInode(ctx, msrc, []byte("sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode")), + "netstat": newStaticProcInode(ctx, msrc, []byte("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd 
TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess")), + "packet": newStaticProcInode(ctx, msrc, []byte("sk RefCnt Type Proto Iface R Rmem User Inode")), + "protocols": newStaticProcInode(ctx, msrc, []byte("protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em")), + // Linux sets psched values to: nsec per usec, psched + // tick in ns, 1000000, high res timer ticks per sec + // (ClockGetres returns 1ns resolution). + "psched": newStaticProcInode(ctx, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))), + "ptype": newStaticProcInode(ctx, msrc, []byte("Type Device Function")), + "route": newStaticProcInode(ctx, msrc, []byte("Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT")), + "tcp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode")), + + "udp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops")), + + "unix": seqfile.NewSeqFileInode(ctx, &netUnix{k: k}, msrc), + } + + if s.SupportsIPv6() { + contents["if_inet6"] = seqfile.NewSeqFileInode(ctx, &ifinet6{s: s}, msrc) + contents["ipv6_route"] = newStaticProcInode(ctx, msrc, []byte("")) + contents["tcp6"] = newStaticProcInode(ctx, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode")) + contents["udp6"] = newStaticProcInode(ctx, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode")) + } + } + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +// ifinet6 implements 
seqfile.SeqSource for /proc/net/if_inet6. +// +// +stateify savable +type ifinet6 struct { + s inet.Stack +} + +func (n *ifinet6) contents() []string { + var lines []string + nics := n.s.Interfaces() + for id, naddrs := range n.s.InterfaceAddrs() { + nic, ok := nics[id] + if !ok { + // NIC was added after NICNames was called. We'll just + // ignore it. + continue + } + + for _, a := range naddrs { + // IPv6 only. + if a.Family != linux.AF_INET6 { + continue + } + + // Fields: + // IPv6 address displayed in 32 hexadecimal chars without colons + // Netlink device number (interface index) in hexadecimal (use nic id) + // Prefix length in hexadecimal + // Scope value (use 0) + // Interface flags + // Device name + lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name)) + } + } + return lines +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (*ifinet6) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (n *ifinet6) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + + var data []seqfile.SeqData + for _, l := range n.contents() { + data = append(data, seqfile.SeqData{Buf: []byte(l), Handle: (*ifinet6)(nil)}) + } + + return data, 0 +} + +// netDev implements seqfile.SeqSource for /proc/net/dev. +// +// +stateify savable +type netDev struct { + s inet.Stack +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (n *netDev) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. See Linux's +// net/core/net-procfs.c:dev_seq_show. 
+func (n *netDev) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + + interfaces := n.s.Interfaces() + contents := make([]string, 2, 2+len(interfaces)) + // Add the table header. From net/core/net-procfs.c:dev_seq_show. + contents[0] = "Inter-| Receive | Transmit\n" + contents[1] = " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n" + + for _, i := range interfaces { + // TODO(b/71872867): Collect stats from each inet.Stack + // implementation (hostinet, epsocket, and rpcinet). + + // Implements the same format as + // net/core/net-procfs.c:dev_seq_printf_stats. + l := fmt.Sprintf("%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n", + i.Name, + // Received + 0, // bytes + 0, // packets + 0, // errors + 0, // dropped + 0, // fifo + 0, // frame + 0, // compressed + 0, // multicast + // Transmitted + 0, // bytes + 0, // packets + 0, // errors + 0, // dropped + 0, // fifo + 0, // frame + 0, // compressed + 0) // multicast + contents = append(contents, l) + } + + var data []seqfile.SeqData + for _, l := range contents { + data = append(data, seqfile.SeqData{Buf: []byte(l), Handle: (*ifinet6)(nil)}) + } + + return data, 0 +} + +// netUnix implements seqfile.SeqSource for /proc/net/unix. +// +// +stateify savable +type netUnix struct { + k *kernel.Kernel +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (*netUnix) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. 
+func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return []seqfile.SeqData{}, 0 + } + + var buf bytes.Buffer + // Header + fmt.Fprintf(&buf, "Num RefCount Protocol Flags Type St Inode Path\n") + + // Entries + for _, sref := range n.k.ListSockets(linux.AF_UNIX) { + s := sref.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", sref) + continue + } + sfile := s.(*fs.File) + sops, ok := sfile.FileOperations.(*unix.SocketOperations) + if !ok { + panic(fmt.Sprintf("Found non-unix socket file in unix socket table: %+v", sfile)) + } + + addr, err := sops.Endpoint().GetLocalAddress() + if err != nil { + log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err) + addr.Addr = "<unknown>" + } + + sockFlags := 0 + if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok { + if ce.Listening() { + // For unix domain sockets, linux reports a single flag + // value if the socket is listening, of __SO_ACCEPTCON. + sockFlags = linux.SO_ACCEPTCON + } + } + + var sockState int + switch sops.Endpoint().Type() { + case linux.SOCK_DGRAM: + sockState = linux.SS_CONNECTING + // Unlike Linux, we don't have unbound connection-less sockets, + // so no SS_DISCONNECTING. + + case linux.SOCK_SEQPACKET: + fallthrough + case linux.SOCK_STREAM: + // Connectioned. + if sops.Endpoint().(transport.ConnectingEndpoint).Connected() { + sockState = linux.SS_CONNECTED + } else { + sockState = linux.SS_UNCONNECTED + } + } + + // In the socket entry below, the value for the 'Num' field requires + // some consideration. Linux prints the address to the struct + // unix_sock representing a socket in the kernel, but may redact the + // value for unprivileged users depending on the kptr_restrict + // sysctl. 
+ // + // One use for this field is to allow a privileged user to + // introspect into the kernel memory to determine information about + // a socket not available through procfs, such as the socket's peer. + // + // On gvisor, returning a pointer to our internal structures would + // be pointless, as it wouldn't match the memory layout for struct + // unix_sock, making introspection difficult. We could populate a + // struct unix_sock with the appropriate data, but even that + // requires consideration for which kernel version to emulate, as + // the definition of this struct changes over time. + // + // For now, we always redact this pointer. + fmt.Fprintf(&buf, "%#016p: %08X %08X %08X %04X %02X %5d", + (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct. + sfile.ReadRefs()-1, // RefCount, don't count our own ref. + 0, // Protocol, always 0 for UDS. + sockFlags, // Flags. + sops.Endpoint().Type(), // Type. + sockState, // State. + sfile.InodeID(), // Inode. + ) + + // Path + if len(addr.Addr) != 0 { + if addr.Addr[0] == 0 { + // Abstract path. + fmt.Fprintf(&buf, " @%s", string(addr.Addr[1:])) + } else { + fmt.Fprintf(&buf, " %s", string(addr.Addr)) + } + } + fmt.Fprintf(&buf, "\n") + + sfile.DecRef() + } + + data := []seqfile.SeqData{{ + Buf: buf.Bytes(), + Handle: (*netUnix)(nil), + }} + return data, 0 +} diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go new file mode 100644 index 000000000..0e15894b4 --- /dev/null +++ b/pkg/sentry/fs/proc/proc.go @@ -0,0 +1,251 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package proc implements a partial in-memory file system for profs. +package proc + +import ( + "fmt" + "sort" + "strconv" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// proc is a root proc node. +// +// +stateify savable +type proc struct { + ramfs.Dir + + // k is the Kernel containing this proc node. + k *kernel.Kernel + + // pidns is the PID namespace of the task that mounted the proc filesystem + // that this node represents. + pidns *kernel.PIDNamespace + + // cgroupControllers is a map of controller name to directory in the + // cgroup hierarchy. These controllers are immutable and will be listed + // in /proc/pid/cgroup if not nil. + cgroupControllers map[string]string +} + +// New returns the root node of a partial simple procfs. 
+func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string]string) (*fs.Inode, error) { + k := kernel.KernelFromContext(ctx) + if k == nil { + return nil, fmt.Errorf("procfs requires a kernel") + } + pidns := kernel.PIDNamespaceFromContext(ctx) + if pidns == nil { + return nil, fmt.Errorf("procfs requires a PID namespace") + } + + // Note that these are just the static members. There are dynamic + // members populated in Readdir and Lookup below. + contents := map[string]*fs.Inode{ + "cpuinfo": newCPUInfo(ctx, msrc), + "filesystems": seqfile.NewSeqFileInode(ctx, &filesystemsData{}, msrc), + "loadavg": seqfile.NewSeqFileInode(ctx, &loadavgData{}, msrc), + "meminfo": seqfile.NewSeqFileInode(ctx, &meminfoData{k}, msrc), + "mounts": newProcInode(ramfs.NewSymlink(ctx, fs.RootOwner, "self/mounts"), msrc, fs.Symlink, nil), + "self": newSelf(ctx, pidns, msrc), + "stat": seqfile.NewSeqFileInode(ctx, &statData{k}, msrc), + "thread-self": newThreadSelf(ctx, pidns, msrc), + "uptime": newUptime(ctx, msrc), + "version": seqfile.NewSeqFileInode(ctx, &versionData{k}, msrc), + } + + // Construct the proc InodeOperations. + p := &proc{ + Dir: *ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)), + k: k, + pidns: pidns, + cgroupControllers: cgroupControllers, + } + + // Add more contents that need proc to be initialized. + p.AddChild(ctx, "sys", p.newSysDir(ctx, msrc)) + + // If we're using rpcinet we will let it manage /proc/net. + if _, ok := p.k.NetworkStack().(*rpcinet.Stack); ok { + p.AddChild(ctx, "net", newRPCInetProcNet(ctx, msrc)) + } else { + p.AddChild(ctx, "net", p.newNetDir(ctx, k, msrc)) + } + + return newProcInode(p, msrc, fs.SpecialDirectory, nil), nil +} + +// self is a magical link. +// +// +stateify savable +type self struct { + ramfs.Symlink + + pidns *kernel.PIDNamespace +} + +// newSelf returns a new "self" node. 
+func newSelf(ctx context.Context, pidns *kernel.PIDNamespace, msrc *fs.MountSource) *fs.Inode { + s := &self{ + Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, ""), + pidns: pidns, + } + return newProcInode(s, msrc, fs.Symlink, nil) +} + +// newThreadSelf returns a new "threadSelf" node. +func newThreadSelf(ctx context.Context, pidns *kernel.PIDNamespace, msrc *fs.MountSource) *fs.Inode { + s := &threadSelf{ + Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, ""), + pidns: pidns, + } + return newProcInode(s, msrc, fs.Symlink, nil) +} + +// Readlink implements fs.InodeOperations.Readlink. +func (s *self) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { + if t := kernel.TaskFromContext(ctx); t != nil { + tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup()) + if tgid == 0 { + return "", syserror.ENOENT + } + return strconv.FormatUint(uint64(tgid), 10), nil + } + + // Who is reading this link? + return "", syserror.EINVAL +} + +// threadSelf is more magical than "self" link. +// +// +stateify savable +type threadSelf struct { + ramfs.Symlink + + pidns *kernel.PIDNamespace +} + +// Readlink implements fs.InodeOperations.Readlink. +func (s *threadSelf) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { + if t := kernel.TaskFromContext(ctx); t != nil { + tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup()) + tid := s.pidns.IDOfTask(t) + if tid == 0 || tgid == 0 { + return "", syserror.ENOENT + } + return fmt.Sprintf("%d/task/%d", tgid, tid), nil + } + + // Who is reading this link? + return "", syserror.EINVAL +} + +// Lookup loads an Inode at name into a Dirent. +func (p *proc) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { + dirent, walkErr := p.Dir.Lookup(ctx, dir, name) + if walkErr == nil { + return dirent, nil + } + + // Try to lookup a corresponding task. + tid, err := strconv.ParseUint(name, 10, 64) + if err != nil { + // Ignore the parse error and return the original. 
+ return nil, walkErr + } + + // Grab the other task. + otherTask := p.pidns.TaskWithID(kernel.ThreadID(tid)) + if otherTask == nil { + // Per above. + return nil, walkErr + } + + // Wrap it in a taskDir. + td := p.newTaskDir(otherTask, dir.MountSource, true) + return fs.NewDirent(td, name), nil +} + +// GetFile implements fs.InodeOperations. +func (p *proc) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, dirent, flags, &rootProcFile{iops: p}), nil +} + +// rootProcFile implements fs.FileOperations for the proc directory. +// +// +stateify savable +type rootProcFile struct { + fsutil.DirFileOperations `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + + iops *proc +} + +var _ fs.FileOperations = (*rootProcFile)(nil) + +// Readdir implements fs.FileOperations.Readdir. +func (rpf *rootProcFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySerializer) (int64, error) { + offset := file.Offset() + dirCtx := &fs.DirCtx{ + Serializer: ser, + } + + // Get normal directory contents from ramfs dir. + names, m := rpf.iops.Dir.Children() + + // Add dot and dotdot. + root := fs.RootFromContext(ctx) + if root != nil { + defer root.DecRef() + } + dot, dotdot := file.Dirent.GetDotAttrs(root) + names = append(names, ".", "..") + m["."] = dot + m[".."] = dotdot + + // Collect tasks. + // Per linux we only include it in directory listings if it's the leader. + // But for whatever crazy reason, you can still walk to the given node. 
+ for _, tg := range rpf.iops.pidns.ThreadGroups() { + if leader := tg.Leader(); leader != nil { + name := strconv.FormatUint(uint64(tg.ID()), 10) + m[name] = fs.GenericDentAttr(fs.SpecialDirectory, device.ProcDevice) + names = append(names, name) + } + } + + if offset >= int64(len(m)) { + return offset, nil + } + sort.Strings(names) + names = names[offset:] + for _, name := range names { + if err := dirCtx.DirEmit(name, m[name]); err != nil { + return offset, err + } + offset++ + } + return offset, nil +} diff --git a/pkg/sentry/fs/proc/proc_state_autogen.go b/pkg/sentry/fs/proc/proc_state_autogen.go new file mode 100755 index 000000000..788606f21 --- /dev/null +++ b/pkg/sentry/fs/proc/proc_state_autogen.go @@ -0,0 +1,657 @@ +// automatically generated by stateify. + +package proc + +import ( + "gvisor.googlesource.com/gvisor/pkg/state" +) + +func (x *execArgInode) beforeSave() {} +func (x *execArgInode) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("arg", &x.arg) + m.Save("t", &x.t) +} + +func (x *execArgInode) afterLoad() {} +func (x *execArgInode) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("arg", &x.arg) + m.Load("t", &x.t) +} + +func (x *execArgFile) beforeSave() {} +func (x *execArgFile) save(m state.Map) { + x.beforeSave() + m.Save("arg", &x.arg) + m.Save("t", &x.t) +} + +func (x *execArgFile) afterLoad() {} +func (x *execArgFile) load(m state.Map) { + m.Load("arg", &x.arg) + m.Load("t", &x.t) +} + +func (x *fdDir) beforeSave() {} +func (x *fdDir) save(m state.Map) { + x.beforeSave() + m.Save("Dir", &x.Dir) + m.Save("t", &x.t) +} + +func (x *fdDir) afterLoad() {} +func (x *fdDir) load(m state.Map) { + m.Load("Dir", &x.Dir) + m.Load("t", &x.t) +} + +func (x *fdDirFile) beforeSave() {} +func (x *fdDirFile) save(m state.Map) { + x.beforeSave() + m.Save("isInfoFile", &x.isInfoFile) + m.Save("t", &x.t) +} + +func (x *fdDirFile) afterLoad() {} +func (x *fdDirFile) load(m state.Map) { 
+ m.Load("isInfoFile", &x.isInfoFile) + m.Load("t", &x.t) +} + +func (x *fdInfoDir) beforeSave() {} +func (x *fdInfoDir) save(m state.Map) { + x.beforeSave() + m.Save("Dir", &x.Dir) + m.Save("t", &x.t) +} + +func (x *fdInfoDir) afterLoad() {} +func (x *fdInfoDir) load(m state.Map) { + m.Load("Dir", &x.Dir) + m.Load("t", &x.t) +} + +func (x *filesystemsData) beforeSave() {} +func (x *filesystemsData) save(m state.Map) { + x.beforeSave() +} + +func (x *filesystemsData) afterLoad() {} +func (x *filesystemsData) load(m state.Map) { +} + +func (x *filesystem) beforeSave() {} +func (x *filesystem) save(m state.Map) { + x.beforeSave() +} + +func (x *filesystem) afterLoad() {} +func (x *filesystem) load(m state.Map) { +} + +func (x *taskOwnedInodeOps) beforeSave() {} +func (x *taskOwnedInodeOps) save(m state.Map) { + x.beforeSave() + m.Save("InodeOperations", &x.InodeOperations) + m.Save("t", &x.t) +} + +func (x *taskOwnedInodeOps) afterLoad() {} +func (x *taskOwnedInodeOps) load(m state.Map) { + m.Load("InodeOperations", &x.InodeOperations) + m.Load("t", &x.t) +} + +func (x *staticFileInodeOps) beforeSave() {} +func (x *staticFileInodeOps) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("InodeStaticFileGetter", &x.InodeStaticFileGetter) +} + +func (x *staticFileInodeOps) afterLoad() {} +func (x *staticFileInodeOps) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("InodeStaticFileGetter", &x.InodeStaticFileGetter) +} + +func (x *loadavgData) beforeSave() {} +func (x *loadavgData) save(m state.Map) { + x.beforeSave() +} + +func (x *loadavgData) afterLoad() {} +func (x *loadavgData) load(m state.Map) { +} + +func (x *meminfoData) beforeSave() {} +func (x *meminfoData) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *meminfoData) afterLoad() {} +func (x *meminfoData) load(m state.Map) { + m.Load("k", &x.k) +} + +func (x *mountInfoFile) beforeSave() {} +func 
(x *mountInfoFile) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *mountInfoFile) afterLoad() {} +func (x *mountInfoFile) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *mountsFile) beforeSave() {} +func (x *mountsFile) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *mountsFile) afterLoad() {} +func (x *mountsFile) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *ifinet6) beforeSave() {} +func (x *ifinet6) save(m state.Map) { + x.beforeSave() + m.Save("s", &x.s) +} + +func (x *ifinet6) afterLoad() {} +func (x *ifinet6) load(m state.Map) { + m.Load("s", &x.s) +} + +func (x *netDev) beforeSave() {} +func (x *netDev) save(m state.Map) { + x.beforeSave() + m.Save("s", &x.s) +} + +func (x *netDev) afterLoad() {} +func (x *netDev) load(m state.Map) { + m.Load("s", &x.s) +} + +func (x *netUnix) beforeSave() {} +func (x *netUnix) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *netUnix) afterLoad() {} +func (x *netUnix) load(m state.Map) { + m.Load("k", &x.k) +} + +func (x *proc) beforeSave() {} +func (x *proc) save(m state.Map) { + x.beforeSave() + m.Save("Dir", &x.Dir) + m.Save("k", &x.k) + m.Save("pidns", &x.pidns) + m.Save("cgroupControllers", &x.cgroupControllers) +} + +func (x *proc) afterLoad() {} +func (x *proc) load(m state.Map) { + m.Load("Dir", &x.Dir) + m.Load("k", &x.k) + m.Load("pidns", &x.pidns) + m.Load("cgroupControllers", &x.cgroupControllers) +} + +func (x *self) beforeSave() {} +func (x *self) save(m state.Map) { + x.beforeSave() + m.Save("Symlink", &x.Symlink) + m.Save("pidns", &x.pidns) +} + +func (x *self) afterLoad() {} +func (x *self) load(m state.Map) { + m.Load("Symlink", &x.Symlink) + m.Load("pidns", &x.pidns) +} + +func (x *threadSelf) beforeSave() {} +func (x *threadSelf) save(m state.Map) { + x.beforeSave() + m.Save("Symlink", &x.Symlink) + m.Save("pidns", &x.pidns) +} + +func (x *threadSelf) afterLoad() {} +func (x *threadSelf) load(m state.Map) { + 
m.Load("Symlink", &x.Symlink) + m.Load("pidns", &x.pidns) +} + +func (x *rootProcFile) beforeSave() {} +func (x *rootProcFile) save(m state.Map) { + x.beforeSave() + m.Save("iops", &x.iops) +} + +func (x *rootProcFile) afterLoad() {} +func (x *rootProcFile) load(m state.Map) { + m.Load("iops", &x.iops) +} + +func (x *statData) beforeSave() {} +func (x *statData) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *statData) afterLoad() {} +func (x *statData) load(m state.Map) { + m.Load("k", &x.k) +} + +func (x *mmapMinAddrData) beforeSave() {} +func (x *mmapMinAddrData) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *mmapMinAddrData) afterLoad() {} +func (x *mmapMinAddrData) load(m state.Map) { + m.Load("k", &x.k) +} + +func (x *overcommitMemory) beforeSave() {} +func (x *overcommitMemory) save(m state.Map) { + x.beforeSave() +} + +func (x *overcommitMemory) afterLoad() {} +func (x *overcommitMemory) load(m state.Map) { +} + +func (x *hostname) beforeSave() {} +func (x *hostname) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) +} + +func (x *hostname) afterLoad() {} +func (x *hostname) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) +} + +func (x *hostnameFile) beforeSave() {} +func (x *hostnameFile) save(m state.Map) { + x.beforeSave() +} + +func (x *hostnameFile) afterLoad() {} +func (x *hostnameFile) load(m state.Map) { +} + +func (x *tcpMemInode) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("dir", &x.dir) + m.Save("s", &x.s) + m.Save("size", &x.size) +} + +func (x *tcpMemInode) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("dir", &x.dir) + m.LoadWait("s", &x.s) + m.Load("size", &x.size) + m.AfterLoad(x.afterLoad) +} + +func (x *tcpMemFile) beforeSave() {} +func (x *tcpMemFile) save(m state.Map) { + x.beforeSave() + m.Save("tcpMemInode", &x.tcpMemInode) +} + +func (x *tcpMemFile) 
afterLoad() {} +func (x *tcpMemFile) load(m state.Map) { + m.Load("tcpMemInode", &x.tcpMemInode) +} + +func (x *tcpSack) beforeSave() {} +func (x *tcpSack) save(m state.Map) { + x.beforeSave() + m.Save("stack", &x.stack) + m.Save("enabled", &x.enabled) + m.Save("SimpleFileInode", &x.SimpleFileInode) +} + +func (x *tcpSack) load(m state.Map) { + m.LoadWait("stack", &x.stack) + m.Load("enabled", &x.enabled) + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.AfterLoad(x.afterLoad) +} + +func (x *tcpSackFile) beforeSave() {} +func (x *tcpSackFile) save(m state.Map) { + x.beforeSave() + m.Save("tcpSack", &x.tcpSack) + m.Save("stack", &x.stack) +} + +func (x *tcpSackFile) afterLoad() {} +func (x *tcpSackFile) load(m state.Map) { + m.Load("tcpSack", &x.tcpSack) + m.LoadWait("stack", &x.stack) +} + +func (x *taskDir) beforeSave() {} +func (x *taskDir) save(m state.Map) { + x.beforeSave() + m.Save("Dir", &x.Dir) + m.Save("t", &x.t) + m.Save("pidns", &x.pidns) +} + +func (x *taskDir) afterLoad() {} +func (x *taskDir) load(m state.Map) { + m.Load("Dir", &x.Dir) + m.Load("t", &x.t) + m.Load("pidns", &x.pidns) +} + +func (x *subtasks) beforeSave() {} +func (x *subtasks) save(m state.Map) { + x.beforeSave() + m.Save("Dir", &x.Dir) + m.Save("t", &x.t) + m.Save("p", &x.p) +} + +func (x *subtasks) afterLoad() {} +func (x *subtasks) load(m state.Map) { + m.Load("Dir", &x.Dir) + m.Load("t", &x.t) + m.Load("p", &x.p) +} + +func (x *subtasksFile) beforeSave() {} +func (x *subtasksFile) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) + m.Save("pidns", &x.pidns) +} + +func (x *subtasksFile) afterLoad() {} +func (x *subtasksFile) load(m state.Map) { + m.Load("t", &x.t) + m.Load("pidns", &x.pidns) +} + +func (x *exe) beforeSave() {} +func (x *exe) save(m state.Map) { + x.beforeSave() + m.Save("Symlink", &x.Symlink) + m.Save("t", &x.t) +} + +func (x *exe) afterLoad() {} +func (x *exe) load(m state.Map) { + m.Load("Symlink", &x.Symlink) + m.Load("t", &x.t) +} + +func (x 
*namespaceSymlink) beforeSave() {} +func (x *namespaceSymlink) save(m state.Map) { + x.beforeSave() + m.Save("Symlink", &x.Symlink) + m.Save("t", &x.t) +} + +func (x *namespaceSymlink) afterLoad() {} +func (x *namespaceSymlink) load(m state.Map) { + m.Load("Symlink", &x.Symlink) + m.Load("t", &x.t) +} + +func (x *mapsData) beforeSave() {} +func (x *mapsData) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *mapsData) afterLoad() {} +func (x *mapsData) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *smapsData) beforeSave() {} +func (x *smapsData) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *smapsData) afterLoad() {} +func (x *smapsData) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *taskStatData) beforeSave() {} +func (x *taskStatData) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) + m.Save("tgstats", &x.tgstats) + m.Save("pidns", &x.pidns) +} + +func (x *taskStatData) afterLoad() {} +func (x *taskStatData) load(m state.Map) { + m.Load("t", &x.t) + m.Load("tgstats", &x.tgstats) + m.Load("pidns", &x.pidns) +} + +func (x *statmData) beforeSave() {} +func (x *statmData) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *statmData) afterLoad() {} +func (x *statmData) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *statusData) beforeSave() {} +func (x *statusData) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) + m.Save("pidns", &x.pidns) +} + +func (x *statusData) afterLoad() {} +func (x *statusData) load(m state.Map) { + m.Load("t", &x.t) + m.Load("pidns", &x.pidns) +} + +func (x *ioData) beforeSave() {} +func (x *ioData) save(m state.Map) { + x.beforeSave() + m.Save("ioUsage", &x.ioUsage) +} + +func (x *ioData) afterLoad() {} +func (x *ioData) load(m state.Map) { + m.Load("ioUsage", &x.ioUsage) +} + +func (x *comm) beforeSave() {} +func (x *comm) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("t", &x.t) +} + 
+func (x *comm) afterLoad() {} +func (x *comm) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("t", &x.t) +} + +func (x *commFile) beforeSave() {} +func (x *commFile) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *commFile) afterLoad() {} +func (x *commFile) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *auxvec) beforeSave() {} +func (x *auxvec) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("t", &x.t) +} + +func (x *auxvec) afterLoad() {} +func (x *auxvec) load(m state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("t", &x.t) +} + +func (x *auxvecFile) beforeSave() {} +func (x *auxvecFile) save(m state.Map) { + x.beforeSave() + m.Save("t", &x.t) +} + +func (x *auxvecFile) afterLoad() {} +func (x *auxvecFile) load(m state.Map) { + m.Load("t", &x.t) +} + +func (x *idMapInodeOperations) beforeSave() {} +func (x *idMapInodeOperations) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Save("t", &x.t) + m.Save("gids", &x.gids) +} + +func (x *idMapInodeOperations) afterLoad() {} +func (x *idMapInodeOperations) load(m state.Map) { + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Load("t", &x.t) + m.Load("gids", &x.gids) +} + +func (x *idMapFileOperations) beforeSave() {} +func (x *idMapFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("iops", &x.iops) +} + +func (x *idMapFileOperations) afterLoad() {} +func (x *idMapFileOperations) load(m state.Map) { + m.Load("iops", &x.iops) +} + +func (x *uptime) beforeSave() {} +func (x *uptime) save(m state.Map) { + x.beforeSave() + m.Save("SimpleFileInode", &x.SimpleFileInode) + m.Save("startTime", &x.startTime) +} + +func (x *uptime) afterLoad() {} +func (x *uptime) load(m 
state.Map) { + m.Load("SimpleFileInode", &x.SimpleFileInode) + m.Load("startTime", &x.startTime) +} + +func (x *uptimeFile) beforeSave() {} +func (x *uptimeFile) save(m state.Map) { + x.beforeSave() + m.Save("startTime", &x.startTime) +} + +func (x *uptimeFile) afterLoad() {} +func (x *uptimeFile) load(m state.Map) { + m.Load("startTime", &x.startTime) +} + +func (x *versionData) beforeSave() {} +func (x *versionData) save(m state.Map) { + x.beforeSave() + m.Save("k", &x.k) +} + +func (x *versionData) afterLoad() {} +func (x *versionData) load(m state.Map) { + m.Load("k", &x.k) +} + +func init() { + state.Register("proc.execArgInode", (*execArgInode)(nil), state.Fns{Save: (*execArgInode).save, Load: (*execArgInode).load}) + state.Register("proc.execArgFile", (*execArgFile)(nil), state.Fns{Save: (*execArgFile).save, Load: (*execArgFile).load}) + state.Register("proc.fdDir", (*fdDir)(nil), state.Fns{Save: (*fdDir).save, Load: (*fdDir).load}) + state.Register("proc.fdDirFile", (*fdDirFile)(nil), state.Fns{Save: (*fdDirFile).save, Load: (*fdDirFile).load}) + state.Register("proc.fdInfoDir", (*fdInfoDir)(nil), state.Fns{Save: (*fdInfoDir).save, Load: (*fdInfoDir).load}) + state.Register("proc.filesystemsData", (*filesystemsData)(nil), state.Fns{Save: (*filesystemsData).save, Load: (*filesystemsData).load}) + state.Register("proc.filesystem", (*filesystem)(nil), state.Fns{Save: (*filesystem).save, Load: (*filesystem).load}) + state.Register("proc.taskOwnedInodeOps", (*taskOwnedInodeOps)(nil), state.Fns{Save: (*taskOwnedInodeOps).save, Load: (*taskOwnedInodeOps).load}) + state.Register("proc.staticFileInodeOps", (*staticFileInodeOps)(nil), state.Fns{Save: (*staticFileInodeOps).save, Load: (*staticFileInodeOps).load}) + state.Register("proc.loadavgData", (*loadavgData)(nil), state.Fns{Save: (*loadavgData).save, Load: (*loadavgData).load}) + state.Register("proc.meminfoData", (*meminfoData)(nil), state.Fns{Save: (*meminfoData).save, Load: (*meminfoData).load}) + 
state.Register("proc.mountInfoFile", (*mountInfoFile)(nil), state.Fns{Save: (*mountInfoFile).save, Load: (*mountInfoFile).load}) + state.Register("proc.mountsFile", (*mountsFile)(nil), state.Fns{Save: (*mountsFile).save, Load: (*mountsFile).load}) + state.Register("proc.ifinet6", (*ifinet6)(nil), state.Fns{Save: (*ifinet6).save, Load: (*ifinet6).load}) + state.Register("proc.netDev", (*netDev)(nil), state.Fns{Save: (*netDev).save, Load: (*netDev).load}) + state.Register("proc.netUnix", (*netUnix)(nil), state.Fns{Save: (*netUnix).save, Load: (*netUnix).load}) + state.Register("proc.proc", (*proc)(nil), state.Fns{Save: (*proc).save, Load: (*proc).load}) + state.Register("proc.self", (*self)(nil), state.Fns{Save: (*self).save, Load: (*self).load}) + state.Register("proc.threadSelf", (*threadSelf)(nil), state.Fns{Save: (*threadSelf).save, Load: (*threadSelf).load}) + state.Register("proc.rootProcFile", (*rootProcFile)(nil), state.Fns{Save: (*rootProcFile).save, Load: (*rootProcFile).load}) + state.Register("proc.statData", (*statData)(nil), state.Fns{Save: (*statData).save, Load: (*statData).load}) + state.Register("proc.mmapMinAddrData", (*mmapMinAddrData)(nil), state.Fns{Save: (*mmapMinAddrData).save, Load: (*mmapMinAddrData).load}) + state.Register("proc.overcommitMemory", (*overcommitMemory)(nil), state.Fns{Save: (*overcommitMemory).save, Load: (*overcommitMemory).load}) + state.Register("proc.hostname", (*hostname)(nil), state.Fns{Save: (*hostname).save, Load: (*hostname).load}) + state.Register("proc.hostnameFile", (*hostnameFile)(nil), state.Fns{Save: (*hostnameFile).save, Load: (*hostnameFile).load}) + state.Register("proc.tcpMemInode", (*tcpMemInode)(nil), state.Fns{Save: (*tcpMemInode).save, Load: (*tcpMemInode).load}) + state.Register("proc.tcpMemFile", (*tcpMemFile)(nil), state.Fns{Save: (*tcpMemFile).save, Load: (*tcpMemFile).load}) + state.Register("proc.tcpSack", (*tcpSack)(nil), state.Fns{Save: (*tcpSack).save, Load: (*tcpSack).load}) + 
state.Register("proc.tcpSackFile", (*tcpSackFile)(nil), state.Fns{Save: (*tcpSackFile).save, Load: (*tcpSackFile).load}) + state.Register("proc.taskDir", (*taskDir)(nil), state.Fns{Save: (*taskDir).save, Load: (*taskDir).load}) + state.Register("proc.subtasks", (*subtasks)(nil), state.Fns{Save: (*subtasks).save, Load: (*subtasks).load}) + state.Register("proc.subtasksFile", (*subtasksFile)(nil), state.Fns{Save: (*subtasksFile).save, Load: (*subtasksFile).load}) + state.Register("proc.exe", (*exe)(nil), state.Fns{Save: (*exe).save, Load: (*exe).load}) + state.Register("proc.namespaceSymlink", (*namespaceSymlink)(nil), state.Fns{Save: (*namespaceSymlink).save, Load: (*namespaceSymlink).load}) + state.Register("proc.mapsData", (*mapsData)(nil), state.Fns{Save: (*mapsData).save, Load: (*mapsData).load}) + state.Register("proc.smapsData", (*smapsData)(nil), state.Fns{Save: (*smapsData).save, Load: (*smapsData).load}) + state.Register("proc.taskStatData", (*taskStatData)(nil), state.Fns{Save: (*taskStatData).save, Load: (*taskStatData).load}) + state.Register("proc.statmData", (*statmData)(nil), state.Fns{Save: (*statmData).save, Load: (*statmData).load}) + state.Register("proc.statusData", (*statusData)(nil), state.Fns{Save: (*statusData).save, Load: (*statusData).load}) + state.Register("proc.ioData", (*ioData)(nil), state.Fns{Save: (*ioData).save, Load: (*ioData).load}) + state.Register("proc.comm", (*comm)(nil), state.Fns{Save: (*comm).save, Load: (*comm).load}) + state.Register("proc.commFile", (*commFile)(nil), state.Fns{Save: (*commFile).save, Load: (*commFile).load}) + state.Register("proc.auxvec", (*auxvec)(nil), state.Fns{Save: (*auxvec).save, Load: (*auxvec).load}) + state.Register("proc.auxvecFile", (*auxvecFile)(nil), state.Fns{Save: (*auxvecFile).save, Load: (*auxvecFile).load}) + state.Register("proc.idMapInodeOperations", (*idMapInodeOperations)(nil), state.Fns{Save: (*idMapInodeOperations).save, Load: (*idMapInodeOperations).load}) + 
state.Register("proc.idMapFileOperations", (*idMapFileOperations)(nil), state.Fns{Save: (*idMapFileOperations).save, Load: (*idMapFileOperations).load}) + state.Register("proc.uptime", (*uptime)(nil), state.Fns{Save: (*uptime).save, Load: (*uptime).load}) + state.Register("proc.uptimeFile", (*uptimeFile)(nil), state.Fns{Save: (*uptimeFile).save, Load: (*uptimeFile).load}) + state.Register("proc.versionData", (*versionData)(nil), state.Fns{Save: (*versionData).save, Load: (*versionData).load}) +} diff --git a/pkg/sentry/fs/proc/rpcinet_proc.go b/pkg/sentry/fs/proc/rpcinet_proc.go new file mode 100644 index 000000000..e36c0bfa6 --- /dev/null +++ b/pkg/sentry/fs/proc/rpcinet_proc.go @@ -0,0 +1,217 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "io" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// rpcInetInode implements fs.InodeOperations. 
+type rpcInetInode struct { + fsutil.SimpleFileInode + + // filepath is the full path of this rpcInetInode. + filepath string + + k *kernel.Kernel +} + +func newRPCInetInode(ctx context.Context, msrc *fs.MountSource, filepath string, mode linux.FileMode) *fs.Inode { + f := &rpcInetInode{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(mode), linux.PROC_SUPER_MAGIC), + filepath: filepath, + k: kernel.KernelFromContext(ctx), + } + return newProcInode(f, msrc, fs.SpecialFile, nil) +} + +// GetFile implements fs.InodeOperations.GetFile. +func (i *rpcInetInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + flags.Pread = true + flags.Pwrite = true + fops := &rpcInetFile{ + inode: i, + } + return fs.NewFile(ctx, dirent, flags, fops), nil +} + +// rpcInetFile implements fs.FileOperations as RPCs. +type rpcInetFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + inode *rpcInetInode +} + +// Read implements fs.FileOperations.Read. +// +// This method can panic if an rpcInetInode was created without an rpcinet +// stack. 
+func (f *rpcInetFile) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + s, ok := f.inode.k.NetworkStack().(*rpcinet.Stack) + if !ok { + panic("Network stack is not a rpcinet.") + } + + contents, se := s.RPCReadFile(f.inode.filepath) + if se != nil || offset >= int64(len(contents)) { + return 0, io.EOF + } + + n, err := dst.CopyOut(ctx, contents[offset:]) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. +// +// This method can panic if an rpcInetInode was created without an rpcInet +// stack. +func (f *rpcInetFile) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + s, ok := f.inode.k.NetworkStack().(*rpcinet.Stack) + if !ok { + panic("Network stack is not a rpcinet.") + } + + if src.NumBytes() == 0 { + return 0, nil + } + + b := make([]byte, src.NumBytes(), src.NumBytes()) + n, err := src.CopyIn(ctx, b) + if err != nil { + return int64(n), err + } + + written, se := s.RPCWriteFile(f.inode.filepath, b) + return int64(written), se.ToError() +} + +// newRPCInetProcNet will build an inode for /proc/net. 
+func newRPCInetProcNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + contents := map[string]*fs.Inode{ + "arp": newRPCInetInode(ctx, msrc, "/proc/net/arp", 0444), + "dev": newRPCInetInode(ctx, msrc, "/proc/net/dev", 0444), + "if_inet6": newRPCInetInode(ctx, msrc, "/proc/net/if_inet6", 0444), + "ipv6_route": newRPCInetInode(ctx, msrc, "/proc/net/ipv6_route", 0444), + "netlink": newRPCInetInode(ctx, msrc, "/proc/net/netlink", 0444), + "netstat": newRPCInetInode(ctx, msrc, "/proc/net/netstat", 0444), + "packet": newRPCInetInode(ctx, msrc, "/proc/net/packet", 0444), + "protocols": newRPCInetInode(ctx, msrc, "/proc/net/protocols", 0444), + "psched": newRPCInetInode(ctx, msrc, "/proc/net/psched", 0444), + "ptype": newRPCInetInode(ctx, msrc, "/proc/net/ptype", 0444), + "route": newRPCInetInode(ctx, msrc, "/proc/net/route", 0444), + "tcp": newRPCInetInode(ctx, msrc, "/proc/net/tcp", 0444), + "tcp6": newRPCInetInode(ctx, msrc, "/proc/net/tcp6", 0444), + "udp": newRPCInetInode(ctx, msrc, "/proc/net/udp", 0444), + "udp6": newRPCInetInode(ctx, msrc, "/proc/net/udp6", 0444), + } + + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +// newRPCInetProcSysNet will build an inode for /proc/sys/net. +func newRPCInetProcSysNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + contents := map[string]*fs.Inode{ + "ipv4": newRPCInetSysNetIPv4Dir(ctx, msrc), + "core": newRPCInetSysNetCore(ctx, msrc), + } + + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +// newRPCInetSysNetCore builds the /proc/sys/net/core directory. 
+func newRPCInetSysNetCore(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + contents := map[string]*fs.Inode{ + "default_qdisc": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/default_qdisc", 0444), + "message_burst": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/message_burst", 0444), + "message_cost": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/message_cost", 0444), + "optmem_max": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/optmem_max", 0444), + "rmem_default": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/rmem_default", 0444), + "rmem_max": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/rmem_max", 0444), + "somaxconn": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/somaxconn", 0444), + "wmem_default": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/wmem_default", 0444), + "wmem_max": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/wmem_max", 0444), + } + + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +// newRPCInetSysNetIPv4Dir builds the /proc/sys/net/ipv4 directory. 
+func newRPCInetSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + contents := map[string]*fs.Inode{ + "ip_local_port_range": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_local_port_range", 0444), + "ip_local_reserved_ports": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_local_reserved_ports", 0444), + "ipfrag_time": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ipfrag_time", 0444), + "ip_nonlocal_bind": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_nonlocal_bind", 0444), + "ip_no_pmtu_disc": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_no_pmtu_disc", 0444), + "tcp_allowed_congestion_control": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_allowed_congestion_control", 0444), + "tcp_available_congestion_control": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_available_congestion_control", 0444), + "tcp_base_mss": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_base_mss", 0444), + "tcp_congestion_control": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_congestion_control", 0644), + "tcp_dsack": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_dsack", 0644), + "tcp_early_retrans": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_early_retrans", 0644), + "tcp_fack": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fack", 0644), + "tcp_fastopen": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fastopen", 0644), + "tcp_fastopen_key": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fastopen_key", 0444), + "tcp_fin_timeout": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fin_timeout", 0644), + "tcp_invalid_ratelimit": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_invalid_ratelimit", 0444), + "tcp_keepalive_intvl": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_intvl", 0644), + "tcp_keepalive_probes": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_probes", 0644), + "tcp_keepalive_time": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_time", 0644), + 
"tcp_mem": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_mem", 0444), + "tcp_mtu_probing": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_mtu_probing", 0644), + "tcp_no_metrics_save": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_no_metrics_save", 0444), + "tcp_probe_interval": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_probe_interval", 0444), + "tcp_probe_threshold": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_probe_threshold", 0444), + "tcp_retries1": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_retries1", 0644), + "tcp_retries2": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_retries2", 0644), + "tcp_rfc1337": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_rfc1337", 0444), + "tcp_rmem": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_rmem", 0444), + "tcp_sack": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_sack", 0644), + "tcp_slow_start_after_idle": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_slow_start_after_idle", 0644), + "tcp_synack_retries": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_synack_retries", 0644), + "tcp_syn_retries": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_syn_retries", 0644), + "tcp_timestamps": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_timestamps", 0644), + "tcp_wmem": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_wmem", 0444), + } + + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go new file mode 100644 index 000000000..8364d86ed --- /dev/null +++ b/pkg/sentry/fs/proc/seqfile/seqfile.go @@ -0,0 +1,282 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package seqfile + +import ( + "io" + "sync" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" + ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// SeqHandle is a helper handle to seek in the file. +type SeqHandle interface{} + +// SeqData holds the data for one unit in the file. +// +// +stateify savable +type SeqData struct { + // The data to be returned to the user. + Buf []byte + + // A seek handle used to find the next valid unit in ReadSeqFileData. + Handle SeqHandle +} + +// SeqSource is a data source for a SeqFile file. +type SeqSource interface { + // NeedsUpdate returns true if the consumer of SeqData should call + // ReadSeqFileData again. Generation is the generation returned by + // ReadSeqFileData or 0. + NeedsUpdate(generation int64) bool + + // ReadSeqFileData returns a slice of SeqData ordered by unit and the current + // generation. The first entry in the slice is greater than the handle. + // If handle is nil then all known records are returned. Generation + // must always be greater than 0. + ReadSeqFileData(ctx context.Context, handle SeqHandle) ([]SeqData, int64) +} + +// SeqGenerationCounter is a counter to keep track if the SeqSource should be +// updated. 
SeqGenerationCounter is not thread-safe and should be protected +// with a mutex. +type SeqGenerationCounter struct { + // The generation that the SeqData is at. + generation int64 +} + +// SetGeneration sets the generation to the new value, be careful to not set it +// to a value less than current. +func (s *SeqGenerationCounter) SetGeneration(generation int64) { + s.generation = generation +} + +// Update increments the current generation. +func (s *SeqGenerationCounter) Update() { + s.generation++ +} + +// Generation returns the current generation counter. +func (s *SeqGenerationCounter) Generation() int64 { + return s.generation +} + +// IsCurrent returns whether the given generation is current or not. +func (s *SeqGenerationCounter) IsCurrent(generation int64) bool { + return s.Generation() == generation +} + +// SeqFile is used to provide dynamic files that can be ordered by record. +// +// +stateify savable +type SeqFile struct { + fsutil.InodeGenericChecker `state:"nosave"` + fsutil.InodeNoopRelease `state:"nosave"` + fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotAllocatable `state:"nosave"` + fsutil.InodeNotDirectory `state:"nosave"` + fsutil.InodeNotMappable `state:"nosave"` + fsutil.InodeNotSocket `state:"nosave"` + fsutil.InodeNotSymlink `state:"nosave"` + fsutil.InodeNotTruncatable `state:"nosave"` + fsutil.InodeVirtual `state:"nosave"` + + fsutil.InodeSimpleExtendedAttributes + fsutil.InodeSimpleAttributes + + // mu protects the fields below. + mu sync.Mutex `state:"nosave"` + + SeqSource + + source []SeqData + generation int64 + lastRead int64 +} + +var _ fs.InodeOperations = (*SeqFile)(nil) + +// NewSeqFile returns a seqfile suitable for use by external consumers. 
+func NewSeqFile(ctx context.Context, source SeqSource) *SeqFile { + return &SeqFile{ + InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + SeqSource: source, + } +} + +// NewSeqFileInode returns an Inode with SeqFile InodeOperations. +func NewSeqFileInode(ctx context.Context, source SeqSource, msrc *fs.MountSource) *fs.Inode { + iops := NewSeqFile(ctx, source) + sattr := fs.StableAttr{ + DeviceID: device.ProcDevice.DeviceID(), + InodeID: device.ProcDevice.NextIno(), + BlockSize: usermem.PageSize, + Type: fs.SpecialFile, + } + return fs.NewInode(iops, msrc, sattr) +} + +// UnstableAttr returns unstable attributes of the SeqFile. +func (s *SeqFile) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) { + uattr, err := s.InodeSimpleAttributes.UnstableAttr(ctx, inode) + if err != nil { + return fs.UnstableAttr{}, err + } + uattr.ModificationTime = ktime.NowFromContext(ctx) + return uattr, nil +} + +// GetFile implements fs.InodeOperations.GetFile. +func (s *SeqFile) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, dirent, flags, &seqFileOperations{seqFile: s}), nil +} + +// findIndexAndOffset finds the unit that corresponds to a certain offset. +// Returns the unit and the offset within the unit. If there are not enough +// units len(data) and leftover offset is returned. +func findIndexAndOffset(data []SeqData, offset int64) (int, int64) { + for i, buf := range data { + l := int64(len(buf.Buf)) + if offset < l { + return i, offset + } + offset -= l + } + return len(data), offset +} + +// updateSourceLocked requires that s.mu is held. +func (s *SeqFile) updateSourceLocked(ctx context.Context, record int) { + var h SeqHandle + if record == 0 { + h = nil + } else { + h = s.source[record-1].Handle + } + // Save what we have previously read. 
+ s.source = s.source[:record] + var newSource []SeqData + newSource, s.generation = s.SeqSource.ReadSeqFileData(ctx, h) + s.source = append(s.source, newSource...) +} + +// seqFileOperations implements fs.FileOperations. +// +// +stateify savable +type seqFileOperations struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + seqFile *SeqFile +} + +var _ fs.FileOperations = (*seqFileOperations)(nil) + +// Write implements fs.FileOperations.Write. +func (*seqFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) { + return 0, syserror.EACCES +} + +// Read implements fs.FileOperations.Read. +func (sfo *seqFileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + sfo.seqFile.mu.Lock() + defer sfo.seqFile.mu.Unlock() + + sfo.seqFile.NotifyAccess(ctx) + defer func() { sfo.seqFile.lastRead = offset }() + + updated := false + + // Try to find where we should start reading this file. + i, recordOffset := findIndexAndOffset(sfo.seqFile.source, offset) + if i == len(sfo.seqFile.source) { + // Ok, we're at EOF. Let's first check to see if there might be + // more data available to us. If there is more data, add it to + // the end and try reading again. + if !sfo.seqFile.SeqSource.NeedsUpdate(sfo.seqFile.generation) { + return 0, io.EOF + } + oldLen := len(sfo.seqFile.source) + sfo.seqFile.updateSourceLocked(ctx, len(sfo.seqFile.source)) + updated = true + // We know that we had consumed everything up until this point + // so we search in the new slice instead of starting over. 
+ i, recordOffset = findIndexAndOffset(sfo.seqFile.source[oldLen:], recordOffset) + i += oldLen + // i is at most the length of the slice which is + // len(sfo.seqFile.source) - oldLen. So at most i will be equal to + // len(sfo.seqFile.source). + if i == len(sfo.seqFile.source) { + return 0, io.EOF + } + } + + var done int64 + // We're reading parts of a record, finish reading the current object + // before continuing on to the next. We don't refresh our data source + // before this record is completed. + if recordOffset != 0 { + n, err := dst.CopyOut(ctx, sfo.seqFile.source[i].Buf[recordOffset:]) + done += int64(n) + dst = dst.DropFirst(n) + if dst.NumBytes() == 0 || err != nil { + return done, err + } + i++ + } + + // Next/New unit, update the source file if necessary. Make an extra + // check to see if we've seeked backwards and if so always update our + // data source. + if !updated && (sfo.seqFile.SeqSource.NeedsUpdate(sfo.seqFile.generation) || sfo.seqFile.lastRead > offset) { + sfo.seqFile.updateSourceLocked(ctx, i) + // recordOffset is 0 here and we won't update records behind the + // current one so recordOffset is still 0 even though source + // just got updated. Just read the next record. + } + + // Finish by reading all the available data. + for _, buf := range sfo.seqFile.source[i:] { + n, err := dst.CopyOut(ctx, buf.Buf) + done += int64(n) + dst = dst.DropFirst(n) + if dst.NumBytes() == 0 || err != nil { + return done, err + } + } + + // If the file shrank (entries not yet read were removed above) + // while we tried to read we can end up with nothing read. + if done == 0 && dst.NumBytes() != 0 { + return 0, io.EOF + } + return done, nil +} diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go b/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go new file mode 100755 index 000000000..c3b15d513 --- /dev/null +++ b/pkg/sentry/fs/proc/seqfile/seqfile_state_autogen.go @@ -0,0 +1,58 @@ +// automatically generated by stateify. 
+ +package seqfile + +import ( + "gvisor.googlesource.com/gvisor/pkg/state" +) + +func (x *SeqData) beforeSave() {} +func (x *SeqData) save(m state.Map) { + x.beforeSave() + m.Save("Buf", &x.Buf) + m.Save("Handle", &x.Handle) +} + +func (x *SeqData) afterLoad() {} +func (x *SeqData) load(m state.Map) { + m.Load("Buf", &x.Buf) + m.Load("Handle", &x.Handle) +} + +func (x *SeqFile) beforeSave() {} +func (x *SeqFile) save(m state.Map) { + x.beforeSave() + m.Save("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Save("SeqSource", &x.SeqSource) + m.Save("source", &x.source) + m.Save("generation", &x.generation) + m.Save("lastRead", &x.lastRead) +} + +func (x *SeqFile) afterLoad() {} +func (x *SeqFile) load(m state.Map) { + m.Load("InodeSimpleExtendedAttributes", &x.InodeSimpleExtendedAttributes) + m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes) + m.Load("SeqSource", &x.SeqSource) + m.Load("source", &x.source) + m.Load("generation", &x.generation) + m.Load("lastRead", &x.lastRead) +} + +func (x *seqFileOperations) beforeSave() {} +func (x *seqFileOperations) save(m state.Map) { + x.beforeSave() + m.Save("seqFile", &x.seqFile) +} + +func (x *seqFileOperations) afterLoad() {} +func (x *seqFileOperations) load(m state.Map) { + m.Load("seqFile", &x.seqFile) +} + +func init() { + state.Register("seqfile.SeqData", (*SeqData)(nil), state.Fns{Save: (*SeqData).save, Load: (*SeqData).load}) + state.Register("seqfile.SeqFile", (*SeqFile)(nil), state.Fns{Save: (*SeqFile).save, Load: (*SeqFile).load}) + state.Register("seqfile.seqFileOperations", (*seqFileOperations)(nil), state.Fns{Save: (*seqFileOperations).save, Load: (*seqFileOperations).load}) +} diff --git a/pkg/sentry/fs/proc/stat.go b/pkg/sentry/fs/proc/stat.go new file mode 100644 index 000000000..397f9ec6b --- /dev/null +++ b/pkg/sentry/fs/proc/stat.go @@ -0,0 +1,142 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" +) + +// statData backs /proc/stat. +// +// +stateify savable +type statData struct { + // k is the owning Kernel. + k *kernel.Kernel +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (*statData) NeedsUpdate(generation int64) bool { + return true +} + +// cpuStats contains the breakdown of CPU time for /proc/stat. +type cpuStats struct { + // user is time spent in userspace tasks with non-positive niceness. + user uint64 + + // nice is time spent in userspace tasks with positive niceness. + nice uint64 + + // system is time spent in non-interrupt kernel context. + system uint64 + + // idle is time spent idle. + idle uint64 + + // ioWait is time spent waiting for IO. + ioWait uint64 + + // irq is time spent in interrupt context. + irq uint64 + + // softirq is time spent in software interrupt context. + softirq uint64 + + // steal is involuntary wait time. + steal uint64 + + // guest is time spent in guests with non-positive niceness. + guest uint64 + + // guestNice is time spent in guests with positive niceness. + guestNice uint64 +} + +// String implements fmt.Stringer. 
+func (c cpuStats) String() string { + return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice) +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (s *statData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + + var buf bytes.Buffer + + // TODO(b/37226836): We currently export only zero CPU stats. We could + // at least provide some aggregate stats. + var cpu cpuStats + fmt.Fprintf(&buf, "cpu %s\n", cpu) + + for c, max := uint(0), s.k.ApplicationCores(); c < max; c++ { + fmt.Fprintf(&buf, "cpu%d %s\n", c, cpu) + } + + // The total number of interrupts is dependent on the CPUs and PCI + // devices on the system. See arch_probe_nr_irqs. + // + // Since we don't report real interrupt stats, just choose an arbitrary + // value from a representative VM. + const numInterrupts = 256 + + // The Kernel doesn't handle real interrupts, so report all zeroes. + // TODO(b/37226836): We could count page faults as #PF. + fmt.Fprintf(&buf, "intr 0") // total + for i := 0; i < numInterrupts; i++ { + fmt.Fprintf(&buf, " 0") + } + fmt.Fprintf(&buf, "\n") + + // Total number of context switches. + // TODO(b/37226836): Count this. + fmt.Fprintf(&buf, "ctxt 0\n") + + // CLOCK_REALTIME timestamp from boot, in seconds. + fmt.Fprintf(&buf, "btime %d\n", s.k.Timekeeper().BootTime().Seconds()) + + // Total number of clones. + // TODO(b/37226836): Count this. + fmt.Fprintf(&buf, "processes 0\n") + + // Number of runnable tasks. + // TODO(b/37226836): Count this. + fmt.Fprintf(&buf, "procs_running 0\n") + + // Number of tasks waiting on IO. + // TODO(b/37226836): Count this. + fmt.Fprintf(&buf, "procs_blocked 0\n") + + // Number of each softirq handled. 
+ fmt.Fprintf(&buf, "softirq 0") // total + for i := 0; i < linux.NumSoftIRQ; i++ { + fmt.Fprintf(&buf, " 0") + } + fmt.Fprintf(&buf, "\n") + + return []seqfile.SeqData{ + { + Buf: buf.Bytes(), + Handle: (*statData)(nil), + }, + }, 0 +} diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go new file mode 100644 index 000000000..59846af4f --- /dev/null +++ b/pkg/sentry/fs/proc/sys.go @@ -0,0 +1,162 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "fmt" + "io" + "strconv" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// mmapMinAddrData backs /proc/sys/vm/mmap_min_addr. +// +// +stateify savable +type mmapMinAddrData struct { + k *kernel.Kernel +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (*mmapMinAddrData) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. 
+func (d *mmapMinAddrData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + return []seqfile.SeqData{ + { + Buf: []byte(fmt.Sprintf("%d\n", d.k.Platform.MinUserAddress())), + Handle: (*mmapMinAddrData)(nil), + }, + }, 0 +} + +// +stateify savable +type overcommitMemory struct{} + +func (*overcommitMemory) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource. +func (*overcommitMemory) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + return []seqfile.SeqData{ + { + Buf: []byte("0\n"), + Handle: (*overcommitMemory)(nil), + }, + }, 0 +} + +func (p *proc) newKernelDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + h := hostname{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + } + + children := map[string]*fs.Inode{ + "hostname": newProcInode(&h, msrc, fs.SpecialFile, nil), + "shmall": newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMALL, 10))), + "shmmax": newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMMAX, 10))), + "shmmni": newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMMNI, 10))), + } + + d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +func (p *proc) newVMDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + children := map[string]*fs.Inode{ + "mmap_min_addr": seqfile.NewSeqFileInode(ctx, &mmapMinAddrData{p.k}, msrc), + "overcommit_memory": seqfile.NewSeqFileInode(ctx, &overcommitMemory{}, msrc), + } + d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +func (p *proc) newSysDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + children := 
map[string]*fs.Inode{ + "kernel": p.newKernelDir(ctx, msrc), + "vm": p.newVMDir(ctx, msrc), + } + + // If we're using rpcinet we will let it manage /proc/sys/net. + if _, ok := p.k.NetworkStack().(*rpcinet.Stack); ok { + children["net"] = newRPCInetProcSysNet(ctx, msrc) + } else { + children["net"] = p.newSysNetDir(ctx, msrc) + } + + d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +// hostname is the inode for a file containing the system hostname. +// +// +stateify savable +type hostname struct { + fsutil.SimpleFileInode +} + +// GetFile implements fs.InodeOperations.GetFile. +func (h *hostname) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, d, flags, &hostnameFile{}), nil +} + +var _ fs.InodeOperations = (*hostname)(nil) + +// +stateify savable +type hostnameFile struct { + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSeek `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileNoWrite `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` +} + +// Read implements fs.FileOperations.Read. 
+func (hf *hostnameFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + utsns := kernel.UTSNamespaceFromContext(ctx) + contents := []byte(utsns.HostName() + "\n") + if offset >= int64(len(contents)) { + return 0, io.EOF + } + n, err := dst.CopyOut(ctx, contents[offset:]) + return int64(n), err + +} + +var _ fs.FileOperations = (*hostnameFile)(nil) diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go new file mode 100644 index 000000000..dbf1a987c --- /dev/null +++ b/pkg/sentry/fs/proc/sys_net.go @@ -0,0 +1,355 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "fmt" + "io" + "sync" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" + "gvisor.googlesource.com/gvisor/pkg/sentry/inet" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +type tcpMemDir int + +const ( + tcpRMem tcpMemDir = iota + tcpWMem +) + +// tcpMemInode is used to read/write the size of netstack tcp buffers. 
+// +// TODO(b/121381035): If we have multiple proc mounts, concurrent writes can +// leave netstack and the proc files in an inconsistent state. Since we set the +// buffer size from these proc files on restore, we may also race and end up in +// an inconsistent state on restore. +// +// +stateify savable +type tcpMemInode struct { + fsutil.SimpleFileInode + dir tcpMemDir + s inet.Stack `state:"wait"` + + // size stores the tcp buffer size during save, and sets the buffer + // size in netstack in restore. We must save/restore this here, since + // netstack itself is stateless. + size inet.TCPBufferSize + + // mu protects against concurrent reads/writes to files based on this + // inode. + mu sync.Mutex `state:"nosave"` +} + +var _ fs.InodeOperations = (*tcpMemInode)(nil) + +func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir tcpMemDir) *fs.Inode { + tm := &tcpMemInode{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + s: s, + dir: dir, + } + sattr := fs.StableAttr{ + DeviceID: device.ProcDevice.DeviceID(), + InodeID: device.ProcDevice.NextIno(), + BlockSize: usermem.PageSize, + Type: fs.SpecialFile, + } + return fs.NewInode(tm, msrc, sattr) +} + +// GetFile implements fs.InodeOperations.GetFile. 
+func (m *tcpMemInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + flags.Pread = true + return fs.NewFile(ctx, dirent, flags, &tcpMemFile{tcpMemInode: m}), nil +} + +// +stateify savable +type tcpMemFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + tcpMemInode *tcpMemInode +} + +var _ fs.FileOperations = (*tcpMemFile)(nil) + +// Read implements fs.FileOperations.Read. +func (f *tcpMemFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + return 0, io.EOF + } + f.tcpMemInode.mu.Lock() + defer f.tcpMemInode.mu.Unlock() + + size, err := readSize(f.tcpMemInode.dir, f.tcpMemInode.s) + if err != nil { + return 0, err + } + s := fmt.Sprintf("%d\t%d\t%d\n", size.Min, size.Default, size.Max) + n, err := dst.CopyOut(ctx, []byte(s)) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. 
+func (f *tcpMemFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + f.tcpMemInode.mu.Lock() + defer f.tcpMemInode.mu.Unlock() + + src = src.TakeFirst(usermem.PageSize - 1) + size, err := readSize(f.tcpMemInode.dir, f.tcpMemInode.s) + if err != nil { + return 0, err + } + buf := []int32{int32(size.Min), int32(size.Default), int32(size.Max)} + n, cperr := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, buf, src.Opts) + newSize := inet.TCPBufferSize{ + Min: int(buf[0]), + Default: int(buf[1]), + Max: int(buf[2]), + } + if err := writeSize(f.tcpMemInode.dir, f.tcpMemInode.s, newSize); err != nil { + return n, err + } + return n, cperr +} + +func readSize(dirType tcpMemDir, s inet.Stack) (inet.TCPBufferSize, error) { + switch dirType { + case tcpRMem: + return s.TCPReceiveBufferSize() + case tcpWMem: + return s.TCPSendBufferSize() + default: + panic(fmt.Sprintf("unknown tcpMemFile type: %v", dirType)) + } +} + +func writeSize(dirType tcpMemDir, s inet.Stack, size inet.TCPBufferSize) error { + switch dirType { + case tcpRMem: + return s.SetTCPReceiveBufferSize(size) + case tcpWMem: + return s.SetTCPSendBufferSize(size) + default: + panic(fmt.Sprintf("unknown tcpMemFile type: %v", dirType)) + } +} + +// +stateify savable +type tcpSack struct { + stack inet.Stack `state:"wait"` + enabled *bool + fsutil.SimpleFileInode +} + +func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + ts := &tcpSack{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + stack: s, + } + sattr := fs.StableAttr{ + DeviceID: device.ProcDevice.DeviceID(), + InodeID: device.ProcDevice.NextIno(), + BlockSize: usermem.PageSize, + Type: fs.SpecialFile, + } + return fs.NewInode(ts, msrc, sattr) +} + +// GetFile implements fs.InodeOperations.GetFile. 
+func (s *tcpSack) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + flags.Pread = true + flags.Pwrite = true + return fs.NewFile(ctx, dirent, flags, &tcpSackFile{ + tcpSack: s, + stack: s.stack, + }), nil +} + +// +stateify savable +type tcpSackFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + tcpSack *tcpSack + + stack inet.Stack `state:"wait"` +} + +// Read implements fs.FileOperations.Read. +func (f *tcpSackFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + return 0, io.EOF + } + + if f.tcpSack.enabled == nil { + sack, err := f.stack.TCPSACKEnabled() + if err != nil { + return 0, err + } + f.tcpSack.enabled = &sack + } + + val := "0\n" + if *f.tcpSack.enabled { + // Technically, this is not quite compatible with Linux. Linux + // stores these as an integer, so if you write "2" into + // tcp_sack, you should get 2 back. Tough luck. + val = "1\n" + } + n, err := dst.CopyOut(ctx, []byte(val)) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. 
+func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return n, err + } + if f.tcpSack.enabled == nil { + f.tcpSack.enabled = new(bool) + } + *f.tcpSack.enabled = v != 0 + return n, f.tcpSack.stack.SetTCPSACKEnabled(*f.tcpSack.enabled) +} + +func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + // The following files are simple stubs until they are implemented in + // netstack, most of these files are configuration related. We use the + // value closest to the actual netstack behavior or any empty file, + // all of these files will have mode 0444 (read-only for all users). + contents := map[string]*fs.Inode{ + "default_qdisc": newStaticProcInode(ctx, msrc, []byte("pfifo_fast")), + "message_burst": newStaticProcInode(ctx, msrc, []byte("10")), + "message_cost": newStaticProcInode(ctx, msrc, []byte("5")), + "optmem_max": newStaticProcInode(ctx, msrc, []byte("0")), + "rmem_default": newStaticProcInode(ctx, msrc, []byte("212992")), + "rmem_max": newStaticProcInode(ctx, msrc, []byte("212992")), + "somaxconn": newStaticProcInode(ctx, msrc, []byte("128")), + "wmem_default": newStaticProcInode(ctx, msrc, []byte("212992")), + "wmem_max": newStaticProcInode(ctx, msrc, []byte("212992")), + } + + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + contents := map[string]*fs.Inode{ + // Add tcp_sack. + "tcp_sack": newTCPSackInode(ctx, msrc, s), + + // The following files are simple stubs until they are + // implemented in netstack, most of these files are + // configuration related. 
We use the value closest to the + // actual netstack behavior or any empty file, all of these + // files will have mode 0444 (read-only for all users). + "ip_local_port_range": newStaticProcInode(ctx, msrc, []byte("16000 65535")), + "ip_local_reserved_ports": newStaticProcInode(ctx, msrc, []byte("")), + "ipfrag_time": newStaticProcInode(ctx, msrc, []byte("30")), + "ip_nonlocal_bind": newStaticProcInode(ctx, msrc, []byte("0")), + "ip_no_pmtu_disc": newStaticProcInode(ctx, msrc, []byte("1")), + + // tcp_allowed_congestion_control tells the user what they are + // able to do as an unprivileged process so we leave it empty. + "tcp_allowed_congestion_control": newStaticProcInode(ctx, msrc, []byte("")), + "tcp_available_congestion_control": newStaticProcInode(ctx, msrc, []byte("reno")), + "tcp_congestion_control": newStaticProcInode(ctx, msrc, []byte("reno")), + + // Many of the following stub files are features netstack + // doesn't support. The unsupported features return "0" to + // indicate they are disabled.
+ "tcp_base_mss": newStaticProcInode(ctx, msrc, []byte("1280")), + "tcp_dsack": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_early_retrans": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_fack": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_fastopen": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_fastopen_key": newStaticProcInode(ctx, msrc, []byte("")), + "tcp_invalid_ratelimit": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_keepalive_intvl": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_keepalive_probes": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_keepalive_time": newStaticProcInode(ctx, msrc, []byte("7200")), + "tcp_mtu_probing": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_no_metrics_save": newStaticProcInode(ctx, msrc, []byte("1")), + "tcp_probe_interval": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_probe_threshold": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_retries1": newStaticProcInode(ctx, msrc, []byte("3")), + "tcp_retries2": newStaticProcInode(ctx, msrc, []byte("15")), + "tcp_rfc1337": newStaticProcInode(ctx, msrc, []byte("1")), + "tcp_slow_start_after_idle": newStaticProcInode(ctx, msrc, []byte("1")), + "tcp_synack_retries": newStaticProcInode(ctx, msrc, []byte("5")), + "tcp_syn_retries": newStaticProcInode(ctx, msrc, []byte("3")), + "tcp_timestamps": newStaticProcInode(ctx, msrc, []byte("1")), + } + + // Add tcp_rmem. + if _, err := s.TCPReceiveBufferSize(); err == nil { + contents["tcp_rmem"] = newTCPMemInode(ctx, msrc, s, tcpRMem) + } + + // Add tcp_wmem. 
+ if _, err := s.TCPSendBufferSize(); err == nil { + contents["tcp_wmem"] = newTCPMemInode(ctx, msrc, s, tcpWMem) + } + + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + var contents map[string]*fs.Inode + if s := p.k.NetworkStack(); s != nil { + contents = map[string]*fs.Inode{ + "ipv4": p.newSysNetIPv4Dir(ctx, msrc, s), + "core": p.newSysNetCore(ctx, msrc, s), + } + } + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} diff --git a/pkg/sentry/fs/proc/sys_net_state.go b/pkg/sentry/fs/proc/sys_net_state.go new file mode 100644 index 000000000..6eba709c6 --- /dev/null +++ b/pkg/sentry/fs/proc/sys_net_state.go @@ -0,0 +1,42 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import "fmt" + +// beforeSave is invoked by stateify. +func (t *tcpMemInode) beforeSave() { + size, err := readSize(t.dir, t.s) + if err != nil { + panic(fmt.Sprintf("failed to read TCP send / receive buffer sizes: %v", err)) + } + t.size = size +} + +// afterLoad is invoked by stateify. 
+func (t *tcpMemInode) afterLoad() { + if err := writeSize(t.dir, t.s, t.size); err != nil { + panic(fmt.Sprintf("failed to write previous TCP send / receive buffer sizes [%v]: %v", t.size, err)) + } +} + +// afterLoad is invoked by stateify. +func (s *tcpSack) afterLoad() { + if s.enabled != nil { + if err := s.stack.SetTCPSACKEnabled(*s.enabled); err != nil { + panic(fmt.Sprintf("failed to set previous TCP sack configuration [%v]: %v", *s.enabled, err)) + } + } +} diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go new file mode 100644 index 000000000..77e03d349 --- /dev/null +++ b/pkg/sentry/fs/proc/task.go @@ -0,0 +1,776 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
package proc

import (
	"bytes"
	"fmt"
	"io"
	"sort"
	"strconv"

	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
	"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
	"gvisor.googlesource.com/gvisor/pkg/sentry/mm"
	"gvisor.googlesource.com/gvisor/pkg/sentry/usage"
	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
	"gvisor.googlesource.com/gvisor/pkg/syserror"
	"gvisor.googlesource.com/gvisor/pkg/waiter"
)

// getTaskMM returns t's MemoryManager. If getTaskMM succeeds, the MemoryManager's
// users count is incremented, and must be decremented by the caller when it is
// no longer in use.
//
// It returns syserror.ESRCH if t's exit state is kernel.TaskExitDead, and
// io.EOF if t has no MemoryManager or if IncUsers reports that the users
// count could not be incremented.
func getTaskMM(t *kernel.Task) (*mm.MemoryManager, error) {
	if t.ExitState() == kernel.TaskExitDead {
		return nil, syserror.ESRCH
	}
	var m *mm.MemoryManager
	// Fetch the MemoryManager from inside the WithMuLocked callback.
	t.WithMuLocked(func(t *kernel.Task) {
		m = t.MemoryManager()
	})
	if m == nil || !m.IncUsers() {
		return nil, io.EOF
	}
	return m, nil
}

// taskDir represents a task-level directory.
//
// +stateify savable
type taskDir struct {
	ramfs.Dir

	// t is the task passed to newTaskDir.
	t *kernel.Task
	// NOTE(review): pidns is not set by newTaskDir's struct literal in this
	// file; confirm whether it is populated or used anywhere.
	pidns *kernel.PIDNamespace
}

var _ fs.InodeOperations = (*taskDir)(nil)

// newTaskDir creates a new proc task entry.
+func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, showSubtasks bool) *fs.Inode { + contents := map[string]*fs.Inode{ + "auxv": newAuxvec(t, msrc), + "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), + "comm": newComm(t, msrc), + "environ": newExecArgInode(t, msrc, environExecArg), + "exe": newExe(t, msrc), + "fd": newFdDir(t, msrc), + "fdinfo": newFdInfoDir(t, msrc), + "gid_map": newGIDMap(t, msrc), + // FIXME(b/123511468): create the correct io file for threads. + "io": newIO(t, msrc), + "maps": newMaps(t, msrc), + "mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), + "mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), + "ns": newNamespaceDir(t, msrc), + "smaps": newSmaps(t, msrc), + "stat": newTaskStat(t, msrc, showSubtasks, p.pidns), + "statm": newStatm(t, msrc), + "status": newStatus(t, msrc, p.pidns), + "uid_map": newUIDMap(t, msrc), + } + if showSubtasks { + contents["task"] = p.newSubtasks(t, msrc) + } + if len(p.cgroupControllers) > 0 { + contents["cgroup"] = newCGroupInode(t, msrc, p.cgroupControllers) + } + + // TODO(b/31916171): Set EUID/EGID based on dumpability. + d := &taskDir{ + Dir: *ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0555)), + t: t, + } + return newProcInode(d, msrc, fs.SpecialDirectory, t) +} + +// subtasks represents a /proc/TID/task directory. +// +// +stateify savable +type subtasks struct { + ramfs.Dir + + t *kernel.Task + p *proc +} + +var _ fs.InodeOperations = (*subtasks)(nil) + +func (p *proc) newSubtasks(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + s := &subtasks{ + Dir: *ramfs.NewDir(t, nil, fs.RootOwner, fs.FilePermsFromMode(0555)), + t: t, + p: p, + } + return newProcInode(s, msrc, fs.SpecialDirectory, t) +} + +// UnstableAttr returns unstable attributes of the subtasks. 
+func (s *subtasks) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) { + uattr, err := s.Dir.UnstableAttr(ctx, inode) + if err != nil { + return fs.UnstableAttr{}, err + } + // We can't rely on ramfs' implementation because the task directories are + // generated dynamically. + uattr.Links = uint64(2 + s.t.ThreadGroup().Count()) + return uattr, nil +} + +// GetFile implements fs.InodeOperations.GetFile. +func (s *subtasks) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, dirent, flags, &subtasksFile{t: s.t, pidns: s.p.pidns}), nil +} + +// +stateify savable +type subtasksFile struct { + fsutil.DirFileOperations `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + + t *kernel.Task + pidns *kernel.PIDNamespace +} + +// Readdir implements fs.FileOperations.Readdir. +func (f *subtasksFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySerializer) (int64, error) { + dirCtx := fs.DirCtx{ + Serializer: ser, + } + + // Note that unlike most Readdir implementations, the offset here is + // not an index into the subtasks, but rather the TID of the next + // subtask to emit. + offset := file.Offset() + + if offset == 0 { + // Serialize "." and "..". + root := fs.RootFromContext(ctx) + if root != nil { + defer root.DecRef() + } + dot, dotdot := file.Dirent.GetDotAttrs(root) + if err := dirCtx.DirEmit(".", dot); err != nil { + return offset, err + } + if err := dirCtx.DirEmit("..", dotdot); err != nil { + return offset, err + } + } + + // Serialize tasks. + tasks := f.t.ThreadGroup().MemberIDs(f.pidns) + taskInts := make([]int, 0, len(tasks)) + for _, tid := range tasks { + taskInts = append(taskInts, int(tid)) + } + + // Find the task to start at. 
+ idx := sort.SearchInts(taskInts, int(offset)) + if idx == len(taskInts) { + return offset, nil + } + taskInts = taskInts[idx:] + + var tid int + for _, tid = range taskInts { + name := strconv.FormatUint(uint64(tid), 10) + attr := fs.GenericDentAttr(fs.SpecialDirectory, device.ProcDevice) + if err := dirCtx.DirEmit(name, attr); err != nil { + // Returned offset is next tid to serialize. + return int64(tid), err + } + } + // We serialized them all. Next offset should be higher than last + // serialized tid. + return int64(tid) + 1, nil +} + +var _ fs.FileOperations = (*subtasksFile)(nil) + +// Lookup loads an Inode in a task's subtask directory into a Dirent. +func (s *subtasks) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent, error) { + tid, err := strconv.ParseUint(p, 10, 32) + if err != nil { + return nil, syserror.ENOENT + } + + task := s.p.pidns.TaskWithID(kernel.ThreadID(tid)) + if task == nil { + return nil, syserror.ENOENT + } + if task.ThreadGroup() != s.t.ThreadGroup() { + return nil, syserror.ENOENT + } + + td := s.p.newTaskDir(task, dir.MountSource, false) + return fs.NewDirent(td, p), nil +} + +// exe is an fs.InodeOperations symlink for the /proc/PID/exe file. +// +// +stateify savable +type exe struct { + ramfs.Symlink + + t *kernel.Task +} + +func newExe(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + exeSymlink := &exe{ + Symlink: *ramfs.NewSymlink(t, fs.RootOwner, ""), + t: t, + } + return newProcInode(exeSymlink, msrc, fs.Symlink, t) +} + +func (e *exe) executable() (d *fs.Dirent, err error) { + e.t.WithMuLocked(func(t *kernel.Task) { + mm := t.MemoryManager() + if mm == nil { + // TODO(b/34851096): Check shouldn't allow Readlink once the + // Task is zombied. + err = syserror.EACCES + return + } + + // The MemoryManager may be destroyed, in which case + // MemoryManager.destroy will simply set the executable to nil + // (with locks held). 
+ d = mm.Executable() + if d == nil { + err = syserror.ENOENT + } + }) + return +} + +// Readlink implements fs.InodeOperations. +func (e *exe) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { + if !kernel.ContextCanTrace(ctx, e.t, false) { + return "", syserror.EACCES + } + + // Pull out the executable for /proc/TID/exe. + exec, err := e.executable() + if err != nil { + return "", err + } + defer exec.DecRef() + + root := fs.RootFromContext(ctx) + if root == nil { + // This doesn't correspond to anything in Linux because the vfs is + // global there. + return "", syserror.EINVAL + } + defer root.DecRef() + n, _ := exec.FullName(root) + return n, nil +} + +// namespaceSymlink represents a symlink in the namespacefs, such as the files +// in /proc/<pid>/ns. +// +// +stateify savable +type namespaceSymlink struct { + ramfs.Symlink + + t *kernel.Task +} + +func newNamespaceSymlink(t *kernel.Task, msrc *fs.MountSource, name string) *fs.Inode { + // TODO(rahat): Namespace symlinks should contain the namespace name and the + // inode number for the namespace instance, so for example user:[123456]. We + // currently fake the inode number by sticking the symlink inode in its + // place. + target := fmt.Sprintf("%s:[%d]", name, device.ProcDevice.NextIno()) + n := &namespaceSymlink{ + Symlink: *ramfs.NewSymlink(t, fs.RootOwner, target), + t: t, + } + return newProcInode(n, msrc, fs.Symlink, t) +} + +// Getlink implements fs.InodeOperations.Getlink. +func (n *namespaceSymlink) Getlink(ctx context.Context, inode *fs.Inode) (*fs.Dirent, error) { + if !kernel.ContextCanTrace(ctx, n.t, false) { + return nil, syserror.EACCES + } + + // Create a new regular file to fake the namespace file. 
+ iops := fsutil.NewNoReadWriteFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0777), linux.PROC_SUPER_MAGIC) + return fs.NewDirent(newProcInode(iops, inode.MountSource, fs.RegularFile, nil), n.Symlink.Target), nil +} + +func newNamespaceDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + contents := map[string]*fs.Inode{ + "net": newNamespaceSymlink(t, msrc, "net"), + "pid": newNamespaceSymlink(t, msrc, "pid"), + "user": newNamespaceSymlink(t, msrc, "user"), + } + d := ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0511)) + return newProcInode(d, msrc, fs.SpecialDirectory, t) +} + +// mapsData implements seqfile.SeqSource for /proc/[pid]/maps. +// +// +stateify savable +type mapsData struct { + t *kernel.Task +} + +func newMaps(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + return newProcInode(seqfile.NewSeqFile(t, &mapsData{t}), msrc, fs.SpecialFile, t) +} + +func (md *mapsData) mm() *mm.MemoryManager { + var tmm *mm.MemoryManager + md.t.WithMuLocked(func(t *kernel.Task) { + if mm := t.MemoryManager(); mm != nil { + // No additional reference is taken on mm here. This is safe + // because MemoryManager.destroy is required to leave the + // MemoryManager in a state where it's still usable as a SeqSource. + tmm = mm + } + }) + return tmm +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (md *mapsData) NeedsUpdate(generation int64) bool { + if mm := md.mm(); mm != nil { + return mm.NeedsUpdate(generation) + } + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (md *mapsData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if mm := md.mm(); mm != nil { + return mm.ReadMapsSeqFileData(ctx, h) + } + return []seqfile.SeqData{}, 0 +} + +// smapsData implements seqfile.SeqSource for /proc/[pid]/smaps. 
+// +// +stateify savable +type smapsData struct { + t *kernel.Task +} + +func newSmaps(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + return newProcInode(seqfile.NewSeqFile(t, &smapsData{t}), msrc, fs.SpecialFile, t) +} + +func (sd *smapsData) mm() *mm.MemoryManager { + var tmm *mm.MemoryManager + sd.t.WithMuLocked(func(t *kernel.Task) { + if mm := t.MemoryManager(); mm != nil { + // No additional reference is taken on mm here. This is safe + // because MemoryManager.destroy is required to leave the + // MemoryManager in a state where it's still usable as a SeqSource. + tmm = mm + } + }) + return tmm +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (sd *smapsData) NeedsUpdate(generation int64) bool { + if mm := sd.mm(); mm != nil { + return mm.NeedsUpdate(generation) + } + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (sd *smapsData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if mm := sd.mm(); mm != nil { + return mm.ReadSmapsSeqFileData(ctx, h) + } + return []seqfile.SeqData{}, 0 +} + +// +stateify savable +type taskStatData struct { + t *kernel.Task + + // If tgstats is true, accumulate fault stats (not implemented) and CPU + // time across all tasks in t's thread group. + tgstats bool + + // pidns is the PID namespace associated with the proc filesystem that + // includes the file using this statData. + pidns *kernel.PIDNamespace +} + +func newTaskStat(t *kernel.Task, msrc *fs.MountSource, showSubtasks bool, pidns *kernel.PIDNamespace) *fs.Inode { + return newProcInode(seqfile.NewSeqFile(t, &taskStatData{t, showSubtasks /* tgstats */, pidns}), msrc, fs.SpecialFile, t) +} + +// NeedsUpdate returns whether the generation is old or not. +func (s *taskStatData) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData returns data for the SeqFile reader. 
+// SeqData, the current generation and where in the file the handle corresponds to. +func (s *taskStatData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + + var buf bytes.Buffer + + fmt.Fprintf(&buf, "%d ", s.pidns.IDOfTask(s.t)) + fmt.Fprintf(&buf, "(%s) ", s.t.Name()) + fmt.Fprintf(&buf, "%c ", s.t.StateStatus()[0]) + ppid := kernel.ThreadID(0) + if parent := s.t.Parent(); parent != nil { + ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup()) + } + fmt.Fprintf(&buf, "%d ", ppid) + fmt.Fprintf(&buf, "%d ", s.pidns.IDOfProcessGroup(s.t.ThreadGroup().ProcessGroup())) + fmt.Fprintf(&buf, "%d ", s.pidns.IDOfSession(s.t.ThreadGroup().Session())) + fmt.Fprintf(&buf, "0 0 " /* tty_nr tpgid */) + fmt.Fprintf(&buf, "0 " /* flags */) + fmt.Fprintf(&buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */) + var cputime usage.CPUStats + if s.tgstats { + cputime = s.t.ThreadGroup().CPUStats() + } else { + cputime = s.t.CPUStats() + } + fmt.Fprintf(&buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime)) + cputime = s.t.ThreadGroup().JoinedChildCPUStats() + fmt.Fprintf(&buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime)) + fmt.Fprintf(&buf, "%d %d ", s.t.Priority(), s.t.Niceness()) + fmt.Fprintf(&buf, "%d ", s.t.ThreadGroup().Count()) + + // itrealvalue. Since kernel 2.6.17, this field is no longer + // maintained, and is hard coded as 0. + fmt.Fprintf(&buf, "0 ") + + // Start time is relative to boot time, expressed in clock ticks. + fmt.Fprintf(&buf, "%d ", linux.ClockTFromDuration(s.t.StartTime().Sub(s.t.Kernel().Timekeeper().BootTime()))) + + var vss, rss uint64 + s.t.WithMuLocked(func(t *kernel.Task) { + if mm := t.MemoryManager(); mm != nil { + vss = mm.VirtualMemorySize() + rss = mm.ResidentSetSize() + } + }) + fmt.Fprintf(&buf, "%d %d ", vss, rss/usermem.PageSize) + + // rsslim. 
+ fmt.Fprintf(&buf, "%d ", s.t.ThreadGroup().Limits().Get(limits.Rss).Cur) + + fmt.Fprintf(&buf, "0 0 0 0 0 " /* startcode endcode startstack kstkesp kstkeip */) + fmt.Fprintf(&buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */) + fmt.Fprintf(&buf, "0 0 " /* nswap cnswap */) + terminationSignal := linux.Signal(0) + if s.t == s.t.ThreadGroup().Leader() { + terminationSignal = s.t.ThreadGroup().TerminationSignal() + } + fmt.Fprintf(&buf, "%d ", terminationSignal) + fmt.Fprintf(&buf, "0 0 0 " /* processor rt_priority policy */) + fmt.Fprintf(&buf, "0 0 0 " /* delayacct_blkio_ticks guest_time cguest_time */) + fmt.Fprintf(&buf, "0 0 0 0 0 0 0 " /* start_data end_data start_brk arg_start arg_end env_start env_end */) + fmt.Fprintf(&buf, "0\n" /* exit_code */) + + return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*taskStatData)(nil)}}, 0 +} + +// statmData implements seqfile.SeqSource for /proc/[pid]/statm. +// +// +stateify savable +type statmData struct { + t *kernel.Task +} + +func newStatm(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + return newProcInode(seqfile.NewSeqFile(t, &statmData{t}), msrc, fs.SpecialFile, t) +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (s *statmData) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (s *statmData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + + var vss, rss uint64 + s.t.WithMuLocked(func(t *kernel.Task) { + if mm := t.MemoryManager(); mm != nil { + vss = mm.VirtualMemorySize() + rss = mm.ResidentSetSize() + } + }) + + var buf bytes.Buffer + fmt.Fprintf(&buf, "%d %d 0 0 0 0 0\n", vss/usermem.PageSize, rss/usermem.PageSize) + + return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*statmData)(nil)}}, 0 +} + +// statusData implements seqfile.SeqSource for /proc/[pid]/status. 
+// +// +stateify savable +type statusData struct { + t *kernel.Task + pidns *kernel.PIDNamespace +} + +func newStatus(t *kernel.Task, msrc *fs.MountSource, pidns *kernel.PIDNamespace) *fs.Inode { + return newProcInode(seqfile.NewSeqFile(t, &statusData{t, pidns}), msrc, fs.SpecialFile, t) +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (s *statusData) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (s *statusData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + + var buf bytes.Buffer + fmt.Fprintf(&buf, "Name:\t%s\n", s.t.Name()) + fmt.Fprintf(&buf, "State:\t%s\n", s.t.StateStatus()) + fmt.Fprintf(&buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.t.ThreadGroup())) + fmt.Fprintf(&buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.t)) + ppid := kernel.ThreadID(0) + if parent := s.t.Parent(); parent != nil { + ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup()) + } + fmt.Fprintf(&buf, "PPid:\t%d\n", ppid) + tpid := kernel.ThreadID(0) + if tracer := s.t.Tracer(); tracer != nil { + tpid = s.pidns.IDOfTask(tracer) + } + fmt.Fprintf(&buf, "TracerPid:\t%d\n", tpid) + var fds int + var vss, rss, data uint64 + s.t.WithMuLocked(func(t *kernel.Task) { + if fdm := t.FDMap(); fdm != nil { + fds = fdm.Size() + } + if mm := t.MemoryManager(); mm != nil { + vss = mm.VirtualMemorySize() + rss = mm.ResidentSetSize() + data = mm.VirtualDataSize() + } + }) + fmt.Fprintf(&buf, "FDSize:\t%d\n", fds) + fmt.Fprintf(&buf, "VmSize:\t%d kB\n", vss>>10) + fmt.Fprintf(&buf, "VmRSS:\t%d kB\n", rss>>10) + fmt.Fprintf(&buf, "VmData:\t%d kB\n", data>>10) + fmt.Fprintf(&buf, "Threads:\t%d\n", s.t.ThreadGroup().Count()) + creds := s.t.Credentials() + fmt.Fprintf(&buf, "CapInh:\t%016x\n", creds.InheritableCaps) + fmt.Fprintf(&buf, "CapPrm:\t%016x\n", creds.PermittedCaps) + fmt.Fprintf(&buf, "CapEff:\t%016x\n", creds.EffectiveCaps) + 
fmt.Fprintf(&buf, "CapBnd:\t%016x\n", creds.BoundingCaps) + fmt.Fprintf(&buf, "Seccomp:\t%d\n", s.t.SeccompMode()) + return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*statusData)(nil)}}, 0 +} + +// ioUsage is the /proc/<pid>/io and /proc/<pid>/task/<tid>/io data provider. +type ioUsage interface { + // IOUsage returns the io usage data. + IOUsage() *usage.IO +} + +// +stateify savable +type ioData struct { + ioUsage +} + +func newIO(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + return newProcInode(seqfile.NewSeqFile(t, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t) +} + +// NeedsUpdate returns whether the generation is old or not. +func (i *ioData) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData returns data for the SeqFile reader. +// SeqData, the current generation and where in the file the handle corresponds to. +func (i *ioData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + + io := usage.IO{} + io.Accumulate(i.IOUsage()) + + var buf bytes.Buffer + fmt.Fprintf(&buf, "char: %d\n", io.CharsRead) + fmt.Fprintf(&buf, "wchar: %d\n", io.CharsWritten) + fmt.Fprintf(&buf, "syscr: %d\n", io.ReadSyscalls) + fmt.Fprintf(&buf, "syscw: %d\n", io.WriteSyscalls) + fmt.Fprintf(&buf, "read_bytes: %d\n", io.BytesRead) + fmt.Fprintf(&buf, "write_bytes: %d\n", io.BytesWritten) + fmt.Fprintf(&buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled) + + return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*ioData)(nil)}}, 0 +} + +// comm is a file containing the command name for a task. +// +// On Linux, /proc/[pid]/comm is writable, and writing to the comm file changes +// the thread name. We don't implement this yet as there are no known users of +// this feature. +// +// +stateify savable +type comm struct { + fsutil.SimpleFileInode + + t *kernel.Task +} + +// newComm returns a new comm file. 
+func newComm(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + c := &comm{ + SimpleFileInode: *fsutil.NewSimpleFileInode(t, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + t: t, + } + return newProcInode(c, msrc, fs.SpecialFile, t) +} + +// GetFile implements fs.InodeOperations.GetFile. +func (c *comm) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, dirent, flags, &commFile{t: c.t}), nil +} + +// +stateify savable +type commFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoWrite `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + t *kernel.Task +} + +var _ fs.FileOperations = (*commFile)(nil) + +// Read implements fs.FileOperations.Read. +func (f *commFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + + buf := []byte(f.t.Name() + "\n") + if offset >= int64(len(buf)) { + return 0, io.EOF + } + + n, err := dst.CopyOut(ctx, buf[offset:]) + return int64(n), err +} + +// auxvec is a file containing the auxiliary vector for a task. +// +// +stateify savable +type auxvec struct { + fsutil.SimpleFileInode + + t *kernel.Task +} + +// newAuxvec returns a new auxvec file. +func newAuxvec(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + a := &auxvec{ + SimpleFileInode: *fsutil.NewSimpleFileInode(t, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + t: t, + } + return newProcInode(a, msrc, fs.SpecialFile, t) +} + +// GetFile implements fs.InodeOperations.GetFile. 
+func (a *auxvec) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, dirent, flags, &auxvecFile{t: a.t}), nil +} + +// +stateify savable +type auxvecFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoWrite `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + t *kernel.Task +} + +// Read implements fs.FileOperations.Read. +func (f *auxvecFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + + m, err := getTaskMM(f.t) + if err != nil { + return 0, err + } + defer m.DecUsers(ctx) + auxv := m.Auxv() + + // Space for buffer with AT_NULL (0) terminator at the end. + size := (len(auxv) + 1) * 16 + if offset >= int64(size) { + return 0, io.EOF + } + + buf := make([]byte, size) + for i, e := range auxv { + usermem.ByteOrder.PutUint64(buf[16*i:], e.Key) + usermem.ByteOrder.PutUint64(buf[16*i+8:], uint64(e.Value)) + } + + n, err := dst.CopyOut(ctx, buf[offset:]) + return int64(n), err +} diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go new file mode 100644 index 000000000..a14b1b45f --- /dev/null +++ b/pkg/sentry/fs/proc/uid_gid_map.go @@ -0,0 +1,179 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + "io" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// idMapInodeOperations implements fs.InodeOperations for +// /proc/[pid]/{uid,gid}_map. +// +// +stateify savable +type idMapInodeOperations struct { + fsutil.InodeGenericChecker `state:"nosave"` + fsutil.InodeNoopRelease `state:"nosave"` + fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotAllocatable `state:"nosave"` + fsutil.InodeNotDirectory `state:"nosave"` + fsutil.InodeNotMappable `state:"nosave"` + fsutil.InodeNotSocket `state:"nosave"` + fsutil.InodeNotSymlink `state:"nosave"` + fsutil.InodeNotTruncatable `state:"nosave"` + fsutil.InodeVirtual `state:"nosave"` + + fsutil.InodeSimpleAttributes + fsutil.InodeSimpleExtendedAttributes + + t *kernel.Task + gids bool +} + +var _ fs.InodeOperations = (*idMapInodeOperations)(nil) + +// newUIDMap returns a new uid_map file. +func newUIDMap(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + return newIDMap(t, msrc, false /* gids */) +} + +// newGIDMap returns a new gid_map file. 
+func newGIDMap(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + return newIDMap(t, msrc, true /* gids */) +} + +func newIDMap(t *kernel.Task, msrc *fs.MountSource, gids bool) *fs.Inode { + return newProcInode(&idMapInodeOperations{ + InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(t, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), + t: t, + gids: gids, + }, msrc, fs.SpecialFile, t) +} + +// GetFile implements fs.InodeOperations.GetFile. +func (imio *idMapInodeOperations) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, dirent, flags, &idMapFileOperations{ + iops: imio, + }), nil +} + +// +stateify savable +type idMapFileOperations struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + iops *idMapInodeOperations +} + +var _ fs.FileOperations = (*idMapFileOperations)(nil) + +// "There is an (arbitrary) limit on the number of lines in the file. As at +// Linux 3.18, the limit is five lines." - user_namespaces(7) +const maxIDMapLines = 5 + +// Read implements fs.FileOperations.Read. 
+func (imfo *idMapFileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + var entries []auth.IDMapEntry + if imfo.iops.gids { + entries = imfo.iops.t.UserNamespace().GIDMap() + } else { + entries = imfo.iops.t.UserNamespace().UIDMap() + } + var buf bytes.Buffer + for _, e := range entries { + fmt.Fprintf(&buf, "%10d %10d %10d\n", e.FirstID, e.FirstParentID, e.Length) + } + if offset >= int64(buf.Len()) { + return 0, io.EOF + } + n, err := dst.CopyOut(ctx, buf.Bytes()[offset:]) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. +func (imfo *idMapFileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + // "In addition, the number of bytes written to the file must be less than + // the system page size, and the write must be performed at the start of + // the file ..." - user_namespaces(7) + srclen := src.NumBytes() + if srclen >= usermem.PageSize || offset != 0 { + return 0, syserror.EINVAL + } + b := make([]byte, srclen) + if _, err := src.CopyIn(ctx, b); err != nil { + return 0, err + } + + // Truncate from the first NULL byte. + var nul int64 + nul = int64(bytes.IndexByte(b, 0)) + if nul == -1 { + nul = srclen + } + b = b[:nul] + // Remove the last \n. 
+ if nul >= 1 && b[nul-1] == '\n' { + b = b[:nul-1] + } + lines := bytes.SplitN(b, []byte("\n"), maxIDMapLines+1) + if len(lines) > maxIDMapLines { + return 0, syserror.EINVAL + } + + entries := make([]auth.IDMapEntry, len(lines)) + for i, l := range lines { + var e auth.IDMapEntry + _, err := fmt.Sscan(string(l), &e.FirstID, &e.FirstParentID, &e.Length) + if err != nil { + return 0, syserror.EINVAL + } + entries[i] = e + } + var err error + if imfo.iops.gids { + err = imfo.iops.t.UserNamespace().SetGIDMap(ctx, entries) + } else { + err = imfo.iops.t.UserNamespace().SetUIDMap(ctx, entries) + } + if err != nil { + return 0, err + } + + // On success, Linux's kernel/user_namespace.c:map_write() always returns + // count, even if fewer bytes were used. + return int64(srclen), nil +} diff --git a/pkg/sentry/fs/proc/uptime.go b/pkg/sentry/fs/proc/uptime.go new file mode 100644 index 000000000..35c3851e1 --- /dev/null +++ b/pkg/sentry/fs/proc/uptime.go @@ -0,0 +1,87 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package proc + +import ( + "fmt" + "io" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// uptime is a file containing the system uptime. +// +// +stateify savable +type uptime struct { + fsutil.SimpleFileInode + + // The "start time" of the sandbox. + startTime ktime.Time +} + +// newUptime returns a new uptime file. +func newUptime(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + u := &uptime{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + startTime: ktime.NowFromContext(ctx), + } + return newProcInode(u, msrc, fs.SpecialFile, nil) +} + +// GetFile implements fs.InodeOperations.GetFile. +func (u *uptime) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, dirent, flags, &uptimeFile{startTime: u.startTime}), nil +} + +// +stateify savable +type uptimeFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoWrite `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + startTime ktime.Time +} + +// Read implements fs.FileOperations.Read. 
+func (f *uptimeFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + + now := ktime.NowFromContext(ctx) + // Pretend that we've spent zero time sleeping (second number). + s := []byte(fmt.Sprintf("%.2f 0.00\n", now.Sub(f.startTime).Seconds())) + if offset >= int64(len(s)) { + return 0, io.EOF + } + + n, err := dst.CopyOut(ctx, s[offset:]) + return int64(n), err +} diff --git a/pkg/sentry/fs/proc/version.go b/pkg/sentry/fs/proc/version.go new file mode 100644 index 000000000..a5479990c --- /dev/null +++ b/pkg/sentry/fs/proc/version.go @@ -0,0 +1,78 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" +) + +// versionData backs /proc/version. +// +// +stateify savable +type versionData struct { + // k is the owning Kernel. + k *kernel.Kernel +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (*versionData) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. 
+func (v *versionData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return nil, 0 + } + + init := v.k.GlobalInit() + if init == nil { + // Attempted to read before the init Task is created. This can + // only occur during startup, which should never need to read + // this file. + panic("Attempted to read version before initial Task is available") + } + + // /proc/version takes the form: + // + // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST) + // (COMPILER_VERSION) VERSION" + // + // where: + // - SYSNAME, RELEASE, and VERSION are the same as returned by + // sys_utsname + // - COMPILE_USER is the user that build the kernel + // - COMPILE_HOST is the hostname of the machine on which the kernel + // was built + // - COMPILER_VERSION is the version reported by the building compiler + // + // Since we don't really want to expose build information to + // applications, those fields are omitted. + // + // FIXME(mpratt): Using Version from the init task SyscallTable + // disregards the different version a task may have (e.g., in a uts + // namespace). + ver := init.Leader().SyscallTable().Version + return []seqfile.SeqData{ + { + Buf: []byte(fmt.Sprintf("%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)), + Handle: (*versionData)(nil), + }, + }, 0 +} |