summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fs/proc
diff options
context:
space:
mode:
authorGoogler <noreply@google.com>2018-04-27 10:37:02 -0700
committerAdin Scannell <ascannell@google.com>2018-04-28 01:44:26 -0400
commitd02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch)
tree54f95eef73aee6bacbfc736fffc631be2605ed53 /pkg/sentry/fs/proc
parentf70210e742919f40aa2f0934a22f1c9ba6dada62 (diff)
Check in gVisor.
PiperOrigin-RevId: 194583126 Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
Diffstat (limited to 'pkg/sentry/fs/proc')
-rw-r--r--pkg/sentry/fs/proc/BUILD95
-rw-r--r--pkg/sentry/fs/proc/README.md317
-rw-r--r--pkg/sentry/fs/proc/cpuinfo.go64
-rw-r--r--pkg/sentry/fs/proc/device/BUILD11
-rw-r--r--pkg/sentry/fs/proc/device/device.go23
-rw-r--r--pkg/sentry/fs/proc/exec_args.go129
-rw-r--r--pkg/sentry/fs/proc/fds.go258
-rw-r--r--pkg/sentry/fs/proc/file.go56
-rw-r--r--pkg/sentry/fs/proc/filesystems.go55
-rw-r--r--pkg/sentry/fs/proc/fs.go69
-rw-r--r--pkg/sentry/fs/proc/loadavg.go51
-rw-r--r--pkg/sentry/fs/proc/meminfo.go82
-rw-r--r--pkg/sentry/fs/proc/mounts.go176
-rw-r--r--pkg/sentry/fs/proc/net.go151
-rw-r--r--pkg/sentry/fs/proc/net_test.go74
-rw-r--r--pkg/sentry/fs/proc/proc.go182
-rw-r--r--pkg/sentry/fs/proc/seqfile/BUILD55
-rw-r--r--pkg/sentry/fs/proc/seqfile/seqfile.go232
-rw-r--r--pkg/sentry/fs/proc/seqfile/seqfile_test.go272
-rw-r--r--pkg/sentry/fs/proc/stat.go139
-rw-r--r--pkg/sentry/fs/proc/sys.go117
-rw-r--r--pkg/sentry/fs/proc/sys_net.go188
-rw-r--r--pkg/sentry/fs/proc/sys_net_test.go121
-rw-r--r--pkg/sentry/fs/proc/task.go567
-rw-r--r--pkg/sentry/fs/proc/uid_gid_map.go152
-rw-r--r--pkg/sentry/fs/proc/uptime.go61
-rw-r--r--pkg/sentry/fs/proc/version.go75
27 files changed, 3772 insertions, 0 deletions
diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD
new file mode 100644
index 000000000..18372cfbf
--- /dev/null
+++ b/pkg/sentry/fs/proc/BUILD
@@ -0,0 +1,95 @@
+package(licenses = ["notice"]) # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools/go_stateify:defs.bzl", "go_stateify")
+
+go_stateify(
+ name = "proc_state",
+ srcs = [
+ "cpuinfo.go",
+ "exec_args.go",
+ "fds.go",
+ "file.go",
+ "filesystems.go",
+ "fs.go",
+ "loadavg.go",
+ "meminfo.go",
+ "mounts.go",
+ "net.go",
+ "proc.go",
+ "stat.go",
+ "sys.go",
+ "sys_net.go",
+ "task.go",
+ "uid_gid_map.go",
+ "uptime.go",
+ "version.go",
+ ],
+ out = "proc_state.go",
+ package = "proc",
+)
+
+go_library(
+ name = "proc",
+ srcs = [
+ "cpuinfo.go",
+ "exec_args.go",
+ "fds.go",
+ "file.go",
+ "filesystems.go",
+ "fs.go",
+ "loadavg.go",
+ "meminfo.go",
+ "mounts.go",
+ "net.go",
+ "proc.go",
+ "proc_state.go",
+ "stat.go",
+ "sys.go",
+ "sys_net.go",
+ "task.go",
+ "uid_gid_map.go",
+ "uptime.go",
+ "version.go",
+ ],
+ importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc",
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/amutex",
+ "//pkg/log",
+ "//pkg/sentry/arch",
+ "//pkg/sentry/context",
+ "//pkg/sentry/fs",
+ "//pkg/sentry/fs/proc/device",
+ "//pkg/sentry/fs/proc/seqfile",
+ "//pkg/sentry/fs/ramfs",
+ "//pkg/sentry/inet",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/kernel/kdefs",
+ "//pkg/sentry/kernel/time",
+ "//pkg/sentry/mm",
+ "//pkg/sentry/usage",
+ "//pkg/sentry/usermem",
+ "//pkg/state",
+ "//pkg/syserr",
+ "//pkg/syserror",
+ ],
+)
+
+go_test(
+ name = "proc_test",
+ size = "small",
+ srcs = [
+ "net_test.go",
+ "sys_net_test.go",
+ ],
+ embed = [":proc"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/sentry/context",
+ "//pkg/sentry/inet",
+ "//pkg/sentry/usermem",
+ ],
+)
diff --git a/pkg/sentry/fs/proc/README.md b/pkg/sentry/fs/proc/README.md
new file mode 100644
index 000000000..c510ee63a
--- /dev/null
+++ b/pkg/sentry/fs/proc/README.md
@@ -0,0 +1,317 @@
+This document tracks what is implemented in procfs. Refer to
+Documentation/filesystems/proc.txt in the Linux project for information about
+procfs generally.
+
+**NOTE**: This document is not guaranteed to be up to date. If you find an
+inconsistency, please file a bug.
+
+[TOC]
+## Kernel data
+
+The following files are implemented:
+
+| File /proc/ | Content |
+| :------------------------ | :----------------------------------------------- |
+| [cpuinfo](#cpuinfo) | Info about the CPU |
+| [filesystem](#filesystem) | Supported filesystems |
+| [loadavg](#loadavg) | Load average of last 1, 5 & 15 minutes |
+| [meminfo](#meminfo) | Overall memory info |
+| [stat](#stat) | Overall kernel statistics |
+| [sys](#sys) | Change parameters within the kernel |
+| [uptime](#uptime) | Wall clock since boot, combined idle time of all |
+: : cpus :
+| [version](#version) | Kernel version |
+
+### cpuinfo
+
+```bash
+$ cat /proc/cpuinfo
+processor : 0
+vendor_id : GenuineIntel
+cpu family : 6
+model : 45
+model name : unknown
+stepping : unknown
+cpu MHz : 1234.588
+fpu : yes
+fpu_exception : yes
+cpuid level : 13
+wp : yes
+flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx xsaveopt
+bogomips : 1234.59
+clflush size : 64
+cache_alignment : 64
+address sizes : 46 bits physical, 48 bits virtual
+power management:
+
+...
+```
+
+Notable divergences:
+
+Field name | Notes
+:--------------- | :---------------------------------------
+model name | Always unknown
+stepping | Always unknown
+fpu | Always yes
+fpu_exception | Always yes
+wp | Always yes
+bogomips | Bogus value (matches cpu MHz)
+clflush size | Always 64
+cache_alignment | Always 64
+address sizes | Always 46 bits physical, 48 bits virtual
+power management | Always blank
+
+Otherwise fields are derived from the SentryCPUIDSpec proto config.
+
+### filesystem
+
+```bash
+$ cat /proc/filesystems
+nodev 9p
+nodev devtmpfs
+nodev proc
+nodev ramdiskfs
+nodev sysfs
+nodev tmpfs
+```
+
+Notable divergences:
+
+Filesystem | Notes
+:--------- | :--------------------------------------------------------
+ramdiskfs | No Linux equivalent, see the SentryRamdiskFS proto config
+
+### loadavg
+
+```bash
+$ cat /proc/loadavg
+0.00 0.00 0.00 0/0 0
+```
+
+Column | Notes
+:------------------------------------ | :----------
+CPU.IO utilization in last 1 minute | Always zero
+CPU.IO utilization in last 5 minutes | Always zero
+CPU.IO utilization in last 15 minutes | Always zero
+Num currently running processes | Always zero
+Total num processes | Always zero
+
+TODO: Populate the columns with accurate statistics.
+### meminfo
+
+```bash
+$ cat /proc/meminfo
+MemTotal: 2097152 kB
+MemFree: 2083540 kB
+MemAvailable: 2083540 kB
+Buffers: 0 kB
+Cached: 4428 kB
+SwapCache: 0 kB
+Active: 10812 kB
+Inactive: 2216 kB
+Active(anon): 8600 kB
+Inactive(anon): 0 kB
+Active(file): 2212 kB
+Inactive(file): 2216 kB
+Unevictable: 0 kB
+Mlocked: 0 kB
+SwapTotal: 0 kB
+SwapFree: 0 kB
+Dirty: 0 kB
+Writeback: 0 kB
+AnonPages: 8600 kB
+Mapped: 4428 kB
+Shmem: 0 kB
+
+```
+
+Notable divergences:
+
+Field name | Notes
+:---------------- | :--------------------------------------------------------
+Buffers | Always zero, no block devices
+SwapCache | Always zero, no swap
+Inactive(anon) | Always zero, see SwapCache
+Unevictable | Always zero TODO
+Mlocked | Always zero TODO
+SwapTotal | Always zero, no swap
+SwapFree | Always zero, no swap
+Dirty | Always zero TODO
+Writeback | Always zero TODO
+MemAvailable | Uses the same value as MemFree since there is no swap.
+Slab | Missing
+SReclaimable | Missing
+SUnreclaim | Missing
+KernelStack | Missing
+PageTables | Missing
+NFS_Unstable | Missing
+Bounce | Missing
+WritebackTmp | Missing
+CommitLimit | Missing
+Committed_AS | Missing
+VmallocTotal | Missing
+VmallocUsed | Missing
+VmallocChunk | Missing
+HardwareCorrupted | Missing
+AnonHugePages | Missing
+ShmemHugePages | Missing
+ShmemPmdMapped | Missing
+HugePages_Total | Missing
+HugePages_Free | Missing
+HugePages_Rsvd | Missing
+HugePages_Surp | Missing
+Hugepagesize | Missing
+DirectMap4k | Missing
+DirectMap2M | Missing
+DirectMap1G | Missing
+
+See [Memory
+Accounting](pkg/sentry/usage/g3doc/memory-accounting.md)
+for general caveats.
+
+### stat
+
+```bash
+$ cat /proc/stat
+cpu 0 0 0 0 0 0 0 0 0 0
+cpu0 0 0 0 0 0 0 0 0 0 0
+cpu1 0 0 0 0 0 0 0 0 0 0
+cpu2 0 0 0 0 0 0 0 0 0 0
+cpu3 0 0 0 0 0 0 0 0 0 0
+cpu4 0 0 0 0 0 0 0 0 0 0
+cpu5 0 0 0 0 0 0 0 0 0 0
+cpu6 0 0 0 0 0 0 0 0 0 0
+cpu7 0 0 0 0 0 0 0 0 0 0
+intr 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ctxt 0
+btime 1504040968
+processes 0
+procs_running 0
+procs_blocked 0
+softirq 0 0 0 0 0 0 0 0 0 0 0
+```
+
+All fields except for `btime` are always zero.
+TODO: Populate with accurate fields.
+
+### sys
+
+```bash
+$ ls /proc/sys
+kernel vm
+```
+
+Directory | Notes
+:-------- | :----------------------------
+abi | Missing
+debug | Missing
+dev | Missing
+fs | Missing
+kernel | Contains hostname (only)
+net | Missing
+user | Missing
+vm | Contains mmap_min_addr (only)
+
+### uptime
+
+```bash
+$ cat /proc/uptime
+3204.62 0.00
+```
+
+Column | Notes
+:------------------------------- | :----------------------------
+Total num seconds system running | Time since procfs was mounted
+Number of seconds idle | Always zero
+
+### version
+
+```bash
+$ cat /proc/version
+Linux version 3.11.10 #1 SMP Fri Nov 29 10:47:50 PST 2013
+```
+
+## Process-specific data
+
+The following files are implemented:
+
+File /proc/PID | Content
+:------------------ | :---------------------------------------------------
+[auxv](#auxv) | Copy of auxiliary vector for the process
+[cmdline](#cmdline) | Command line arguments
+[comm](#comm) | Command name associated with the process
+[exe](#exe) | Symlink to the process's executable
+[fd](#fd) | Directory containing links to open file descriptors
+[fdinfo](#fdinfo) | Information associated with open file descriptors
+[gid_map](#gid_map) | Mappings for group IDs inside the user namespace
+[io](#io) | IO statistics
+[maps](#maps) | Memory mappings (anon, executables, library files)
+[ns](#ns) | Directory containing info about supported namespaces
+[stat](#stat) | Process statistics
+[status](#status) | Process status in human readable format
+[task](#task) | Directory containing info about running threads
+[uid_map](#uid_map) | Mappings for user IDs inside the user namespace
+
+### auxv
+
+TODO
+
+### cmdline
+
+TODO
+
+### comm
+
+TODO
+
+### exe
+
+TODO
+
+### fd
+
+TODO
+
+### fdinfo
+
+TODO
+
+### gid_map
+
+TODO
+
+### io
+
+Only has data for rchar, wchar, syscr, and syscw.
+
+TODO: add more detail.
+
+### maps
+
+TODO
+
+### ns
+
+TODO
+
+### stat
+
+Only has data for pid, comm, state, ppid, utime, stime, cutime, cstime,
+num_threads, and exit_signal.
+
+TODO: add more detail.
+
+### status
+
+Statically created, most of the fields have no data.
+
+TODO: add more detail.
+
+### task
+
+TODO
+
+### uid_map
+
+TODO
diff --git a/pkg/sentry/fs/proc/cpuinfo.go b/pkg/sentry/fs/proc/cpuinfo.go
new file mode 100644
index 000000000..f80aaa5b1
--- /dev/null
+++ b/pkg/sentry/fs/proc/cpuinfo.go
@@ -0,0 +1,64 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "io"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// cpuinfo is a file describing the CPU capabilities.
+//
+// Presently cpuinfo never changes, so it doesn't need to be a SeqFile.
+// The contents are rebuilt from the kernel's FeatureSet on every read.
+type cpuinfo struct {
+ ramfs.Entry
+
+ // k is the system kernel. It supplies the FeatureSet and the number of
+ // application cores used to generate the file contents.
+ k *kernel.Kernel
+}
+
+// DeprecatedPreadv implements fs.InodeOperations.DeprecatedPreadv.
+//
+// The whole file is regenerated on each call by concatenating the
+// FeatureSet's CPUInfo text for every application core. The bytes from offset
+// onward are then copied out to dst; an offset at or past the end of the
+// generated contents yields io.EOF.
+func (c *cpuinfo) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
+	featureSet := c.k.FeatureSet()
+	if featureSet == nil {
+		// Kernel is always initialized with a FeatureSet.
+		panic("cpuinfo read with nil FeatureSet")
+	}
+
+	data := make([]byte, 0, 1024)
+	numCores := c.k.ApplicationCores()
+	for core := uint(0); core < numCores; core++ {
+		data = append(data, []byte(featureSet.CPUInfo(core))...)
+	}
+
+	if int64(len(data)) <= offset {
+		return 0, io.EOF
+	}
+	copied, err := dst.CopyOut(ctx, data[offset:])
+	return int64(copied), err
+}
+
+// newCPUInfo creates the cpuinfo inode. The entry is owned by root and is
+// read-only for everyone (mode 0444); it is not associated with any task.
+func (p *proc) newCPUInfo(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
+	info := &cpuinfo{k: p.k}
+	perms := fs.FilePermsFromMode(0444)
+	info.InitEntry(ctx, fs.RootOwner, perms)
+	return newFile(info, msrc, fs.SpecialFile, nil)
+}
diff --git a/pkg/sentry/fs/proc/device/BUILD b/pkg/sentry/fs/proc/device/BUILD
new file mode 100644
index 000000000..b62062bd7
--- /dev/null
+++ b/pkg/sentry/fs/proc/device/BUILD
@@ -0,0 +1,11 @@
+package(licenses = ["notice"]) # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+ name = "device",
+ srcs = ["device.go"],
+ importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device",
+ visibility = ["//pkg/sentry:internal"],
+ deps = ["//pkg/sentry/device"],
+)
diff --git a/pkg/sentry/fs/proc/device/device.go b/pkg/sentry/fs/proc/device/device.go
new file mode 100644
index 000000000..6194afe88
--- /dev/null
+++ b/pkg/sentry/fs/proc/device/device.go
@@ -0,0 +1,23 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package device contains the proc device to avoid dependency loops.
+package device
+
+import (
+ "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+)
+
+// ProcDevice is the kernel proc device.
+//
+// It is created once, at package initialization, via device.NewAnonDevice.
+var ProcDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/fs/proc/exec_args.go b/pkg/sentry/fs/proc/exec_args.go
new file mode 100644
index 000000000..0e1523bf1
--- /dev/null
+++ b/pkg/sentry/fs/proc/exec_args.go
@@ -0,0 +1,129 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "fmt"
+ "io"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// execArgType enumerates the types of exec arguments that are exposed through
+// proc.
+type execArgType int
+
+const (
+ // cmdlineExecArg selects the argument vector (ArgvStart..ArgvEnd).
+ cmdlineExecArg execArgType = iota
+ // environExecArg selects the environment vector (EnvvStart..EnvvEnd).
+ environExecArg
+)
+
+// execArgFile is a file containing the exec args (either cmdline or environ)
+// for a given task.
+//
+// Nothing is cached: each read copies the data out of the task's memory.
+type execArgFile struct {
+ ramfs.Entry
+
+ // arg is the type of exec argument this file contains.
+ arg execArgType
+
+ // t is the Task to read the exec arg line from.
+ t *kernel.Task
+}
+
+// newExecArgFile creates a file containing the exec args of the given type.
+//
+// It panics if arg is not one of the known execArgType values. The file is
+// created read-only for everyone (mode 0444) and is associated with task t.
+func newExecArgFile(t *kernel.Task, msrc *fs.MountSource, arg execArgType) *fs.Inode {
+	switch arg {
+	case cmdlineExecArg, environExecArg:
+		// Supported.
+	default:
+		panic(fmt.Sprintf("unknown exec arg type %v", arg))
+	}
+
+	argFile := &execArgFile{arg: arg, t: t}
+	argFile.InitEntry(t, fs.RootOwner, fs.FilePermsFromMode(0444))
+	return newFile(argFile, msrc, fs.SpecialFile, t)
+}
+
+// DeprecatedPreadv reads the exec arg from the process's address space.
+//
+// A negative offset is rejected with EINVAL. Reads are limited to the first
+// page of the argument region: an offset beyond one page yields io.EOF and dst
+// is truncated so that the read never extends past the page limit. The bytes
+// are copied from the task's memory into a temporary buffer and then out to
+// dst. The number of bytes written to dst is returned; an error from writing
+// to dst takes precedence over an error from reading the task's memory.
+func (f *execArgFile) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+
+ // N.B. Linux 4.2 eliminates the arbitrary one page limit.
+ //
+ // An offset exactly equal to PageSize is not rejected here; dst is then
+ // truncated to zero bytes below.
+ if offset > usermem.PageSize {
+ return 0, io.EOF
+ }
+ dst = dst.TakeFirst64(usermem.PageSize - offset)
+
+ m, err := getTaskMM(f.t)
+ if err != nil {
+ return 0, err
+ }
+ defer m.DecUsers(ctx)
+
+ // Figure out the bounds of the exec arg we are trying to read.
+ var execArgStart, execArgEnd usermem.Addr
+ switch f.arg {
+ case cmdlineExecArg:
+ execArgStart, execArgEnd = m.ArgvStart(), m.ArgvEnd()
+ case environExecArg:
+ execArgStart, execArgEnd = m.EnvvStart(), m.EnvvEnd()
+ default:
+ panic(fmt.Sprintf("unknown exec arg type %v", f.arg))
+ }
+ if execArgStart == 0 || execArgEnd == 0 {
+ // Don't attempt to read before the start/end are set up.
+ return 0, io.EOF
+ }
+
+ // Apply the file offset to the start address; address overflow or
+ // landing at/after the end of the region means there is nothing to read.
+ start, ok := execArgStart.AddLength(uint64(offset))
+ if !ok {
+ return 0, io.EOF
+ }
+ if start >= execArgEnd {
+ return 0, io.EOF
+ }
+
+ // Read no more than what remains in the region and what dst can hold.
+ length := int(execArgEnd - start)
+ if dstlen := dst.NumBytes(); int64(length) > dstlen {
+ length = int(dstlen)
+ }
+
+ buf := make([]byte, length)
+ // N.B. Technically this should be usermem.IOOpts.IgnorePermissions = true
+ // until Linux 4.9 (272ddc8b3735 "proc: don't use FOLL_FORCE for reading
+ // cmdline and environment").
+ copyN, copyErr := m.CopyIn(ctx, start, buf, usermem.IOOpts{})
+ if copyN == 0 {
+ // Nothing to copy.
+ return 0, copyErr
+ }
+ // A partial read is still returned to the caller; keep only the bytes
+ // that were actually copied in.
+ buf = buf[:copyN]
+
+ // TODO: On Linux, if the NUL byte at the end of the
+ // argument vector has been overwritten, it continues reading the
+ // environment vector as part of the argument vector.
+
+ n, dstErr := dst.CopyOut(ctx, buf)
+ if dstErr != nil {
+ return int64(n), dstErr
+ }
+ return int64(n), copyErr
+}
diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go
new file mode 100644
index 000000000..2eca9ac31
--- /dev/null
+++ b/pkg/sentry/fs/proc/fds.go
@@ -0,0 +1,258 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "fmt"
+ "sort"
+ "strconv"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// walkDescriptors finds the descriptor (file-flag pair) for the fd identified
+// by p, and calls the toInode callback with that descriptor. This is a helper
+// method for implementing fs.InodeOperations.Lookup.
+//
+// p must be the decimal representation of a non-negative descriptor number.
+// ENOENT is returned if p does not parse as one, if the task has no FDMap, or
+// if the FDMap has no file for that number.
+func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDFlags) *fs.Inode) (*fs.Inode, error) {
+	// File descriptors are non-negative 32-bit signed integers, so limit
+	// parsing to 31 bits. Parsing with a 64-bit limit would accept names
+	// such as "4294967296", which could wrap around to an unrelated, valid
+	// descriptor when converted to kdefs.FD below (assuming kdefs.FD is
+	// narrower than 64 bits — TODO confirm against its declaration).
+	n, err := strconv.ParseUint(p, 10, 31)
+	if err != nil {
+		// Not a valid descriptor number: not found.
+		return nil, syserror.ENOENT
+	}
+
+	var (
+		file  *fs.File
+		flags kernel.FDFlags
+	)
+	t.WithMuLocked(func(t *kernel.Task) {
+		if fdm := t.FDMap(); fdm != nil {
+			file, flags = fdm.GetDescriptor(kdefs.FD(n))
+		}
+	})
+	if file == nil {
+		return nil, syserror.ENOENT
+	}
+	return toInode(file, flags), nil
+}
+
+// readDescriptors reads fds in the task starting at offset, and calls the
+// toDentAttr callback for each to get a DentAttr, which it then emits. This is
+// a helper for implementing fs.InodeOperations.Readdir.
+//
+// The returned int is the offset to resume from: the fd that failed to emit
+// when an error is returned, one past the last emitted fd on success, or the
+// unchanged offset when no fd is at or beyond it.
+func readDescriptors(t *kernel.Task, c *fs.DirCtx, offset int, toDentAttr func(int) fs.DentAttr) (int, error) {
+	var fds kernel.FDs
+	t.WithMuLocked(func(t *kernel.Task) {
+		fdm := t.FDMap()
+		if fdm == nil {
+			return
+		}
+		fds = fdm.GetFDs()
+	})
+
+	numbers := make([]int, 0, len(fds))
+	for _, fd := range fds {
+		numbers = append(numbers, int(fd))
+	}
+
+	// Find the fd to start at.
+	first := sort.SearchInts(numbers, offset)
+	if first == len(numbers) {
+		return offset, nil
+	}
+
+	next := offset
+	for _, number := range numbers[first:] {
+		name := strconv.FormatUint(uint64(number), 10)
+		if err := c.DirEmit(name, toDentAttr(number)); err != nil {
+			// Returned offset is the next fd to serialize.
+			return number, err
+		}
+		// Next offset should be higher than the last serialized fd.
+		next = number + 1
+	}
+	// We serialized them all.
+	return next, nil
+}
+
+// fd is a single file in /proc/TID/fd/.
+//
+// It is exposed as a symlink and holds the fs.File that the descriptor
+// refers to.
+type fd struct {
+ ramfs.Symlink
+ *fs.File
+}
+
+// newFd returns a new fd based on an existing file.
+//
+// This inherits one reference to the file.
+func newFd(t *kernel.Task, f *fs.File, msrc *fs.MountSource) *fs.Inode {
+	link := &fd{File: f}
+	// The symlink is initialized with RootOwner; the wrapper created by
+	// newFile overrides the owner in UnstableAttr() because a task is given.
+	link.InitSymlink(t, fs.RootOwner, "")
+	return newFile(link, msrc, fs.Symlink, t)
+}
+
+// GetFile returns the fs.File backing this fd. The dirent and flags
+// arguments are ignored.
+//
+// A new reference is taken on the file on behalf of the caller.
+func (f *fd) GetFile(context.Context, *fs.Dirent, fs.FileFlags) (*fs.File, error) {
+	file := f.File
+	file.IncRef()
+	return file, nil
+}
+
+// Readlink returns the current target: the full name of the file's Dirent
+// relative to the root observed by ctx.
+func (f *fd) Readlink(ctx context.Context, _ *fs.Inode) (string, error) {
+	root := fs.RootFromContext(ctx)
+	// NOTE(review): RootFromContext presumably returns nil when ctx has no
+	// associated root — confirm. Only drop the reference if one was taken, so
+	// that a nil root does not cause a nil dereference in the deferred call.
+	if root != nil {
+		defer root.DecRef()
+	}
+	// The second result of FullName is deliberately ignored; the name is
+	// returned as computed either way.
+	name, _ := f.Dirent.FullName(root)
+	return name, nil
+}
+
+// Getlink implements fs.InodeOperations.Getlink.
+//
+// It returns the Dirent of the underlying file with an extra reference taken
+// for the caller.
+func (f *fd) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) {
+	target := f.Dirent
+	target.IncRef()
+	return target, nil
+}
+
+// Truncate is ignored: it always succeeds without changing anything.
+func (f *fd) Truncate(context.Context, *fs.Inode, int64) error {
+ return nil
+}
+
+// Close releases the reference on the file. It always returns nil.
+func (f *fd) Close() error {
+ f.DecRef()
+ return nil
+}
+
+// fdDir implements /proc/TID/fd.
+//
+// Entries are not stored in the directory; Lookup and DeprecatedReaddir
+// consult the task's FDMap on each call.
+type fdDir struct {
+ ramfs.Dir
+
+ // We hold a reference on the task's fdmap but only keep an indirect
+ // task pointer to avoid Dirent loading circularity caused by fdmap's
+ // potential back pointers into the dirent tree.
+ t *kernel.Task
+}
+
+// newFdDir creates a new fdDir for task t. The directory is readable and
+// searchable by its owner only.
+func newFdDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
+	ownerReadExec := fs.FilePermissions{
+		User: fs.PermMask{Read: true, Execute: true},
+	}
+	dir := &fdDir{t: t}
+	dir.InitDir(t, nil, fs.RootOwner, ownerReadExec)
+	return newFile(dir, msrc, fs.SpecialDirectory, t)
+}
+
+// Check implements InodeOperations.Check.
+//
+// This is to match Linux, which uses a special permission handler to guarantee
+// that a process can still access /proc/self/fd after it has executed
+// setuid. See fs/proc/fd.c:proc_fd_permission.
+func (f *fdDir) Check(ctx context.Context, inode *fs.Inode, req fs.PermMask) bool {
+	// The ordinary permission check comes first.
+	if fs.ContextCanAccessFile(ctx, inode, req) {
+		return true
+	}
+
+	caller := kernel.TaskFromContext(ctx)
+	if caller == nil {
+		return false
+	}
+	// Allow access if the task trying to access it is in the thread group
+	// corresponding to this directory.
+	//
+	// N.B. Technically, in Linux 3.11, this compares what would be the
+	// equivalent of task pointers. However, this was fixed later in
+	// 54708d2858e7 ("proc: actually make proc_fd_permission()
+	// thread-friendly").
+	return f.t.ThreadGroup() == caller.ThreadGroup()
+}
+
+// Lookup loads an Inode in /proc/TID/fd into a Dirent.
+//
+// p is the descriptor number as a string; any error from walkDescriptors is
+// returned unchanged.
+func (f *fdDir) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent, error) {
+	makeInode := func(file *fs.File, _ kernel.FDFlags) *fs.Inode {
+		return newFd(f.t, file, dir.MountSource)
+	}
+	inode, err := walkDescriptors(f.t, p, makeInode)
+	if err != nil {
+		return nil, err
+	}
+	return fs.NewDirent(inode, p), nil
+}
+
+// DeprecatedReaddir lists fds in /proc/TID/fd. Every entry is reported as a
+// symlink on the proc device.
+func (f *fdDir) DeprecatedReaddir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+	symlinkAttr := func(int) fs.DentAttr {
+		return fs.GenericDentAttr(fs.Symlink, device.ProcDevice)
+	}
+	return readDescriptors(f.t, dirCtx, offset, symlinkAttr)
+}
+
+// fdInfo is a single file in /proc/TID/fdinfo/.
+//
+// Its contents are written once, when the file is created by newFdInfo.
+type fdInfo struct {
+ ramfs.File
+
+ // flags are the descriptor flags captured when the file was created.
+ flags kernel.FDFlags
+}
+
+// newFdInfo returns a new fdInfo based on an existing file.
+//
+// The file argument is currently unused; only flags is recorded. The new file
+// is readable by its owner only.
+func newFdInfo(t *kernel.Task, _ *fs.File, flags kernel.FDFlags, msrc *fs.MountSource) *fs.Inode {
+	info := &fdInfo{flags: flags}
+	ownerRead := fs.FilePermissions{User: fs.PermMask{Read: true}}
+	info.InitFile(t, fs.RootOwner, ownerRead)
+
+	// TODO: Get pos, locks, and other data. For now we only
+	// have flags.
+	// See https://www.kernel.org/doc/Documentation/filesystems/proc.txt
+	contents := fmt.Sprintf("flags: %08o\n", flags)
+	info.Append([]byte(contents))
+
+	return newFile(info, msrc, fs.SpecialFile, t)
+}
+
+// DeprecatedPwritev implements fs.HandleOperations.DeprecatedPwritev.
+//
+// Writes are not supported; it always returns ramfs.ErrInvalidOp.
+func (*fdInfo) DeprecatedPwritev(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ return 0, ramfs.ErrInvalidOp
+}
+
+// Truncate implements fs.InodeOperations.Truncate.
+//
+// Truncation is not supported; it always returns ramfs.ErrInvalidOp.
+func (*fdInfo) Truncate(ctx context.Context, inode *fs.Inode, size int64) error {
+ return ramfs.ErrInvalidOp
+}
+
+// fdInfoDir implements /proc/TID/fdinfo. It embeds a ramfs.Dir, but overrides
+// Lookup and DeprecatedReaddir so that entries are derived from the task's
+// open file descriptors.
+type fdInfoDir struct {
+ ramfs.Dir
+
+ // t is the task whose file descriptors are listed.
+ t *kernel.Task
+}
+
+// newFdInfoDir creates a new fdInfoDir for task t, with mode 0500.
+func newFdInfoDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
+	perms := fs.FilePermsFromMode(0500)
+	dir := &fdInfoDir{t: t}
+	dir.InitDir(t, nil, fs.RootOwner, perms)
+	return newFile(dir, msrc, fs.SpecialDirectory, t)
+}
+
+// Lookup loads an fd in /proc/TID/fdinfo into a Dirent.
+//
+// p is the descriptor number as a string; any error from walkDescriptors is
+// returned unchanged.
+func (fdid *fdInfoDir) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent, error) {
+	makeInode := func(file *fs.File, flags kernel.FDFlags) *fs.Inode {
+		return newFdInfo(fdid.t, file, flags, dir.MountSource)
+	}
+	inode, err := walkDescriptors(fdid.t, p, makeInode)
+	if err != nil {
+		return nil, err
+	}
+	return fs.NewDirent(inode, p), nil
+}
+
+// DeprecatedReaddir lists fds in /proc/TID/fdinfo. Every entry is reported as
+// a regular file on the proc device.
+func (fdid *fdInfoDir) DeprecatedReaddir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+	regularFileAttr := func(int) fs.DentAttr {
+		return fs.GenericDentAttr(fs.RegularFile, device.ProcDevice)
+	}
+	return readDescriptors(fdid.t, dirCtx, offset, regularFileAttr)
+}
diff --git a/pkg/sentry/fs/proc/file.go b/pkg/sentry/fs/proc/file.go
new file mode 100644
index 000000000..9a433cdf8
--- /dev/null
+++ b/pkg/sentry/fs/proc/file.go
@@ -0,0 +1,56 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// file wraps another fs.InodeOperations implementation for use in procfs.
+// When a task is attached, UnstableAttr reports that task's owner instead of
+// the owner stored by the wrapped implementation.
+type file struct {
+ fs.InodeOperations
+
+ // nodeType is the file type of this file.
+ nodeType fs.InodeType
+
+ // t is the associated kernel task that owns this file. It may be nil
+ // for files that do not belong to a task.
+ t *kernel.Task
+}
+
+// newFile wraps node in a file and returns a new Inode for it on the proc
+// device. Each call allocates a fresh inode number from device.ProcDevice.
+// t may be nil when the file is not owned by a task.
+func newFile(node fs.InodeOperations, msrc *fs.MountSource, nodeType fs.InodeType, t *kernel.Task) *fs.Inode {
+	wrapped := &file{
+		InodeOperations: node,
+		nodeType:        nodeType,
+		t:               t,
+	}
+	stable := fs.StableAttr{
+		DeviceID:  device.ProcDevice.DeviceID(),
+		InodeID:   device.ProcDevice.NextIno(),
+		BlockSize: usermem.PageSize,
+		Type:      nodeType,
+	}
+	return fs.NewInode(wrapped, msrc, stable)
+}
+
+// UnstableAttr returns all attributes of this file.
+//
+// The attributes come from the wrapped InodeOperations; if the file is
+// associated with a task, the owner is replaced by the owner derived from
+// that task.
+func (f *file) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
+	attr, err := f.InodeOperations.UnstableAttr(ctx, inode)
+	if err != nil {
+		return fs.UnstableAttr{}, err
+	}
+	if f.t == nil {
+		return attr, nil
+	}
+	attr.Owner = fs.FileOwnerFromContext(f.t)
+	return attr, nil
+}
diff --git a/pkg/sentry/fs/proc/filesystems.go b/pkg/sentry/fs/proc/filesystems.go
new file mode 100644
index 000000000..fe4de18ba
--- /dev/null
+++ b/pkg/sentry/fs/proc/filesystems.go
@@ -0,0 +1,55 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+)
+
+// filesystemsData backs /proc/filesystems. It holds no state; the contents
+// are generated from fs.GetFilesystems.
+type filesystemsData struct{}
+
+// NeedsUpdate returns true on the first generation. The set of registered file
+// systems doesn't change so there's no need to generate SeqData more than once.
+//
+// The first generation is generation 0; ReadSeqFileData reports generation 1
+// once the data has been produced.
+func (*filesystemsData) NeedsUpdate(generation int64) bool {
+ return generation == 0
+}
+
+// ReadSeqFileData returns data for the SeqFile reader: the SeqData and the
+// current generation.
+//
+// The whole file is produced as a single SeqData entry when h is nil. For any
+// non-nil handle it returns no data and generation 0.
+func (*filesystemsData) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+	// We don't ever expect to see a non-nil SeqHandle.
+	if h != nil {
+		return nil, 0
+	}
+
+	// Generate the file contents, one line per registered file system.
+	var out bytes.Buffer
+	for _, sys := range fs.GetFilesystems() {
+		// File systems that require a device get an empty first column.
+		prefix := ""
+		if sys.Flags()&fs.FilesystemRequiresDev == 0 {
+			prefix = "nodev"
+		}
+		// Matches the format of fs/filesystems.c:filesystems_proc_show.
+		fmt.Fprintf(&out, "%s\t%s\n", prefix, sys.Name())
+	}
+
+	// Return the SeqData and advance the generation counter.
+	data := seqfile.SeqData{Buf: out.Bytes(), Handle: (*filesystemsData)(nil)}
+	return []seqfile.SeqData{data}, 1
+}
diff --git a/pkg/sentry/fs/proc/fs.go b/pkg/sentry/fs/proc/fs.go
new file mode 100644
index 000000000..072d00beb
--- /dev/null
+++ b/pkg/sentry/fs/proc/fs.go
@@ -0,0 +1,69 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "fmt"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+)
+
+// filesystem is a procfs.
+//
+// It holds no state. An instance is registered with the fs package in init,
+// and Mount builds the procfs root.
+type filesystem struct{}
+
+// init registers the procfs filesystem with the fs package when this package
+// is loaded.
+func init() {
+ fs.RegisterFilesystem(&filesystem{})
+}
+
+// FilesystemName is the name under which the filesystem is registered.
+// Name matches fs/proc/root.c:proc_fs_type.name.
+const FilesystemName = "proc"
+
+// Name is the name of the file system. It always returns FilesystemName.
+func (*filesystem) Name() string {
+ return FilesystemName
+}
+
+// AllowUserMount allows users to mount(2) this file system. It always
+// returns true.
+func (*filesystem) AllowUserMount() bool {
+ return true
+}
+
+// Flags returns that there is nothing special about this file system: the
+// returned value is 0, i.e. no fs.FilesystemFlags bits are set.
+//
+// In Linux, proc returns FS_USERNS_VISIBLE | FS_USERNS_MOUNT, see fs/proc/root.c.
+func (*filesystem) Flags() fs.FilesystemFlags {
+ return 0
+}
+
+// Mount returns the root of a procfs that can be positioned in the vfs.
+//
+// The device argument is always ignored. data is parsed as generic
+// comma-separated key=value mount options, and an error is returned if any
+// option is present.
+func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSourceFlags, data string) (*fs.Inode, error) {
+	// Linux's fs/proc/root.c:proc_parse_options accepts only gid= and
+	// hidepid= and fails on anything else. Since we don't know what to do
+	// with either of those, every option is refused.
+	if opts := fs.GenericMountSourceOptions(data); len(opts) > 0 {
+		return nil, fmt.Errorf("unsupported mount options: %v", opts)
+	}
+
+	// procfs files are all virtual, so they must never be cached.
+	msrc := fs.NewNonCachingMountSource(f, flags)
+	return New(ctx, msrc)
+}
diff --git a/pkg/sentry/fs/proc/loadavg.go b/pkg/sentry/fs/proc/loadavg.go
new file mode 100644
index 000000000..694cde656
--- /dev/null
+++ b/pkg/sentry/fs/proc/loadavg.go
@@ -0,0 +1,51 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+)
+
+// loadavgData backs /proc/loadavg.
+//
+// It carries no state; ReadSeqFileData currently emits placeholder zeros.
+type loadavgData struct{}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. It always returns
+// true, regardless of generation.
+func (*loadavgData) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+//
+// With a nil handle it returns the whole file as one SeqData record; with a
+// non-nil handle it returns no data. The reported generation is always 0.
+func (d *loadavgData) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+	if h != nil {
+		return nil, 0
+	}
+
+	// TODO: Include real data in fields.
+	// Column 1-3: load averages over the last 1, 5, and 15 minutes (see
+	// proc(5)).
+	// Column 4-5: currently running processes and the total number of
+	// processes.
+	// Column 6: the last process ID used.
+	var line bytes.Buffer
+	fmt.Fprintf(&line, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0)
+
+	record := seqfile.SeqData{Buf: line.Bytes(), Handle: (*loadavgData)(nil)}
+	return []seqfile.SeqData{record}, 0
+}
diff --git a/pkg/sentry/fs/proc/meminfo.go b/pkg/sentry/fs/proc/meminfo.go
new file mode 100644
index 000000000..489f796e5
--- /dev/null
+++ b/pkg/sentry/fs/proc/meminfo.go
@@ -0,0 +1,82 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// meminfoData backs /proc/meminfo.
+//
+// ReadSeqFileData reads memory usage through k.Platform.Memory() and
+// usage.MemoryAccounting.
+type meminfoData struct {
+ // k is the owning Kernel.
+ k *kernel.Kernel
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. It always returns
+// true, regardless of generation.
+func (*meminfoData) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+//
+// With a nil handle it returns the whole file as one SeqData record; with a
+// non-nil handle it returns no data. The reported generation is always 0.
+// Every value is printed in kB, i.e. the byte count divided by 1024.
+func (d *meminfoData) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if h != nil {
+ return nil, 0
+ }
+
+ // Refresh the platform's usage figures before taking the snapshot.
+ mem := d.k.Platform.Memory()
+ mem.UpdateUsage()
+ snapshot, totalUsage := usage.MemoryAccounting.Copy()
+ totalSize := usage.TotalMemory(mem.TotalSize(), totalUsage)
+ // Tmpfs usage is counted as anonymous memory here, and is also added to
+ // "Cached" and reported as "Shmem" below.
+ anon := snapshot.Anonymous + snapshot.Tmpfs
+ file := snapshot.PageCache + snapshot.Mapped
+ // We don't actually have active/inactive LRUs, so just make up numbers.
+ // Half of the file-backed memory, rounded down to a page boundary, is
+ // reported as active and the remainder as inactive.
+ activeFile := (file / 2) &^ (usermem.PageSize - 1)
+ inactiveFile := file - activeFile
+
+ var buf bytes.Buffer
+ fmt.Fprintf(&buf, "MemTotal: %8d kB\n", totalSize/1024)
+ // NOTE(review): presumably usage.TotalMemory never returns less than
+ // totalUsage, so this subtraction cannot wrap -- confirm.
+ memFree := (totalSize - totalUsage) / 1024
+ // We use MemFree as MemAvailable because we don't swap.
+ // TODO: When reclaim is implemented the value of MemAvailable
+ // should change.
+ fmt.Fprintf(&buf, "MemFree: %8d kB\n", memFree)
+ fmt.Fprintf(&buf, "MemAvailable: %8d kB\n", memFree)
+ fmt.Fprintf(&buf, "Buffers: 0 kB\n") // memory usage by block devices
+ fmt.Fprintf(&buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024)
+ // Emulate a system with no swap, which disables inactivation of anon pages.
+ fmt.Fprintf(&buf, "SwapCache: 0 kB\n")
+ fmt.Fprintf(&buf, "Active: %8d kB\n", (anon+activeFile)/1024)
+ fmt.Fprintf(&buf, "Inactive: %8d kB\n", inactiveFile/1024)
+ fmt.Fprintf(&buf, "Active(anon): %8d kB\n", anon/1024)
+ fmt.Fprintf(&buf, "Inactive(anon): 0 kB\n")
+ fmt.Fprintf(&buf, "Active(file): %8d kB\n", activeFile/1024)
+ fmt.Fprintf(&buf, "Inactive(file): %8d kB\n", inactiveFile/1024)
+ fmt.Fprintf(&buf, "Unevictable: 0 kB\n") // TODO
+ fmt.Fprintf(&buf, "Mlocked: 0 kB\n") // TODO
+ fmt.Fprintf(&buf, "SwapTotal: 0 kB\n")
+ fmt.Fprintf(&buf, "SwapFree: 0 kB\n")
+ fmt.Fprintf(&buf, "Dirty: 0 kB\n")
+ fmt.Fprintf(&buf, "Writeback: 0 kB\n")
+ fmt.Fprintf(&buf, "AnonPages: %8d kB\n", anon/1024)
+ fmt.Fprintf(&buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know
+ fmt.Fprintf(&buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024)
+ return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*meminfoData)(nil)}}, 0
+}
diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go
new file mode 100644
index 000000000..76092567d
--- /dev/null
+++ b/pkg/sentry/fs/proc/mounts.go
@@ -0,0 +1,176 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+ "sort"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+)
+
+// forEachMountSource runs fn for the process root mount and each mount that is a
+// descendant of the root.
+//
+// fn receives the mount's path relative to t's root directory and the
+// MountSource itself. Mounts are visited in ascending MountSource ID order.
+// It panics if the root dirent has a nil Inode or a nil MountSource.
+func forEachMountSource(t *kernel.Task, fn func(string, *fs.MountSource)) {
+ // All mount points must be relative to the rootDir, and mounts outside
+ // will be excluded.
+ rootDir := t.FSContext().RootDirectory()
+ defer rootDir.DecRef()
+
+ if rootDir.Inode == nil {
+ panic(fmt.Sprintf("root dirent has nil inode: %+v", rootDir))
+ }
+ if rootDir.Inode.MountSource == nil {
+ panic(fmt.Sprintf("root dirent has nil mount: %+v", rootDir))
+ }
+
+ // Collect the root's submounts plus the root mount itself, then order
+ // them by ID so the output is stable.
+ ms := append(rootDir.Inode.MountSource.Submounts(), rootDir.Inode.MountSource)
+ sort.Slice(ms, func(i, j int) bool {
+ return ms[i].ID() < ms[j].ID()
+ })
+ for _, m := range ms {
+ mountPath, desc := m.Root().FullName(rootDir)
+ if !desc {
+ // MountSources that are not descendants of the chroot jail are ignored.
+ continue
+ }
+
+ fn(mountPath, m)
+ }
+}
+
+// mountInfoFile is used to implement /proc/[pid]/mountinfo.
+type mountInfoFile struct {
+ // t is the task whose root directory determines which mounts are listed.
+ t *kernel.Task
+}
+
+// NeedsUpdate implements SeqSource.NeedsUpdate. It always returns true, so the
+// mount list is regenerated whenever the reader asks.
+func (mif *mountInfoFile) NeedsUpdate(_ int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements SeqSource.ReadSeqFileData.
+//
+// With a nil handle it returns one SeqData record holding a line for every
+// mount visited by forEachMountSource; with a non-nil handle it returns no
+// data. The reported generation is always 0.
+func (mif *mountInfoFile) ReadSeqFileData(handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if handle != nil {
+ return nil, 0
+ }
+
+ var buf bytes.Buffer
+ forEachMountSource(mif.t, func(mountPath string, m *fs.MountSource) {
+ // Format:
+ // 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+ // (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
+
+ // (1) MountSource ID.
+ fmt.Fprintf(&buf, "%d ", m.ID())
+
+ // (2) Parent ID (or this ID if there is no parent).
+ pID := m.ID()
+ if p := m.Parent(); p != nil {
+ pID = p.ID()
+ }
+ fmt.Fprintf(&buf, "%d ", pID)
+
+ // (3) Major:Minor device ID. We don't have a superblock, so we
+ // just use the root inode device number.
+ sa := m.Root().Inode.StableAttr
+ fmt.Fprintf(&buf, "%d:%d ", sa.DeviceFileMajor, sa.DeviceFileMinor)
+
+ // (4) Root: the pathname of the directory in the filesystem
+ // which forms the root of this mount.
+ //
+ // NOTE: This will always be "/" until we implement
+ // bind mounts.
+ fmt.Fprintf(&buf, "/ ")
+
+ // (5) Mount point (relative to process root).
+ fmt.Fprintf(&buf, "%s ", mountPath)
+
+ // (6) Mount options.
+ opts := "rw"
+ if m.Flags.ReadOnly {
+ opts = "ro"
+ }
+ if m.Flags.NoAtime {
+ opts += ",noatime"
+ }
+ fmt.Fprintf(&buf, "%s ", opts)
+
+ // (7) Optional fields: zero or more fields of the form "tag[:value]".
+ // None are emitted.
+ // (8) Separator: the end of the optional fields is marked by a single hyphen.
+ fmt.Fprintf(&buf, "- ")
+
+ // (9) Filesystem type.
+ name := "none"
+ if m.Filesystem != nil {
+ name = m.Filesystem.Name()
+ }
+ fmt.Fprintf(&buf, "%s ", name)
+
+ // (10) Mount source: filesystem-specific information or "none".
+ fmt.Fprintf(&buf, "none ")
+
+ // (11) Superblock options. Only "ro/rw" is supported for now,
+ // and is the same as the filesystem option. The string printed
+ // is the same opts value as field (6), so it also carries
+ // ",noatime" when that flag is set.
+ fmt.Fprintf(&buf, "%s\n", opts)
+ })
+
+ return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*mountInfoFile)(nil)}}, 0
+}
+
+// mountsFile is used to implement /proc/[pid]/mounts.
+type mountsFile struct {
+ // t is the task whose root directory determines which mounts are listed.
+ t *kernel.Task
+}
+
+// NeedsUpdate implements SeqSource.NeedsUpdate. It always returns true, so the
+// mount list is regenerated whenever the reader asks.
+func (mf *mountsFile) NeedsUpdate(_ int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements SeqSource.ReadSeqFileData.
+//
+// With a nil handle it returns one SeqData record holding a line for every
+// mount visited by forEachMountSource; with a non-nil handle it returns no
+// data. The reported generation is always 0.
+func (mf *mountsFile) ReadSeqFileData(handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if handle != nil {
+ return nil, 0
+ }
+
+ var buf bytes.Buffer
+ forEachMountSource(mf.t, func(mountPath string, m *fs.MountSource) {
+ // Format (tab-separated):
+ // <special device or remote filesystem> <mount point> <filesystem type> <mount options> <needs dump> <fsck order>
+ //
+ // The first field is always "none", since there is no real
+ // block device we can point to, and we also should not expose
+ // anything about the remote filesystem. The filesystem name is
+ // reported in the third field.
+ //
+ // Only ro/rw option is supported for now.
+ //
+ // The "needs dump" and fsck flags are always 0, which is allowed.
+ opts := "rw"
+ if m.Flags.ReadOnly {
+ opts = "ro"
+ }
+ name := "none"
+ if m.Filesystem != nil {
+ name = m.Filesystem.Name()
+ }
+ fmt.Fprintf(&buf, "%s\t%s\t%s\t%s\t%d\t%d\n", "none", mountPath, name, opts, 0, 0)
+ })
+
+ return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*mountsFile)(nil)}}, 0
+}
diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go
new file mode 100644
index 000000000..6e464857a
--- /dev/null
+++ b/pkg/sentry/fs/proc/net.go
@@ -0,0 +1,151 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "fmt"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
+)
+
+// newNetDir creates a new proc net entry.
+//
+// The returned directory is empty when the kernel has no network stack.
+// Otherwise it contains "dev", and additionally "if_inet6" when the stack
+// supports IPv6.
+func (p *proc) newNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
+	d := &ramfs.Dir{}
+	d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555))
+	if s := p.k.NetworkStack(); s != nil {
+		// The interface table is not IPv6-specific, so it is exposed
+		// for every stack; only if_inet6 depends on IPv6 support.
+		d.AddChild(ctx, "dev", seqfile.NewSeqFileInode(ctx, &netDev{s: s}, msrc))
+		if s.SupportsIPv6() {
+			d.AddChild(ctx, "if_inet6", seqfile.NewSeqFileInode(ctx, &ifinet6{s: s}, msrc))
+		}
+	}
+	return newFile(d, msrc, fs.SpecialDirectory, nil)
+}
+
+// ifinet6 implements seqfile.SeqSource for /proc/net/if_inet6.
+type ifinet6 struct {
+ // s is the network stack queried for interfaces and their addresses. It
+ // is tagged so that it is not saved.
+ s inet.Stack `state:"nosave"` // S/R-FIXME
+}
+
+// contents returns one formatted line per IPv6 address known to the stack.
+// The result is nil when there are no such addresses. Lines follow the
+// iteration order of the InterfaceAddrs map, so their order is unspecified.
+func (n *ifinet6) contents() []string {
+	var out []string
+	ifaces := n.s.Interfaces()
+	for idx, addrs := range n.s.InterfaceAddrs() {
+		iface, known := ifaces[idx]
+		if !known {
+			// The interface has addresses but is missing from the
+			// Interfaces() snapshot taken above. We'll just ignore
+			// it.
+			continue
+		}
+
+		for _, addr := range addrs {
+			// IPv6 only.
+			if addr.Family == linux.AF_INET6 {
+				// Fields:
+				// IPv6 address displayed in 32 hexadecimal chars without colons
+				// Netlink device number (interface index) in hexadecimal (use nic id)
+				// Prefix length in hexadecimal
+				// Scope value (use 0)
+				// Interface flags
+				// Device name
+				out = append(out, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", addr.Addr, idx, addr.PrefixLen, 0, addr.Flags, iface.Name))
+			}
+		}
+	}
+	return out
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. It always returns
+// true, regardless of generation.
+func (*ifinet6) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+//
+// With a nil handle it returns one SeqData record per line produced by
+// contents; with a non-nil handle it returns no data. The reported generation
+// is always 0.
+func (n *ifinet6) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+	if h != nil {
+		return nil, 0
+	}
+
+	lines := n.contents()
+	var records []seqfile.SeqData
+	for i := range lines {
+		records = append(records, seqfile.SeqData{
+			Buf:    []byte(lines[i]),
+			Handle: (*ifinet6)(nil),
+		})
+	}
+	return records, 0
+}
+
+// netDev implements seqfile.SeqSource for /proc/net/dev.
+type netDev struct {
+ // s is the network stack queried for the interface list. It is tagged so
+ // that it is not saved.
+ s inet.Stack `state:"nosave"` // S/R-FIXME
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. It always returns
+// true, regardless of generation.
+func (n *netDev) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. See Linux's
+// net/core/net-procfs.c:dev_seq_show.
+func (n *netDev) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if h != nil {
+ return nil, 0
+ }
+
+ interfaces := n.s.Interfaces()
+ contents := make([]string, 2, 2+len(interfaces))
+ // Add the table header. From net/core/net-procfs.c:dev_seq_show.
+ contents[0] = "Inter-| Receive | Transmit\n"
+ contents[1] = " face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n"
+
+ for _, i := range interfaces {
+ // TODO: Collect stats from each inet.Stack
+ // implementation (hostinet, epsocket, and rpcinet).
+
+ // Implements the same format as
+ // net/core/net-procfs.c:dev_seq_printf_stats.
+ l := fmt.Sprintf("%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n",
+ i.Name,
+ // Received
+ 0, // bytes
+ 0, // packets
+ 0, // errors
+ 0, // dropped
+ 0, // fifo
+ 0, // frame
+ 0, // compressed
+ 0, // multicast
+ // Transmitted
+ 0, // bytes
+ 0, // packets
+ 0, // errors
+ 0, // dropped
+ 0, // fifo
+ 0, // frame
+ 0, // compressed
+ 0) // multicast
+ contents = append(contents, l)
+ }
+
+ var data []seqfile.SeqData
+ for _, l := range contents {
+ data = append(data, seqfile.SeqData{Buf: []byte(l), Handle: (*ifinet6)(nil)})
+ }
+
+ return data, 0
+}
diff --git a/pkg/sentry/fs/proc/net_test.go b/pkg/sentry/fs/proc/net_test.go
new file mode 100644
index 000000000..a31a20494
--- /dev/null
+++ b/pkg/sentry/fs/proc/net_test.go
@@ -0,0 +1,74 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "reflect"
+ "testing"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
+)
+
+// newIPv6TestStack returns an inet.TestStack with SupportsIPv6Flag set to
+// true.
+func newIPv6TestStack() *inet.TestStack {
+ s := inet.NewTestStack()
+ s.SupportsIPv6Flag = true
+ return s
+}
+
+// TestIfinet6NoAddresses checks that contents returns nil when the stack has
+// no interface addresses configured.
+func TestIfinet6NoAddresses(t *testing.T) {
+ n := &ifinet6{s: newIPv6TestStack()}
+ if got := n.contents(); got != nil {
+ t.Errorf("Got n.contents() = %v, want = %v", got, nil)
+ }
+}
+
+// TestIfinet6 checks the lines produced by contents for two interfaces that
+// each carry a single IPv6 address.
+func TestIfinet6(t *testing.T) {
+ s := newIPv6TestStack()
+ s.InterfacesMap[1] = inet.Interface{Name: "eth0"}
+ s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{
+ {
+ Family: linux.AF_INET6,
+ PrefixLen: 128,
+ Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"),
+ },
+ }
+ s.InterfacesMap[2] = inet.Interface{Name: "eth1"}
+ s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{
+ {
+ Family: linux.AF_INET6,
+ PrefixLen: 128,
+ Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"),
+ },
+ }
+ // The expected lines are held in a set because contents iterates over a
+ // map and therefore yields them in no particular order.
+ want := map[string]struct{}{
+ "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {},
+ "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {},
+ }
+
+ n := &ifinet6{s: s}
+ contents := n.contents()
+ // Compare lengths first: the set below would hide duplicated lines.
+ if len(contents) != len(want) {
+ t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want))
+ }
+ got := map[string]struct{}{}
+ for _, l := range contents {
+ got[l] = struct{}{}
+ }
+
+ if !reflect.DeepEqual(got, want) {
+ t.Errorf("Got n.contents() = %v, want = %v", got, want)
+ }
+}
diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go
new file mode 100644
index 000000000..459eb7e62
--- /dev/null
+++ b/pkg/sentry/fs/proc/proc.go
@@ -0,0 +1,182 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package proc implements a partial in-memory file system for procfs.
+package proc
+
+import (
+ "fmt"
+ "sort"
+ "strconv"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+)
+
+// proc is a root proc node.
+//
+// It embeds ramfs.Dir for its static children and overrides Lookup and
+// DeprecatedReaddir to add dynamically generated entries.
+type proc struct {
+ ramfs.Dir
+
+ // k is the Kernel containing this proc node.
+ k *kernel.Kernel
+
+ // pidns is the PID namespace of the task that mounted the proc filesystem
+ // that this node represents.
+ pidns *kernel.PIDNamespace
+}
+
+// New returns the root node of a partial simple procfs.
+//
+// It returns an error if ctx carries no Kernel or no PID namespace.
+func New(ctx context.Context, msrc *fs.MountSource) (*fs.Inode, error) {
+ k := kernel.KernelFromContext(ctx)
+ if k == nil {
+ return nil, fmt.Errorf("procfs requires a kernel")
+ }
+ pidns := kernel.PIDNamespaceFromContext(ctx)
+ if pidns == nil {
+ return nil, fmt.Errorf("procfs requires a PID namespace")
+ }
+
+ p := &proc{k: k, pidns: pidns}
+ p.InitDir(ctx, map[string]*fs.Inode{
+ // Note that these are just the static members. There are
+ // dynamic members populated in Readdir and Lookup below.
+ "filesystems": seqfile.NewSeqFileInode(ctx, &filesystemsData{}, msrc),
+ "loadavg": seqfile.NewSeqFileInode(ctx, &loadavgData{}, msrc),
+ "meminfo": seqfile.NewSeqFileInode(ctx, &meminfoData{k}, msrc),
+ "mounts": newMountsSymlink(ctx, msrc),
+ "stat": seqfile.NewSeqFileInode(ctx, &statData{k}, msrc),
+ "version": seqfile.NewSeqFileInode(ctx, &versionData{k}, msrc),
+ }, fs.RootOwner, fs.FilePermsFromMode(0555))
+
+ // These two are built by methods on p, so they are added after InitDir.
+ p.AddChild(ctx, "cpuinfo", p.newCPUInfo(ctx, msrc))
+ p.AddChild(ctx, "uptime", p.newUptime(ctx, msrc))
+
+ return newFile(p, msrc, fs.SpecialDirectory, nil), nil
+}
+
+// self is a magical link: its target is computed in Readlink from the task
+// that is reading it.
+type self struct {
+ ramfs.Symlink
+
+ // pidns is the PID namespace used to translate the reading task's thread
+ // group into an ID.
+ pidns *kernel.PIDNamespace
+}
+
+// newSelf returns a new "self" node.
+//
+// The symlink is initialized with an empty target; the real target is
+// produced by self.Readlink.
+func (p *proc) newSelf(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
+ s := &self{pidns: p.pidns}
+ s.InitSymlink(ctx, fs.RootOwner, "")
+ return newFile(s, msrc, fs.Symlink, nil)
+}
+
+// Readlink implements fs.InodeOperations.Readlink.
+//
+// The target is the decimal thread group ID, within s.pidns, of the task found
+// in ctx. It returns ramfs.ErrInvalidOp when ctx carries no task, and
+// ramfs.ErrNotFound when the task's thread group has ID 0 in s.pidns.
+func (s *self) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		// Who is reading this link?
+		return "", ramfs.ErrInvalidOp
+	}
+
+	tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
+	if tgid == 0 {
+		return "", ramfs.ErrNotFound
+	}
+	return strconv.FormatUint(uint64(tgid), 10), nil
+}
+
+// Lookup loads an Inode at name into a Dirent.
+//
+// Resolution order: static children of the embedded Dir, then the dynamic
+// entries "net", "self" and "sys", then a numeric name interpreted as a
+// thread ID in p.pidns. If none match, the error from the static lookup is
+// returned.
+func (p *proc) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) {
+	// Is it one of the static ones?
+	dirent, walkErr := p.Dir.Lookup(ctx, dir, name)
+	if walkErr == nil {
+		return dirent, nil
+	}
+
+	// Is it a dynamic element? These are built fresh on every lookup.
+	switch name {
+	case "net":
+		return fs.NewDirent(p.newNetDir(ctx, dir.MountSource), name), nil
+	case "self":
+		return fs.NewDirent(p.newSelf(ctx, dir.MountSource), name), nil
+	case "sys":
+		return fs.NewDirent(p.newSysDir(ctx, dir.MountSource), name), nil
+	}
+
+	// Try to lookup a corresponding task. A name that is not a number, or
+	// that names no task, yields the original walk error.
+	tid, err := strconv.ParseUint(name, 10, 64)
+	if err != nil {
+		return nil, walkErr
+	}
+	otherTask := p.pidns.TaskWithID(kernel.ThreadID(tid))
+	if otherTask == nil {
+		return nil, walkErr
+	}
+
+	// Wrap it in a taskDir.
+	return fs.NewDirent(newTaskDir(otherTask, dir.MountSource, p.pidns, true), name), nil
+}
+
+// DeprecatedReaddir synthesizes proc contents.
+//
+// It first emits the static entries of the embedded Dir, then "sys" and one
+// directory entry per thread group that has a leader, in sorted name order.
+// It returns the updated offset.
+//
+// NOTE(review): of the dynamic entries served by Lookup, only "sys" is listed
+// here; "net" and "self" are not emitted -- confirm whether that is intended.
+// NOTE(review): the same offset is passed to the embedded Dir and used to
+// index the synthesized names -- confirm this is the intended offset scheme.
+func (p *proc) DeprecatedReaddir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+ // Serialize normal contents.
+ _, err := p.Dir.DeprecatedReaddir(ctx, dirCtx, offset)
+ if err != nil {
+ return offset, err
+ }
+
+ m := make(map[string]fs.DentAttr)
+ var names []string
+
+ // Add special files.
+ m["sys"] = fs.GenericDentAttr(fs.SpecialFile, device.ProcDevice)
+ names = append(names, "sys")
+
+ // Collect tasks.
+ // Per linux we only include it in directory listings if it's the leader.
+ // But for whatever crazy reason, you can still walk to the given node.
+ for _, tg := range p.pidns.ThreadGroups() {
+ if leader := tg.Leader(); leader != nil {
+ name := strconv.FormatUint(uint64(tg.ID()), 10)
+ m[name] = fs.GenericDentAttr(fs.SpecialDirectory, device.ProcDevice)
+ names = append(names, name)
+ }
+ }
+
+ // Nothing left to emit once offset is past the synthesized entries.
+ if offset >= len(m) {
+ return offset, nil
+ }
+ sort.Strings(names)
+ names = names[offset:]
+ for _, name := range names {
+ if err := dirCtx.DirEmit(name, m[name]); err != nil {
+ return offset, err
+ }
+ offset++
+ }
+ // err is the result of the embedded Dir's readdir above, which is nil
+ // here because a non-nil value returned early.
+ return offset, err
+}
+
+// newMountsSymlink returns a symlink to "self/mounts". New installs it as the
+// static "mounts" entry of the procfs root.
+func newMountsSymlink(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
+ s := &ramfs.Symlink{}
+ s.InitSymlink(ctx, fs.RootOwner, "self/mounts")
+ return newFile(s, msrc, fs.Symlink, nil)
+}
diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD
new file mode 100644
index 000000000..48dd25e5b
--- /dev/null
+++ b/pkg/sentry/fs/proc/seqfile/BUILD
@@ -0,0 +1,55 @@
+package(licenses = ["notice"]) # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//tools/go_stateify:defs.bzl", "go_stateify")
+
+# Runs go_stateify over seqfile.go to produce seqfile_state.go, which is
+# compiled into the seqfile library below.
+go_stateify(
+ name = "seqfile_state",
+ srcs = [
+ "seqfile.go",
+ ],
+ out = "seqfile_state.go",
+ package = "seqfile",
+)
+
+# The seqfile package: the hand-written source plus the generated
+# seqfile_state.go. Visible only to //pkg/sentry:internal.
+go_library(
+ name = "seqfile",
+ srcs = [
+ "seqfile.go",
+ "seqfile_state.go",
+ ],
+ importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile",
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/sentry/context",
+ "//pkg/sentry/fs",
+ "//pkg/sentry/fs/proc/device",
+ "//pkg/sentry/fs/ramfs",
+ "//pkg/sentry/kernel/time",
+ "//pkg/sentry/usermem",
+ "//pkg/state",
+ ],
+)
+
+# Runs go_stateify over seqfile_test.go to produce seqfile_test_state.go,
+# which is compiled into the test target below.
+go_stateify(
+ name = "seqfile_test_state",
+ srcs = ["seqfile_test.go"],
+ out = "seqfile_test_state.go",
+ package = "seqfile",
+)
+
+# Unit tests for the seqfile package. The library is embedded, so the tests
+# are built as part of package seqfile.
+go_test(
+ name = "seqfile_test",
+ size = "small",
+ srcs = [
+ "seqfile_test.go",
+ "seqfile_test_state.go",
+ ],
+ embed = [":seqfile"],
+ deps = [
+ "//pkg/sentry/context/contexttest",
+ "//pkg/sentry/fs",
+ "//pkg/sentry/fs/ramfs/test",
+ "//pkg/sentry/usermem",
+ ],
+)
diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go
new file mode 100644
index 000000000..e37a85869
--- /dev/null
+++ b/pkg/sentry/fs/proc/seqfile/seqfile.go
@@ -0,0 +1,232 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package seqfile
+
+import (
+ "io"
+ "sync"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// SeqHandle is a helper handle to seek in the file.
+//
+// It is opaque to SeqFile: the handle of the last retained record is passed
+// back to SeqSource.ReadSeqFileData to request the records that follow it.
+type SeqHandle interface{}
+
+// SeqData holds the data for one unit in the file.
+type SeqData struct {
+ // The data to be returned to the user.
+ Buf []byte
+
+ // A seek handle used to find the next valid unit in ReadSeqFileData.
+ Handle SeqHandle
+}
+
+// SeqSource is a data source for a SeqFile file.
+type SeqSource interface {
+ // NeedsUpdate returns true if the consumer of SeqData should call
+ // ReadSeqFileData again. Generation is the generation returned by
+ // ReadSeqFileData or 0.
+ NeedsUpdate(generation int64) bool
+
+ // ReadSeqFileData returns a slice of SeqData ordered by unit and the
+ // current generation. The first entry in the slice is greater than the
+ // handle. If handle is nil then all known records are returned.
+ // Generation must always be greater than 0.
+ //
+ // NOTE(review): implementations whose NeedsUpdate always returns true
+ // may not honour the "greater than 0" requirement -- confirm whether it
+ // still applies to them.
+ ReadSeqFileData(handle SeqHandle) ([]SeqData, int64)
+}
+
+// SeqGenerationCounter is a counter to keep track if the SeqSource should be
+// updated. SeqGenerationCounter is not thread-safe and should be protected
+// with a mutex.
+//
+// The zero value is ready to use and starts at generation 0.
+type SeqGenerationCounter struct {
+ // The generation that the SeqData is at.
+ generation int64
+}
+
+// SetGeneration sets the generation to the new value, be careful to not set it
+// to a value less than current. The value is stored as given; no check is
+// performed.
+func (s *SeqGenerationCounter) SetGeneration(generation int64) {
+ s.generation = generation
+}
+
+// Update increments the current generation by one.
+func (s *SeqGenerationCounter) Update() {
+ s.generation++
+}
+
+// Generation returns the current generation counter value.
+func (s *SeqGenerationCounter) Generation() int64 {
+ return s.generation
+}
+
+// IsCurrent returns whether the given generation is current or not, i.e.
+// whether it equals the counter's current value.
+func (s *SeqGenerationCounter) IsCurrent(generation int64) bool {
+ return s.Generation() == generation
+}
+
+// SeqFile is used to provide dynamic files that can be ordered by record.
+type SeqFile struct {
+ ramfs.Entry
+
+ // mu protects the fields below.
+ mu sync.Mutex `state:"nosave"`
+
+ // SeqSource is the data source that produces the file's records.
+ SeqSource
+
+ // source is the list of records obtained from SeqSource so far.
+ source []SeqData
+ // generation is the generation returned by the most recent call to
+ // SeqSource.ReadSeqFileData.
+ generation int64
+ // lastRead is the offset of the previous read; it is used to detect
+ // backwards seeks.
+ lastRead int64
+}
+
+// NewSeqFile returns a seqfile suitable for use by external consumers.
+//
+// The file is owned by fs.RootOwner and initialized with mode 0444.
+func NewSeqFile(ctx context.Context, source SeqSource) *SeqFile {
+ s := &SeqFile{SeqSource: source}
+ s.InitEntry(ctx, fs.RootOwner, fs.FilePermsFromMode(0444))
+ return s
+}
+
+// NewSeqFileInode returns an Inode with SeqFile InodeOperations.
+//
+// The inode is a fs.SpecialFile on device.ProcDevice with a freshly allocated
+// inode number and a block size of one page.
+func NewSeqFileInode(ctx context.Context, source SeqSource, msrc *fs.MountSource) *fs.Inode {
+ iops := NewSeqFile(ctx, source)
+ sattr := fs.StableAttr{
+ DeviceID: device.ProcDevice.DeviceID(),
+ InodeID: device.ProcDevice.NextIno(),
+ BlockSize: usermem.PageSize,
+ Type: fs.SpecialFile,
+ }
+ return fs.NewInode(iops, msrc, sattr)
+}
+
+// UnstableAttr returns unstable attributes of the SeqFile.
+//
+// The attributes come from the embedded ramfs.Entry, with ModificationTime
+// replaced by the time obtained from ctx at the moment of the call. An error
+// from the embedded Entry is returned to the caller instead of being
+// discarded.
+func (s *SeqFile) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
+	uattr, err := s.Entry.UnstableAttr(ctx, inode)
+	if err != nil {
+		return fs.UnstableAttr{}, err
+	}
+	uattr.ModificationTime = ktime.NowFromContext(ctx)
+	return uattr, nil
+}
+
+// findIndexAndOffset locates the unit that contains the given byte offset.
+//
+// It returns the index of that unit and the offset relative to the start of
+// the unit. If offset lies at or past the end of all units, it returns
+// len(data) and the number of bytes by which offset exceeds the total length.
+func findIndexAndOffset(data []SeqData, offset int64) (int, int64) {
+	remaining := offset
+	for idx := range data {
+		size := int64(len(data[idx].Buf))
+		if remaining < size {
+			return idx, remaining
+		}
+		// Skip this unit entirely.
+		remaining -= size
+	}
+	return len(data), remaining
+}
+
+// DeprecatedPreadv reads from the file at the given offset.
+//
+// It copies record data into dst starting at offset and returns the number of
+// bytes copied. The record list is refreshed from the SeqSource only at record
+// boundaries: when the read starts at EOF, when the source reports that it
+// needs an update, or when the read is at a lower offset than the previous
+// one. It returns io.EOF when no data is available at offset.
+func (s *SeqFile) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ s.Entry.NotifyAccess(ctx)
+ // Remember where this read started so the next call can detect a
+ // backwards seek.
+ defer func() { s.lastRead = offset }()
+
+ // updated records whether the source was already refreshed during this
+ // call, so that it is refreshed at most once.
+ updated := false
+
+ // Try to find where we should start reading this file.
+ i, recordOffset := findIndexAndOffset(s.source, offset)
+ if i == len(s.source) {
+ // Ok, we're at EOF. Let's first check to see if there might be
+ // more data available to us. If there is more data, add it to
+ // the end and try reading again.
+ if !s.SeqSource.NeedsUpdate(s.generation) {
+ return 0, io.EOF
+ }
+ oldLen := len(s.source)
+ s.updateSourceLocked(len(s.source))
+ updated = true
+ // We know that we had consumed everything up until this point
+ // so we search in the new slice instead of starting over.
+ i, recordOffset = findIndexAndOffset(s.source[oldLen:], recordOffset)
+ i += oldLen
+ // i is at most the length of the slice which is
+ // len(s.source) - oldLen. So at most i will be equal to
+ // len(s.source).
+ if i == len(s.source) {
+ return 0, io.EOF
+ }
+ }
+
+ var done int64
+ // We're reading parts of a record, finish reading the current object
+ // before continuing on to the next. We don't refresh our data source
+ // before this record is completed.
+ if recordOffset != 0 {
+ n, err := dst.CopyOut(ctx, s.source[i].Buf[recordOffset:])
+ done += int64(n)
+ dst = dst.DropFirst(n)
+ if dst.NumBytes() == 0 || err != nil {
+ return done, err
+ }
+ i++
+ }
+
+ // Next/New unit, update the source file if necessary. Make an extra
+ // check to see if we've seeked backwards and if so always update our
+ // data source.
+ if !updated && (s.SeqSource.NeedsUpdate(s.generation) || s.lastRead > offset) {
+ s.updateSourceLocked(i)
+ // recordOffset is 0 here and we won't update records behind the
+ // current one so recordOffset is still 0 even though source
+ // just got updated. Just read the next record.
+ }
+
+ // Finish by reading all the available data.
+ for _, buf := range s.source[i:] {
+ n, err := dst.CopyOut(ctx, buf.Buf)
+ done += int64(n)
+ dst = dst.DropFirst(n)
+ if dst.NumBytes() == 0 || err != nil {
+ return done, err
+ }
+ }
+
+ // If the file shrank (entries not yet read were removed above)
+ // while we tried to read we can end up with nothing read.
+ if done == 0 && dst.NumBytes() != 0 {
+ return 0, io.EOF
+ }
+ return done, nil
+}
+
+// updateSourceLocked requires that s.mu is held.
+//
+// It keeps the first record entries of s.source, asks the SeqSource for the
+// records that follow the last kept one (or for all records when record is
+// 0), appends them, and stores the generation the source reported.
+func (s *SeqFile) updateSourceLocked(record int) {
+	// A nil handle requests everything; otherwise resume after the last
+	// record that is being kept.
+	var h SeqHandle
+	if record != 0 {
+		h = s.source[record-1].Handle
+	}
+
+	// Save what we have previously read.
+	s.source = s.source[:record]
+
+	fresh, gen := s.SeqSource.ReadSeqFileData(h)
+	s.generation = gen
+	s.source = append(s.source, fresh...)
+}
+
+// DeprecatedPwritev is always denied: it writes nothing and returns
+// ramfs.ErrDenied.
+func (*SeqFile) DeprecatedPwritev(context.Context, usermem.IOSequence, int64) (int64, error) {
+ return 0, ramfs.ErrDenied
+}
diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_test.go b/pkg/sentry/fs/proc/seqfile/seqfile_test.go
new file mode 100644
index 000000000..0bf39ad82
--- /dev/null
+++ b/pkg/sentry/fs/proc/seqfile/seqfile_test.go
@@ -0,0 +1,272 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package seqfile
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "testing"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ ramfstest "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs/test"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+type seqTest struct { // seqTest is the test data source handed to NewSeqFileInode in these tests.
+ actual []SeqData // actual is the data currently exposed by ReadSeqFileData.
+ update bool // update is the value returned by NeedsUpdate.
+}
+
+func (s *seqTest) Init() { // Init populates s.actual with the initial test records.
+ var sq []SeqData
+ // Create 10 records; record i is 10 bytes of value i with a handle carrying i.
+ for i := 0; i < 10; i++ {
+ var b []byte
+ for j := 0; j < 10; j++ {
+ b = append(b, byte(i))
+ }
+ sq = append(sq, SeqData{
+ Buf: b,
+ Handle: &testHandle{i: i},
+ })
+ }
+ s.actual = sq
+}
+
+// NeedsUpdate reports whether previously read data should be refreshed; it returns s.update and ignores the generation argument.
+func (s *seqTest) NeedsUpdate(int64) bool {
+ return s.update
+}
+
+// ReadSeqFileData returns the elements of s.actual whose handle index is
+// greater than that of handle, or all of s.actual if handle is nil. The returned generation is always 0.
+func (s *seqTest) ReadSeqFileData(handle SeqHandle) ([]SeqData, int64) {
+ if handle == nil {
+ return s.actual, 0
+ }
+ h := *handle.(*testHandle) // Panics if handle is not a *testHandle.
+ var ret []SeqData
+ for _, b := range s.actual {
+ // Keep only the records that come after handle.
+ h2 := *b.Handle.(*testHandle)
+ if h2.i > h.i {
+ ret = append(ret, b)
+ }
+ }
+ return ret, 0
+}
+
+// flatten concatenates the given byte slices, in order, into one slice. It returns nil when there is nothing to append.
+func flatten(buf ...[]byte) []byte {
+ var flat []byte
+ for _, b := range buf {
+ flat = append(flat, b...)
+ }
+ return flat
+}
+
+type testHandle struct { // testHandle is the handle type stored in the test records.
+ i int // i is the record's index; seqTest.ReadSeqFileData compares it to order records.
+}
+
+type testTable struct { // testTable describes one read and its expected outcome.
+ offset int64 // offset is the file offset to read at.
+ readBufferSize int // readBufferSize is the size of the destination buffer.
+ expectedData []byte // expectedData is the data the read must return.
+ expectedError error // expectedError is the error the read must return (compared with !=).
+}
+
+func runTableTests(ctx context.Context, table []testTable, n fs.InodeOperations) error { // runTableTests runs each read in table against n and returns an error describing the first mismatch, or nil.
+ for _, tt := range table {
+ data := make([]byte, tt.readBufferSize)
+ resultLen, err := n.DeprecatedPreadv(ctx, usermem.BytesIOSequence(data), tt.offset)
+ if err != tt.expectedError {
+ return fmt.Errorf("t.Preadv(len: %v, offset: %v) (error) => %v expected %v", tt.readBufferSize, tt.offset, err, tt.expectedError)
+ }
+ expectedLen := int64(len(tt.expectedData))
+ if resultLen != expectedLen {
+ // A length mismatch is reported immediately; the data comparison below is not reached.
+ return fmt.Errorf("t.Preadv(len: %v, offset: %v) (size) => %v expected %v", tt.readBufferSize, tt.offset, resultLen, expectedLen)
+ }
+ if !bytes.Equal(data[:expectedLen], tt.expectedData) {
+ return fmt.Errorf("t.Preadv(len: %v, offset: %v) (data) => %v expected %v", tt.readBufferSize, tt.offset, data[:expectedLen], tt.expectedData)
+ }
+ }
+ return nil
+}
+
+func TestSeqFile(t *testing.T) { // TestSeqFile checks reads at various offsets and sizes, first with source updates enabled and then disabled.
+ testSource := &seqTest{}
+ testSource.Init()
+
+ // Create a seqfile inode backed by testSource inside a test directory.
+ m := fs.NewNonCachingMountSource(nil, fs.MountSourceFlags{})
+ ctx := contexttest.Context(t)
+ contents := map[string]*fs.Inode{
+ "foo": NewSeqFileInode(ctx, testSource, m),
+ }
+ root := ramfstest.NewDir(ctx, contents, fs.FilePermsFromMode(0777))
+
+ // Look the file up through the directory.
+ inode := fs.NewInode(root, m, fs.StableAttr{Type: fs.Directory})
+ dirent2, err := root.Lookup(ctx, inode, "foo")
+ if err != nil {
+ t.Fatalf("failed to walk to foo for n2: %v", err)
+ }
+ n2 := dirent2.Inode.InodeOperations
+
+ // Writing must fail.
+ if _, err := n2.DeprecatedPwritev(nil, usermem.BytesIOSequence([]byte("test")), 0); err == nil {
+ t.Fatalf("managed to write to n2: %v", err)
+ }
+
+ // A second lookup must yield the same InodeOperations.
+ dirent3, err := root.Lookup(ctx, inode, "foo")
+ if err != nil {
+ t.Fatalf("failed to walk to foo: %v", err)
+ }
+ n3 := dirent3.Inode.InodeOperations
+
+ if n2 != n3 {
+ t.Error("got n2 != n3, want same")
+ }
+
+ testSource.update = true
+
+ table := []testTable{
+ // Read past the end.
+ {100, 4, []byte{}, io.EOF},
+ {110, 4, []byte{}, io.EOF},
+ {200, 4, []byte{}, io.EOF},
+ // Read a truncated first line.
+ {0, 4, testSource.actual[0].Buf[:4], nil},
+ // Read the whole first line.
+ {0, 10, testSource.actual[0].Buf, nil},
+ // Read the whole first line + 5 bytes of second line.
+ {0, 15, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf[:5]), nil},
+ // First 4 bytes of the second line.
+ {10, 4, testSource.actual[1].Buf[:4], nil},
+ // Read the first two lines.
+ {0, 20, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf), nil},
+ // Read three lines.
+ {0, 30, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf, testSource.actual[2].Buf), nil},
+ // Read everything, but use a bigger buffer than necessary.
+ {0, 150, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf, testSource.actual[2].Buf, testSource.actual[3].Buf, testSource.actual[4].Buf, testSource.actual[5].Buf, testSource.actual[6].Buf, testSource.actual[7].Buf, testSource.actual[8].Buf, testSource.actual[9].Buf), nil},
+ // Read the last 3 bytes.
+ {97, 10, testSource.actual[9].Buf[7:], nil},
+ }
+ if err := runTableTests(ctx, table, n2); err != nil {
+ t.Errorf("runTableTest failed with testSource.update = %v : %v", testSource.update, err)
+ }
+
+ // Disable updates and run the same table again.
+ testSource.update = false
+ if err := runTableTests(ctx, table, n2); err != nil {
+ t.Errorf("runTableTest failed with testSource.update = %v: %v", testSource.update, err)
+ }
+}
+
+// TestSeqFileFileUpdated tests that reads behave correctly when the underlying data changes between reads.
+func TestSeqFileFileUpdated(t *testing.T) {
+ testSource := &seqTest{}
+ testSource.Init()
+ testSource.update = true
+
+ // Create a seqfile inode backed by testSource inside a test directory.
+ m := fs.NewNonCachingMountSource(nil, fs.MountSourceFlags{})
+ ctx := contexttest.Context(t)
+ contents := map[string]*fs.Inode{
+ "foo": NewSeqFileInode(ctx, testSource, m),
+ }
+ root := ramfstest.NewDir(ctx, contents, fs.FilePermsFromMode(0777))
+
+ // Look the file up through the directory.
+ inode := fs.NewInode(root, m, fs.StableAttr{Type: fs.Directory})
+ dirent2, err := root.Lookup(ctx, inode, "foo")
+ if err != nil {
+ t.Fatalf("failed to walk to foo for n2: %v", err)
+ }
+ n2 := dirent2.Inode.InodeOperations
+
+ table := []testTable{
+ {0, 16, flatten(testSource.actual[0].Buf, testSource.actual[1].Buf[:6]), nil},
+ }
+ if err := runTableTests(ctx, table, n2); err != nil {
+ t.Errorf("runTableTest failed: %v", err)
+ }
+ // Delete the first entry.
+ cut := testSource.actual[0].Buf
+ testSource.actual = testSource.actual[1:]
+
+ table = []testTable{
+ // Try reading buffer 0 with an offset. This will not delete the old data.
+ {1, 5, cut[1:6], nil},
+ // Reset our file by reading at offset 0.
+ {0, 10, testSource.actual[0].Buf, nil},
+ {16, 14, flatten(testSource.actual[1].Buf[6:], testSource.actual[2].Buf), nil},
+ // Read the same data a second time.
+ {16, 14, flatten(testSource.actual[1].Buf[6:], testSource.actual[2].Buf), nil},
+ // Read the following two lines.
+ {30, 20, flatten(testSource.actual[3].Buf, testSource.actual[4].Buf), nil},
+ }
+ if err := runTableTests(ctx, table, n2); err != nil {
+ t.Errorf("runTableTest failed after removing first entry: %v", err)
+ }
+
+ // Replace the fifth entry (5555...) with a duplicate of the one after it (6666...).
+ after := testSource.actual[5:]
+ testSource.actual = testSource.actual[:4]
+ // Note the list must be sorted.
+ testSource.actual = append(testSource.actual, after[0])
+ testSource.actual = append(testSource.actual, after...)
+
+ table = []testTable{
+ {50, 20, flatten(testSource.actual[4].Buf, testSource.actual[5].Buf), nil},
+ }
+ if err := runTableTests(ctx, table, n2); err != nil {
+ t.Errorf("runTableTest failed after adding middle entry: %v", err)
+ }
+ // Saved so the data can be restored further down.
+ oldTestData := testSource.actual
+
+ // Delete everything.
+ testSource.actual = testSource.actual[:0]
+ table = []testTable{
+ {20, 20, []byte{}, io.EOF},
+ }
+ if err := runTableTests(ctx, table, n2); err != nil {
+ t.Errorf("runTableTest failed after removing all entries: %v", err)
+ }
+ // Restore only the first entry.
+ testSource.actual = oldTestData[:1]
+ table = []testTable{
+ {6, 20, testSource.actual[0].Buf[6:], nil},
+ }
+ if err := runTableTests(ctx, table, n2); err != nil {
+ t.Errorf("runTableTest failed after adding first entry back: %v", err)
+ }
+
+ // Re-extend the data to the full saved list.
+ testSource.actual = oldTestData
+ table = []testTable{
+ {30, 20, flatten(testSource.actual[3].Buf, testSource.actual[4].Buf), nil},
+ }
+ if err := runTableTests(ctx, table, n2); err != nil {
+ t.Errorf("runTableTest failed after extending testSource: %v", err)
+ }
+}
diff --git a/pkg/sentry/fs/proc/stat.go b/pkg/sentry/fs/proc/stat.go
new file mode 100644
index 000000000..dee836a05
--- /dev/null
+++ b/pkg/sentry/fs/proc/stat.go
@@ -0,0 +1,139 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+)
+
+// statData backs /proc/stat.
+type statData struct {
+ // k is the owning Kernel; it supplies the core count and boot time reported in the file.
+ k *kernel.Kernel
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. It always returns true.
+func (*statData) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// cpuStats contains the breakdown of CPU time for /proc/stat. String prints the fields in declaration order.
+type cpuStats struct {
+ // user is time spent in userspace tasks with non-positive niceness.
+ user uint64
+
+ // nice is time spent in userspace tasks with positive niceness.
+ nice uint64
+
+ // system is time spent in non-interrupt kernel context.
+ system uint64
+
+ // idle is time spent idle.
+ idle uint64
+
+ // ioWait is time spent waiting for IO.
+ ioWait uint64
+
+ // irq is time spent in interrupt context.
+ irq uint64
+
+ // softirq is time spent in software interrupt context.
+ softirq uint64
+
+ // steal is involuntary wait time.
+ steal uint64
+
+ // guest is time spent in guests with non-positive niceness.
+ guest uint64
+
+ // guestNice is time spent in guests with positive niceness.
+ guestNice uint64
+}
+
+// String implements fmt.Stringer. It returns the ten fields as space-separated decimal numbers, in declaration order.
+func (c cpuStats) String() string {
+ return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice)
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. It returns the whole file as a single record when h is nil, and no records otherwise.
+func (s *statData) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if h != nil {
+ return nil, 0 // There is only one record, so nothing follows a non-nil handle.
+ }
+
+ var buf bytes.Buffer
+
+ // TODO: We currently export only zero CPU stats. We could
+ // at least provide some aggregate stats.
+ var cpu cpuStats
+ fmt.Fprintf(&buf, "cpu %s\n", cpu)
+
+ for c, max := uint(0), s.k.ApplicationCores(); c < max; c++ { // One all-zero line per application core.
+ fmt.Fprintf(&buf, "cpu%d %s\n", c, cpu)
+ }
+
+ // The total number of interrupts is dependent on the CPUs and PCI
+ // devices on the system. See arch_probe_nr_irqs.
+ //
+ // Since we don't report real interrupt stats, just choose an arbitrary
+ // value from a representative VM.
+ const numInterrupts = 256
+
+ // The Kernel doesn't handle real interrupts, so report all zeroes.
+ // TODO: We could count page faults as #PF.
+ fmt.Fprintf(&buf, "intr 0") // total
+ for i := 0; i < numInterrupts; i++ {
+ fmt.Fprintf(&buf, " 0")
+ }
+ fmt.Fprintf(&buf, "\n")
+
+ // Total number of context switches.
+ // TODO: Count this.
+ fmt.Fprintf(&buf, "ctxt 0\n")
+
+ // CLOCK_REALTIME timestamp from boot, in seconds.
+ fmt.Fprintf(&buf, "btime %d\n", s.k.Timekeeper().BootTime().Seconds())
+
+ // Total number of clones.
+ // TODO: Count this.
+ fmt.Fprintf(&buf, "processes 0\n")
+
+ // Number of runnable tasks.
+ // TODO: Count this.
+ fmt.Fprintf(&buf, "procs_running 0\n")
+
+ // Number of tasks waiting on IO.
+ // TODO: Count this.
+ fmt.Fprintf(&buf, "procs_blocked 0\n")
+
+ // Number of each softirq handled; all reported as zero.
+ fmt.Fprintf(&buf, "softirq 0") // total
+ for i := 0; i < linux.NumSoftIRQ; i++ {
+ fmt.Fprintf(&buf, " 0")
+ }
+ fmt.Fprintf(&buf, "\n")
+
+ return []seqfile.SeqData{
+ {
+ Buf: buf.Bytes(),
+ Handle: (*statData)(nil), // Typed nil pointer used as the record's handle.
+ },
+ }, 0
+}
diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go
new file mode 100644
index 000000000..4323f3650
--- /dev/null
+++ b/pkg/sentry/fs/proc/sys.go
@@ -0,0 +1,117 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "fmt"
+ "io"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// hostname is a file containing the system hostname, read from the caller's UTS namespace on each read.
+type hostname struct {
+ ramfs.Entry
+}
+
+// DeprecatedPreadv implements fs.InodeOperations.DeprecatedPreadv. It copies out the hostname followed by a newline, starting at offset.
+func (hostname) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
+ utsns := kernel.UTSNamespaceFromContext(ctx)
+ contents := []byte(utsns.HostName() + "\n")
+
+ if offset >= int64(len(contents)) {
+ return 0, io.EOF // Reading at or past the end of the contents.
+ }
+
+ n, err := dst.CopyOut(ctx, contents[offset:])
+ return int64(n), err
+}
+
+func (p *proc) newHostname(ctx context.Context, msrc *fs.MountSource) *fs.Inode { // newHostname returns an inode for a hostname file.
+ h := &hostname{}
+ h.InitEntry(ctx, fs.RootOwner, fs.FilePermsFromMode(0444)) // Root-owned, mode 0444.
+ return newFile(h, msrc, fs.SpecialFile, nil)
+}
+
+// mmapMinAddrData backs /proc/sys/vm/mmap_min_addr.
+type mmapMinAddrData struct {
+ k *kernel.Kernel // k is the Kernel whose Platform.MinUserAddress() is reported.
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. It always returns true.
+func (*mmapMinAddrData) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. When h is nil it returns one record: the platform's minimum user address in decimal, followed by a newline.
+func (d *mmapMinAddrData) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if h != nil {
+ return nil, 0 // There is only one record, so nothing follows a non-nil handle.
+ }
+ return []seqfile.SeqData{
+ {
+ Buf: []byte(fmt.Sprintf("%d\n", d.k.Platform.MinUserAddress())),
+ Handle: (*mmapMinAddrData)(nil), // Typed nil pointer used as the record's handle.
+ },
+ }, 0
+}
+
+type overcommitMemory struct{} // overcommitMemory is the data source for the "overcommit_memory" file; it always reads as "0\n".
+
+func (*overcommitMemory) NeedsUpdate(generation int64) bool { // NeedsUpdate implements seqfile.SeqSource.NeedsUpdate; it always returns true.
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. When h is nil it returns one record containing "0\n".
+func (*overcommitMemory) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if h != nil {
+ return nil, 0 // There is only one record, so nothing follows a non-nil handle.
+ }
+ return []seqfile.SeqData{
+ {
+ Buf: []byte("0\n"),
+ Handle: (*overcommitMemory)(nil), // Typed nil pointer used as the record's handle.
+ },
+ }, 0
+}
+
+func (p *proc) newKernelDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { // newKernelDir returns a directory inode containing "hostname".
+ d := &ramfs.Dir{}
+ d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) // Root-owned, mode 0555.
+ d.AddChild(ctx, "hostname", p.newHostname(ctx, msrc))
+ return newFile(d, msrc, fs.SpecialDirectory, nil)
+}
+
+func (p *proc) newVMDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { // newVMDir returns a directory inode containing "mmap_min_addr" and "overcommit_memory".
+ d := &ramfs.Dir{}
+ d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) // Root-owned, mode 0555.
+ d.AddChild(ctx, "mmap_min_addr", seqfile.NewSeqFileInode(ctx, &mmapMinAddrData{p.k}, msrc))
+ d.AddChild(ctx, "overcommit_memory", seqfile.NewSeqFileInode(ctx, &overcommitMemory{}, msrc))
+ return newFile(d, msrc, fs.SpecialDirectory, nil)
+}
+
+func (p *proc) newSysDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { // newSysDir returns a directory inode containing the "kernel", "vm" and "net" subdirectories.
+ d := &ramfs.Dir{}
+ d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) // Root-owned, mode 0555.
+ d.AddChild(ctx, "kernel", p.newKernelDir(ctx, msrc))
+ d.AddChild(ctx, "vm", p.newVMDir(ctx, msrc))
+ d.AddChild(ctx, "net", p.newSysNetDir(ctx, msrc))
+ return newFile(d, msrc, fs.SpecialDirectory, nil)
+}
diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go
new file mode 100644
index 000000000..db44c95cb
--- /dev/null
+++ b/pkg/sentry/fs/proc/sys_net.go
@@ -0,0 +1,188 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "fmt"
+ "io"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+type tcpMemDir int // tcpMemDir selects which TCP buffer size a tcpMem file controls.
+
+const (
+ tcpRMem tcpMemDir = iota // Receive buffer: writes call SetTCPReceiveBufferSize.
+ tcpWMem // Send buffer: writes call SetTCPSendBufferSize.
+)
+
+type tcpMem struct { // tcpMem is a file exposing a TCP buffer size triple (min, default, max).
+ ramfs.Entry
+ s inet.Stack // s is the stack that written values are applied to.
+ size inet.TCPBufferSize // size is the value reported by reads. NOTE(review): writes never update it, so reads may go stale — confirm.
+ dir tcpMemDir // dir selects the receive or send buffer.
+}
+
+func newTCPMem(s inet.Stack, size inet.TCPBufferSize, dir tcpMemDir) *tcpMem { // newTCPMem returns a tcpMem holding the given stack, size and direction; the embedded Entry is left uninitialized.
+ return &tcpMem{s: s, size: size, dir: dir}
+}
+
+func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, size inet.TCPBufferSize, dir tcpMemDir) *fs.Inode { // newTCPMemInode returns an inode wrapping a new tcpMem.
+ tm := newTCPMem(s, size, dir)
+ tm.InitEntry(ctx, fs.RootOwner, fs.FilePermsFromMode(0644)) // Root-owned, mode 0644.
+ sattr := fs.StableAttr{
+ DeviceID: device.ProcDevice.DeviceID(),
+ InodeID: device.ProcDevice.NextIno(), // Fresh inode number from the proc device.
+ BlockSize: usermem.PageSize,
+ Type: fs.SpecialFile,
+ }
+ return fs.NewInode(tm, msrc, sattr)
+}
+
+// DeprecatedPreadv implements fs.InodeOperations.DeprecatedPreadv. It copies out m.size as "min\tdefault\tmax\n"; any non-zero offset returns io.EOF.
+func (m *tcpMem) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
+ if offset != 0 {
+ return 0, io.EOF // Reads are only served from the start of the file.
+ }
+ s := fmt.Sprintf("%d\t%d\t%d\n", m.size.Min, m.size.Default, m.size.Max)
+ n, err := dst.CopyOut(ctx, []byte(s))
+ return int64(n), err
+}
+
+// Truncate implements fs.InodeOperations.Truncate. It does nothing and returns nil.
+func (*tcpMem) Truncate(context.Context, *fs.Inode, int64) error {
+ return nil
+}
+
+// DeprecatedPwritev implements fs.InodeOperations.DeprecatedPwritev. It parses up to three integers from src and applies them to the stack as the new receive or send buffer size, depending on m.dir.
+func (m *tcpMem) DeprecatedPwritev(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ if src.NumBytes() == 0 {
+ return 0, nil // Empty write: nothing to parse, and the stack is not touched.
+ }
+ src = src.TakeFirst(usermem.PageSize - 1) // Only the first PageSize-1 bytes of input are considered.
+
+ buf := []int32{int32(m.size.Min), int32(m.size.Default), int32(m.size.Max)} // Pre-filled with the values held in m.size.
+ n, cperr := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, buf, src.Opts)
+ size := inet.TCPBufferSize{
+ Min: int(buf[0]),
+ Default: int(buf[1]),
+ Max: int(buf[2]),
+ }
+ var err error
+ switch m.dir { // The size is applied even if cperr is non-nil.
+ case tcpRMem:
+ err = m.s.SetTCPReceiveBufferSize(size)
+ case tcpWMem:
+ err = m.s.SetTCPSendBufferSize(size)
+ default:
+ panic(fmt.Sprintf("unknown tcpMem.dir: %v", m.dir))
+ }
+ if err != nil {
+ return n, err // A stack error takes precedence over a copy-in error.
+ }
+ return n, cperr
+}
+
+type tcpSack struct { // tcpSack is a file that reads and sets the stack's TCP SACK enabled flag.
+ ramfs.Entry
+ s inet.Stack `state:"nosave"` // S/R-FIXME
+}
+
+func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { // newTCPSackInode returns an inode wrapping a new tcpSack for stack s.
+ ts := &tcpSack{s: s}
+ ts.InitEntry(ctx, fs.RootOwner, fs.FilePermsFromMode(0644)) // Root-owned, mode 0644.
+ sattr := fs.StableAttr{
+ DeviceID: device.ProcDevice.DeviceID(),
+ InodeID: device.ProcDevice.NextIno(), // Fresh inode number from the proc device.
+ BlockSize: usermem.PageSize,
+ Type: fs.SpecialFile,
+ }
+ return fs.NewInode(ts, msrc, sattr)
+}
+
+func (s *tcpSack) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) { // DeprecatedPreadv copies out "1\n" if SACK is enabled on the stack and "0\n" otherwise.
+ if offset != 0 {
+ return 0, io.EOF // Reads are only served from the start of the file.
+ }
+
+ sack, err := s.s.TCPSACKEnabled()
+ if err != nil {
+ return 0, err
+ }
+
+ val := "0\n"
+ if sack {
+ // Technically, this is not quite compatible with Linux. Linux
+ // stores these as an integer, so if you write "2" into
+ // tcp_sack, you should get 2 back. Here any enabled state reads back as 1.
+ val = "1\n"
+ }
+ n, err := dst.CopyOut(ctx, []byte(val))
+ return int64(n), err
+}
+
+// Truncate implements fs.InodeOperations.Truncate. It does nothing and returns nil.
+func (*tcpSack) Truncate(context.Context, *fs.Inode, int64) error {
+ return nil
+}
+
+// DeprecatedPwritev implements fs.InodeOperations.DeprecatedPwritev. It parses one integer from src and enables SACK on the stack if that integer is non-zero, disabling it otherwise.
+func (s *tcpSack) DeprecatedPwritev(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ if src.NumBytes() == 0 {
+ return 0, nil // Empty write: the stack is not touched.
+ }
+ src = src.TakeFirst(usermem.PageSize - 1) // Only the first PageSize-1 bytes of input are considered.
+
+ var v int32
+ n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
+ if err != nil {
+ return n, err // On a copy-in error the setting is left unchanged.
+ }
+ return n, s.s.SetTCPSACKEnabled(v != 0)
+}
+
+func newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { // newSysNetIPv4Dir returns a directory inode holding the TCP tunable files for stack s.
+ d := &ramfs.Dir{}
+ d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555))
+
+ // Add tcp_rmem, only if the stack can report its receive buffer size.
+ if rs, err := s.TCPReceiveBufferSize(); err == nil {
+ d.AddChild(ctx, "tcp_rmem", newTCPMemInode(ctx, msrc, s, rs, tcpRMem))
+ }
+
+ // Add tcp_wmem, only if the stack can report its send buffer size.
+ if ss, err := s.TCPSendBufferSize(); err == nil {
+ d.AddChild(ctx, "tcp_wmem", newTCPMemInode(ctx, msrc, s, ss, tcpWMem))
+ }
+
+ // Add tcp_sack unconditionally.
+ d.AddChild(ctx, "tcp_sack", newTCPSackInode(ctx, msrc, s))
+
+ return newFile(d, msrc, fs.SpecialDirectory, nil)
+}
+
+func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { // newSysNetDir returns a directory inode that contains "ipv4" when the kernel has a network stack.
+ d := &ramfs.Dir{}
+ d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) // Root-owned, mode 0555.
+ if s := p.k.NetworkStack(); s != nil { // Without a network stack the directory is left empty.
+ d.AddChild(ctx, "ipv4", newSysNetIPv4Dir(ctx, msrc, s))
+ }
+ return newFile(d, msrc, fs.SpecialDirectory, nil)
+}
diff --git a/pkg/sentry/fs/proc/sys_net_test.go b/pkg/sentry/fs/proc/sys_net_test.go
new file mode 100644
index 000000000..7ba392346
--- /dev/null
+++ b/pkg/sentry/fs/proc/sys_net_test.go
@@ -0,0 +1,121 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "testing"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+func TestQuerySendBufferSize(t *testing.T) { // TestQuerySendBufferSize checks that reading a tcpWMem file yields the tab-separated size triple.
+ ctx := context.Background()
+ s := inet.NewTestStack()
+ s.TCPSendBufSize = inet.TCPBufferSize{100, 200, 300}
+ tm := newTCPMem(s, s.TCPSendBufSize, tcpWMem)
+
+ buf := make([]byte, 100) // Larger than the expected output.
+ dst := usermem.BytesIOSequence(buf)
+ n, err := tm.DeprecatedPreadv(ctx, dst, 0)
+ if err != nil {
+ t.Fatalf("DeprecatedPreadv failed: %v", err)
+ }
+
+ if got, want := string(buf[:n]), "100\t200\t300\n"; got != want {
+ t.Fatalf("Bad string: got %v, want %v", got, want)
+ }
+}
+
+func TestQueryRecvBufferSize(t *testing.T) { // TestQueryRecvBufferSize checks that reading a tcpRMem file yields the tab-separated size triple.
+ ctx := context.Background()
+ s := inet.NewTestStack()
+ s.TCPRecvBufSize = inet.TCPBufferSize{100, 200, 300}
+ tm := newTCPMem(s, s.TCPRecvBufSize, tcpRMem)
+
+ buf := make([]byte, 100) // Larger than the expected output.
+ dst := usermem.BytesIOSequence(buf)
+ n, err := tm.DeprecatedPreadv(ctx, dst, 0)
+ if err != nil {
+ t.Fatalf("DeprecatedPreadv failed: %v", err)
+ }
+
+ if got, want := string(buf[:n]), "100\t200\t300\n"; got != want {
+ t.Fatalf("Bad string: got %v, want %v", got, want)
+ }
+}
+
+var cases = []struct { // cases is the write test table shared by the two TestConfigure*BufferSize tests.
+ str string // str is the text written to the file.
+ initial inet.TCPBufferSize // initial is the size set on the stack and the file before the write.
+ final inet.TCPBufferSize // final is the size expected on the stack after the write.
+}{
+ {
+ str: "", // An empty write leaves the size unchanged.
+ initial: inet.TCPBufferSize{1, 2, 3},
+ final: inet.TCPBufferSize{1, 2, 3},
+ },
+ {
+ str: "100\n", // A single value replaces only the first field.
+ initial: inet.TCPBufferSize{1, 100, 200},
+ final: inet.TCPBufferSize{100, 100, 200},
+ },
+ {
+ str: "100 200 300\n", // Three values replace all three fields.
+ initial: inet.TCPBufferSize{1, 2, 3},
+ final: inet.TCPBufferSize{100, 200, 300},
+ },
+}
+
+func TestConfigureSendBufferSize(t *testing.T) { // TestConfigureSendBufferSize writes each case to a tcpWMem file and checks the stack's send buffer size.
+ ctx := context.Background()
+ s := inet.NewTestStack()
+ for _, c := range cases {
+ s.TCPSendBufSize = c.initial
+ tm := newTCPMem(s, c.initial, tcpWMem)
+
+ // Write the values; the whole string must be consumed without error.
+ src := usermem.BytesIOSequence([]byte(c.str))
+ if n, err := tm.DeprecatedPwritev(ctx, src, 0); n != int64(len(c.str)) || err != nil {
+ t.Errorf("DeprecatedPwritev, case = %q: got (%d, %v), wanted (%d, nil)", c.str, n, err, len(c.str))
+ }
+
+ // Read the values from the stack and check them.
+ if s.TCPSendBufSize != c.final {
+ t.Errorf("TCPSendBufferSize, case = %q: got %v, wanted %v", c.str, s.TCPSendBufSize, c.final)
+ }
+ }
+}
+
+func TestConfigureRecvBufferSize(t *testing.T) { // TestConfigureRecvBufferSize writes each case to a tcpRMem file and checks the stack's receive buffer size.
+ ctx := context.Background()
+ s := inet.NewTestStack()
+ for _, c := range cases {
+ s.TCPRecvBufSize = c.initial
+ tm := newTCPMem(s, c.initial, tcpRMem)
+
+ // Write the values; the whole string must be consumed without error.
+ src := usermem.BytesIOSequence([]byte(c.str))
+ if n, err := tm.DeprecatedPwritev(ctx, src, 0); n != int64(len(c.str)) || err != nil {
+ t.Errorf("DeprecatedPwritev, case = %q: got (%d, %v), wanted (%d, nil)", c.str, n, err, len(c.str))
+ }
+
+ // Read the values from the stack and check them.
+ if s.TCPRecvBufSize != c.final {
+ t.Errorf("TCPRecvBufferSize, case = %q: got %v, wanted %v", c.str, s.TCPRecvBufSize, c.final)
+ }
+ }
+}
diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go
new file mode 100644
index 000000000..3e9a1e50e
--- /dev/null
+++ b/pkg/sentry/fs/proc/task.go
@@ -0,0 +1,567 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "sort"
+ "strconv"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// getTaskMM looks up the MemoryManager of t.
+//
+// It fails with ESRCH when t is in the TaskExitDead state, and with io.EOF
+// when t has no MemoryManager or IncUsers reports failure. On success the
+// MemoryManager's users count has been incremented, and the caller must
+// decrement it once the MemoryManager is no longer in use.
+func getTaskMM(t *kernel.Task) (*mm.MemoryManager, error) {
+	if t.ExitState() == kernel.TaskExitDead {
+		return nil, syserror.ESRCH
+	}
+
+	var tmm *mm.MemoryManager
+	t.WithMuLocked(func(locked *kernel.Task) {
+		tmm = locked.MemoryManager()
+	})
+	if tmm == nil {
+		return nil, io.EOF
+	}
+	if !tmm.IncUsers() {
+		return nil, io.EOF
+	}
+	return tmm, nil
+}
+
+// taskDir represents a task-level directory. It is built by newTaskDir, which
+// installs the per-task entries into the embedded ramfs.Dir.
+type taskDir struct {
+ ramfs.Dir
+
+ // t is the associated kernel task that owns this file.
+ t *kernel.Task
+}
+
+// newTaskDir creates a new proc task entry for t.
+//
+// The directory is owned by root with mode 0555. When showSubtasks is true a
+// "task" child is added as well, and the "stat" file is created with thread
+// group accounting enabled.
+func newTaskDir(t *kernel.Task, msrc *fs.MountSource, pidns *kernel.PIDNamespace, showSubtasks bool) *fs.Inode {
+	// The entries are created in the same (lexical) order as they are listed.
+	contents := map[string]*fs.Inode{
+		"auxv":    newAuxvec(t, msrc),
+		"cmdline": newExecArgFile(t, msrc, cmdlineExecArg),
+		"comm":    newComm(t, msrc),
+		"environ": newExecArgFile(t, msrc, environExecArg),
+		"exe":     newExe(t, msrc),
+		"fd":      newFdDir(t, msrc),
+		"fdinfo":  newFdInfoDir(t, msrc),
+		"gid_map": newGIDMap(t, msrc),
+		// TODO: This is incorrect for /proc/[pid]/task/[tid]/io, i.e. if
+		// showSubtasks is false:
+		// http://lxr.free-electrons.com/source/fs/proc/base.c?v=3.11#L2980
+		"io":        newIO(t, msrc),
+		"maps":      newMaps(t, msrc),
+		"mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc),
+		"mounts":    seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc),
+		"ns":        newNamespaceDir(t, msrc),
+		"stat":      newTaskStat(t, msrc, showSubtasks, pidns),
+		"status":    newStatus(t, msrc, pidns),
+		"uid_map":   newUIDMap(t, msrc),
+	}
+
+	dir := &taskDir{t: t}
+	// TODO: Set EUID/EGID based on dumpability.
+	dir.InitDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
+	if showSubtasks {
+		dir.AddChild(t, "task", newSubtasks(t, msrc, pidns))
+	}
+	return newFile(dir, msrc, fs.SpecialDirectory, t)
+}
+
+// subtasks represents a /proc/TID/task directory. Its entries are not stored
+// in the embedded ramfs.Dir; Lookup and DeprecatedReaddir derive them from
+// t's thread group on demand.
+type subtasks struct {
+ ramfs.Dir
+
+ // t is the task whose thread group is listed.
+ t *kernel.Task
+
+ // pidns is the PID namespace used to translate between tasks and the
+ // thread IDs that appear as entry names.
+ pidns *kernel.PIDNamespace
+}
+
+// newSubtasks returns the inode for t's "task" directory. The directory is
+// initialized without static children, is owned by root and has mode 0555.
+func newSubtasks(t *kernel.Task, msrc *fs.MountSource, pidns *kernel.PIDNamespace) *fs.Inode {
+	dir := &subtasks{
+		t:     t,
+		pidns: pidns,
+	}
+	dir.InitDir(t, nil, fs.RootOwner, fs.FilePermsFromMode(0555))
+	return newFile(dir, msrc, fs.SpecialDirectory, t)
+}
+
+// UnstableAttr returns the unstable attributes of the subtasks directory,
+// with the link count replaced by 2 plus the number of tasks in the thread
+// group.
+func (s *subtasks) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
+	attr, err := s.Dir.UnstableAttr(ctx, inode)
+	if err != nil {
+		return fs.UnstableAttr{}, err
+	}
+
+	// The task directories are generated dynamically, so the link count kept
+	// by ramfs cannot be relied upon.
+	members := s.t.ThreadGroup().Count()
+	attr.Links = uint64(2 + members)
+	return attr, nil
+}
+
+// Lookup loads an Inode in a task's subtask directory into a Dirent.
+//
+// p must be a decimal thread ID that resolves, in s.pidns, to a task in the
+// same thread group as s.t; anything else yields ENOENT. The task directory
+// that is returned is created without a nested "task" entry.
+func (s *subtasks) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent, error) {
+	parsed, err := strconv.ParseUint(p, 10, 32)
+	if err != nil {
+		return nil, syserror.ENOENT
+	}
+
+	member := s.pidns.TaskWithID(kernel.ThreadID(parsed))
+	if member == nil || member.ThreadGroup() != s.t.ThreadGroup() {
+		return nil, syserror.ENOENT
+	}
+
+	inode := newTaskDir(member, dir.MountSource, s.pidns, false)
+	return fs.NewDirent(inode, p), nil
+}
+
+// DeprecatedReaddir lists a task's subtask directory.
+//
+// Entries are the thread IDs, as seen from s.pidns, of the members of s.t's
+// thread group, emitted in ascending order. offset is the smallest thread ID
+// that should still be emitted. The returned offset is the thread ID at which
+// the next call should resume: the ID that failed to be emitted when DirEmit
+// returns an error, or one past the last emitted ID on success. If no member
+// has an ID >= offset, offset is returned unchanged.
+func (s *subtasks) DeprecatedReaddir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+	tasks := s.t.ThreadGroup().MemberIDs(s.pidns)
+	taskInts := make([]int, 0, len(tasks))
+	for _, tid := range tasks {
+		taskInts = append(taskInts, int(tid))
+	}
+	// sort.SearchInts requires ascending input, and the offset protocol below
+	// ("resume at the first tid >= offset") only works when entries are
+	// emitted in ascending order, so sort explicitly instead of depending on
+	// the order in which MemberIDs happens to return the IDs.
+	sort.Ints(taskInts)
+
+	// Find the task to start at.
+	idx := sort.SearchInts(taskInts, offset)
+	if idx == len(taskInts) {
+		return offset, nil
+	}
+	taskInts = taskInts[idx:]
+
+	var tid int
+	for _, tid = range taskInts {
+		name := strconv.FormatUint(uint64(tid), 10)
+		// A fresh attr is requested for every entry on purpose; the call is
+		// not hoisted out of the loop.
+		attr := fs.GenericDentAttr(fs.SpecialDirectory, device.ProcDevice)
+		if err := dirCtx.DirEmit(name, attr); err != nil {
+			// Returned offset is next tid to serialize.
+			return tid, err
+		}
+	}
+	// We serialized them all. Next offset should be higher than last
+	// serialized tid.
+	return tid + 1, nil
+}
+
+// exe is an fs.InodeOperations symlink for the /proc/PID/exe file. The link
+// target is not stored; Readlink computes it from t's executable on each call.
+type exe struct {
+ ramfs.Symlink
+
+ // t is the task whose executable the symlink refers to.
+ t *kernel.Task
+}
+
+// newExe returns the inode for t's "exe" symlink. The symlink is initialized
+// with an empty target and root ownership.
+func newExe(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
+	link := new(exe)
+	link.t = t
+	link.InitSymlink(t, fs.RootOwner, "")
+	return newFile(link, msrc, fs.Symlink, t)
+}
+
+// executable returns the Dirent of the task's executable, looked up with the
+// task mutex held. It fails with EACCES when the task has no MemoryManager
+// and with ENOENT when the MemoryManager has no executable.
+func (e *exe) executable() (d *fs.Dirent, err error) {
+	e.t.WithMuLocked(func(t *kernel.Task) {
+		m := t.MemoryManager()
+		if m == nil {
+			// TODO: Check shouldn't allow Readlink once the
+			// Task is zombied.
+			err = syserror.EACCES
+			return
+		}
+
+		// The MemoryManager may be destroyed, in which case
+		// MemoryManager.destroy will simply set the executable to nil
+		// (with locks held).
+		if d = m.Executable(); d == nil {
+			err = syserror.ENOENT
+		}
+	})
+	return d, err
+}
+
+// Readlink implements fs.InodeOperations. It returns the full name of the
+// task's executable relative to the root obtained from ctx.
+//
+// EACCES is returned when the caller may not trace the task, EINVAL when ctx
+// provides no root, and any error from looking up the executable is passed
+// through.
+func (e *exe) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
+	if !kernel.ContextCanTrace(ctx, e.t, false) {
+		return "", syserror.EACCES
+	}
+
+	// Pull out the executable for /proc/TID/exe.
+	target, err := e.executable()
+	if err != nil {
+		return "", err
+	}
+	defer target.DecRef()
+
+	root := fs.RootFromContext(ctx)
+	if root == nil {
+		// This doesn't correspond to anything in Linux because the vfs is
+		// global there.
+		return "", syserror.EINVAL
+	}
+	defer root.DecRef()
+
+	// The second result of FullName is deliberately dropped, as before.
+	name, _ := target.FullName(root)
+	return name, nil
+}
+
+// namespaceFile represents a file in the namespacefs, such as the files in
+// /proc/<pid>/ns. It is a symlink whose target text is fixed at creation time
+// by newNamespaceFile.
+type namespaceFile struct {
+ ramfs.Symlink
+
+ // t is the task used for the trace permission check in Getlink.
+ t *kernel.Task
+}
+
+// newNamespaceFile returns the inode of a namespace symlink for t whose
+// target has the form "name:[ino]".
+func newNamespaceFile(t *kernel.Task, msrc *fs.MountSource, name string) *fs.Inode {
+	link := &namespaceFile{t: t}
+	link.InitSymlink(t, fs.RootOwner, "")
+
+	// TODO: Namespace symlinks should contain the namespace name and the
+	// inode number for the namespace instance, so for example user:[123456]. We
+	// currently fake the inode number by sticking the symlink inode in its
+	// place.
+	fakeIno := device.ProcDevice.NextIno()
+	link.Target = fmt.Sprintf("%s:[%d]", name, fakeIno)
+
+	return newFile(link, msrc, fs.Symlink, t)
+}
+
+// Getlink implements fs.InodeOperations.Getlink. It returns a Dirent, named
+// after the symlink target, that wraps a freshly created regular file
+// standing in for the namespace. EACCES is returned when the caller may not
+// trace the task.
+func (n *namespaceFile) Getlink(ctx context.Context, inode *fs.Inode) (*fs.Dirent, error) {
+	if !kernel.ContextCanTrace(ctx, n.t, false) {
+		return nil, syserror.EACCES
+	}
+
+	// Create a new regular file to fake the namespace file.
+	entry := new(ramfs.Entry)
+	entry.InitEntry(ctx, fs.RootOwner, fs.FilePermsFromMode(0777))
+	fake := fs.NewInode(entry, inode.MountSource, fs.StableAttr{
+		DeviceID:  device.ProcDevice.DeviceID(),
+		InodeID:   device.ProcDevice.NextIno(),
+		BlockSize: usermem.PageSize,
+		Type:      fs.RegularFile,
+	})
+	return fs.NewDirent(fake, n.Symlink.Target), nil
+}
+
+// newNamespaceDir returns the inode of t's "ns" directory, which holds the
+// "net", "pid" and "user" namespace symlinks. The directory is owned by root
+// and has mode 0511.
+func newNamespaceDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
+	names := [...]string{"net", "pid", "user"}
+	links := make(map[string]*fs.Inode, len(names))
+	for _, name := range names {
+		links[name] = newNamespaceFile(t, msrc, name)
+	}
+
+	dir := new(ramfs.Dir)
+	dir.InitDir(t, links, fs.RootOwner, fs.FilePermsFromMode(0511))
+	return newFile(dir, msrc, fs.SpecialDirectory, t)
+}
+
+// mapsData implements seqfile.SeqSource for /proc/[pid]/maps. Both of its
+// SeqSource methods delegate to the task's MemoryManager when one exists.
+type mapsData struct {
+ // t is the task whose memory mappings are reported.
+ t *kernel.Task
+}
+
+// newMaps returns the inode of t's "maps" file, a seqfile backed by mapsData.
+func newMaps(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
+	source := &mapsData{t: t}
+	file := seqfile.NewSeqFile(t, source)
+	return newFile(file, msrc, fs.SpecialFile, t)
+}
+
+// mm returns the task's MemoryManager, read with the task mutex held, or nil
+// if the task has none.
+func (md *mapsData) mm() *mm.MemoryManager {
+	var found *mm.MemoryManager
+	md.t.WithMuLocked(func(t *kernel.Task) {
+		// No additional reference is taken on the MemoryManager here. This
+		// is safe because MemoryManager.destroy is required to leave the
+		// MemoryManager in a state where it's still usable as a SeqSource.
+		found = t.MemoryManager()
+	})
+	return found
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. The decision is left
+// to the task's MemoryManager; without one an update is always requested.
+func (md *mapsData) NeedsUpdate(generation int64) bool {
+	m := md.mm()
+	if m == nil {
+		return true
+	}
+	return m.NeedsUpdate(generation)
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. The data comes
+// from the task's MemoryManager; without one an empty, non-nil slice and
+// generation 0 are returned.
+func (md *mapsData) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+	m := md.mm()
+	if m == nil {
+		return []seqfile.SeqData{}, 0
+	}
+	return m.ReadSeqFileData(md.t.AsyncContext(), h)
+}
+
+// taskStatData is the seqfile source behind a task's "stat" file (see
+// newTaskStat).
+type taskStatData struct {
+ // t is the task being reported on.
+ t *kernel.Task
+
+ // If tgstats is true, accumulate fault stats (not implemented) and CPU
+ // time across all tasks in t's thread group.
+ tgstats bool
+
+ // pidns is the PID namespace associated with the proc filesystem that
+ // includes the file using this statData.
+ pidns *kernel.PIDNamespace
+}
+
+// newTaskStat returns the inode of t's "stat" file. showSubtasks selects
+// thread-group-wide accounting (it becomes taskStatData.tgstats).
+func newTaskStat(t *kernel.Task, msrc *fs.MountSource, showSubtasks bool, pidns *kernel.PIDNamespace) *fs.Inode {
+	source := &taskStatData{
+		t:       t,
+		tgstats: showSubtasks,
+		pidns:   pidns,
+	}
+	return newFile(seqfile.NewSeqFile(t, source), msrc, fs.SpecialFile, t)
+}
+
+// NeedsUpdate reports whether data of the given generation is out of date. It
+// always returns true, regardless of generation.
+func (s *taskStatData) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData returns data for the SeqFile reader.
+//
+// The whole stat line is produced as a single SeqData record, and the returned
+// generation is always 0. When h is non-nil no data is returned; the record
+// produced here carries a non-nil handle (a typed nil pointer). Fields that
+// are not implemented are written as literal zeros, with their names kept in
+// the inline comments next to each format string.
+func (s *taskStatData) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if h != nil {
+ return nil, 0
+ }
+
+ var buf bytes.Buffer
+
+ fmt.Fprintf(&buf, "%d ", s.pidns.IDOfTask(s.t))
+ fmt.Fprintf(&buf, "(%s) ", s.t.Name())
+ // Only the first byte of the state string is reported.
+ fmt.Fprintf(&buf, "%c ", s.t.StateStatus()[0])
+ // The parent PID stays 0 when the task has no parent.
+ ppid := kernel.ThreadID(0)
+ if parent := s.t.Parent(); parent != nil {
+ ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
+ }
+ fmt.Fprintf(&buf, "%d ", ppid)
+ fmt.Fprintf(&buf, "%d ", s.pidns.IDOfProcessGroup(s.t.ThreadGroup().ProcessGroup()))
+ fmt.Fprintf(&buf, "%d ", s.pidns.IDOfSession(s.t.ThreadGroup().Session()))
+ fmt.Fprintf(&buf, "0 0 " /* tty_nr tpgid */)
+ fmt.Fprintf(&buf, "0 " /* flags */)
+ fmt.Fprintf(&buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */)
+ // CPU time covers the whole thread group or just this task, depending on
+ // tgstats.
+ var cputime usage.CPUStats
+ if s.tgstats {
+ cputime = s.t.ThreadGroup().CPUStats()
+ } else {
+ cputime = s.t.CPUStats()
+ }
+ fmt.Fprintf(&buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
+ // The next pair is taken from the thread group's joined children.
+ cputime = s.t.ThreadGroup().JoinedChildCPUStats()
+ fmt.Fprintf(&buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
+ fmt.Fprintf(&buf, "%d %d ", s.t.Priority(), s.t.Niceness())
+ fmt.Fprintf(&buf, "%d ", s.t.ThreadGroup().Count())
+ fmt.Fprintf(&buf, "0 0 " /* itrealvalue starttime */)
+ // Memory sizes are read with the task mutex held and stay 0 when the task
+ // has no MemoryManager.
+ var vss, rss uint64
+ s.t.WithMuLocked(func(t *kernel.Task) {
+ if mm := t.MemoryManager(); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
+ })
+ // vss is printed as returned; rss is divided by the page size.
+ fmt.Fprintf(&buf, "%d %d ", vss, rss/usermem.PageSize)
+ fmt.Fprintf(&buf, "0 0 0 0 0 0 " /* rsslim startcode endcode startstack kstkesp kstkeip */)
+ fmt.Fprintf(&buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */)
+ fmt.Fprintf(&buf, "0 0 " /* nswap cnswap */)
+ // The termination signal is only reported for the thread group leader.
+ terminationSignal := linux.Signal(0)
+ if s.t == s.t.ThreadGroup().Leader() {
+ terminationSignal = s.t.ThreadGroup().TerminationSignal()
+ }
+ fmt.Fprintf(&buf, "%d ", terminationSignal)
+ fmt.Fprintf(&buf, "0 0 0 " /* processor rt_priority policy */)
+ fmt.Fprintf(&buf, "0 0 0 " /* delayacct_blkio_ticks guest_time cguest_time */)
+ fmt.Fprintf(&buf, "0 0 0 0 0 0 0 " /* start_data end_data start_brk arg_start arg_end env_start env_end */)
+ fmt.Fprintf(&buf, "0\n" /* exit_code */)
+
+ return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*taskStatData)(nil)}}, 0
+}
+
+// statusData implements seqfile.SeqSource for /proc/[pid]/status.
+type statusData struct {
+ // t is the task being reported on.
+ t *kernel.Task
+ // pidns is the PID namespace used to translate tasks and thread groups
+ // into the IDs that are printed.
+ pidns *kernel.PIDNamespace
+}
+
+// newStatus returns the inode of t's "status" file, a seqfile backed by
+// statusData.
+func newStatus(t *kernel.Task, msrc *fs.MountSource, pidns *kernel.PIDNamespace) *fs.Inode {
+	source := &statusData{
+		t:     t,
+		pidns: pidns,
+	}
+	return newFile(seqfile.NewSeqFile(t, source), msrc, fs.SpecialFile, t)
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. It always returns
+// true, regardless of generation.
+func (s *statusData) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+//
+// The complete status text is produced as a single SeqData record, one
+// "Key:\tvalue" line per field, and the returned generation is always 0. When
+// h is non-nil no data is returned; the record produced here carries a
+// non-nil handle (a typed nil pointer).
+func (s *statusData) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if h != nil {
+ return nil, 0
+ }
+
+ var buf bytes.Buffer
+ fmt.Fprintf(&buf, "Name:\t%s\n", s.t.Name())
+ fmt.Fprintf(&buf, "State:\t%s\n", s.t.StateStatus())
+ fmt.Fprintf(&buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.t.ThreadGroup()))
+ fmt.Fprintf(&buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.t))
+ // The parent PID stays 0 when the task has no parent.
+ ppid := kernel.ThreadID(0)
+ if parent := s.t.Parent(); parent != nil {
+ ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
+ }
+ fmt.Fprintf(&buf, "PPid:\t%d\n", ppid)
+ // The tracer PID stays 0 when the task has no tracer.
+ tpid := kernel.ThreadID(0)
+ if tracer := s.t.Tracer(); tracer != nil {
+ tpid = s.pidns.IDOfTask(tracer)
+ }
+ fmt.Fprintf(&buf, "TracerPid:\t%d\n", tpid)
+ // The FD map size and memory sizes are read with the task mutex held;
+ // each stays 0 when the task has no FDMap or MemoryManager respectively.
+ var fds int
+ var vss, rss uint64
+ s.t.WithMuLocked(func(t *kernel.Task) {
+ if fdm := t.FDMap(); fdm != nil {
+ fds = fdm.Size()
+ }
+ if mm := t.MemoryManager(); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
+ })
+ fmt.Fprintf(&buf, "FDSize:\t%d\n", fds)
+ // Both sizes are shifted right by 10 bits before being labelled "kB".
+ fmt.Fprintf(&buf, "VmSize:\t%d kB\n", vss>>10)
+ fmt.Fprintf(&buf, "VmRSS:\t%d kB\n", rss>>10)
+ fmt.Fprintf(&buf, "Threads:\t%d\n", s.t.ThreadGroup().Count())
+ // Capability sets are printed as 16-digit, zero-padded hexadecimal.
+ creds := s.t.Credentials()
+ fmt.Fprintf(&buf, "CapInh:\t%016x\n", creds.InheritableCaps)
+ fmt.Fprintf(&buf, "CapPrm:\t%016x\n", creds.PermittedCaps)
+ fmt.Fprintf(&buf, "CapEff:\t%016x\n", creds.EffectiveCaps)
+ fmt.Fprintf(&buf, "CapBnd:\t%016x\n", creds.BoundingCaps)
+ fmt.Fprintf(&buf, "Seccomp:\t%d\n", s.t.SeccompMode())
+ return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*statusData)(nil)}}, 0
+}
+
+// ioUsage is the /proc/<pid>/io and /proc/<pid>/task/<tid>/io data provider.
+// newIO supplies the task's thread group as the implementation.
+type ioUsage interface {
+ // IOUsage returns the io usage data.
+ IOUsage() *usage.IO
+}
+
+// ioData is the seqfile source behind a task's "io" file. It embeds the
+// ioUsage provider whose counters it formats.
+type ioData struct {
+ ioUsage
+}
+
+// newIO returns the inode of t's "io" file. The counters are taken from t's
+// thread group.
+func newIO(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
+	source := &ioData{ioUsage: t.ThreadGroup()}
+	return newFile(seqfile.NewSeqFile(t, source), msrc, fs.SpecialFile, t)
+}
+
+// NeedsUpdate reports whether data of the given generation is out of date. It
+// always returns true, regardless of generation.
+func (i *ioData) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData returns data for the SeqFile reader.
+//
+// The I/O counters are rendered as a single SeqData record using the field
+// names of Linux's /proc/[pid]/io (rchar, wchar, syscr, syscw, read_bytes,
+// write_bytes, cancelled_write_bytes), one "name: value" line each. The
+// returned generation is always 0. When h is non-nil no data is returned; the
+// record produced here carries a non-nil handle (a typed nil pointer).
+func (i *ioData) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+	if h != nil {
+		return nil, 0
+	}
+
+	// Accumulate the provider's counters into a local copy. The local is
+	// named stats rather than io so that it does not shadow the io package.
+	var stats usage.IO
+	stats.Accumulate(i.IOUsage())
+
+	var buf bytes.Buffer
+	// Linux names the first field "rchar", not "char"; tools that parse the
+	// file by field name look for the Linux spelling.
+	fmt.Fprintf(&buf, "rchar: %d\n", stats.CharsRead)
+	fmt.Fprintf(&buf, "wchar: %d\n", stats.CharsWritten)
+	fmt.Fprintf(&buf, "syscr: %d\n", stats.ReadSyscalls)
+	fmt.Fprintf(&buf, "syscw: %d\n", stats.WriteSyscalls)
+	fmt.Fprintf(&buf, "read_bytes: %d\n", stats.BytesRead)
+	fmt.Fprintf(&buf, "write_bytes: %d\n", stats.BytesWritten)
+	fmt.Fprintf(&buf, "cancelled_write_bytes: %d\n", stats.BytesWriteCancelled)
+
+	return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*ioData)(nil)}}, 0
+}
+
+// comm is a file containing the command name for a task.
+//
+// On Linux, /proc/[pid]/comm is writable, and writing to the comm file changes
+// the thread name. We don't implement this yet as there are no known users of
+// this feature.
+type comm struct {
+ ramfs.Entry
+
+ // t is the task whose name is reported by DeprecatedPreadv.
+ t *kernel.Task
+}
+
+// newComm returns a new comm file for t, owned by root with mode 0444.
+func newComm(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
+	file := new(comm)
+	file.t = t
+	file.InitEntry(t, fs.RootOwner, fs.FilePermsFromMode(0444))
+	return newFile(file, msrc, fs.SpecialFile, t)
+}
+
+// DeprecatedPreadv reads the current command name, followed by a newline,
+// starting at offset. A negative offset yields EINVAL; an offset at or past
+// the end of the content yields io.EOF.
+func (c *comm) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
+	if offset < 0 {
+		return 0, syserror.EINVAL
+	}
+
+	content := c.t.Name() + "\n"
+	if int64(len(content)) <= offset {
+		return 0, io.EOF
+	}
+
+	copied, err := dst.CopyOut(ctx, []byte(content[offset:]))
+	return int64(copied), err
+}
+
+// auxvec is a file containing the auxiliary vector for a task.
+type auxvec struct {
+ ramfs.Entry
+
+ // t is the task whose auxiliary vector is reported by DeprecatedPreadv.
+ t *kernel.Task
+}
+
+// newAuxvec returns a new auxvec file for t, owned by root with mode 0400.
+func newAuxvec(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
+	file := new(auxvec)
+	file.t = t
+	file.InitEntry(t, fs.RootOwner, fs.FilePermsFromMode(0400))
+	return newFile(file, msrc, fs.SpecialFile, t)
+}
+
+// DeprecatedPreadv reads the current auxiliary vector.
+func (a *auxvec) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+
+ m, err := getTaskMM(a.t)
+ if err != nil {
+ return 0, err
+ }
+ defer m.DecUsers(ctx)
+ auxv := m.Auxv()
+
+ // Space for buffer with AT_NULL (0) terminator at the end.
+ size := (len(auxv) + 1) * 16
+ if offset >= int64(size) {
+ return 0, io.EOF
+ }
+
+ buf := make([]byte, size)
+ for i, e := range auxv {
+ usermem.ByteOrder.PutUint64(buf[16*i:], e.Key)
+ usermem.ByteOrder.PutUint64(buf[16*i+8:], uint64(e.Value))
+ }
+
+ n, err := dst.CopyOut(ctx, buf[offset:])
+ return int64(n), err
+}
diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go
new file mode 100644
index 000000000..a2a070bdd
--- /dev/null
+++ b/pkg/sentry/fs/proc/uid_gid_map.go
@@ -0,0 +1,152 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// An idMapSeqSource is a seqfile.SeqSource that returns UID or GID mappings
+// from a task's user namespace.
+type idMapSeqSource struct {
+ // t is the task whose user namespace is consulted.
+ t *kernel.Task
+ // gids selects the GID map when true and the UID map when false.
+ gids bool
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. It always returns
+// true, regardless of generation.
+func (imss *idMapSeqSource) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+//
+// One SeqData record is produced per mapping entry, and each record's handle
+// holds the entry's 1-based position. When handle is non-nil only the entries
+// positioned after it are returned. The returned generation is always 0.
+func (imss *idMapSeqSource) ReadSeqFileData(handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+	start := 0
+	if handle != nil {
+		start = handle.(*idMapSeqHandle).value
+	}
+
+	ns := imss.t.UserNamespace()
+	var entries []auth.IDMapEntry
+	if imss.gids {
+		entries = ns.GIDMap()
+	} else {
+		entries = ns.UIDMap()
+	}
+
+	var data []seqfile.SeqData
+	for idx, entry := range entries {
+		pos := idx + 1
+		if pos <= start {
+			// Already covered by the handle.
+			continue
+		}
+		data = append(data, seqfile.SeqData{
+			Buf:    idMapLineFromEntry(entry),
+			Handle: &idMapSeqHandle{pos},
+		})
+	}
+	return data, 0
+}
+
+// idMapSeqHandle is the seqfile handle attached to each ID map line. value is
+// the 1-based position of the line, as assigned by
+// idMapSeqSource.ReadSeqFileData.
+//
+// TODO: Fix issue requiring idMapSeqHandle wrapping an int.
+type idMapSeqHandle struct {
+ value int
+}
+
+// idMapSeqFile is the uid_map/gid_map file. Reads come from the embedded
+// seqfile.SeqFile; writes are handled by DeprecatedPwritev, which installs a
+// new mapping.
+type idMapSeqFile struct {
+ seqfile.SeqFile
+}
+
+// newUIDMap returns a new uid_map file for t. It is newIDMap with gids set to
+// false.
+func newUIDMap(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
+ return newIDMap(t, msrc, false /* gids */)
+}
+
+// newGIDMap returns a new gid_map file for t. It is newIDMap with gids set to
+// true.
+func newGIDMap(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
+ return newIDMap(t, msrc, true /* gids */)
+}
+
+// newIDMap returns the inode of an ID map file for t, owned by root with mode
+// 0644. gids selects the GID map; otherwise the UID map is used.
+func newIDMap(t *kernel.Task, msrc *fs.MountSource, gids bool) *fs.Inode {
+	source := &idMapSeqSource{t: t, gids: gids}
+	file := &idMapSeqFile{SeqFile: seqfile.SeqFile{SeqSource: source}}
+	file.InitEntry(t, fs.RootOwner, fs.FilePermsFromMode(0644))
+	return newFile(file, msrc, fs.SpecialFile, t)
+}
+
+// source returns the file's SeqSource as an *idMapSeqSource. The type
+// assertion is unchecked, so it panics if the SeqSource has any other type;
+// newIDMap always installs an *idMapSeqSource.
+func (imsf *idMapSeqFile) source() *idMapSeqSource {
+ return imsf.SeqFile.SeqSource.(*idMapSeqSource)
+}
+
+// maxIDMapLines is the largest number of lines accepted in a single write to
+// an ID map file; writes with more lines fail with EINVAL.
+//
+// "There is an (arbitrary) limit on the number of lines in the file. As at
+// Linux 3.18, the limit is five lines." - user_namespaces(7)
+const maxIDMapLines = 5
+
+// DeprecatedPwritev implements fs.InodeOperations.DeprecatedPwritev.
+//
+// The written text is parsed as at most maxIDMapLines mapping lines and
+// installed as the UID or GID map of the task's user namespace. EINVAL is
+// returned for a write that is not at offset 0, is a page or larger, has too
+// many lines, or contains a line that does not parse. Errors from copying in
+// the data or from installing the map are passed through. On success the
+// number of bytes supplied is returned.
+func (imsf *idMapSeqFile) DeprecatedPwritev(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+	// "In addition, the number of bytes written to the file must be less than
+	// the system page size, and the write must be performed at the start of
+	// the file ..." - user_namespaces(7)
+	size := src.NumBytes()
+	if offset != 0 || size >= usermem.PageSize {
+		return 0, syserror.EINVAL
+	}
+
+	raw := make([]byte, size)
+	if _, err := src.CopyIn(ctx, raw); err != nil {
+		return 0, err
+	}
+
+	// Splitting into one piece more than allowed makes an over-long write
+	// detectable by the length check below.
+	lines := bytes.SplitN(bytes.TrimSpace(raw), []byte("\n"), maxIDMapLines+1)
+	if len(lines) > maxIDMapLines {
+		return 0, syserror.EINVAL
+	}
+
+	entries := make([]auth.IDMapEntry, len(lines))
+	for idx := range lines {
+		entry, err := idMapEntryFromLine(string(lines[idx]))
+		if err != nil {
+			return 0, syserror.EINVAL
+		}
+		entries[idx] = entry
+	}
+
+	task := imsf.source().t
+	var setErr error
+	if imsf.source().gids {
+		setErr = task.UserNamespace().SetGIDMap(ctx, entries)
+	} else {
+		setErr = task.UserNamespace().SetUIDMap(ctx, entries)
+	}
+	if setErr != nil {
+		return 0, setErr
+	}
+	return int64(len(raw)), nil
+}
+
+func idMapLineFromEntry(e auth.IDMapEntry) []byte {
+ var b bytes.Buffer
+ fmt.Fprintf(&b, "%10d %10d %10d\n", e.FirstID, e.FirstParentID, e.Length)
+ return b.Bytes()
+}
+
+// idMapEntryFromLine parses line into an ID map entry by scanning, in order,
+// FirstID, FirstParentID and Length with fmt.Sscan. On a scan error the entry
+// as filled so far is returned together with the error.
+func idMapEntryFromLine(line string) (auth.IDMapEntry, error) {
+	var entry auth.IDMapEntry
+	if _, err := fmt.Sscan(line, &entry.FirstID, &entry.FirstParentID, &entry.Length); err != nil {
+		return entry, err
+	}
+	return entry, nil
+}
diff --git a/pkg/sentry/fs/proc/uptime.go b/pkg/sentry/fs/proc/uptime.go
new file mode 100644
index 000000000..4679d5821
--- /dev/null
+++ b/pkg/sentry/fs/proc/uptime.go
@@ -0,0 +1,61 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "fmt"
+ "io"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// uptime is a file containing the system uptime.
+type uptime struct {
+ ramfs.Entry
+
+ // The "start time" of the sandbox. It is captured when the file is created
+ // by newUptime, and reads report the time elapsed since then.
+ startTime ktime.Time
+}
+
+// newUptime returns a new uptime file, owned by root with mode 0444. The
+// current time from ctx is recorded as the start time.
+func (p *proc) newUptime(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
+	file := new(uptime)
+	file.startTime = ktime.NowFromContext(ctx)
+	file.InitEntry(ctx, fs.RootOwner, fs.FilePermsFromMode(0444))
+	return newFile(file, msrc, fs.SpecialFile, nil)
+}
+
+// DeprecatedPreadv reads the current uptime starting at offset. The content
+// is the number of seconds since startTime with two decimals, followed by a
+// constant "0.00" and a newline. A negative offset yields EINVAL; an offset
+// at or past the end of the content yields io.EOF.
+func (u *uptime) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
+	if offset < 0 {
+		return 0, syserror.EINVAL
+	}
+
+	elapsed := ktime.NowFromContext(ctx).Sub(u.startTime)
+	// Pretend that we've spent zero time sleeping (second number).
+	content := fmt.Sprintf("%.2f 0.00\n", elapsed.Seconds())
+	if int64(len(content)) <= offset {
+		return 0, io.EOF
+	}
+
+	copied, err := dst.CopyOut(ctx, []byte(content[offset:]))
+	return int64(copied), err
+}
diff --git a/pkg/sentry/fs/proc/version.go b/pkg/sentry/fs/proc/version.go
new file mode 100644
index 000000000..df3040d37
--- /dev/null
+++ b/pkg/sentry/fs/proc/version.go
@@ -0,0 +1,75 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "fmt"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+)
+
+// versionData backs /proc/version. The version strings are read from the
+// syscall table of the kernel's global init leader.
+type versionData struct {
+ // k is the owning Kernel.
+ k *kernel.Kernel
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. It always returns
+// true, regardless of generation.
+func (*versionData) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+//
+// A single record of the form "SYSNAME version RELEASE VERSION\n" is
+// returned, with generation 0. When h is non-nil no data is returned. The
+// function panics if the kernel has no global init yet.
+func (v *versionData) ReadSeqFileData(h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+	if h != nil {
+		return nil, 0
+	}
+
+	globalInit := v.k.GlobalInit()
+	if globalInit == nil {
+		// Attempted to read before the init Task is created. This can
+		// only occur during startup, which should never need to read
+		// this file.
+		panic("Attempted to read version before initial Task is available")
+	}
+
+	// /proc/version takes the form:
+	//
+	// "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST)
+	// (COMPILER_VERSION) VERSION"
+	//
+	// where:
+	//	- SYSNAME, RELEASE, and VERSION are the same as returned by
+	//	sys_utsname
+	//	- COMPILE_USER is the user that built the kernel
+	//	- COMPILE_HOST is the hostname of the machine on which the kernel
+	//	was built
+	//	- COMPILER_VERSION is the version reported by the building compiler
+	//
+	// Since we don't really want to expose build information to
+	// applications, those fields are omitted.
+	//
+	// FIXME: Using Version from the init task SyscallTable
+	// disregards the different version a task may have (e.g., in a uts
+	// namespace).
+	ver := globalInit.Leader().SyscallTable().Version
+	line := fmt.Sprintf("%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)
+	record := seqfile.SeqData{
+		Buf:    []byte(line),
+		Handle: (*versionData)(nil),
+	}
+	return []seqfile.SeqData{record}, 0
+}