path: root/pkg/sentry
Diffstat (limited to 'pkg/sentry')
-rw-r--r--  pkg/sentry/fs/fdpipe/pipe.go | 2
-rw-r--r--  pkg/sentry/fs/host/socket.go | 8
-rw-r--r--  pkg/sentry/fs/host/socket_iovec.go | 18
-rw-r--r--  pkg/sentry/fs/host/socket_unsafe.go | 9
-rw-r--r--  pkg/sentry/fsimpl/ext/file_description.go | 26
-rw-r--r--  pkg/sentry/fsimpl/memfs/memfs.go | 1
-rw-r--r--  pkg/sentry/fsimpl/memfs/regular_file.go | 1
-rw-r--r--  pkg/sentry/fsimpl/proc/BUILD | 49
-rw-r--r--  pkg/sentry/fsimpl/proc/filesystems.go | 25
-rw-r--r--  pkg/sentry/fsimpl/proc/loadavg.go | 40
-rw-r--r--  pkg/sentry/fsimpl/proc/meminfo.go | 77
-rw-r--r--  pkg/sentry/fsimpl/proc/mounts.go | 33
-rw-r--r--  pkg/sentry/fsimpl/proc/net.go | 338
-rw-r--r--  pkg/sentry/fsimpl/proc/net_test.go | 78
-rw-r--r--  pkg/sentry/fsimpl/proc/proc.go | 16
-rw-r--r--  pkg/sentry/fsimpl/proc/stat.go | 127
-rw-r--r--  pkg/sentry/fsimpl/proc/sys.go | 51
-rw-r--r--  pkg/sentry/fsimpl/proc/task.go | 261
-rw-r--r--  pkg/sentry/fsimpl/proc/version.go | 68
-rw-r--r--  pkg/sentry/kernel/task_block.go | 12
-rw-r--r--  pkg/sentry/mm/procfs.go | 92
-rw-r--r--  pkg/sentry/platform/ptrace/subprocess.go | 3
-rw-r--r--  pkg/sentry/socket/epsocket/epsocket.go | 72
-rw-r--r--  pkg/sentry/socket/epsocket/stack.go | 16
-rw-r--r--  pkg/sentry/socket/hostinet/stack.go | 8
-rw-r--r--  pkg/sentry/socket/netfilter/BUILD | 1
-rw-r--r--  pkg/sentry/socket/netfilter/netfilter.go | 254
-rw-r--r--  pkg/sentry/socket/unix/io.go | 4
-rw-r--r--  pkg/sentry/socket/unix/transport/connectioned.go | 2
-rw-r--r--  pkg/sentry/socket/unix/transport/connectionless.go | 2
-rw-r--r--  pkg/sentry/socket/unix/transport/unix.go | 36
-rw-r--r--  pkg/sentry/socket/unix/unix.go | 4
-rw-r--r--  pkg/sentry/strace/socket.go | 2
-rw-r--r--  pkg/sentry/syscalls/linux/sys_read.go | 3
-rw-r--r--  pkg/sentry/syscalls/linux/sys_write.go | 3
-rw-r--r--  pkg/sentry/vfs/BUILD | 10
-rw-r--r--  pkg/sentry/vfs/file_description_impl_util.go | 122
-rw-r--r--  pkg/sentry/vfs/file_description_impl_util_test.go | 141
-rw-r--r--  pkg/sentry/vfs/testutil.go | 139
39 files changed, 2008 insertions, 146 deletions
diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go
index 5a0a67eab..669ffcb75 100644
--- a/pkg/sentry/fs/fdpipe/pipe.go
+++ b/pkg/sentry/fs/fdpipe/pipe.go
@@ -87,7 +87,7 @@ func (p *pipeOperations) init() error {
log.Warningf("pipe: cannot stat fd %d: %v", p.file.FD(), err)
return syscall.EINVAL
}
- if s.Mode&syscall.S_IFIFO != syscall.S_IFIFO {
+ if (s.Mode & syscall.S_IFMT) != syscall.S_IFIFO {
log.Warningf("pipe: cannot load fd %d as pipe, file type: %o", p.file.FD(), s.Mode)
return syscall.EINVAL
}
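
The fix above switches to the standard stat(2) idiom: the file type is an enumerated value stored in the S_IFMT field of st_mode, not a set of independent flag bits, so it must be masked out with S_IFMT and compared for equality. A minimal standalone sketch (illustrative only, using the Linux syscall constants) of why testing a single type constant as if it were a flag can misfire:

package main

import (
	"fmt"
	"syscall"
)

// isFIFO uses the correct idiom: mask with S_IFMT, then compare.
func isFIFO(mode uint32) bool {
	return mode&syscall.S_IFMT == syscall.S_IFIFO
}

func main() {
	blockDev := uint32(syscall.S_IFBLK | 0644)
	// Bit-testing a type constant can produce false positives, because type
	// values share bits: S_IFBLK (0060000) contains the S_IFDIR (0040000)
	// bit, so this check wrongly "matches" a block device.
	fmt.Println(blockDev&syscall.S_IFDIR == syscall.S_IFDIR) // true (wrong)
	// Masking with S_IFMT first gives the right answer.
	fmt.Println(blockDev&syscall.S_IFMT == syscall.S_IFDIR) // false (correct)
	fmt.Println(isFIFO(uint32(syscall.S_IFIFO | 0600)))     // true
}
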
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index 44c4ee5f2..2392787cb 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -65,7 +65,7 @@ type ConnectedEndpoint struct {
// GetSockOpt and message splitting/rejection in SendMsg, but do not
// prevent lots of small messages from filling the real send buffer
// size on the host.
- sndbuf int `state:"nosave"`
+ sndbuf int64 `state:"nosave"`
// mu protects the fields below.
mu sync.RWMutex `state:"nosave"`
@@ -107,7 +107,7 @@ func (c *ConnectedEndpoint) init() *syserr.Error {
}
c.stype = linux.SockType(stype)
- c.sndbuf = sndbuf
+ c.sndbuf = int64(sndbuf)
return nil
}
@@ -202,7 +202,7 @@ func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error)
}
// Send implements transport.ConnectedEndpoint.Send.
-func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (uintptr, bool, *syserr.Error) {
+func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) {
c.mu.RLock()
defer c.mu.RUnlock()
@@ -279,7 +279,7 @@ func (c *ConnectedEndpoint) EventUpdate() {
}
// Recv implements transport.Receiver.Recv.
-func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights uintptr, peek bool) (uintptr, uintptr, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
+func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
c.mu.RLock()
defer c.mu.RUnlock()
diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go
index 05d7c79ad..af6955675 100644
--- a/pkg/sentry/fs/host/socket_iovec.go
+++ b/pkg/sentry/fs/host/socket_iovec.go
@@ -55,19 +55,19 @@ func copyFromMulti(dst []byte, src [][]byte) {
//
// If intermediate != nil, iovecs references intermediate rather than bufs and
// the caller must copy to/from bufs as necessary.
-func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovecs []syscall.Iovec, intermediate []byte, err error) {
+func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovecs []syscall.Iovec, intermediate []byte, err error) {
var iovsRequired int
for _, b := range bufs {
- length += uintptr(len(b))
+ length += int64(len(b))
if len(b) > 0 {
iovsRequired++
}
}
stopLen := length
- if length > uintptr(maxlen) {
+ if length > maxlen {
if truncate {
- stopLen = uintptr(maxlen)
+ stopLen = maxlen
err = syserror.EAGAIN
} else {
return 0, nil, nil, syserror.EMSGSIZE
@@ -85,7 +85,7 @@ func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovec
}}, b, err
}
- var total uintptr
+ var total int64
iovecs = make([]syscall.Iovec, 0, iovsRequired)
for i := range bufs {
l := len(bufs[i])
@@ -93,9 +93,9 @@ func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovec
continue
}
- stop := l
- if total+uintptr(stop) > stopLen {
- stop = int(stopLen - total)
+ stop := int64(l)
+ if total+stop > stopLen {
+ stop = stopLen - total
}
iovecs = append(iovecs, syscall.Iovec{
@@ -103,7 +103,7 @@ func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovec
Len: uint64(stop),
})
- total += uintptr(stop)
+ total += stop
if total >= stopLen {
break
}
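
The truncation logic above caps the cumulative iovec length at maxlen, shortening the last buffer that still fits and stopping there. A tiny self-contained sketch of the same arithmetic (capLengths is a hypothetical helper, not part of this change), showing how a byte budget is split across buffers:

package main

import "fmt"

// capLengths mirrors buildIovec's truncation arithmetic: given per-buffer
// lengths and a byte budget, it returns how many bytes of each buffer the
// resulting iovecs would describe.
func capLengths(lens []int64, budget int64) []int64 {
	var total int64
	var out []int64
	for _, l := range lens {
		if l == 0 {
			continue // empty buffers get no iovec
		}
		stop := l
		if total+stop > budget {
			stop = budget - total // shorten the last buffer that fits
		}
		out = append(out, stop)
		total += stop
		if total >= budget {
			break
		}
	}
	return out
}

func main() {
	fmt.Println(capLengths([]int64{4, 4, 4}, 10)) // [4 4 2]
}
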
diff --git a/pkg/sentry/fs/host/socket_unsafe.go b/pkg/sentry/fs/host/socket_unsafe.go
index e57be0506..f3bbed7ea 100644
--- a/pkg/sentry/fs/host/socket_unsafe.go
+++ b/pkg/sentry/fs/host/socket_unsafe.go
@@ -23,7 +23,7 @@ import (
//
// If the total length of bufs is > maxlen, fdReadVec will do a partial read
// and err will indicate why the message was truncated.
-func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int) (readLen uintptr, msgLen uintptr, controlLen uint64, controlTrunc bool, err error) {
+func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int64) (readLen int64, msgLen int64, controlLen uint64, controlTrunc bool, err error) {
flags := uintptr(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC)
if peek {
flags |= syscall.MSG_PEEK
@@ -48,11 +48,12 @@ func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int) (re
msg.Iovlen = uint64(len(iovecs))
}
- n, _, e := syscall.RawSyscall(syscall.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), flags)
+ rawN, _, e := syscall.RawSyscall(syscall.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), flags)
if e != 0 {
// N.B. prioritize the syscall error over the buildIovec error.
return 0, 0, 0, false, e
}
+ n := int64(rawN)
// Copy data back to bufs.
if intermediate != nil {
@@ -72,7 +73,7 @@ func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int) (re
//
// If the total length of bufs is > maxlen && truncate, fdWriteVec will do a
// partial write and err will indicate why the message was truncated.
-func fdWriteVec(fd int, bufs [][]byte, maxlen int, truncate bool) (uintptr, uintptr, error) {
+func fdWriteVec(fd int, bufs [][]byte, maxlen int64, truncate bool) (int64, int64, error) {
length, iovecs, intermediate, err := buildIovec(bufs, maxlen, truncate)
if err != nil && len(iovecs) == 0 {
// No partial write to do, return error immediately.
@@ -96,5 +97,5 @@ func fdWriteVec(fd int, bufs [][]byte, maxlen int, truncate bool) (uintptr, uint
return 0, length, e
}
- return n, length, err
+ return int64(n), length, err
}
diff --git a/pkg/sentry/fsimpl/ext/file_description.go b/pkg/sentry/fsimpl/ext/file_description.go
index d244cf1e7..a0065343b 100644
--- a/pkg/sentry/fsimpl/ext/file_description.go
+++ b/pkg/sentry/fsimpl/ext/file_description.go
@@ -16,18 +16,16 @@ package ext
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/waiter"
)
// fileDescription is embedded by ext implementations of
// vfs.FileDescriptionImpl.
type fileDescription struct {
vfsfd vfs.FileDescription
+ vfs.FileDescriptionDefaultImpl
// flags is the same as vfs.OpenOptions.Flags which are passed to
// vfs.FilesystemImpl.OpenAt.
@@ -82,29 +80,7 @@ func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) {
return stat, nil
}
-// Readiness implements waiter.Waitable.Readiness analogously to
-// file_operations::poll == NULL in Linux.
-func (fd *fileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
- // include/linux/poll.h:vfs_poll() => DEFAULT_POLLMASK
- return waiter.EventIn | waiter.EventOut
-}
-
-// EventRegister implements waiter.Waitable.EventRegister analogously to
-// file_operations::poll == NULL in Linux.
-func (fd *fileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) {}
-
-// EventUnregister implements waiter.Waitable.EventUnregister analogously to
-// file_operations::poll == NULL in Linux.
-func (fd *fileDescription) EventUnregister(e *waiter.Entry) {}
-
// Sync implements vfs.FileDescriptionImpl.Sync.
func (fd *fileDescription) Sync(ctx context.Context) error {
return nil
}
-
-// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
-func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
- // ioctl(2) specifies that ENOTTY must be returned if the file descriptor is
- // not associated with a character special device (which is unimplemented).
- return 0, syserror.ENOTTY
-}
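
The deletions above work because fileDescription now embeds vfs.FileDescriptionDefaultImpl (added at the top of this hunk): Go promotes the embedded type's methods, so the default Readiness/EventRegister/EventUnregister/Ioctl behavior no longer has to be written out by hand, and each implementation overrides only what it supports. A minimal sketch of that embedding pattern, using hypothetical names rather than the real vfs types:

package main

import "fmt"

type Impl interface {
	Read() (string, error)
	Ioctl() error
}

// Defaults plays the role of vfs.FileDescriptionDefaultImpl: it supplies
// fallback behavior for operations a file type does not implement.
type Defaults struct{}

func (Defaults) Read() (string, error) { return "", fmt.Errorf("not readable") }
func (Defaults) Ioctl() error          { return fmt.Errorf("ENOTTY") }

// FD overrides only Read; Ioctl is promoted from the embedded Defaults.
type FD struct {
	Defaults
}

func (FD) Read() (string, error) { return "hello", nil }

func main() {
	var fd Impl = FD{}
	s, _ := fd.Read()
	fmt.Println(s)          // hello
	fmt.Println(fd.Ioctl()) // ENOTTY (from Defaults)
}
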
diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go
index 59612da14..45cd42b3e 100644
--- a/pkg/sentry/fsimpl/memfs/memfs.go
+++ b/pkg/sentry/fsimpl/memfs/memfs.go
@@ -258,6 +258,7 @@ func (i *inode) direntType() uint8 {
// vfs.FileDescriptionImpl.
type fileDescription struct {
vfsfd vfs.FileDescription
+ vfs.FileDescriptionDefaultImpl
flags uint32 // status flags; immutable
}
diff --git a/pkg/sentry/fsimpl/memfs/regular_file.go b/pkg/sentry/fsimpl/memfs/regular_file.go
index 7a16d5719..55f869798 100644
--- a/pkg/sentry/fsimpl/memfs/regular_file.go
+++ b/pkg/sentry/fsimpl/memfs/regular_file.go
@@ -46,7 +46,6 @@ func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode uint16) *inod
type regularFileFD struct {
fileDescription
- vfs.FileDescriptionDefaultImpl
// These are immutable.
readable bool
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
new file mode 100644
index 000000000..3d8a4deaf
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -0,0 +1,49 @@
+load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "proc",
+ srcs = [
+ "filesystems.go",
+ "loadavg.go",
+ "meminfo.go",
+ "mounts.go",
+ "net.go",
+ "proc.go",
+ "stat.go",
+ "sys.go",
+ "task.go",
+ "version.go",
+ ],
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc",
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/binary",
+ "//pkg/log",
+ "//pkg/sentry/context",
+ "//pkg/sentry/fs",
+ "//pkg/sentry/inet",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/limits",
+ "//pkg/sentry/mm",
+ "//pkg/sentry/socket",
+ "//pkg/sentry/socket/unix",
+ "//pkg/sentry/socket/unix/transport",
+ "//pkg/sentry/usage",
+ "//pkg/sentry/usermem",
+ "//pkg/sentry/vfs",
+ ],
+)
+
+go_test(
+ name = "proc_test",
+ size = "small",
+ srcs = ["net_test.go"],
+ embed = [":proc"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/sentry/context/contexttest",
+ "//pkg/sentry/inet",
+ ],
+)
diff --git a/pkg/sentry/fsimpl/proc/filesystems.go b/pkg/sentry/fsimpl/proc/filesystems.go
new file mode 100644
index 000000000..c36c4aff5
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/filesystems.go
@@ -0,0 +1,25 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+// filesystemsData implements vfs.DynamicBytesSource for /proc/filesystems.
+//
+// +stateify savable
+type filesystemsData struct{}
+
+// TODO(b/138862512): Implement vfs.DynamicBytesSource.Generate for
+// filesystemsData. We would need to retrieve filesystem names from
+// vfs.VirtualFilesystem. Also needs a vfs replacement for
+// fs.Filesystem.AllowUserList() and fs.FilesystemRequiresDev.
diff --git a/pkg/sentry/fsimpl/proc/loadavg.go b/pkg/sentry/fsimpl/proc/loadavg.go
new file mode 100644
index 000000000..9135afef1
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/loadavg.go
@@ -0,0 +1,40 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// loadavgData backs /proc/loadavg.
+//
+// +stateify savable
+type loadavgData struct{}
+
+var _ vfs.DynamicBytesSource = (*loadavgData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // TODO(b/62345059): Include real data in fields.
+ // Column 1-3: CPU and IO utilization of the last 1, 5, and 15 minute periods.
+ // Column 4-5: currently running processes and the total number of processes.
+ // Column 6: the last process ID used.
+ fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0)
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/meminfo.go b/pkg/sentry/fsimpl/proc/meminfo.go
new file mode 100644
index 000000000..9a827cd66
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/meminfo.go
@@ -0,0 +1,77 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo.
+//
+// +stateify savable
+type meminfoData struct {
+ // k is the owning Kernel.
+ k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*meminfoData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ mf := d.k.MemoryFile()
+ mf.UpdateUsage()
+ snapshot, totalUsage := usage.MemoryAccounting.Copy()
+ totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage)
+ anon := snapshot.Anonymous + snapshot.Tmpfs
+ file := snapshot.PageCache + snapshot.Mapped
+ // We don't actually have active/inactive LRUs, so just make up numbers.
+ activeFile := (file / 2) &^ (usermem.PageSize - 1)
+ inactiveFile := file - activeFile
+
+ fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024)
+ memFree := (totalSize - totalUsage) / 1024
+ // We use MemFree as MemAvailable because we don't swap.
+ // TODO(rahat): When reclaim is implemented the value of MemAvailable
+ // should change.
+ fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree)
+ fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree)
+ fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices
+ fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024)
+ // Emulate a system with no swap, which disables inactivation of anon pages.
+ fmt.Fprintf(buf, "SwapCache: 0 kB\n")
+ fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024)
+ fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024)
+ fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024)
+ fmt.Fprintf(buf, "Inactive(anon): 0 kB\n")
+ fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024)
+ fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024)
+ fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263)
+ fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263)
+ fmt.Fprintf(buf, "SwapTotal: 0 kB\n")
+ fmt.Fprintf(buf, "SwapFree: 0 kB\n")
+ fmt.Fprintf(buf, "Dirty: 0 kB\n")
+ fmt.Fprintf(buf, "Writeback: 0 kB\n")
+ fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024)
+ fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know
+ fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024)
+ return nil
+}
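
The Active(file)/Inactive(file) split above is synthesized: half of the file-backed total, rounded down to a page boundary with Go's AND NOT operator, since x &^ (PageSize - 1) clears the low bits when the page size is a power of two. A small standalone illustration of that rounding trick (a 4 KiB page size is assumed here):

package main

import "fmt"

const pageSize = 4096

// roundDownToPage mirrors the `x &^ (pageSize - 1)` expression used for
// activeFile: clearing the low bits rounds down to a page boundary.
func roundDownToPage(x uint64) uint64 {
	return x &^ (pageSize - 1)
}

func main() {
	fmt.Println(roundDownToPage(10000)) // 8192
	fmt.Println(roundDownToPage(4096))  // 4096
	fmt.Println(roundDownToPage(123))   // 0
}
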
diff --git a/pkg/sentry/fsimpl/proc/mounts.go b/pkg/sentry/fsimpl/proc/mounts.go
new file mode 100644
index 000000000..e81b1e910
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/mounts.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import "gvisor.dev/gvisor/pkg/sentry/kernel"
+
+// TODO(b/138862512): Implement mountInfoFile and mountsFile.
+
+// mountInfoFile implements vfs.DynamicBytesSource for /proc/[pid]/mountinfo.
+//
+// +stateify savable
+type mountInfoFile struct {
+ t *kernel.Task
+}
+
+// mountsFile implements vfs.DynamicBytesSource for /proc/[pid]/mounts.
+//
+// +stateify savable
+type mountsFile struct {
+ t *kernel.Task
+}
diff --git a/pkg/sentry/fsimpl/proc/net.go b/pkg/sentry/fsimpl/proc/net.go
new file mode 100644
index 000000000..fd46eebf8
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/net.go
@@ -0,0 +1,338 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6.
+//
+// +stateify savable
+type ifinet6 struct {
+ s inet.Stack
+}
+
+var _ vfs.DynamicBytesSource = (*ifinet6)(nil)
+
+func (n *ifinet6) contents() []string {
+ var lines []string
+ nics := n.s.Interfaces()
+ for id, naddrs := range n.s.InterfaceAddrs() {
+ nic, ok := nics[id]
+ if !ok {
+ // NIC was added after Interfaces() was called. We'll just
+ // ignore it.
+ continue
+ }
+
+ for _, a := range naddrs {
+ // IPv6 only.
+ if a.Family != linux.AF_INET6 {
+ continue
+ }
+
+ // Fields:
+ // IPv6 address displayed in 32 hexadecimal chars without colons
+ // Netlink device number (interface index) in hexadecimal (use nic id)
+ // Prefix length in hexadecimal
+ // Scope value (use 0)
+ // Interface flags
+ // Device name
+ lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name))
+ }
+ }
+ return lines
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ for _, l := range n.contents() {
+ buf.WriteString(l)
+ }
+ return nil
+}
+
+// netDev implements vfs.DynamicBytesSource for /proc/net/dev.
+//
+// +stateify savable
+type netDev struct {
+ s inet.Stack
+}
+
+var _ vfs.DynamicBytesSource = (*netDev)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (n *netDev) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ interfaces := n.s.Interfaces()
+ buf.WriteString("Inter-| Receive | Transmit\n")
+ buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n")
+
+ for _, i := range interfaces {
+ // Implements the same format as
+ // net/core/net-procfs.c:dev_seq_printf_stats.
+ var stats inet.StatDev
+ if err := n.s.Statistics(&stats, i.Name); err != nil {
+ log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err)
+ continue
+ }
+ fmt.Fprintf(
+ buf,
+ "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n",
+ i.Name,
+ // Received
+ stats[0], // bytes
+ stats[1], // packets
+ stats[2], // errors
+ stats[3], // dropped
+ stats[4], // fifo
+ stats[5], // frame
+ stats[6], // compressed
+ stats[7], // multicast
+ // Transmitted
+ stats[8], // bytes
+ stats[9], // packets
+ stats[10], // errors
+ stats[11], // dropped
+ stats[12], // fifo
+ stats[13], // frame
+ stats[14], // compressed
+ stats[15], // multicast
+ )
+ }
+
+ return nil
+}
+
+// netUnix implements vfs.DynamicBytesSource for /proc/net/unix.
+//
+// +stateify savable
+type netUnix struct {
+ k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*netUnix)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (n *netUnix) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n")
+ for _, se := range n.k.ListSockets() {
+ s := se.Sock.Get()
+ if s == nil {
+ log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock)
+ continue
+ }
+ sfile := s.(*fs.File)
+ if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX {
+ s.DecRef()
+ // Not a unix socket.
+ continue
+ }
+ sops := sfile.FileOperations.(*unix.SocketOperations)
+
+ addr, err := sops.Endpoint().GetLocalAddress()
+ if err != nil {
+ log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err)
+ addr.Addr = "<unknown>"
+ }
+
+ sockFlags := 0
+ if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok {
+ if ce.Listening() {
+ // For unix domain sockets, Linux reports a single flag
+ // value, __SO_ACCEPTCON, if the socket is listening.
+ sockFlags = linux.SO_ACCEPTCON
+ }
+ }
+
+ // In the socket entry below, the value for the 'Num' field requires
+ // some consideration. Linux prints the address to the struct
+ // unix_sock representing a socket in the kernel, but may redact the
+ // value for unprivileged users depending on the kptr_restrict
+ // sysctl.
+ //
+ // One use for this field is to allow a privileged user to
+ // introspect into the kernel memory to determine information about
+ // a socket not available through procfs, such as the socket's peer.
+ //
+ // In gvisor, returning a pointer to our internal structures would
+ // be pointless, as it wouldn't match the memory layout for struct
+ // unix_sock, making introspection difficult. We could populate a
+ // struct unix_sock with the appropriate data, but even that
+ // requires consideration for which kernel version to emulate, as
+ // the definition of this struct changes over time.
+ //
+ // For now, we always redact this pointer.
+ fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d",
+ (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct.
+ sfile.ReadRefs()-1, // RefCount, don't count our own ref.
+ 0, // Protocol, always 0 for UDS.
+ sockFlags, // Flags.
+ sops.Endpoint().Type(), // Type.
+ sops.State(), // State.
+ sfile.InodeID(), // Inode.
+ )
+
+ // Path
+ if len(addr.Addr) != 0 {
+ if addr.Addr[0] == 0 {
+ // Abstract path.
+ fmt.Fprintf(buf, " @%s", string(addr.Addr[1:]))
+ } else {
+ fmt.Fprintf(buf, " %s", string(addr.Addr))
+ }
+ }
+ fmt.Fprintf(buf, "\n")
+
+ s.DecRef()
+ }
+ return nil
+}
+
+// netTCP implements vfs.DynamicBytesSource for /proc/net/tcp.
+//
+// +stateify savable
+type netTCP struct {
+ k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*netTCP)(nil)
+
+func (n *netTCP) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ t := kernel.TaskFromContext(ctx)
+ buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n")
+ for _, se := range n.k.ListSockets() {
+ s := se.Sock.Get()
+ if s == nil {
+ log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock)
+ continue
+ }
+ sfile := s.(*fs.File)
+ sops, ok := sfile.FileOperations.(socket.Socket)
+ if !ok {
+ panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
+ }
+ if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) {
+ s.DecRef()
+ // Not tcp4 sockets.
+ continue
+ }
+
+ // Linux's documentation for the fields below can be found at
+ // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
+ // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
+ // Note that the header doesn't contain labels for all the fields.
+
+ // Field: sl; entry number.
+ fmt.Fprintf(buf, "%4d: ", se.ID)
+
+ portBuf := make([]byte, 2)
+
+ // Field: local_address.
+ var localAddr linux.SockAddrInet
+ if local, _, err := sops.GetSockName(t); err == nil {
+ localAddr = *local.(*linux.SockAddrInet)
+ }
+ binary.LittleEndian.PutUint16(portBuf, localAddr.Port)
+ fmt.Fprintf(buf, "%08X:%04X ",
+ binary.LittleEndian.Uint32(localAddr.Addr[:]),
+ portBuf)
+
+ // Field: rem_address.
+ var remoteAddr linux.SockAddrInet
+ if remote, _, err := sops.GetPeerName(t); err == nil {
+ remoteAddr = *remote.(*linux.SockAddrInet)
+ }
+ binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port)
+ fmt.Fprintf(buf, "%08X:%04X ",
+ binary.LittleEndian.Uint32(remoteAddr.Addr[:]),
+ portBuf)
+
+ // Field: state; socket state.
+ fmt.Fprintf(buf, "%02X ", sops.State())
+
+ // Field: tx_queue, rx_queue; number of packets in the transmit and
+ // receive queue. Unimplemented.
+ fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
+
+ // Field: tr, tm->when; timer active state and number of jiffies
+ // until timer expires. Unimplemented.
+ fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
+
+ // Field: retrnsmt; number of unrecovered RTO timeouts.
+ // Unimplemented.
+ fmt.Fprintf(buf, "%08X ", 0)
+
+ // Field: uid.
+ uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx)
+ if err != nil {
+ log.Warningf("Failed to retrieve unstable attr for socket file: %v", err)
+ fmt.Fprintf(buf, "%5d ", 0)
+ } else {
+ fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()))
+ }
+
+ // Field: timeout; number of unanswered 0-window probes.
+ // Unimplemented.
+ fmt.Fprintf(buf, "%8d ", 0)
+
+ // Field: inode.
+ fmt.Fprintf(buf, "%8d ", sfile.InodeID())
+
+ // Field: refcount. Don't count the ref we obtain while dereferencing
+ // the weakref to this socket.
+ fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1)
+
+ // Field: Socket struct address. Redacted due to the same reason as
+ // the 'Num' field in /proc/net/unix, see netUnix.Generate.
+ fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
+
+ // Field: retransmit timeout. Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: predicted tick of soft clock (delayed ACK control data).
+ // Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: sending congestion window, Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
+ // Unimplemented, report as large threshold.
+ fmt.Fprintf(buf, "%d", -1)
+
+ fmt.Fprintf(buf, "\n")
+
+ s.DecRef()
+ }
+
+ return nil
+}
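
In the local_address and rem_address fields above, /proc/net/tcp renders the IPv4 address as a little-endian 32-bit value in hex and the port as four hex digits; the PutUint16/Uint32 steps in Generate produce the same text. A standalone sketch of that encoding (formatTCP4Addr is a hypothetical helper, for illustration only):

package main

import (
	"encoding/binary"
	"fmt"
	"net"
)

// formatTCP4Addr renders an IPv4 address/port pair the way /proc/net/tcp
// does: address bytes interpreted as a little-endian uint32, then the port,
// both in uppercase hex.
func formatTCP4Addr(ip net.IP, port uint16) string {
	return fmt.Sprintf("%08X:%04X", binary.LittleEndian.Uint32(ip.To4()), port)
}

func main() {
	fmt.Println(formatTCP4Addr(net.ParseIP("127.0.0.1"), 8080)) // 0100007F:1F90
}
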
diff --git a/pkg/sentry/fsimpl/proc/net_test.go b/pkg/sentry/fsimpl/proc/net_test.go
new file mode 100644
index 000000000..20a77a8ca
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/net_test.go
@@ -0,0 +1,78 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "reflect"
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+)
+
+func newIPv6TestStack() *inet.TestStack {
+ s := inet.NewTestStack()
+ s.SupportsIPv6Flag = true
+ return s
+}
+
+func TestIfinet6NoAddresses(t *testing.T) {
+ n := &ifinet6{s: newIPv6TestStack()}
+ var buf bytes.Buffer
+ n.Generate(contexttest.Context(t), &buf)
+ if buf.Len() > 0 {
+ t.Errorf("n.Generate() generated = %v, want = %v", buf.Bytes(), []byte{})
+ }
+}
+
+func TestIfinet6(t *testing.T) {
+ s := newIPv6TestStack()
+ s.InterfacesMap[1] = inet.Interface{Name: "eth0"}
+ s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{
+ {
+ Family: linux.AF_INET6,
+ PrefixLen: 128,
+ Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"),
+ },
+ }
+ s.InterfacesMap[2] = inet.Interface{Name: "eth1"}
+ s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{
+ {
+ Family: linux.AF_INET6,
+ PrefixLen: 128,
+ Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"),
+ },
+ }
+ want := map[string]struct{}{
+ "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {},
+ "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {},
+ }
+
+ n := &ifinet6{s: s}
+ contents := n.contents()
+ if len(contents) != len(want) {
+ t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want))
+ }
+ got := map[string]struct{}{}
+ for _, l := range contents {
+ got[l] = struct{}{}
+ }
+
+ if !reflect.DeepEqual(got, want) {
+ t.Errorf("Got n.contents() = %v, want = %v", got, want)
+ }
+}
diff --git a/pkg/sentry/fsimpl/proc/proc.go b/pkg/sentry/fsimpl/proc/proc.go
new file mode 100644
index 000000000..31dec36de
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/proc.go
@@ -0,0 +1,16 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package proc implements a partial in-memory file system for procfs.
+package proc
diff --git a/pkg/sentry/fsimpl/proc/stat.go b/pkg/sentry/fsimpl/proc/stat.go
new file mode 100644
index 000000000..720db3828
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/stat.go
@@ -0,0 +1,127 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// cpuStats contains the breakdown of CPU time for /proc/stat.
+type cpuStats struct {
+ // user is time spent in userspace tasks with non-positive niceness.
+ user uint64
+
+ // nice is time spent in userspace tasks with positive niceness.
+ nice uint64
+
+ // system is time spent in non-interrupt kernel context.
+ system uint64
+
+ // idle is time spent idle.
+ idle uint64
+
+ // ioWait is time spent waiting for IO.
+ ioWait uint64
+
+ // irq is time spent in interrupt context.
+ irq uint64
+
+ // softirq is time spent in software interrupt context.
+ softirq uint64
+
+ // steal is involuntary wait time.
+ steal uint64
+
+ // guest is time spent in guests with non-positive niceness.
+ guest uint64
+
+ // guestNice is time spent in guests with positive niceness.
+ guestNice uint64
+}
+
+// String implements fmt.Stringer.
+func (c cpuStats) String() string {
+ return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice)
+}
+
+// statData implements vfs.DynamicBytesSource for /proc/stat.
+//
+// +stateify savable
+type statData struct {
+ // k is the owning Kernel.
+ k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*statData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *statData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // TODO(b/37226836): We currently export only zero CPU stats. We could
+ // at least provide some aggregate stats.
+ var cpu cpuStats
+ fmt.Fprintf(buf, "cpu %s\n", cpu)
+
+ for c, max := uint(0), s.k.ApplicationCores(); c < max; c++ {
+ fmt.Fprintf(buf, "cpu%d %s\n", c, cpu)
+ }
+
+ // The total number of interrupts is dependent on the CPUs and PCI
+ // devices on the system. See arch_probe_nr_irqs.
+ //
+ // Since we don't report real interrupt stats, just choose an arbitrary
+ // value from a representative VM.
+ const numInterrupts = 256
+
+ // The Kernel doesn't handle real interrupts, so report all zeroes.
+ // TODO(b/37226836): We could count page faults as #PF.
+ fmt.Fprintf(buf, "intr 0") // total
+ for i := 0; i < numInterrupts; i++ {
+ fmt.Fprintf(buf, " 0")
+ }
+ fmt.Fprintf(buf, "\n")
+
+ // Total number of context switches.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "ctxt 0\n")
+
+ // CLOCK_REALTIME timestamp from boot, in seconds.
+ fmt.Fprintf(buf, "btime %d\n", s.k.Timekeeper().BootTime().Seconds())
+
+ // Total number of clones.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "processes 0\n")
+
+ // Number of runnable tasks.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "procs_running 0\n")
+
+ // Number of tasks waiting on IO.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "procs_blocked 0\n")
+
+ // Number of each softirq handled.
+ fmt.Fprintf(buf, "softirq 0") // total
+ for i := 0; i < linux.NumSoftIRQ; i++ {
+ fmt.Fprintf(buf, " 0")
+ }
+ fmt.Fprintf(buf, "\n")
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/sys.go b/pkg/sentry/fsimpl/proc/sys.go
new file mode 100644
index 000000000..b88256e12
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/sys.go
@@ -0,0 +1,51 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// mmapMinAddrData implements vfs.DynamicBytesSource for
+// /proc/sys/vm/mmap_min_addr.
+//
+// +stateify savable
+type mmapMinAddrData struct {
+ k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*mmapMinAddrData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ fmt.Fprintf(buf, "%d\n", d.k.Platform.MinUserAddress())
+ return nil
+}
+
+// +stateify savable
+type overcommitMemory struct{}
+
+var _ vfs.DynamicBytesSource = (*overcommitMemory)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *overcommitMemory) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ fmt.Fprintf(buf, "0\n")
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
new file mode 100644
index 000000000..c46e05c3a
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -0,0 +1,261 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// mapsCommon is embedded by mapsData and smapsData.
+type mapsCommon struct {
+ t *kernel.Task
+}
+
+// mm gets the kernel task's MemoryManager. No additional reference is taken on
+// mm here. This is safe because MemoryManager.destroy is required to leave the
+// MemoryManager in a state where it's still usable as a DynamicBytesSource.
+func (md *mapsCommon) mm() *mm.MemoryManager {
+ var tmm *mm.MemoryManager
+ md.t.WithMuLocked(func(t *kernel.Task) {
+ if mm := t.MemoryManager(); mm != nil {
+ tmm = mm
+ }
+ })
+ return tmm
+}
+
+// mapsData implements vfs.DynamicBytesSource for /proc/[pid]/maps.
+//
+// +stateify savable
+type mapsData struct {
+ mapsCommon
+}
+
+var _ vfs.DynamicBytesSource = (*mapsData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (md *mapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ if mm := md.mm(); mm != nil {
+ mm.ReadMapsDataInto(ctx, buf)
+ }
+ return nil
+}
+
+// smapsData implements vfs.DynamicBytesSource for /proc/[pid]/smaps.
+//
+// +stateify savable
+type smapsData struct {
+ mapsCommon
+}
+
+var _ vfs.DynamicBytesSource = (*smapsData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (sd *smapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ if mm := sd.mm(); mm != nil {
+ mm.ReadSmapsDataInto(ctx, buf)
+ }
+ return nil
+}
+
+// +stateify savable
+type taskStatData struct {
+ t *kernel.Task
+
+ // If tgstats is true, accumulate fault stats (not implemented) and CPU
+ // time across all tasks in t's thread group.
+ tgstats bool
+
+ // pidns is the PID namespace associated with the proc filesystem that
+ // includes the file using this taskStatData.
+ pidns *kernel.PIDNamespace
+}
+
+var _ vfs.DynamicBytesSource = (*taskStatData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ fmt.Fprintf(buf, "%d ", s.pidns.IDOfTask(s.t))
+ fmt.Fprintf(buf, "(%s) ", s.t.Name())
+ fmt.Fprintf(buf, "%c ", s.t.StateStatus()[0])
+ ppid := kernel.ThreadID(0)
+ if parent := s.t.Parent(); parent != nil {
+ ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
+ }
+ fmt.Fprintf(buf, "%d ", ppid)
+ fmt.Fprintf(buf, "%d ", s.pidns.IDOfProcessGroup(s.t.ThreadGroup().ProcessGroup()))
+ fmt.Fprintf(buf, "%d ", s.pidns.IDOfSession(s.t.ThreadGroup().Session()))
+ fmt.Fprintf(buf, "0 0 " /* tty_nr tpgid */)
+ fmt.Fprintf(buf, "0 " /* flags */)
+ fmt.Fprintf(buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */)
+ var cputime usage.CPUStats
+ if s.tgstats {
+ cputime = s.t.ThreadGroup().CPUStats()
+ } else {
+ cputime = s.t.CPUStats()
+ }
+ fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
+ cputime = s.t.ThreadGroup().JoinedChildCPUStats()
+ fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
+ fmt.Fprintf(buf, "%d %d ", s.t.Priority(), s.t.Niceness())
+ fmt.Fprintf(buf, "%d ", s.t.ThreadGroup().Count())
+
+ // itrealvalue. Since kernel 2.6.17, this field is no longer
+ // maintained, and is hard coded as 0.
+ fmt.Fprintf(buf, "0 ")
+
+ // Start time is relative to boot time, expressed in clock ticks.
+ fmt.Fprintf(buf, "%d ", linux.ClockTFromDuration(s.t.StartTime().Sub(s.t.Kernel().Timekeeper().BootTime())))
+
+ var vss, rss uint64
+ s.t.WithMuLocked(func(t *kernel.Task) {
+ if mm := t.MemoryManager(); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
+ })
+ fmt.Fprintf(buf, "%d %d ", vss, rss/usermem.PageSize)
+
+ // rsslim.
+ fmt.Fprintf(buf, "%d ", s.t.ThreadGroup().Limits().Get(limits.Rss).Cur)
+
+ fmt.Fprintf(buf, "0 0 0 0 0 " /* startcode endcode startstack kstkesp kstkeip */)
+ fmt.Fprintf(buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */)
+ fmt.Fprintf(buf, "0 0 " /* nswap cnswap */)
+ terminationSignal := linux.Signal(0)
+ if s.t == s.t.ThreadGroup().Leader() {
+ terminationSignal = s.t.ThreadGroup().TerminationSignal()
+ }
+ fmt.Fprintf(buf, "%d ", terminationSignal)
+ fmt.Fprintf(buf, "0 0 0 " /* processor rt_priority policy */)
+ fmt.Fprintf(buf, "0 0 0 " /* delayacct_blkio_ticks guest_time cguest_time */)
+ fmt.Fprintf(buf, "0 0 0 0 0 0 0 " /* start_data end_data start_brk arg_start arg_end env_start env_end */)
+ fmt.Fprintf(buf, "0\n" /* exit_code */)
+
+ return nil
+}
+
+// statmData implements vfs.DynamicBytesSource for /proc/[pid]/statm.
+//
+// +stateify savable
+type statmData struct {
+ t *kernel.Task
+}
+
+var _ vfs.DynamicBytesSource = (*statmData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ var vss, rss uint64
+ s.t.WithMuLocked(func(t *kernel.Task) {
+ if mm := t.MemoryManager(); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
+ })
+
+ fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/usermem.PageSize, rss/usermem.PageSize)
+ return nil
+}
+
+// statusData implements vfs.DynamicBytesSource for /proc/[pid]/status.
+//
+// +stateify savable
+type statusData struct {
+ t *kernel.Task
+ pidns *kernel.PIDNamespace
+}
+
+var _ vfs.DynamicBytesSource = (*statusData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ fmt.Fprintf(buf, "Name:\t%s\n", s.t.Name())
+ fmt.Fprintf(buf, "State:\t%s\n", s.t.StateStatus())
+ fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.t.ThreadGroup()))
+ fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.t))
+ ppid := kernel.ThreadID(0)
+ if parent := s.t.Parent(); parent != nil {
+ ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
+ }
+ fmt.Fprintf(buf, "PPid:\t%d\n", ppid)
+ tpid := kernel.ThreadID(0)
+ if tracer := s.t.Tracer(); tracer != nil {
+ tpid = s.pidns.IDOfTask(tracer)
+ }
+ fmt.Fprintf(buf, "TracerPid:\t%d\n", tpid)
+ var fds int
+ var vss, rss, data uint64
+ s.t.WithMuLocked(func(t *kernel.Task) {
+ if fdTable := t.FDTable(); fdTable != nil {
+ fds = fdTable.Size()
+ }
+ if mm := t.MemoryManager(); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ data = mm.VirtualDataSize()
+ }
+ })
+ fmt.Fprintf(buf, "FDSize:\t%d\n", fds)
+ fmt.Fprintf(buf, "VmSize:\t%d kB\n", vss>>10)
+ fmt.Fprintf(buf, "VmRSS:\t%d kB\n", rss>>10)
+ fmt.Fprintf(buf, "VmData:\t%d kB\n", data>>10)
+ fmt.Fprintf(buf, "Threads:\t%d\n", s.t.ThreadGroup().Count())
+ creds := s.t.Credentials()
+ fmt.Fprintf(buf, "CapInh:\t%016x\n", creds.InheritableCaps)
+ fmt.Fprintf(buf, "CapPrm:\t%016x\n", creds.PermittedCaps)
+ fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps)
+ fmt.Fprintf(buf, "CapBnd:\t%016x\n", creds.BoundingCaps)
+ fmt.Fprintf(buf, "Seccomp:\t%d\n", s.t.SeccompMode())
+ return nil
+}
+
+// ioUsage is the /proc/<pid>/io and /proc/<pid>/task/<tid>/io data provider.
+type ioUsage interface {
+ // IOUsage returns the io usage data.
+ IOUsage() *usage.IO
+}
+
+// +stateify savable
+type ioData struct {
+ ioUsage
+}
+
+var _ vfs.DynamicBytesSource = (*ioData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (i *ioData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ io := usage.IO{}
+ io.Accumulate(i.IOUsage())
+
+ fmt.Fprintf(buf, "char: %d\n", io.CharsRead)
+ fmt.Fprintf(buf, "wchar: %d\n", io.CharsWritten)
+ fmt.Fprintf(buf, "syscr: %d\n", io.ReadSyscalls)
+ fmt.Fprintf(buf, "syscw: %d\n", io.WriteSyscalls)
+ fmt.Fprintf(buf, "read_bytes: %d\n", io.BytesRead)
+ fmt.Fprintf(buf, "write_bytes: %d\n", io.BytesWritten)
+ fmt.Fprintf(buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled)
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/version.go b/pkg/sentry/fsimpl/proc/version.go
new file mode 100644
index 000000000..e1643d4e0
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/version.go
@@ -0,0 +1,68 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// versionData implements vfs.DynamicBytesSource for /proc/version.
+//
+// +stateify savable
+type versionData struct {
+ // k is the owning Kernel.
+ k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*versionData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (v *versionData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ init := v.k.GlobalInit()
+ if init == nil {
+ // Attempted to read before the init Task is created. This can
+ // only occur during startup, which should never need to read
+ // this file.
+ panic("Attempted to read version before initial Task is available")
+ }
+
+ // /proc/version takes the form:
+ //
+ // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST)
+ // (COMPILER_VERSION) VERSION"
+ //
+ // where:
+ // - SYSNAME, RELEASE, and VERSION are the same as returned by
+ // sys_utsname
+// - COMPILE_USER is the user that built the kernel
+ // - COMPILE_HOST is the hostname of the machine on which the kernel
+ // was built
+ // - COMPILER_VERSION is the version reported by the building compiler
+ //
+ // Since we don't really want to expose build information to
+ // applications, those fields are omitted.
+ //
+ // FIXME(mpratt): Using Version from the init task SyscallTable
+ // disregards the different version a task may have (e.g., in a uts
+ // namespace).
+ ver := init.Leader().SyscallTable().Version
+ fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)
+ return nil
+}
diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go
index 2a2e6f662..dd69939f9 100644
--- a/pkg/sentry/kernel/task_block.go
+++ b/pkg/sentry/kernel/task_block.go
@@ -15,6 +15,7 @@
package kernel
import (
+ "runtime"
"time"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
@@ -121,6 +122,17 @@ func (t *Task) block(C <-chan struct{}, timerChan <-chan struct{}) error {
// Deactive our address space, we don't need it.
interrupt := t.SleepStart()
+ // If the request is not completed, but the timer has already expired,
+ // then ensure that we run through a scheduler cycle. This is because
+ // we may see applications relying on timer slack to yield the thread.
+ // For example, they may attempt to sleep for some number of nanoseconds,
+ // and expect that this will actually yield the CPU and sleep for at
+ // least microseconds, e.g.:
+ // https://github.com/LMAX-Exchange/disruptor/commit/6ca210f2bcd23f703c479804d583718e16f43c07
+ if len(timerChan) > 0 {
+ runtime.Gosched()
+ }
+
select {
case <-C:
t.SleepFinish(true)
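
The new check above uses len() on the timer channel as a non-blocking "has the timer already fired?" test, then runtime.Gosched() to force at least one pass through the Go scheduler before the blocked task continues. A minimal sketch of the same pattern with the standard library's time.Timer (illustrative only; the sentry uses its own timer type, and this assumes the channel is buffered, as time.Timer's is):

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	timer := time.NewTimer(time.Microsecond)
	defer timer.Stop()

	time.Sleep(time.Millisecond) // let the timer expire

	// len() on the buffered timer channel is a non-blocking way to see
	// whether the timer has already fired.
	if len(timer.C) > 0 {
		// Ensure we pass through the scheduler at least once before
		// continuing, so other goroutines get a chance to run.
		runtime.Gosched()
		fmt.Println("timer had already fired; yielded")
	}
}
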
diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go
index a8819aa84..8c2246bb4 100644
--- a/pkg/sentry/mm/procfs.go
+++ b/pkg/sentry/mm/procfs.go
@@ -58,6 +58,34 @@ func (mm *MemoryManager) NeedsUpdate(generation int64) bool {
return true
}
+// ReadMapsDataInto is called by fsimpl/proc.mapsData.Generate to
+// implement /proc/[pid]/maps.
+func (mm *MemoryManager) ReadMapsDataInto(ctx context.Context, buf *bytes.Buffer) {
+ mm.mappingMu.RLock()
+ defer mm.mappingMu.RUnlock()
+ var start usermem.Addr
+
+ for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
+ // FIXME(b/30793614): If we use a usermem.Addr for the handle, we get
+ // "panic: autosave error: type usermem.Addr is not registered".
+ mm.appendVMAMapsEntryLocked(ctx, vseg, buf)
+ }
+
+ // We always emulate vsyscall, so advertise it here. Everything about a
+ // vsyscall region is static, so just hard code the maps entry since we
+ // don't have a real vma backing it. The vsyscall region is at the end of
+ // the virtual address space so nothing should be mapped after it (if
+ // something is really mapped in the tiny ~10 MiB segment afterwards, we'll
+ // get the sorting on the maps file wrong at worst; but that's not possible
+ // on any current platform).
+ //
+ // Artificially adjust the seqfile handle so we only output the vsyscall entry once.
+ if start != vsyscallEnd {
+ // FIXME(b/30793614): Can't get a pointer to constant vsyscallEnd.
+ buf.WriteString(vsyscallMapsEntry)
+ }
+}
+
// ReadMapsSeqFileData is called by fs/proc.mapsData.ReadSeqFileData to
// implement /proc/[pid]/maps.
func (mm *MemoryManager) ReadMapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
@@ -151,6 +179,27 @@ func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaI
b.WriteString("\n")
}
+// ReadSmapsDataInto is called by fsimpl/proc.smapsData.Generate to
+// implement /proc/[pid]/smaps.
+func (mm *MemoryManager) ReadSmapsDataInto(ctx context.Context, buf *bytes.Buffer) {
+ mm.mappingMu.RLock()
+ defer mm.mappingMu.RUnlock()
+ var start usermem.Addr
+
+ for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
+ // FIXME(b/30793614): If we use a usermem.Addr for the handle, we get
+ // "panic: autosave error: type usermem.Addr is not registered".
+ mm.vmaSmapsEntryIntoLocked(ctx, vseg, buf)
+ }
+
+ // We always emulate vsyscall, so advertise it here. See
+ // ReadMapsSeqFileData for additional commentary.
+ if start != vsyscallEnd {
+ // FIXME(b/30793614): Can't get a pointer to constant vsyscallEnd.
+ buf.WriteString(vsyscallSmapsEntry)
+ }
+}
+
// ReadSmapsSeqFileData is called by fs/proc.smapsData.ReadSeqFileData to
// implement /proc/[pid]/smaps.
func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
@@ -190,7 +239,12 @@ func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfil
// Preconditions: mm.mappingMu must be locked.
func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
var b bytes.Buffer
- mm.appendVMAMapsEntryLocked(ctx, vseg, &b)
+ mm.vmaSmapsEntryIntoLocked(ctx, vseg, &b)
+ return b.Bytes()
+}
+
+func (mm *MemoryManager) vmaSmapsEntryIntoLocked(ctx context.Context, vseg vmaIterator, b *bytes.Buffer) {
+ mm.appendVMAMapsEntryLocked(ctx, vseg, b)
vma := vseg.ValuePtr()
// We take mm.activeMu here in each call to vmaSmapsEntryLocked, instead of
@@ -211,40 +265,40 @@ func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterat
}
mm.activeMu.RUnlock()
- fmt.Fprintf(&b, "Size: %8d kB\n", vseg.Range().Length()/1024)
- fmt.Fprintf(&b, "Rss: %8d kB\n", rss/1024)
+ fmt.Fprintf(b, "Size: %8d kB\n", vseg.Range().Length()/1024)
+ fmt.Fprintf(b, "Rss: %8d kB\n", rss/1024)
// Currently we report PSS = RSS, i.e. we pretend each page mapped by a pma
// is only mapped by that pma. This avoids having to query memmap.Mappables
// for reference count information on each page. As a corollary, all pages
// are accounted as "private" whether or not the vma is private; compare
// Linux's fs/proc/task_mmu.c:smaps_account().
- fmt.Fprintf(&b, "Pss: %8d kB\n", rss/1024)
- fmt.Fprintf(&b, "Shared_Clean: %8d kB\n", 0)
- fmt.Fprintf(&b, "Shared_Dirty: %8d kB\n", 0)
+ fmt.Fprintf(b, "Pss: %8d kB\n", rss/1024)
+ fmt.Fprintf(b, "Shared_Clean: %8d kB\n", 0)
+ fmt.Fprintf(b, "Shared_Dirty: %8d kB\n", 0)
// Pretend that all pages are dirty if the vma is writable, and clean otherwise.
clean := rss
if vma.effectivePerms.Write {
clean = 0
}
- fmt.Fprintf(&b, "Private_Clean: %8d kB\n", clean/1024)
- fmt.Fprintf(&b, "Private_Dirty: %8d kB\n", (rss-clean)/1024)
+ fmt.Fprintf(b, "Private_Clean: %8d kB\n", clean/1024)
+ fmt.Fprintf(b, "Private_Dirty: %8d kB\n", (rss-clean)/1024)
// Pretend that all pages are "referenced" (recently touched).
- fmt.Fprintf(&b, "Referenced: %8d kB\n", rss/1024)
- fmt.Fprintf(&b, "Anonymous: %8d kB\n", anon/1024)
+ fmt.Fprintf(b, "Referenced: %8d kB\n", rss/1024)
+ fmt.Fprintf(b, "Anonymous: %8d kB\n", anon/1024)
// Hugepages (hugetlb and THP) are not implemented.
- fmt.Fprintf(&b, "AnonHugePages: %8d kB\n", 0)
- fmt.Fprintf(&b, "Shared_Hugetlb: %8d kB\n", 0)
- fmt.Fprintf(&b, "Private_Hugetlb: %7d kB\n", 0)
+ fmt.Fprintf(b, "AnonHugePages: %8d kB\n", 0)
+ fmt.Fprintf(b, "Shared_Hugetlb: %8d kB\n", 0)
+ fmt.Fprintf(b, "Private_Hugetlb: %7d kB\n", 0)
// Swap is not implemented.
- fmt.Fprintf(&b, "Swap: %8d kB\n", 0)
- fmt.Fprintf(&b, "SwapPss: %8d kB\n", 0)
- fmt.Fprintf(&b, "KernelPageSize: %8d kB\n", usermem.PageSize/1024)
- fmt.Fprintf(&b, "MMUPageSize: %8d kB\n", usermem.PageSize/1024)
+ fmt.Fprintf(b, "Swap: %8d kB\n", 0)
+ fmt.Fprintf(b, "SwapPss: %8d kB\n", 0)
+ fmt.Fprintf(b, "KernelPageSize: %8d kB\n", usermem.PageSize/1024)
+ fmt.Fprintf(b, "MMUPageSize: %8d kB\n", usermem.PageSize/1024)
locked := rss
if vma.mlockMode == memmap.MLockNone {
locked = 0
}
- fmt.Fprintf(&b, "Locked: %8d kB\n", locked/1024)
+ fmt.Fprintf(b, "Locked: %8d kB\n", locked/1024)
b.WriteString("VmFlags: ")
if vma.realPerms.Read {
@@ -284,6 +338,4 @@ func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterat
b.WriteString("ac ")
}
b.WriteString("\n")
-
- return b.Bytes()
}
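
A minimal, standalone sketch of the pattern this hunk introduces (illustrative names only, not gVisor code): the []byte-returning entry point delegates to an "Into" variant that appends to a caller-supplied buffer, so a caller emitting many entries (e.g. a whole smaps file) can reuse a single buffer.

package main

import (
	"bytes"
	"fmt"
)

// entryLocked keeps the old []byte-returning signature by delegating to the
// buffer-writing variant, mirroring the vmaSmapsEntry* split above.
func entryLocked(size, rss uint64) []byte {
	var b bytes.Buffer
	entryIntoLocked(size, rss, &b)
	return b.Bytes()
}

// entryIntoLocked appends one formatted entry to b.
func entryIntoLocked(size, rss uint64, b *bytes.Buffer) {
	fmt.Fprintf(b, "Size: %8d kB\n", size/1024)
	fmt.Fprintf(b, "Rss:  %8d kB\n", rss/1024)
}

func main() {
	var buf bytes.Buffer
	entryIntoLocked(8192, 4096, &buf)
	entryIntoLocked(4096, 0, &buf)
	fmt.Print(buf.String())
}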
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index 79501682d..6bf7cd097 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -354,6 +354,9 @@ func (t *thread) wait(outcome waitOutcome) syscall.Signal {
continue // Spurious stop.
}
if stopSig == syscall.SIGTRAP {
+ if status.TrapCause() == syscall.PTRACE_EVENT_EXIT {
+ t.dumpAndPanic("wait failed: the process exited")
+ }
// Re-encode the trap cause the way it's expected.
return stopSig | syscall.Signal(status.TrapCause()<<8)
}
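
For context, a hedged, Linux-only sketch of how a ptrace trap stop carries its event cause, which is what the added PTRACE_EVENT_EXIT check above inspects. The raw status constant is an assumed Linux wait-status encoding; the decode mirrors the hunk.

package main

import (
	"fmt"
	"syscall"
)

func main() {
	// Assumed encoding of "stopped by SIGTRAP with PTRACE_EVENT_EXIT":
	// 0x7f in the low byte, SIGTRAP in the next, the event in the byte above.
	status := syscall.WaitStatus(0x6057f)

	if status.StopSignal() == syscall.SIGTRAP && status.TrapCause() == syscall.PTRACE_EVENT_EXIT {
		// The tracee is about to exit; the hunk above treats this wait as fatal.
		fmt.Println("tracee hit PTRACE_EVENT_EXIT")
		return
	}
	// Otherwise re-encode the cause into the high byte, as the caller expects.
	fmt.Printf("signal %#x\n", status.StopSignal()|syscall.Signal(status.TrapCause()<<8))
}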
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go
index 8cb5c823f..635042263 100644
--- a/pkg/sentry/socket/epsocket/epsocket.go
+++ b/pkg/sentry/socket/epsocket/epsocket.go
@@ -291,18 +291,22 @@ func bytesToIPAddress(addr []byte) tcpip.Address {
return tcpip.Address(addr)
}
-// GetAddress reads an sockaddr struct from the given address and converts it
-// to the FullAddress format. It supports AF_UNIX, AF_INET and AF_INET6
-// addresses.
-func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syserr.Error) {
+// AddressAndFamily reads a sockaddr struct from the given address and
+// converts it to the FullAddress format. It supports AF_UNIX, AF_INET and
+// AF_INET6 addresses.
+//
+// strict indicates whether addresses with the AF_UNSPEC family are accepted or not.
+//
+// AddressAndFamily returns the address and its family.
+func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, uint16, *syserr.Error) {
// Make sure we have at least 2 bytes for the address family.
if len(addr) < 2 {
- return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+ return tcpip.FullAddress{}, 0, syserr.ErrInvalidArgument
}
family := usermem.ByteOrder.Uint16(addr)
if family != uint16(sfamily) && (!strict && family != linux.AF_UNSPEC) {
- return tcpip.FullAddress{}, syserr.ErrAddressFamilyNotSupported
+ return tcpip.FullAddress{}, family, syserr.ErrAddressFamilyNotSupported
}
// Get the rest of the fields based on the address family.
@@ -310,7 +314,7 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
case linux.AF_UNIX:
path := addr[2:]
if len(path) > linux.UnixPathMax {
- return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+ return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
// Drop the terminating NUL (if one exists) and everything after
// it for filesystem (non-abstract) addresses.
@@ -321,12 +325,12 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
}
return tcpip.FullAddress{
Addr: tcpip.Address(path),
- }, nil
+ }, family, nil
case linux.AF_INET:
var a linux.SockAddrInet
if len(addr) < sockAddrInetSize {
- return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+ return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
binary.Unmarshal(addr[:sockAddrInetSize], usermem.ByteOrder, &a)
@@ -334,12 +338,12 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
Addr: bytesToIPAddress(a.Addr[:]),
Port: ntohs(a.Port),
}
- return out, nil
+ return out, family, nil
case linux.AF_INET6:
var a linux.SockAddrInet6
if len(addr) < sockAddrInet6Size {
- return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+ return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
binary.Unmarshal(addr[:sockAddrInet6Size], usermem.ByteOrder, &a)
@@ -350,13 +354,13 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
if isLinkLocal(out.Addr) {
out.NIC = tcpip.NICID(a.Scope_id)
}
- return out, nil
+ return out, family, nil
case linux.AF_UNSPEC:
- return tcpip.FullAddress{}, nil
+ return tcpip.FullAddress{}, family, nil
default:
- return tcpip.FullAddress{}, syserr.ErrAddressFamilyNotSupported
+ return tcpip.FullAddress{}, 0, syserr.ErrAddressFamilyNotSupported
}
}
@@ -429,6 +433,11 @@ func (i *ioSequencePayload) Size() int {
return int(i.src.NumBytes())
}
+// DropFirst drops the first n bytes from underlying src.
+func (i *ioSequencePayload) DropFirst(n int) {
+ i.src = i.src.DropFirst(int(n))
+}
+
// Write implements fs.FileOperations.Write.
func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
f := &ioSequencePayload{ctx: ctx, src: src}
@@ -477,11 +486,18 @@ func (s *SocketOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
// Connect implements the linux syscall connect(2) for sockets backed by
// tcpip.Endpoint.
func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
- addr, err := GetAddress(s.family, sockaddr, false /* strict */)
+ addr, family, err := AddressAndFamily(s.family, sockaddr, false /* strict */)
if err != nil {
return err
}
+ if family == linux.AF_UNSPEC {
+ err := s.Endpoint.Disconnect()
+ if err == tcpip.ErrNotSupported {
+ return syserr.ErrAddressFamilyNotSupported
+ }
+ return syserr.TranslateNetstackError(err)
+ }
// Always return right away in the non-blocking case.
if !blocking {
return syserr.TranslateNetstackError(s.Endpoint.Connect(addr))
@@ -510,7 +526,7 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo
// Bind implements the linux syscall bind(2) for sockets backed by
// tcpip.Endpoint.
func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
- addr, err := GetAddress(s.family, sockaddr, true /* strict */)
+ addr, _, err := AddressAndFamily(s.family, sockaddr, true /* strict */)
if err != nil {
return err
}
@@ -2018,7 +2034,7 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
var addr *tcpip.FullAddress
if len(to) > 0 {
- addrBuf, err := GetAddress(s.family, to, true /* strict */)
+ addrBuf, _, err := AddressAndFamily(s.family, to, true /* strict */)
if err != nil {
return 0, err
}
@@ -2026,28 +2042,22 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
addr = &addrBuf
}
- v := buffer.NewView(int(src.NumBytes()))
-
- // Copy all the data into the buffer.
- if _, err := src.CopyIn(t, v); err != nil {
- return 0, syserr.FromError(err)
- }
-
opts := tcpip.WriteOptions{
To: addr,
More: flags&linux.MSG_MORE != 0,
EndOfRecord: flags&linux.MSG_EOR != 0,
}
- n, resCh, err := s.Endpoint.Write(tcpip.SlicePayload(v), opts)
+ v := &ioSequencePayload{t, src}
+ n, resCh, err := s.Endpoint.Write(v, opts)
if resCh != nil {
if err := t.Block(resCh); err != nil {
return 0, syserr.FromError(err)
}
- n, _, err = s.Endpoint.Write(tcpip.SlicePayload(v), opts)
+ n, _, err = s.Endpoint.Write(v, opts)
}
dontWait := flags&linux.MSG_DONTWAIT != 0
- if err == nil && (n >= uintptr(len(v)) || dontWait) {
+ if err == nil && (n >= int64(v.Size()) || dontWait) {
// Complete write.
return int(n), nil
}
@@ -2061,18 +2071,18 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
s.EventRegister(&e, waiter.EventOut)
defer s.EventUnregister(&e)
- v.TrimFront(int(n))
+ v.DropFirst(int(n))
total := n
for {
- n, _, err = s.Endpoint.Write(tcpip.SlicePayload(v), opts)
- v.TrimFront(int(n))
+ n, _, err = s.Endpoint.Write(v, opts)
+ v.DropFirst(int(n))
total += n
if err != nil && err != tcpip.ErrWouldBlock && total == 0 {
return 0, syserr.TranslateNetstackError(err)
}
- if err == nil && len(v) == 0 || err != nil && err != tcpip.ErrWouldBlock {
+ if err == nil && v.Size() == 0 || err != nil && err != tcpip.ErrWouldBlock {
return int(total), nil
}
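
A standalone, hedged sketch (plain Go, not the gVisor API) of the sa_family dispatch that lets AddressAndFamily surface AF_UNSPEC so Connect can turn it into a disconnect; a little-endian host is assumed for the byte order.

package main

import (
	"encoding/binary"
	"fmt"
)

const afUnspec = 0 // AF_UNSPEC

// family reads the sa_family field from the first two bytes of a sockaddr,
// mirroring the length check at the top of AddressAndFamily.
func family(sockaddr []byte) (uint16, error) {
	if len(sockaddr) < 2 {
		return 0, fmt.Errorf("sockaddr too short: %d bytes", len(sockaddr))
	}
	// sa_family_t sits at offset 0 in host byte order (little-endian assumed).
	return binary.LittleEndian.Uint16(sockaddr), nil
}

func main() {
	f, err := family([]byte{0, 0, 0, 0})
	if err != nil {
		panic(err)
	}
	if f == afUnspec {
		// connect(2) with an AF_UNSPEC address means "disconnect", which is
		// why Connect now branches on the returned family.
		fmt.Println("AF_UNSPEC: treat as disconnect")
	}
}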
diff --git a/pkg/sentry/socket/epsocket/stack.go b/pkg/sentry/socket/epsocket/stack.go
index 8f1572bf4..7cf7ff735 100644
--- a/pkg/sentry/socket/epsocket/stack.go
+++ b/pkg/sentry/socket/epsocket/stack.go
@@ -20,7 +20,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/socket/netfilter"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/iptables"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -154,7 +153,7 @@ func (s *Stack) RouteTable() []inet.Route {
for _, rt := range s.Stack.GetRouteTable() {
var family uint8
- switch len(rt.Destination) {
+ switch len(rt.Destination.ID()) {
case header.IPv4AddressSize:
family = linux.AF_INET
case header.IPv6AddressSize:
@@ -164,14 +163,9 @@ func (s *Stack) RouteTable() []inet.Route {
continue
}
- dstSubnet, err := tcpip.NewSubnet(rt.Destination, rt.Mask)
- if err != nil {
- log.Warningf("Invalid destination & mask in route: %s(%s): %v", rt.Destination, rt.Mask, err)
- continue
- }
routeTable = append(routeTable, inet.Route{
Family: family,
- DstLen: uint8(dstSubnet.Prefix()), // The CIDR prefix for the destination.
+ DstLen: uint8(rt.Destination.Prefix()), // The CIDR prefix for the destination.
// Always return unspecified protocol since we have no notion of
// protocol for routes.
@@ -182,7 +176,7 @@ func (s *Stack) RouteTable() []inet.Route {
Scope: linux.RT_SCOPE_LINK,
Type: linux.RTN_UNICAST,
- DstAddr: []byte(rt.Destination),
+ DstAddr: []byte(rt.Destination.ID()),
OutputInterface: int32(rt.NIC),
GatewayAddr: []byte(rt.Gateway),
})
@@ -198,8 +192,8 @@ func (s *Stack) IPTables() (iptables.IPTables, error) {
// FillDefaultIPTables sets the stack's iptables to the default tables, which
// allow and do not modify all traffic.
-func (s *Stack) FillDefaultIPTables() error {
- return netfilter.FillDefaultIPTables(s.Stack)
+func (s *Stack) FillDefaultIPTables() {
+ netfilter.FillDefaultIPTables(s.Stack)
}
// Resume implements inet.Stack.Resume.
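
A hedged sketch of the Subnet-based route destination this hunk switches to, using the same tcpip calls seen above (NewSubnet, ID, Prefix) under the assumption that the in-tree tcpip package is importable as shown; the concrete address bytes are only an example.

package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/tcpip"
)

func main() {
	// 192.168.1.0/24 as a tcpip.Subnet; addresses and masks are raw byte strings.
	dst, err := tcpip.NewSubnet(tcpip.Address("\xc0\xa8\x01\x00"), tcpip.AddressMask("\xff\xff\xff\x00"))
	if err != nil {
		panic(err)
	}
	// The subnet already carries both pieces RouteTable needs, so DstAddr and
	// DstLen no longer have to be rebuilt from a separate Destination and Mask.
	fmt.Printf("DstAddr=% x DstLen=%d\n", []byte(dst.ID()), dst.Prefix())
}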
diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go
index 1902fe155..3a4fdec47 100644
--- a/pkg/sentry/socket/hostinet/stack.go
+++ b/pkg/sentry/socket/hostinet/stack.go
@@ -203,8 +203,14 @@ func ExtractHostRoutes(routeMsgs []syscall.NetlinkMessage) ([]inet.Route, error)
inetRoute.DstAddr = attr.Value
case syscall.RTA_SRC:
inetRoute.SrcAddr = attr.Value
- case syscall.RTA_OIF:
+ case syscall.RTA_GATEWAY:
inetRoute.GatewayAddr = attr.Value
+ case syscall.RTA_OIF:
+ expected := int(binary.Size(inetRoute.OutputInterface))
+ if len(attr.Value) != expected {
+ return nil, fmt.Errorf("RTM_GETROUTE returned RTM_NEWROUTE message with invalid attribute data length (%d bytes, expected %d bytes)", len(attr.Value), expected)
+ }
+ binary.Unmarshal(attr.Value, usermem.ByteOrder, &inetRoute.OutputInterface)
}
}
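
A self-contained, hedged sketch of the length-checked attribute decode the RTA_OIF case adds, using encoding/binary in place of gVisor's binary/usermem helpers; little-endian is assumed for the netlink payload here.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// parseOIF decodes a 4-byte RTA_OIF payload into an interface index,
// rejecting payloads whose length does not match the target field.
func parseOIF(value []byte) (int32, error) {
	var oif int32
	expected := binary.Size(oif)
	if len(value) != expected {
		return 0, fmt.Errorf("invalid attribute length (%d bytes, expected %d)", len(value), expected)
	}
	if err := binary.Read(bytes.NewReader(value), binary.LittleEndian, &oif); err != nil {
		return 0, err
	}
	return oif, nil
}

func main() {
	oif, err := parseOIF([]byte{2, 0, 0, 0}) // interface index 2
	fmt.Println(oif, err)
}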
diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD
index 3021f83e7..354a0d6ee 100644
--- a/pkg/sentry/socket/netfilter/BUILD
+++ b/pkg/sentry/socket/netfilter/BUILD
@@ -13,6 +13,7 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
+ "//pkg/binary",
"//pkg/sentry/kernel",
"//pkg/sentry/usermem",
"//pkg/syserr",
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index efdb42903..9f87c32f1 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -17,7 +17,10 @@
package netfilter
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/syserr"
@@ -26,21 +29,258 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
+// errorTargetName is used to mark targets as error targets. Error targets
+// shouldn't be reached - an error has occurred if we fall through to one.
+const errorTargetName = "ERROR"
+
+// metadata is opaque to netstack. It holds data that we need to translate
+// between Linux's and netstack's iptables representations.
+type metadata struct {
+ HookEntry [linux.NF_INET_NUMHOOKS]uint32
+ Underflow [linux.NF_INET_NUMHOOKS]uint32
+ NumEntries uint32
+ Size uint32
+}
+
// GetInfo returns information about iptables.
func GetInfo(t *kernel.Task, ep tcpip.Endpoint, outPtr usermem.Addr) (linux.IPTGetinfo, *syserr.Error) {
- // TODO(b/129292233): Implement.
- return linux.IPTGetinfo{}, syserr.ErrInvalidArgument
+ // Read in the struct and table name.
+ var info linux.IPTGetinfo
+ if _, err := t.CopyIn(outPtr, &info); err != nil {
+ return linux.IPTGetinfo{}, syserr.FromError(err)
+ }
+
+ // Find the appropriate table.
+ table, err := findTable(ep, info.TableName())
+ if err != nil {
+ return linux.IPTGetinfo{}, err
+ }
+
+ // Get the hooks that apply to this table.
+ info.ValidHooks = table.ValidHooks()
+
+ // Grab the metadata struct, which is used to store information (e.g.
+ // the number of entries) that applies to the user's encoding of
+ // iptables, but not netstack's.
+ metadata := table.Metadata().(metadata)
+
+ // Set values from metadata.
+ info.HookEntry = metadata.HookEntry
+ info.Underflow = metadata.Underflow
+ info.NumEntries = metadata.NumEntries
+ info.Size = metadata.Size
+
+ return info, nil
}
// GetEntries returns netstack's iptables rules encoded for the iptables tool.
func GetEntries(t *kernel.Task, ep tcpip.Endpoint, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) {
- // TODO(b/129292233): Implement.
- return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
+ // Read in the struct and table name.
+ var userEntries linux.IPTGetEntries
+ if _, err := t.CopyIn(outPtr, &userEntries); err != nil {
+ return linux.KernelIPTGetEntries{}, syserr.FromError(err)
+ }
+
+ // Find the appropriate table.
+ table, err := findTable(ep, userEntries.TableName())
+ if err != nil {
+ return linux.KernelIPTGetEntries{}, err
+ }
+
+ // Convert netstack's iptables rules to something that the iptables
+ // tool can understand.
+ entries, _, err := convertNetstackToBinary(userEntries.TableName(), table)
+ if err != nil {
+ return linux.KernelIPTGetEntries{}, err
+ }
+ if binary.Size(entries) > uintptr(outLen) {
+ return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
+ }
+
+ return entries, nil
+}
+
+func findTable(ep tcpip.Endpoint, tableName string) (iptables.Table, *syserr.Error) {
+ ipt, err := ep.IPTables()
+ if err != nil {
+ return iptables.Table{}, syserr.FromError(err)
+ }
+ table, ok := ipt.Tables[tableName]
+ if !ok {
+ return iptables.Table{}, syserr.ErrInvalidArgument
+ }
+ return table, nil
}
// FillDefaultIPTables sets stack's IPTables to the default tables and
// populates them with metadata.
-func FillDefaultIPTables(stack *stack.Stack) error {
- stack.SetIPTables(iptables.DefaultTables())
- return nil
+func FillDefaultIPTables(stack *stack.Stack) {
+ ipt := iptables.DefaultTables()
+
+ // In order to fill in the metadata, we have to translate ipt from its
+ // netstack format to Linux's giant-binary-blob format.
+ for name, table := range ipt.Tables {
+ _, metadata, err := convertNetstackToBinary(name, table)
+ if err != nil {
+ panic(fmt.Errorf("Unable to set default IP tables: %v", err))
+ }
+ table.SetMetadata(metadata)
+ ipt.Tables[name] = table
+ }
+
+ stack.SetIPTables(ipt)
+}
+
+// convertNetstackToBinary converts the iptables as stored in netstack to the
+// format expected by the iptables tool. Linux stores each table as a binary
+// blob that can only be traversed by parsing a bit, reading some offsets,
+// jumping to those offsets, parsing again, etc.
+func convertNetstackToBinary(name string, table iptables.Table) (linux.KernelIPTGetEntries, metadata, *syserr.Error) {
+ // Return values.
+ var entries linux.KernelIPTGetEntries
+ var meta metadata
+
+ // The table name has to fit in the struct.
+ if linux.XT_TABLE_MAXNAMELEN < len(name) {
+ return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument
+ }
+ copy(entries.Name[:], name)
+
+ // Deal with the built in chains first (INPUT, OUTPUT, etc.). Each of
+ // these chains ends with an unconditional policy entry.
+ for hook := iptables.Prerouting; hook < iptables.NumHooks; hook++ {
+ chain, ok := table.BuiltinChains[hook]
+ if !ok {
+ // This table doesn't support this hook.
+ continue
+ }
+
+ // Sanity check.
+ if len(chain.Rules) < 1 {
+ return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument
+ }
+
+ for ruleIdx, rule := range chain.Rules {
+ // If this is the first rule of a builtin chain, set
+ // the metadata hook entry point.
+ if ruleIdx == 0 {
+ meta.HookEntry[hook] = entries.Size
+ }
+
+ // Each rule corresponds to an entry.
+ entry := linux.KernelIPTEntry{
+ IPTEntry: linux.IPTEntry{
+ NextOffset: linux.SizeOfIPTEntry,
+ TargetOffset: linux.SizeOfIPTEntry,
+ },
+ }
+
+ for _, matcher := range rule.Matchers {
+ // Serialize the matcher and add it to the
+ // entry.
+ serialized := marshalMatcher(matcher)
+ entry.Elems = append(entry.Elems, serialized...)
+ entry.NextOffset += uint16(len(serialized))
+ entry.TargetOffset += uint16(len(serialized))
+ }
+
+ // Serialize and append the target.
+ serialized := marshalTarget(rule.Target)
+ entry.Elems = append(entry.Elems, serialized...)
+ entry.NextOffset += uint16(len(serialized))
+
+ // The underflow rule is the last rule in the chain,
+ // and is an unconditional rule (i.e. it matches any
+ // packet). This is enforced when saving iptables.
+ if ruleIdx == len(chain.Rules)-1 {
+ meta.Underflow[hook] = entries.Size
+ }
+
+ entries.Size += uint32(entry.NextOffset)
+ entries.Entrytable = append(entries.Entrytable, entry)
+ meta.NumEntries++
+ }
+
+ }
+
+ // TODO(gvisor.dev/issue/170): Deal with the user chains here. Each of
+ // these starts with an error node holding the chain's name and ends
+ // with an unconditional return.
+
+ // Lastly, each table ends with an unconditional error target rule as
+ // its final entry.
+ errorEntry := linux.KernelIPTEntry{
+ IPTEntry: linux.IPTEntry{
+ NextOffset: linux.SizeOfIPTEntry,
+ TargetOffset: linux.SizeOfIPTEntry,
+ },
+ }
+ var errorTarget linux.XTErrorTarget
+ errorTarget.Target.TargetSize = linux.SizeOfXTErrorTarget
+ copy(errorTarget.ErrorName[:], errorTargetName)
+ copy(errorTarget.Target.Name[:], errorTargetName)
+
+ // Serialize and add it to the list of entries.
+ errorTargetBuf := make([]byte, 0, linux.SizeOfXTErrorTarget)
+ serializedErrorTarget := binary.Marshal(errorTargetBuf, usermem.ByteOrder, errorTarget)
+ errorEntry.Elems = append(errorEntry.Elems, serializedErrorTarget...)
+ errorEntry.NextOffset += uint16(len(serializedErrorTarget))
+
+ entries.Size += uint32(errorEntry.NextOffset)
+ entries.Entrytable = append(entries.Entrytable, errorEntry)
+ meta.NumEntries++
+ meta.Size = entries.Size
+
+ return entries, meta, nil
+}
+
+func marshalMatcher(matcher iptables.Matcher) []byte {
+ switch matcher.(type) {
+ default:
+ // TODO(gvisor.dev/issue/170): We don't support any matchers yet, so
+ // any call to marshalMatcher will panic.
+ panic(fmt.Errorf("unknown matcher of type %T", matcher))
+ }
+}
+
+func marshalTarget(target iptables.Target) []byte {
+ switch target.(type) {
+ case iptables.UnconditionalAcceptTarget:
+ return marshalUnconditionalAcceptTarget()
+ default:
+ panic(fmt.Errorf("unknown target of type %T", target))
+ }
+}
+
+func marshalUnconditionalAcceptTarget() []byte {
+ // The target's name will be the empty string.
+ target := linux.XTStandardTarget{
+ Target: linux.XTEntryTarget{
+ TargetSize: linux.SizeOfXTStandardTarget,
+ },
+ Verdict: translateStandardVerdict(iptables.Accept),
+ }
+
+ ret := make([]byte, 0, linux.SizeOfXTStandardTarget)
+ return binary.Marshal(ret, usermem.ByteOrder, target)
+}
+
+// translateStandardVerdict translates verdicts the same way as the iptables
+// tool.
+func translateStandardVerdict(verdict iptables.Verdict) int32 {
+ switch verdict {
+ case iptables.Accept:
+ return -linux.NF_ACCEPT - 1
+ case iptables.Drop:
+ return -linux.NF_DROP - 1
+ case iptables.Queue:
+ return -linux.NF_QUEUE - 1
+ case iptables.Return:
+ return linux.NF_RETURN
+ case iptables.Jump:
+ // TODO(gvisor.dev/issue/170): Support Jump.
+ panic("Jump isn't supported yet")
+ default:
+ panic(fmt.Sprintf("unknown standard verdict: %d", verdict))
+ }
}
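
A hedged, standalone sketch of the wire layout convertNetstackToBinary produces: each entry records where its target begins (target_offset) and where the next entry begins (next_offset), and builtin verdicts are encoded as -(NF_*) - 1, which translateStandardVerdict above implements. The sizes below are placeholders standing in for linux.SizeOfIPTEntry and linux.SizeOfXTStandardTarget, not the real ABI values.

package main

import "fmt"

const (
	sizeOfEntry          = 112 // placeholder for linux.SizeOfIPTEntry
	sizeOfStandardTarget = 40  // placeholder for linux.SizeOfXTStandardTarget

	nfAccept = 1 // Linux NF_ACCEPT
)

func main() {
	// One rule with no matchers and a standard ACCEPT target.
	targetOffset := uint16(sizeOfEntry)               // target begins right after the entry header
	nextOffset := targetOffset + sizeOfStandardTarget // next entry begins after the target
	verdict := int32(-nfAccept - 1)                   // userspace encoding of ACCEPT

	fmt.Printf("target_offset=%d next_offset=%d verdict=%d\n", targetOffset, nextOffset, verdict)
	// Decoding inverts the verdict encoding: -verdict - 1 == NF_ACCEPT.
	fmt.Println("decoded:", -verdict-1 == nfAccept)
}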
diff --git a/pkg/sentry/socket/unix/io.go b/pkg/sentry/socket/unix/io.go
index 760c7beab..2ec1a662d 100644
--- a/pkg/sentry/socket/unix/io.go
+++ b/pkg/sentry/socket/unix/io.go
@@ -62,7 +62,7 @@ type EndpointReader struct {
Creds bool
// NumRights is the number of SCM_RIGHTS FDs requested.
- NumRights uintptr
+ NumRights int
// Peek indicates that the data should not be consumed from the
// endpoint.
@@ -70,7 +70,7 @@ type EndpointReader struct {
// MsgSize is the size of the message that was read from. For stream
// sockets, it is the amount read.
- MsgSize uintptr
+ MsgSize int64
// From, if not nil, will be set with the address read from.
From *tcpip.FullAddress
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index 73d2df15d..4bd15808a 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -436,7 +436,7 @@ func (e *connectionedEndpoint) Bind(addr tcpip.FullAddress, commit func() *syser
// SendMsg writes data and a control message to the endpoint's peer.
// This method does not block if the data cannot be written.
-func (e *connectionedEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
+func (e *connectionedEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (int64, *syserr.Error) {
// Stream sockets do not support specifying the endpoint. Seqpacket
// sockets ignore the passed endpoint.
if e.stype == linux.SOCK_STREAM && to != nil {
diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go
index c7f7c5b16..0322dec0b 100644
--- a/pkg/sentry/socket/unix/transport/connectionless.go
+++ b/pkg/sentry/socket/unix/transport/connectionless.go
@@ -99,7 +99,7 @@ func (e *connectionlessEndpoint) UnidirectionalConnect(ctx context.Context) (Con
// SendMsg writes data and a control message to the specified endpoint.
// This method does not block if the data cannot be written.
-func (e *connectionlessEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
+func (e *connectionlessEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (int64, *syserr.Error) {
if to == nil {
return e.baseEndpoint.SendMsg(ctx, data, c, nil)
}
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index 7fb9cb1e0..2b0ad6395 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -121,13 +121,13 @@ type Endpoint interface {
// CMTruncated indicates that the numRights hint was used to receive fewer
// than the total available SCM_RIGHTS FDs. Additional truncation may be
// required by the caller.
- RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (recvLen, msgLen uintptr, cm ControlMessages, CMTruncated bool, err *syserr.Error)
+ RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool, addr *tcpip.FullAddress) (recvLen, msgLen int64, cm ControlMessages, CMTruncated bool, err *syserr.Error)
// SendMsg writes data and a control message to the endpoint's peer.
// This method does not block if the data cannot be written.
//
// SendMsg does not take ownership of any of its arguments on error.
- SendMsg(context.Context, [][]byte, ControlMessages, BoundEndpoint) (uintptr, *syserr.Error)
+ SendMsg(context.Context, [][]byte, ControlMessages, BoundEndpoint) (int64, *syserr.Error)
// Connect connects this endpoint directly to another.
//
@@ -291,7 +291,7 @@ type Receiver interface {
// See Endpoint.RecvMsg for documentation on shared arguments.
//
// notify indicates if RecvNotify should be called.
- Recv(data [][]byte, creds bool, numRights uintptr, peek bool) (recvLen, msgLen uintptr, cm ControlMessages, CMTruncated bool, source tcpip.FullAddress, notify bool, err *syserr.Error)
+ Recv(data [][]byte, creds bool, numRights int, peek bool) (recvLen, msgLen int64, cm ControlMessages, CMTruncated bool, source tcpip.FullAddress, notify bool, err *syserr.Error)
// RecvNotify notifies the Receiver of a successful Recv. This must not be
// called while holding any endpoint locks.
@@ -331,7 +331,7 @@ type queueReceiver struct {
}
// Recv implements Receiver.Recv.
-func (q *queueReceiver) Recv(data [][]byte, creds bool, numRights uintptr, peek bool) (uintptr, uintptr, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
+func (q *queueReceiver) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
var m *message
var notify bool
var err *syserr.Error
@@ -344,13 +344,13 @@ func (q *queueReceiver) Recv(data [][]byte, creds bool, numRights uintptr, peek
return 0, 0, ControlMessages{}, false, tcpip.FullAddress{}, false, err
}
src := []byte(m.Data)
- var copied uintptr
+ var copied int64
for i := 0; i < len(data) && len(src) > 0; i++ {
n := copy(data[i], src)
- copied += uintptr(n)
+ copied += int64(n)
src = src[n:]
}
- return copied, uintptr(len(m.Data)), m.Control, false, m.Address, notify, nil
+ return copied, int64(len(m.Data)), m.Control, false, m.Address, notify, nil
}
// RecvNotify implements Receiver.RecvNotify.
@@ -401,11 +401,11 @@ type streamQueueReceiver struct {
addr tcpip.FullAddress
}
-func vecCopy(data [][]byte, buf []byte) (uintptr, [][]byte, []byte) {
- var copied uintptr
+func vecCopy(data [][]byte, buf []byte) (int64, [][]byte, []byte) {
+ var copied int64
for len(data) > 0 && len(buf) > 0 {
n := copy(data[0], buf)
- copied += uintptr(n)
+ copied += int64(n)
buf = buf[n:]
data[0] = data[0][n:]
if len(data[0]) == 0 {
@@ -443,7 +443,7 @@ func (q *streamQueueReceiver) RecvMaxQueueSize() int64 {
}
// Recv implements Receiver.Recv.
-func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights uintptr, peek bool) (uintptr, uintptr, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
+func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights int, peek bool) (int64, int64, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
q.mu.Lock()
defer q.mu.Unlock()
@@ -464,7 +464,7 @@ func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights uint
q.addr = m.Address
}
- var copied uintptr
+ var copied int64
if peek {
// Don't consume control message if we are peeking.
c := q.control.Clone()
@@ -531,7 +531,7 @@ func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights uint
break
}
- var cpd uintptr
+ var cpd int64
cpd, data, q.buffer = vecCopy(data, q.buffer)
copied += cpd
@@ -569,7 +569,7 @@ type ConnectedEndpoint interface {
//
// syserr.ErrWouldBlock can be returned along with a partial write if
// the caller should block to send the rest of the data.
- Send(data [][]byte, controlMessages ControlMessages, from tcpip.FullAddress) (n uintptr, notify bool, err *syserr.Error)
+ Send(data [][]byte, controlMessages ControlMessages, from tcpip.FullAddress) (n int64, notify bool, err *syserr.Error)
// SendNotify notifies the ConnectedEndpoint of a successful Send. This
// must not be called while holding any endpoint locks.
@@ -637,7 +637,7 @@ func (e *connectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
}
// Send implements ConnectedEndpoint.Send.
-func (e *connectedEndpoint) Send(data [][]byte, controlMessages ControlMessages, from tcpip.FullAddress) (uintptr, bool, *syserr.Error) {
+func (e *connectedEndpoint) Send(data [][]byte, controlMessages ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) {
var l int64
for _, d := range data {
l += int64(len(d))
@@ -665,7 +665,7 @@ func (e *connectedEndpoint) Send(data [][]byte, controlMessages ControlMessages,
}
l, notify, err := e.writeQueue.Enqueue(&message{Data: buffer.View(v), Control: controlMessages, Address: from}, truncate)
- return uintptr(l), notify, err
+ return int64(l), notify, err
}
// SendNotify implements ConnectedEndpoint.SendNotify.
@@ -781,7 +781,7 @@ func (e *baseEndpoint) Connected() bool {
}
// RecvMsg reads data and a control message from the endpoint.
-func (e *baseEndpoint) RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (uintptr, uintptr, ControlMessages, bool, *syserr.Error) {
+func (e *baseEndpoint) RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool, addr *tcpip.FullAddress) (int64, int64, ControlMessages, bool, *syserr.Error) {
e.Lock()
if e.receiver == nil {
@@ -807,7 +807,7 @@ func (e *baseEndpoint) RecvMsg(ctx context.Context, data [][]byte, creds bool, n
// SendMsg writes data and a control message to the endpoint's peer.
// This method does not block if the data cannot be written.
-func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
+func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (int64, *syserr.Error) {
e.Lock()
if !e.Connected() {
e.Unlock()
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 9032b7580..0d0cb68df 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -116,7 +116,7 @@ func (s *SocketOperations) Endpoint() transport.Endpoint {
// extractPath extracts and validates the address.
func extractPath(sockaddr []byte) (string, *syserr.Error) {
- addr, err := epsocket.GetAddress(linux.AF_UNIX, sockaddr, true /* strict */)
+ addr, _, err := epsocket.AddressAndFamily(linux.AF_UNIX, sockaddr, true /* strict */)
if err != nil {
return "", err
}
@@ -535,7 +535,7 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
Ctx: t,
Endpoint: s.ep,
Creds: wantCreds,
- NumRights: uintptr(numRights),
+ NumRights: numRights,
Peek: peek,
}
if senderRequested {
diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go
index 386b40af7..f779186ad 100644
--- a/pkg/sentry/strace/socket.go
+++ b/pkg/sentry/strace/socket.go
@@ -332,7 +332,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string {
switch family {
case linux.AF_INET, linux.AF_INET6, linux.AF_UNIX:
- fa, err := epsocket.GetAddress(int(family), b, true /* strict */)
+ fa, _, err := epsocket.AddressAndFamily(int(family), b, true /* strict */)
if err != nil {
return fmt.Sprintf("%#x {Family: %s, error extracting address: %v}", addr, familyStr, err)
}
diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go
index b2474e60d..3ab54271c 100644
--- a/pkg/sentry/syscalls/linux/sys_read.go
+++ b/pkg/sentry/syscalls/linux/sys_read.go
@@ -191,7 +191,6 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
}
// Preadv2 implements linux syscall preadv2(2).
-// TODO(b/120162627): Implement RWF_HIPRI functionality.
func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
// While the syscall is
// preadv2(int fd, struct iovec* iov, int iov_cnt, off_t offset, int flags)
@@ -228,6 +227,8 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
}
// Check flags field.
+ // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
+ // accepted as a valid flag argument for preadv2.
if flags&^linux.RWF_VALID != 0 {
return 0, nil, syserror.EOPNOTSUPP
}
diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go
index 5278c96a6..27cd2c336 100644
--- a/pkg/sentry/syscalls/linux/sys_write.go
+++ b/pkg/sentry/syscalls/linux/sys_write.go
@@ -191,7 +191,6 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
}
// Pwritev2 implements linux syscall pwritev2(2).
-// TODO(b/120162627): Implement RWF_HIPRI functionality.
// TODO(b/120161091): Implement O_SYNC and D_SYNC functionality.
func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
// While the syscall is
@@ -227,6 +226,8 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return 0, nil, syserror.ESPIPE
}
+ // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
+ // accepted as a valid flag argument for pwritev2.
if flags&^linux.RWF_VALID != 0 {
return uintptr(flags), nil, syserror.EOPNOTSUPP
}
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index 4de6c41cf..0f247bf77 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -18,6 +18,7 @@ go_library(
"permissions.go",
"resolving_path.go",
"syscalls.go",
+ "testutil.go",
"vfs.go",
],
importpath = "gvisor.dev/gvisor/pkg/sentry/vfs",
@@ -40,7 +41,16 @@ go_test(
name = "vfs_test",
size = "small",
srcs = [
+ "file_description_impl_util_test.go",
"mount_test.go",
],
embed = [":vfs"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/sentry/context",
+ "//pkg/sentry/context/contexttest",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/usermem",
+ "//pkg/syserror",
+ ],
)
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index 486893e70..ba230da72 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -15,6 +15,10 @@
package vfs
import (
+ "bytes"
+ "io"
+ "sync"
+
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/context"
@@ -24,6 +28,16 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
+// The following design pattern is strongly recommended for filesystem
+// implementations to adopt:
+// - Have a local fileDescription struct (containing FileDescription) which
+// embeds FileDescriptionDefaultImpl and overrides the default methods
+//   which are common to all fd implementations for that filesystem
+// like StatusFlags, SetStatusFlags, Stat, SetStat, StatFS, etc.
+// - This should be embedded in all file description implementations as the
+// first field by value.
+// - Directory FDs would also embed DirectoryFileDescriptionDefaultImpl.
+
// FileDescriptionDefaultImpl may be embedded by implementations of
// FileDescriptionImpl to obtain implementations of many FileDescriptionImpl
// methods with default behavior analogous to Linux's.
@@ -115,11 +129,8 @@ func (FileDescriptionDefaultImpl) Ioctl(ctx context.Context, uio usermem.IO, arg
// DirectoryFileDescriptionDefaultImpl may be embedded by implementations of
// FileDescriptionImpl that always represent directories to obtain
-// implementations of non-directory I/O methods that return EISDIR, and
-// implementations of other methods consistent with FileDescriptionDefaultImpl.
-type DirectoryFileDescriptionDefaultImpl struct {
- FileDescriptionDefaultImpl
-}
+// implementations of non-directory I/O methods that return EISDIR.
+type DirectoryFileDescriptionDefaultImpl struct{}
// PRead implements FileDescriptionImpl.PRead.
func (DirectoryFileDescriptionDefaultImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
@@ -140,3 +151,104 @@ func (DirectoryFileDescriptionDefaultImpl) PWrite(ctx context.Context, src userm
func (DirectoryFileDescriptionDefaultImpl) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
return 0, syserror.EISDIR
}
+
+// DynamicBytesFileDescriptionImpl may be embedded by implementations of
+// FileDescriptionImpl that represent read-only regular files whose contents
+// are backed by a bytes.Buffer that is regenerated when necessary, consistent
+// with Linux's fs/seq_file.c:single_open().
+//
+// DynamicBytesFileDescriptionImpl.SetDataSource() must be called before first
+// use.
+type DynamicBytesFileDescriptionImpl struct {
+ data DynamicBytesSource // immutable
+ mu sync.Mutex // protects the following fields
+ buf bytes.Buffer
+ off int64
+ lastRead int64 // offset at which the last Read, PRead, or Seek ended
+}
+
+// DynamicBytesSource represents a data source for a
+// DynamicBytesFileDescriptionImpl.
+type DynamicBytesSource interface {
+ // Generate writes the file's contents to buf.
+ Generate(ctx context.Context, buf *bytes.Buffer) error
+}
+
+// SetDataSource must be called exactly once on fd before first use.
+func (fd *DynamicBytesFileDescriptionImpl) SetDataSource(data DynamicBytesSource) {
+ fd.data = data
+}
+
+// Preconditions: fd.mu must be locked.
+func (fd *DynamicBytesFileDescriptionImpl) preadLocked(ctx context.Context, dst usermem.IOSequence, offset int64, opts *ReadOptions) (int64, error) {
+ // Regenerate the buffer if it's empty, or before pread() at a new offset.
+ // Compare fs/seq_file.c:seq_read() => traverse().
+ switch {
+ case offset != fd.lastRead:
+ fd.buf.Reset()
+ fallthrough
+ case fd.buf.Len() == 0:
+ if err := fd.data.Generate(ctx, &fd.buf); err != nil {
+ fd.buf.Reset()
+ // fd.off is not updated in this case.
+ fd.lastRead = 0
+ return 0, err
+ }
+ }
+ bs := fd.buf.Bytes()
+ if offset >= int64(len(bs)) {
+ return 0, io.EOF
+ }
+ n, err := dst.CopyOut(ctx, bs[offset:])
+ fd.lastRead = offset + int64(n)
+ return int64(n), err
+}
+
+// PRead implements FileDescriptionImpl.PRead.
+func (fd *DynamicBytesFileDescriptionImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
+ fd.mu.Lock()
+ n, err := fd.preadLocked(ctx, dst, offset, &opts)
+ fd.mu.Unlock()
+ return n, err
+}
+
+// Read implements FileDescriptionImpl.Read.
+func (fd *DynamicBytesFileDescriptionImpl) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
+ fd.mu.Lock()
+ n, err := fd.preadLocked(ctx, dst, fd.off, &opts)
+ fd.off += n
+ fd.mu.Unlock()
+ return n, err
+}
+
+// Seek implements FileDescriptionImpl.Seek.
+func (fd *DynamicBytesFileDescriptionImpl) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ switch whence {
+ case linux.SEEK_SET:
+ // Use offset as given.
+ case linux.SEEK_CUR:
+ offset += fd.off
+ default:
+ // fs/seq_file:seq_lseek() rejects SEEK_END etc.
+ return 0, syserror.EINVAL
+ }
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ if offset != fd.lastRead {
+ // Regenerate the file's contents immediately. Compare
+ // fs/seq_file.c:seq_lseek() => traverse().
+ fd.buf.Reset()
+ if err := fd.data.Generate(ctx, &fd.buf); err != nil {
+ fd.buf.Reset()
+ fd.off = 0
+ fd.lastRead = 0
+ return 0, err
+ }
+ fd.lastRead = offset
+ }
+ fd.off = offset
+ return offset, nil
+}
diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go
new file mode 100644
index 000000000..511b829fc
--- /dev/null
+++ b/pkg/sentry/vfs/file_description_impl_util_test.go
@@ -0,0 +1,141 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "sync/atomic"
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// fileDescription is the common fd struct which a filesystem implementation
+// embeds in all of its file description implementations as required.
+type fileDescription struct {
+ vfsfd FileDescription
+ FileDescriptionDefaultImpl
+}
+
+// genCountFD is a read-only FileDescriptionImpl representing a regular file
+// that contains the number of times its DynamicBytesSource.Generate()
+// implementation has been called.
+type genCountFD struct {
+ fileDescription
+ DynamicBytesFileDescriptionImpl
+
+ count uint64 // accessed using atomic memory ops
+}
+
+func newGenCountFD(mnt *Mount, vfsd *Dentry) *FileDescription {
+ var fd genCountFD
+ fd.vfsfd.Init(&fd, mnt, vfsd)
+ fd.DynamicBytesFileDescriptionImpl.SetDataSource(&fd)
+ return &fd.vfsfd
+}
+
+// Release implements FileDescriptionImpl.Release.
+func (fd *genCountFD) Release() {
+}
+
+// StatusFlags implements FileDescriptionImpl.StatusFlags.
+func (fd *genCountFD) StatusFlags(ctx context.Context) (uint32, error) {
+ return 0, nil
+}
+
+// SetStatusFlags implements FileDescriptionImpl.SetStatusFlags.
+func (fd *genCountFD) SetStatusFlags(ctx context.Context, flags uint32) error {
+ return syserror.EPERM
+}
+
+// Stat implements FileDescriptionImpl.Stat.
+func (fd *genCountFD) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) {
+ // Note that Statx.Mask == 0 in the return value.
+ return linux.Statx{}, nil
+}
+
+// SetStat implements FileDescriptionImpl.SetStat.
+func (fd *genCountFD) SetStat(ctx context.Context, opts SetStatOptions) error {
+ return syserror.EPERM
+}
+
+// Generate implements DynamicBytesSource.Generate.
+func (fd *genCountFD) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ fmt.Fprintf(buf, "%d", atomic.AddUint64(&fd.count, 1))
+ return nil
+}
+
+func TestGenCountFD(t *testing.T) {
+ ctx := contexttest.Context(t)
+ creds := auth.CredentialsFromContext(ctx)
+
+ vfsObj := New() // vfs.New()
+ vfsObj.MustRegisterFilesystemType("testfs", FDTestFilesystemType{})
+ mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "testfs", &NewFilesystemOptions{})
+ if err != nil {
+ t.Fatalf("failed to create testfs root mount: %v", err)
+ }
+ vd := mntns.Root()
+ defer vd.DecRef()
+
+ fd := newGenCountFD(vd.Mount(), vd.Dentry())
+ defer fd.DecRef()
+
+ // The first read causes Generate to be called to fill the FD's buffer.
+ buf := make([]byte, 2)
+ ioseq := usermem.BytesIOSequence(buf)
+ n, err := fd.Impl().Read(ctx, ioseq, ReadOptions{})
+ if n != 1 || (err != nil && err != io.EOF) {
+ t.Fatalf("first Read: got (%d, %v), wanted (1, nil or EOF)", n, err)
+ }
+ if want := byte('1'); buf[0] != want {
+ t.Errorf("first Read: got byte %c, wanted %c", buf[0], want)
+ }
+
+ // A second read without seeking is still at EOF.
+ n, err = fd.Impl().Read(ctx, ioseq, ReadOptions{})
+ if n != 0 || err != io.EOF {
+ t.Fatalf("second Read: got (%d, %v), wanted (0, EOF)", n, err)
+ }
+
+ // Seeking to the beginning of the file causes it to be regenerated.
+ n, err = fd.Impl().Seek(ctx, 0, linux.SEEK_SET)
+ if n != 0 || err != nil {
+ t.Fatalf("Seek: got (%d, %v), wanted (0, nil)", n, err)
+ }
+ n, err = fd.Impl().Read(ctx, ioseq, ReadOptions{})
+ if n != 1 || (err != nil && err != io.EOF) {
+ t.Fatalf("Read after Seek: got (%d, %v), wanted (1, nil or EOF)", n, err)
+ }
+ if want := byte('2'); buf[0] != want {
+ t.Errorf("Read after Seek: got byte %c, wanted %c", buf[0], want)
+ }
+
+ // PRead at the beginning of the file also causes it to be regenerated.
+ n, err = fd.Impl().PRead(ctx, ioseq, 0, ReadOptions{})
+ if n != 1 || (err != nil && err != io.EOF) {
+ t.Fatalf("PRead: got (%d, %v), wanted (1, nil or EOF)", n, err)
+ }
+ if want := byte('3'); buf[0] != want {
+ t.Errorf("PRead: got byte %c, wanted %c", buf[0], want)
+ }
+}
diff --git a/pkg/sentry/vfs/testutil.go b/pkg/sentry/vfs/testutil.go
new file mode 100644
index 000000000..70b192ece
--- /dev/null
+++ b/pkg/sentry/vfs/testutil.go
@@ -0,0 +1,139 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// FDTestFilesystemType is a test-only FilesystemType that produces Filesystems
+// for which all FilesystemImpl methods taking a path return EPERM. It is used
+// to produce Mounts and Dentries for testing of FileDescriptionImpls that do
+// not depend on their originating Filesystem.
+type FDTestFilesystemType struct{}
+
+// FDTestFilesystem is a test-only FilesystemImpl produced by
+// FDTestFilesystemType.
+type FDTestFilesystem struct {
+ vfsfs Filesystem
+}
+
+// NewFilesystem implements FilesystemType.NewFilesystem.
+func (fstype FDTestFilesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts NewFilesystemOptions) (*Filesystem, *Dentry, error) {
+ var fs FDTestFilesystem
+ fs.vfsfs.Init(&fs)
+ return &fs.vfsfs, fs.NewDentry(), nil
+}
+
+// Release implements FilesystemImpl.Release.
+func (fs *FDTestFilesystem) Release() {
+}
+
+// Sync implements FilesystemImpl.Sync.
+func (fs *FDTestFilesystem) Sync(ctx context.Context) error {
+ return nil
+}
+
+// GetDentryAt implements FilesystemImpl.GetDentryAt.
+func (fs *FDTestFilesystem) GetDentryAt(ctx context.Context, rp *ResolvingPath, opts GetDentryOptions) (*Dentry, error) {
+ return nil, syserror.EPERM
+}
+
+// LinkAt implements FilesystemImpl.LinkAt.
+func (fs *FDTestFilesystem) LinkAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry) error {
+ return syserror.EPERM
+}
+
+// MkdirAt implements FilesystemImpl.MkdirAt.
+func (fs *FDTestFilesystem) MkdirAt(ctx context.Context, rp *ResolvingPath, opts MkdirOptions) error {
+ return syserror.EPERM
+}
+
+// MknodAt implements FilesystemImpl.MknodAt.
+func (fs *FDTestFilesystem) MknodAt(ctx context.Context, rp *ResolvingPath, opts MknodOptions) error {
+ return syserror.EPERM
+}
+
+// OpenAt implements FilesystemImpl.OpenAt.
+func (fs *FDTestFilesystem) OpenAt(ctx context.Context, rp *ResolvingPath, opts OpenOptions) (*FileDescription, error) {
+ return nil, syserror.EPERM
+}
+
+// ReadlinkAt implements FilesystemImpl.ReadlinkAt.
+func (fs *FDTestFilesystem) ReadlinkAt(ctx context.Context, rp *ResolvingPath) (string, error) {
+ return "", syserror.EPERM
+}
+
+// RenameAt implements FilesystemImpl.RenameAt.
+func (fs *FDTestFilesystem) RenameAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry, opts RenameOptions) error {
+ return syserror.EPERM
+}
+
+// RmdirAt implements FilesystemImpl.RmdirAt.
+func (fs *FDTestFilesystem) RmdirAt(ctx context.Context, rp *ResolvingPath) error {
+ return syserror.EPERM
+}
+
+// SetStatAt implements FilesystemImpl.SetStatAt.
+func (fs *FDTestFilesystem) SetStatAt(ctx context.Context, rp *ResolvingPath, opts SetStatOptions) error {
+ return syserror.EPERM
+}
+
+// StatAt implements FilesystemImpl.StatAt.
+func (fs *FDTestFilesystem) StatAt(ctx context.Context, rp *ResolvingPath, opts StatOptions) (linux.Statx, error) {
+ return linux.Statx{}, syserror.EPERM
+}
+
+// StatFSAt implements FilesystemImpl.StatFSAt.
+func (fs *FDTestFilesystem) StatFSAt(ctx context.Context, rp *ResolvingPath) (linux.Statfs, error) {
+ return linux.Statfs{}, syserror.EPERM
+}
+
+// SymlinkAt implements FilesystemImpl.SymlinkAt.
+func (fs *FDTestFilesystem) SymlinkAt(ctx context.Context, rp *ResolvingPath, target string) error {
+ return syserror.EPERM
+}
+
+// UnlinkAt implements FilesystemImpl.UnlinkAt.
+func (fs *FDTestFilesystem) UnlinkAt(ctx context.Context, rp *ResolvingPath) error {
+ return syserror.EPERM
+}
+
+type fdTestDentry struct {
+ vfsd Dentry
+}
+
+// NewDentry returns a new Dentry.
+func (fs *FDTestFilesystem) NewDentry() *Dentry {
+ var d fdTestDentry
+ d.vfsd.Init(&d)
+ return &d.vfsd
+}
+
+// IncRef implements DentryImpl.IncRef.
+func (d *fdTestDentry) IncRef(vfsfs *Filesystem) {
+}
+
+// TryIncRef implements DentryImpl.TryIncRef.
+func (d *fdTestDentry) TryIncRef(vfsfs *Filesystem) bool {
+ return true
+}
+
+// DecRef implements DentryImpl.DecRef.
+func (d *fdTestDentry) DecRef(vfsfs *Filesystem) {
+}