summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fs/proc/sys_net.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fs/proc/sys_net.go')
-rw-r--r--pkg/sentry/fs/proc/sys_net.go355
1 files changed, 355 insertions, 0 deletions
diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go
new file mode 100644
index 000000000..dbf1a987c
--- /dev/null
+++ b/pkg/sentry/fs/proc/sys_net.go
@@ -0,0 +1,355 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "fmt"
+ "io"
+ "sync"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// tcpMemDir selects which TCP buffer size a tcpMemInode exposes: the receive
+// buffer or the send buffer (see readSize and writeSize).
+type tcpMemDir int
+
+const (
+ // tcpRMem selects the TCP receive buffer size.
+ tcpRMem tcpMemDir = iota
+ // tcpWMem selects the TCP send buffer size.
+ tcpWMem
+)
+
+// tcpMemInode is the inode used to read and write the size of netstack TCP
+// buffers. Which buffer (receive or send) is selected by dir.
+//
+// TODO(b/121381035): If we have multiple proc mounts, concurrent writes can
+// leave netstack and the proc files in an inconsistent state. Since we set the
+// buffer size from these proc files on restore, we may also race and end up in
+// an inconsistent state on restore.
+//
+// +stateify savable
+type tcpMemInode struct {
+ fsutil.SimpleFileInode
+ // dir selects whether this inode reports the receive (tcpRMem) or the
+ // send (tcpWMem) buffer size.
+ dir tcpMemDir
+ // s is the network stack whose buffer sizes are read and updated.
+ s inet.Stack `state:"wait"`
+
+ // size stores the tcp buffer size during save, and sets the buffer
+ // size in netstack in restore. We must save/restore this here, since
+ // netstack itself is stateless.
+ //
+ // NOTE(review): nothing in this part of the file assigns size;
+ // presumably save/restore hooks defined elsewhere do -- confirm.
+ size inet.TCPBufferSize
+
+ // mu protects against concurrent reads/writes to files based on this
+ // inode. It is held for the duration of tcpMemFile.Read and
+ // tcpMemFile.Write.
+ mu sync.Mutex `state:"nosave"`
+}
+
+// Compile-time check that *tcpMemInode implements fs.InodeOperations.
+var _ fs.InodeOperations = (*tcpMemInode)(nil)
+
+// newTCPMemInode creates the inode backing a TCP buffer size file.
+//
+// The returned inode reports and updates the buffer size of s selected by
+// dir (tcpRMem for the receive buffer, tcpWMem for the send buffer).
+//
+// The inode is owned by root and created with mode 0644: the file implements
+// Write, so advertising it as read-only (0444) would misreport what the file
+// supports. 0644 is also the mode Linux uses for tcp_rmem and tcp_wmem.
+func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir tcpMemDir) *fs.Inode {
+	tm := &tcpMemInode{
+		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
+		s:               s,
+		dir:             dir,
+	}
+	sattr := fs.StableAttr{
+		DeviceID:  device.ProcDevice.DeviceID(),
+		InodeID:   device.ProcDevice.NextIno(),
+		BlockSize: usermem.PageSize,
+		Type:      fs.SpecialFile,
+	}
+	return fs.NewInode(tm, msrc, sattr)
+}
+
+// GetFile implements fs.InodeOperations.GetFile.
+//
+// It hands out a tcpMemFile bound to this inode. Positional reads are always
+// enabled on the returned file, whatever flags the caller supplied.
+func (m *tcpMemInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+	fops := &tcpMemFile{tcpMemInode: m}
+	flags.Pread = true
+	return fs.NewFile(ctx, dirent, flags, fops), nil
+}
+
+// tcpMemFile is the file implementation handed out by tcpMemInode.GetFile.
+// Its Read and Write report and update the buffer size selected by the inode;
+// the remaining file operations come from the embedded fsutil helpers.
+//
+// +stateify savable
+type tcpMemFile struct {
+ fsutil.FileGenericSeek `state:"nosave"`
+ fsutil.FileNoIoctl `state:"nosave"`
+ fsutil.FileNoMMap `state:"nosave"`
+ fsutil.FileNoSplice `state:"nosave"`
+ fsutil.FileNoopRelease `state:"nosave"`
+ fsutil.FileNoopFlush `state:"nosave"`
+ fsutil.FileNoopFsync `state:"nosave"`
+ fsutil.FileNotDirReaddir `state:"nosave"`
+ fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
+
+ // tcpMemInode is the inode this file was opened from; it supplies the
+ // stack, the buffer direction and the mutex used by Read and Write.
+ tcpMemInode *tcpMemInode
+}
+
+// Compile-time check that *tcpMemFile implements fs.FileOperations.
+var _ fs.FileOperations = (*tcpMemFile)(nil)
+
+// Read implements fs.FileOperations.Read.
+//
+// A read at offset 0 produces the current "min<TAB>default<TAB>max" line for
+// the buffer selected by the inode; a read at any other offset returns
+// io.EOF. The inode mutex is held while the size is fetched and copied out.
+func (f *tcpMemFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
+	if offset != 0 {
+		return 0, io.EOF
+	}
+
+	inode := f.tcpMemInode
+	inode.mu.Lock()
+	defer inode.mu.Unlock()
+
+	cur, err := readSize(inode.dir, inode.s)
+	if err != nil {
+		return 0, err
+	}
+
+	line := []byte(fmt.Sprintf("%d\t%d\t%d\n", cur.Min, cur.Default, cur.Max))
+	copied, err := dst.CopyOut(ctx, line)
+	return int64(copied), err
+}
+
+// Write implements fs.FileOperations.Write.
+//
+// At most usermem.PageSize-1 bytes of src are consumed. The parse buffer is
+// seeded with the stack's current min/default/max, and whatever it holds
+// after parsing is written back to the stack, even when parsing reported an
+// error. The offset is ignored.
+//
+// NOTE(review): this presumably lets a short write such as "4096" change only
+// the leading field(s); that depends on CopyInt32StringsInVec leaving
+// unparsed slots untouched -- confirm.
+func (f *tcpMemFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
+	if src.NumBytes() == 0 {
+		return 0, nil
+	}
+
+	inode := f.tcpMemInode
+	inode.mu.Lock()
+	defer inode.mu.Unlock()
+
+	src = src.TakeFirst(usermem.PageSize - 1)
+	cur, err := readSize(inode.dir, inode.s)
+	if err != nil {
+		return 0, err
+	}
+
+	vals := []int32{int32(cur.Min), int32(cur.Default), int32(cur.Max)}
+	n, copyErr := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, vals, src.Opts)
+
+	updated := inet.TCPBufferSize{
+		Min:     int(vals[0]),
+		Default: int(vals[1]),
+		Max:     int(vals[2]),
+	}
+	// A failure to apply the new size takes precedence over a parse error.
+	if setErr := writeSize(inode.dir, inode.s, updated); setErr != nil {
+		return n, setErr
+	}
+	return n, copyErr
+}
+
+// readSize returns the TCP buffer size of s selected by dirType: the receive
+// buffer size for tcpRMem, the send buffer size for tcpWMem. Any other
+// dirType is a programming error and panics.
+func readSize(dirType tcpMemDir, s inet.Stack) (inet.TCPBufferSize, error) {
+	if dirType == tcpRMem {
+		return s.TCPReceiveBufferSize()
+	}
+	if dirType == tcpWMem {
+		return s.TCPSendBufferSize()
+	}
+	panic(fmt.Sprintf("unknown tcpMemFile type: %v", dirType))
+}
+
+// writeSize applies size to the TCP buffer of s selected by dirType: the
+// receive buffer for tcpRMem, the send buffer for tcpWMem. Any other dirType
+// is a programming error and panics.
+func writeSize(dirType tcpMemDir, s inet.Stack, size inet.TCPBufferSize) error {
+	if dirType == tcpRMem {
+		return s.SetTCPReceiveBufferSize(size)
+	}
+	if dirType == tcpWMem {
+		return s.SetTCPSendBufferSize(size)
+	}
+	panic(fmt.Sprintf("unknown tcpMemFile type: %v", dirType))
+}
+
+// tcpSack is the inode backing the tcp_sack file (see newSysNetIPv4Dir). It
+// exposes whether TCP selective acknowledgement is enabled on the stack.
+//
+// +stateify savable
+type tcpSack struct {
+ // stack is the network stack whose SACK setting is updated on write.
+ stack inet.Stack `state:"wait"`
+ // enabled caches the SACK setting. It is nil until the first Read
+ // (which queries the stack) or Write populates it.
+ enabled *bool
+ fsutil.SimpleFileInode
+}
+
+// newTCPSackInode creates the inode backing the tcp_sack file for stack s.
+//
+// The inode is owned by root and created with mode 0644: the file implements
+// Write (and GetFile enables positional writes), so advertising it as
+// read-only (0444) would misreport what the file supports. 0644 is also the
+// mode Linux uses for tcp_sack.
+func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
+	ts := &tcpSack{
+		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
+		stack:           s,
+	}
+	sattr := fs.StableAttr{
+		DeviceID:  device.ProcDevice.DeviceID(),
+		InodeID:   device.ProcDevice.NextIno(),
+		BlockSize: usermem.PageSize,
+		Type:      fs.SpecialFile,
+	}
+	return fs.NewInode(ts, msrc, sattr)
+}
+
+// GetFile implements fs.InodeOperations.GetFile.
+//
+// It hands out a tcpSackFile bound to this inode and its stack. Positional
+// reads and writes are always enabled on the returned file, whatever flags
+// the caller supplied.
+func (s *tcpSack) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+	fops := &tcpSackFile{
+		tcpSack: s,
+		stack:   s.stack,
+	}
+	flags.Pread = true
+	flags.Pwrite = true
+	return fs.NewFile(ctx, dirent, flags, fops), nil
+}
+
+// tcpSackFile is the file implementation handed out by tcpSack.GetFile. Its
+// Read and Write report and update the SACK setting; the remaining file
+// operations come from the embedded fsutil helpers.
+//
+// +stateify savable
+type tcpSackFile struct {
+ fsutil.FileGenericSeek `state:"nosave"`
+ fsutil.FileNoIoctl `state:"nosave"`
+ fsutil.FileNoMMap `state:"nosave"`
+ fsutil.FileNoSplice `state:"nosave"`
+ fsutil.FileNoopRelease `state:"nosave"`
+ fsutil.FileNoopFlush `state:"nosave"`
+ fsutil.FileNoopFsync `state:"nosave"`
+ fsutil.FileNotDirReaddir `state:"nosave"`
+ fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
+
+ // tcpSack is the inode this file was opened from; it holds the cached
+ // setting shared by all files of the inode.
+ tcpSack *tcpSack
+
+ // stack is the stack queried by Read when no value is cached yet.
+ // GetFile sets it to the inode's stack.
+ stack inet.Stack `state:"wait"`
+}
+
+// Read implements fs.FileOperations.Read.
+//
+// A read at offset 0 produces "1\n" when SACK is enabled and "0\n" otherwise;
+// a read at any other offset returns io.EOF. The setting is fetched from the
+// stack the first time it is needed and cached on the inode afterwards.
+func (f *tcpSackFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
+	if offset != 0 {
+		return 0, io.EOF
+	}
+
+	inode := f.tcpSack
+	if inode.enabled == nil {
+		current, err := f.stack.TCPSACKEnabled()
+		if err != nil {
+			return 0, err
+		}
+		inode.enabled = &current
+	}
+
+	// Technically, this is not quite compatible with Linux. Linux stores
+	// the value as an integer, so writing "2" into tcp_sack reads back as
+	// 2 there, whereas only 0 or 1 is ever reported here.
+	out := []byte("0\n")
+	if *inode.enabled {
+		out = []byte("1\n")
+	}
+	written, err := dst.CopyOut(ctx, out)
+	return int64(written), err
+}
+
+// Write implements fs.FileOperations.Write.
+//
+// At most usermem.PageSize-1 bytes of src are parsed as a single integer; any
+// non-zero value enables SACK and zero disables it. The offset is ignored.
+//
+// The cached setting on the inode is updated only after the stack has
+// accepted the new value. If the stack rejects it, the cache keeps its
+// previous state, so later reads never report a setting that was not
+// applied.
+//
+// Returns the number of bytes consumed together with any error from parsing
+// the input or from the stack.
+func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
+	if src.NumBytes() == 0 {
+		return 0, nil
+	}
+	src = src.TakeFirst(usermem.PageSize - 1)
+
+	var v int32
+	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
+	if err != nil {
+		return n, err
+	}
+
+	enabled := v != 0
+	// Apply to the stack first; only a successful update may be cached.
+	if err := f.tcpSack.stack.SetTCPSACKEnabled(enabled); err != nil {
+		return n, err
+	}
+	if f.tcpSack.enabled == nil {
+		f.tcpSack.enabled = new(bool)
+	}
+	*f.tcpSack.enabled = enabled
+	return n, nil
+}
+
+// newSysNetCore builds the "core" directory inode (see newSysNetDir).
+//
+// Every entry is a static stub created with newStaticProcInode, pending a
+// real implementation in netstack. Values are chosen to be as close as
+// possible to actual netstack behavior. The stack argument s is currently
+// unused.
+func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
+	// Inodes are created in table order, so keep the order stable.
+	stubs := []struct{ name, value string }{
+		{"default_qdisc", "pfifo_fast"},
+		{"message_burst", "10"},
+		{"message_cost", "5"},
+		{"optmem_max", "0"},
+		{"rmem_default", "212992"},
+		{"rmem_max", "212992"},
+		{"somaxconn", "128"},
+		{"wmem_default", "212992"},
+		{"wmem_max", "212992"},
+	}
+
+	contents := make(map[string]*fs.Inode, len(stubs))
+	for _, stub := range stubs {
+		contents[stub.name] = newStaticProcInode(ctx, msrc, []byte(stub.value))
+	}
+
+	dir := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
+	return newProcInode(dir, msrc, fs.SpecialDirectory, nil)
+}
+
+// newSysNetIPv4Dir builds the "ipv4" directory inode (see newSysNetDir).
+//
+// tcp_sack is backed by the stack s. tcp_rmem and tcp_wmem are added only
+// when s can report the corresponding buffer size. Every other entry is a
+// static stub created with newStaticProcInode, pending a real implementation
+// in netstack; values are chosen to be as close as possible to actual
+// netstack behavior, or left empty.
+func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
+	contents := map[string]*fs.Inode{
+		"tcp_sack": newTCPSackInode(ctx, msrc, s),
+	}
+
+	// Inodes are created in table order, so keep the order stable.
+	stubs := []struct{ name, value string }{
+		{"ip_local_port_range", "16000 65535"},
+		{"ip_local_reserved_ports", ""},
+		{"ipfrag_time", "30"},
+		{"ip_nonlocal_bind", "0"},
+		{"ip_no_pmtu_disc", "1"},
+
+		// tcp_allowed_congestion_control tells the user what they are
+		// able to do as an unprivileged process, so it is left empty.
+		{"tcp_allowed_congestion_control", ""},
+		{"tcp_available_congestion_control", "reno"},
+		{"tcp_congestion_control", "reno"},
+
+		// Many of the following stubs describe features netstack
+		// does not support; those report "0" to indicate that they
+		// are disabled.
+		{"tcp_base_mss", "1280"},
+		{"tcp_dsack", "0"},
+		{"tcp_early_retrans", "0"},
+		{"tcp_fack", "0"},
+		{"tcp_fastopen", "0"},
+		{"tcp_fastopen_key", ""},
+		{"tcp_invalid_ratelimit", "0"},
+		{"tcp_keepalive_intvl", "0"},
+		{"tcp_keepalive_probes", "0"},
+		{"tcp_keepalive_time", "7200"},
+		{"tcp_mtu_probing", "0"},
+		{"tcp_no_metrics_save", "1"},
+		{"tcp_probe_interval", "0"},
+		{"tcp_probe_threshold", "0"},
+		{"tcp_retries1", "3"},
+		{"tcp_retries2", "15"},
+		{"tcp_rfc1337", "1"},
+		{"tcp_slow_start_after_idle", "1"},
+		{"tcp_synack_retries", "5"},
+		{"tcp_syn_retries", "3"},
+		{"tcp_timestamps", "1"},
+	}
+	for _, stub := range stubs {
+		contents[stub.name] = newStaticProcInode(ctx, msrc, []byte(stub.value))
+	}
+
+	// Expose tcp_rmem only if the stack can report a receive buffer size.
+	if _, err := s.TCPReceiveBufferSize(); err == nil {
+		contents["tcp_rmem"] = newTCPMemInode(ctx, msrc, s, tcpRMem)
+	}
+
+	// Expose tcp_wmem only if the stack can report a send buffer size.
+	if _, err := s.TCPSendBufferSize(); err == nil {
+		contents["tcp_wmem"] = newTCPMemInode(ctx, msrc, s, tcpWMem)
+	}
+
+	dir := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
+	return newProcInode(dir, msrc, fs.SpecialDirectory, nil)
+}
+
+// newSysNetDir builds the directory inode holding the "ipv4" and "core"
+// subdirectories. When the kernel has no network stack the directory is
+// created without any entries.
+func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
+	var entries map[string]*fs.Inode
+
+	stack := p.k.NetworkStack()
+	if stack != nil {
+		// ipv4 is built before core, which fixes the inode creation order.
+		ipv4 := p.newSysNetIPv4Dir(ctx, msrc, stack)
+		core := p.newSysNetCore(ctx, msrc, stack)
+		entries = map[string]*fs.Inode{
+			"ipv4": ipv4,
+			"core": core,
+		}
+	}
+
+	dir := ramfs.NewDir(ctx, entries, fs.RootOwner, fs.FilePermsFromMode(0555))
+	return newProcInode(dir, msrc, fs.SpecialDirectory, nil)
+}