diff options
Diffstat (limited to 'pkg/sentry/fs/proc/sys_net.go')
-rw-r--r-- | pkg/sentry/fs/proc/sys_net.go | 355 |
1 files changed, 355 insertions, 0 deletions
diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go new file mode 100644 index 000000000..dbf1a987c --- /dev/null +++ b/pkg/sentry/fs/proc/sys_net.go @@ -0,0 +1,355 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "fmt" + "io" + "sync" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" + "gvisor.googlesource.com/gvisor/pkg/sentry/inet" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +type tcpMemDir int + +const ( + tcpRMem tcpMemDir = iota + tcpWMem +) + +// tcpMemInode is used to read/write the size of netstack tcp buffers. +// +// TODO(b/121381035): If we have multiple proc mounts, concurrent writes can +// leave netstack and the proc files in an inconsistent state. Since we set the +// buffer size from these proc files on restore, we may also race and end up in +// an inconsistent state on restore. +// +// +stateify savable +type tcpMemInode struct { + fsutil.SimpleFileInode + dir tcpMemDir + s inet.Stack `state:"wait"` + + // size stores the tcp buffer size during save, and sets the buffer + // size in netstack in restore. We must save/restore this here, since + // netstack itself is stateless. + size inet.TCPBufferSize + + // mu protects against concurrent reads/writes to files based on this + // inode. + mu sync.Mutex `state:"nosave"` +} + +var _ fs.InodeOperations = (*tcpMemInode)(nil) + +func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir tcpMemDir) *fs.Inode { + tm := &tcpMemInode{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + s: s, + dir: dir, + } + sattr := fs.StableAttr{ + DeviceID: device.ProcDevice.DeviceID(), + InodeID: device.ProcDevice.NextIno(), + BlockSize: usermem.PageSize, + Type: fs.SpecialFile, + } + return fs.NewInode(tm, msrc, sattr) +} + +// GetFile implements fs.InodeOperations.GetFile. +func (m *tcpMemInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + flags.Pread = true + return fs.NewFile(ctx, dirent, flags, &tcpMemFile{tcpMemInode: m}), nil +} + +// +stateify savable +type tcpMemFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + tcpMemInode *tcpMemInode +} + +var _ fs.FileOperations = (*tcpMemFile)(nil) + +// Read implements fs.FileOperations.Read. +func (f *tcpMemFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + return 0, io.EOF + } + f.tcpMemInode.mu.Lock() + defer f.tcpMemInode.mu.Unlock() + + size, err := readSize(f.tcpMemInode.dir, f.tcpMemInode.s) + if err != nil { + return 0, err + } + s := fmt.Sprintf("%d\t%d\t%d\n", size.Min, size.Default, size.Max) + n, err := dst.CopyOut(ctx, []byte(s)) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. +func (f *tcpMemFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + f.tcpMemInode.mu.Lock() + defer f.tcpMemInode.mu.Unlock() + + src = src.TakeFirst(usermem.PageSize - 1) + size, err := readSize(f.tcpMemInode.dir, f.tcpMemInode.s) + if err != nil { + return 0, err + } + buf := []int32{int32(size.Min), int32(size.Default), int32(size.Max)} + n, cperr := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, buf, src.Opts) + newSize := inet.TCPBufferSize{ + Min: int(buf[0]), + Default: int(buf[1]), + Max: int(buf[2]), + } + if err := writeSize(f.tcpMemInode.dir, f.tcpMemInode.s, newSize); err != nil { + return n, err + } + return n, cperr +} + +func readSize(dirType tcpMemDir, s inet.Stack) (inet.TCPBufferSize, error) { + switch dirType { + case tcpRMem: + return s.TCPReceiveBufferSize() + case tcpWMem: + return s.TCPSendBufferSize() + default: + panic(fmt.Sprintf("unknown tcpMemFile type: %v", dirType)) + } +} + +func writeSize(dirType tcpMemDir, s inet.Stack, size inet.TCPBufferSize) error { + switch dirType { + case tcpRMem: + return s.SetTCPReceiveBufferSize(size) + case tcpWMem: + return s.SetTCPSendBufferSize(size) + default: + panic(fmt.Sprintf("unknown tcpMemFile type: %v", dirType)) + } +} + +// +stateify savable +type tcpSack struct { + stack inet.Stack `state:"wait"` + enabled *bool + fsutil.SimpleFileInode +} + +func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + ts := &tcpSack{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + stack: s, + } + sattr := fs.StableAttr{ + DeviceID: device.ProcDevice.DeviceID(), + InodeID: device.ProcDevice.NextIno(), + BlockSize: usermem.PageSize, + Type: fs.SpecialFile, + } + return fs.NewInode(ts, msrc, sattr) +} + +// GetFile implements fs.InodeOperations.GetFile. +func (s *tcpSack) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + flags.Pread = true + flags.Pwrite = true + return fs.NewFile(ctx, dirent, flags, &tcpSackFile{ + tcpSack: s, + stack: s.stack, + }), nil +} + +// +stateify savable +type tcpSackFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + tcpSack *tcpSack + + stack inet.Stack `state:"wait"` +} + +// Read implements fs.FileOperations.Read. +func (f *tcpSackFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + return 0, io.EOF + } + + if f.tcpSack.enabled == nil { + sack, err := f.stack.TCPSACKEnabled() + if err != nil { + return 0, err + } + f.tcpSack.enabled = &sack + } + + val := "0\n" + if *f.tcpSack.enabled { + // Technically, this is not quite compatible with Linux. Linux + // stores these as an integer, so if you write "2" into + // tcp_sack, you should get 2 back. Tough luck. + val = "1\n" + } + n, err := dst.CopyOut(ctx, []byte(val)) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. +func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return n, err + } + if f.tcpSack.enabled == nil { + f.tcpSack.enabled = new(bool) + } + *f.tcpSack.enabled = v != 0 + return n, f.tcpSack.stack.SetTCPSACKEnabled(*f.tcpSack.enabled) +} + +func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + // The following files are simple stubs until they are implemented in + // netstack, most of these files are configuration related. We use the + // value closest to the actual netstack behavior or any empty file, + // all of these files will have mode 0444 (read-only for all users). + contents := map[string]*fs.Inode{ + "default_qdisc": newStaticProcInode(ctx, msrc, []byte("pfifo_fast")), + "message_burst": newStaticProcInode(ctx, msrc, []byte("10")), + "message_cost": newStaticProcInode(ctx, msrc, []byte("5")), + "optmem_max": newStaticProcInode(ctx, msrc, []byte("0")), + "rmem_default": newStaticProcInode(ctx, msrc, []byte("212992")), + "rmem_max": newStaticProcInode(ctx, msrc, []byte("212992")), + "somaxconn": newStaticProcInode(ctx, msrc, []byte("128")), + "wmem_default": newStaticProcInode(ctx, msrc, []byte("212992")), + "wmem_max": newStaticProcInode(ctx, msrc, []byte("212992")), + } + + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + contents := map[string]*fs.Inode{ + // Add tcp_sack. + "tcp_sack": newTCPSackInode(ctx, msrc, s), + + // The following files are simple stubs until they are + // implemented in netstack, most of these files are + // configuration related. We use the value closest to the + // actual netstack behavior or any empty file, all of these + // files will have mode 0444 (read-only for all users). + "ip_local_port_range": newStaticProcInode(ctx, msrc, []byte("16000 65535")), + "ip_local_reserved_ports": newStaticProcInode(ctx, msrc, []byte("")), + "ipfrag_time": newStaticProcInode(ctx, msrc, []byte("30")), + "ip_nonlocal_bind": newStaticProcInode(ctx, msrc, []byte("0")), + "ip_no_pmtu_disc": newStaticProcInode(ctx, msrc, []byte("1")), + + // tcp_allowed_congestion_control tell the user what they are + // able to do as an unprivledged process so we leave it empty. + "tcp_allowed_congestion_control": newStaticProcInode(ctx, msrc, []byte("")), + "tcp_available_congestion_control": newStaticProcInode(ctx, msrc, []byte("reno")), + "tcp_congestion_control": newStaticProcInode(ctx, msrc, []byte("reno")), + + // Many of the following stub files are features netstack + // doesn't support. The unsupported features return "0" to + // indicate they are disabled. + "tcp_base_mss": newStaticProcInode(ctx, msrc, []byte("1280")), + "tcp_dsack": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_early_retrans": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_fack": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_fastopen": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_fastopen_key": newStaticProcInode(ctx, msrc, []byte("")), + "tcp_invalid_ratelimit": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_keepalive_intvl": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_keepalive_probes": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_keepalive_time": newStaticProcInode(ctx, msrc, []byte("7200")), + "tcp_mtu_probing": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_no_metrics_save": newStaticProcInode(ctx, msrc, []byte("1")), + "tcp_probe_interval": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_probe_threshold": newStaticProcInode(ctx, msrc, []byte("0")), + "tcp_retries1": newStaticProcInode(ctx, msrc, []byte("3")), + "tcp_retries2": newStaticProcInode(ctx, msrc, []byte("15")), + "tcp_rfc1337": newStaticProcInode(ctx, msrc, []byte("1")), + "tcp_slow_start_after_idle": newStaticProcInode(ctx, msrc, []byte("1")), + "tcp_synack_retries": newStaticProcInode(ctx, msrc, []byte("5")), + "tcp_syn_retries": newStaticProcInode(ctx, msrc, []byte("3")), + "tcp_timestamps": newStaticProcInode(ctx, msrc, []byte("1")), + } + + // Add tcp_rmem. + if _, err := s.TCPReceiveBufferSize(); err == nil { + contents["tcp_rmem"] = newTCPMemInode(ctx, msrc, s, tcpRMem) + } + + // Add tcp_wmem. + if _, err := s.TCPSendBufferSize(); err == nil { + contents["tcp_wmem"] = newTCPMemInode(ctx, msrc, s, tcpWMem) + } + + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} + +func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + var contents map[string]*fs.Inode + if s := p.k.NetworkStack(); s != nil { + contents = map[string]*fs.Inode{ + "ipv4": p.newSysNetIPv4Dir(ctx, msrc, s), + "core": p.newSysNetCore(ctx, msrc, s), + } + } + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(d, msrc, fs.SpecialDirectory, nil) +} |