diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/sentry/fs/proc/BUILD | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/net.go | 17 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/proc.go | 45 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/rpcinet_proc.go | 193 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/sys.go | 10 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/sys_net.go | 66 | ||||
-rw-r--r-- | pkg/sentry/socket/rpcinet/conn/conn.go | 20 | ||||
-rw-r--r-- | pkg/sentry/socket/rpcinet/stack.go | 90 |
8 files changed, 374 insertions, 69 deletions
diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index 18372cfbf..21b5fc0c3 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -44,6 +44,7 @@ go_library( "net.go", "proc.go", "proc_state.go", + "rpcinet_proc.go", "stat.go", "sys.go", "sys_net.go", @@ -70,6 +71,7 @@ go_library( "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/sentry/mm", + "//pkg/sentry/socket/rpcinet", "//pkg/sentry/usage", "//pkg/sentry/usermem", "//pkg/state", diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 6e464857a..e6bd35f27 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -32,6 +32,23 @@ func (p *proc) newNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { if s := p.k.NetworkStack(); s != nil && s.SupportsIPv6() { d.AddChild(ctx, "dev", seqfile.NewSeqFileInode(ctx, &netDev{s: s}, msrc)) d.AddChild(ctx, "if_inet6", seqfile.NewSeqFileInode(ctx, &ifinet6{s: s}, msrc)) + + // The following files are simple stubs until they are implemented in + // netstack, if the file contains a header the stub is just the header + // otherwise it is an empty file. 
+ d.AddChild(ctx, "arp", p.newStubProcFSFile(ctx, msrc, []byte("IP address HW type Flags HW address Mask Device"))) + d.AddChild(ctx, "ipv6_route", p.newStubProcFSFile(ctx, msrc, []byte(""))) + d.AddChild(ctx, "netlink", p.newStubProcFSFile(ctx, msrc, []byte("sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode"))) + d.AddChild(ctx, "netstat", p.newStubProcFSFile(ctx, msrc, []byte("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv 
TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess"))) + d.AddChild(ctx, "packet", p.newStubProcFSFile(ctx, msrc, []byte("sk RefCnt Type Proto Iface R Rmem User Inode"))) + d.AddChild(ctx, "protocols", p.newStubProcFSFile(ctx, msrc, []byte("protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em"))) + d.AddChild(ctx, "psched", p.newStubProcFSFile(ctx, msrc, []byte(""))) + d.AddChild(ctx, "ptype", p.newStubProcFSFile(ctx, msrc, []byte("Type Device Function"))) + d.AddChild(ctx, "route", p.newStubProcFSFile(ctx, msrc, []byte("Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT"))) + d.AddChild(ctx, "tcp", p.newStubProcFSFile(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode"))) + d.AddChild(ctx, "tcp6", p.newStubProcFSFile(ctx, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode"))) + d.AddChild(ctx, "udp", p.newStubProcFSFile(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops"))) + d.AddChild(ctx, "udp6", p.newStubProcFSFile(ctx, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode"))) } return newFile(d, msrc, fs.SpecialDirectory, nil) } diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index 459eb7e62..d727e1bc9 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -17,6 +17,7 @@ package proc import ( "fmt" + "io" "sort" "strconv" @@ -26,6 +27,9 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" 
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" ) // proc is a root proc node. @@ -40,6 +44,30 @@ type proc struct { pidns *kernel.PIDNamespace } +// stubProcFSFile is a file type that can be used to return file contents +// which are constant. This file is not writable and will always have mode +// 0444. +type stubProcFSFile struct { + ramfs.Entry + + // contents are the immutable file contents that will always be returned. + contents []byte +} + +// DeprecatedPreadv implements fs.InodeOperations.DeprecatedPreadv. +func (s *stubProcFSFile) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + + if offset >= int64(len(s.contents)) { + return 0, io.EOF + } + + n, err := dst.CopyOut(ctx, s.contents[offset:]) + return int64(n), err +} + // New returns the root node of a partial simple procfs. func New(ctx context.Context, msrc *fs.MountSource) (*fs.Inode, error) { k := kernel.KernelFromContext(ctx) @@ -83,6 +111,15 @@ func (p *proc) newSelf(ctx context.Context, msrc *fs.MountSource) *fs.Inode { return newFile(s, msrc, fs.Symlink, nil) } +// newStubProcFSFile returns a procfs file with constant contents. +func (p *proc) newStubProcFSFile(ctx context.Context, msrc *fs.MountSource, c []byte) *fs.Inode { + u := &stubProcFSFile{ + contents: c, + } + u.InitEntry(ctx, fs.RootOwner, fs.FilePermsFromMode(0444)) + return newFile(u, msrc, fs.SpecialFile, nil) +} + // Readlink implements fs.InodeOperations.Readlink. func (s *self) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { if t := kernel.TaskFromContext(ctx); t != nil { @@ -107,7 +144,13 @@ func (p *proc) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dire // Is it a dynamic element? 
nfs := map[string]func() *fs.Inode{ - "net": func() *fs.Inode { return p.newNetDir(ctx, dir.MountSource) }, + "net": func() *fs.Inode { + // If we're using rpcinet we will let it manage /proc/net. + if _, ok := p.k.NetworkStack().(*rpcinet.Stack); ok { + return newRPCInetProcNet(ctx, dir.MountSource) + } + return p.newNetDir(ctx, dir.MountSource) + }, "self": func() *fs.Inode { return p.newSelf(ctx, dir.MountSource) }, "sys": func() *fs.Inode { return p.newSysDir(ctx, dir.MountSource) }, } diff --git a/pkg/sentry/fs/proc/rpcinet_proc.go b/pkg/sentry/fs/proc/rpcinet_proc.go new file mode 100644 index 000000000..50d0271f9 --- /dev/null +++ b/pkg/sentry/fs/proc/rpcinet_proc.go @@ -0,0 +1,193 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "io" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" +) + +// rpcinetFile implements fs.InodeOperations as RPCs. +type rpcinetFile struct { + ramfs.Entry + + // filepath is the full path of this rpcinetFile. + filepath string + + k *kernel.Kernel +} + +// DeprecatedPreadv implements fs.InodeOperations.DeprecatedPreadv. 
+// This method can panic if an rpcinetFile was created without an rpcinet +// stack. +func (r rpcinetFile) DeprecatedPreadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) { + s, ok := r.k.NetworkStack().(*rpcinet.Stack) + if !ok { + panic("Network stack is not a rpcinet.") + } + + contents, se := s.RPCReadFile(r.filepath) + if se != nil || offset >= int64(len(contents)) { + return 0, io.EOF + } + + n, err := dst.CopyOut(ctx, contents[offset:]) + return int64(n), err +} + +// Truncate implements fs.InodeOperations.Truncate. +func (r rpcinetFile) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + +// DeprecatedPwritev implements fs.InodeOperations.DeprecatedPwritev. +// This method can panic if an rpcinetFile was created without an rpcinet +// stack. +func (r rpcinetFile) DeprecatedPwritev(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + s, ok := r.k.NetworkStack().(*rpcinet.Stack) + if !ok { + panic("Network stack is not a rpcinet.") + } + + if src.NumBytes() == 0 { + return 0, nil + } + + b := make([]byte, src.NumBytes(), src.NumBytes()) + n, err := src.CopyIn(ctx, b) + if err != nil { + return int64(n), err + } + + written, se := s.RPCWriteFile(r.filepath, b) + return int64(written), se.ToError() +} + +func newRPCProcFSFile(ctx context.Context, msrc *fs.MountSource, filepath string, mode linux.FileMode) *fs.Inode { + f := &rpcinetFile{ + filepath: filepath, + k: kernel.KernelFromContext(ctx), + } + f.InitEntry(ctx, fs.RootOwner, fs.FilePermsFromMode(mode)) + + fi := newFile(f, msrc, fs.SpecialFile, nil) + return fi +} + +// newRPCInetProcNet will build an inode for /proc/net. +func newRPCInetProcNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + d := &ramfs.Dir{} + d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) + + // Add all the files we want to forward for /proc/net. 
+ d.AddChild(ctx, "arp", newRPCProcFSFile(ctx, msrc, "/proc/net/arp", 0444)) + d.AddChild(ctx, "dev", newRPCProcFSFile(ctx, msrc, "/proc/net/dev", 0444)) + d.AddChild(ctx, "if_inet6", newRPCProcFSFile(ctx, msrc, "/proc/net/if_inet6", 0444)) + d.AddChild(ctx, "ipv6_route", newRPCProcFSFile(ctx, msrc, "/proc/net/ipv6_route", 0444)) + d.AddChild(ctx, "netlink", newRPCProcFSFile(ctx, msrc, "/proc/net/netlink", 0444)) + d.AddChild(ctx, "netstat", newRPCProcFSFile(ctx, msrc, "/proc/net/netstat", 0444)) + d.AddChild(ctx, "packet", newRPCProcFSFile(ctx, msrc, "/proc/net/packet", 0444)) + d.AddChild(ctx, "protocols", newRPCProcFSFile(ctx, msrc, "/proc/net/protocols", 0444)) + d.AddChild(ctx, "psched", newRPCProcFSFile(ctx, msrc, "/proc/net/psched", 0444)) + d.AddChild(ctx, "ptype", newRPCProcFSFile(ctx, msrc, "/proc/net/ptype", 0444)) + d.AddChild(ctx, "route", newRPCProcFSFile(ctx, msrc, "/proc/net/route", 0444)) + d.AddChild(ctx, "tcp", newRPCProcFSFile(ctx, msrc, "/proc/net/tcp", 0444)) + d.AddChild(ctx, "tcp6", newRPCProcFSFile(ctx, msrc, "/proc/net/tcp6", 0444)) + d.AddChild(ctx, "udp", newRPCProcFSFile(ctx, msrc, "/proc/net/udp", 0444)) + d.AddChild(ctx, "udp6", newRPCProcFSFile(ctx, msrc, "/proc/net/udp6", 0444)) + + return newFile(d, msrc, fs.SpecialDirectory, nil) +} + +// newRPCInetProcSysNet will build an inode for /proc/sys/net. +func newRPCInetProcSysNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + d := &ramfs.Dir{} + d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) + d.AddChild(ctx, "ipv4", newRPCInetSysNetIPv4Dir(ctx, msrc)) + d.AddChild(ctx, "core", newRPCInetSysNetCore(ctx, msrc)) + + return newFile(d, msrc, fs.SpecialDirectory, nil) +} + +// newRPCInetSysNetCore builds the /proc/sys/net/core directory. 
+func newRPCInetSysNetCore(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + d := &ramfs.Dir{} + d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) + + // Add all the files we want to forward over RPC for /proc/sys/net/core + d.AddChild(ctx, "default_qdisc", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/core/default_qdisc", 0444)) + d.AddChild(ctx, "message_burst", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/core/message_burst", 0444)) + d.AddChild(ctx, "message_cost", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/core/message_cost", 0444)) + d.AddChild(ctx, "optmem_max", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/core/optmem_max", 0444)) + d.AddChild(ctx, "rmem_default", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/core/rmem_default", 0444)) + d.AddChild(ctx, "rmem_max", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/core/rmem_max", 0444)) + d.AddChild(ctx, "somaxconn", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/core/somaxconn", 0444)) + d.AddChild(ctx, "wmem_default", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/core/wmem_default", 0444)) + d.AddChild(ctx, "wmem_max", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/core/wmem_max", 0444)) + + return newFile(d, msrc, fs.SpecialDirectory, nil) +} + +// newRPCInetSysNetIPv4Dir builds the /proc/sys/net/ipv4 directory. +func newRPCInetSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { + d := &ramfs.Dir{} + d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) + + // Add all the files we want to forward over RPC for /proc/sys/net/ipv4. 
+ d.AddChild(ctx, "ip_local_port_range", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/ip_local_port_range", 0444)) + d.AddChild(ctx, "ip_local_reserved_ports", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/ip_local_reserved_ports", 0444)) + d.AddChild(ctx, "ipfrag_time", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/ipfrag_time", 0444)) + d.AddChild(ctx, "ip_nonlocal_bind", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/ip_nonlocal_bind", 0444)) + d.AddChild(ctx, "ip_no_pmtu_disc", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/ip_no_pmtu_disc", 0444)) + + d.AddChild(ctx, "tcp_allowed_congestion_control", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_allowed_congestion_control", 0444)) + d.AddChild(ctx, "tcp_available_congestion_control", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_available_congestion_control", 0444)) + d.AddChild(ctx, "tcp_base_mss", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_base_mss", 0444)) + d.AddChild(ctx, "tcp_congestion_control", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_congestion_control", 0644)) + d.AddChild(ctx, "tcp_dsack", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_dsack", 0644)) + d.AddChild(ctx, "tcp_early_retrans", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_early_retrans", 0644)) + d.AddChild(ctx, "tcp_fack", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_fack", 0644)) + d.AddChild(ctx, "tcp_fastopen", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_fastopen", 0644)) + d.AddChild(ctx, "tcp_fastopen_key", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_fastopen_key", 0444)) + d.AddChild(ctx, "tcp_fin_timeout", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_fin_timeout", 0644)) + d.AddChild(ctx, "tcp_invalid_ratelimit", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_invalid_ratelimit", 0444)) + d.AddChild(ctx, "tcp_keepalive_intvl", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_intvl", 0644)) + d.AddChild(ctx, "tcp_keepalive_probes", 
newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_probes", 0644)) + d.AddChild(ctx, "tcp_keepalive_time", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_time", 0644)) + d.AddChild(ctx, "tcp_mem", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_mem", 0444)) + d.AddChild(ctx, "tcp_mtu_probing", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_mtu_probing", 0644)) + d.AddChild(ctx, "tcp_no_metrics_save", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_no_metrics_save", 0444)) + d.AddChild(ctx, "tcp_probe_interval", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_probe_interval", 0444)) + d.AddChild(ctx, "tcp_probe_threshold", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_probe_threshold", 0444)) + d.AddChild(ctx, "tcp_retries1", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_retries1", 0644)) + d.AddChild(ctx, "tcp_retries2", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_retries2", 0644)) + d.AddChild(ctx, "tcp_rfc1337", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_rfc1337", 0444)) + d.AddChild(ctx, "tcp_rmem", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_rmem", 0444)) + d.AddChild(ctx, "tcp_sack", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_sack", 0644)) + d.AddChild(ctx, "tcp_slow_start_after_idle", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_slow_start_after_idle", 0644)) + d.AddChild(ctx, "tcp_synack_retries", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_synack_retries", 0644)) + d.AddChild(ctx, "tcp_syn_retries", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_syn_retries", 0644)) + d.AddChild(ctx, "tcp_timestamps", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_timestamps", 0644)) + d.AddChild(ctx, "tcp_wmem", newRPCProcFSFile(ctx, msrc, "/proc/sys/net/ipv4/tcp_wmem", 0444)) + + return newFile(d, msrc, fs.SpecialDirectory, nil) +} diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go index 4323f3650..db9ec83b9 100644 --- 
a/pkg/sentry/fs/proc/sys.go +++ b/pkg/sentry/fs/proc/sys.go @@ -23,6 +23,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) @@ -112,6 +113,13 @@ func (p *proc) newSysDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) d.AddChild(ctx, "kernel", p.newKernelDir(ctx, msrc)) d.AddChild(ctx, "vm", p.newVMDir(ctx, msrc)) - d.AddChild(ctx, "net", p.newSysNetDir(ctx, msrc)) + + // If we're using rpcinet we will let it manage /proc/sys/net. + if _, ok := p.k.NetworkStack().(*rpcinet.Stack); ok { + d.AddChild(ctx, "net", newRPCInetProcSysNet(ctx, msrc)) + } else { + d.AddChild(ctx, "net", p.newSysNetDir(ctx, msrc)) + } + return newFile(d, msrc, fs.SpecialDirectory, nil) } diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index db44c95cb..2a108708c 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -158,7 +158,28 @@ func (s *tcpSack) DeprecatedPwritev(ctx context.Context, src usermem.IOSequence, return n, s.s.SetTCPSACKEnabled(v != 0) } -func newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { +func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + d := &ramfs.Dir{} + d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) + + // The following files are simple stubs until they are implemented in + // netstack; most of these files are configuration related. We use the + // value closest to the actual netstack behavior or an empty file; + // all of these files will have mode 0444 (read-only for all users). 
+ d.AddChild(ctx, "default_qdisc", p.newStubProcFSFile(ctx, msrc, []byte("pfifo_fast"))) + d.AddChild(ctx, "message_burst", p.newStubProcFSFile(ctx, msrc, []byte("10"))) + d.AddChild(ctx, "message_cost", p.newStubProcFSFile(ctx, msrc, []byte("5"))) + d.AddChild(ctx, "optmem_max", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "rmem_default", p.newStubProcFSFile(ctx, msrc, []byte("212992"))) + d.AddChild(ctx, "rmem_max", p.newStubProcFSFile(ctx, msrc, []byte("212992"))) + d.AddChild(ctx, "somaxconn", p.newStubProcFSFile(ctx, msrc, []byte("128"))) + d.AddChild(ctx, "wmem_default", p.newStubProcFSFile(ctx, msrc, []byte("212992"))) + d.AddChild(ctx, "wmem_max", p.newStubProcFSFile(ctx, msrc, []byte("212992"))) + + return newFile(d, msrc, fs.SpecialDirectory, nil) +} + +func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { d := &ramfs.Dir{} d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) @@ -175,6 +196,46 @@ func newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) * // Add tcp_sack. d.AddChild(ctx, "tcp_sack", newTCPSackInode(ctx, msrc, s)) + // The following files are simple stubs until they are implemented in + // netstack; most of these files are configuration related. We use the + // value closest to the actual netstack behavior or an empty file; + // all of these files will have mode 0444 (read-only for all users). 
+ d.AddChild(ctx, "ip_local_port_range", p.newStubProcFSFile(ctx, msrc, []byte("16000 65535"))) + d.AddChild(ctx, "ip_local_reserved_ports", p.newStubProcFSFile(ctx, msrc, []byte(""))) + d.AddChild(ctx, "ipfrag_time", p.newStubProcFSFile(ctx, msrc, []byte("30"))) + d.AddChild(ctx, "ip_nonlocal_bind", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "ip_no_pmtu_disc", p.newStubProcFSFile(ctx, msrc, []byte("1"))) + + // tcp_allowed_congestion_control tells the user what they are able to do as an + // unprivileged process so we leave it empty. 
+ d.AddChild(ctx, "tcp_allowed_congestion_control", p.newStubProcFSFile(ctx, msrc, []byte(""))) + d.AddChild(ctx, "tcp_available_congestion_control", p.newStubProcFSFile(ctx, msrc, []byte("reno"))) + d.AddChild(ctx, "tcp_congestion_control", p.newStubProcFSFile(ctx, msrc, []byte("reno"))) + + // Many of the following stub files are features netstack doesn't support + // and are therefore "0" for disabled. + d.AddChild(ctx, "tcp_base_mss", p.newStubProcFSFile(ctx, msrc, []byte("1280"))) + d.AddChild(ctx, "tcp_dsack", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "tcp_early_retrans", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "tcp_fack", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "tcp_fastopen", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "tcp_fastopen_key", p.newStubProcFSFile(ctx, msrc, []byte(""))) + d.AddChild(ctx, "tcp_invalid_ratelimit", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "tcp_keepalive_intvl", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "tcp_keepalive_probes", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "tcp_keepalive_time", p.newStubProcFSFile(ctx, msrc, []byte("7200"))) + d.AddChild(ctx, "tcp_mtu_probing", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "tcp_no_metrics_save", p.newStubProcFSFile(ctx, msrc, []byte("1"))) + d.AddChild(ctx, "tcp_probe_interval", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "tcp_probe_threshold", p.newStubProcFSFile(ctx, msrc, []byte("0"))) + d.AddChild(ctx, "tcp_retries1", p.newStubProcFSFile(ctx, msrc, []byte("3"))) + d.AddChild(ctx, "tcp_retries2", p.newStubProcFSFile(ctx, msrc, []byte("15"))) + d.AddChild(ctx, "tcp_rfc1337", p.newStubProcFSFile(ctx, msrc, []byte("1"))) + d.AddChild(ctx, "tcp_slow_start_after_idle", p.newStubProcFSFile(ctx, msrc, []byte("1"))) + d.AddChild(ctx, "tcp_synack_retries", p.newStubProcFSFile(ctx, msrc, []byte("5"))) + 
d.AddChild(ctx, "tcp_syn_retries", p.newStubProcFSFile(ctx, msrc, []byte("3"))) + d.AddChild(ctx, "tcp_timestamps", p.newStubProcFSFile(ctx, msrc, []byte("1"))) + return newFile(d, msrc, fs.SpecialDirectory, nil) } @@ -182,7 +243,8 @@ func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode d := &ramfs.Dir{} d.InitDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) if s := p.k.NetworkStack(); s != nil { - d.AddChild(ctx, "ipv4", newSysNetIPv4Dir(ctx, msrc, s)) + d.AddChild(ctx, "ipv4", p.newSysNetIPv4Dir(ctx, msrc, s)) + d.AddChild(ctx, "core", p.newSysNetCore(ctx, msrc, s)) } return newFile(d, msrc, fs.SpecialDirectory, nil) } diff --git a/pkg/sentry/socket/rpcinet/conn/conn.go b/pkg/sentry/socket/rpcinet/conn/conn.go index ea6ec87ed..f4c8489b1 100644 --- a/pkg/sentry/socket/rpcinet/conn/conn.go +++ b/pkg/sentry/socket/rpcinet/conn/conn.go @@ -147,6 +147,26 @@ func (c *RPCConnection) RPCReadFile(path string) ([]byte, *syserr.Error) { return res.(*pb.ReadFileResponse_Data).Data, nil } +// RPCWriteFile will execute the WriteFile helper RPC method which avoids the +// common pattern of open(2), write(2), write(2), close(2) by doing all +// operations as a single RPC. +func (c *RPCConnection) RPCWriteFile(path string, data []byte) (int64, *syserr.Error) { + req := &pb.SyscallRequest_WriteFile{&pb.WriteFileRequest{ + Path: path, + Content: data, + }} + + id, ch := c.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) + <-ch + + res := c.Request(id).Result.(*pb.SyscallResponse_WriteFile).WriteFile + if e := res.ErrorNumber; e != 0 { + return int64(res.Written), syserr.FromHost(syscall.Errno(e)) + } + + return int64(res.Written), nil +} + // Request retrieves the request corresponding to the given request ID. 
// // The channel returned by NewRequest must have been closed before Request can diff --git a/pkg/sentry/socket/rpcinet/stack.go b/pkg/sentry/socket/rpcinet/stack.go index 503e0e932..bcb89fb34 100644 --- a/pkg/sentry/socket/rpcinet/stack.go +++ b/pkg/sentry/socket/rpcinet/stack.go @@ -16,50 +16,24 @@ package rpcinet import ( "fmt" - "strings" "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" "gvisor.googlesource.com/gvisor/pkg/sentry/inet" "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet" "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/conn" "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/notifier" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.googlesource.com/gvisor/pkg/syserr" "gvisor.googlesource.com/gvisor/pkg/unet" ) // Stack implements inet.Stack for RPC backed sockets. type Stack struct { - // We intentionally do not allow these values to be changed to remain - // consistent with the other networking stacks. 
interfaces map[int32]inet.Interface interfaceAddrs map[int32][]inet.InterfaceAddr - supportsIPv6 bool - tcpRecvBufSize inet.TCPBufferSize - tcpSendBufSize inet.TCPBufferSize - tcpSACKEnabled bool rpcConn *conn.RPCConnection notifier *notifier.Notifier } -func readTCPBufferSizeFile(conn *conn.RPCConnection, filename string) (inet.TCPBufferSize, error) { - contents, se := conn.RPCReadFile(filename) - if se != nil { - return inet.TCPBufferSize{}, fmt.Errorf("failed to read %s: %v", filename, se) - } - ioseq := usermem.BytesIOSequence(contents) - fields := make([]int32, 3) - if n, err := usermem.CopyInt32StringsInVec(context.Background(), ioseq.IO, ioseq.Addrs, fields, ioseq.Opts); n != ioseq.NumBytes() || err != nil { - return inet.TCPBufferSize{}, fmt.Errorf("failed to parse %s (%q): got %v after %d/%d bytes", filename, contents, err, n, ioseq.NumBytes()) - } - return inet.TCPBufferSize{ - Min: int(fields[0]), - Default: int(fields[1]), - Max: int(fields[2]), - }, nil -} - // NewStack returns a Stack containing the current state of the host network // stack. func NewStack(fd int32) (*Stack, error) { @@ -80,31 +54,6 @@ func NewStack(fd int32) (*Stack, error) { return nil, e } - // Load the configuration values from procfs. 
- tcpRMem, e := readTCPBufferSizeFile(stack.rpcConn, "/proc/sys/net/ipv4/tcp_rmem") - if e != nil { - return nil, e - } - stack.tcpRecvBufSize = tcpRMem - - tcpWMem, e := readTCPBufferSizeFile(stack.rpcConn, "/proc/sys/net/ipv4/tcp_wmem") - if e != nil { - return nil, e - } - stack.tcpSendBufSize = tcpWMem - - ipv6, se := stack.rpcConn.RPCReadFile("/proc/net/if_inet6") - if len(string(ipv6)) > 0 { - stack.supportsIPv6 = true - } - - sackFile := "/proc/sys/net/ipv4/tcp_sack" - sack, se := stack.rpcConn.RPCReadFile(sackFile) - if se != nil { - return nil, fmt.Errorf("failed to read %s: %v", sackFile, se) - } - stack.tcpSACKEnabled = strings.TrimSpace(string(sack)) != "0" - links, err := stack.DoNetlinkRouteRequest(syscall.RTM_GETLINK) if err != nil { return nil, fmt.Errorf("RTM_GETLINK failed: %v", err) @@ -123,6 +72,21 @@ func NewStack(fd int32) (*Stack, error) { return stack, nil } +// RPCReadFile will execute the ReadFile helper RPC method which avoids the +// common pattern of open(2), read(2), close(2) by doing all three operations +// as a single RPC. It will read the entire file or return EFBIG if the file +// was too large. +func (s *Stack) RPCReadFile(path string) ([]byte, *syserr.Error) { + return s.rpcConn.RPCReadFile(path) +} + +// RPCWriteFile will execute the WriteFile helper RPC method which avoids the +// common pattern of open(2), write(2), write(2), close(2) by doing all +// operations as a single RPC. +func (s *Stack) RPCWriteFile(path string, data []byte) (int64, *syserr.Error) { + return s.rpcConn.RPCWriteFile(path, data) +} + // Interfaces implements inet.Stack.Interfaces. func (s *Stack) Interfaces() map[int32]inet.Interface { return s.interfaces @@ -135,41 +99,37 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { // SupportsIPv6 implements inet.Stack.SupportsIPv6. 
func (s *Stack) SupportsIPv6() bool { - return s.supportsIPv6 + panic("rpcinet handles procfs directly this method should not be called") } // TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize. func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { - return s.tcpRecvBufSize, nil + panic("rpcinet handles procfs directly this method should not be called") } // SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize. func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error { - // To keep all the supported stacks consistent we don't allow changing this - // value even though it would be possible via an RPC. - return syserror.EACCES + panic("rpcinet handles procfs directly this method should not be called") + } // TCPSendBufferSize implements inet.Stack.TCPSendBufferSize. func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) { - return s.tcpSendBufSize, nil + panic("rpcinet handles procfs directly this method should not be called") + } // SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize. func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error { - // To keep all the supported stacks consistent we don't allow changing this - // value even though it would be possible via an RPC. - return syserror.EACCES + panic("rpcinet handles procfs directly this method should not be called") } // TCPSACKEnabled implements inet.Stack.TCPSACKEnabled. func (s *Stack) TCPSACKEnabled() (bool, error) { - return s.tcpSACKEnabled, nil + panic("rpcinet handles procfs directly this method should not be called") } // SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled. func (s *Stack) SetTCPSACKEnabled(enabled bool) error { - // To keep all the supported stacks consistent we don't allow changing this - // value even though it would be possible via an RPC. - return syserror.EACCES + panic("rpcinet handles procfs directly this method should not be called") } |