summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/socket/rpcinet
diff options
context:
space:
mode:
authorIan Lewis <ianmlewis@gmail.com>2020-08-17 21:44:31 -0400
committerIan Lewis <ianmlewis@gmail.com>2020-08-17 21:44:31 -0400
commitac324f646ee3cb7955b0b45a7453aeb9671cbdf1 (patch)
tree0cbc5018e8807421d701d190dc20525726c7ca76 /pkg/sentry/socket/rpcinet
parent352ae1022ce19de28fc72e034cc469872ad79d06 (diff)
parent6d0c5803d557d453f15ac6f683697eeb46dab680 (diff)
Merge branch 'master' into ip-forwarding
- Merges aleksej-paschenko's with HEAD - Adds vfs2 support for ip_forward
Diffstat (limited to 'pkg/sentry/socket/rpcinet')
-rw-r--r--pkg/sentry/socket/rpcinet/BUILD68
-rw-r--r--pkg/sentry/socket/rpcinet/conn/BUILD17
-rw-r--r--pkg/sentry/socket/rpcinet/conn/conn.go187
-rw-r--r--pkg/sentry/socket/rpcinet/device.go19
-rw-r--r--pkg/sentry/socket/rpcinet/notifier/BUILD16
-rw-r--r--pkg/sentry/socket/rpcinet/notifier/notifier.go231
-rw-r--r--pkg/sentry/socket/rpcinet/rpcinet.go16
-rw-r--r--pkg/sentry/socket/rpcinet/socket.go909
-rw-r--r--pkg/sentry/socket/rpcinet/stack.go178
-rw-r--r--pkg/sentry/socket/rpcinet/stack_unsafe.go193
-rw-r--r--pkg/sentry/socket/rpcinet/syscall_rpc.proto353
11 files changed, 0 insertions, 2187 deletions
diff --git a/pkg/sentry/socket/rpcinet/BUILD b/pkg/sentry/socket/rpcinet/BUILD
deleted file mode 100644
index 3a6baa308..000000000
--- a/pkg/sentry/socket/rpcinet/BUILD
+++ /dev/null
@@ -1,68 +0,0 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library")
-load("@rules_cc//cc:defs.bzl", "cc_proto_library")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "rpcinet",
- srcs = [
- "device.go",
- "rpcinet.go",
- "socket.go",
- "stack.go",
- "stack_unsafe.go",
- ],
- importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet",
- visibility = ["//pkg/sentry:internal"],
- deps = [
- ":syscall_rpc_go_proto",
- "//pkg/abi/linux",
- "//pkg/binary",
- "//pkg/sentry/arch",
- "//pkg/sentry/context",
- "//pkg/sentry/device",
- "//pkg/sentry/fs",
- "//pkg/sentry/fs/fsutil",
- "//pkg/sentry/inet",
- "//pkg/sentry/kernel",
- "//pkg/sentry/kernel/time",
- "//pkg/sentry/socket",
- "//pkg/sentry/socket/hostinet",
- "//pkg/sentry/socket/rpcinet/conn",
- "//pkg/sentry/socket/rpcinet/notifier",
- "//pkg/sentry/unimpl",
- "//pkg/sentry/usermem",
- "//pkg/syserr",
- "//pkg/syserror",
- "//pkg/tcpip",
- "//pkg/tcpip/buffer",
- "//pkg/unet",
- "//pkg/waiter",
- ],
-)
-
-proto_library(
- name = "syscall_rpc_proto",
- srcs = ["syscall_rpc.proto"],
- visibility = [
- "//visibility:public",
- ],
-)
-
-cc_proto_library(
- name = "syscall_rpc_cc_proto",
- visibility = [
- "//visibility:public",
- ],
- deps = [":syscall_rpc_proto"],
-)
-
-go_proto_library(
- name = "syscall_rpc_go_proto",
- importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto",
- proto = ":syscall_rpc_proto",
- visibility = [
- "//visibility:public",
- ],
-)
diff --git a/pkg/sentry/socket/rpcinet/conn/BUILD b/pkg/sentry/socket/rpcinet/conn/BUILD
deleted file mode 100644
index 23eadcb1b..000000000
--- a/pkg/sentry/socket/rpcinet/conn/BUILD
+++ /dev/null
@@ -1,17 +0,0 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "conn",
- srcs = ["conn.go"],
- importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn",
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/binary",
- "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto",
- "//pkg/syserr",
- "//pkg/unet",
- "@com_github_golang_protobuf//proto:go_default_library",
- ],
-)
diff --git a/pkg/sentry/socket/rpcinet/conn/conn.go b/pkg/sentry/socket/rpcinet/conn/conn.go
deleted file mode 100644
index 356adad99..000000000
--- a/pkg/sentry/socket/rpcinet/conn/conn.go
+++ /dev/null
@@ -1,187 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package conn is an RPC connection to a syscall RPC server.
-package conn
-
-import (
- "fmt"
- "sync"
- "sync/atomic"
- "syscall"
-
- "github.com/golang/protobuf/proto"
- "gvisor.dev/gvisor/pkg/binary"
- "gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/unet"
-
- pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
-)
-
-type request struct {
- response []byte
- ready chan struct{}
- ignoreResult bool
-}
-
-// RPCConnection represents a single RPC connection to a syscall gofer.
-type RPCConnection struct {
- // reqID is the ID of the last request and must be accessed atomically.
- reqID uint64
-
- sendMu sync.Mutex
- socket *unet.Socket
-
- reqMu sync.Mutex
- requests map[uint64]request
-}
-
-// NewRPCConnection initializes a RPC connection to a socket gofer.
-func NewRPCConnection(s *unet.Socket) *RPCConnection {
- conn := &RPCConnection{socket: s, requests: map[uint64]request{}}
- go func() { // S/R-FIXME(b/77962828)
- var nums [16]byte
- for {
- for n := 0; n < len(nums); {
- nn, err := conn.socket.Read(nums[n:])
- if err != nil {
- panic(fmt.Sprint("error reading length from socket rpc gofer: ", err))
- }
- n += nn
- }
-
- b := make([]byte, binary.LittleEndian.Uint64(nums[:8]))
- id := binary.LittleEndian.Uint64(nums[8:])
-
- for n := 0; n < len(b); {
- nn, err := conn.socket.Read(b[n:])
- if err != nil {
- panic(fmt.Sprint("error reading request from socket rpc gofer: ", err))
- }
- n += nn
- }
-
- conn.reqMu.Lock()
- r := conn.requests[id]
- if r.ignoreResult {
- delete(conn.requests, id)
- } else {
- r.response = b
- conn.requests[id] = r
- }
- conn.reqMu.Unlock()
- close(r.ready)
- }
- }()
- return conn
-}
-
-// NewRequest makes a request to the RPC gofer and returns the request ID and a
-// channel which will be closed once the request completes.
-func (c *RPCConnection) NewRequest(req pb.SyscallRequest, ignoreResult bool) (uint64, chan struct{}) {
- b, err := proto.Marshal(&req)
- if err != nil {
- panic(fmt.Sprint("invalid proto: ", err))
- }
-
- id := atomic.AddUint64(&c.reqID, 1)
- ch := make(chan struct{})
-
- c.reqMu.Lock()
- c.requests[id] = request{ready: ch, ignoreResult: ignoreResult}
- c.reqMu.Unlock()
-
- c.sendMu.Lock()
- defer c.sendMu.Unlock()
-
- var nums [16]byte
- binary.LittleEndian.PutUint64(nums[:8], uint64(len(b)))
- binary.LittleEndian.PutUint64(nums[8:], id)
- for n := 0; n < len(nums); {
- nn, err := c.socket.Write(nums[n:])
- if err != nil {
- panic(fmt.Sprint("error writing length and ID to socket gofer: ", err))
- }
- n += nn
- }
-
- for n := 0; n < len(b); {
- nn, err := c.socket.Write(b[n:])
- if err != nil {
- panic(fmt.Sprint("error writing request to socket gofer: ", err))
- }
- n += nn
- }
-
- return id, ch
-}
-
-// RPCReadFile will execute the ReadFile helper RPC method which avoids the
-// common pattern of open(2), read(2), close(2) by doing all three operations
-// as a single RPC. It will read the entire file or return EFBIG if the file
-// was too large.
-func (c *RPCConnection) RPCReadFile(path string) ([]byte, *syserr.Error) {
- req := &pb.SyscallRequest_ReadFile{&pb.ReadFileRequest{
- Path: path,
- }}
-
- id, ch := c.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */)
- <-ch
-
- res := c.Request(id).Result.(*pb.SyscallResponse_ReadFile).ReadFile.Result
- if e, ok := res.(*pb.ReadFileResponse_ErrorNumber); ok {
- return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
-
- return res.(*pb.ReadFileResponse_Data).Data, nil
-}
-
-// RPCWriteFile will execute the WriteFile helper RPC method which avoids the
-// common pattern of open(2), write(2), write(2), close(2) by doing all
-// operations as a single RPC.
-func (c *RPCConnection) RPCWriteFile(path string, data []byte) (int64, *syserr.Error) {
- req := &pb.SyscallRequest_WriteFile{&pb.WriteFileRequest{
- Path: path,
- Content: data,
- }}
-
- id, ch := c.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */)
- <-ch
-
- res := c.Request(id).Result.(*pb.SyscallResponse_WriteFile).WriteFile
- if e := res.ErrorNumber; e != 0 {
- return int64(res.Written), syserr.FromHost(syscall.Errno(e))
- }
-
- return int64(res.Written), nil
-}
-
-// Request retrieves the request corresponding to the given request ID.
-//
-// The channel returned by NewRequest must have been closed before Request can
-// be called. This will happen automatically, do not manually close the
-// channel.
-func (c *RPCConnection) Request(id uint64) pb.SyscallResponse {
- c.reqMu.Lock()
- r := c.requests[id]
- delete(c.requests, id)
- c.reqMu.Unlock()
-
- var resp pb.SyscallResponse
- if err := proto.Unmarshal(r.response, &resp); err != nil {
- panic(fmt.Sprint("invalid proto: ", err))
- }
-
- return resp
-}
diff --git a/pkg/sentry/socket/rpcinet/device.go b/pkg/sentry/socket/rpcinet/device.go
deleted file mode 100644
index 8cfd5f6e5..000000000
--- a/pkg/sentry/socket/rpcinet/device.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package rpcinet
-
-import "gvisor.dev/gvisor/pkg/sentry/device"
-
-var socketDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/socket/rpcinet/notifier/BUILD b/pkg/sentry/socket/rpcinet/notifier/BUILD
deleted file mode 100644
index a3585e10d..000000000
--- a/pkg/sentry/socket/rpcinet/notifier/BUILD
+++ /dev/null
@@ -1,16 +0,0 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "notifier",
- srcs = ["notifier.go"],
- importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier",
- visibility = ["//:sandbox"],
- deps = [
- "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto",
- "//pkg/sentry/socket/rpcinet/conn",
- "//pkg/waiter",
- "@org_golang_x_sys//unix:go_default_library",
- ],
-)
diff --git a/pkg/sentry/socket/rpcinet/notifier/notifier.go b/pkg/sentry/socket/rpcinet/notifier/notifier.go
deleted file mode 100644
index 7efe4301f..000000000
--- a/pkg/sentry/socket/rpcinet/notifier/notifier.go
+++ /dev/null
@@ -1,231 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package notifier implements an FD notifier implementation over RPC.
-package notifier
-
-import (
- "fmt"
- "sync"
- "syscall"
-
- "golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn"
- pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
- "gvisor.dev/gvisor/pkg/waiter"
-)
-
-type fdInfo struct {
- queue *waiter.Queue
- waiting bool
-}
-
-// Notifier holds all the state necessary to issue notifications when IO events
-// occur in the observed FDs.
-type Notifier struct {
- // rpcConn is the connection that is used for sending RPCs.
- rpcConn *conn.RPCConnection
-
- // epFD is the epoll file descriptor used to register for io
- // notifications.
- epFD uint32
-
- // mu protects fdMap.
- mu sync.Mutex
-
- // fdMap maps file descriptors to their notification queues and waiting
- // status.
- fdMap map[uint32]*fdInfo
-}
-
-// NewRPCNotifier creates a new notifier object.
-func NewRPCNotifier(cn *conn.RPCConnection) (*Notifier, error) {
- id, c := cn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCreate1{&pb.EpollCreate1Request{}}}, false /* ignoreResult */)
- <-c
-
- res := cn.Request(id).Result.(*pb.SyscallResponse_EpollCreate1).EpollCreate1.Result
- if e, ok := res.(*pb.EpollCreate1Response_ErrorNumber); ok {
- return nil, syscall.Errno(e.ErrorNumber)
- }
-
- w := &Notifier{
- rpcConn: cn,
- epFD: res.(*pb.EpollCreate1Response_Fd).Fd,
- fdMap: make(map[uint32]*fdInfo),
- }
-
- go w.waitAndNotify() // S/R-FIXME(b/77962828)
-
- return w, nil
-}
-
-// waitFD waits on mask for fd. The fdMap mutex must be hold.
-func (n *Notifier) waitFD(fd uint32, fi *fdInfo, mask waiter.EventMask) error {
- if !fi.waiting && mask == 0 {
- return nil
- }
-
- e := pb.EpollEvent{
- Events: mask.ToLinux() | unix.EPOLLET,
- Fd: fd,
- }
-
- switch {
- case !fi.waiting && mask != 0:
- id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCtl{&pb.EpollCtlRequest{Epfd: n.epFD, Op: syscall.EPOLL_CTL_ADD, Fd: fd, Event: &e}}}, false /* ignoreResult */)
- <-c
-
- e := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_EpollCtl).EpollCtl.ErrorNumber
- if e != 0 {
- return syscall.Errno(e)
- }
-
- fi.waiting = true
- case fi.waiting && mask == 0:
- id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCtl{&pb.EpollCtlRequest{Epfd: n.epFD, Op: syscall.EPOLL_CTL_DEL, Fd: fd}}}, false /* ignoreResult */)
- <-c
- n.rpcConn.Request(id)
-
- fi.waiting = false
- case fi.waiting && mask != 0:
- id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCtl{&pb.EpollCtlRequest{Epfd: n.epFD, Op: syscall.EPOLL_CTL_MOD, Fd: fd, Event: &e}}}, false /* ignoreResult */)
- <-c
-
- e := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_EpollCtl).EpollCtl.ErrorNumber
- if e != 0 {
- return syscall.Errno(e)
- }
- }
-
- return nil
-}
-
-// addFD adds an FD to the list of FDs observed by n.
-func (n *Notifier) addFD(fd uint32, queue *waiter.Queue) {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- // Panic if we're already notifying on this FD.
- if _, ok := n.fdMap[fd]; ok {
- panic(fmt.Sprintf("File descriptor %d added twice", fd))
- }
-
- // We have nothing to wait for at the moment. Just add it to the map.
- n.fdMap[fd] = &fdInfo{queue: queue}
-}
-
-// updateFD updates the set of events the FD needs to be notified on.
-func (n *Notifier) updateFD(fd uint32) error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- if fi, ok := n.fdMap[fd]; ok {
- return n.waitFD(fd, fi, fi.queue.Events())
- }
-
- return nil
-}
-
-// RemoveFD removes an FD from the list of FDs observed by n.
-func (n *Notifier) removeFD(fd uint32) {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- // Remove from map, then from epoll object.
- n.waitFD(fd, n.fdMap[fd], 0)
- delete(n.fdMap, fd)
-}
-
-// hasFD returns true if the FD is in the list of observed FDs.
-func (n *Notifier) hasFD(fd uint32) bool {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- _, ok := n.fdMap[fd]
- return ok
-}
-
-// waitAndNotify loops waiting for io event notifications from the epoll
-// object. Once notifications arrive, they are dispatched to the
-// registered queue.
-func (n *Notifier) waitAndNotify() error {
- for {
- id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollWait{&pb.EpollWaitRequest{Fd: n.epFD, NumEvents: 100, Msec: -1}}}, false /* ignoreResult */)
- <-c
-
- res := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_EpollWait).EpollWait.Result
- if e, ok := res.(*pb.EpollWaitResponse_ErrorNumber); ok {
- err := syscall.Errno(e.ErrorNumber)
- // NOTE(magi): I don't think epoll_wait can return EAGAIN but I'm being
- // conseratively careful here since exiting the notification thread
- // would be really bad.
- if err == syscall.EINTR || err == syscall.EAGAIN {
- continue
- }
- return err
- }
-
- n.mu.Lock()
- for _, e := range res.(*pb.EpollWaitResponse_Events).Events.Events {
- if fi, ok := n.fdMap[e.Fd]; ok {
- fi.queue.Notify(waiter.EventMaskFromLinux(e.Events))
- }
- }
- n.mu.Unlock()
- }
-}
-
-// AddFD adds an FD to the list of observed FDs.
-func (n *Notifier) AddFD(fd uint32, queue *waiter.Queue) error {
- n.addFD(fd, queue)
- return nil
-}
-
-// UpdateFD updates the set of events the FD needs to be notified on.
-func (n *Notifier) UpdateFD(fd uint32) error {
- return n.updateFD(fd)
-}
-
-// RemoveFD removes an FD from the list of observed FDs.
-func (n *Notifier) RemoveFD(fd uint32) {
- n.removeFD(fd)
-}
-
-// HasFD returns true if the FD is in the list of observed FDs.
-//
-// This should only be used by tests to assert that FDs are correctly
-// registered.
-func (n *Notifier) HasFD(fd uint32) bool {
- return n.hasFD(fd)
-}
-
-// NonBlockingPoll polls the given fd in non-blocking fashion. It is used just
-// to query the FD's current state; this method will block on the RPC response
-// although the syscall is non-blocking.
-func (n *Notifier) NonBlockingPoll(fd uint32, mask waiter.EventMask) waiter.EventMask {
- for {
- id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Poll{&pb.PollRequest{Fd: fd, Events: mask.ToLinux()}}}, false /* ignoreResult */)
- <-c
-
- res := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_Poll).Poll.Result
- if e, ok := res.(*pb.PollResponse_ErrorNumber); ok {
- if syscall.Errno(e.ErrorNumber) == syscall.EINTR {
- continue
- }
- return mask
- }
-
- return waiter.EventMaskFromLinux(res.(*pb.PollResponse_Events).Events)
- }
-}
diff --git a/pkg/sentry/socket/rpcinet/rpcinet.go b/pkg/sentry/socket/rpcinet/rpcinet.go
deleted file mode 100644
index 5d4fd4dac..000000000
--- a/pkg/sentry/socket/rpcinet/rpcinet.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package rpcinet implements sockets using an RPC for each syscall.
-package rpcinet
diff --git a/pkg/sentry/socket/rpcinet/socket.go b/pkg/sentry/socket/rpcinet/socket.go
deleted file mode 100644
index ddb76d9d4..000000000
--- a/pkg/sentry/socket/rpcinet/socket.go
+++ /dev/null
@@ -1,909 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package rpcinet
-
-import (
- "sync/atomic"
- "syscall"
- "time"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/binary"
- "gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/sentry/socket"
- "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn"
- "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier"
- pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
- "gvisor.dev/gvisor/pkg/sentry/unimpl"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
- "gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/tcpip"
- "gvisor.dev/gvisor/pkg/tcpip/buffer"
- "gvisor.dev/gvisor/pkg/waiter"
-)
-
-// socketOperations implements fs.FileOperations and socket.Socket for a socket
-// implemented using a host socket.
-type socketOperations struct {
- fsutil.FilePipeSeek `state:"nosave"`
- fsutil.FileNotDirReaddir `state:"nosave"`
- fsutil.FileNoFsync `state:"nosave"`
- fsutil.FileNoMMap `state:"nosave"`
- fsutil.FileNoSplice `state:"nosave"`
- fsutil.FileNoopFlush `state:"nosave"`
- fsutil.FileUseInodeUnstableAttr `state:"nosave"`
- socket.SendReceiveTimeout
-
- family int // Read-only.
- stype linux.SockType // Read-only.
- protocol int // Read-only.
-
- fd uint32 // must be O_NONBLOCK
- wq *waiter.Queue
- rpcConn *conn.RPCConnection
- notifier *notifier.Notifier
-
- // shState is the state of the connection with respect to shutdown. Because
- // we're mixing non-blocking semantics on the other side we have to adapt for
- // some strange differences between blocking and non-blocking sockets.
- shState int32
-}
-
-// Verify that we actually implement socket.Socket.
-var _ = socket.Socket(&socketOperations{})
-
-// New creates a new RPC socket.
-func newSocketFile(ctx context.Context, stack *Stack, family int, skType linux.SockType, protocol int) (*fs.File, *syserr.Error) {
- id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Socket{&pb.SocketRequest{Family: int64(family), Type: int64(skType | syscall.SOCK_NONBLOCK), Protocol: int64(protocol)}}}, false /* ignoreResult */)
- <-c
-
- res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Socket).Socket.Result
- if e, ok := res.(*pb.SocketResponse_ErrorNumber); ok {
- return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
- fd := res.(*pb.SocketResponse_Fd).Fd
-
- var wq waiter.Queue
- stack.notifier.AddFD(fd, &wq)
-
- dirent := socket.NewDirent(ctx, socketDevice)
- defer dirent.DecRef()
- return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, &socketOperations{
- family: family,
- stype: skType,
- protocol: protocol,
- wq: &wq,
- fd: fd,
- rpcConn: stack.rpcConn,
- notifier: stack.notifier,
- }), nil
-}
-
-func isBlockingErrno(err error) bool {
- return err == syscall.EAGAIN || err == syscall.EWOULDBLOCK
-}
-
-func translateIOSyscallError(err error) error {
- if isBlockingErrno(err) {
- return syserror.ErrWouldBlock
- }
- return err
-}
-
-// setShutdownFlags will set the shutdown flag so we can handle blocking reads
-// after a read shutdown.
-func (s *socketOperations) setShutdownFlags(how int) {
- var f tcpip.ShutdownFlags
- switch how {
- case linux.SHUT_RD:
- f = tcpip.ShutdownRead
- case linux.SHUT_WR:
- f = tcpip.ShutdownWrite
- case linux.SHUT_RDWR:
- f = tcpip.ShutdownWrite | tcpip.ShutdownRead
- }
-
- // Atomically update the flags.
- for {
- old := atomic.LoadInt32(&s.shState)
- if atomic.CompareAndSwapInt32(&s.shState, old, old|int32(f)) {
- break
- }
- }
-}
-
-func (s *socketOperations) resetShutdownFlags() {
- atomic.StoreInt32(&s.shState, 0)
-}
-
-func (s *socketOperations) isShutRdSet() bool {
- return atomic.LoadInt32(&s.shState)&int32(tcpip.ShutdownRead) != 0
-}
-
-func (s *socketOperations) isShutWrSet() bool {
- return atomic.LoadInt32(&s.shState)&int32(tcpip.ShutdownWrite) != 0
-}
-
-// Release implements fs.FileOperations.Release.
-func (s *socketOperations) Release() {
- s.notifier.RemoveFD(s.fd)
-
- // We always need to close the FD.
- _, _ = s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Close{&pb.CloseRequest{Fd: s.fd}}}, true /* ignoreResult */)
-}
-
-// Readiness implements waiter.Waitable.Readiness.
-func (s *socketOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
- return s.notifier.NonBlockingPoll(s.fd, mask)
-}
-
-// EventRegister implements waiter.Waitable.EventRegister.
-func (s *socketOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
- s.wq.EventRegister(e, mask)
- s.notifier.UpdateFD(s.fd)
-}
-
-// EventUnregister implements waiter.Waitable.EventUnregister.
-func (s *socketOperations) EventUnregister(e *waiter.Entry) {
- s.wq.EventUnregister(e)
- s.notifier.UpdateFD(s.fd)
-}
-
-func rpcRead(t *kernel.Task, req *pb.SyscallRequest_Read) (*pb.ReadResponse_Data, *syserr.Error) {
- s := t.NetworkContext().(*Stack)
- id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */)
- <-c
-
- res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Read).Read.Result
- if e, ok := res.(*pb.ReadResponse_ErrorNumber); ok {
- return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
-
- return res.(*pb.ReadResponse_Data), nil
-}
-
-// Read implements fs.FileOperations.Read.
-func (s *socketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) {
- req := &pb.SyscallRequest_Read{&pb.ReadRequest{
- Fd: s.fd,
- Length: uint32(dst.NumBytes()),
- }}
-
- res, se := rpcRead(ctx.(*kernel.Task), req)
- if se == nil {
- n, e := dst.CopyOut(ctx, res.Data)
- return int64(n), e
- }
-
- return 0, se.ToError()
-}
-
-func rpcWrite(t *kernel.Task, req *pb.SyscallRequest_Write) (uint32, *syserr.Error) {
- s := t.NetworkContext().(*Stack)
- id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */)
- <-c
-
- res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Write).Write.Result
- if e, ok := res.(*pb.WriteResponse_ErrorNumber); ok {
- return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
-
- return res.(*pb.WriteResponse_Length).Length, nil
-}
-
-// Write implements fs.FileOperations.Write.
-func (s *socketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
- t := ctx.(*kernel.Task)
- v := buffer.NewView(int(src.NumBytes()))
-
- // Copy all the data into the buffer.
- if _, err := src.CopyIn(t, v); err != nil {
- return 0, err
- }
-
- n, err := rpcWrite(t, &pb.SyscallRequest_Write{&pb.WriteRequest{Fd: s.fd, Data: v}})
- if n > 0 && n < uint32(src.NumBytes()) {
- // The FileOperations.Write interface expects us to return ErrWouldBlock in
- // the event of a partial write.
- return int64(n), syserror.ErrWouldBlock
- }
- return int64(n), err.ToError()
-}
-
-func rpcConnect(t *kernel.Task, fd uint32, sockaddr []byte) *syserr.Error {
- s := t.NetworkContext().(*Stack)
- id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Connect{&pb.ConnectRequest{Fd: uint32(fd), Address: sockaddr}}}, false /* ignoreResult */)
- <-c
-
- if e := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Connect).Connect.ErrorNumber; e != 0 {
- return syserr.FromHost(syscall.Errno(e))
- }
- return nil
-}
-
-// Connect implements socket.Socket.Connect.
-func (s *socketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
- if !blocking {
- e := rpcConnect(t, s.fd, sockaddr)
- if e == nil {
- // Reset the shutdown state on new connects.
- s.resetShutdownFlags()
- }
- return e
- }
-
- // Register for notification when the endpoint becomes writable, then
- // initiate the connection.
- e, ch := waiter.NewChannelEntry(nil)
- s.EventRegister(&e, waiter.EventOut|waiter.EventIn|waiter.EventHUp)
- defer s.EventUnregister(&e)
- for {
- if err := rpcConnect(t, s.fd, sockaddr); err == nil || err != syserr.ErrInProgress && err != syserr.ErrAlreadyInProgress {
- if err == nil {
- // Reset the shutdown state on new connects.
- s.resetShutdownFlags()
- }
- return err
- }
-
- // It's pending, so we have to wait for a notification, and fetch the
- // result once the wait completes.
- if err := t.Block(ch); err != nil {
- return syserr.FromError(err)
- }
- }
-}
-
-func rpcAccept(t *kernel.Task, fd uint32, peer bool) (*pb.AcceptResponse_ResultPayload, *syserr.Error) {
- stack := t.NetworkContext().(*Stack)
- id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Accept{&pb.AcceptRequest{Fd: fd, Peer: peer, Flags: syscall.SOCK_NONBLOCK}}}, false /* ignoreResult */)
- <-c
-
- res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Accept).Accept.Result
- if e, ok := res.(*pb.AcceptResponse_ErrorNumber); ok {
- return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
- return res.(*pb.AcceptResponse_Payload).Payload, nil
-}
-
-// Accept implements socket.Socket.Accept.
-func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
- payload, se := rpcAccept(t, s.fd, peerRequested)
-
- // Check if we need to block.
- if blocking && se == syserr.ErrTryAgain {
- // Register for notifications.
- e, ch := waiter.NewChannelEntry(nil)
- // FIXME(b/119878986): This waiter.EventHUp is a partial
- // measure, need to figure out how to translate linux events to
- // internal events.
- s.EventRegister(&e, waiter.EventIn|waiter.EventHUp)
- defer s.EventUnregister(&e)
-
- // Try to accept the connection again; if it fails, then wait until we
- // get a notification.
- for {
- if payload, se = rpcAccept(t, s.fd, peerRequested); se != syserr.ErrTryAgain {
- break
- }
-
- if err := t.Block(ch); err != nil {
- return 0, nil, 0, syserr.FromError(err)
- }
- }
- }
-
- // Handle any error from accept.
- if se != nil {
- return 0, nil, 0, se
- }
-
- var wq waiter.Queue
- s.notifier.AddFD(payload.Fd, &wq)
-
- dirent := socket.NewDirent(t, socketDevice)
- defer dirent.DecRef()
- fileFlags := fs.FileFlags{
- Read: true,
- Write: true,
- NonSeekable: true,
- NonBlocking: flags&linux.SOCK_NONBLOCK != 0,
- }
- file := fs.NewFile(t, dirent, fileFlags, &socketOperations{
- family: s.family,
- stype: s.stype,
- protocol: s.protocol,
- wq: &wq,
- fd: payload.Fd,
- rpcConn: s.rpcConn,
- notifier: s.notifier,
- })
- defer file.DecRef()
-
- fd, err := t.NewFDFrom(0, file, kernel.FDFlags{
- CloseOnExec: flags&linux.SOCK_CLOEXEC != 0,
- })
- if err != nil {
- return 0, nil, 0, syserr.FromError(err)
- }
- t.Kernel().RecordSocket(file)
-
- if peerRequested {
- return fd, socket.UnmarshalSockAddr(s.family, payload.Address.Address), payload.Address.Length, nil
- }
-
- return fd, nil, 0, nil
-}
-
-// Bind implements socket.Socket.Bind.
-func (s *socketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
- stack := t.NetworkContext().(*Stack)
- id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Bind{&pb.BindRequest{Fd: s.fd, Address: sockaddr}}}, false /* ignoreResult */)
- <-c
-
- if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Bind).Bind.ErrorNumber; e != 0 {
- return syserr.FromHost(syscall.Errno(e))
- }
- return nil
-}
-
-// Listen implements socket.Socket.Listen.
-func (s *socketOperations) Listen(t *kernel.Task, backlog int) *syserr.Error {
- stack := t.NetworkContext().(*Stack)
- id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Listen{&pb.ListenRequest{Fd: s.fd, Backlog: int64(backlog)}}}, false /* ignoreResult */)
- <-c
-
- if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Listen).Listen.ErrorNumber; e != 0 {
- return syserr.FromHost(syscall.Errno(e))
- }
- return nil
-}
-
-// Shutdown implements socket.Socket.Shutdown.
-func (s *socketOperations) Shutdown(t *kernel.Task, how int) *syserr.Error {
- // We save the shutdown state because of strange differences on linux
- // related to recvs on blocking vs. non-blocking sockets after a SHUT_RD.
- // We need to emulate that behavior on the blocking side.
- // TODO(b/120096741): There is a possible race that can exist with loopback,
- // where data could possibly be lost.
- s.setShutdownFlags(how)
-
- stack := t.NetworkContext().(*Stack)
- id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Shutdown{&pb.ShutdownRequest{Fd: s.fd, How: int64(how)}}}, false /* ignoreResult */)
- <-c
-
- if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Shutdown).Shutdown.ErrorNumber; e != 0 {
- return syserr.FromHost(syscall.Errno(e))
- }
-
- return nil
-}
-
-// GetSockOpt implements socket.Socket.GetSockOpt.
-func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
- // SO_RCVTIMEO and SO_SNDTIMEO are special because blocking is performed
- // within the sentry.
- if level == linux.SOL_SOCKET && name == linux.SO_RCVTIMEO {
- if outLen < linux.SizeOfTimeval {
- return nil, syserr.ErrInvalidArgument
- }
-
- return linux.NsecToTimeval(s.RecvTimeout()), nil
- }
- if level == linux.SOL_SOCKET && name == linux.SO_SNDTIMEO {
- if outLen < linux.SizeOfTimeval {
- return nil, syserr.ErrInvalidArgument
- }
-
- return linux.NsecToTimeval(s.SendTimeout()), nil
- }
-
- stack := t.NetworkContext().(*Stack)
- id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_GetSockOpt{&pb.GetSockOptRequest{Fd: s.fd, Level: int64(level), Name: int64(name), Length: uint32(outLen)}}}, false /* ignoreResult */)
- <-c
-
- res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_GetSockOpt).GetSockOpt.Result
- if e, ok := res.(*pb.GetSockOptResponse_ErrorNumber); ok {
- return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
-
- return res.(*pb.GetSockOptResponse_Opt).Opt, nil
-}
-
-// SetSockOpt implements socket.Socket.SetSockOpt.
-func (s *socketOperations) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error {
- // Because blocking actually happens within the sentry we need to inspect
- // this socket option to determine if it's a SO_RCVTIMEO or SO_SNDTIMEO,
- // and if so, we will save it and use it as the deadline for recv(2)
- // or send(2) related syscalls.
- if level == linux.SOL_SOCKET && name == linux.SO_RCVTIMEO {
- if len(opt) < linux.SizeOfTimeval {
- return syserr.ErrInvalidArgument
- }
-
- var v linux.Timeval
- binary.Unmarshal(opt[:linux.SizeOfTimeval], usermem.ByteOrder, &v)
- if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
- return syserr.ErrDomain
- }
- s.SetRecvTimeout(v.ToNsecCapped())
- return nil
- }
- if level == linux.SOL_SOCKET && name == linux.SO_SNDTIMEO {
- if len(opt) < linux.SizeOfTimeval {
- return syserr.ErrInvalidArgument
- }
-
- var v linux.Timeval
- binary.Unmarshal(opt[:linux.SizeOfTimeval], usermem.ByteOrder, &v)
- if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
- return syserr.ErrDomain
- }
- s.SetSendTimeout(v.ToNsecCapped())
- return nil
- }
-
- stack := t.NetworkContext().(*Stack)
- id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_SetSockOpt{&pb.SetSockOptRequest{Fd: s.fd, Level: int64(level), Name: int64(name), Opt: opt}}}, false /* ignoreResult */)
- <-c
-
- if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_SetSockOpt).SetSockOpt.ErrorNumber; e != 0 {
- return syserr.FromHost(syscall.Errno(e))
- }
- return nil
-}
-
-// GetPeerName implements socket.Socket.GetPeerName.
-func (s *socketOperations) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
- stack := t.NetworkContext().(*Stack)
- id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_GetPeerName{&pb.GetPeerNameRequest{Fd: s.fd}}}, false /* ignoreResult */)
- <-c
-
- res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_GetPeerName).GetPeerName.Result
- if e, ok := res.(*pb.GetPeerNameResponse_ErrorNumber); ok {
- return nil, 0, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
-
- addr := res.(*pb.GetPeerNameResponse_Address).Address
- return socket.UnmarshalSockAddr(s.family, addr.Address), addr.Length, nil
-}
-
-// GetSockName implements socket.Socket.GetSockName.
-func (s *socketOperations) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
- stack := t.NetworkContext().(*Stack)
- id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_GetSockName{&pb.GetSockNameRequest{Fd: s.fd}}}, false /* ignoreResult */)
- <-c
-
- res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_GetSockName).GetSockName.Result
- if e, ok := res.(*pb.GetSockNameResponse_ErrorNumber); ok {
- return nil, 0, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
-
- addr := res.(*pb.GetSockNameResponse_Address).Address
- return socket.UnmarshalSockAddr(s.family, addr.Address), addr.Length, nil
-}
-
-func rpcIoctl(t *kernel.Task, fd, cmd uint32, arg []byte) ([]byte, error) {
- stack := t.NetworkContext().(*Stack)
-
- id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Ioctl{&pb.IOCtlRequest{Fd: fd, Cmd: cmd, Arg: arg}}}, false /* ignoreResult */)
- <-c
-
- res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Ioctl).Ioctl.Result
- if e, ok := res.(*pb.IOCtlResponse_ErrorNumber); ok {
- return nil, syscall.Errno(e.ErrorNumber)
- }
-
- return res.(*pb.IOCtlResponse_Value).Value, nil
-}
-
-// ifconfIoctlFromStack populates a struct ifconf for the SIOCGIFCONF ioctl.
-func ifconfIoctlFromStack(ctx context.Context, io usermem.IO, ifc *linux.IFConf) error {
- // If Ptr is NULL, return the necessary buffer size via Len.
- // Otherwise, write up to Len bytes starting at Ptr containing ifreq
- // structs.
- t := ctx.(*kernel.Task)
- s := t.NetworkContext().(*Stack)
- if s == nil {
- return syserr.ErrNoDevice.ToError()
- }
-
- if ifc.Ptr == 0 {
- ifc.Len = int32(len(s.Interfaces())) * int32(linux.SizeOfIFReq)
- return nil
- }
-
- max := ifc.Len
- ifc.Len = 0
- for key, ifaceAddrs := range s.InterfaceAddrs() {
- iface := s.Interfaces()[key]
- for _, ifaceAddr := range ifaceAddrs {
- // Don't write past the end of the buffer.
- if ifc.Len+int32(linux.SizeOfIFReq) > max {
- break
- }
- if ifaceAddr.Family != linux.AF_INET {
- continue
- }
-
- // Populate ifr.ifr_addr.
- ifr := linux.IFReq{}
- ifr.SetName(iface.Name)
- usermem.ByteOrder.PutUint16(ifr.Data[0:2], uint16(ifaceAddr.Family))
- usermem.ByteOrder.PutUint16(ifr.Data[2:4], 0)
- copy(ifr.Data[4:8], ifaceAddr.Addr[:4])
-
- // Copy the ifr to userspace.
- dst := uintptr(ifc.Ptr) + uintptr(ifc.Len)
- ifc.Len += int32(linux.SizeOfIFReq)
- if _, err := usermem.CopyObjectOut(ctx, io, usermem.Addr(dst), ifr, usermem.IOOpts{
- AddressSpaceActive: true,
- }); err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-// Ioctl implements fs.FileOperations.Ioctl.
-func (s *socketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
- t := ctx.(*kernel.Task)
-
- cmd := uint32(args[1].Int())
- arg := args[2].Pointer()
-
- var buf []byte
- switch cmd {
- // The following ioctls take 4 byte argument parameters.
- case syscall.TIOCINQ,
- syscall.TIOCOUTQ:
- buf = make([]byte, 4)
- // The following ioctls have args which are sizeof(struct ifreq).
- case syscall.SIOCGIFADDR,
- syscall.SIOCGIFBRDADDR,
- syscall.SIOCGIFDSTADDR,
- syscall.SIOCGIFFLAGS,
- syscall.SIOCGIFHWADDR,
- syscall.SIOCGIFINDEX,
- syscall.SIOCGIFMAP,
- syscall.SIOCGIFMETRIC,
- syscall.SIOCGIFMTU,
- syscall.SIOCGIFNAME,
- syscall.SIOCGIFNETMASK,
- syscall.SIOCGIFTXQLEN:
- buf = make([]byte, linux.SizeOfIFReq)
- case syscall.SIOCGIFCONF:
- // SIOCGIFCONF has slightly different behavior than the others, in that it
- // will need to populate the array of ifreqs.
- var ifc linux.IFConf
- if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &ifc, usermem.IOOpts{
- AddressSpaceActive: true,
- }); err != nil {
- return 0, err
- }
-
- if err := ifconfIoctlFromStack(ctx, io, &ifc); err != nil {
- return 0, err
- }
- _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), ifc, usermem.IOOpts{
- AddressSpaceActive: true,
- })
-
- return 0, err
-
- case linux.SIOCGIFMEM, linux.SIOCGIFPFLAGS, linux.SIOCGMIIPHY, linux.SIOCGMIIREG:
- unimpl.EmitUnimplementedEvent(ctx)
-
- default:
- return 0, syserror.ENOTTY
- }
-
- _, err := io.CopyIn(ctx, arg, buf, usermem.IOOpts{
- AddressSpaceActive: true,
- })
-
- if err != nil {
- return 0, err
- }
-
- v, err := rpcIoctl(t, s.fd, cmd, buf)
- if err != nil {
- return 0, err
- }
-
- if len(v) != len(buf) {
- return 0, syserror.EINVAL
- }
-
- _, err = io.CopyOut(ctx, arg, v, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- return 0, err
-}
-
-func rpcRecvMsg(t *kernel.Task, req *pb.SyscallRequest_Recvmsg) (*pb.RecvmsgResponse_ResultPayload, *syserr.Error) {
- s := t.NetworkContext().(*Stack)
- id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */)
- <-c
-
- res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Recvmsg).Recvmsg.Result
- if e, ok := res.(*pb.RecvmsgResponse_ErrorNumber); ok {
- return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
-
- return res.(*pb.RecvmsgResponse_Payload).Payload, nil
-}
-
-// Because we only support SO_TIMESTAMP we will search control messages for
-// that value and set it if so, all other control messages will be ignored.
-func (s *socketOperations) extractControlMessages(payload *pb.RecvmsgResponse_ResultPayload) socket.ControlMessages {
- c := socket.ControlMessages{}
- if len(payload.GetCmsgData()) > 0 {
- // Parse the control messages looking for SO_TIMESTAMP.
- msgs, e := syscall.ParseSocketControlMessage(payload.GetCmsgData())
- if e != nil {
- return socket.ControlMessages{}
- }
- for _, m := range msgs {
- if m.Header.Level != linux.SOL_SOCKET || m.Header.Type != linux.SO_TIMESTAMP {
- continue
- }
-
- // Let's parse the time stamp and set it.
- if len(m.Data) < linux.SizeOfTimeval {
- // Give up on locating the SO_TIMESTAMP option.
- return socket.ControlMessages{}
- }
-
- var v linux.Timeval
- binary.Unmarshal(m.Data[:linux.SizeOfTimeval], usermem.ByteOrder, &v)
- c.IP.HasTimestamp = true
- c.IP.Timestamp = v.ToNsecCapped()
- break
- }
- }
- return c
-}
-
-// RecvMsg implements socket.Socket.RecvMsg.
-func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) {
- req := &pb.SyscallRequest_Recvmsg{&pb.RecvmsgRequest{
- Fd: s.fd,
- Length: uint32(dst.NumBytes()),
- Sender: senderRequested,
- Trunc: flags&linux.MSG_TRUNC != 0,
- Peek: flags&linux.MSG_PEEK != 0,
- CmsgLength: uint32(controlDataLen),
- }}
-
- res, err := rpcRecvMsg(t, req)
- if err == nil {
- var e error
- var n int
- if len(res.Data) > 0 {
- n, e = dst.CopyOut(t, res.Data)
- if e == nil && n != len(res.Data) {
- panic("CopyOut failed to copy full buffer")
- }
- }
- c := s.extractControlMessages(res)
- return int(res.Length), 0, socket.UnmarshalSockAddr(s.family, res.Address.GetAddress()), res.Address.GetLength(), c, syserr.FromError(e)
- }
- if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain || flags&linux.MSG_DONTWAIT != 0 {
- return 0, 0, nil, 0, socket.ControlMessages{}, err
- }
-
- // We'll have to block. Register for notifications and keep trying to
- // send all the data.
- e, ch := waiter.NewChannelEntry(nil)
- s.EventRegister(&e, waiter.EventIn)
- defer s.EventUnregister(&e)
-
- for {
- res, err := rpcRecvMsg(t, req)
- if err == nil {
- var e error
- var n int
- if len(res.Data) > 0 {
- n, e = dst.CopyOut(t, res.Data)
- if e == nil && n != len(res.Data) {
- panic("CopyOut failed to copy full buffer")
- }
- }
- c := s.extractControlMessages(res)
- return int(res.Length), 0, socket.UnmarshalSockAddr(s.family, res.Address.GetAddress()), res.Address.GetLength(), c, syserr.FromError(e)
- }
- if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain {
- return 0, 0, nil, 0, socket.ControlMessages{}, err
- }
-
- if s.isShutRdSet() {
- // Blocking would have caused us to block indefinitely so we return 0,
- // this is the same behavior as Linux.
- return 0, 0, nil, 0, socket.ControlMessages{}, nil
- }
-
- if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
- if err == syserror.ETIMEDOUT {
- return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain
- }
- return 0, 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err)
- }
- }
-}
-
-func rpcSendMsg(t *kernel.Task, req *pb.SyscallRequest_Sendmsg) (uint32, *syserr.Error) {
- s := t.NetworkContext().(*Stack)
- id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */)
- <-c
-
- res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Sendmsg).Sendmsg.Result
- if e, ok := res.(*pb.SendmsgResponse_ErrorNumber); ok {
- return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
-
- return res.(*pb.SendmsgResponse_Length).Length, nil
-}
-
-// SendMsg implements socket.Socket.SendMsg.
-func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) {
- // Whitelist flags.
- if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 {
- return 0, syserr.ErrInvalidArgument
- }
-
- // Reject Unix control messages.
- if !controlMessages.Unix.Empty() {
- return 0, syserr.ErrInvalidArgument
- }
-
- v := buffer.NewView(int(src.NumBytes()))
-
- // Copy all the data into the buffer.
- if _, err := src.CopyIn(t, v); err != nil {
- return 0, syserr.FromError(err)
- }
-
- // TODO(bgeffon): this needs to change to map directly to a SendMsg syscall
- // in the RPC.
- totalWritten := 0
- n, err := rpcSendMsg(t, &pb.SyscallRequest_Sendmsg{&pb.SendmsgRequest{
- Fd: uint32(s.fd),
- Data: v,
- Address: to,
- More: flags&linux.MSG_MORE != 0,
- EndOfRecord: flags&linux.MSG_EOR != 0,
- }})
-
- if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain || flags&linux.MSG_DONTWAIT != 0 {
- return int(n), err
- }
-
- if n > 0 {
- totalWritten += int(n)
- v.TrimFront(int(n))
- }
-
- // We'll have to block. Register for notification and keep trying to
- // send all the data.
- e, ch := waiter.NewChannelEntry(nil)
- s.EventRegister(&e, waiter.EventOut)
- defer s.EventUnregister(&e)
-
- for {
- n, err := rpcSendMsg(t, &pb.SyscallRequest_Sendmsg{&pb.SendmsgRequest{
- Fd: uint32(s.fd),
- Data: v,
- Address: to,
- More: flags&linux.MSG_MORE != 0,
- EndOfRecord: flags&linux.MSG_EOR != 0,
- }})
-
- if n > 0 {
- totalWritten += int(n)
- v.TrimFront(int(n))
-
- if err == nil && totalWritten < int(src.NumBytes()) {
- continue
- }
- }
-
- if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain {
- // We eat the error in this situation.
- return int(totalWritten), nil
- }
-
- if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
- if err == syserror.ETIMEDOUT {
- return int(totalWritten), syserr.ErrTryAgain
- }
- return int(totalWritten), syserr.FromError(err)
- }
- }
-}
-
-// State implements socket.Socket.State.
-func (s *socketOperations) State() uint32 {
- // TODO(b/127845868): Define a new rpc to query the socket state.
- return 0
-}
-
-// Type implements socket.Socket.Type.
-func (s *socketOperations) Type() (family int, skType linux.SockType, protocol int) {
- return s.family, s.stype, s.protocol
-}
-
-type socketProvider struct {
- family int
-}
-
-// Socket implements socket.Provider.Socket.
-func (p *socketProvider) Socket(t *kernel.Task, stypeflags linux.SockType, protocol int) (*fs.File, *syserr.Error) {
- // Check that we are using the RPC network stack.
- stack := t.NetworkContext()
- if stack == nil {
- return nil, nil
- }
-
- s, ok := stack.(*Stack)
- if !ok {
- return nil, nil
- }
-
- // Only accept TCP and UDP.
- //
- // Try to restrict the flags we will accept to minimize backwards
- // incompatibility with netstack.
- stype := stypeflags & linux.SOCK_TYPE_MASK
- switch stype {
- case syscall.SOCK_STREAM:
- switch protocol {
- case 0, syscall.IPPROTO_TCP:
- // ok
- default:
- return nil, nil
- }
- case syscall.SOCK_DGRAM:
- switch protocol {
- case 0, syscall.IPPROTO_UDP:
- // ok
- default:
- return nil, nil
- }
- default:
- return nil, nil
- }
-
- return newSocketFile(t, s, p.family, stype, protocol)
-}
-
-// Pair implements socket.Provider.Pair.
-func (p *socketProvider) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *fs.File, *syserr.Error) {
- // Not supported by AF_INET/AF_INET6.
- return nil, nil, nil
-}
-
-func init() {
- for _, family := range []int{syscall.AF_INET, syscall.AF_INET6} {
- socket.RegisterProvider(family, &socketProvider{family})
- }
-}
diff --git a/pkg/sentry/socket/rpcinet/stack.go b/pkg/sentry/socket/rpcinet/stack.go
deleted file mode 100644
index f5441b826..000000000
--- a/pkg/sentry/socket/rpcinet/stack.go
+++ /dev/null
@@ -1,178 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package rpcinet
-
-import (
- "fmt"
- "syscall"
-
- "gvisor.dev/gvisor/pkg/sentry/inet"
- "gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
- "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn"
- "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier"
- "gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/tcpip"
- "gvisor.dev/gvisor/pkg/unet"
-)
-
-// Stack implements inet.Stack for RPC backed sockets.
-type Stack struct {
- interfaces map[int32]inet.Interface
- interfaceAddrs map[int32][]inet.InterfaceAddr
- routes []inet.Route
- rpcConn *conn.RPCConnection
- notifier *notifier.Notifier
-}
-
-// NewStack returns a Stack containing the current state of the host network
-// stack.
-func NewStack(fd int32) (*Stack, error) {
- sock, err := unet.NewSocket(int(fd))
- if err != nil {
- return nil, err
- }
-
- stack := &Stack{
- interfaces: make(map[int32]inet.Interface),
- interfaceAddrs: make(map[int32][]inet.InterfaceAddr),
- rpcConn: conn.NewRPCConnection(sock),
- }
-
- var e error
- stack.notifier, e = notifier.NewRPCNotifier(stack.rpcConn)
- if e != nil {
- return nil, e
- }
-
- links, err := stack.DoNetlinkRouteRequest(syscall.RTM_GETLINK)
- if err != nil {
- return nil, fmt.Errorf("RTM_GETLINK failed: %v", err)
- }
-
- addrs, err := stack.DoNetlinkRouteRequest(syscall.RTM_GETADDR)
- if err != nil {
- return nil, fmt.Errorf("RTM_GETADDR failed: %v", err)
- }
-
- e = hostinet.ExtractHostInterfaces(links, addrs, stack.interfaces, stack.interfaceAddrs)
- if e != nil {
- return nil, e
- }
-
- routes, err := stack.DoNetlinkRouteRequest(syscall.RTM_GETROUTE)
- if err != nil {
- return nil, fmt.Errorf("RTM_GETROUTE failed: %v", err)
- }
-
- stack.routes, e = hostinet.ExtractHostRoutes(routes)
- if e != nil {
- return nil, e
- }
-
- return stack, nil
-}
-
-// RPCReadFile will execute the ReadFile helper RPC method which avoids the
-// common pattern of open(2), read(2), close(2) by doing all three operations
-// as a single RPC. It will read the entire file or return EFBIG if the file
-// was too large.
-func (s *Stack) RPCReadFile(path string) ([]byte, *syserr.Error) {
- return s.rpcConn.RPCReadFile(path)
-}
-
-// RPCWriteFile will execute the WriteFile helper RPC method which avoids the
-// common pattern of open(2), write(2), write(2), close(2) by doing all
-// operations as a single RPC.
-func (s *Stack) RPCWriteFile(path string, data []byte) (int64, *syserr.Error) {
- return s.rpcConn.RPCWriteFile(path, data)
-}
-
-// Interfaces implements inet.Stack.Interfaces.
-func (s *Stack) Interfaces() map[int32]inet.Interface {
- interfaces := make(map[int32]inet.Interface)
- for k, v := range s.interfaces {
- interfaces[k] = v
- }
- return interfaces
-}
-
-// InterfaceAddrs implements inet.Stack.InterfaceAddrs.
-func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr {
- addrs := make(map[int32][]inet.InterfaceAddr)
- for k, v := range s.interfaceAddrs {
- addrs[k] = append([]inet.InterfaceAddr(nil), v...)
- }
- return addrs
-}
-
-// SupportsIPv6 implements inet.Stack.SupportsIPv6.
-func (s *Stack) SupportsIPv6() bool {
- panic("rpcinet handles procfs directly this method should not be called")
-}
-
-// TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize.
-func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) {
- panic("rpcinet handles procfs directly this method should not be called")
-}
-
-// SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize.
-func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error {
- panic("rpcinet handles procfs directly this method should not be called")
-
-}
-
-// TCPSendBufferSize implements inet.Stack.TCPSendBufferSize.
-func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) {
- panic("rpcinet handles procfs directly this method should not be called")
-
-}
-
-// SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize.
-func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error {
- panic("rpcinet handles procfs directly this method should not be called")
-}
-
-// TCPSACKEnabled implements inet.Stack.TCPSACKEnabled.
-func (s *Stack) TCPSACKEnabled() (bool, error) {
- panic("rpcinet handles procfs directly this method should not be called")
-}
-
-// SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled.
-func (s *Stack) SetTCPSACKEnabled(enabled bool) error {
- panic("rpcinet handles procfs directly this method should not be called")
-}
-
-// Statistics implements inet.Stack.Statistics.
-func (s *Stack) Statistics(stat interface{}, arg string) error {
- return syserr.ErrEndpointOperation.ToError()
-}
-
-// RouteTable implements inet.Stack.RouteTable.
-func (s *Stack) RouteTable() []inet.Route {
- return append([]inet.Route(nil), s.routes...)
-}
-
-// Resume implements inet.Stack.Resume.
-func (s *Stack) Resume() {}
-
-// Forwarding implements inet.Stack.Forwarding.
-func (s *Stack) Forwarding(protocol tcpip.NetworkProtocolNumber) bool {
- panic("rpcinet handles procfs directly this method should not be called")
-}
-
-// SetForwarding implements inet.Stack.SetForwarding.
-func (s *Stack) SetForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) error {
- panic("rpcinet handles procfs directly this method should not be called")
-}
diff --git a/pkg/sentry/socket/rpcinet/stack_unsafe.go b/pkg/sentry/socket/rpcinet/stack_unsafe.go
deleted file mode 100644
index a94bdad83..000000000
--- a/pkg/sentry/socket/rpcinet/stack_unsafe.go
+++ /dev/null
@@ -1,193 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package rpcinet
-
-import (
- "syscall"
- "unsafe"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/binary"
- pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
- "gvisor.dev/gvisor/pkg/syserr"
-)
-
-// NewNetlinkRouteRequest builds a netlink message for getting the RIB,
-// the routing information base.
-func newNetlinkRouteRequest(proto, seq, family int) []byte {
- rr := &syscall.NetlinkRouteRequest{}
- rr.Header.Len = uint32(syscall.NLMSG_HDRLEN + syscall.SizeofRtGenmsg)
- rr.Header.Type = uint16(proto)
- rr.Header.Flags = syscall.NLM_F_DUMP | syscall.NLM_F_REQUEST
- rr.Header.Seq = uint32(seq)
- rr.Data.Family = uint8(family)
- return netlinkRRtoWireFormat(rr)
-}
-
-func netlinkRRtoWireFormat(rr *syscall.NetlinkRouteRequest) []byte {
- b := make([]byte, rr.Header.Len)
- *(*uint32)(unsafe.Pointer(&b[0:4][0])) = rr.Header.Len
- *(*uint16)(unsafe.Pointer(&b[4:6][0])) = rr.Header.Type
- *(*uint16)(unsafe.Pointer(&b[6:8][0])) = rr.Header.Flags
- *(*uint32)(unsafe.Pointer(&b[8:12][0])) = rr.Header.Seq
- *(*uint32)(unsafe.Pointer(&b[12:16][0])) = rr.Header.Pid
- b[16] = byte(rr.Data.Family)
- return b
-}
-
-func (s *Stack) getNetlinkFd() (uint32, *syserr.Error) {
- id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Socket{&pb.SocketRequest{Family: int64(syscall.AF_NETLINK), Type: int64(syscall.SOCK_RAW | syscall.SOCK_NONBLOCK), Protocol: int64(syscall.NETLINK_ROUTE)}}}, false /* ignoreResult */)
- <-c
-
- res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Socket).Socket.Result
- if e, ok := res.(*pb.SocketResponse_ErrorNumber); ok {
- return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
- return res.(*pb.SocketResponse_Fd).Fd, nil
-}
-
-func (s *Stack) bindNetlinkFd(fd uint32, sockaddr []byte) *syserr.Error {
- id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Bind{&pb.BindRequest{Fd: fd, Address: sockaddr}}}, false /* ignoreResult */)
- <-c
-
- if e := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Bind).Bind.ErrorNumber; e != 0 {
- return syserr.FromHost(syscall.Errno(e))
- }
- return nil
-}
-
-func (s *Stack) closeNetlinkFd(fd uint32) {
- _, _ = s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Close{&pb.CloseRequest{Fd: fd}}}, true /* ignoreResult */)
-}
-
-func (s *Stack) rpcSendMsg(req *pb.SyscallRequest_Sendmsg) (uint32, *syserr.Error) {
- id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */)
- <-c
-
- res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Sendmsg).Sendmsg.Result
- if e, ok := res.(*pb.SendmsgResponse_ErrorNumber); ok {
- return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
-
- return res.(*pb.SendmsgResponse_Length).Length, nil
-}
-
-func (s *Stack) sendMsg(fd uint32, buf []byte, to []byte, flags int) (int, *syserr.Error) {
- // Whitelist flags.
- if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 {
- return 0, syserr.ErrInvalidArgument
- }
-
- req := &pb.SyscallRequest_Sendmsg{&pb.SendmsgRequest{
- Fd: fd,
- Data: buf,
- Address: to,
- More: flags&linux.MSG_MORE != 0,
- EndOfRecord: flags&linux.MSG_EOR != 0,
- }}
-
- n, err := s.rpcSendMsg(req)
- return int(n), err
-}
-
-func (s *Stack) rpcRecvMsg(req *pb.SyscallRequest_Recvmsg) (*pb.RecvmsgResponse_ResultPayload, *syserr.Error) {
- id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */)
- <-c
-
- res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Recvmsg).Recvmsg.Result
- if e, ok := res.(*pb.RecvmsgResponse_ErrorNumber); ok {
- return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber))
- }
-
- return res.(*pb.RecvmsgResponse_Payload).Payload, nil
-}
-
-func (s *Stack) recvMsg(fd, l, flags uint32) ([]byte, *syserr.Error) {
- req := &pb.SyscallRequest_Recvmsg{&pb.RecvmsgRequest{
- Fd: fd,
- Length: l,
- Sender: false,
- Trunc: flags&linux.MSG_TRUNC != 0,
- Peek: flags&linux.MSG_PEEK != 0,
- }}
-
- res, err := s.rpcRecvMsg(req)
- if err != nil {
- return nil, err
- }
- return res.Data, nil
-}
-
-func (s *Stack) netlinkRequest(proto, family int) ([]byte, error) {
- fd, err := s.getNetlinkFd()
- if err != nil {
- return nil, err.ToError()
- }
- defer s.closeNetlinkFd(fd)
-
- lsa := syscall.SockaddrNetlink{Family: syscall.AF_NETLINK}
- b := binary.Marshal(nil, usermem.ByteOrder, &lsa)
- if err := s.bindNetlinkFd(fd, b); err != nil {
- return nil, err.ToError()
- }
-
- wb := newNetlinkRouteRequest(proto, 1, family)
- _, err = s.sendMsg(fd, wb, b, 0)
- if err != nil {
- return nil, err.ToError()
- }
-
- var tab []byte
-done:
- for {
- rb, err := s.recvMsg(fd, uint32(syscall.Getpagesize()), 0)
- nr := len(rb)
- if err != nil {
- return nil, err.ToError()
- }
-
- if nr < syscall.NLMSG_HDRLEN {
- return nil, syserr.ErrInvalidArgument.ToError()
- }
-
- tab = append(tab, rb...)
- msgs, e := syscall.ParseNetlinkMessage(rb)
- if e != nil {
- return nil, e
- }
-
- for _, m := range msgs {
- if m.Header.Type == syscall.NLMSG_DONE {
- break done
- }
- if m.Header.Type == syscall.NLMSG_ERROR {
- return nil, syserr.ErrInvalidArgument.ToError()
- }
- }
- }
-
- return tab, nil
-}
-
-// DoNetlinkRouteRequest returns routing information base, also known as RIB,
-// which consists of network facility information, states and parameters.
-func (s *Stack) DoNetlinkRouteRequest(req int) ([]syscall.NetlinkMessage, error) {
- data, err := s.netlinkRequest(req, syscall.AF_UNSPEC)
- if err != nil {
- return nil, err
- }
- return syscall.ParseNetlinkMessage(data)
-}
diff --git a/pkg/sentry/socket/rpcinet/syscall_rpc.proto b/pkg/sentry/socket/rpcinet/syscall_rpc.proto
deleted file mode 100644
index 9586f5923..000000000
--- a/pkg/sentry/socket/rpcinet/syscall_rpc.proto
+++ /dev/null
@@ -1,353 +0,0 @@
-syntax = "proto3";
-
-// package syscall_rpc is a set of networking related system calls that can be
-// forwarded to a socket gofer.
-//
-// TODO(b/77963526): Document individual RPCs.
-package syscall_rpc;
-
-message SendmsgRequest {
- uint32 fd = 1;
- bytes data = 2 [ctype = CORD];
- bytes address = 3;
- bool more = 4;
- bool end_of_record = 5;
-}
-
-message SendmsgResponse {
- oneof result {
- uint32 error_number = 1;
- uint32 length = 2;
- }
-}
-
-message IOCtlRequest {
- uint32 fd = 1;
- uint32 cmd = 2;
- bytes arg = 3;
-}
-
-message IOCtlResponse {
- oneof result {
- uint32 error_number = 1;
- bytes value = 2;
- }
-}
-
-message RecvmsgRequest {
- uint32 fd = 1;
- uint32 length = 2;
- bool sender = 3;
- bool peek = 4;
- bool trunc = 5;
- uint32 cmsg_length = 6;
-}
-
-message OpenRequest {
- bytes path = 1;
- uint32 flags = 2;
- uint32 mode = 3;
-}
-
-message OpenResponse {
- oneof result {
- uint32 error_number = 1;
- uint32 fd = 2;
- }
-}
-
-message ReadRequest {
- uint32 fd = 1;
- uint32 length = 2;
-}
-
-message ReadResponse {
- oneof result {
- uint32 error_number = 1;
- bytes data = 2 [ctype = CORD];
- }
-}
-
-message ReadFileRequest {
- string path = 1;
-}
-
-message ReadFileResponse {
- oneof result {
- uint32 error_number = 1;
- bytes data = 2 [ctype = CORD];
- }
-}
-
-message WriteRequest {
- uint32 fd = 1;
- bytes data = 2 [ctype = CORD];
-}
-
-message WriteResponse {
- oneof result {
- uint32 error_number = 1;
- uint32 length = 2;
- }
-}
-
-message WriteFileRequest {
- string path = 1;
- bytes content = 2;
-}
-
-message WriteFileResponse {
- uint32 error_number = 1;
- uint32 written = 2;
-}
-
-message AddressResponse {
- bytes address = 1;
- uint32 length = 2;
-}
-
-message RecvmsgResponse {
- message ResultPayload {
- bytes data = 1 [ctype = CORD];
- AddressResponse address = 2;
- uint32 length = 3;
- bytes cmsg_data = 4;
- }
- oneof result {
- uint32 error_number = 1;
- ResultPayload payload = 2;
- }
-}
-
-message BindRequest {
- uint32 fd = 1;
- bytes address = 2;
-}
-
-message BindResponse {
- uint32 error_number = 1;
-}
-
-message AcceptRequest {
- uint32 fd = 1;
- bool peer = 2;
- int64 flags = 3;
-}
-
-message AcceptResponse {
- message ResultPayload {
- uint32 fd = 1;
- AddressResponse address = 2;
- }
- oneof result {
- uint32 error_number = 1;
- ResultPayload payload = 2;
- }
-}
-
-message ConnectRequest {
- uint32 fd = 1;
- bytes address = 2;
-}
-
-message ConnectResponse {
- uint32 error_number = 1;
-}
-
-message ListenRequest {
- uint32 fd = 1;
- int64 backlog = 2;
-}
-
-message ListenResponse {
- uint32 error_number = 1;
-}
-
-message ShutdownRequest {
- uint32 fd = 1;
- int64 how = 2;
-}
-
-message ShutdownResponse {
- uint32 error_number = 1;
-}
-
-message CloseRequest {
- uint32 fd = 1;
-}
-
-message CloseResponse {
- uint32 error_number = 1;
-}
-
-message GetSockOptRequest {
- uint32 fd = 1;
- int64 level = 2;
- int64 name = 3;
- uint32 length = 4;
-}
-
-message GetSockOptResponse {
- oneof result {
- uint32 error_number = 1;
- bytes opt = 2;
- }
-}
-
-message SetSockOptRequest {
- uint32 fd = 1;
- int64 level = 2;
- int64 name = 3;
- bytes opt = 4;
-}
-
-message SetSockOptResponse {
- uint32 error_number = 1;
-}
-
-message GetSockNameRequest {
- uint32 fd = 1;
-}
-
-message GetSockNameResponse {
- oneof result {
- uint32 error_number = 1;
- AddressResponse address = 2;
- }
-}
-
-message GetPeerNameRequest {
- uint32 fd = 1;
-}
-
-message GetPeerNameResponse {
- oneof result {
- uint32 error_number = 1;
- AddressResponse address = 2;
- }
-}
-
-message SocketRequest {
- int64 family = 1;
- int64 type = 2;
- int64 protocol = 3;
-}
-
-message SocketResponse {
- oneof result {
- uint32 error_number = 1;
- uint32 fd = 2;
- }
-}
-
-message EpollWaitRequest {
- uint32 fd = 1;
- uint32 num_events = 2;
- sint64 msec = 3;
-}
-
-message EpollEvent {
- uint32 fd = 1;
- uint32 events = 2;
-}
-
-message EpollEvents {
- repeated EpollEvent events = 1;
-}
-
-message EpollWaitResponse {
- oneof result {
- uint32 error_number = 1;
- EpollEvents events = 2;
- }
-}
-
-message EpollCtlRequest {
- uint32 epfd = 1;
- int64 op = 2;
- uint32 fd = 3;
- EpollEvent event = 4;
-}
-
-message EpollCtlResponse {
- uint32 error_number = 1;
-}
-
-message EpollCreate1Request {
- int64 flag = 1;
-}
-
-message EpollCreate1Response {
- oneof result {
- uint32 error_number = 1;
- uint32 fd = 2;
- }
-}
-
-message PollRequest {
- uint32 fd = 1;
- uint32 events = 2;
-}
-
-message PollResponse {
- oneof result {
- uint32 error_number = 1;
- uint32 events = 2;
- }
-}
-
-message SyscallRequest {
- oneof args {
- SocketRequest socket = 1;
- SendmsgRequest sendmsg = 2;
- RecvmsgRequest recvmsg = 3;
- BindRequest bind = 4;
- AcceptRequest accept = 5;
- ConnectRequest connect = 6;
- ListenRequest listen = 7;
- ShutdownRequest shutdown = 8;
- CloseRequest close = 9;
- GetSockOptRequest get_sock_opt = 10;
- SetSockOptRequest set_sock_opt = 11;
- GetSockNameRequest get_sock_name = 12;
- GetPeerNameRequest get_peer_name = 13;
- EpollWaitRequest epoll_wait = 14;
- EpollCtlRequest epoll_ctl = 15;
- EpollCreate1Request epoll_create1 = 16;
- PollRequest poll = 17;
- ReadRequest read = 18;
- WriteRequest write = 19;
- OpenRequest open = 20;
- IOCtlRequest ioctl = 21;
- WriteFileRequest write_file = 22;
- ReadFileRequest read_file = 23;
- }
-}
-
-message SyscallResponse {
- oneof result {
- SocketResponse socket = 1;
- SendmsgResponse sendmsg = 2;
- RecvmsgResponse recvmsg = 3;
- BindResponse bind = 4;
- AcceptResponse accept = 5;
- ConnectResponse connect = 6;
- ListenResponse listen = 7;
- ShutdownResponse shutdown = 8;
- CloseResponse close = 9;
- GetSockOptResponse get_sock_opt = 10;
- SetSockOptResponse set_sock_opt = 11;
- GetSockNameResponse get_sock_name = 12;
- GetPeerNameResponse get_peer_name = 13;
- EpollWaitResponse epoll_wait = 14;
- EpollCtlResponse epoll_ctl = 15;
- EpollCreate1Response epoll_create1 = 16;
- PollResponse poll = 17;
- ReadResponse read = 18;
- WriteResponse write = 19;
- OpenResponse open = 20;
- IOCtlResponse ioctl = 21;
- WriteFileResponse write_file = 22;
- ReadFileResponse read_file = 23;
- }
-}