summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fs/host/socket.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fs/host/socket.go')
-rw-r--r--pkg/sentry/fs/host/socket.go145
1 files changed, 107 insertions, 38 deletions
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index e11772946..68ebf6402 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -19,6 +19,7 @@ import (
"syscall"
"gvisor.googlesource.com/gvisor/pkg/fd"
+ "gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/pkg/refs"
"gvisor.googlesource.com/gvisor/pkg/sentry/context"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
@@ -33,6 +34,11 @@ import (
"gvisor.googlesource.com/gvisor/pkg/waiter/fdnotifier"
)
+// maxSendBufferSize is the maximum host send buffer size allowed for endpoint.
+//
+// N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max).
+const maxSendBufferSize = 8 << 20
+
// endpoint encapsulates the state needed to represent a host Unix socket.
//
// TODO: Remove/merge with ConnectedEndpoint.
@@ -41,15 +47,17 @@ import (
type endpoint struct {
queue waiter.Queue `state:"zerovalue"`
- // stype is the type of Unix socket. (Ex: unix.SockStream,
- // unix.SockSeqpacket, unix.SockDgram)
- stype unix.SockType `state:"nosave"`
-
// fd is the host fd backing this file.
fd int `state:"nosave"`
// If srfd >= 0, it is the host fd that fd was imported from.
srfd int `state:"wait"`
+
+ // stype is the type of Unix socket.
+ stype unix.SockType `state:"nosave"`
+
+ // sndbuf is the size of the send buffer.
+ sndbuf int `state:"nosave"`
}
func (e *endpoint) init() error {
@@ -67,12 +75,21 @@ func (e *endpoint) init() error {
if err != nil {
return err
}
+ e.stype = unix.SockType(stype)
+
+ e.sndbuf, err = syscall.GetsockoptInt(e.fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF)
+ if err != nil {
+ return err
+ }
+ if e.sndbuf > maxSendBufferSize {
+ log.Warningf("Socket send buffer too large: %d", e.sndbuf)
+ return syserror.EINVAL
+ }
if err := syscall.SetNonblock(e.fd, true); err != nil {
return err
}
- e.stype = unix.SockType(stype)
return fdnotifier.AddFD(int32(e.fd), &e.queue)
}
@@ -189,13 +206,13 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
*o = 0
return nil
case *tcpip.SendBufferSizeOption:
- v, err := syscall.GetsockoptInt(e.fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF)
- *o = tcpip.SendBufferSizeOption(v)
- return translateError(err)
+ *o = tcpip.SendBufferSizeOption(e.sndbuf)
+ return nil
case *tcpip.ReceiveBufferSizeOption:
- v, err := syscall.GetsockoptInt(e.fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF)
- *o = tcpip.ReceiveBufferSizeOption(v)
- return translateError(err)
+ // N.B. Unix sockets don't use the receive buffer. We'll claim it is
+ // the same size as the send buffer.
+ *o = tcpip.ReceiveBufferSizeOption(e.sndbuf)
+ return nil
case *tcpip.ReuseAddressOption:
v, err := syscall.GetsockoptInt(e.fd, syscall.SOL_SOCKET, syscall.SO_REUSEADDR)
*o = tcpip.ReuseAddressOption(v)
@@ -240,33 +257,47 @@ func (e *endpoint) SendMsg(data [][]byte, controlMessages unix.ControlMessages,
if to != nil {
return 0, tcpip.ErrInvalidEndpointState
}
- return sendMsg(e.fd, data, controlMessages)
+
+ // Since stream sockets don't preserve message boundaries, we can write
+ // only as much of the message as fits in the send buffer.
+ truncate := e.stype == unix.SockStream
+
+ return sendMsg(e.fd, data, controlMessages, e.sndbuf, truncate)
}
-func sendMsg(fd int, data [][]byte, controlMessages unix.ControlMessages) (uintptr, *tcpip.Error) {
+func sendMsg(fd int, data [][]byte, controlMessages unix.ControlMessages, maxlen int, truncate bool) (uintptr, *tcpip.Error) {
if !controlMessages.Empty() {
return 0, tcpip.ErrInvalidEndpointState
}
- n, err := fdWriteVec(fd, data)
+ n, totalLen, err := fdWriteVec(fd, data, maxlen, truncate)
+ if n < totalLen && err == nil {
+ // The host only returns a short write if it would otherwise
+ // block (and only for stream sockets).
+ err = syserror.EAGAIN
+ }
return n, translateError(err)
}
// RecvMsg implements unix.Endpoint.RecvMsg.
func (e *endpoint) RecvMsg(data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (uintptr, uintptr, unix.ControlMessages, *tcpip.Error) {
- return recvMsg(e.fd, data, numRights, peek, addr)
+ // N.B. Unix sockets don't have a receive buffer, the send buffer
+ // serves both purposes.
+ rl, ml, cm, err := recvMsg(e.fd, data, numRights, peek, addr, e.sndbuf)
+ if rl > 0 && err == tcpip.ErrWouldBlock {
+ // Message did not fill buffer; that's fine, no need to block.
+ err = nil
+ }
+ return rl, ml, cm, err
}
-func recvMsg(fd int, data [][]byte, numRights uintptr, peek bool, addr *tcpip.FullAddress) (uintptr, uintptr, unix.ControlMessages, *tcpip.Error) {
+func recvMsg(fd int, data [][]byte, numRights uintptr, peek bool, addr *tcpip.FullAddress, maxlen int) (uintptr, uintptr, unix.ControlMessages, *tcpip.Error) {
var cm unet.ControlMessage
if numRights > 0 {
cm.EnableFDs(int(numRights))
}
- rl, ml, cl, err := fdReadVec(fd, data, []byte(cm), peek)
- if err == syscall.EAGAIN {
- return 0, 0, unix.ControlMessages{}, tcpip.ErrWouldBlock
- }
- if err != nil {
- return 0, 0, unix.ControlMessages{}, translateError(err)
+ rl, ml, cl, rerr := fdReadVec(fd, data, []byte(cm), peek, maxlen)
+ if rl == 0 && rerr != nil {
+ return 0, 0, unix.ControlMessages{}, translateError(rerr)
}
// Trim the control data if we received less than the full amount.
@@ -276,7 +307,7 @@ func recvMsg(fd int, data [][]byte, numRights uintptr, peek bool, addr *tcpip.Fu
// Avoid extra allocations in the case where there isn't any control data.
if len(cm) == 0 {
- return rl, ml, unix.ControlMessages{}, nil
+ return rl, ml, unix.ControlMessages{}, translateError(rerr)
}
fds, err := cm.ExtractFDs()
@@ -285,9 +316,9 @@ func recvMsg(fd int, data [][]byte, numRights uintptr, peek bool, addr *tcpip.Fu
}
if len(fds) == 0 {
- return rl, ml, unix.ControlMessages{}, nil
+ return rl, ml, unix.ControlMessages{}, translateError(rerr)
}
- return rl, ml, control.New(nil, nil, newSCMRights(fds)), nil
+ return rl, ml, control.New(nil, nil, newSCMRights(fds)), translateError(rerr)
}
// NewConnectedEndpoint creates a new ConnectedEndpoint backed by a host FD
@@ -307,7 +338,27 @@ func NewConnectedEndpoint(file *fd.FD, queue *waiter.Queue, path string) (*Conne
return nil, tcpip.ErrInvalidEndpointState
}
- e := &ConnectedEndpoint{path: path, queue: queue, file: file}
+ stype, err := syscall.GetsockoptInt(file.FD(), syscall.SOL_SOCKET, syscall.SO_TYPE)
+ if err != nil {
+ return nil, translateError(err)
+ }
+
+ sndbuf, err := syscall.GetsockoptInt(file.FD(), syscall.SOL_SOCKET, syscall.SO_SNDBUF)
+ if err != nil {
+ return nil, translateError(err)
+ }
+ if sndbuf > maxSendBufferSize {
+ log.Warningf("Socket send buffer too large: %d", sndbuf)
+ return nil, tcpip.ErrInvalidEndpointState
+ }
+
+ e := &ConnectedEndpoint{
+ path: path,
+ queue: queue,
+ file: file,
+ stype: unix.SockType(stype),
+ sndbuf: sndbuf,
+ }
// AtomicRefCounters start off with a single reference. We need two.
e.ref.IncRef()
@@ -346,6 +397,17 @@ type ConnectedEndpoint struct {
// writeClosed is true if the FD has write shutdown or if it has been
// closed.
writeClosed bool
+
+ // stype is the type of Unix socket.
+ stype unix.SockType
+
+ // sndbuf is the size of the send buffer.
+ //
+ // N.B. When this is smaller than the host size, we present it via
+ // GetSockOpt and message splitting/rejection in SendMsg, but do not
+ // prevent lots of small messages from filling the real send buffer
+ // size on the host.
+ sndbuf int
}
// Send implements unix.ConnectedEndpoint.Send.
@@ -355,7 +417,12 @@ func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages unix.ControlMess
if c.writeClosed {
return 0, false, tcpip.ErrClosedForSend
}
- n, err := sendMsg(c.file.FD(), data, controlMessages)
+
+ // Since stream sockets don't preserve message boundaries, we can write
+ // only as much of the message as fits in the send buffer.
+ truncate := c.stype == unix.SockStream
+
+ n, err := sendMsg(c.file.FD(), data, controlMessages, c.sndbuf, truncate)
// There is no need for the callee to call SendNotify because sendMsg uses
// the host's sendmsg(2) and the host kernel's queue.
return n, false, err
@@ -411,7 +478,15 @@ func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights uintptr, p
if c.readClosed {
return 0, 0, unix.ControlMessages{}, tcpip.FullAddress{}, false, tcpip.ErrClosedForReceive
}
- rl, ml, cm, err := recvMsg(c.file.FD(), data, numRights, peek, nil)
+
+ // N.B. Unix sockets don't have a receive buffer, the send buffer
+ // serves both purposes.
+ rl, ml, cm, err := recvMsg(c.file.FD(), data, numRights, peek, nil, c.sndbuf)
+ if rl > 0 && err == tcpip.ErrWouldBlock {
+ // Message did not fill buffer; that's fine, no need to block.
+ err = nil
+ }
+
// There is no need for the callee to call RecvNotify because recvMsg uses
// the host's recvmsg(2) and the host kernel's queue.
return rl, ml, cm, tcpip.FullAddress{Addr: tcpip.Address(c.path)}, false, err
@@ -460,20 +535,14 @@ func (c *ConnectedEndpoint) RecvQueuedSize() int64 {
// SendMaxQueueSize implements unix.Receiver.SendMaxQueueSize.
func (c *ConnectedEndpoint) SendMaxQueueSize() int64 {
- v, err := syscall.GetsockoptInt(c.file.FD(), syscall.SOL_SOCKET, syscall.SO_SNDBUF)
- if err != nil {
- return -1
- }
- return int64(v)
+ return int64(c.sndbuf)
}
// RecvMaxQueueSize implements unix.Receiver.RecvMaxQueueSize.
func (c *ConnectedEndpoint) RecvMaxQueueSize() int64 {
- v, err := syscall.GetsockoptInt(c.file.FD(), syscall.SOL_SOCKET, syscall.SO_RCVBUF)
- if err != nil {
- return -1
- }
- return int64(v)
+ // N.B. Unix sockets don't use the receive buffer. We'll claim it is
+ // the same size as the send buffer.
+ return int64(c.sndbuf)
}
// Release implements unix.ConnectedEndpoint.Release and unix.Receiver.Release.