summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rwxr-xr-xpkg/sentry/platform/ring0/defs_impl.go2
-rw-r--r--pkg/sentry/socket/epsocket/epsocket.go20
-rw-r--r--pkg/tcpip/header/ipv4.go4
-rw-r--r--pkg/tcpip/header/tcp.go17
-rw-r--r--pkg/tcpip/tcpip.go4
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go26
-rwxr-xr-xpkg/tcpip/transport/tcp/tcp_state_autogen.go2
-rw-r--r--runsc/boot/fs.go3
8 files changed, 74 insertions, 4 deletions
diff --git a/pkg/sentry/platform/ring0/defs_impl.go b/pkg/sentry/platform/ring0/defs_impl.go
index d4bfc5a4a..ea3f514cd 100755
--- a/pkg/sentry/platform/ring0/defs_impl.go
+++ b/pkg/sentry/platform/ring0/defs_impl.go
@@ -1,10 +1,10 @@
package ring0
import (
+ "gvisor.dev/gvisor/pkg/cpuid"
"syscall"
"fmt"
- "gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/usermem"
"io"
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go
index 9d1bcfd41..69eff7373 100644
--- a/pkg/sentry/socket/epsocket/epsocket.go
+++ b/pkg/sentry/socket/epsocket/epsocket.go
@@ -867,6 +867,18 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
return int32(v), nil
+ case linux.TCP_MAXSEG:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ var v tcpip.MaxSegOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ return int32(v), nil
+
case linux.TCP_KEEPIDLE:
if outLen < sizeOfInt32 {
return nil, syserr.ErrInvalidArgument
@@ -1219,6 +1231,14 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
v := usermem.ByteOrder.Uint32(optVal)
return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.QuickAckOption(v)))
+ case linux.TCP_MAXSEG:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+
+ v := usermem.ByteOrder.Uint32(optVal)
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.MaxSegOption(v)))
+
case linux.TCP_KEEPIDLE:
if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go
index 7da4c4845..7b8034de4 100644
--- a/pkg/tcpip/header/ipv4.go
+++ b/pkg/tcpip/header/ipv4.go
@@ -85,6 +85,10 @@ const (
// units, the header cannot exceed 15*4 = 60 bytes.
IPv4MaximumHeaderSize = 60
+ // MinIPFragmentPayloadSize is the minimum number of payload bytes that
+ // the first fragment must carry when an IPv4 packet is fragmented.
+ MinIPFragmentPayloadSize = 8
+
// IPv4AddressSize is the size, in bytes, of an IPv4 address.
IPv4AddressSize = 4
diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go
index 1141443bb..82cfe785c 100644
--- a/pkg/tcpip/header/tcp.go
+++ b/pkg/tcpip/header/tcp.go
@@ -176,6 +176,21 @@ const (
// TCPProtocolNumber is TCP's transport protocol number.
TCPProtocolNumber tcpip.TransportProtocolNumber = 6
+
+ // TCPMinimumMSS is the minimum acceptable value for MSS. This is the
+ // same as the value TCP_MIN_MSS defined net/tcp.h.
+ TCPMinimumMSS = IPv4MaximumHeaderSize + TCPHeaderMaximumSize + MinIPFragmentPayloadSize - IPv4MinimumSize - TCPMinimumSize
+
+ // TCPMaximumMSS is the maximum acceptable value for MSS.
+ TCPMaximumMSS = 0xffff
+
+ // TCPDefaultMSS is the MSS value that should be used if an MSS option
+ // is not received from the peer. It's also the value returned by
+ // TCP_MAXSEG option for a socket in an unconnected state.
+ //
+ // Per RFC 1122, page 85: "If an MSS option is not received at
+ // connection setup, TCP MUST assume a default send MSS of 536."
+ TCPDefaultMSS = 536
)
// SourcePort returns the "source port" field of the tcp header.
@@ -306,7 +321,7 @@ func ParseSynOptions(opts []byte, isAck bool) TCPSynOptions {
synOpts := TCPSynOptions{
// Per RFC 1122, page 85: "If an MSS option is not received at
// connection setup, TCP MUST assume a default send MSS of 536."
- MSS: 536,
+ MSS: TCPDefaultMSS,
// If no window scale option is specified, WS in options is
// returned as -1; this is because the absence of the option
// indicates that the we cannot use window scaling on the
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index c61f96fb0..c4076666a 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -496,6 +496,10 @@ type AvailableCongestionControlOption string
// buffer moderation.
type ModerateReceiveBufferOption bool
+// MaxSegOption is used by SetSockOpt/GetSockOpt to set/get the current
+// Maximum Segment Size(MSS) value as specified using the TCP_MAXSEG option.
+type MaxSegOption int
+
// MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default
// TTL value for multicast messages. The default is 1.
type MulticastTTLOption uint8
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index cb40fea94..beb90afb5 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -117,6 +117,7 @@ const (
notifyDrain
notifyReset
notifyKeepaliveChanged
+ notifyMSSChanged
)
// SACKInfo holds TCP SACK related information for a given endpoint.
@@ -218,8 +219,6 @@ type endpoint struct {
mu sync.RWMutex `state:"nosave"`
id stack.TransportEndpointID
- // state endpointState `state:".(endpointState)"`
- // pState ProtocolState
state EndpointState `state:".(EndpointState)"`
isPortReserved bool `state:"manual"`
@@ -313,6 +312,10 @@ type endpoint struct {
// in SYN-RCVD state.
synRcvdCount int
+ // userMSS if non-zero is the MSS value explicitly set by the user
+ // for this endpoint using the TCP_MAXSEG setsockopt.
+ userMSS int
+
// The following fields are used to manage the send buffer. When
// segments are ready to be sent, they are added to sndQueue and the
// protocol goroutine is signaled via sndWaker.
@@ -917,6 +920,17 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
}
return nil
+ case tcpip.MaxSegOption:
+ userMSS := v
+ if userMSS < header.TCPMinimumMSS || userMSS > header.TCPMaximumMSS {
+ return tcpip.ErrInvalidOptionValue
+ }
+ e.mu.Lock()
+ e.userMSS = int(userMSS)
+ e.mu.Unlock()
+ e.notifyProtocolGoroutine(notifyMSSChanged)
+ return nil
+
case tcpip.ReceiveBufferSizeOption:
// Make sure the receive buffer size is within the min and max
// allowed.
@@ -1096,6 +1110,14 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
e.lastErrorMu.Unlock()
return err
+ case *tcpip.MaxSegOption:
+ // This is just stubbed out. Linux never returns the user_mss
+ // value as it either returns the defaultMSS or returns the
+ // actual current MSS. Netstack just returns the defaultMSS
+ // always for now.
+ *o = header.TCPDefaultMSS
+ return nil
+
case *tcpip.SendBufferSizeOption:
e.sndBufMu.Lock()
*o = tcpip.SendBufferSizeOption(e.sndBufSize)
diff --git a/pkg/tcpip/transport/tcp/tcp_state_autogen.go b/pkg/tcpip/transport/tcp/tcp_state_autogen.go
index 5b679f586..159444cc2 100755
--- a/pkg/tcpip/transport/tcp/tcp_state_autogen.go
+++ b/pkg/tcpip/transport/tcp/tcp_state_autogen.go
@@ -114,6 +114,7 @@ func (x *endpoint) save(m state.Map) {
m.Save("slowAck", &x.slowAck)
m.Save("segmentQueue", &x.segmentQueue)
m.Save("synRcvdCount", &x.synRcvdCount)
+ m.Save("userMSS", &x.userMSS)
m.Save("sndBufSize", &x.sndBufSize)
m.Save("sndBufUsed", &x.sndBufUsed)
m.Save("sndClosed", &x.sndClosed)
@@ -161,6 +162,7 @@ func (x *endpoint) load(m state.Map) {
m.Load("slowAck", &x.slowAck)
m.LoadWait("segmentQueue", &x.segmentQueue)
m.Load("synRcvdCount", &x.synRcvdCount)
+ m.Load("userMSS", &x.userMSS)
m.Load("sndBufSize", &x.sndBufSize)
m.Load("sndBufUsed", &x.sndBufUsed)
m.Load("sndClosed", &x.sndClosed)
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index f9a6f2d3c..d3e3196fd 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -906,7 +906,10 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun
}
defer target.DecRef()
+ // Take a ref on the inode that is about to be (re)-mounted.
+ source.root.IncRef()
if err := mns.Mount(ctx, target, source.root); err != nil {
+ source.root.DecRef()
return fmt.Errorf("bind mount %q error: %v", mount.Destination, err)
}