diff options
-rw-r--r-- | pkg/abi/linux/linux_abi_autogen_unsafe.go | 2 | ||||
-rw-r--r-- | pkg/sentry/socket/netstack/netstack.go | 19 | ||||
-rw-r--r-- | pkg/sentry/socket/socket.go | 1 | ||||
-rw-r--r-- | pkg/tcpip/tcpip.go | 100 | ||||
-rw-r--r-- | pkg/tcpip/transport/udp/endpoint.go | 24 | ||||
-rw-r--r-- | pkg/tcpip/transport/udp/udp_state_autogen.go | 47 |
6 files changed, 121 insertions, 72 deletions
diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go index 868b27001..ac656f14c 100644 --- a/pkg/abi/linux/linux_abi_autogen_unsafe.go +++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go @@ -138,7 +138,7 @@ func (s *Statx) MarshalUnsafe(dst []byte) { // UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. func (s *Statx) UnmarshalUnsafe(src []byte) { - if s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() { + if s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() { safecopy.CopyOut(unsafe.Pointer(s), src) } else { s.UnmarshalBytes(src) diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index c0b63a803..e7d2c83d7 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1169,6 +1169,17 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam return int32(v), nil + case linux.SO_NO_CHECK: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptBool(tcpip.NoChecksumOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + return boolToInt32(v), nil + default: socket.GetSockOptEmitUnimplementedEvent(t, name) } @@ -1720,6 +1731,14 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.OutOfBandInlineOption(v))) + case linux.SO_NO_CHECK: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + + v := usermem.ByteOrder.Uint32(optVal) + return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.NoChecksumOption, v != 0)) + case linux.SO_LINGER: if len(optVal) < linux.SizeOfLinger { return syserr.ErrInvalidArgument diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index 6580bd6e9..fcd7f9d7f 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -407,7 +407,6 @@ func emitUnimplementedEvent(t *kernel.Task, name int) { linux.SO_MARK, linux.SO_MAX_PACING_RATE, linux.SO_NOFCS, - linux.SO_NO_CHECK, linux.SO_OOBINLINE, linux.SO_PASSCRED, linux.SO_PASSSEC, diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 4d45dcc42..2be1c107a 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -585,59 +585,68 @@ type WriteOptions struct { type SockOptBool int const ( - // BroadcastOption is used by SetSockOpt/GetSockOpt to specify whether - // datagram sockets are allowed to send packets to a broadcast address. + // BroadcastOption is used by SetSockOptBool/GetSockOptBool to specify + // whether datagram sockets are allowed to send packets to a broadcast + // address. BroadcastOption SockOptBool = iota - // CorkOption is used by SetSockOpt/GetSockOpt to specify if data should be - // held until segments are full by the TCP transport protocol. + // CorkOption is used by SetSockOptBool/GetSockOptBool to specify if + // data should be held until segments are full by the TCP transport + // protocol. CorkOption - // DelayOption is used by SetSockOpt/GetSockOpt to specify if data - // should be sent out immediately by the transport protocol. For TCP, - // it determines if the Nagle algorithm is on or off. + // DelayOption is used by SetSockOptBool/GetSockOptBool to specify if + // data should be sent out immediately by the transport protocol. For + // TCP, it determines if the Nagle algorithm is on or off. DelayOption - // KeepaliveEnabledOption is used by SetSockOpt/GetSockOpt to specify whether - // TCP keepalive is enabled for this socket. + // KeepaliveEnabledOption is used by SetSockOptBool/GetSockOptBool to + // specify whether TCP keepalive is enabled for this socket. KeepaliveEnabledOption - // MulticastLoopOption is used by SetSockOpt/GetSockOpt to specify whether - // multicast packets sent over a non-loopback interface will be looped back. + // MulticastLoopOption is used by SetSockOptBool/GetSockOptBool to + // specify whether multicast packets sent over a non-loopback interface + // will be looped back. MulticastLoopOption - // PasscredOption is used by SetSockOpt/GetSockOpt to specify whether - // SCM_CREDENTIALS socket control messages are enabled. + // NoChecksumOption is used by SetSockOptBool/GetSockOptBool to specify + // whether UDP checksum is disabled for this socket. + NoChecksumOption + + // PasscredOption is used by SetSockOptBool/GetSockOptBool to specify + // whether SCM_CREDENTIALS socket control messages are enabled. // // Only supported on Unix sockets. PasscredOption - // QuickAckOption is stubbed out in SetSockOpt/GetSockOpt. + // QuickAckOption is stubbed out in SetSockOptBool/GetSockOptBool. QuickAckOption - // ReceiveTClassOption is used by SetSockOpt/GetSockOpt to specify if the - // IPV6_TCLASS ancillary message is passed with incoming packets. + // ReceiveTClassOption is used by SetSockOptBool/GetSockOptBool to + // specify if the IPV6_TCLASS ancillary message is passed with incoming + // packets. ReceiveTClassOption - // ReceiveTOSOption is used by SetSockOpt/GetSockOpt to specify if the TOS - // ancillary message is passed with incoming packets. + // ReceiveTOSOption is used by SetSockOptBool/GetSockOptBool to specify + // if the TOS ancillary message is passed with incoming packets. ReceiveTOSOption - // ReceiveIPPacketInfoOption is used by {G,S}etSockOptBool to specify - // if more inforamtion is provided with incoming packets such - // as interface index and address. + // ReceiveIPPacketInfoOption is used by SetSockOptBool/GetSockOptBool to + // specify if more inforamtion is provided with incoming packets such as + // interface index and address. ReceiveIPPacketInfoOption - // ReuseAddressOption is used by SetSockOpt/GetSockOpt to specify whether Bind() - // should allow reuse of local address. + // ReuseAddressOption is used by SetSockOptBool/GetSockOptBool to + // specify whether Bind() should allow reuse of local address. ReuseAddressOption - // ReusePortOption is used by SetSockOpt/GetSockOpt to permit multiple sockets - // to be bound to an identical socket address. + // ReusePortOption is used by SetSockOptBool/GetSockOptBool to permit + // multiple sockets to be bound to an identical socket address. ReusePortOption - // V6OnlyOption is used by {G,S}etSockOptBool to specify whether an IPv6 - // socket is to be restricted to sending and receiving IPv6 packets only. + // V6OnlyOption is used by SetSockOptBool/GetSockOptBool to specify + // whether an IPv6 socket is to be restricted to sending and receiving + // IPv6 packets only. V6OnlyOption ) @@ -645,25 +654,27 @@ const ( type SockOptInt int const ( - // KeepaliveCountOption is used by SetSockOpt/GetSockOpt to specify the number - // of un-ACKed TCP keepalives that will be sent before the connection is - // closed. + // KeepaliveCountOption is used by SetSockOptInt/GetSockOptInt to + // specify the number of un-ACKed TCP keepalives that will be sent + // before the connection is closed. KeepaliveCountOption SockOptInt = iota - // IPv4TOSOption is used by SetSockOpt/GetSockOpt to specify TOS + // IPv4TOSOption is used by SetSockOptInt/GetSockOptInt to specify TOS // for all subsequent outgoing IPv4 packets from the endpoint. IPv4TOSOption - // IPv6TrafficClassOption is used by SetSockOpt/GetSockOpt to specify TOS - // for all subsequent outgoing IPv6 packets from the endpoint. + // IPv6TrafficClassOption is used by SetSockOptInt/GetSockOptInt to + // specify TOS for all subsequent outgoing IPv6 packets from the + // endpoint. IPv6TrafficClassOption - // MaxSegOption is used by SetSockOpt/GetSockOpt to set/get the current - // Maximum Segment Size(MSS) value as specified using the TCP_MAXSEG option. + // MaxSegOption is used by SetSockOptInt/GetSockOptInt to set/get the + // current Maximum Segment Size(MSS) value as specified using the + // TCP_MAXSEG option. MaxSegOption - // MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default - // TTL value for multicast messages. The default is 1. + // MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control + // the default TTL value for multicast messages. The default is 1. MulticastTTLOption // ReceiveQueueSizeOption is used in GetSockOptInt to specify that the @@ -682,21 +693,22 @@ const ( // number of unread bytes in the output buffer should be returned. SendQueueSizeOption - // TTLOption is used by SetSockOpt/GetSockOpt to control the default TTL/hop - // limit value for unicast messages. The default is protocol specific. + // TTLOption is used by SetSockOptInt/GetSockOptInt to control the + // default TTL/hop limit value for unicast messages. The default is + // protocol specific. // // A zero value indicates the default. TTLOption - // TCPSynCountOption is used by SetSockOpt/GetSockOpt to specify the number of - // SYN retransmits that TCP should send before aborting the attempt to - // connect. It cannot exceed 255. + // TCPSynCountOption is used by SetSockOptInt/GetSockOptInt to specify + // the number of SYN retransmits that TCP should send before aborting + // the attempt to connect. It cannot exceed 255. // // NOTE: This option is currently only stubbed out and is no-op. TCPSynCountOption - // TCPWindowClampOption is used by SetSockOpt/GetSockOpt to bound the size - // of the advertised window to this value. + // TCPWindowClampOption is used by SetSockOptInt/GetSockOptInt to bound + // the size of the advertised window to this value. // // NOTE: This option is currently only stubed out and is a no-op TCPWindowClampOption diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 8bdc1ee1f..cae29fbff 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -109,6 +109,7 @@ type endpoint struct { portFlags ports.Flags bindToDevice tcpip.NICID broadcast bool + noChecksum bool lastErrorMu sync.Mutex `state:"nosave"` lastError *tcpip.Error `state:".(string)"` @@ -529,7 +530,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c useDefaultTTL = false } - if err := sendUDP(route, buffer.View(v).ToVectorisedView(), e.ID.LocalPort, dstPort, ttl, useDefaultTTL, e.sendTOS, e.owner); err != nil { + if err := sendUDP(route, buffer.View(v).ToVectorisedView(), e.ID.LocalPort, dstPort, ttl, useDefaultTTL, e.sendTOS, e.owner, e.noChecksum); err != nil { return 0, nil, err } return int64(len(v)), nil, nil @@ -553,6 +554,11 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { e.multicastLoop = v e.mu.Unlock() + case tcpip.NoChecksumOption: + e.mu.Lock() + e.noChecksum = v + e.mu.Unlock() + case tcpip.ReceiveTOSOption: e.mu.Lock() e.receiveTOS = v @@ -825,6 +831,12 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { e.mu.RUnlock() return v, nil + case tcpip.NoChecksumOption: + e.mu.RLock() + v := e.noChecksum + e.mu.RUnlock() + return v, nil + case tcpip.ReceiveTOSOption: e.mu.RLock() v := e.receiveTOS @@ -959,7 +971,7 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { // sendUDP sends a UDP segment via the provided network endpoint and under the // provided identity. -func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner) *tcpip.Error { +func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner, noChecksum bool) *tcpip.Error { // Allocate a buffer for the UDP header. hdr := buffer.NewPrependable(header.UDPMinimumSize + int(r.MaxHeaderLength())) @@ -973,8 +985,12 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u Length: length, }) - // Only calculate the checksum if offloading isn't supported. - if r.Capabilities()&stack.CapabilityTXChecksumOffload == 0 { + // Set the checksum field unless TX checksum offload is enabled. + // On IPv4, UDP checksum is optional, and a zero value indicates the + // transmitter skipped the checksum generation (RFC768). + // On IPv6, UDP checksum is not optional (RFC2460 Section 8.1). + if r.Capabilities()&stack.CapabilityTXChecksumOffload == 0 && + (!noChecksum || r.NetProto == header.IPv6ProtocolNumber) { xsum := r.PseudoHeaderChecksum(ProtocolNumber, length) for _, v := range data.Views() { xsum = header.Checksum(v, xsum) diff --git a/pkg/tcpip/transport/udp/udp_state_autogen.go b/pkg/tcpip/transport/udp/udp_state_autogen.go index 943022c38..50fd0f186 100644 --- a/pkg/tcpip/transport/udp/udp_state_autogen.go +++ b/pkg/tcpip/transport/udp/udp_state_autogen.go @@ -73,6 +73,7 @@ func (x *endpoint) StateFields() []string { "portFlags", "bindToDevice", "broadcast", + "noChecksum", "lastError", "boundBindToDevice", "boundPortFlags", @@ -92,7 +93,7 @@ func (x *endpoint) StateSave(m state.Sink) { var rcvBufSizeMax int = x.saveRcvBufSizeMax() m.SaveValue(5, rcvBufSizeMax) var lastError string = x.saveLastError() - m.SaveValue(21, lastError) + m.SaveValue(22, lastError) m.Save(0, &x.TransportEndpointInfo) m.Save(1, &x.waiterQueue) m.Save(2, &x.uniqueID) @@ -113,16 +114,17 @@ func (x *endpoint) StateSave(m state.Sink) { m.Save(18, &x.portFlags) m.Save(19, &x.bindToDevice) m.Save(20, &x.broadcast) - m.Save(22, &x.boundBindToDevice) - m.Save(23, &x.boundPortFlags) - m.Save(24, &x.sendTOS) - m.Save(25, &x.receiveTOS) - m.Save(26, &x.receiveTClass) - m.Save(27, &x.receiveIPPacketInfo) - m.Save(28, &x.shutdownFlags) - m.Save(29, &x.multicastMemberships) - m.Save(30, &x.effectiveNetProtos) - m.Save(31, &x.owner) + m.Save(21, &x.noChecksum) + m.Save(23, &x.boundBindToDevice) + m.Save(24, &x.boundPortFlags) + m.Save(25, &x.sendTOS) + m.Save(26, &x.receiveTOS) + m.Save(27, &x.receiveTClass) + m.Save(28, &x.receiveIPPacketInfo) + m.Save(29, &x.shutdownFlags) + m.Save(30, &x.multicastMemberships) + m.Save(31, &x.effectiveNetProtos) + m.Save(32, &x.owner) } func (x *endpoint) StateLoad(m state.Source) { @@ -146,18 +148,19 @@ func (x *endpoint) StateLoad(m state.Source) { m.Load(18, &x.portFlags) m.Load(19, &x.bindToDevice) m.Load(20, &x.broadcast) - m.Load(22, &x.boundBindToDevice) - m.Load(23, &x.boundPortFlags) - m.Load(24, &x.sendTOS) - m.Load(25, &x.receiveTOS) - m.Load(26, &x.receiveTClass) - m.Load(27, &x.receiveIPPacketInfo) - m.Load(28, &x.shutdownFlags) - m.Load(29, &x.multicastMemberships) - m.Load(30, &x.effectiveNetProtos) - m.Load(31, &x.owner) + m.Load(21, &x.noChecksum) + m.Load(23, &x.boundBindToDevice) + m.Load(24, &x.boundPortFlags) + m.Load(25, &x.sendTOS) + m.Load(26, &x.receiveTOS) + m.Load(27, &x.receiveTClass) + m.Load(28, &x.receiveIPPacketInfo) + m.Load(29, &x.shutdownFlags) + m.Load(30, &x.multicastMemberships) + m.Load(31, &x.effectiveNetProtos) + m.Load(32, &x.owner) m.LoadValue(5, new(int), func(y interface{}) { x.loadRcvBufSizeMax(y.(int)) }) - m.LoadValue(21, new(string), func(y interface{}) { x.loadLastError(y.(string)) }) + m.LoadValue(22, new(string), func(y interface{}) { x.loadLastError(y.(string)) }) m.AfterLoad(x.afterLoad) } |