summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/abi/linux/BUILD1
-rw-r--r--pkg/abi/linux/ip.go107
-rw-r--r--pkg/abi/linux/netlink.go14
-rw-r--r--pkg/abi/linux/socket.go87
-rw-r--r--pkg/abi/linux/tcp.go54
-rw-r--r--pkg/sentry/socket/epsocket/epsocket.go675
-rw-r--r--pkg/sentry/socket/netlink/socket.go33
-rw-r--r--pkg/sentry/socket/socket.go91
-rw-r--r--runsc/boot/compat.go10
-rw-r--r--runsc/boot/compat_amd64.go55
-rw-r--r--runsc/boot/compat_test.go39
11 files changed, 902 insertions, 264 deletions
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD
index f8f82c0da..1f6e43605 100644
--- a/pkg/abi/linux/BUILD
+++ b/pkg/abi/linux/BUILD
@@ -43,6 +43,7 @@ go_library(
"shm.go",
"signal.go",
"socket.go",
+ "tcp.go",
"time.go",
"timer.go",
"tty.go",
diff --git a/pkg/abi/linux/ip.go b/pkg/abi/linux/ip.go
index fcec16965..77ac1062c 100644
--- a/pkg/abi/linux/ip.go
+++ b/pkg/abi/linux/ip.go
@@ -42,3 +42,110 @@ const (
IPPROTO_MPLS = 137
IPPROTO_RAW = 255
)
+
+// Socket options from uapi/linux/in.h
+const (
+ IP_TOS = 1
+ IP_TTL = 2
+ IP_HDRINCL = 3
+ IP_OPTIONS = 4
+ IP_ROUTER_ALERT = 5
+ IP_RECVOPTS = 6
+ IP_RETOPTS = 7
+ IP_PKTINFO = 8
+ IP_PKTOPTIONS = 9
+ IP_MTU_DISCOVER = 10
+ IP_RECVERR = 11
+ IP_RECVTTL = 12
+ IP_RECVTOS = 13
+ IP_MTU = 14
+ IP_FREEBIND = 15
+ IP_IPSEC_POLICY = 16
+ IP_XFRM_POLICY = 17
+ IP_PASSSEC = 18
+ IP_TRANSPARENT = 19
+ IP_ORIGDSTADDR = 20
+ IP_RECVORIGDSTADDR = IP_ORIGDSTADDR
+ IP_MINTTL = 21
+ IP_NODEFRAG = 22
+ IP_CHECKSUM = 23
+ IP_BIND_ADDRESS_NO_PORT = 24
+ IP_RECVFRAGSIZE = 25
+ IP_MULTICAST_IF = 32
+ IP_MULTICAST_TTL = 33
+ IP_MULTICAST_LOOP = 34
+ IP_ADD_MEMBERSHIP = 35
+ IP_DROP_MEMBERSHIP = 36
+ IP_UNBLOCK_SOURCE = 37
+ IP_BLOCK_SOURCE = 38
+ IP_ADD_SOURCE_MEMBERSHIP = 39
+ IP_DROP_SOURCE_MEMBERSHIP = 40
+ IP_MSFILTER = 41
+ MCAST_JOIN_GROUP = 42
+ MCAST_BLOCK_SOURCE = 43
+ MCAST_UNBLOCK_SOURCE = 44
+ MCAST_LEAVE_GROUP = 45
+ MCAST_JOIN_SOURCE_GROUP = 46
+ MCAST_LEAVE_SOURCE_GROUP = 47
+ MCAST_MSFILTER = 48
+ IP_MULTICAST_ALL = 49
+ IP_UNICAST_IF = 50
+)
+
+// Socket options from uapi/linux/in6.h
+const (
+ IPV6_ADDRFORM = 1
+ IPV6_2292PKTINFO = 2
+ IPV6_2292HOPOPTS = 3
+ IPV6_2292DSTOPTS = 4
+ IPV6_2292RTHDR = 5
+ IPV6_2292PKTOPTIONS = 6
+ IPV6_CHECKSUM = 7
+ IPV6_2292HOPLIMIT = 8
+ IPV6_NEXTHOP = 9
+ IPV6_FLOWINFO = 11
+ IPV6_UNICAST_HOPS = 16
+ IPV6_MULTICAST_IF = 17
+ IPV6_MULTICAST_HOPS = 18
+ IPV6_MULTICAST_LOOP = 19
+ IPV6_ADD_MEMBERSHIP = 20
+ IPV6_DROP_MEMBERSHIP = 21
+ IPV6_ROUTER_ALERT = 22
+ IPV6_MTU_DISCOVER = 23
+ IPV6_MTU = 24
+ IPV6_RECVERR = 25
+ IPV6_V6ONLY = 26
+ IPV6_JOIN_ANYCAST = 27
+ IPV6_LEAVE_ANYCAST = 28
+ IPV6_MULTICAST_ALL = 29
+ IPV6_FLOWLABEL_MGR = 32
+ IPV6_FLOWINFO_SEND = 33
+ IPV6_IPSEC_POLICY = 34
+ IPV6_XFRM_POLICY = 35
+ IPV6_HDRINCL = 36
+ IPV6_RECVPKTINFO = 49
+ IPV6_PKTINFO = 50
+ IPV6_RECVHOPLIMIT = 51
+ IPV6_HOPLIMIT = 52
+ IPV6_RECVHOPOPTS = 53
+ IPV6_HOPOPTS = 54
+ IPV6_RTHDRDSTOPTS = 55
+ IPV6_RECVRTHDR = 56
+ IPV6_RTHDR = 57
+ IPV6_RECVDSTOPTS = 58
+ IPV6_DSTOPTS = 59
+ IPV6_RECVPATHMTU = 60
+ IPV6_PATHMTU = 61
+ IPV6_DONTFRAG = 62
+ IPV6_RECVTCLASS = 66
+ IPV6_TCLASS = 67
+ IPV6_AUTOFLOWLABEL = 70
+ IPV6_ADDR_PREFERENCES = 72
+ IPV6_MINHOPCOUNT = 73
+ IPV6_ORIGDSTADDR = 74
+ IPV6_RECVORIGDSTADDR = IPV6_ORIGDSTADDR
+ IPV6_TRANSPARENT = 75
+ IPV6_UNICAST_IF = 76
+ IPV6_RECVFRAGSIZE = 77
+ IPV6_FREEBIND = 78
+)
diff --git a/pkg/abi/linux/netlink.go b/pkg/abi/linux/netlink.go
index 10ceb5bf2..25c5e17fd 100644
--- a/pkg/abi/linux/netlink.go
+++ b/pkg/abi/linux/netlink.go
@@ -108,3 +108,17 @@ const NetlinkAttrHeaderSize = 4
// NLA_ALIGNTO is the alignment of netlink attributes, from
// uapi/linux/netlink.h.
const NLA_ALIGNTO = 4
+
+// Socket options, from uapi/linux/netlink.h.
+const (
+ NETLINK_ADD_MEMBERSHIP = 1
+ NETLINK_DROP_MEMBERSHIP = 2
+ NETLINK_PKTINFO = 3
+ NETLINK_BROADCAST_ERROR = 4
+ NETLINK_NO_ENOBUFS = 5
+ NETLINK_LISTEN_ALL_NSID = 8
+ NETLINK_LIST_MEMBERSHIPS = 9
+ NETLINK_CAP_ACK = 10
+ NETLINK_EXT_ACK = 11
+ NETLINK_DUMP_STRICT_CHK = 12
+)
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go
index af0761a3b..929814752 100644
--- a/pkg/abi/linux/socket.go
+++ b/pkg/abi/linux/socket.go
@@ -89,8 +89,18 @@ const (
MSG_CMSG_CLOEXEC = 0x40000000
)
-// SOL_SOCKET is from socket.h
-const SOL_SOCKET = 1
+// Set/get socket option levels, from socket.h.
+const (
+ SOL_IP = 0
+ SOL_SOCKET = 1
+ SOL_TCP = 6
+ SOL_UDP = 17
+ SOL_IPV6 = 41
+ SOL_ICMPV6 = 58
+ SOL_RAW = 255
+ SOL_PACKET = 263
+ SOL_NETLINK = 270
+)
// Socket types, from linux/net.h.
const (
@@ -122,22 +132,63 @@ const (
// Socket options from socket.h.
const (
- SO_ERROR = 4
- SO_KEEPALIVE = 9
- SO_LINGER = 13
- SO_MARK = 36
- SO_PASSCRED = 16
- SO_PEERCRED = 17
- SO_PEERNAME = 28
- SO_PROTOCOL = 38
- SO_RCVBUF = 8
- SO_RCVTIMEO = 20
- SO_REUSEADDR = 2
- SO_SNDBUF = 7
- SO_SNDTIMEO = 21
- SO_TIMESTAMP = 29
- SO_TIMESTAMPNS = 35
- SO_TYPE = 3
+ SO_DEBUG = 1
+ SO_REUSEADDR = 2
+ SO_TYPE = 3
+ SO_ERROR = 4
+ SO_DONTROUTE = 5
+ SO_BROADCAST = 6
+ SO_SNDBUF = 7
+ SO_RCVBUF = 8
+ SO_KEEPALIVE = 9
+ SO_OOBINLINE = 10
+ SO_NO_CHECK = 11
+ SO_PRIORITY = 12
+ SO_LINGER = 13
+ SO_BSDCOMPAT = 14
+ SO_REUSEPORT = 15
+ SO_PASSCRED = 16
+ SO_PEERCRED = 17
+ SO_RCVLOWAT = 18
+ SO_SNDLOWAT = 19
+ SO_RCVTIMEO = 20
+ SO_SNDTIMEO = 21
+ SO_BINDTODEVICE = 25
+ SO_ATTACH_FILTER = 26
+ SO_DETACH_FILTER = 27
+ SO_GET_FILTER = SO_ATTACH_FILTER
+ SO_PEERNAME = 28
+ SO_TIMESTAMP = 29
+ SO_ACCEPTCONN = 30
+ SO_PEERSEC = 31
+ SO_SNDBUFFORCE = 32
+ SO_RCVBUFFORCE = 33
+ SO_PASSSEC = 34
+ SO_TIMESTAMPNS = 35
+ SO_MARK = 36
+ SO_TIMESTAMPING = 37
+ SO_PROTOCOL = 38
+ SO_DOMAIN = 39
+ SO_RXQ_OVFL = 40
+ SO_WIFI_STATUS = 41
+ SO_PEEK_OFF = 42
+ SO_NOFCS = 43
+ SO_LOCK_FILTER = 44
+ SO_SELECT_ERR_QUEUE = 45
+ SO_BUSY_POLL = 46
+ SO_MAX_PACING_RATE = 47
+ SO_BPF_EXTENSIONS = 48
+ SO_INCOMING_CPU = 49
+ SO_ATTACH_BPF = 50
+ SO_ATTACH_REUSEPORT_CBPF = 51
+ SO_ATTACH_REUSEPORT_EBPF = 52
+ SO_CNX_ADVICE = 53
+ SO_MEMINFO = 55
+ SO_INCOMING_NAPI_ID = 56
+ SO_COOKIE = 57
+ SO_PEERGROUPS = 59
+ SO_ZEROCOPY = 60
+ SO_TXTIME = 61
)
// SockAddrMax is the maximum size of a struct sockaddr, from
diff --git a/pkg/abi/linux/tcp.go b/pkg/abi/linux/tcp.go
new file mode 100644
index 000000000..7586ada42
--- /dev/null
+++ b/pkg/abi/linux/tcp.go
@@ -0,0 +1,54 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Socket options from uapi/linux/tcp.h.
+const (
+ TCP_NODELAY = 1
+ TCP_MAXSEG = 2
+ TCP_CORK = 3
+ TCP_KEEPIDLE = 4
+ TCP_KEEPINTVL = 5
+ TCP_KEEPCNT = 6
+ TCP_SYNCNT = 7
+ TCP_LINGER2 = 8
+ TCP_DEFER_ACCEPT = 9
+ TCP_WINDOW_CLAMP = 10
+ TCP_INFO = 11
+ TCP_QUICKACK = 12
+ TCP_CONGESTION = 13
+ TCP_MD5SIG = 14
+ TCP_THIN_LINEAR_TIMEOUTS = 16
+ TCP_THIN_DUPACK = 17
+ TCP_USER_TIMEOUT = 18
+ TCP_REPAIR = 19
+ TCP_REPAIR_QUEUE = 20
+ TCP_QUEUE_SEQ = 21
+ TCP_REPAIR_OPTIONS = 22
+ TCP_FASTOPEN = 23
+ TCP_TIMESTAMP = 24
+ TCP_NOTSENT_LOWAT = 25
+ TCP_CC_INFO = 26
+ TCP_SAVE_SYN = 27
+ TCP_SAVED_SYN = 28
+ TCP_REPAIR_WINDOW = 29
+ TCP_FASTOPEN_CONNECT = 30
+ TCP_ULP = 31
+ TCP_MD5SIG_EXT = 32
+ TCP_FASTOPEN_KEY = 33
+ TCP_FASTOPEN_NO_COOKIE = 34
+ TCP_ZEROCOPY_RECEIVE = 35
+ TCP_INQ = 36
+)
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go
index d14bbad01..c5ce289b5 100644
--- a/pkg/sentry/socket/epsocket/epsocket.go
+++ b/pkg/sentry/socket/epsocket/epsocket.go
@@ -515,189 +515,233 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name, outLen int) (
func GetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, skType transport.SockType, level, name, outLen int) (interface{}, *syserr.Error) {
switch level {
case linux.SOL_SOCKET:
- switch name {
- case linux.SO_TYPE:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
- return int32(skType), nil
+ return getSockOptSocket(t, s, ep, family, skType, name, outLen)
- case linux.SO_ERROR:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
+ case linux.SOL_TCP:
+ return getSockOptTCP(t, ep, name, outLen)
- // Get the last error and convert it.
- err := ep.GetSockOpt(tcpip.ErrorOption{})
- if err == nil {
- return int32(0), nil
- }
- return int32(syserr.TranslateNetstackError(err).ToLinux().Number()), nil
+ case linux.SOL_IPV6:
+ return getSockOptIPv6(t, ep, name, outLen)
- case linux.SO_PEERCRED:
- if family != linux.AF_UNIX || outLen < syscall.SizeofUcred {
- return nil, syserr.ErrInvalidArgument
- }
+ case linux.SOL_IP,
+ linux.SOL_UDP,
+ linux.SOL_ICMPV6,
+ linux.SOL_RAW,
+ linux.SOL_PACKET:
- tcred := t.Credentials()
- return syscall.Ucred{
- Pid: int32(t.ThreadGroup().ID()),
- Uid: uint32(tcred.EffectiveKUID.In(tcred.UserNamespace).OrOverflow()),
- Gid: uint32(tcred.EffectiveKGID.In(tcred.UserNamespace).OrOverflow()),
- }, nil
+ t.Kernel().EmitUnimplementedEvent(t)
+ }
- case linux.SO_PASSCRED:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
+ return nil, syserr.ErrProtocolNotAvailable
+}
- var v tcpip.PasscredOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
+// getSockOptSocket implements GetSockOpt when level is SOL_SOCKET.
+func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, skType transport.SockType, name, outLen int) (interface{}, *syserr.Error) {
+ switch name {
+ case linux.SO_TYPE:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+ return int32(skType), nil
- return int32(v), nil
+ case linux.SO_ERROR:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
- case linux.SO_SNDBUF:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
+ // Get the last error and convert it.
+ err := ep.GetSockOpt(tcpip.ErrorOption{})
+ if err == nil {
+ return int32(0), nil
+ }
+ return int32(syserr.TranslateNetstackError(err).ToLinux().Number()), nil
- var size tcpip.SendBufferSizeOption
- if err := ep.GetSockOpt(&size); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
+ case linux.SO_PEERCRED:
+ if family != linux.AF_UNIX || outLen < syscall.SizeofUcred {
+ return nil, syserr.ErrInvalidArgument
+ }
- if size > math.MaxInt32 {
- size = math.MaxInt32
- }
+ tcred := t.Credentials()
+ return syscall.Ucred{
+ Pid: int32(t.ThreadGroup().ID()),
+ Uid: uint32(tcred.EffectiveKUID.In(tcred.UserNamespace).OrOverflow()),
+ Gid: uint32(tcred.EffectiveKGID.In(tcred.UserNamespace).OrOverflow()),
+ }, nil
- return int32(size), nil
+ case linux.SO_PASSCRED:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
- case linux.SO_RCVBUF:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
+ var v tcpip.PasscredOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
- var size tcpip.ReceiveBufferSizeOption
- if err := ep.GetSockOpt(&size); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
+ return int32(v), nil
- if size > math.MaxInt32 {
- size = math.MaxInt32
- }
+ case linux.SO_SNDBUF:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
- return int32(size), nil
+ var size tcpip.SendBufferSizeOption
+ if err := ep.GetSockOpt(&size); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
- case linux.SO_REUSEADDR:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
+ if size > math.MaxInt32 {
+ size = math.MaxInt32
+ }
- var v tcpip.ReuseAddressOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
+ return int32(size), nil
- return int32(v), nil
+ case linux.SO_RCVBUF:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
- case linux.SO_KEEPALIVE:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
- return int32(0), nil
+ var size tcpip.ReceiveBufferSizeOption
+ if err := ep.GetSockOpt(&size); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
- case linux.SO_LINGER:
- if outLen < syscall.SizeofLinger {
- return nil, syserr.ErrInvalidArgument
- }
- return syscall.Linger{}, nil
+ if size > math.MaxInt32 {
+ size = math.MaxInt32
+ }
- case linux.SO_RCVTIMEO:
- if outLen < linux.SizeOfTimeval {
- return nil, syserr.ErrInvalidArgument
- }
+ return int32(size), nil
- return linux.NsecToTimeval(s.RecvTimeout()), nil
+ case linux.SO_REUSEADDR:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
- case linux.SO_TIMESTAMP:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
+ var v tcpip.ReuseAddressOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
- var v tcpip.TimestampOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
+ return int32(v), nil
- return int32(v), nil
+ case linux.SO_KEEPALIVE:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
}
+ return int32(0), nil
- case syscall.SOL_TCP:
- switch name {
- case syscall.TCP_NODELAY:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
+ case linux.SO_LINGER:
+ if outLen < syscall.SizeofLinger {
+ return nil, syserr.ErrInvalidArgument
+ }
+ return syscall.Linger{}, nil
- var v tcpip.DelayOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
+ case linux.SO_RCVTIMEO:
+ if outLen < linux.SizeOfTimeval {
+ return nil, syserr.ErrInvalidArgument
+ }
- if v == 0 {
- return int32(1), nil
- }
- return int32(0), nil
+ return linux.NsecToTimeval(s.RecvTimeout()), nil
- case syscall.TCP_CORK:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
+ case linux.SO_TIMESTAMP:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
- var v tcpip.CorkOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
+ var v tcpip.TimestampOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
- return int32(v), nil
+ return int32(v), nil
- case syscall.TCP_INFO:
- var v tcpip.TCPInfoOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
+ default:
+ socket.GetSockOptEmitUnimplementedEvent(t, name)
+ }
+ return nil, syserr.ErrProtocolNotAvailable
+}
+
+// getSockOptTCP implements GetSockOpt when level is SOL_TCP.
+func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interface{}, *syserr.Error) {
+ switch name {
+ case linux.TCP_NODELAY:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
- // TODO: Translate fields once they are added to
- // tcpip.TCPInfoOption.
- info := linux.TCPInfo{}
+ var v tcpip.DelayOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
- // Linux truncates the output binary to outLen.
- ib := binary.Marshal(nil, usermem.ByteOrder, &info)
- if len(ib) > outLen {
- ib = ib[:outLen]
- }
+ if v == 0 {
+ return int32(1), nil
+ }
+ return int32(0), nil
- return ib, nil
+ case linux.TCP_CORK:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
}
- case syscall.SOL_IPV6:
- switch name {
- case syscall.IPV6_V6ONLY:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
+ var v tcpip.CorkOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
- var v tcpip.V6OnlyOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
+ return int32(v), nil
- return int32(v), nil
+ case linux.TCP_INFO:
+ var v tcpip.TCPInfoOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ // TODO: Translate fields once they are added to
+ // tcpip.TCPInfoOption.
+ info := linux.TCPInfo{}
+
+ // Linux truncates the output binary to outLen.
+ ib := binary.Marshal(nil, usermem.ByteOrder, &info)
+ if len(ib) > outLen {
+ ib = ib[:outLen]
}
+
+ return ib, nil
+
+ case linux.TCP_CC_INFO,
+ linux.TCP_NOTSENT_LOWAT,
+ linux.TCP_ZEROCOPY_RECEIVE:
+
+ t.Kernel().EmitUnimplementedEvent(t)
+
+ default:
+ emitUmplementedEventTCP(t, name)
}
+ return nil, syserr.ErrProtocolNotAvailable
+}
+
+// getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6.
+func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interface{}, *syserr.Error) {
+ switch name {
+ case linux.IPV6_V6ONLY:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+ var v tcpip.V6OnlyOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ return int32(v), nil
+
+ case linux.IPV6_PATHMTU:
+ t.Kernel().EmitUnimplementedEvent(t)
+
+ default:
+ emitUmplementedEventIPv6(t, name)
+ }
return nil, syserr.ErrProtocolNotAvailable
}
@@ -712,109 +756,304 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa
func SetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, level int, name int, optVal []byte) *syserr.Error {
switch level {
case linux.SOL_SOCKET:
- switch name {
- case linux.SO_SNDBUF:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
+ return setSockOptSocket(t, s, ep, name, optVal)
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.SendBufferSizeOption(v)))
+ case linux.SOL_TCP:
+ return setSockOptTCP(t, ep, name, optVal)
- case linux.SO_RCVBUF:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
+ case linux.SOL_IPV6:
+ return setSockOptIPv6(t, ep, name, optVal)
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(v)))
+ case linux.SOL_IP:
+ return setSockOptIP(t, ep, name, optVal)
- case linux.SO_REUSEADDR:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
+ case linux.SOL_UDP,
+ linux.SOL_ICMPV6,
+ linux.SOL_RAW,
+ linux.SOL_PACKET:
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.ReuseAddressOption(v)))
+ t.Kernel().EmitUnimplementedEvent(t)
+ }
- case linux.SO_PASSCRED:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
+ // Default to the old behavior; hand off to network stack.
+ return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
+}
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.PasscredOption(v)))
+// setSockOptSocket implements SetSockOpt when level is SOL_SOCKET.
+func setSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
+ switch name {
+ case linux.SO_SNDBUF:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
- case linux.SO_RCVTIMEO:
- if len(optVal) < linux.SizeOfTimeval {
- return syserr.ErrInvalidArgument
- }
+ v := usermem.ByteOrder.Uint32(optVal)
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.SendBufferSizeOption(v)))
- var v linux.Timeval
- binary.Unmarshal(optVal[:linux.SizeOfTimeval], usermem.ByteOrder, &v)
- s.SetRecvTimeout(v.ToNsecCapped())
- return nil
+ case linux.SO_RCVBUF:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
- case linux.SO_TIMESTAMP:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
+ v := usermem.ByteOrder.Uint32(optVal)
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(v)))
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TimestampOption(v)))
+ case linux.SO_REUSEADDR:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
}
- case syscall.SOL_TCP:
- switch name {
- case syscall.TCP_NODELAY:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
+ v := usermem.ByteOrder.Uint32(optVal)
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.ReuseAddressOption(v)))
- v := usermem.ByteOrder.Uint32(optVal)
- var o tcpip.DelayOption
- if v == 0 {
- o = 1
- }
- return syserr.TranslateNetstackError(ep.SetSockOpt(o))
- case syscall.TCP_CORK:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
+ case linux.SO_PASSCRED:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+
+ v := usermem.ByteOrder.Uint32(optVal)
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.PasscredOption(v)))
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.CorkOption(v)))
+ case linux.SO_RCVTIMEO:
+ if len(optVal) < linux.SizeOfTimeval {
+ return syserr.ErrInvalidArgument
}
- case syscall.SOL_IPV6:
- switch name {
- case syscall.IPV6_V6ONLY:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.V6OnlyOption(v)))
- }
- case syscall.SOL_IP:
- const (
- _IP_MULTICAST_IF = 32
- _IP_ADD_MEMBERSHIP = 35
- _MCAST_JOIN_GROUP = 42
- )
- switch name {
- case _IP_ADD_MEMBERSHIP, _MCAST_JOIN_GROUP, _IP_MULTICAST_IF:
- // FIXME: Disallow IP-level multicast group options by
- // default. These will need to be supported by appropriately plumbing
- // the level through to the network stack (if at all). However, we
- // still allow setting TTL, and multicast-enable/disable type options.
+ var v linux.Timeval
+ binary.Unmarshal(optVal[:linux.SizeOfTimeval], usermem.ByteOrder, &v)
+ s.SetRecvTimeout(v.ToNsecCapped())
+ return nil
+
+ case linux.SO_TIMESTAMP:
+ if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
}
+
+ v := usermem.ByteOrder.Uint32(optVal)
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TimestampOption(v)))
+
+ default:
+ socket.SetSockOptEmitUnimplementedEvent(t, name)
}
// Default to the old behavior; hand off to network stack.
return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
}
+// setSockOptTCP implements SetSockOpt when level is SOL_TCP.
+func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
+ switch name {
+ case linux.TCP_NODELAY:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+
+ v := usermem.ByteOrder.Uint32(optVal)
+ var o tcpip.DelayOption
+ if v == 0 {
+ o = 1
+ }
+ return syserr.TranslateNetstackError(ep.SetSockOpt(o))
+
+ case linux.TCP_CORK:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+
+ v := usermem.ByteOrder.Uint32(optVal)
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.CorkOption(v)))
+
+ case linux.TCP_REPAIR_OPTIONS:
+ t.Kernel().EmitUnimplementedEvent(t)
+
+ default:
+ emitUmplementedEventTCP(t, name)
+ }
+
+ // Default to the old behavior; hand off to network stack.
+ return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
+}
+
+// setSockOptIPv6 implements SetSockOpt when level is SOL_IPV6.
+func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
+ switch name {
+ case linux.IPV6_V6ONLY:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+
+ v := usermem.ByteOrder.Uint32(optVal)
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.V6OnlyOption(v)))
+
+ case linux.IPV6_ADD_MEMBERSHIP,
+ linux.IPV6_DROP_MEMBERSHIP,
+ linux.IPV6_IPSEC_POLICY,
+ linux.IPV6_JOIN_ANYCAST,
+ linux.IPV6_LEAVE_ANYCAST,
+ linux.IPV6_PKTINFO,
+ linux.IPV6_ROUTER_ALERT,
+ linux.IPV6_XFRM_POLICY,
+ linux.MCAST_BLOCK_SOURCE,
+ linux.MCAST_JOIN_GROUP,
+ linux.MCAST_JOIN_SOURCE_GROUP,
+ linux.MCAST_LEAVE_GROUP,
+ linux.MCAST_LEAVE_SOURCE_GROUP,
+ linux.MCAST_UNBLOCK_SOURCE:
+
+ t.Kernel().EmitUnimplementedEvent(t)
+
+ default:
+ emitUmplementedEventIPv6(t, name)
+ }
+
+ // Default to the old behavior; hand off to network stack.
+ return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
+}
+
+// setSockOptIP implements SetSockOpt when level is SOL_IP.
+func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
+ switch name {
+ case linux.IP_ADD_MEMBERSHIP, linux.MCAST_JOIN_GROUP, linux.IP_MULTICAST_IF:
+ // FIXME: Disallow IP-level multicast group options by
+ // default. These will need to be supported by appropriately plumbing
+ // the level through to the network stack (if at all). However, we
+ // still allow setting TTL, and multicast-enable/disable type options.
+ t.Kernel().EmitUnimplementedEvent(t)
+ return syserr.ErrInvalidArgument
+
+ case linux.IP_ADD_SOURCE_MEMBERSHIP,
+ linux.IP_BIND_ADDRESS_NO_PORT,
+ linux.IP_BLOCK_SOURCE,
+ linux.IP_CHECKSUM,
+ linux.IP_DROP_MEMBERSHIP,
+ linux.IP_DROP_SOURCE_MEMBERSHIP,
+ linux.IP_FREEBIND,
+ linux.IP_HDRINCL,
+ linux.IP_IPSEC_POLICY,
+ linux.IP_MINTTL,
+ linux.IP_MSFILTER,
+ linux.IP_MTU_DISCOVER,
+ linux.IP_MULTICAST_ALL,
+ linux.IP_MULTICAST_LOOP,
+ linux.IP_MULTICAST_TTL,
+ linux.IP_NODEFRAG,
+ linux.IP_OPTIONS,
+ linux.IP_PASSSEC,
+ linux.IP_PKTINFO,
+ linux.IP_RECVERR,
+ linux.IP_RECVFRAGSIZE,
+ linux.IP_RECVOPTS,
+ linux.IP_RECVORIGDSTADDR,
+ linux.IP_RECVTOS,
+ linux.IP_RECVTTL,
+ linux.IP_RETOPTS,
+ linux.IP_TOS,
+ linux.IP_TRANSPARENT,
+ linux.IP_TTL,
+ linux.IP_UNBLOCK_SOURCE,
+ linux.IP_UNICAST_IF,
+ linux.IP_XFRM_POLICY,
+ linux.MCAST_BLOCK_SOURCE,
+ linux.MCAST_JOIN_SOURCE_GROUP,
+ linux.MCAST_LEAVE_GROUP,
+ linux.MCAST_LEAVE_SOURCE_GROUP,
+ linux.MCAST_MSFILTER,
+ linux.MCAST_UNBLOCK_SOURCE:
+
+ t.Kernel().EmitUnimplementedEvent(t)
+ }
+
+ // Default to the old behavior; hand off to network stack.
+ return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
+}
+
+// emitUmplementedEventTCP emits unimplemented event if name is valid. This
+// function contains names that are common between Get and SetSockOpt when
+// level is SOL_TCP.
+func emitUmplementedEventTCP(t *kernel.Task, name int) {
+ switch name {
+ case linux.TCP_CONGESTION,
+ linux.TCP_CORK,
+ linux.TCP_DEFER_ACCEPT,
+ linux.TCP_FASTOPEN,
+ linux.TCP_FASTOPEN_CONNECT,
+ linux.TCP_FASTOPEN_KEY,
+ linux.TCP_FASTOPEN_NO_COOKIE,
+ linux.TCP_INQ,
+ linux.TCP_KEEPCNT,
+ linux.TCP_KEEPIDLE,
+ linux.TCP_KEEPINTVL,
+ linux.TCP_LINGER2,
+ linux.TCP_MAXSEG,
+ linux.TCP_QUEUE_SEQ,
+ linux.TCP_QUICKACK,
+ linux.TCP_REPAIR,
+ linux.TCP_REPAIR_QUEUE,
+ linux.TCP_REPAIR_WINDOW,
+ linux.TCP_SAVED_SYN,
+ linux.TCP_SAVE_SYN,
+ linux.TCP_SYNCNT,
+ linux.TCP_THIN_DUPACK,
+ linux.TCP_THIN_LINEAR_TIMEOUTS,
+ linux.TCP_TIMESTAMP,
+ linux.TCP_ULP,
+ linux.TCP_USER_TIMEOUT,
+ linux.TCP_WINDOW_CLAMP:
+
+ t.Kernel().EmitUnimplementedEvent(t)
+ }
+}
+
+// emitUmplementedEventIPv6 emits unimplemented event if name is valid. It
+// contains names that are common between Get and SetSockOpt when level is
+// SOL_IPV6.
+func emitUmplementedEventIPv6(t *kernel.Task, name int) {
+ switch name {
+ case linux.IPV6_2292DSTOPTS,
+ linux.IPV6_2292HOPLIMIT,
+ linux.IPV6_2292HOPOPTS,
+ linux.IPV6_2292PKTINFO,
+ linux.IPV6_2292PKTOPTIONS,
+ linux.IPV6_2292RTHDR,
+ linux.IPV6_ADDR_PREFERENCES,
+ linux.IPV6_AUTOFLOWLABEL,
+ linux.IPV6_DONTFRAG,
+ linux.IPV6_DSTOPTS,
+ linux.IPV6_FLOWINFO,
+ linux.IPV6_FLOWINFO_SEND,
+ linux.IPV6_FLOWLABEL_MGR,
+ linux.IPV6_FREEBIND,
+ linux.IPV6_HOPOPTS,
+ linux.IPV6_MINHOPCOUNT,
+ linux.IPV6_MTU,
+ linux.IPV6_MTU_DISCOVER,
+ linux.IPV6_MULTICAST_ALL,
+ linux.IPV6_MULTICAST_HOPS,
+ linux.IPV6_MULTICAST_IF,
+ linux.IPV6_MULTICAST_LOOP,
+ linux.IPV6_RECVDSTOPTS,
+ linux.IPV6_RECVERR,
+ linux.IPV6_RECVFRAGSIZE,
+ linux.IPV6_RECVHOPLIMIT,
+ linux.IPV6_RECVHOPOPTS,
+ linux.IPV6_RECVORIGDSTADDR,
+ linux.IPV6_RECVPATHMTU,
+ linux.IPV6_RECVPKTINFO,
+ linux.IPV6_RECVRTHDR,
+ linux.IPV6_RECVTCLASS,
+ linux.IPV6_RTHDR,
+ linux.IPV6_RTHDRDSTOPTS,
+ linux.IPV6_TCLASS,
+ linux.IPV6_TRANSPARENT,
+ linux.IPV6_UNICAST_HOPS,
+ linux.IPV6_UNICAST_IF,
+ linux.MCAST_MSFILTER,
+ linux.IPV6_ADDRFORM:
+
+ t.Kernel().EmitUnimplementedEvent(t)
+ }
+}
+
// isLinkLocal determines if the given IPv6 address is link-local. This is the
// case when it has the fe80::/10 prefix. This check is used to determine when
// the NICID is relevant for a given IPv6 address.
diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go
index f901cfa0b..b1f6620de 100644
--- a/pkg/sentry/socket/netlink/socket.go
+++ b/pkg/sentry/socket/netlink/socket.go
@@ -299,6 +299,21 @@ func (s *Socket) GetSockOpt(t *kernel.Task, level int, name int, outLen int) (in
}
// We don't have limit on receiving size.
return math.MaxInt32, nil
+
+ default:
+ socket.GetSockOptEmitUnimplementedEvent(t, name)
+ }
+ case linux.SOL_NETLINK:
+ switch name {
+ case linux.NETLINK_BROADCAST_ERROR,
+ linux.NETLINK_CAP_ACK,
+ linux.NETLINK_DUMP_STRICT_CHK,
+ linux.NETLINK_EXT_ACK,
+ linux.NETLINK_LIST_MEMBERSHIPS,
+ linux.NETLINK_NO_ENOBUFS,
+ linux.NETLINK_PKTINFO:
+
+ t.Kernel().EmitUnimplementedEvent(t)
}
}
// TODO: other sockopts are not supported.
@@ -329,7 +344,25 @@ func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *sy
// We don't have limit on receiving size. So just accept anything as
// valid for compatibility.
return nil
+ default:
+ socket.SetSockOptEmitUnimplementedEvent(t, name)
}
+
+ case linux.SOL_NETLINK:
+ switch name {
+ case linux.NETLINK_ADD_MEMBERSHIP,
+ linux.NETLINK_BROADCAST_ERROR,
+ linux.NETLINK_CAP_ACK,
+ linux.NETLINK_DROP_MEMBERSHIP,
+ linux.NETLINK_DUMP_STRICT_CHK,
+ linux.NETLINK_EXT_ACK,
+ linux.NETLINK_LISTEN_ALL_NSID,
+ linux.NETLINK_NO_ENOBUFS,
+ linux.NETLINK_PKTINFO:
+
+ t.Kernel().EmitUnimplementedEvent(t)
+ }
+
}
// TODO: other sockopts are not supported.
return syserr.ErrProtocolNotAvailable
diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go
index a235c5249..b1dcbf7b0 100644
--- a/pkg/sentry/socket/socket.go
+++ b/pkg/sentry/socket/socket.go
@@ -213,3 +213,94 @@ func (rt *ReceiveTimeout) SetRecvTimeout(nanoseconds int64) {
func (rt *ReceiveTimeout) RecvTimeout() int64 {
return atomic.LoadInt64(&rt.ns)
}
+
+// GetSockOptEmitUnimplementedEvent emits unimplemented event if name is valid.
+// It contains names that are valid for GetSockOpt when level is SOL_SOCKET.
+func GetSockOptEmitUnimplementedEvent(t *kernel.Task, name int) {
+ switch name {
+ case linux.SO_ACCEPTCONN,
+ linux.SO_BPF_EXTENSIONS,
+ linux.SO_COOKIE,
+ linux.SO_DOMAIN,
+ linux.SO_ERROR,
+ linux.SO_GET_FILTER,
+ linux.SO_INCOMING_NAPI_ID,
+ linux.SO_MEMINFO,
+ linux.SO_PEERCRED,
+ linux.SO_PEERGROUPS,
+ linux.SO_PEERNAME,
+ linux.SO_PEERSEC,
+ linux.SO_PROTOCOL,
+ linux.SO_SNDLOWAT,
+ linux.SO_TYPE:
+
+ t.Kernel().EmitUnimplementedEvent(t)
+
+ default:
+ emitUnimplementedEvent(t, name)
+ }
+}
+
+// SetSockOptEmitUnimplementedEvent emits unimplemented event if name is valid.
+// It contains names that are valid for SetSockOpt when level is SOL_SOCKET.
+func SetSockOptEmitUnimplementedEvent(t *kernel.Task, name int) {
+ switch name {
+ case linux.SO_ATTACH_BPF,
+ linux.SO_ATTACH_FILTER,
+ linux.SO_ATTACH_REUSEPORT_CBPF,
+ linux.SO_ATTACH_REUSEPORT_EBPF,
+ linux.SO_CNX_ADVICE,
+ linux.SO_DETACH_FILTER,
+ linux.SO_RCVBUFFORCE,
+ linux.SO_SNDBUFFORCE:
+
+ t.Kernel().EmitUnimplementedEvent(t)
+
+ default:
+ emitUnimplementedEvent(t, name)
+ }
+}
+
+// emitUnimplementedEvent emits unimplemented event if name is valid. It
+// contains names that are common between Get and SetSocketOpt when level is
+// SOL_SOCKET.
+func emitUnimplementedEvent(t *kernel.Task, name int) {
+ switch name {
+ case linux.SO_BINDTODEVICE,
+ linux.SO_BROADCAST,
+ linux.SO_BSDCOMPAT,
+ linux.SO_BUSY_POLL,
+ linux.SO_DEBUG,
+ linux.SO_DONTROUTE,
+ linux.SO_INCOMING_CPU,
+ linux.SO_KEEPALIVE,
+ linux.SO_LINGER,
+ linux.SO_LOCK_FILTER,
+ linux.SO_MARK,
+ linux.SO_MAX_PACING_RATE,
+ linux.SO_NOFCS,
+ linux.SO_NO_CHECK,
+ linux.SO_OOBINLINE,
+ linux.SO_PASSCRED,
+ linux.SO_PASSSEC,
+ linux.SO_PEEK_OFF,
+ linux.SO_PRIORITY,
+ linux.SO_RCVBUF,
+ linux.SO_RCVLOWAT,
+ linux.SO_RCVTIMEO,
+ linux.SO_REUSEADDR,
+ linux.SO_REUSEPORT,
+ linux.SO_RXQ_OVFL,
+ linux.SO_SELECT_ERR_QUEUE,
+ linux.SO_SNDBUF,
+ linux.SO_SNDTIMEO,
+ linux.SO_TIMESTAMP,
+ linux.SO_TIMESTAMPING,
+ linux.SO_TIMESTAMPNS,
+ linux.SO_TXTIME,
+ linux.SO_WIFI_STATUS,
+ linux.SO_ZEROCOPY:
+
+ t.Kernel().EmitUnimplementedEvent(t)
+ }
+}
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index 4c49e90e3..c2a77ebf5 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -89,10 +89,16 @@ func (c *compatEmitter) Emit(msg proto.Message) (hangup bool, err error) {
if tr == nil {
switch sysnr {
case syscall.SYS_PRCTL, syscall.SYS_ARCH_PRCTL:
- tr = newCmdTracker(0)
+ // args: cmd, ...
+ tr = newArgsTracker(0)
case syscall.SYS_IOCTL, syscall.SYS_EPOLL_CTL, syscall.SYS_SHMCTL:
- tr = newCmdTracker(1)
+ // args: fd, cmd, ...
+ tr = newArgsTracker(1)
+
+ case syscall.SYS_GETSOCKOPT, syscall.SYS_SETSOCKOPT:
+ // args: fd, level, name, ...
+ tr = newArgsTracker(1, 2)
default:
tr = &onceTracker{}
diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go
index 2bb769a49..0c9472f18 100644
--- a/runsc/boot/compat_amd64.go
+++ b/runsc/boot/compat_amd64.go
@@ -20,35 +20,58 @@ import (
rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
)
-// cmdTracker reports only a single time for each different command argument in
-// the syscall. It's used for generic syscalls like ioctl to report once per
-// 'cmd'
-type cmdTracker struct {
- // argIdx is the syscall argument index where the command is located.
- argIdx int
- cmds map[uint32]struct{}
+// reportLimit is the max number of events that should be reported per tracker.
+const reportLimit = 100
+
+// argsTracker reports only once for each different combination of arguments.
+// It's used for generic syscalls like ioctl to report once per 'cmd'.
+type argsTracker struct {
+ // argsIdx is the syscall arguments to use as unique ID.
+ argsIdx []int
+ reported map[string]struct{}
+ count int
}
-func newCmdTracker(argIdx int) *cmdTracker {
- return &cmdTracker{argIdx: argIdx, cmds: make(map[uint32]struct{})}
+func newArgsTracker(argIdx ...int) *argsTracker {
+ return &argsTracker{argsIdx: argIdx, reported: make(map[string]struct{})}
}
// cmd returns the command based on the syscall argument index.
-func (c *cmdTracker) cmd(regs *rpb.AMD64Registers) uint32 {
- switch c.argIdx {
+func (a *argsTracker) key(regs *rpb.AMD64Registers) string {
+ var rv string
+ for _, idx := range a.argsIdx {
+ rv += fmt.Sprintf("%d|", argVal(idx, regs))
+ }
+ return rv
+}
+
+func argVal(argIdx int, regs *rpb.AMD64Registers) uint32 {
+ switch argIdx {
case 0:
return uint32(regs.Rdi)
case 1:
return uint32(regs.Rsi)
+ case 2:
+ return uint32(regs.Rdx)
+ case 3:
+ return uint32(regs.R10)
+ case 4:
+ return uint32(regs.R8)
+ case 5:
+ return uint32(regs.R9)
}
- panic(fmt.Sprintf("unsupported syscall argument index %d", c.argIdx))
+ panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
}
-func (c *cmdTracker) shouldReport(regs *rpb.AMD64Registers) bool {
- _, ok := c.cmds[c.cmd(regs)]
+func (a *argsTracker) shouldReport(regs *rpb.AMD64Registers) bool {
+ if a.count >= reportLimit {
+ return false
+ }
+ _, ok := a.reported[a.key(regs)]
return !ok
}
-func (c *cmdTracker) onReported(regs *rpb.AMD64Registers) {
- c.cmds[c.cmd(regs)] = struct{}{}
+func (a *argsTracker) onReported(regs *rpb.AMD64Registers) {
+ a.count++
+ a.reported[a.key(regs)] = struct{}{}
}
diff --git a/runsc/boot/compat_test.go b/runsc/boot/compat_test.go
index 30b94798a..f1940dd72 100644
--- a/runsc/boot/compat_test.go
+++ b/runsc/boot/compat_test.go
@@ -33,34 +33,53 @@ func TestOnceTracker(t *testing.T) {
}
}
-func TestCmdTracker(t *testing.T) {
+func TestArgsTracker(t *testing.T) {
for _, tc := range []struct {
name string
- idx int
+ idx []int
rdi1 uint64
rdi2 uint64
rsi1 uint64
rsi2 uint64
want bool
}{
- {name: "same rdi", idx: 0, rdi1: 123, rdi2: 123, want: false},
- {name: "same rsi", idx: 1, rsi1: 123, rsi2: 123, want: false},
- {name: "diff rdi", idx: 0, rdi1: 123, rdi2: 321, want: true},
- {name: "diff rsi", idx: 1, rsi1: 123, rsi2: 321, want: true},
- {name: "cmd is uint32", idx: 0, rsi1: 0xdead00000123, rsi2: 0xbeef00000123, want: false},
+ {name: "same rdi", idx: []int{0}, rdi1: 123, rdi2: 123, want: false},
+ {name: "same rsi", idx: []int{1}, rsi1: 123, rsi2: 123, want: false},
+ {name: "diff rdi", idx: []int{0}, rdi1: 123, rdi2: 321, want: true},
+ {name: "diff rsi", idx: []int{1}, rsi1: 123, rsi2: 321, want: true},
+ {name: "cmd is uint32", idx: []int{0}, rsi1: 0xdead00000123, rsi2: 0xbeef00000123, want: false},
+ {name: "same 2 args", idx: []int{0, 1}, rsi1: 123, rdi1: 321, rsi2: 123, rdi2: 321, want: false},
+ {name: "diff 2 args", idx: []int{0, 1}, rsi1: 123, rdi1: 321, rsi2: 789, rdi2: 987, want: true},
} {
t.Run(tc.name, func(t *testing.T) {
- c := newCmdTracker(tc.idx)
+ c := newArgsTracker(tc.idx...)
regs := &rpb.AMD64Registers{Rdi: tc.rdi1, Rsi: tc.rsi1}
if !c.shouldReport(regs) {
- t.Error("first call to checkAndMark, got: false, want: true")
+ t.Error("first call to shouldReport, got: false, want: true")
}
c.onReported(regs)
regs.Rdi, regs.Rsi = tc.rdi2, tc.rsi2
if got := c.shouldReport(regs); tc.want != got {
- t.Errorf("after first call to checkAndMark, got: %t, want: %t", got, tc.want)
+ t.Errorf("second call to shouldReport, got: %t, want: %t", got, tc.want)
}
})
}
}
+
+func TestArgsTrackerLimit(t *testing.T) {
+ c := newArgsTracker(0, 1)
+ for i := 0; i < reportLimit; i++ {
+ regs := &rpb.AMD64Registers{Rdi: 123, Rsi: uint64(i)}
+ if !c.shouldReport(regs) {
+ t.Error("shouldReport before limit was reached, got: false, want: true")
+ }
+ c.onReported(regs)
+ }
+
+ // Should hit the count limit now.
+ regs := &rpb.AMD64Registers{Rdi: 123, Rsi: 123456}
+ if c.shouldReport(regs) {
+ t.Error("shouldReport after limit was reached, got: true, want: false")
+ }
+}