summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/socket/netlink
diff options
context:
space:
mode:
authorIan Lewis <ianmlewis@gmail.com>2020-08-17 21:44:31 -0400
committerIan Lewis <ianmlewis@gmail.com>2020-08-17 21:44:31 -0400
commitac324f646ee3cb7955b0b45a7453aeb9671cbdf1 (patch)
tree0cbc5018e8807421d701d190dc20525726c7ca76 /pkg/sentry/socket/netlink
parent352ae1022ce19de28fc72e034cc469872ad79d06 (diff)
parent6d0c5803d557d453f15ac6f683697eeb46dab680 (diff)
Merge branch 'master' into ip-forwarding
- Merges aleksej-paschenko's with HEAD - Adds vfs2 support for ip_forward
Diffstat (limited to 'pkg/sentry/socket/netlink')
-rw-r--r--pkg/sentry/socket/netlink/BUILD29
-rw-r--r--pkg/sentry/socket/netlink/message.go136
-rw-r--r--pkg/sentry/socket/netlink/message_test.go312
-rw-r--r--pkg/sentry/socket/netlink/port/BUILD7
-rw-r--r--pkg/sentry/socket/netlink/port/port.go3
-rw-r--r--pkg/sentry/socket/netlink/provider.go18
-rw-r--r--pkg/sentry/socket/netlink/provider_vfs2.go69
-rw-r--r--pkg/sentry/socket/netlink/route/BUILD9
-rw-r--r--pkg/sentry/socket/netlink/route/protocol.go339
-rw-r--r--pkg/sentry/socket/netlink/socket.go291
-rw-r--r--pkg/sentry/socket/netlink/socket_vfs2.go152
-rw-r--r--pkg/sentry/socket/netlink/uevent/BUILD16
-rw-r--r--pkg/sentry/socket/netlink/uevent/protocol.go60
13 files changed, 1279 insertions, 162 deletions
diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD
index f95803f91..0546801bf 100644
--- a/pkg/sentry/socket/netlink/BUILD
+++ b/pkg/sentry/socket/netlink/BUILD
@@ -1,4 +1,4 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -7,29 +7,48 @@ go_library(
srcs = [
"message.go",
"provider.go",
+ "provider_vfs2.go",
"socket.go",
+ "socket_vfs2.go",
],
- importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
"//pkg/binary",
+ "//pkg/context",
"//pkg/sentry/arch",
- "//pkg/sentry/context",
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/fs/lock",
+ "//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/kernel",
+ "//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/time",
- "//pkg/sentry/safemem",
"//pkg/sentry/socket",
"//pkg/sentry/socket/netlink/port",
"//pkg/sentry/socket/unix",
"//pkg/sentry/socket/unix/transport",
- "//pkg/sentry/usermem",
+ "//pkg/sentry/vfs",
+ "//pkg/sync",
"//pkg/syserr",
"//pkg/syserror",
"//pkg/tcpip",
+ "//pkg/usermem",
"//pkg/waiter",
+ "//tools/go_marshal/marshal",
+ "//tools/go_marshal/primitive",
+ ],
+)
+
+go_test(
+ name = "netlink_test",
+ size = "small",
+ srcs = [
+ "message_test.go",
+ ],
+ deps = [
+ ":netlink",
+ "//pkg/abi/linux",
],
)
diff --git a/pkg/sentry/socket/netlink/message.go b/pkg/sentry/socket/netlink/message.go
index ce0a1afd0..0899c61d1 100644
--- a/pkg/sentry/socket/netlink/message.go
+++ b/pkg/sentry/socket/netlink/message.go
@@ -20,18 +20,19 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/usermem"
)
-// alignUp rounds a length up to an alignment.
+// alignPad returns the length of padding required for alignment.
//
// Preconditions: align is a power of two.
-func alignUp(length int, align uint) int {
- return (length + int(align) - 1) &^ (int(align) - 1)
+func alignPad(length int, align uint) int {
+ return binary.AlignUp(length, align) - length
}
// Message contains a complete serialized netlink message.
type Message struct {
+ hdr linux.NetlinkMessageHeader
buf []byte
}
@@ -40,10 +41,86 @@ type Message struct {
// The header length will be updated by Finalize.
func NewMessage(hdr linux.NetlinkMessageHeader) *Message {
return &Message{
+ hdr: hdr,
buf: binary.Marshal(nil, usermem.ByteOrder, hdr),
}
}
+// ParseMessage parses the first message seen at buf, returning the rest of the
+// buffer. If message is malformed, ok of false is returned. For last message,
+// padding check is loose, if there isn't enought padding, whole buf is consumed
+// and ok is set to true.
+func ParseMessage(buf []byte) (msg *Message, rest []byte, ok bool) {
+ b := BytesView(buf)
+
+ hdrBytes, ok := b.Extract(linux.NetlinkMessageHeaderSize)
+ if !ok {
+ return
+ }
+ var hdr linux.NetlinkMessageHeader
+ binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr)
+
+ // Msg portion.
+ totalMsgLen := int(hdr.Length)
+ _, ok = b.Extract(totalMsgLen - linux.NetlinkMessageHeaderSize)
+ if !ok {
+ return
+ }
+
+ // Padding.
+ numPad := alignPad(totalMsgLen, linux.NLMSG_ALIGNTO)
+ // Linux permits the last message not being aligned, just consume all of it.
+ // Ref: net/netlink/af_netlink.c:netlink_rcv_skb
+ if numPad > len(b) {
+ numPad = len(b)
+ }
+ _, ok = b.Extract(numPad)
+ if !ok {
+ return
+ }
+
+ return &Message{
+ hdr: hdr,
+ buf: buf[:totalMsgLen],
+ }, []byte(b), true
+}
+
+// Header returns the header of this message.
+func (m *Message) Header() linux.NetlinkMessageHeader {
+ return m.hdr
+}
+
+// GetData unmarshals the payload message header from this netlink message, and
+// returns the attributes portion.
+func (m *Message) GetData(msg interface{}) (AttrsView, bool) {
+ b := BytesView(m.buf)
+
+ _, ok := b.Extract(linux.NetlinkMessageHeaderSize)
+ if !ok {
+ return nil, false
+ }
+
+ size := int(binary.Size(msg))
+ msgBytes, ok := b.Extract(size)
+ if !ok {
+ return nil, false
+ }
+ binary.Unmarshal(msgBytes, usermem.ByteOrder, msg)
+
+ numPad := alignPad(linux.NetlinkMessageHeaderSize+size, linux.NLMSG_ALIGNTO)
+ // Linux permits the last message not being aligned, just consume all of it.
+ // Ref: net/netlink/af_netlink.c:netlink_rcv_skb
+ if numPad > len(b) {
+ numPad = len(b)
+ }
+ _, ok = b.Extract(numPad)
+ if !ok {
+ return nil, false
+ }
+
+ return AttrsView(b), true
+}
+
// Finalize returns the []byte containing the entire message, with the total
// length set in the message header. The Message must not be modified after
// calling Finalize.
@@ -54,7 +131,7 @@ func (m *Message) Finalize() []byte {
// Align the message. Note that the message length in the header (set
// above) is the useful length of the message, not the total aligned
// length. See net/netlink/af_netlink.c:__nlmsg_put.
- aligned := alignUp(len(m.buf), linux.NLMSG_ALIGNTO)
+ aligned := binary.AlignUp(len(m.buf), linux.NLMSG_ALIGNTO)
m.putZeros(aligned - len(m.buf))
return m.buf
}
@@ -89,7 +166,7 @@ func (m *Message) PutAttr(atype uint16, v interface{}) {
m.Put(v)
// Align the attribute.
- aligned := alignUp(l, linux.NLA_ALIGNTO)
+ aligned := binary.AlignUp(l, linux.NLA_ALIGNTO)
m.putZeros(aligned - l)
}
@@ -106,7 +183,7 @@ func (m *Message) PutAttrString(atype uint16, s string) {
m.putZeros(1)
// Align the attribute.
- aligned := alignUp(l, linux.NLA_ALIGNTO)
+ aligned := binary.AlignUp(l, linux.NLA_ALIGNTO)
m.putZeros(aligned - l)
}
@@ -157,3 +234,48 @@ func (ms *MessageSet) AddMessage(hdr linux.NetlinkMessageHeader) *Message {
ms.Messages = append(ms.Messages, m)
return m
}
+
+// AttrsView is a view into the attributes portion of a netlink message.
+type AttrsView []byte
+
+// Empty returns whether there is no attribute left in v.
+func (v AttrsView) Empty() bool {
+ return len(v) == 0
+}
+
+// ParseFirst parses first netlink attribute at the beginning of v.
+func (v AttrsView) ParseFirst() (hdr linux.NetlinkAttrHeader, value []byte, rest AttrsView, ok bool) {
+ b := BytesView(v)
+
+ hdrBytes, ok := b.Extract(linux.NetlinkAttrHeaderSize)
+ if !ok {
+ return
+ }
+ binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr)
+
+ value, ok = b.Extract(int(hdr.Length) - linux.NetlinkAttrHeaderSize)
+ if !ok {
+ return
+ }
+
+ _, ok = b.Extract(alignPad(int(hdr.Length), linux.NLA_ALIGNTO))
+ if !ok {
+ return
+ }
+
+ return hdr, value, AttrsView(b), ok
+}
+
+// BytesView supports extracting data from a byte slice with bounds checking.
+type BytesView []byte
+
+// Extract removes the first n bytes from v and returns it. If n is out of
+// bounds, it returns false.
+func (v *BytesView) Extract(n int) ([]byte, bool) {
+ if n < 0 || n > len(*v) {
+ return nil, false
+ }
+ extracted := (*v)[:n]
+ *v = (*v)[n:]
+ return extracted, true
+}
diff --git a/pkg/sentry/socket/netlink/message_test.go b/pkg/sentry/socket/netlink/message_test.go
new file mode 100644
index 000000000..ef13d9386
--- /dev/null
+++ b/pkg/sentry/socket/netlink/message_test.go
@@ -0,0 +1,312 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package message_test
+
+import (
+ "bytes"
+ "reflect"
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
+)
+
+type dummyNetlinkMsg struct {
+ Foo uint16
+}
+
+func TestParseMessage(t *testing.T) {
+ tests := []struct {
+ desc string
+ input []byte
+
+ header linux.NetlinkMessageHeader
+ dataMsg *dummyNetlinkMsg
+ restLen int
+ ok bool
+ }{
+ {
+ desc: "valid",
+ input: []byte{
+ 0x14, 0x00, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding
+ },
+ header: linux.NetlinkMessageHeader{
+ Length: 20,
+ Type: 1,
+ Flags: 2,
+ Seq: 3,
+ PortID: 4,
+ },
+ dataMsg: &dummyNetlinkMsg{
+ Foo: 0x3130,
+ },
+ restLen: 0,
+ ok: true,
+ },
+ {
+ desc: "valid with next message",
+ input: []byte{
+ 0x14, 0x00, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding
+ 0xFF, // Next message (rest)
+ },
+ header: linux.NetlinkMessageHeader{
+ Length: 20,
+ Type: 1,
+ Flags: 2,
+ Seq: 3,
+ PortID: 4,
+ },
+ dataMsg: &dummyNetlinkMsg{
+ Foo: 0x3130,
+ },
+ restLen: 1,
+ ok: true,
+ },
+ {
+ desc: "valid for last message without padding",
+ input: []byte{
+ 0x12, 0x00, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, // Data message
+ },
+ header: linux.NetlinkMessageHeader{
+ Length: 18,
+ Type: 1,
+ Flags: 2,
+ Seq: 3,
+ PortID: 4,
+ },
+ dataMsg: &dummyNetlinkMsg{
+ Foo: 0x3130,
+ },
+ restLen: 0,
+ ok: true,
+ },
+ {
+ desc: "valid for last message not to be aligned",
+ input: []byte{
+ 0x13, 0x00, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, // Data message
+ 0x00, // Excessive 1 byte permitted at end
+ },
+ header: linux.NetlinkMessageHeader{
+ Length: 19,
+ Type: 1,
+ Flags: 2,
+ Seq: 3,
+ PortID: 4,
+ },
+ dataMsg: &dummyNetlinkMsg{
+ Foo: 0x3130,
+ },
+ restLen: 0,
+ ok: true,
+ },
+ {
+ desc: "header.Length too short",
+ input: []byte{
+ 0x04, 0x00, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding
+ },
+ ok: false,
+ },
+ {
+ desc: "header.Length too long",
+ input: []byte{
+ 0xFF, 0xFF, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding
+ },
+ ok: false,
+ },
+ {
+ desc: "header incomplete",
+ input: []byte{
+ 0x04, 0x00, 0x00, 0x00, // Length
+ },
+ ok: false,
+ },
+ {
+ desc: "empty message",
+ input: []byte{},
+ ok: false,
+ },
+ }
+ for _, test := range tests {
+ msg, rest, ok := netlink.ParseMessage(test.input)
+ if ok != test.ok {
+ t.Errorf("%v: got ok = %v, want = %v", test.desc, ok, test.ok)
+ continue
+ }
+ if !test.ok {
+ continue
+ }
+ if !reflect.DeepEqual(msg.Header(), test.header) {
+ t.Errorf("%v: got hdr = %+v, want = %+v", test.desc, msg.Header(), test.header)
+ }
+
+ dataMsg := &dummyNetlinkMsg{}
+ _, dataOk := msg.GetData(dataMsg)
+ if !dataOk {
+ t.Errorf("%v: GetData.ok = %v, want = true", test.desc, dataOk)
+ } else if !reflect.DeepEqual(dataMsg, test.dataMsg) {
+ t.Errorf("%v: GetData.msg = %+v, want = %+v", test.desc, dataMsg, test.dataMsg)
+ }
+
+ if got, want := rest, test.input[len(test.input)-test.restLen:]; !bytes.Equal(got, want) {
+ t.Errorf("%v: got rest = %v, want = %v", test.desc, got, want)
+ }
+ }
+}
+
+func TestAttrView(t *testing.T) {
+ tests := []struct {
+ desc string
+ input []byte
+
+ // Outputs for ParseFirst.
+ hdr linux.NetlinkAttrHeader
+ value []byte
+ restLen int
+ ok bool
+
+ // Outputs for Empty.
+ isEmpty bool
+ }{
+ {
+ desc: "valid",
+ input: []byte{
+ 0x06, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x30, 0x31, 0x00, 0x00, // Data with 2 bytes padding
+ },
+ hdr: linux.NetlinkAttrHeader{
+ Length: 6,
+ Type: 1,
+ },
+ value: []byte{0x30, 0x31},
+ restLen: 0,
+ ok: true,
+ isEmpty: false,
+ },
+ {
+ desc: "at alignment",
+ input: []byte{
+ 0x08, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x30, 0x31, 0x32, 0x33, // Data
+ },
+ hdr: linux.NetlinkAttrHeader{
+ Length: 8,
+ Type: 1,
+ },
+ value: []byte{0x30, 0x31, 0x32, 0x33},
+ restLen: 0,
+ ok: true,
+ isEmpty: false,
+ },
+ {
+ desc: "at alignment with rest data",
+ input: []byte{
+ 0x08, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x30, 0x31, 0x32, 0x33, // Data
+ 0xFF, 0xFE, // Rest data
+ },
+ hdr: linux.NetlinkAttrHeader{
+ Length: 8,
+ Type: 1,
+ },
+ value: []byte{0x30, 0x31, 0x32, 0x33},
+ restLen: 2,
+ ok: true,
+ isEmpty: false,
+ },
+ {
+ desc: "hdr.Length too long",
+ input: []byte{
+ 0xFF, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x30, 0x31, 0x32, 0x33, // Data
+ },
+ ok: false,
+ isEmpty: false,
+ },
+ {
+ desc: "hdr.Length too short",
+ input: []byte{
+ 0x01, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x30, 0x31, 0x32, 0x33, // Data
+ },
+ ok: false,
+ isEmpty: false,
+ },
+ {
+ desc: "empty",
+ input: []byte{},
+ ok: false,
+ isEmpty: true,
+ },
+ }
+ for _, test := range tests {
+ attrs := netlink.AttrsView(test.input)
+
+ // Test ParseFirst().
+ hdr, value, rest, ok := attrs.ParseFirst()
+ if ok != test.ok {
+ t.Errorf("%v: got ok = %v, want = %v", test.desc, ok, test.ok)
+ } else if test.ok {
+ if !reflect.DeepEqual(hdr, test.hdr) {
+ t.Errorf("%v: got hdr = %+v, want = %+v", test.desc, hdr, test.hdr)
+ }
+ if !bytes.Equal(value, test.value) {
+ t.Errorf("%v: got value = %v, want = %v", test.desc, value, test.value)
+ }
+ if wantRest := test.input[len(test.input)-test.restLen:]; !bytes.Equal(rest, wantRest) {
+ t.Errorf("%v: got rest = %v, want = %v", test.desc, rest, wantRest)
+ }
+ }
+
+ // Test Empty().
+ if got, want := attrs.Empty(), test.isEmpty; got != want {
+ t.Errorf("%v: got empty = %v, want = %v", test.desc, got, want)
+ }
+ }
+}
diff --git a/pkg/sentry/socket/netlink/port/BUILD b/pkg/sentry/socket/netlink/port/BUILD
index 463544c1a..3a22923d8 100644
--- a/pkg/sentry/socket/netlink/port/BUILD
+++ b/pkg/sentry/socket/netlink/port/BUILD
@@ -1,17 +1,16 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
go_library(
name = "port",
srcs = ["port.go"],
- importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port",
visibility = ["//pkg/sentry:internal"],
+ deps = ["//pkg/sync"],
)
go_test(
name = "port_test",
srcs = ["port_test.go"],
- embed = [":port"],
+ library = ":port",
)
diff --git a/pkg/sentry/socket/netlink/port/port.go b/pkg/sentry/socket/netlink/port/port.go
index e9d3275b1..2cd3afc22 100644
--- a/pkg/sentry/socket/netlink/port/port.go
+++ b/pkg/sentry/socket/netlink/port/port.go
@@ -24,7 +24,8 @@ import (
"fmt"
"math"
"math/rand"
- "sync"
+
+ "gvisor.dev/gvisor/pkg/sync"
)
// maxPorts is a sanity limit on the maximum number of ports to allocate per
diff --git a/pkg/sentry/socket/netlink/provider.go b/pkg/sentry/socket/netlink/provider.go
index 689cad997..31e374833 100644
--- a/pkg/sentry/socket/netlink/provider.go
+++ b/pkg/sentry/socket/netlink/provider.go
@@ -18,7 +18,7 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
@@ -30,12 +30,19 @@ type Protocol interface {
// Protocol returns the Linux netlink protocol value.
Protocol() int
+ // CanSend returns true if this protocol may ever send messages.
+ //
+ // TODO(gvisor.dev/issue/1119): This is a workaround to allow
+ // advertising support for otherwise unimplemented features on sockets
+ // that will never send messages, thus making those features no-ops.
+ CanSend() bool
+
// ProcessMessage processes a single message from userspace.
//
// If err == nil, any messages added to ms will be sent back to the
// other end of the socket. Setting ms.Multi will cause an NLMSG_DONE
// message to be sent even if ms contains no messages.
- ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *MessageSet) *syserr.Error
+ ProcessMessage(ctx context.Context, msg *Message, ms *MessageSet) *syserr.Error
}
// Provider is a function that creates a new Protocol for a specific netlink
@@ -60,6 +67,8 @@ func RegisterProvider(protocol int, provider Provider) {
protocols[protocol] = provider
}
+// LINT.IfChange
+
// socketProvider implements socket.Provider.
type socketProvider struct {
}
@@ -88,7 +97,7 @@ func (*socketProvider) Socket(t *kernel.Task, stype linux.SockType, protocol int
}
d := socket.NewDirent(t, netlinkSocketDevice)
- defer d.DecRef()
+ defer d.DecRef(t)
return fs.NewFile(t, d, fs.FileFlags{Read: true, Write: true, NonSeekable: true}, s), nil
}
@@ -98,7 +107,10 @@ func (*socketProvider) Pair(*kernel.Task, linux.SockType, int) (*fs.File, *fs.Fi
return nil, nil, syserr.ErrNotSupported
}
+// LINT.ThenChange(./provider_vfs2.go)
+
// init registers the socket provider.
func init() {
socket.RegisterProvider(linux.AF_NETLINK, &socketProvider{})
+ socket.RegisterProviderVFS2(linux.AF_NETLINK, &socketProviderVFS2{})
}
diff --git a/pkg/sentry/socket/netlink/provider_vfs2.go b/pkg/sentry/socket/netlink/provider_vfs2.go
new file mode 100644
index 000000000..bb205be0d
--- /dev/null
+++ b/pkg/sentry/socket/netlink/provider_vfs2.go
@@ -0,0 +1,69 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netlink
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserr"
+)
+
+// socketProviderVFS2 implements socket.Provider.
+type socketProviderVFS2 struct {
+}
+
+// Socket implements socket.Provider.Socket.
+func (*socketProviderVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error) {
+ // Netlink sockets must be specified as datagram or raw, but they
+ // behave the same regardless of type.
+ if stype != linux.SOCK_DGRAM && stype != linux.SOCK_RAW {
+ return nil, syserr.ErrSocketNotSupported
+ }
+
+ provider, ok := protocols[protocol]
+ if !ok {
+ return nil, syserr.ErrProtocolNotSupported
+ }
+
+ p, err := provider(t)
+ if err != nil {
+ return nil, err
+ }
+
+ s, err := NewVFS2(t, stype, p)
+ if err != nil {
+ return nil, err
+ }
+
+ vfsfd := &s.vfsfd
+ mnt := t.Kernel().SocketMount()
+ d := sockfs.NewDentry(t.Credentials(), mnt)
+ if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{
+ DenyPRead: true,
+ DenyPWrite: true,
+ UseDentryMetadata: true,
+ }); err != nil {
+ return nil, syserr.FromError(err)
+ }
+ return vfsfd, nil
+}
+
+// Pair implements socket.Provider.Pair by returning an error.
+func (*socketProviderVFS2) Pair(*kernel.Task, linux.SockType, int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error) {
+ // Netlink sockets never supports creating socket pairs.
+ return nil, nil, syserr.ErrNotSupported
+}
diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD
index 1d4912753..93127398d 100644
--- a/pkg/sentry/socket/netlink/route/BUILD
+++ b/pkg/sentry/socket/netlink/route/BUILD
@@ -1,15 +1,16 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
go_library(
name = "route",
- srcs = ["protocol.go"],
- importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route",
+ srcs = [
+ "protocol.go",
+ ],
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
- "//pkg/sentry/context",
+ "//pkg/context",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go
index cc70ac237..c84d8bd7c 100644
--- a/pkg/sentry/socket/netlink/route/protocol.go
+++ b/pkg/sentry/socket/netlink/route/protocol.go
@@ -17,9 +17,10 @@ package route
import (
"bytes"
+ "syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -61,8 +62,13 @@ func (p *Protocol) Protocol() int {
return linux.NETLINK_ROUTE
}
-// dumpLinks handles RTM_GETLINK + NLM_F_DUMP requests.
-func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
+// CanSend implements netlink.Protocol.CanSend.
+func (p *Protocol) CanSend() bool {
+ return true
+}
+
+// dumpLinks handles RTM_GETLINK dump requests.
+func (p *Protocol) dumpLinks(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
// NLM_F_DUMP + RTM_GETLINK messages are supposed to include an
// ifinfomsg. However, Linux <3.9 only checked for rtgenmsg, and some
// userspace applications (including glibc) still include rtgenmsg.
@@ -86,38 +92,105 @@ func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader
return nil
}
- for id, i := range stack.Interfaces() {
- m := ms.AddMessage(linux.NetlinkMessageHeader{
- Type: linux.RTM_NEWLINK,
- })
+ for idx, i := range stack.Interfaces() {
+ addNewLinkMessage(ms, idx, i)
+ }
- m.Put(linux.InterfaceInfoMessage{
- Family: linux.AF_UNSPEC,
- Type: i.DeviceType,
- Index: id,
- Flags: i.Flags,
- })
+ return nil
+}
- m.PutAttrString(linux.IFLA_IFNAME, i.Name)
- m.PutAttr(linux.IFLA_MTU, i.MTU)
+// getLinks handles RTM_GETLINK requests.
+func (p *Protocol) getLink(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
+ stack := inet.StackFromContext(ctx)
+ if stack == nil {
+ // No network devices.
+ return nil
+ }
- mac := make([]byte, 6)
- brd := mac
- if len(i.Addr) > 0 {
- mac = i.Addr
- brd = bytes.Repeat([]byte{0xff}, len(i.Addr))
+ // Parse message.
+ var ifi linux.InterfaceInfoMessage
+ attrs, ok := msg.GetData(&ifi)
+ if !ok {
+ return syserr.ErrInvalidArgument
+ }
+
+ // Parse attributes.
+ var byName []byte
+ for !attrs.Empty() {
+ ahdr, value, rest, ok := attrs.ParseFirst()
+ if !ok {
+ return syserr.ErrInvalidArgument
}
- m.PutAttr(linux.IFLA_ADDRESS, mac)
- m.PutAttr(linux.IFLA_BROADCAST, brd)
+ attrs = rest
- // TODO(gvisor.dev/issue/578): There are many more attributes.
+ switch ahdr.Type {
+ case linux.IFLA_IFNAME:
+ if len(value) < 1 {
+ return syserr.ErrInvalidArgument
+ }
+ byName = value[:len(value)-1]
+
+ // TODO(gvisor.dev/issue/578): Support IFLA_EXT_MASK.
+ }
}
+ found := false
+ for idx, i := range stack.Interfaces() {
+ switch {
+ case ifi.Index > 0:
+ if idx != ifi.Index {
+ continue
+ }
+ case byName != nil:
+ if string(byName) != i.Name {
+ continue
+ }
+ default:
+ // Criteria not specified.
+ return syserr.ErrInvalidArgument
+ }
+
+ addNewLinkMessage(ms, idx, i)
+ found = true
+ break
+ }
+ if !found {
+ return syserr.ErrNoDevice
+ }
return nil
}
-// dumpAddrs handles RTM_GETADDR + NLM_F_DUMP requests.
-func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
+// addNewLinkMessage appends RTM_NEWLINK message for the given interface into
+// the message set.
+func addNewLinkMessage(ms *netlink.MessageSet, idx int32, i inet.Interface) {
+ m := ms.AddMessage(linux.NetlinkMessageHeader{
+ Type: linux.RTM_NEWLINK,
+ })
+
+ m.Put(linux.InterfaceInfoMessage{
+ Family: linux.AF_UNSPEC,
+ Type: i.DeviceType,
+ Index: idx,
+ Flags: i.Flags,
+ })
+
+ m.PutAttrString(linux.IFLA_IFNAME, i.Name)
+ m.PutAttr(linux.IFLA_MTU, i.MTU)
+
+ mac := make([]byte, 6)
+ brd := mac
+ if len(i.Addr) > 0 {
+ mac = i.Addr
+ brd = bytes.Repeat([]byte{0xff}, len(i.Addr))
+ }
+ m.PutAttr(linux.IFLA_ADDRESS, mac)
+ m.PutAttr(linux.IFLA_BROADCAST, brd)
+
+ // TODO(gvisor.dev/issue/578): There are many more attributes.
+}
+
+// dumpAddrs handles RTM_GETADDR dump requests.
+func (p *Protocol) dumpAddrs(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
// RTM_GETADDR dump requests need not contain anything more than the
// netlink header and 1 byte protocol family common to all
// NETLINK_ROUTE requests.
@@ -149,6 +222,7 @@ func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader
Index: uint32(id),
})
+ m.PutAttr(linux.IFA_LOCAL, []byte(a.Addr))
m.PutAttr(linux.IFA_ADDRESS, []byte(a.Addr))
// TODO(gvisor.dev/issue/578): There are many more attributes.
@@ -158,22 +232,136 @@ func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader
return nil
}
-// dumpRoutes handles RTM_GETROUTE + NLM_F_DUMP requests.
-func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
+// commonPrefixLen reports the length of the longest IP address prefix.
+// This is a simplied version from Golang's src/net/addrselect.go.
+func commonPrefixLen(a, b []byte) (cpl int) {
+ for len(a) > 0 {
+ if a[0] == b[0] {
+ cpl += 8
+ a = a[1:]
+ b = b[1:]
+ continue
+ }
+ bits := 8
+ ab, bb := a[0], b[0]
+ for {
+ ab >>= 1
+ bb >>= 1
+ bits--
+ if ab == bb {
+ cpl += bits
+ return
+ }
+ }
+ }
+ return
+}
+
+// fillRoute returns the Route using LPM algorithm. Refer to Linux's
+// net/ipv4/route.c:rt_fill_info().
+func fillRoute(routes []inet.Route, addr []byte) (inet.Route, *syserr.Error) {
+ family := uint8(linux.AF_INET)
+ if len(addr) != 4 {
+ family = linux.AF_INET6
+ }
+
+ idx := -1 // Index of the Route rule to be returned.
+ idxDef := -1 // Index of the default route rule.
+ prefix := 0 // Current longest prefix.
+ for i, route := range routes {
+ if route.Family != family {
+ continue
+ }
+
+ if len(route.GatewayAddr) > 0 && route.DstLen == 0 {
+ idxDef = i
+ continue
+ }
+
+ cpl := commonPrefixLen(addr, route.DstAddr)
+ if cpl < int(route.DstLen) {
+ continue
+ }
+ cpl = int(route.DstLen)
+ if cpl > prefix {
+ idx = i
+ prefix = cpl
+ }
+ }
+ if idx == -1 {
+ idx = idxDef
+ }
+ if idx == -1 {
+ return inet.Route{}, syserr.ErrNoRoute
+ }
+
+ route := routes[idx]
+ if family == linux.AF_INET {
+ route.DstLen = 32
+ } else {
+ route.DstLen = 128
+ }
+ route.DstAddr = addr
+ route.Flags |= linux.RTM_F_CLONED // This route is cloned.
+ return route, nil
+}
+
+// parseForDestination parses a message as format of RouteMessage-RtAttr-dst.
+func parseForDestination(msg *netlink.Message) ([]byte, *syserr.Error) {
+ var rtMsg linux.RouteMessage
+ attrs, ok := msg.GetData(&rtMsg)
+ if !ok {
+ return nil, syserr.ErrInvalidArgument
+ }
+ // iproute2 added the RTM_F_LOOKUP_TABLE flag in version v4.4.0. See
+ // commit bc234301af12. Note we don't check this flag for backward
+ // compatibility.
+ if rtMsg.Flags != 0 && rtMsg.Flags != linux.RTM_F_LOOKUP_TABLE {
+ return nil, syserr.ErrNotSupported
+ }
+
+ // Expect first attribute is RTA_DST.
+ if hdr, value, _, ok := attrs.ParseFirst(); ok && hdr.Type == linux.RTA_DST {
+ return value, nil
+ }
+ return nil, syserr.ErrInvalidArgument
+}
+
+// dumpRoutes handles RTM_GETROUTE requests.
+func (p *Protocol) dumpRoutes(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
// RTM_GETROUTE dump requests need not contain anything more than the
// netlink header and 1 byte protocol family common to all
// NETLINK_ROUTE requests.
- // We always send back an NLMSG_DONE.
- ms.Multi = true
-
stack := inet.StackFromContext(ctx)
if stack == nil {
// No network routes.
return nil
}
- for _, rt := range stack.RouteTable() {
+ hdr := msg.Header()
+ routeTables := stack.RouteTable()
+
+ if hdr.Flags == linux.NLM_F_REQUEST {
+ dst, err := parseForDestination(msg)
+ if err != nil {
+ return err
+ }
+ route, err := fillRoute(routeTables, dst)
+ if err != nil {
+ // TODO(gvisor.dev/issue/1237): return NLMSG_ERROR with ENETUNREACH.
+ return syserr.ErrNotSupported
+ }
+ routeTables = append([]inet.Route{}, route)
+ } else if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP {
+ // We always send back an NLMSG_DONE.
+ ms.Multi = true
+ } else {
+ // TODO(b/68878065): Only above cases are supported.
+ return syserr.ErrNotSupported
+ }
+
+ for _, rt := range routeTables {
m := ms.AddMessage(linux.NetlinkMessageHeader{
Type: linux.RTM_NEWROUTE,
})
@@ -214,10 +402,55 @@ func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeade
return nil
}
+// newAddr handles RTM_NEWADDR requests.
+func (p *Protocol) newAddr(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
+ stack := inet.StackFromContext(ctx)
+ if stack == nil {
+ // No network stack.
+ return syserr.ErrProtocolNotSupported
+ }
+
+ var ifa linux.InterfaceAddrMessage
+ attrs, ok := msg.GetData(&ifa)
+ if !ok {
+ return syserr.ErrInvalidArgument
+ }
+
+ for !attrs.Empty() {
+ ahdr, value, rest, ok := attrs.ParseFirst()
+ if !ok {
+ return syserr.ErrInvalidArgument
+ }
+ attrs = rest
+
+ switch ahdr.Type {
+ case linux.IFA_LOCAL:
+ err := stack.AddInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{
+ Family: ifa.Family,
+ PrefixLen: ifa.PrefixLen,
+ Flags: ifa.Flags,
+ Addr: value,
+ })
+ if err == syscall.EEXIST {
+ flags := msg.Header().Flags
+ if flags&linux.NLM_F_EXCL != 0 {
+ return syserr.ErrExists
+ }
+ } else if err != nil {
+ return syserr.ErrInvalidArgument
+ }
+ }
+ }
+ return nil
+}
+
// ProcessMessage implements netlink.Protocol.ProcessMessage.
-func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
+func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
+ hdr := msg.Header()
+
// All messages start with a 1 byte protocol family.
- if len(data) < 1 {
+ var family uint8
+ if _, ok := msg.GetData(&family); !ok {
// Linux ignores messages missing the protocol family. See
// net/core/rtnetlink.c:rtnetlink_rcv_msg.
return nil
@@ -231,22 +464,32 @@ func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageH
}
}
- // TODO(b/68878065): Only the dump variant of the types below are
- // supported.
- if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP {
- return syserr.ErrNotSupported
- }
-
- switch hdr.Type {
- case linux.RTM_GETLINK:
- return p.dumpLinks(ctx, hdr, data, ms)
- case linux.RTM_GETADDR:
- return p.dumpAddrs(ctx, hdr, data, ms)
- case linux.RTM_GETROUTE:
- return p.dumpRoutes(ctx, hdr, data, ms)
- default:
- return syserr.ErrNotSupported
+ if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP {
+ // TODO(b/68878065): Only the dump variant of the types below are
+ // supported.
+ switch hdr.Type {
+ case linux.RTM_GETLINK:
+ return p.dumpLinks(ctx, msg, ms)
+ case linux.RTM_GETADDR:
+ return p.dumpAddrs(ctx, msg, ms)
+ case linux.RTM_GETROUTE:
+ return p.dumpRoutes(ctx, msg, ms)
+ default:
+ return syserr.ErrNotSupported
+ }
+ } else if hdr.Flags&linux.NLM_F_REQUEST == linux.NLM_F_REQUEST {
+ switch hdr.Type {
+ case linux.RTM_GETLINK:
+ return p.getLink(ctx, msg, ms)
+ case linux.RTM_GETROUTE:
+ return p.dumpRoutes(ctx, msg, ms)
+ case linux.RTM_NEWADDR:
+ return p.newAddr(ctx, msg, ms)
+ default:
+ return syserr.ErrNotSupported
+ }
}
+ return syserr.ErrNotSupported
}
// init registers the NETLINK_ROUTE provider.
diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go
index b2732ca29..68a9b9a96 100644
--- a/pkg/sentry/socket/netlink/socket.go
+++ b/pkg/sentry/socket/netlink/socket.go
@@ -17,27 +17,29 @@ package netlink
import (
"math"
- "sync"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/device"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/sentry/safemem"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/netlink/port"
"gvisor.dev/gvisor/pkg/sentry/socket/unix"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/tools/go_marshal/marshal"
+ "gvisor.dev/gvisor/tools/go_marshal/primitive"
)
const sizeOfInt32 int = 4
@@ -53,15 +55,19 @@ const (
maxSendBufferSize = 4 << 20 // 4MB
)
+var errNoFilter = syserr.New("no filter attached", linux.ENOENT)
+
// netlinkSocketDevice is the netlink socket virtual device.
var netlinkSocketDevice = device.NewAnonDevice()
+// LINT.IfChange
+
// Socket is the base socket type for netlink sockets.
//
// This implementation only supports userspace sending and receiving messages
// to/from the kernel.
//
-// Socket implements socket.Socket.
+// Socket implements socket.Socket and transport.Credentialer.
//
// +stateify savable
type Socket struct {
@@ -72,6 +78,14 @@ type Socket struct {
fsutil.FileNoSplice `state:"nosave"`
fsutil.FileNoopFlush `state:"nosave"`
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+
+ socketOpsCommon
+}
+
+// socketOpsCommon contains the socket operations common to VFS1 and VFS2.
+//
+// +stateify savable
+type socketOpsCommon struct {
socket.SendReceiveTimeout
// ports provides netlink port allocation.
@@ -104,9 +118,19 @@ type Socket struct {
// sendBufferSize is the send buffer "size". We don't actually have a
// fixed buffer but only consume this many bytes.
sendBufferSize uint32
+
+ // passcred indicates if this socket wants SCM credentials.
+ passcred bool
+
+ // filter indicates that this socket has a BPF filter "installed".
+ //
+ // TODO(gvisor.dev/issue/1119): We don't actually support filtering,
+ // this is just bookkeeping for tracking add/remove.
+ filter bool
}
var _ socket.Socket = (*Socket)(nil)
+var _ transport.Credentialer = (*Socket)(nil)
// NewSocket creates a new Socket.
func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socket, *syserr.Error) {
@@ -116,31 +140,33 @@ func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socke
// Bind the endpoint for good measure so we can connect to it. The
// bound address will never be exposed.
if err := ep.Bind(tcpip.FullAddress{Addr: "dummy"}, nil); err != nil {
- ep.Close()
+ ep.Close(t)
return nil, err
}
// Create a connection from which the kernel can write messages.
connection, err := ep.(transport.BoundEndpoint).UnidirectionalConnect(t)
if err != nil {
- ep.Close()
+ ep.Close(t)
return nil, err
}
return &Socket{
- ports: t.Kernel().NetlinkPorts(),
- protocol: protocol,
- skType: skType,
- ep: ep,
- connection: connection,
- sendBufferSize: defaultSendBufferSize,
+ socketOpsCommon: socketOpsCommon{
+ ports: t.Kernel().NetlinkPorts(),
+ protocol: protocol,
+ skType: skType,
+ ep: ep,
+ connection: connection,
+ sendBufferSize: defaultSendBufferSize,
+ },
}, nil
}
// Release implements fs.FileOperations.Release.
-func (s *Socket) Release() {
- s.connection.Release()
- s.ep.Close()
+func (s *socketOpsCommon) Release(ctx context.Context) {
+ s.connection.Release(ctx)
+ s.ep.Close(ctx)
if s.bound {
s.ports.Release(s.protocol.Protocol(), s.portID)
@@ -148,7 +174,7 @@ func (s *Socket) Release() {
}
// Readiness implements waiter.Waitable.Readiness.
-func (s *Socket) Readiness(mask waiter.EventMask) waiter.EventMask {
+func (s *socketOpsCommon) Readiness(mask waiter.EventMask) waiter.EventMask {
// ep holds messages to be read and thus handles EventIn readiness.
ready := s.ep.Readiness(mask)
@@ -162,16 +188,32 @@ func (s *Socket) Readiness(mask waiter.EventMask) waiter.EventMask {
}
// EventRegister implements waiter.Waitable.EventRegister.
-func (s *Socket) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+func (s *socketOpsCommon) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
s.ep.EventRegister(e, mask)
// Writable readiness never changes, so no registration is needed.
}
// EventUnregister implements waiter.Waitable.EventUnregister.
-func (s *Socket) EventUnregister(e *waiter.Entry) {
+func (s *socketOpsCommon) EventUnregister(e *waiter.Entry) {
s.ep.EventUnregister(e)
}
+// Passcred implements transport.Credentialer.Passcred.
+func (s *socketOpsCommon) Passcred() bool {
+ s.mu.Lock()
+ passcred := s.passcred
+ s.mu.Unlock()
+ return passcred
+}
+
+// ConnectedPasscred implements transport.Credentialer.ConnectedPasscred.
+func (s *socketOpsCommon) ConnectedPasscred() bool {
+ // This socket is connected to the kernel, which doesn't need creds.
+ //
+ // This is arbitrary, as ConnectedPasscred on this type has no callers.
+ return false
+}
+
// Ioctl implements fs.FileOperations.Ioctl.
func (*Socket) Ioctl(context.Context, *fs.File, usermem.IO, arch.SyscallArguments) (uintptr, error) {
// TODO(b/68878065): no ioctls supported.
@@ -199,7 +241,7 @@ func ExtractSockAddr(b []byte) (*linux.SockAddrNetlink, *syserr.Error) {
// port of 0 defaults to the ThreadGroup ID.
//
// Preconditions: mu is held.
-func (s *Socket) bindPort(t *kernel.Task, port int32) *syserr.Error {
+func (s *socketOpsCommon) bindPort(t *kernel.Task, port int32) *syserr.Error {
if s.bound {
// Re-binding is only allowed if the port doesn't change.
if port != s.portID {
@@ -223,7 +265,7 @@ func (s *Socket) bindPort(t *kernel.Task, port int32) *syserr.Error {
}
// Bind implements socket.Socket.Bind.
-func (s *Socket) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
+func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
a, err := ExtractSockAddr(sockaddr)
if err != nil {
return err
@@ -241,7 +283,7 @@ func (s *Socket) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
}
// Connect implements socket.Socket.Connect.
-func (s *Socket) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
+func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
a, err := ExtractSockAddr(sockaddr)
if err != nil {
return err
@@ -272,25 +314,25 @@ func (s *Socket) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr
}
// Accept implements socket.Socket.Accept.
-func (s *Socket) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
+func (s *socketOpsCommon) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
// Netlink sockets never support accept.
return 0, nil, 0, syserr.ErrNotSupported
}
// Listen implements socket.Socket.Listen.
-func (s *Socket) Listen(t *kernel.Task, backlog int) *syserr.Error {
+func (s *socketOpsCommon) Listen(t *kernel.Task, backlog int) *syserr.Error {
// Netlink sockets never support listen.
return syserr.ErrNotSupported
}
// Shutdown implements socket.Socket.Shutdown.
-func (s *Socket) Shutdown(t *kernel.Task, how int) *syserr.Error {
+func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
// Netlink sockets never support shutdown.
return syserr.ErrNotSupported
}
// GetSockOpt implements socket.Socket.GetSockOpt.
-func (s *Socket) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
+func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
switch level {
case linux.SOL_SOCKET:
switch name {
@@ -300,18 +342,31 @@ func (s *Socket) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.
}
s.mu.Lock()
defer s.mu.Unlock()
- return int32(s.sendBufferSize), nil
+ sendBufferSizeP := primitive.Int32(s.sendBufferSize)
+ return &sendBufferSizeP, nil
case linux.SO_RCVBUF:
if outLen < sizeOfInt32 {
return nil, syserr.ErrInvalidArgument
}
// We don't have limit on receiving size.
- return int32(math.MaxInt32), nil
+ recvBufferSizeP := primitive.Int32(math.MaxInt32)
+ return &recvBufferSizeP, nil
+
+ case linux.SO_PASSCRED:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+ var passcred primitive.Int32
+ if s.Passcred() {
+ passcred = 1
+ }
+ return &passcred, nil
default:
socket.GetSockOptEmitUnimplementedEvent(t, name)
}
+
case linux.SOL_NETLINK:
switch name {
case linux.NETLINK_BROADCAST_ERROR,
@@ -330,7 +385,7 @@ func (s *Socket) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.
}
// SetSockOpt implements socket.Socket.SetSockOpt.
-func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error {
+func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error {
switch level {
case linux.SOL_SOCKET:
switch name {
@@ -348,6 +403,7 @@ func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *sy
s.sendBufferSize = size
s.mu.Unlock()
return nil
+
case linux.SO_RCVBUF:
if len(opt) < sizeOfInt32 {
return syserr.ErrInvalidArgument
@@ -355,6 +411,52 @@ func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *sy
// We don't have limit on receiving size. So just accept anything as
// valid for compatibility.
return nil
+
+ case linux.SO_PASSCRED:
+ if len(opt) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+ passcred := usermem.ByteOrder.Uint32(opt)
+
+ s.mu.Lock()
+ s.passcred = passcred != 0
+ s.mu.Unlock()
+ return nil
+
+ case linux.SO_ATTACH_FILTER:
+ // TODO(gvisor.dev/issue/1119): We don't actually
+ // support filtering. If this socket can't ever send
+ // messages, then there is nothing to filter and we can
+ // advertise support. Otherwise, be conservative and
+ // return an error.
+ if s.protocol.CanSend() {
+ socket.SetSockOptEmitUnimplementedEvent(t, name)
+ return syserr.ErrProtocolNotAvailable
+ }
+
+ s.mu.Lock()
+ s.filter = true
+ s.mu.Unlock()
+ return nil
+
+ case linux.SO_DETACH_FILTER:
+ // TODO(gvisor.dev/issue/1119): See above.
+ if s.protocol.CanSend() {
+ socket.SetSockOptEmitUnimplementedEvent(t, name)
+ return syserr.ErrProtocolNotAvailable
+ }
+
+ s.mu.Lock()
+ filter := s.filter
+ s.filter = false
+ s.mu.Unlock()
+
+ if !filter {
+ return errNoFilter
+ }
+
+ return nil
+
default:
socket.SetSockOptEmitUnimplementedEvent(t, name)
}
@@ -380,7 +482,7 @@ func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *sy
}
// GetSockName implements socket.Socket.GetSockName.
-func (s *Socket) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
+func (s *socketOpsCommon) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
s.mu.Lock()
defer s.mu.Unlock()
@@ -392,7 +494,7 @@ func (s *Socket) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Er
}
// GetPeerName implements socket.Socket.GetPeerName.
-func (s *Socket) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
+func (s *socketOpsCommon) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
sa := &linux.SockAddrNetlink{
Family: linux.AF_NETLINK,
// TODO(b/68878065): Support non-kernel peers. For now the peer
@@ -403,7 +505,7 @@ func (s *Socket) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Er
}
// RecvMsg implements socket.Socket.RecvMsg.
-func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) {
+func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) {
from := &linux.SockAddrNetlink{
Family: linux.AF_NETLINK,
PortID: 0,
@@ -413,29 +515,29 @@ func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, have
trunc := flags&linux.MSG_TRUNC != 0
r := unix.EndpointReader{
+ Ctx: t,
Endpoint: s.ep,
Peek: flags&linux.MSG_PEEK != 0,
}
+ doRead := func() (int64, error) {
+ return dst.CopyOutFrom(t, &r)
+ }
+
// If MSG_TRUNC is set with a zero byte destination then we still need
// to read the message and discard it, or in the case where MSG_PEEK is
// set, leave it be. In both cases the full message length must be
- // returned. However, the memory manager for the destination will not read
- // the endpoint if the destination is zero length.
- //
- // In order for the endpoint to be read when the destination size is zero,
- // we must cause a read of the endpoint by using a separate fake zero
- // length block sequence and calling the EndpointReader directly.
+ // returned.
if trunc && dst.Addrs.NumBytes() == 0 {
- // Perform a read to a zero byte block sequence. We can ignore the
- // original destination since it was zero bytes. The length returned by
- // ReadToBlocks is ignored and we return the full message length to comply
- // with MSG_TRUNC.
- _, err := r.ReadToBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(make([]byte, 0))))
- return int(r.MsgSize), linux.MSG_TRUNC, from, fromLen, socket.ControlMessages{}, syserr.FromError(err)
+ doRead = func() (int64, error) {
+ err := r.Truncate()
+ // Always return zero for bytes read since the destination size is
+ // zero.
+ return 0, err
+ }
}
- if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 {
+ if n, err := doRead(); err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 {
var mflags int
if n < int64(r.MsgSize) {
mflags |= linux.MSG_TRUNC
@@ -453,7 +555,7 @@ func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, have
defer s.EventUnregister(&e)
for {
- if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock {
+ if n, err := doRead(); err != syserror.ErrWouldBlock {
var mflags int
if n < int64(r.MsgSize) {
mflags |= linux.MSG_TRUNC
@@ -483,18 +585,43 @@ func (s *Socket) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _
})
}
+// kernelSCM implements control.SCMCredentials with credentials that represent
+// the kernel itself rather than a Task.
+//
+// +stateify savable
+type kernelSCM struct{}
+
+// Equals implements transport.CredentialsControlMessage.Equals.
+func (kernelSCM) Equals(oc transport.CredentialsControlMessage) bool {
+ _, ok := oc.(kernelSCM)
+ return ok
+}
+
+// Credentials implements control.SCMCredentials.Credentials.
+func (kernelSCM) Credentials(*kernel.Task) (kernel.ThreadID, auth.UID, auth.GID) {
+ return 0, auth.RootUID, auth.RootGID
+}
+
+// kernelCreds is the concrete version of kernelSCM used in all creds.
+var kernelCreds = &kernelSCM{}
+
// sendResponse sends the response messages in ms back to userspace.
-func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error {
+func (s *socketOpsCommon) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error {
// Linux combines multiple netlink messages into a single datagram.
bufs := make([][]byte, 0, len(ms.Messages))
for _, m := range ms.Messages {
bufs = append(bufs, m.Finalize())
}
+ // All messages are from the kernel.
+ cms := transport.ControlMessages{
+ Credentials: kernelCreds,
+ }
+
if len(bufs) > 0 {
// RecvMsg never receives the address, so we don't need to send
// one.
- _, notify, err := s.connection.Send(bufs, transport.ControlMessages{}, tcpip.FullAddress{})
+ _, notify, err := s.connection.Send(ctx, bufs, cms, tcpip.FullAddress{})
// If the buffer is full, we simply drop messages, just like
// Linux.
if err != nil && err != syserr.ErrWouldBlock {
@@ -521,7 +648,7 @@ func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error
// Add the dump_done_errno payload.
m.Put(int64(0))
- _, notify, err := s.connection.Send([][]byte{m.Finalize()}, transport.ControlMessages{}, tcpip.FullAddress{})
+ _, notify, err := s.connection.Send(ctx, [][]byte{m.Finalize()}, cms, tcpip.FullAddress{})
if err != nil && err != syserr.ErrWouldBlock {
return err
}
@@ -533,47 +660,38 @@ func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error
return nil
}
-func (s *Socket) dumpErrorMesage(ctx context.Context, hdr linux.NetlinkMessageHeader, ms *MessageSet, err *syserr.Error) *syserr.Error {
+func dumpErrorMesage(hdr linux.NetlinkMessageHeader, ms *MessageSet, err *syserr.Error) {
m := ms.AddMessage(linux.NetlinkMessageHeader{
Type: linux.NLMSG_ERROR,
})
-
m.Put(linux.NetlinkErrorMessage{
Error: int32(-err.ToLinux().Number()),
Header: hdr,
})
- return nil
+}
+func dumpAckMesage(hdr linux.NetlinkMessageHeader, ms *MessageSet) {
+ m := ms.AddMessage(linux.NetlinkMessageHeader{
+ Type: linux.NLMSG_ERROR,
+ })
+ m.Put(linux.NetlinkErrorMessage{
+ Error: 0,
+ Header: hdr,
+ })
}
// processMessages handles each message in buf, passing it to the protocol
// handler for final handling.
-func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error {
+func (s *socketOpsCommon) processMessages(ctx context.Context, buf []byte) *syserr.Error {
for len(buf) > 0 {
- if len(buf) < linux.NetlinkMessageHeaderSize {
+ msg, rest, ok := ParseMessage(buf)
+ if !ok {
// Linux ignores messages that are too short. See
// net/netlink/af_netlink.c:netlink_rcv_skb.
break
}
-
- var hdr linux.NetlinkMessageHeader
- binary.Unmarshal(buf[:linux.NetlinkMessageHeaderSize], usermem.ByteOrder, &hdr)
-
- if hdr.Length < linux.NetlinkMessageHeaderSize || uint64(hdr.Length) > uint64(len(buf)) {
- // Linux ignores malformed messages. See
- // net/netlink/af_netlink.c:netlink_rcv_skb.
- break
- }
-
- // Data from this message.
- data := buf[linux.NetlinkMessageHeaderSize:hdr.Length]
-
- // Advance to the next message.
- next := alignUp(int(hdr.Length), linux.NLMSG_ALIGNTO)
- if next >= len(buf)-1 {
- next = len(buf) - 1
- }
- buf = buf[next:]
+ buf = rest
+ hdr := msg.Header()
// Ignore control messages.
if hdr.Type < linux.NLMSG_MIN_TYPE {
@@ -581,19 +699,10 @@ func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error
}
ms := NewMessageSet(s.portID, hdr.Seq)
- var err *syserr.Error
- // TODO(b/68877377): ACKs not supported yet.
- if hdr.Flags&linux.NLM_F_ACK == linux.NLM_F_ACK {
- err = syserr.ErrNotSupported
- } else {
-
- err = s.protocol.ProcessMessage(ctx, hdr, data, ms)
- }
- if err != nil {
- ms = NewMessageSet(s.portID, hdr.Seq)
- if err := s.dumpErrorMesage(ctx, hdr, ms, err); err != nil {
- return err
- }
+ if err := s.protocol.ProcessMessage(ctx, msg, ms); err != nil {
+ dumpErrorMesage(hdr, ms, err)
+ } else if hdr.Flags&linux.NLM_F_ACK == linux.NLM_F_ACK {
+ dumpAckMesage(hdr, ms)
}
if err := s.sendResponse(ctx, ms); err != nil {
@@ -605,7 +714,7 @@ func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error
}
// sendMsg is the core of message send, used for SendMsg and Write.
-func (s *Socket) sendMsg(ctx context.Context, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) {
+func (s *socketOpsCommon) sendMsg(ctx context.Context, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) {
dstPort := int32(0)
if len(to) != 0 {
@@ -652,7 +761,7 @@ func (s *Socket) sendMsg(ctx context.Context, src usermem.IOSequence, to []byte,
}
// SendMsg implements socket.Socket.SendMsg.
-func (s *Socket) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) {
+func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) {
return s.sendMsg(t, src, to, flags, controlMessages)
}
@@ -663,11 +772,13 @@ func (s *Socket) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence,
}
// State implements socket.Socket.State.
-func (s *Socket) State() uint32 {
+func (s *socketOpsCommon) State() uint32 {
return s.ep.State()
}
// Type implements socket.Socket.Type.
-func (s *Socket) Type() (family int, skType linux.SockType, protocol int) {
+func (s *socketOpsCommon) Type() (family int, skType linux.SockType, protocol int) {
return linux.AF_NETLINK, s.skType, s.protocol.Protocol()
}
+
+// LINT.ThenChange(./socket_vfs2.go)
diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go
new file mode 100644
index 000000000..a38d25da9
--- /dev/null
+++ b/pkg/sentry/socket/netlink/socket_vfs2.go
@@ -0,0 +1,152 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netlink
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
+)
+
+// SocketVFS2 is the base VFS2 socket type for netlink sockets.
+//
+// This implementation only supports userspace sending and receiving messages
+// to/from the kernel.
+//
+// SocketVFS2 implements socket.SocketVFS2 and transport.Credentialer.
+type SocketVFS2 struct {
+ vfsfd vfs.FileDescription
+ vfs.FileDescriptionDefaultImpl
+ vfs.DentryMetadataFileDescriptionImpl
+ vfs.LockFD
+
+ socketOpsCommon
+}
+
+var _ socket.SocketVFS2 = (*SocketVFS2)(nil)
+var _ transport.Credentialer = (*SocketVFS2)(nil)
+
+// NewVFS2 creates a new SocketVFS2.
+func NewVFS2(t *kernel.Task, skType linux.SockType, protocol Protocol) (*SocketVFS2, *syserr.Error) {
+ // Datagram endpoint used to buffer kernel -> user messages.
+ ep := transport.NewConnectionless(t)
+
+ // Bind the endpoint for good measure so we can connect to it. The
+ // bound address will never be exposed.
+ if err := ep.Bind(tcpip.FullAddress{Addr: "dummy"}, nil); err != nil {
+ ep.Close(t)
+ return nil, err
+ }
+
+ // Create a connection from which the kernel can write messages.
+ connection, err := ep.(transport.BoundEndpoint).UnidirectionalConnect(t)
+ if err != nil {
+ ep.Close(t)
+ return nil, err
+ }
+
+ fd := &SocketVFS2{
+ socketOpsCommon: socketOpsCommon{
+ ports: t.Kernel().NetlinkPorts(),
+ protocol: protocol,
+ skType: skType,
+ ep: ep,
+ connection: connection,
+ sendBufferSize: defaultSendBufferSize,
+ },
+ }
+ fd.LockFD.Init(&vfs.FileLocks{})
+ return fd, nil
+}
+
+// Readiness implements waiter.Waitable.Readiness.
+func (s *SocketVFS2) Readiness(mask waiter.EventMask) waiter.EventMask {
+ return s.socketOpsCommon.Readiness(mask)
+}
+
+// EventRegister implements waiter.Waitable.EventRegister.
+func (s *SocketVFS2) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+ s.socketOpsCommon.EventRegister(e, mask)
+}
+
+// EventUnregister implements waiter.Waitable.EventUnregister.
+func (s *SocketVFS2) EventUnregister(e *waiter.Entry) {
+ s.socketOpsCommon.EventUnregister(e)
+}
+
+// Ioctl implements vfs.FileDescriptionImpl.
+func (*SocketVFS2) Ioctl(context.Context, usermem.IO, arch.SyscallArguments) (uintptr, error) {
+ // TODO(b/68878065): no ioctls supported.
+ return 0, syserror.ENOTTY
+}
+
+// PRead implements vfs.FileDescriptionImpl.
+func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+ return 0, syserror.ESPIPE
+}
+
+// Read implements vfs.FileDescriptionImpl.
+func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+ // All flags other than RWF_NOWAIT should be ignored.
+ // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT.
+ if opts.Flags != 0 {
+ return 0, syserror.EOPNOTSUPP
+ }
+
+ if dst.NumBytes() == 0 {
+ return 0, nil
+ }
+ return dst.CopyOutFrom(ctx, &unix.EndpointReader{
+ Endpoint: s.ep,
+ })
+}
+
+// PWrite implements vfs.FileDescriptionImpl.
+func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+ return 0, syserror.ESPIPE
+}
+
+// Write implements vfs.FileDescriptionImpl.
+func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+ // All flags other than RWF_NOWAIT should be ignored.
+ // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT.
+ if opts.Flags != 0 {
+ return 0, syserror.EOPNOTSUPP
+ }
+
+ n, err := s.sendMsg(ctx, src, nil, 0, socket.ControlMessages{})
+ return int64(n), err.ToError()
+}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence)
+}
diff --git a/pkg/sentry/socket/netlink/uevent/BUILD b/pkg/sentry/socket/netlink/uevent/BUILD
new file mode 100644
index 000000000..b6434923c
--- /dev/null
+++ b/pkg/sentry/socket/netlink/uevent/BUILD
@@ -0,0 +1,16 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "uevent",
+ srcs = ["protocol.go"],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/context",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/socket/netlink",
+ "//pkg/syserr",
+ ],
+)
diff --git a/pkg/sentry/socket/netlink/uevent/protocol.go b/pkg/sentry/socket/netlink/uevent/protocol.go
new file mode 100644
index 000000000..029ba21b5
--- /dev/null
+++ b/pkg/sentry/socket/netlink/uevent/protocol.go
@@ -0,0 +1,60 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package uevent provides a NETLINK_KOBJECT_UEVENT socket protocol.
+//
+// NETLINK_KOBJECT_UEVENT sockets send udev-style device events. gVisor does
+// not support any device events, so these sockets never send any messages.
+package uevent
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
+ "gvisor.dev/gvisor/pkg/syserr"
+)
+
+// Protocol implements netlink.Protocol.
+//
+// +stateify savable
+type Protocol struct{}
+
+var _ netlink.Protocol = (*Protocol)(nil)
+
+// NewProtocol creates a NETLINK_KOBJECT_UEVENT netlink.Protocol.
+func NewProtocol(t *kernel.Task) (netlink.Protocol, *syserr.Error) {
+ return &Protocol{}, nil
+}
+
+// Protocol implements netlink.Protocol.Protocol.
+func (p *Protocol) Protocol() int {
+ return linux.NETLINK_KOBJECT_UEVENT
+}
+
+// CanSend implements netlink.Protocol.CanSend.
+func (p *Protocol) CanSend() bool {
+ return false
+}
+
+// ProcessMessage implements netlink.Protocol.ProcessMessage.
+func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
+ // Silently ignore all messages.
+ return nil
+}
+
+// init registers the NETLINK_KOBJECT_UEVENT provider.
+func init() {
+ netlink.RegisterProvider(linux.NETLINK_KOBJECT_UEVENT, NewProtocol)
+}