summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--images/benchmarks/ab/Dockerfile7
-rw-r--r--images/benchmarks/httpd/Dockerfile17
-rw-r--r--images/benchmarks/httpd/apache2-tmpdir.conf5
-rw-r--r--pkg/abi/linux/ioctl.go27
-rw-r--r--pkg/abi/linux/netlink_route.go2
-rw-r--r--pkg/iovec/BUILD18
-rw-r--r--pkg/iovec/iovec.go75
-rw-r--r--pkg/iovec/iovec_test.go121
-rw-r--r--pkg/sentry/fs/host/BUILD1
-rw-r--r--pkg/sentry/fs/host/socket_iovec.go7
-rw-r--r--pkg/sentry/fsimpl/fuse/BUILD1
-rw-r--r--pkg/sentry/fsimpl/fuse/dev.go5
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go6
-rw-r--r--pkg/sentry/fsimpl/host/BUILD1
-rw-r--r--pkg/sentry/fsimpl/host/socket_iovec.go7
-rw-r--r--pkg/sentry/kernel/kernel.go4
-rw-r--r--pkg/sentry/kernel/syslog.go9
-rw-r--r--pkg/sentry/socket/hostinet/socket_vfs2.go1
-rw-r--r--pkg/sentry/socket/netstack/netstack.go74
-rw-r--r--pkg/sentry/socket/netstack/stack.go22
-rw-r--r--pkg/syserr/netstack.go2
-rw-r--r--pkg/tcpip/header/arp.go77
-rw-r--r--pkg/tcpip/link/channel/BUILD1
-rw-r--r--pkg/tcpip/link/channel/channel.go6
-rw-r--r--pkg/tcpip/link/fdbased/BUILD1
-rw-r--r--pkg/tcpip/link/fdbased/endpoint.go77
-rw-r--r--pkg/tcpip/link/loopback/loopback.go5
-rw-r--r--pkg/tcpip/link/muxed/BUILD1
-rw-r--r--pkg/tcpip/link/muxed/injectable.go6
-rw-r--r--pkg/tcpip/link/nested/BUILD1
-rw-r--r--pkg/tcpip/link/nested/nested.go6
-rw-r--r--pkg/tcpip/link/qdisc/fifo/BUILD1
-rw-r--r--pkg/tcpip/link/qdisc/fifo/endpoint.go6
-rw-r--r--pkg/tcpip/link/rawfile/rawfile_unsafe.go32
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem.go5
-rw-r--r--pkg/tcpip/link/tun/device.go10
-rw-r--r--pkg/tcpip/link/waitable/BUILD2
-rw-r--r--pkg/tcpip/link/waitable/waitable.go6
-rw-r--r--pkg/tcpip/link/waitable/waitable_test.go6
-rw-r--r--pkg/tcpip/network/ip_test.go9
-rw-r--r--pkg/tcpip/stack/forwarder_test.go6
-rw-r--r--pkg/tcpip/stack/iptables.go30
-rw-r--r--pkg/tcpip/stack/nic_test.go5
-rw-r--r--pkg/tcpip/stack/registration.go7
-rw-r--r--pkg/tcpip/stack/stack.go6
-rw-r--r--pkg/tcpip/transport/packet/endpoint.go25
-rw-r--r--pkg/tcpip/transport/packet/endpoint_state.go19
-rw-r--r--runsc/boot/config.go7
-rw-r--r--runsc/boot/filter/config.go11
-rw-r--r--runsc/boot/loader.go4
-rw-r--r--runsc/boot/vfs.go14
-rw-r--r--runsc/container/container.go14
-rw-r--r--runsc/main.go2
-rw-r--r--test/benchmarks/fs/bazel_test.go1
-rw-r--r--test/benchmarks/harness/machine.go7
-rw-r--r--test/benchmarks/network/BUILD5
-rw-r--r--test/benchmarks/network/httpd_test.go276
-rw-r--r--test/benchmarks/network/iperf_test.go4
-rw-r--r--test/runner/defs.bzl20
-rw-r--r--test/runner/runner.go10
-rw-r--r--test/syscalls/BUILD1
-rw-r--r--test/syscalls/linux/dev.cc2
-rw-r--r--test/syscalls/linux/packet_socket.cc13
-rw-r--r--test/syscalls/linux/packet_socket_raw.cc58
-rw-r--r--test/syscalls/linux/socket_netdevice.cc23
-rw-r--r--test/util/test_util.cc6
-rw-r--r--test/util/test_util.h1
-rw-r--r--tools/vm/README.md6
-rw-r--r--tools/vm/defs.bzl11
69 files changed, 1054 insertions, 210 deletions
diff --git a/images/benchmarks/ab/Dockerfile b/images/benchmarks/ab/Dockerfile
new file mode 100644
index 000000000..10544639b
--- /dev/null
+++ b/images/benchmarks/ab/Dockerfile
@@ -0,0 +1,7 @@
+FROM ubuntu:18.04
+
+RUN set -x \
+ && apt-get update \
+ && apt-get install -y \
+ apache2-utils \
+ && rm -rf /var/lib/apt/lists/*
diff --git a/images/benchmarks/httpd/Dockerfile b/images/benchmarks/httpd/Dockerfile
new file mode 100644
index 000000000..b72406012
--- /dev/null
+++ b/images/benchmarks/httpd/Dockerfile
@@ -0,0 +1,17 @@
+FROM ubuntu:18.04
+
+RUN set -x \
+ && apt-get update \
+ && apt-get install -y \
+ apache2 \
+ && rm -rf /var/lib/apt/lists/*
+
+# Generate a bunch of relevant files.
+RUN mkdir -p /local && \
+ for size in 1 10 100 1000 1024 10240; do \
+ dd if=/dev/zero of=/local/latin${size}k.txt count=${size} bs=1024; \
+ done
+
+# Rewrite DocumentRoot to point to /tmp/html instead of the default path.
+RUN sed -i 's/DocumentRoot.*\/var\/www\/html$/DocumentRoot \/tmp\/html/' /etc/apache2/sites-enabled/000-default.conf
+COPY ./apache2-tmpdir.conf /etc/apache2/sites-enabled/apache2-tmpdir.conf
diff --git a/images/benchmarks/httpd/apache2-tmpdir.conf b/images/benchmarks/httpd/apache2-tmpdir.conf
new file mode 100644
index 000000000..e33f8d9bb
--- /dev/null
+++ b/images/benchmarks/httpd/apache2-tmpdir.conf
@@ -0,0 +1,5 @@
+<Directory /tmp/html/>
+ Options Indexes FollowSymLinks
+ AllowOverride None
+ Require all granted
+</Directory> \ No newline at end of file
diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go
index 2062e6a4b..2c5e56ae5 100644
--- a/pkg/abi/linux/ioctl.go
+++ b/pkg/abi/linux/ioctl.go
@@ -67,10 +67,29 @@ const (
// ioctl(2) requests provided by uapi/linux/sockios.h
const (
- SIOCGIFMEM = 0x891f
- SIOCGIFPFLAGS = 0x8935
- SIOCGMIIPHY = 0x8947
- SIOCGMIIREG = 0x8948
+ SIOCGIFNAME = 0x8910
+ SIOCGIFCONF = 0x8912
+ SIOCGIFFLAGS = 0x8913
+ SIOCGIFADDR = 0x8915
+ SIOCGIFDSTADDR = 0x8917
+ SIOCGIFBRDADDR = 0x8919
+ SIOCGIFNETMASK = 0x891b
+ SIOCGIFMETRIC = 0x891d
+ SIOCGIFMTU = 0x8921
+ SIOCGIFMEM = 0x891f
+ SIOCGIFHWADDR = 0x8927
+ SIOCGIFINDEX = 0x8933
+ SIOCGIFPFLAGS = 0x8935
+ SIOCGIFTXQLEN = 0x8942
+ SIOCETHTOOL = 0x8946
+ SIOCGMIIPHY = 0x8947
+ SIOCGMIIREG = 0x8948
+ SIOCGIFMAP = 0x8970
+)
+
+// ioctl(2) requests provided by uapi/asm-generic/sockios.h
+const (
+ SIOCGSTAMP = 0x8906
)
// ioctl(2) directions. Used to calculate requests number.
diff --git a/pkg/abi/linux/netlink_route.go b/pkg/abi/linux/netlink_route.go
index 40bec566c..ceda0a8d3 100644
--- a/pkg/abi/linux/netlink_route.go
+++ b/pkg/abi/linux/netlink_route.go
@@ -187,6 +187,8 @@ const (
// Device types, from uapi/linux/if_arp.h.
const (
+ ARPHRD_NONE = 65534
+ ARPHRD_ETHER = 1
ARPHRD_LOOPBACK = 772
)
diff --git a/pkg/iovec/BUILD b/pkg/iovec/BUILD
new file mode 100644
index 000000000..eda82cfc1
--- /dev/null
+++ b/pkg/iovec/BUILD
@@ -0,0 +1,18 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "iovec",
+ srcs = ["iovec.go"],
+ visibility = ["//:sandbox"],
+ deps = ["//pkg/abi/linux"],
+)
+
+go_test(
+ name = "iovec_test",
+ size = "small",
+ srcs = ["iovec_test.go"],
+ library = ":iovec",
+ deps = ["@org_golang_x_sys//unix:go_default_library"],
+)
diff --git a/pkg/iovec/iovec.go b/pkg/iovec/iovec.go
new file mode 100644
index 000000000..dd70fe80f
--- /dev/null
+++ b/pkg/iovec/iovec.go
@@ -0,0 +1,75 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+
+// Package iovec provides helpers to interact with vectorized I/O on host
+// system.
+package iovec
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+)
+
+// MaxIovs is the maximum number of iovecs host platform can accept.
+var MaxIovs = linux.UIO_MAXIOV
+
+// Builder is a builder for slice of syscall.Iovec.
+type Builder struct {
+ iovec []syscall.Iovec
+ storage [8]syscall.Iovec
+
+ // overflow tracks the last buffer when iovec length is at MaxIovs.
+ overflow []byte
+}
+
+// Add adds buf to b preparing to be written. Zero-length buf won't be added.
+func (b *Builder) Add(buf []byte) {
+ if len(buf) == 0 {
+ return
+ }
+ if b.iovec == nil {
+ b.iovec = b.storage[:0]
+ }
+ if len(b.iovec) >= MaxIovs {
+ b.addByAppend(buf)
+ return
+ }
+ b.iovec = append(b.iovec, syscall.Iovec{
+ Base: &buf[0],
+ Len: uint64(len(buf)),
+ })
+ // Keep the last buf if iovec is at max capacity. We will need to append to it
+ // for later bufs.
+ if len(b.iovec) == MaxIovs {
+ n := len(buf)
+ b.overflow = buf[:n:n]
+ }
+}
+
+func (b *Builder) addByAppend(buf []byte) {
+ b.overflow = append(b.overflow, buf...)
+ b.iovec[len(b.iovec)-1] = syscall.Iovec{
+ Base: &b.overflow[0],
+ Len: uint64(len(b.overflow)),
+ }
+}
+
+// Build returns the final Iovec slice. The length of returned iovec will not
+// excceed MaxIovs.
+func (b *Builder) Build() []syscall.Iovec {
+ return b.iovec
+}
diff --git a/pkg/iovec/iovec_test.go b/pkg/iovec/iovec_test.go
new file mode 100644
index 000000000..a3900c299
--- /dev/null
+++ b/pkg/iovec/iovec_test.go
@@ -0,0 +1,121 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+
+package iovec
+
+import (
+ "bytes"
+ "fmt"
+ "syscall"
+ "testing"
+ "unsafe"
+
+ "golang.org/x/sys/unix"
+)
+
+func TestBuilderEmpty(t *testing.T) {
+ var builder Builder
+ iovecs := builder.Build()
+ if got, want := len(iovecs), 0; got != want {
+ t.Errorf("len(iovecs) = %d, want %d", got, want)
+ }
+}
+
+func TestBuilderBuild(t *testing.T) {
+ a := []byte{1, 2}
+ b := []byte{3, 4, 5}
+
+ var builder Builder
+ builder.Add(a)
+ builder.Add(b)
+ builder.Add(nil) // Nil slice won't be added.
+ builder.Add([]byte{}) // Empty slice won't be added.
+ iovecs := builder.Build()
+
+ if got, want := len(iovecs), 2; got != want {
+ t.Fatalf("len(iovecs) = %d, want %d", got, want)
+ }
+ for i, data := range [][]byte{a, b} {
+ if got, want := *iovecs[i].Base, data[0]; got != want {
+ t.Fatalf("*iovecs[%d].Base = %d, want %d", i, got, want)
+ }
+ if got, want := iovecs[i].Len, uint64(len(data)); got != want {
+ t.Fatalf("iovecs[%d].Len = %d, want %d", i, got, want)
+ }
+ }
+}
+
+func TestBuilderBuildMaxIov(t *testing.T) {
+ for _, test := range []struct {
+ numIov int
+ }{
+ {
+ numIov: MaxIovs - 1,
+ },
+ {
+ numIov: MaxIovs,
+ },
+ {
+ numIov: MaxIovs + 1,
+ },
+ {
+ numIov: MaxIovs + 10,
+ },
+ } {
+ name := fmt.Sprintf("numIov=%v", test.numIov)
+ t.Run(name, func(t *testing.T) {
+ var data []byte
+ var builder Builder
+ for i := 0; i < test.numIov; i++ {
+ buf := []byte{byte(i)}
+ builder.Add(buf)
+ data = append(data, buf...)
+ }
+ iovec := builder.Build()
+
+ // Check the expected length of iovec.
+ wantNum := test.numIov
+ if wantNum > MaxIovs {
+ wantNum = MaxIovs
+ }
+ if got, want := len(iovec), wantNum; got != want {
+ t.Errorf("len(iovec) = %d, want %d", got, want)
+ }
+
+ // Test a real read-write.
+ var fds [2]int
+ if err := unix.Pipe(fds[:]); err != nil {
+ t.Fatalf("Pipe: %v", err)
+ }
+ defer syscall.Close(fds[0])
+ defer syscall.Close(fds[1])
+
+ wrote, _, e := syscall.RawSyscall(syscall.SYS_WRITEV, uintptr(fds[1]), uintptr(unsafe.Pointer(&iovec[0])), uintptr(len(iovec)))
+ if int(wrote) != len(data) || e != 0 {
+ t.Fatalf("writev: %v, %v; want %v, 0", wrote, e, len(data))
+ }
+
+ got := make([]byte, len(data))
+ if n, err := syscall.Read(fds[0], got); n != len(got) || err != nil {
+ t.Fatalf("read: %v, %v; want %v, nil", n, err, len(got))
+ }
+
+ if !bytes.Equal(got, data) {
+ t.Errorf("read: got data %v, want %v", got, data)
+ }
+ })
+ }
+}
diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD
index aabce6cc9..d41d23a43 100644
--- a/pkg/sentry/fs/host/BUILD
+++ b/pkg/sentry/fs/host/BUILD
@@ -30,6 +30,7 @@ go_library(
"//pkg/context",
"//pkg/fd",
"//pkg/fdnotifier",
+ "//pkg/iovec",
"//pkg/log",
"//pkg/refs",
"//pkg/safemem",
diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go
index 5c18dbd5e..905afb50d 100644
--- a/pkg/sentry/fs/host/socket_iovec.go
+++ b/pkg/sentry/fs/host/socket_iovec.go
@@ -17,15 +17,12 @@ package host
import (
"syscall"
- "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/iovec"
"gvisor.dev/gvisor/pkg/syserror"
)
// LINT.IfChange
-// maxIovs is the maximum number of iovecs to pass to the host.
-var maxIovs = linux.UIO_MAXIOV
-
// copyToMulti copies as many bytes from src to dst as possible.
func copyToMulti(dst [][]byte, src []byte) {
for _, d := range dst {
@@ -76,7 +73,7 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec
}
}
- if iovsRequired > maxIovs {
+ if iovsRequired > iovec.MaxIovs {
// The kernel will reject our call if we pass this many iovs.
// Use a single intermediate buffer instead.
b := make([]byte, stopLen)
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 41567967d..3e00c2abb 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -12,6 +12,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/sentry/fsimpl/devtmpfs",
+ "//pkg/sentry/kernel",
"//pkg/sentry/vfs",
"//pkg/syserror",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index f6a67d005..dc33268af 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -18,6 +18,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -30,6 +31,10 @@ type fuseDevice struct{}
// Open implements vfs.Device.Open.
func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ if !kernel.FUSEEnabled {
+ return nil, syserror.ENOENT
+ }
+
var fd DeviceFD
if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{
UseDentryMetadata: true,
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index c1e6b13e5..a7b53b2d2 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -28,9 +28,9 @@ import (
)
// specialFileFD implements vfs.FileDescriptionImpl for pipes, sockets, device
-// special files, and (when filesystemOptions.specialRegularFiles is in effect)
-// regular files. specialFileFD differs from regularFileFD by using per-FD
-// handles instead of shared per-dentry handles, and never buffering I/O.
+// special files, and (when filesystemOptions.regularFilesUseSpecialFileFD is
+// in effect) regular files. specialFileFD differs from regularFileFD by using
+// per-FD handles instead of shared per-dentry handles, and never buffering I/O.
type specialFileFD struct {
fileDescription
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index 44a09d87a..e86fbe2d5 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -22,6 +22,7 @@ go_library(
"//pkg/context",
"//pkg/fdnotifier",
"//pkg/fspath",
+ "//pkg/iovec",
"//pkg/log",
"//pkg/refs",
"//pkg/safemem",
diff --git a/pkg/sentry/fsimpl/host/socket_iovec.go b/pkg/sentry/fsimpl/host/socket_iovec.go
index 584c247d2..fc0d5fd38 100644
--- a/pkg/sentry/fsimpl/host/socket_iovec.go
+++ b/pkg/sentry/fsimpl/host/socket_iovec.go
@@ -17,13 +17,10 @@ package host
import (
"syscall"
- "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/iovec"
"gvisor.dev/gvisor/pkg/syserror"
)
-// maxIovs is the maximum number of iovecs to pass to the host.
-var maxIovs = linux.UIO_MAXIOV
-
// copyToMulti copies as many bytes from src to dst as possible.
func copyToMulti(dst [][]byte, src []byte) {
for _, d := range dst {
@@ -74,7 +71,7 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec
}
}
- if iovsRequired > maxIovs {
+ if iovsRequired > iovec.MaxIovs {
// The kernel will reject our call if we pass this many iovs.
// Use a single intermediate buffer instead.
b := make([]byte, stopLen)
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 2177b785a..240cd6fe0 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -81,6 +81,10 @@ import (
// easy access everywhere. To be removed once VFS2 becomes the default.
var VFS2Enabled = false
+// FUSEEnabled is set to true when FUSE is enabled. Added as a global for allow
+// easy access everywhere. To be removed once FUSE is completed.
+var FUSEEnabled = false
+
// Kernel represents an emulated Linux kernel. It must be initialized by calling
// Init() or LoadFrom().
//
diff --git a/pkg/sentry/kernel/syslog.go b/pkg/sentry/kernel/syslog.go
index 4607cde2f..a83ce219c 100644
--- a/pkg/sentry/kernel/syslog.go
+++ b/pkg/sentry/kernel/syslog.go
@@ -98,6 +98,15 @@ func (s *syslog) Log() []byte {
s.msg = append(s.msg, []byte(fmt.Sprintf(format, time, selectMessage()))...)
}
+ if VFS2Enabled {
+ time += rand.Float64() / 2
+ s.msg = append(s.msg, []byte(fmt.Sprintf(format, time, "Setting up VFS2..."))...)
+ if FUSEEnabled {
+ time += rand.Float64() / 2
+ s.msg = append(s.msg, []byte(fmt.Sprintf(format, time, "Setting up FUSE..."))...)
+ }
+ }
+
time += rand.Float64() / 2
s.msg = append(s.msg, []byte(fmt.Sprintf(format, time, "Ready!"))...)
diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go
index 8f192c62f..8a1d52ebf 100644
--- a/pkg/sentry/socket/hostinet/socket_vfs2.go
+++ b/pkg/sentry/socket/hostinet/socket_vfs2.go
@@ -71,6 +71,7 @@ func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol in
DenyPWrite: true,
UseDentryMetadata: true,
}); err != nil {
+ fdnotifier.RemoveFD(int32(s.fd))
return nil, syserr.FromError(err)
}
return vfsfd, nil
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 78a842973..0b1be1bd2 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -2747,7 +2747,7 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy
// sockets.
// TODO(b/78348848): Add a commonEndpoint method to support SIOCGSTAMP.
switch args[1].Int() {
- case syscall.SIOCGSTAMP:
+ case linux.SIOCGSTAMP:
s.readMu.Lock()
defer s.readMu.Unlock()
if !s.timestampValid {
@@ -2788,18 +2788,19 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy
// Ioctl performs a socket ioctl.
func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
switch arg := int(args[1].Int()); arg {
- case syscall.SIOCGIFFLAGS,
- syscall.SIOCGIFADDR,
- syscall.SIOCGIFBRDADDR,
- syscall.SIOCGIFDSTADDR,
- syscall.SIOCGIFHWADDR,
- syscall.SIOCGIFINDEX,
- syscall.SIOCGIFMAP,
- syscall.SIOCGIFMETRIC,
- syscall.SIOCGIFMTU,
- syscall.SIOCGIFNAME,
- syscall.SIOCGIFNETMASK,
- syscall.SIOCGIFTXQLEN:
+ case linux.SIOCGIFFLAGS,
+ linux.SIOCGIFADDR,
+ linux.SIOCGIFBRDADDR,
+ linux.SIOCGIFDSTADDR,
+ linux.SIOCGIFHWADDR,
+ linux.SIOCGIFINDEX,
+ linux.SIOCGIFMAP,
+ linux.SIOCGIFMETRIC,
+ linux.SIOCGIFMTU,
+ linux.SIOCGIFNAME,
+ linux.SIOCGIFNETMASK,
+ linux.SIOCGIFTXQLEN,
+ linux.SIOCETHTOOL:
var ifr linux.IFReq
if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &ifr, usermem.IOOpts{
@@ -2815,7 +2816,7 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc
})
return 0, err
- case syscall.SIOCGIFCONF:
+ case linux.SIOCGIFCONF:
// Return a list of interface addresses or the buffer size
// necessary to hold the list.
var ifc linux.IFConf
@@ -2889,7 +2890,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
// SIOCGIFNAME uses ifr.ifr_ifindex rather than ifr.ifr_name to
// identify a device.
- if arg == syscall.SIOCGIFNAME {
+ if arg == linux.SIOCGIFNAME {
// Gets the name of the interface given the interface index
// stored in ifr_ifindex.
index = int32(usermem.ByteOrder.Uint32(ifr.Data[:4]))
@@ -2912,21 +2913,28 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
}
switch arg {
- case syscall.SIOCGIFINDEX:
+ case linux.SIOCGIFINDEX:
// Copy out the index to the data.
usermem.ByteOrder.PutUint32(ifr.Data[:], uint32(index))
- case syscall.SIOCGIFHWADDR:
+ case linux.SIOCGIFHWADDR:
// Copy the hardware address out.
- ifr.Data[0] = 6 // IEEE802.2 arp type.
- ifr.Data[1] = 0
+ //
+ // Refer: https://linux.die.net/man/7/netdevice
+ // SIOCGIFHWADDR, SIOCSIFHWADDR
+ //
+ // Get or set the hardware address of a device using
+ // ifr_hwaddr. The hardware address is specified in a struct
+ // sockaddr. sa_family contains the ARPHRD_* device type,
+ // sa_data the L2 hardware address starting from byte 0. Setting
+ // the hardware address is a privileged operation.
+ usermem.ByteOrder.PutUint16(ifr.Data[:], iface.DeviceType)
n := copy(ifr.Data[2:], iface.Addr)
for i := 2 + n; i < len(ifr.Data); i++ {
ifr.Data[i] = 0 // Clear padding.
}
- usermem.ByteOrder.PutUint16(ifr.Data[:2], uint16(n))
- case syscall.SIOCGIFFLAGS:
+ case linux.SIOCGIFFLAGS:
f, err := interfaceStatusFlags(stack, iface.Name)
if err != nil {
return err
@@ -2935,7 +2943,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
// matches Linux behavior.
usermem.ByteOrder.PutUint16(ifr.Data[:2], uint16(f))
- case syscall.SIOCGIFADDR:
+ case linux.SIOCGIFADDR:
// Copy the IPv4 address out.
for _, addr := range stack.InterfaceAddrs()[index] {
// This ioctl is only compatible with AF_INET addresses.
@@ -2946,32 +2954,32 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
break
}
- case syscall.SIOCGIFMETRIC:
+ case linux.SIOCGIFMETRIC:
// Gets the metric of the device. As per netdevice(7), this
// always just sets ifr_metric to 0.
usermem.ByteOrder.PutUint32(ifr.Data[:4], 0)
- case syscall.SIOCGIFMTU:
+ case linux.SIOCGIFMTU:
// Gets the MTU of the device.
usermem.ByteOrder.PutUint32(ifr.Data[:4], iface.MTU)
- case syscall.SIOCGIFMAP:
+ case linux.SIOCGIFMAP:
// Gets the hardware parameters of the device.
// TODO(gvisor.dev/issue/505): Implement.
- case syscall.SIOCGIFTXQLEN:
+ case linux.SIOCGIFTXQLEN:
// Gets the transmit queue length of the device.
// TODO(gvisor.dev/issue/505): Implement.
- case syscall.SIOCGIFDSTADDR:
+ case linux.SIOCGIFDSTADDR:
// Gets the destination address of a point-to-point device.
// TODO(gvisor.dev/issue/505): Implement.
- case syscall.SIOCGIFBRDADDR:
+ case linux.SIOCGIFBRDADDR:
// Gets the broadcast address of a device.
// TODO(gvisor.dev/issue/505): Implement.
- case syscall.SIOCGIFNETMASK:
+ case linux.SIOCGIFNETMASK:
// Gets the network mask of a device.
for _, addr := range stack.InterfaceAddrs()[index] {
// This ioctl is only compatible with AF_INET addresses.
@@ -2988,6 +2996,14 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
break
}
+ case linux.SIOCETHTOOL:
+ // Stubbed out for now, Ideally we should implement the required
+ // sub-commands for ETHTOOL
+ //
+ // See:
+ // https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/net/core/dev_ioctl.c
+ return syserr.ErrEndpointOperation
+
default:
// Not a valid call.
return syserr.ErrInvalidArgument
diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go
index 548442b96..67737ae87 100644
--- a/pkg/sentry/socket/netstack/stack.go
+++ b/pkg/sentry/socket/netstack/stack.go
@@ -15,6 +15,8 @@
package netstack
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/inet"
@@ -40,19 +42,29 @@ func (s *Stack) SupportsIPv6() bool {
return s.Stack.CheckNetworkProtocol(ipv6.ProtocolNumber)
}
+// Converts Netstack's ARPHardwareType to equivalent linux constants.
+func toLinuxARPHardwareType(t header.ARPHardwareType) uint16 {
+ switch t {
+ case header.ARPHardwareNone:
+ return linux.ARPHRD_NONE
+ case header.ARPHardwareLoopback:
+ return linux.ARPHRD_LOOPBACK
+ case header.ARPHardwareEther:
+ return linux.ARPHRD_ETHER
+ default:
+ panic(fmt.Sprintf("unknown ARPHRD type: %d", t))
+ }
+}
+
// Interfaces implements inet.Stack.Interfaces.
func (s *Stack) Interfaces() map[int32]inet.Interface {
is := make(map[int32]inet.Interface)
for id, ni := range s.Stack.NICInfo() {
- var devType uint16
- if ni.Flags.Loopback {
- devType = linux.ARPHRD_LOOPBACK
- }
is[int32(id)] = inet.Interface{
Name: ni.Name,
Addr: []byte(ni.LinkAddress),
Flags: uint32(nicStateFlagsToLinux(ni.Flags)),
- DeviceType: devType,
+ DeviceType: toLinuxARPHardwareType(ni.ARPHardwareType),
MTU: ni.MTU,
}
}
diff --git a/pkg/syserr/netstack.go b/pkg/syserr/netstack.go
index 8ff922c69..5ae10939d 100644
--- a/pkg/syserr/netstack.go
+++ b/pkg/syserr/netstack.go
@@ -22,7 +22,7 @@ import (
// Mapping for tcpip.Error types.
var (
ErrUnknownProtocol = New(tcpip.ErrUnknownProtocol.String(), linux.EINVAL)
- ErrUnknownNICID = New(tcpip.ErrUnknownNICID.String(), linux.EINVAL)
+ ErrUnknownNICID = New(tcpip.ErrUnknownNICID.String(), linux.ENODEV)
ErrUnknownDevice = New(tcpip.ErrUnknownDevice.String(), linux.ENODEV)
ErrUnknownProtocolOption = New(tcpip.ErrUnknownProtocolOption.String(), linux.ENOPROTOOPT)
ErrDuplicateNICID = New(tcpip.ErrDuplicateNICID.String(), linux.EEXIST)
diff --git a/pkg/tcpip/header/arp.go b/pkg/tcpip/header/arp.go
index 718a4720a..83189676e 100644
--- a/pkg/tcpip/header/arp.go
+++ b/pkg/tcpip/header/arp.go
@@ -14,14 +14,33 @@
package header
-import "gvisor.dev/gvisor/pkg/tcpip"
+import (
+ "encoding/binary"
+
+ "gvisor.dev/gvisor/pkg/tcpip"
+)
const (
// ARPProtocolNumber is the ARP network protocol number.
ARPProtocolNumber tcpip.NetworkProtocolNumber = 0x0806
// ARPSize is the size of an IPv4-over-Ethernet ARP packet.
- ARPSize = 2 + 2 + 1 + 1 + 2 + 2*6 + 2*4
+ ARPSize = 28
+)
+
+// ARPHardwareType is the hardware type for LinkEndpoint in an ARP header.
+type ARPHardwareType uint16
+
+// Typical ARP HardwareType values. Some of the constants have to be specific
+// values as they are egressed on the wire in the HTYPE field of an ARP header.
+const (
+ ARPHardwareNone ARPHardwareType = 0
+ // ARPHardwareEther specifically is the HTYPE for Ethernet as specified
+ // in the IANA list here:
+ //
+ // https://www.iana.org/assignments/arp-parameters/arp-parameters.xhtml#arp-parameters-2
+ ARPHardwareEther ARPHardwareType = 1
+ ARPHardwareLoopback ARPHardwareType = 2
)
// ARPOp is an ARP opcode.
@@ -36,54 +55,64 @@ const (
// ARP is an ARP packet stored in a byte array as described in RFC 826.
type ARP []byte
-func (a ARP) hardwareAddressSpace() uint16 { return uint16(a[0])<<8 | uint16(a[1]) }
-func (a ARP) protocolAddressSpace() uint16 { return uint16(a[2])<<8 | uint16(a[3]) }
-func (a ARP) hardwareAddressSize() int { return int(a[4]) }
-func (a ARP) protocolAddressSize() int { return int(a[5]) }
+const (
+ hTypeOffset = 0
+ protocolOffset = 2
+ haAddressSizeOffset = 4
+ protoAddressSizeOffset = 5
+ opCodeOffset = 6
+ senderHAAddressOffset = 8
+ senderProtocolAddressOffset = senderHAAddressOffset + EthernetAddressSize
+ targetHAAddressOffset = senderProtocolAddressOffset + IPv4AddressSize
+ targetProtocolAddressOffset = targetHAAddressOffset + EthernetAddressSize
+)
+
+func (a ARP) hardwareAddressType() ARPHardwareType {
+ return ARPHardwareType(binary.BigEndian.Uint16(a[hTypeOffset:]))
+}
+
+func (a ARP) protocolAddressSpace() uint16 { return binary.BigEndian.Uint16(a[protocolOffset:]) }
+func (a ARP) hardwareAddressSize() int { return int(a[haAddressSizeOffset]) }
+func (a ARP) protocolAddressSize() int { return int(a[protoAddressSizeOffset]) }
// Op is the ARP opcode.
-func (a ARP) Op() ARPOp { return ARPOp(a[6])<<8 | ARPOp(a[7]) }
+func (a ARP) Op() ARPOp { return ARPOp(binary.BigEndian.Uint16(a[opCodeOffset:])) }
// SetOp sets the ARP opcode.
func (a ARP) SetOp(op ARPOp) {
- a[6] = uint8(op >> 8)
- a[7] = uint8(op)
+ binary.BigEndian.PutUint16(a[opCodeOffset:], uint16(op))
}
// SetIPv4OverEthernet configures the ARP packet for IPv4-over-Ethernet.
func (a ARP) SetIPv4OverEthernet() {
- a[0], a[1] = 0, 1 // htypeEthernet
- a[2], a[3] = 0x08, 0x00 // IPv4ProtocolNumber
- a[4] = 6 // macSize
- a[5] = uint8(IPv4AddressSize)
+ binary.BigEndian.PutUint16(a[hTypeOffset:], uint16(ARPHardwareEther))
+ binary.BigEndian.PutUint16(a[protocolOffset:], uint16(IPv4ProtocolNumber))
+ a[haAddressSizeOffset] = EthernetAddressSize
+ a[protoAddressSizeOffset] = uint8(IPv4AddressSize)
}
// HardwareAddressSender is the link address of the sender.
// It is a view on to the ARP packet so it can be used to set the value.
func (a ARP) HardwareAddressSender() []byte {
- const s = 8
- return a[s : s+6]
+ return a[senderHAAddressOffset : senderHAAddressOffset+EthernetAddressSize]
}
// ProtocolAddressSender is the protocol address of the sender.
// It is a view on to the ARP packet so it can be used to set the value.
func (a ARP) ProtocolAddressSender() []byte {
- const s = 8 + 6
- return a[s : s+4]
+ return a[senderProtocolAddressOffset : senderProtocolAddressOffset+IPv4AddressSize]
}
// HardwareAddressTarget is the link address of the target.
// It is a view on to the ARP packet so it can be used to set the value.
func (a ARP) HardwareAddressTarget() []byte {
- const s = 8 + 6 + 4
- return a[s : s+6]
+ return a[targetHAAddressOffset : targetHAAddressOffset+EthernetAddressSize]
}
// ProtocolAddressTarget is the protocol address of the target.
// It is a view on to the ARP packet so it can be used to set the value.
func (a ARP) ProtocolAddressTarget() []byte {
- const s = 8 + 6 + 4 + 6
- return a[s : s+4]
+ return a[targetProtocolAddressOffset : targetProtocolAddressOffset+IPv4AddressSize]
}
// IsValid reports whether this is an ARP packet for IPv4 over Ethernet.
@@ -91,10 +120,8 @@ func (a ARP) IsValid() bool {
if len(a) < ARPSize {
return false
}
- const htypeEthernet = 1
- const macSize = 6
- return a.hardwareAddressSpace() == htypeEthernet &&
+ return a.hardwareAddressType() == ARPHardwareEther &&
a.protocolAddressSpace() == uint16(IPv4ProtocolNumber) &&
- a.hardwareAddressSize() == macSize &&
+ a.hardwareAddressSize() == EthernetAddressSize &&
a.protocolAddressSize() == IPv4AddressSize
}
diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD
index b8b93e78e..39ca774ef 100644
--- a/pkg/tcpip/link/channel/BUILD
+++ b/pkg/tcpip/link/channel/BUILD
@@ -10,6 +10,7 @@ go_library(
"//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
+ "//pkg/tcpip/header",
"//pkg/tcpip/stack",
],
)
diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go
index 20b183da0..a2bb773d4 100644
--- a/pkg/tcpip/link/channel/channel.go
+++ b/pkg/tcpip/link/channel/channel.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -296,3 +297,8 @@ func (e *Endpoint) AddNotify(notify Notification) *NotificationHandle {
func (e *Endpoint) RemoveNotify(handle *NotificationHandle) {
e.q.RemoveNotify(handle)
}
+
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
+func (*Endpoint) ARPHardwareType() header.ARPHardwareType {
+ return header.ARPHardwareNone
+}
diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD
index aa6db9aea..507b44abc 100644
--- a/pkg/tcpip/link/fdbased/BUILD
+++ b/pkg/tcpip/link/fdbased/BUILD
@@ -15,6 +15,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/binary",
+ "//pkg/iovec",
"//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index f34082e1a..6aa1badc7 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -45,6 +45,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/iovec"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
@@ -406,6 +407,8 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne
eth.Encode(ethHdr)
}
+ var builder iovec.Builder
+
fd := e.fds[pkt.Hash%uint32(len(e.fds))]
if e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
vnetHdr := virtioNetHdr{}
@@ -430,29 +433,25 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne
}
vnetHdrBuf := binary.Marshal(make([]byte, 0, virtioNetHdrSize), binary.LittleEndian, vnetHdr)
- return rawfile.NonBlockingWrite3(fd, vnetHdrBuf, pkt.Header.View(), pkt.Data.ToView())
+ builder.Add(vnetHdrBuf)
}
- if pkt.Data.Size() == 0 {
- return rawfile.NonBlockingWrite(fd, pkt.Header.View())
- }
- if pkt.Header.UsedLength() == 0 {
- return rawfile.NonBlockingWrite(fd, pkt.Data.ToView())
+ builder.Add(pkt.Header.View())
+ for _, v := range pkt.Data.Views() {
+ builder.Add(v)
}
- return rawfile.NonBlockingWrite3(fd, pkt.Header.View(), pkt.Data.ToView(), nil)
+ return rawfile.NonBlockingWriteIovec(fd, builder.Build())
}
func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tcpip.Error) {
// Send a batch of packets through batchFD.
mmsgHdrs := make([]rawfile.MMsgHdr, 0, len(batch))
for _, pkt := range batch {
- var ethHdrBuf []byte
- iovLen := 0
+ var eth header.Ethernet
if e.hdrSize > 0 {
// Add ethernet header if needed.
- ethHdrBuf = make([]byte, header.EthernetMinimumSize)
- eth := header.Ethernet(ethHdrBuf)
+ eth = make(header.Ethernet, header.EthernetMinimumSize)
ethHdr := &header.EthernetFields{
DstAddr: pkt.EgressRoute.RemoteLinkAddress,
Type: pkt.NetworkProtocolNumber,
@@ -465,12 +464,11 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc
ethHdr.SrcAddr = e.addr
}
eth.Encode(ethHdr)
- iovLen++
}
- vnetHdr := virtioNetHdr{}
var vnetHdrBuf []byte
if e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
+ vnetHdr := virtioNetHdr{}
if pkt.GSOOptions != nil {
vnetHdr.hdrLen = uint16(pkt.Header.UsedLength())
if pkt.GSOOptions.NeedsCsum {
@@ -491,45 +489,20 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc
}
}
vnetHdrBuf = binary.Marshal(make([]byte, 0, virtioNetHdrSize), binary.LittleEndian, vnetHdr)
- iovLen++
}
- iovecs := make([]syscall.Iovec, iovLen+1+len(pkt.Data.Views()))
+ var builder iovec.Builder
+ builder.Add(vnetHdrBuf)
+ builder.Add(eth)
+ builder.Add(pkt.Header.View())
+ for _, v := range pkt.Data.Views() {
+ builder.Add(v)
+ }
+ iovecs := builder.Build()
+
var mmsgHdr rawfile.MMsgHdr
mmsgHdr.Msg.Iov = &iovecs[0]
- iovecIdx := 0
- if vnetHdrBuf != nil {
- v := &iovecs[iovecIdx]
- v.Base = &vnetHdrBuf[0]
- v.Len = uint64(len(vnetHdrBuf))
- iovecIdx++
- }
- if ethHdrBuf != nil {
- v := &iovecs[iovecIdx]
- v.Base = &ethHdrBuf[0]
- v.Len = uint64(len(ethHdrBuf))
- iovecIdx++
- }
- pktSize := uint64(0)
- // Encode L3 Header
- v := &iovecs[iovecIdx]
- hdr := &pkt.Header
- hdrView := hdr.View()
- v.Base = &hdrView[0]
- v.Len = uint64(len(hdrView))
- pktSize += v.Len
- iovecIdx++
-
- // Now encode the Transport Payload.
- pktViews := pkt.Data.Views()
- for i := range pktViews {
- vec := &iovecs[iovecIdx]
- iovecIdx++
- vec.Base = &pktViews[i][0]
- vec.Len = uint64(len(pktViews[i]))
- pktSize += vec.Len
- }
- mmsgHdr.Msg.Iovlen = uint64(iovecIdx)
+ mmsgHdr.Msg.Iovlen = uint64(len(iovecs))
mmsgHdrs = append(mmsgHdrs, mmsgHdr)
}
@@ -626,6 +599,14 @@ func (e *endpoint) GSOMaxSize() uint32 {
return e.gsoMaxSize
}
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
+func (e *endpoint) ARPHardwareType() header.ARPHardwareType {
+ if e.hdrSize > 0 {
+ return header.ARPHardwareEther
+ }
+ return header.ARPHardwareNone
+}
+
// InjectableEndpoint is an injectable fd-based endpoint. The endpoint writes
// to the FD, but does not read from it. All reads come from injected packets.
type InjectableEndpoint struct {
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index 568c6874f..3b17d8c28 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -113,3 +113,8 @@ func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
return nil
}
+
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
+func (*endpoint) ARPHardwareType() header.ARPHardwareType {
+ return header.ARPHardwareLoopback
+}
diff --git a/pkg/tcpip/link/muxed/BUILD b/pkg/tcpip/link/muxed/BUILD
index 82b441b79..e7493e5c5 100644
--- a/pkg/tcpip/link/muxed/BUILD
+++ b/pkg/tcpip/link/muxed/BUILD
@@ -9,6 +9,7 @@ go_library(
deps = [
"//pkg/tcpip",
"//pkg/tcpip/buffer",
+ "//pkg/tcpip/header",
"//pkg/tcpip/stack",
],
)
diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go
index c69d6b7e9..c305d9e86 100644
--- a/pkg/tcpip/link/muxed/injectable.go
+++ b/pkg/tcpip/link/muxed/injectable.go
@@ -18,6 +18,7 @@ package muxed
import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -129,6 +130,11 @@ func (m *InjectableEndpoint) Wait() {
}
}
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
+func (*InjectableEndpoint) ARPHardwareType() header.ARPHardwareType {
+ panic("unsupported operation")
+}
+
// NewInjectableEndpoint creates a new multi-endpoint injectable endpoint.
func NewInjectableEndpoint(routes map[tcpip.Address]stack.InjectableLinkEndpoint) *InjectableEndpoint {
return &InjectableEndpoint{
diff --git a/pkg/tcpip/link/nested/BUILD b/pkg/tcpip/link/nested/BUILD
index bdd5276ad..2cdb23475 100644
--- a/pkg/tcpip/link/nested/BUILD
+++ b/pkg/tcpip/link/nested/BUILD
@@ -12,6 +12,7 @@ go_library(
"//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
+ "//pkg/tcpip/header",
"//pkg/tcpip/stack",
],
)
diff --git a/pkg/tcpip/link/nested/nested.go b/pkg/tcpip/link/nested/nested.go
index 2998f9c4f..328bd048e 100644
--- a/pkg/tcpip/link/nested/nested.go
+++ b/pkg/tcpip/link/nested/nested.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -129,3 +130,8 @@ func (e *Endpoint) GSOMaxSize() uint32 {
}
return 0
}
+
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType
+func (e *Endpoint) ARPHardwareType() header.ARPHardwareType {
+ return e.child.ARPHardwareType()
+}
diff --git a/pkg/tcpip/link/qdisc/fifo/BUILD b/pkg/tcpip/link/qdisc/fifo/BUILD
index 054c213bc..1d0079bd6 100644
--- a/pkg/tcpip/link/qdisc/fifo/BUILD
+++ b/pkg/tcpip/link/qdisc/fifo/BUILD
@@ -14,6 +14,7 @@ go_library(
"//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
+ "//pkg/tcpip/header",
"//pkg/tcpip/stack",
],
)
diff --git a/pkg/tcpip/link/qdisc/fifo/endpoint.go b/pkg/tcpip/link/qdisc/fifo/endpoint.go
index b5dfb7850..c84fe1bb9 100644
--- a/pkg/tcpip/link/qdisc/fifo/endpoint.go
+++ b/pkg/tcpip/link/qdisc/fifo/endpoint.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -207,3 +208,8 @@ func (e *endpoint) Wait() {
e.wg.Wait()
}
+
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType
+func (e *endpoint) ARPHardwareType() header.ARPHardwareType {
+ return e.lower.ARPHardwareType()
+}
diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
index 69de6eb3e..f4c32c2da 100644
--- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go
+++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
@@ -66,38 +66,14 @@ func NonBlockingWrite(fd int, buf []byte) *tcpip.Error {
return nil
}
-// NonBlockingWrite3 writes up to three byte slices to a file descriptor in a
-// single syscall. It fails if partial data is written.
-func NonBlockingWrite3(fd int, b1, b2, b3 []byte) *tcpip.Error {
- // If there is no second and third buffer, issue a regular write.
- if len(b2) == 0 && len(b3) == 0 {
- return NonBlockingWrite(fd, b1)
- }
-
- // Build the iovec that represents them and issue a writev syscall.
- iovec := [3]syscall.Iovec{
- {
- Base: &b1[0],
- Len: uint64(len(b1)),
- },
- {
- Base: &b2[0],
- Len: uint64(len(b2)),
- },
- }
- iovecLen := uintptr(2)
-
- if len(b3) > 0 {
- iovecLen++
- iovec[2].Base = &b3[0]
- iovec[2].Len = uint64(len(b3))
- }
-
+// NonBlockingWriteIovec writes iovec to a file descriptor in a single syscall.
+// It fails if partial data is written.
+func NonBlockingWriteIovec(fd int, iovec []syscall.Iovec) *tcpip.Error {
+ iovecLen := uintptr(len(iovec))
_, _, e := syscall.RawSyscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovec[0])), iovecLen)
if e != 0 {
return TranslateErrno(e)
}
-
return nil
}
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index 0374a2441..a36862c67 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -287,3 +287,8 @@ func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) {
e.completed.Done()
}
+
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType
+func (*endpoint) ARPHardwareType() header.ARPHardwareType {
+ return header.ARPHardwareEther
+}
diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go
index 6bc9033d0..47446efec 100644
--- a/pkg/tcpip/link/tun/device.go
+++ b/pkg/tcpip/link/tun/device.go
@@ -139,6 +139,7 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE
stack: s,
nicID: id,
name: name,
+ isTap: prefix == "tap",
}
endpoint.Endpoint.LinkEPCapabilities = linkCaps
if endpoint.name == "" {
@@ -348,6 +349,7 @@ type tunEndpoint struct {
stack *stack.Stack
nicID tcpip.NICID
name string
+ isTap bool
}
// DecRef decrements refcount of e, removes NIC if refcount goes to 0.
@@ -356,3 +358,11 @@ func (e *tunEndpoint) DecRef() {
e.stack.RemoveNIC(e.nicID)
})
}
+
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
+func (e *tunEndpoint) ARPHardwareType() header.ARPHardwareType {
+ if e.isTap {
+ return header.ARPHardwareEther
+ }
+ return header.ARPHardwareNone
+}
diff --git a/pkg/tcpip/link/waitable/BUILD b/pkg/tcpip/link/waitable/BUILD
index 0956d2c65..ee84c3d96 100644
--- a/pkg/tcpip/link/waitable/BUILD
+++ b/pkg/tcpip/link/waitable/BUILD
@@ -12,6 +12,7 @@ go_library(
"//pkg/gate",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
+ "//pkg/tcpip/header",
"//pkg/tcpip/stack",
],
)
@@ -25,6 +26,7 @@ go_test(
deps = [
"//pkg/tcpip",
"//pkg/tcpip/buffer",
+ "//pkg/tcpip/header",
"//pkg/tcpip/stack",
],
)
diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go
index 949b3f2b2..24a8dc2eb 100644
--- a/pkg/tcpip/link/waitable/waitable.go
+++ b/pkg/tcpip/link/waitable/waitable.go
@@ -25,6 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/gate"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -147,3 +148,8 @@ func (e *Endpoint) WaitDispatch() {
// Wait implements stack.LinkEndpoint.Wait.
func (e *Endpoint) Wait() {}
+
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
+func (e *Endpoint) ARPHardwareType() header.ARPHardwareType {
+ return e.lower.ARPHardwareType()
+}
diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go
index 63bf40562..ffb2354be 100644
--- a/pkg/tcpip/link/waitable/waitable_test.go
+++ b/pkg/tcpip/link/waitable/waitable_test.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -81,6 +82,11 @@ func (e *countedEndpoint) WriteRawPacket(buffer.VectorisedView) *tcpip.Error {
return nil
}
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
+func (*countedEndpoint) ARPHardwareType() header.ARPHardwareType {
+ panic("unimplemented")
+}
+
// Wait implements stack.LinkEndpoint.Wait.
func (*countedEndpoint) Wait() {}
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index 7c8fb3e0a..a5b780ca2 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -172,14 +172,19 @@ func (t *testObject) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.Ne
}
// WritePackets implements stack.LinkEndpoint.WritePackets.
-func (t *testObject) WritePackets(_ *stack.Route, _ *stack.GSO, pkt stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+func (*testObject) WritePackets(_ *stack.Route, _ *stack.GSO, pkt stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
panic("not implemented")
}
-func (t *testObject) WriteRawPacket(_ buffer.VectorisedView) *tcpip.Error {
+func (*testObject) WriteRawPacket(_ buffer.VectorisedView) *tcpip.Error {
return tcpip.ErrNotSupported
}
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
+func (*testObject) ARPHardwareType() header.ARPHardwareType {
+ panic("not implemented")
+}
+
func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
s := stack.New(stack.Options{
NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go
index a6546cef0..eefb4b07f 100644
--- a/pkg/tcpip/stack/forwarder_test.go
+++ b/pkg/tcpip/stack/forwarder_test.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
const (
@@ -301,6 +302,11 @@ func (e *fwdTestLinkEndpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Er
// Wait implements stack.LinkEndpoint.Wait.
func (*fwdTestLinkEndpoint) Wait() {}
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
+func (*fwdTestLinkEndpoint) ARPHardwareType() header.ARPHardwareType {
+ panic("not implemented")
+}
+
func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol) (ep1, ep2 *fwdTestLinkEndpoint) {
// Create a stack with the network protocol and two NICs.
s := New(Options{
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index f846ea2e5..bbf3b60e8 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -292,10 +292,9 @@ func (it *IPTables) startReaper(interval time.Duration) {
// CheckPackets runs pkts through the rules for hook and returns a map of packets that
// should not go forward.
//
-// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-//
-// TODO(gvisor.dev/issue/170): pk.NetworkHeader will always be set as a
-// precondition.
+// Preconditions:
+// - pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
+// - pkt.NetworkHeader is not nil.
//
// NOTE: unlike the Check API the returned map contains packets that should be
// dropped.
@@ -319,9 +318,9 @@ func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, gso *GSO, r *
return drop, natPkts
}
-// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-// TODO(gvisor.dev/issue/170): pkt.NetworkHeader will always be set as a
-// precondition.
+// Preconditions:
+// - pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
+// - pkt.NetworkHeader is not nil.
func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address, nicName string) chainVerdict {
// Start from ruleIdx and walk the list of rules until a rule gives us
// a verdict.
@@ -366,23 +365,12 @@ func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleId
return chainDrop
}
-// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-// TODO(gvisor.dev/issue/170): pkt.NetworkHeader will always be set as a
-// precondition.
+// Preconditions:
+// - pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
+// - pkt.NetworkHeader is not nil.
func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address, nicName string) (RuleVerdict, int) {
rule := table.Rules[ruleIdx]
- // If pkt.NetworkHeader hasn't been set yet, it will be contained in
- // pkt.Data.
- if pkt.NetworkHeader == nil {
- var ok bool
- pkt.NetworkHeader, ok = pkt.Data.PullUp(header.IPv4MinimumSize)
- if !ok {
- // Precondition has been violated.
- panic(fmt.Sprintf("iptables checks require IPv4 headers of at least %d bytes", header.IPv4MinimumSize))
- }
- }
-
// Check whether the packet matches the IP header filter.
if !rule.Filter.match(header.IPv4(pkt.NetworkHeader), hook, nicName) {
// Continue on to the next rule.
diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go
index 31f865260..3bc9fd831 100644
--- a/pkg/tcpip/stack/nic_test.go
+++ b/pkg/tcpip/stack/nic_test.go
@@ -84,6 +84,11 @@ func (e *testLinkEndpoint) WriteRawPacket(buffer.VectorisedView) *tcpip.Error {
return tcpip.ErrNotSupported
}
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
+func (*testLinkEndpoint) ARPHardwareType() header.ARPHardwareType {
+ panic("not implemented")
+}
+
var _ NetworkEndpoint = (*testIPv6Endpoint)(nil)
// An IPv6 NetworkEndpoint that throws away outgoing packets.
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index 5cbc946b6..f260eeb7f 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -18,6 +18,7 @@ import (
"gvisor.dev/gvisor/pkg/sleep"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -436,6 +437,12 @@ type LinkEndpoint interface {
// Wait will not block if the endpoint hasn't started any goroutines
// yet, even if it might later.
Wait()
+
+ // ARPHardwareType returns the ARPHRD_TYPE of the link endpoint.
+ //
+ // See:
+ // https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/include/uapi/linux/if_arp.h#L30
+ ARPHardwareType() header.ARPHardwareType
}
// InjectableLinkEndpoint is a LinkEndpoint where inbound packets are
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 0aa815447..2b7ece851 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -1095,6 +1095,11 @@ type NICInfo struct {
// Context is user-supplied data optionally supplied in CreateNICWithOptions.
// See type NICOptions for more details.
Context NICContext
+
+ // ARPHardwareType holds the ARP Hardware type of the NIC. This is the
+ // value sent in haType field of an ARP Request sent by this NIC and the
+ // value expected in the haType field of an ARP response.
+ ARPHardwareType header.ARPHardwareType
}
// HasNIC returns true if the NICID is defined in the stack.
@@ -1126,6 +1131,7 @@ func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo {
MTU: nic.linkEP.MTU(),
Stats: nic.stats,
Context: nic.context,
+ ARPHardwareType: nic.linkEP.ARPHardwareType(),
}
}
return nics
diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go
index 57b7f5c19..92b487381 100644
--- a/pkg/tcpip/transport/packet/endpoint.go
+++ b/pkg/tcpip/transport/packet/endpoint.go
@@ -79,6 +79,11 @@ type endpoint struct {
closed bool
stats tcpip.TransportEndpointStats `state:"nosave"`
bound bool
+ boundNIC tcpip.NICID
+
+ // lastErrorMu protects lastError.
+ lastErrorMu sync.Mutex `state:"nosave"`
+ lastError *tcpip.Error `state:".(string)"`
}
// NewEndpoint returns a new packet endpoint.
@@ -229,12 +234,14 @@ func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error {
ep.mu.Lock()
defer ep.mu.Unlock()
- if ep.bound {
- return tcpip.ErrAlreadyBound
+ if ep.bound && ep.boundNIC == addr.NIC {
+ // If the NIC being bound is the same then just return success.
+ return nil
}
// Unregister endpoint with all the nics.
ep.stack.UnregisterPacketEndpoint(0, ep.netProto, ep)
+ ep.bound = false
// Bind endpoint to receive packets from specific interface.
if err := ep.stack.RegisterPacketEndpoint(addr.NIC, ep.netProto, ep); err != nil {
@@ -242,6 +249,7 @@ func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error {
}
ep.bound = true
+ ep.boundNIC = addr.NIC
return nil
}
@@ -336,8 +344,21 @@ func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
}
}
+func (ep *endpoint) takeLastError() *tcpip.Error {
+ ep.lastErrorMu.Lock()
+ defer ep.lastErrorMu.Unlock()
+
+ err := ep.lastError
+ ep.lastError = nil
+ return err
+}
+
// GetSockOpt implements tcpip.Endpoint.GetSockOpt.
func (ep *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
+ switch opt.(type) {
+ case tcpip.ErrorOption:
+ return ep.takeLastError()
+ }
return tcpip.ErrNotSupported
}
diff --git a/pkg/tcpip/transport/packet/endpoint_state.go b/pkg/tcpip/transport/packet/endpoint_state.go
index 9b88f17e4..e2fa96d17 100644
--- a/pkg/tcpip/transport/packet/endpoint_state.go
+++ b/pkg/tcpip/transport/packet/endpoint_state.go
@@ -15,6 +15,7 @@
package packet
import (
+ "gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -70,3 +71,21 @@ func (ep *endpoint) afterLoad() {
panic(*err)
}
}
+
+// saveLastError is invoked by stateify.
+func (ep *endpoint) saveLastError() string {
+ if ep.lastError == nil {
+ return ""
+ }
+
+ return ep.lastError.String()
+}
+
+// loadLastError is invoked by stateify.
+func (ep *endpoint) loadLastError(s string) {
+ if s == "" {
+ return
+ }
+
+ ep.lastError = tcpip.StringToError(s)
+}
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index bb01b8fb5..80da8b3e6 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -274,6 +274,9 @@ type Config struct {
// Enables VFS2 (not plumbled through yet).
VFS2 bool
+
+ // Enables FUSE usage (not plumbled through yet).
+ FUSE bool
}
// ToFlags returns a slice of flags that correspond to the given Config.
@@ -325,5 +328,9 @@ func (c *Config) ToFlags() []string {
f = append(f, "--vfs2=true")
}
+ if c.FUSE {
+ f = append(f, "--fuse=true")
+ }
+
return f
}
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 60e33425f..149eb0b1b 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -310,19 +310,12 @@ var allowedSyscalls = seccomp.SyscallRules{
},
},
syscall.SYS_WRITE: {},
- // The only user in rawfile.NonBlockingWrite3 always passes iovcnt with
- // values 2 or 3. Three iovec-s are passed, when the PACKET_VNET_HDR
- // option is enabled for a packet socket.
+ // For rawfile.NonBlockingWriteIovec.
syscall.SYS_WRITEV: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowAny{},
- seccomp.AllowValue(2),
- },
- {
- seccomp.AllowAny{},
- seccomp.AllowAny{},
- seccomp.AllowValue(3),
+ seccomp.GreaterThan(0),
},
},
}
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index a48547ea5..9cd9c5909 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -205,6 +205,10 @@ func New(args Args) (*Loader, error) {
// Is this a VFSv2 kernel?
if args.Conf.VFS2 {
kernel.VFS2Enabled = true
+ if args.Conf.FUSE {
+ kernel.FUSEEnabled = true
+ }
+
vfs2.Override()
}
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 6ee6fae04..56f4ba15d 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -86,9 +86,12 @@ func registerFilesystems(k *kernel.Kernel) error {
return fmt.Errorf("registering ttydev: %w", err)
}
- if err := fuse.Register(vfsObj); err != nil {
- return fmt.Errorf("registering fusedev: %w", err)
+ if kernel.FUSEEnabled {
+ if err := fuse.Register(vfsObj); err != nil {
+ return fmt.Errorf("registering fusedev: %w", err)
+ }
}
+
if err := tundev.Register(vfsObj); err != nil {
return fmt.Errorf("registering tundev: %v", err)
}
@@ -110,8 +113,11 @@ func registerFilesystems(k *kernel.Kernel) error {
if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil {
return fmt.Errorf("creating tundev devtmpfs files: %v", err)
}
- if err := fuse.CreateDevtmpfsFile(ctx, a); err != nil {
- return fmt.Errorf("creating fusedev devtmpfs files: %w", err)
+
+ if kernel.FUSEEnabled {
+ if err := fuse.CreateDevtmpfsFile(ctx, a); err != nil {
+ return fmt.Errorf("creating fusedev devtmpfs files: %w", err)
+ }
}
return nil
}
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 6d297d0df..7ad09bf23 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -324,7 +324,7 @@ func New(conf *boot.Config, args Args) (*Container, error) {
}
}
if err := runInCgroup(cg, func() error {
- ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir)
+ ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir, args.Attached)
if err != nil {
return err
}
@@ -427,7 +427,7 @@ func (c *Container) Start(conf *boot.Config) error {
// the start (and all their children processes).
if err := runInCgroup(c.Sandbox.Cgroup, func() error {
// Create the gofer process.
- ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir)
+ ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir, false)
if err != nil {
return err
}
@@ -861,7 +861,7 @@ func (c *Container) waitForStopped() error {
return backoff.Retry(op, b)
}
-func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, *os.File, error) {
+func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string, attached bool) ([]*os.File, *os.File, error) {
// Start with the general config flags.
args := conf.ToFlags()
@@ -955,6 +955,14 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
cmd.ExtraFiles = goferEnds
cmd.Args[0] = "runsc-gofer"
+ if attached {
+ // The gofer is attached to the lifetime of this process, so it
+ // should synchronously die when this process dies.
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Pdeathsig: syscall.SIGKILL,
+ }
+ }
+
// Enter new namespaces to isolate from the rest of the system. Don't unshare
// cgroup because gofer is added to a cgroup in the caller's namespace.
nss := []specs.LinuxNamespace{
diff --git a/runsc/main.go b/runsc/main.go
index c9f47c579..69cb505fa 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -88,6 +88,7 @@ var (
referenceLeakMode = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), log-names, log-traces.")
cpuNumFromQuota = flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")
vfs2Enabled = flag.Bool("vfs2", false, "TEST ONLY; use while VFSv2 is landing. This uses the new experimental VFS layer.")
+ fuseEnabled = flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.")
// Test flags, not to be used outside tests, ever.
testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
@@ -242,6 +243,7 @@ func main() {
OverlayfsStaleRead: *overlayfsStaleRead,
CPUNumFromQuota: *cpuNumFromQuota,
VFS2: *vfs2Enabled,
+ FUSE: *fuseEnabled,
QDisc: queueingDiscipline,
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
TestOnlyTestNameEnv: *testOnlyTestNameEnv,
diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go
index aabcdbd87..b7915e19d 100644
--- a/test/benchmarks/fs/bazel_test.go
+++ b/test/benchmarks/fs/bazel_test.go
@@ -32,6 +32,7 @@ func BenchmarkABSL(b *testing.B) {
if err != nil {
b.Fatalf("failed to get machine: %v", err)
}
+ defer machine.CleanUp()
// Dimensions here are clean/dirty cache (do or don't drop caches)
// and if the mount on which we are compiling is a tmpfs/bind mount.
diff --git a/test/benchmarks/harness/machine.go b/test/benchmarks/harness/machine.go
index 032b387fc..93c0db9ce 100644
--- a/test/benchmarks/harness/machine.go
+++ b/test/benchmarks/harness/machine.go
@@ -33,6 +33,9 @@ type Machine interface {
// Returns IP Address for the machine.
IPAddress() (net.IP, error)
+
+ // CleanUp cleans up this machine.
+ CleanUp()
}
// localMachine describes this machine.
@@ -62,3 +65,7 @@ func (l *localMachine) IPAddress() (net.IP, error) {
addr := conn.LocalAddr().(*net.UDPAddr)
return addr.IP, nil
}
+
+// CleanUp implements Machine.CleanUp and does nothing for localMachine.
+func (*localMachine) CleanUp() {
+}
diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD
index 57328456d..ea78416cf 100644
--- a/test/benchmarks/network/BUILD
+++ b/test/benchmarks/network/BUILD
@@ -11,7 +11,10 @@ go_library(
go_test(
name = "network_test",
size = "large",
- srcs = ["iperf_test.go"],
+ srcs = [
+ "httpd_test.go",
+ "iperf_test.go",
+ ],
library = ":network",
tags = [
# Requires docker and runsc to be configured before test runs.
diff --git a/test/benchmarks/network/httpd_test.go b/test/benchmarks/network/httpd_test.go
new file mode 100644
index 000000000..f9afdf15f
--- /dev/null
+++ b/test/benchmarks/network/httpd_test.go
@@ -0,0 +1,276 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package network
+
+import (
+ "context"
+ "fmt"
+ "regexp"
+ "strconv"
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/test/dockerutil"
+ "gvisor.dev/gvisor/test/benchmarks/harness"
+)
+
+// see Dockerfile '//images/benchmarks/httpd'.
+var docs = map[string]string{
+ "notfound": "notfound",
+ "1Kb": "latin1k.txt",
+ "10Kb": "latin10k.txt",
+ "100Kb": "latin100k.txt",
+ "1000Kb": "latin1000k.txt",
+ "1Mb": "latin1024k.txt",
+ "10Mb": "latin10240k.txt",
+}
+
+// BenchmarkHttpdConcurrency iterates the concurrency argument and tests
+// how well the runtime under test handles requests in parallel.
+func BenchmarkHttpdConcurrency(b *testing.B) {
+ // Grab a machine for the client and server.
+ clientMachine, err := h.GetMachine()
+ if err != nil {
+ b.Fatalf("failed to get client: %v", err)
+ }
+ defer clientMachine.CleanUp()
+
+ serverMachine, err := h.GetMachine()
+ if err != nil {
+ b.Fatalf("failed to get server: %v", err)
+ }
+ defer serverMachine.CleanUp()
+
+ // The test iterates over client concurrency, so set other parameters.
+ requests := 1000
+ concurrency := []int{1, 5, 10, 25}
+ doc := docs["10Kb"]
+
+ for _, c := range concurrency {
+ b.Run(fmt.Sprintf("%dConcurrency", c), func(b *testing.B) {
+ runHttpd(b, clientMachine, serverMachine, doc, requests, c)
+ })
+ }
+}
+
+// BenchmarkHttpdDocSize iterates over different sized payloads, testing how
+// well the runtime handles different payload sizes.
+func BenchmarkHttpdDocSize(b *testing.B) {
+ clientMachine, err := h.GetMachine()
+ if err != nil {
+ b.Fatalf("failed to get machine: %v", err)
+ }
+ defer clientMachine.CleanUp()
+
+ serverMachine, err := h.GetMachine()
+ if err != nil {
+ b.Fatalf("failed to get machine: %v", err)
+ }
+ defer serverMachine.CleanUp()
+
+ requests := 1000
+ concurrency := 1
+
+ for name, filename := range docs {
+ b.Run(name, func(b *testing.B) {
+ runHttpd(b, clientMachine, serverMachine, filename, requests, concurrency)
+ })
+ }
+}
+
+// runHttpd runs a single test run.
+func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, doc string, requests, concurrency int) {
+ b.Helper()
+
+ // Grab a container from the server.
+ ctx := context.Background()
+ server := serverMachine.GetContainer(ctx, b)
+ defer server.CleanUp(ctx)
+
+ // Copy the docs to /tmp and serve from there.
+ cmd := "mkdir -p /tmp/html; cp -r /local /tmp/html/.; apache2 -X"
+ port := 80
+
+ // Start the server.
+ server.Spawn(ctx, dockerutil.RunOpts{
+ Image: "benchmarks/httpd",
+ Ports: []int{port},
+ Env: []string{
+ // Standard environmental variables for httpd.
+ "APACHE_RUN_DIR=/tmp",
+ "APACHE_RUN_USER=nobody",
+ "APACHE_RUN_GROUP=nogroup",
+ "APACHE_LOG_DIR=/tmp",
+ "APACHE_PID_FILE=/tmp/apache.pid",
+ },
+ }, "sh", "-c", cmd)
+
+ ip, err := serverMachine.IPAddress()
+ if err != nil {
+ b.Fatalf("failed to find server ip: %v", err)
+ }
+
+ servingPort, err := server.FindPort(ctx, port)
+ if err != nil {
+ b.Fatalf("failed to find server port %d: %v", port, err)
+ }
+
+ // Check the server is serving.
+ harness.WaitUntilServing(ctx, clientMachine, ip, servingPort)
+
+ // Grab a client.
+ client := clientMachine.GetContainer(ctx, b)
+ defer client.CleanUp(ctx)
+
+ path := fmt.Sprintf("http://%s:%d/%s", ip, servingPort, doc)
+ // See apachebench (ab) for flags.
+ cmd = fmt.Sprintf("ab -n %d -c %d %s", requests, concurrency, path)
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ out, err := client.Run(ctx, dockerutil.RunOpts{
+ Image: "benchmarks/ab",
+ }, "sh", "-c", cmd)
+ if err != nil {
+ b.Fatalf("run failed with: %v", err)
+ }
+
+ b.StopTimer()
+
+ // Parse and report custom metrics.
+ transferRate, err := parseTransferRate(out)
+ if err != nil {
+ b.Logf("failed to parse transferrate: %v", err)
+ }
+ b.ReportMetric(transferRate*1024, "transfer_rate") // Convert from Kb/s to b/s.
+
+ latency, err := parseLatency(out)
+ if err != nil {
+ b.Logf("failed to parse latency: %v", err)
+ }
+ b.ReportMetric(latency/1000, "mean_latency") // Convert from ms to s.
+
+ reqPerSecond, err := parseRequestsPerSecond(out)
+ if err != nil {
+ b.Logf("failed to parse requests per second: %v", err)
+ }
+ b.ReportMetric(reqPerSecond, "requests_per_second")
+
+ b.StartTimer()
+ }
+}
+
+var transferRateRE = regexp.MustCompile(`Transfer rate:\s+(\d+\.?\d+?)\s+\[Kbytes/sec\]\s+received`)
+
+// parseTransferRate parses transfer rate from apachebench output.
+func parseTransferRate(data string) (float64, error) {
+ match := transferRateRE.FindStringSubmatch(data)
+ if len(match) < 2 {
+ return 0, fmt.Errorf("failed get bandwidth: %s", data)
+ }
+ return strconv.ParseFloat(match[1], 64)
+}
+
+var latencyRE = regexp.MustCompile(`Total:\s+\d+\s+(\d+)\s+(\d+\.?\d+?)\s+\d+\s+\d+\s`)
+
+// parseLatency parses latency from apachebench output.
+func parseLatency(data string) (float64, error) {
+ match := latencyRE.FindStringSubmatch(data)
+ if len(match) < 2 {
+ return 0, fmt.Errorf("failed get bandwidth: %s", data)
+ }
+ return strconv.ParseFloat(match[1], 64)
+}
+
+var requestsPerSecondRE = regexp.MustCompile(`Requests per second:\s+(\d+\.?\d+?)\s+`)
+
+// parseRequestsPerSecond parses requests per second from apachebench output.
+func parseRequestsPerSecond(data string) (float64, error) {
+ match := requestsPerSecondRE.FindStringSubmatch(data)
+ if len(match) < 2 {
+ return 0, fmt.Errorf("failed get bandwidth: %s", data)
+ }
+ return strconv.ParseFloat(match[1], 64)
+}
+
+// Sample output from apachebench.
+const sampleData = `This is ApacheBench, Version 2.3 <$Revision: 1826891 $>
+Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
+Licensed to The Apache Software Foundation, http://www.apache.org/
+
+Benchmarking 10.10.10.10 (be patient).....done
+
+
+Server Software: Apache/2.4.38
+Server Hostname: 10.10.10.10
+Server Port: 80
+
+Document Path: /latin10k.txt
+Document Length: 210 bytes
+
+Concurrency Level: 1
+Time taken for tests: 0.180 seconds
+Complete requests: 100
+Failed requests: 0
+Non-2xx responses: 100
+Total transferred: 38800 bytes
+HTML transferred: 21000 bytes
+Requests per second: 556.44 [#/sec] (mean)
+Time per request: 1.797 [ms] (mean)
+Time per request: 1.797 [ms] (mean, across all concurrent requests)
+Transfer rate: 210.84 [Kbytes/sec] received
+
+Connection Times (ms)
+ min mean[+/-sd] median max
+Connect: 0 0 0.2 0 2
+Processing: 1 2 1.0 1 8
+Waiting: 1 1 1.0 1 7
+Total: 1 2 1.2 1 10
+
+Percentage of the requests served within a certain time (ms)
+ 50% 1
+ 66% 2
+ 75% 2
+ 80% 2
+ 90% 2
+ 95% 3
+ 98% 7
+ 99% 10
+ 100% 10 (longest request)`
+
+// TestParsers checks the parsers work.
+func TestParsers(t *testing.T) {
+ want := 210.84
+ got, err := parseTransferRate(sampleData)
+ if err != nil {
+ t.Fatalf("failed to parse transfer rate with error: %v", err)
+ } else if got != want {
+ t.Fatalf("parseTransferRate got: %f, want: %f", got, want)
+ }
+
+ want = 2.0
+ got, err = parseLatency(sampleData)
+ if err != nil {
+ t.Fatalf("failed to parse transfer rate with error: %v", err)
+ } else if got != want {
+ t.Fatalf("parseLatency got: %f, want: %f", got, want)
+ }
+
+ want = 556.44
+ got, err = parseRequestsPerSecond(sampleData)
+ if err != nil {
+ t.Fatalf("failed to parse transfer rate with error: %v", err)
+ } else if got != want {
+ t.Fatalf("parseRequestsPerSecond got: %f, want: %f", got, want)
+ }
+}
diff --git a/test/benchmarks/network/iperf_test.go b/test/benchmarks/network/iperf_test.go
index 72e9c99a8..48cc9dd8f 100644
--- a/test/benchmarks/network/iperf_test.go
+++ b/test/benchmarks/network/iperf_test.go
@@ -35,11 +35,13 @@ func BenchmarkIperf(b *testing.B) {
if err != nil {
b.Fatalf("failed to get machine: %v", err)
}
+ defer clientMachine.CleanUp()
serverMachine, err := h.GetMachine()
if err != nil {
b.Fatalf("failed to get machine: %v", err)
}
+ defer serverMachine.CleanUp()
for _, bm := range []struct {
name string
@@ -111,7 +113,7 @@ func BenchmarkIperf(b *testing.B) {
if err != nil {
b.Fatalf("failed to parse bandwitdth from %s: %v", out, err)
}
- b.ReportMetric(bW, "KBytes/sec")
+ b.ReportMetric(bW*1024, "bandwidth") // Convert from Kb/s to b/s.
b.StartTimer()
}
})
diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl
index 921e499be..600cb5192 100644
--- a/test/runner/defs.bzl
+++ b/test/runner/defs.bzl
@@ -61,7 +61,8 @@ def _syscall_test(
file_access = "exclusive",
overlay = False,
add_uds_tree = False,
- vfs2 = False):
+ vfs2 = False,
+ fuse = False):
# Prepend "runsc" to non-native platform names.
full_platform = platform if platform == "native" else "runsc_" + platform
@@ -73,6 +74,8 @@ def _syscall_test(
name += "_overlay"
if vfs2:
name += "_vfs2"
+ if fuse:
+ name += "_fuse"
if network != "none":
name += "_" + network + "net"
@@ -107,6 +110,7 @@ def _syscall_test(
"--overlay=" + str(overlay),
"--add-uds-tree=" + str(add_uds_tree),
"--vfs2=" + str(vfs2),
+ "--fuse=" + str(fuse),
]
# Call the rule above.
@@ -129,6 +133,7 @@ def syscall_test(
add_uds_tree = False,
add_hostinet = False,
vfs2 = False,
+ fuse = False,
tags = None):
"""syscall_test is a macro that will create targets for all platforms.
@@ -188,6 +193,19 @@ def syscall_test(
vfs2 = True,
)
+ if vfs2 and fuse:
+ _syscall_test(
+ test = test,
+ shard_count = shard_count,
+ size = size,
+ platform = default_platform,
+ use_tmpfs = use_tmpfs,
+ add_uds_tree = add_uds_tree,
+ tags = platforms[default_platform] + vfs2_tags,
+ vfs2 = True,
+ fuse = True,
+ )
+
# TODO(gvisor.dev/issue/1487): Enable VFS2 overlay tests.
if add_overlay:
_syscall_test(
diff --git a/test/runner/runner.go b/test/runner/runner.go
index 5456e46a6..2296f3a46 100644
--- a/test/runner/runner.go
+++ b/test/runner/runner.go
@@ -47,6 +47,7 @@ var (
fileAccess = flag.String("file-access", "exclusive", "mounts root in exclusive or shared mode")
overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable tmpfs overlay")
vfs2 = flag.Bool("vfs2", false, "enable VFS2")
+ fuse = flag.Bool("fuse", false, "enable FUSE")
parallel = flag.Bool("parallel", false, "run tests in parallel")
runscPath = flag.String("runsc", "", "path to runsc binary")
@@ -149,6 +150,9 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error {
}
if *vfs2 {
args = append(args, "-vfs2")
+ if *fuse {
+ args = append(args, "-fuse")
+ }
}
if *debug {
args = append(args, "-debug", "-log-packets=true")
@@ -358,6 +362,12 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) {
vfsVar := "GVISOR_VFS"
if *vfs2 {
env = append(env, vfsVar+"=VFS2")
+ fuseVar := "FUSE_ENABLED"
+ if *fuse {
+ env = append(env, fuseVar+"=TRUE")
+ } else {
+ env = append(env, fuseVar+"=FALSE")
+ }
} else {
env = append(env, vfsVar+"=VFS1")
}
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 28ef55945..c06a75ada 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -146,6 +146,7 @@ syscall_test(
)
syscall_test(
+ fuse = "True",
test = "//test/syscalls/linux:dev_test",
vfs2 = "True",
)
diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc
index 3c88c4cbd..6fa16208e 100644
--- a/test/syscalls/linux/dev.cc
+++ b/test/syscalls/linux/dev.cc
@@ -156,7 +156,7 @@ TEST(DevTest, TTYExists) {
TEST(DevTest, OpenDevFuse) {
// Note(gvisor.dev/issue/3076) This won't work in the sentry until the new
// device registration is complete.
- SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor());
+ SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor() || !IsFUSEEnabled());
ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY));
}
diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc
index 5ac68feb4..ce63adb23 100644
--- a/test/syscalls/linux/packet_socket.cc
+++ b/test/syscalls/linux/packet_socket.cc
@@ -343,7 +343,7 @@ TEST_P(CookedPacketTest, BindReceive) {
}
// Double Bind socket.
-TEST_P(CookedPacketTest, DoubleBind) {
+TEST_P(CookedPacketTest, DoubleBindSucceeds) {
struct sockaddr_ll bind_addr = {};
bind_addr.sll_family = AF_PACKET;
bind_addr.sll_protocol = htons(GetParam());
@@ -354,12 +354,11 @@ TEST_P(CookedPacketTest, DoubleBind) {
SyscallSucceeds());
// Binding socket again should fail.
- ASSERT_THAT(
- bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr),
- sizeof(bind_addr)),
- // Linux 4.09 returns EINVAL here, but some time before 4.19 it switched
- // to EADDRINUSE.
- AnyOf(SyscallFailsWithErrno(EADDRINUSE), SyscallFailsWithErrno(EINVAL)));
+ ASSERT_THAT(bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr),
+ sizeof(bind_addr)),
+ // Linux 4.09 returns EINVAL here, but some time before 4.19 it
+ // switched to EADDRINUSE.
+ SyscallSucceeds());
}
// Bind and verify we do not receive data on interface which is not bound
diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc
index 6a963b12c..e44062475 100644
--- a/test/syscalls/linux/packet_socket_raw.cc
+++ b/test/syscalls/linux/packet_socket_raw.cc
@@ -559,6 +559,64 @@ TEST_P(RawPacketTest, SetSocketSendBuf) {
ASSERT_EQ(quarter_sz, val);
}
+TEST_P(RawPacketTest, GetSocketError) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_ERROR, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(val, 0);
+}
+
+TEST_P(RawPacketTest, GetSocketErrorBind) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ {
+ // Bind to the loopback device.
+ struct sockaddr_ll bind_addr = {};
+ bind_addr.sll_family = AF_PACKET;
+ bind_addr.sll_protocol = htons(GetParam());
+ bind_addr.sll_ifindex = GetLoopbackIndex();
+
+ ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr),
+ sizeof(bind_addr)),
+ SyscallSucceeds());
+
+ // SO_ERROR should return no errors.
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_ERROR, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(val, 0);
+ }
+
+ {
+ // Now try binding to an invalid interface.
+ struct sockaddr_ll bind_addr = {};
+ bind_addr.sll_family = AF_PACKET;
+ bind_addr.sll_protocol = htons(GetParam());
+ bind_addr.sll_ifindex = 0xffff; // Just pick a really large number.
+
+ // Binding should fail with EINVAL
+ ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr),
+ sizeof(bind_addr)),
+ SyscallFailsWithErrno(ENODEV));
+
+ // SO_ERROR does not return error when the device is invalid.
+ // On Linux there is just one odd ball condition where this can return
+ // an error where the device was valid and then removed or disabled
+ // between the first check for index and the actual registration of
+ // the packet endpoint. On Netstack this is not possible as the stack
+ // global mutex is held during registration and check.
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_ERROR, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(val, 0);
+ }
+}
+
#ifndef __fuchsia__
TEST_P(RawPacketTest, SetSocketDetachFilterNoInstalledFilter) {
diff --git a/test/syscalls/linux/socket_netdevice.cc b/test/syscalls/linux/socket_netdevice.cc
index 15d4b85a7..5f8d7f981 100644
--- a/test/syscalls/linux/socket_netdevice.cc
+++ b/test/syscalls/linux/socket_netdevice.cc
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <linux/ethtool.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/sockios.h>
@@ -49,6 +50,7 @@ TEST(NetdeviceTest, Loopback) {
// Check that the loopback is zero hardware address.
ASSERT_THAT(ioctl(sock.get(), SIOCGIFHWADDR, &ifr), SyscallSucceeds());
+ EXPECT_EQ(ifr.ifr_hwaddr.sa_family, ARPHRD_LOOPBACK);
EXPECT_EQ(ifr.ifr_hwaddr.sa_data[0], 0);
EXPECT_EQ(ifr.ifr_hwaddr.sa_data[1], 0);
EXPECT_EQ(ifr.ifr_hwaddr.sa_data[2], 0);
@@ -178,6 +180,27 @@ TEST(NetdeviceTest, InterfaceMTU) {
EXPECT_GT(ifr.ifr_mtu, 0);
}
+TEST(NetdeviceTest, EthtoolGetTSInfo) {
+ FileDescriptor sock =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+ struct ethtool_ts_info tsi = {};
+ tsi.cmd = ETHTOOL_GET_TS_INFO; // Get NIC's Timestamping capabilities.
+
+ // Prepare the request.
+ struct ifreq ifr = {};
+ snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+ ifr.ifr_data = (void*)&tsi;
+
+ // Check that SIOCGIFMTU returns a nonzero MTU.
+ if (IsRunningOnGvisor()) {
+ ASSERT_THAT(ioctl(sock.get(), SIOCETHTOOL, &ifr),
+ SyscallFailsWithErrno(EOPNOTSUPP));
+ return;
+ }
+ ASSERT_THAT(ioctl(sock.get(), SIOCETHTOOL, &ifr), SyscallSucceeds());
+}
+
} // namespace
} // namespace testing
diff --git a/test/util/test_util.cc b/test/util/test_util.cc
index 8a037f45f..d0c1d6426 100644
--- a/test/util/test_util.cc
+++ b/test/util/test_util.cc
@@ -42,6 +42,7 @@ namespace testing {
constexpr char kGvisorNetwork[] = "GVISOR_NETWORK";
constexpr char kGvisorVfs[] = "GVISOR_VFS";
+constexpr char kFuseEnabled[] = "FUSE_ENABLED";
bool IsRunningOnGvisor() { return GvisorPlatform() != Platform::kNative; }
@@ -68,6 +69,11 @@ bool IsRunningWithVFS1() {
return strcmp(env, "VFS1") == 0;
}
+bool IsFUSEEnabled() {
+ const char* env = getenv(kFuseEnabled);
+ return env && strcmp(env, "TRUE") == 0;
+}
+
// Inline cpuid instruction. Preserve %ebx/%rbx register. In PIC compilations
// %ebx contains the address of the global offset table. %rbx is occasionally
// used to address stack variables in presence of dynamic allocas.
diff --git a/test/util/test_util.h b/test/util/test_util.h
index 0f9781038..373c54f32 100644
--- a/test/util/test_util.h
+++ b/test/util/test_util.h
@@ -225,6 +225,7 @@ const std::string GvisorPlatform();
bool IsRunningWithHostinet();
// TODO(gvisor.dev/issue/1624): Delete once VFS1 is gone.
bool IsRunningWithVFS1();
+bool IsFUSEEnabled();
#ifdef __linux__
void SetupGvisorDeathTest();
diff --git a/tools/vm/README.md b/tools/vm/README.md
index 898c95fca..1e9859e66 100644
--- a/tools/vm/README.md
+++ b/tools/vm/README.md
@@ -25,6 +25,12 @@ vm_image(
These images can be built manually by executing the target. The output on
`stdout` will be the image id (in the current project).
+For example:
+
+```
+$ bazel build :ubuntu
+```
+
Images are always named per the hash of all the hermetic input scripts. This
allows images to be memoized quickly and easily.
diff --git a/tools/vm/defs.bzl b/tools/vm/defs.bzl
index 0f67cfa92..9af5ad3b4 100644
--- a/tools/vm/defs.bzl
+++ b/tools/vm/defs.bzl
@@ -60,11 +60,12 @@ def _vm_image_impl(ctx):
# Run the builder to generate our output.
echo = ctx.actions.declare_file(ctx.label.name)
resolved_inputs, argv, runfiles_manifests = ctx.resolve_command(
- command = "echo -ne \"#!/bin/bash\\nset -e\\nimage=$(%s)\\necho ${image}\\n\" > %s && chmod 0755 %s" % (
- ctx.files.builder[0].path,
- echo.path,
- echo.path,
- ),
+ command = "\n".join([
+ "set -e",
+ "image=$(%s)" % ctx.files.builder[0].path,
+ "echo -ne \"#!/bin/bash\\necho ${image}\\n\" > %s" % echo.path,
+ "chmod 0755 %s" % echo.path,
+ ]),
tools = [ctx.attr.builder],
)
ctx.actions.run_shell(