diff options
author | Andrei Vagin <avagin@google.com> | 2019-03-29 16:26:36 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-03-29 16:27:38 -0700 |
commit | a046054ba35e8d8c4882f9311dc964eaa1497d58 (patch) | |
tree | 7cdb13fc8f5f57716b143f94188f0f01169b1c6c /runsc | |
parent | 26e8d9981fcf6d08199a9fd9c609d9715c3cf37e (diff) |
gvisor/runsc: enable generic segmentation offload (GSO)
The Linux packet socket can handle GSO packets, so we can segment packets to
64K instead of the MTU, which is usually 1500 bytes.
Here are numbers for the nginx-1m test:
runsc: 579330.01 [Kbytes/sec] received
runsc-gso: 1794121.66 [Kbytes/sec] received
runc: 2122139.06 [Kbytes/sec] received
and for tcp_benchmark:
$ tcp_benchmark --duration 15 --ideal
[ 4] 0.0-15.0 sec 86647 MBytes 48456 Mbits/sec
$ tcp_benchmark --client --duration 15 --ideal
[ 4] 0.0-15.0 sec 2173 MBytes 1214 Mbits/sec
$ tcp_benchmark --client --duration 15 --ideal --gso 65536
[ 4] 0.0-15.0 sec 19357 MBytes 10825 Mbits/sec
PiperOrigin-RevId: 241072403
Change-Id: I20b03063a1a6649362b43609cbbc9b59be06e6d5
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/config.go | 3 | ||||
-rw-r--r-- | runsc/boot/filter/config.go | 8 | ||||
-rw-r--r-- | runsc/boot/network.go | 10 | ||||
-rw-r--r-- | runsc/main.go | 2 | ||||
-rw-r--r-- | runsc/sandbox/BUILD | 2 | ||||
-rw-r--r-- | runsc/sandbox/network.go | 18 | ||||
-rw-r--r-- | runsc/sandbox/network_unsafe.go | 56 |
7 files changed, 93 insertions, 6 deletions
diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 626fcabdd..2523077fd 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -175,6 +175,9 @@ type Config struct { // Network indicates what type of network to use. Network NetworkType + // GSO indicates that generic segmentation offload is enabled. + GSO bool + // LogPackets indicates that all network packets should be logged. LogPackets bool diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 1ba5b7257..9c72e3b1a 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -256,12 +256,20 @@ var allowedSyscalls = seccomp.SyscallRules{ }, }, syscall.SYS_WRITE: {}, + // The only user in rawfile.NonBlockingWrite3 always passes iovcnt with + // values 2 or 3. Three iovec-s are passed, when the PACKET_VNET_HDR + // option is enabled for a packet socket. syscall.SYS_WRITEV: []seccomp.Rule{ { seccomp.AllowAny{}, seccomp.AllowAny{}, seccomp.AllowValue(2), }, + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(3), + }, }, } diff --git a/runsc/boot/network.go b/runsc/boot/network.go index f025a42f1..77291415b 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -52,10 +52,11 @@ type DefaultRoute struct { // FDBasedLink configures an fd-based link. type FDBasedLink struct { - Name string - MTU int - Addresses []net.IP - Routes []Route + Name string + MTU int + Addresses []net.IP + Routes []Route + GSOMaxSize uint32 } // LoopbackLink configures a loopback link. 
@@ -140,6 +141,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct EthernetHeader: true, Address: mac, PacketDispatchMode: fdbased.PacketMMap, + GSOMaxSize: link.GSOMaxSize, }) log.Infof("Enabling interface %q with id %d on addresses %+v (%v)", link.Name, nicID, link.Addresses, mac) diff --git a/runsc/main.go b/runsc/main.go index 82c37ec11..4b3f55ad1 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -59,6 +59,7 @@ var ( // Flags that control sandbox runtime behavior. platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm") network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.") + gso = flag.Bool("gso", true, "enable generic segmenation offload") fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. 
All modifications are stored in memory inside the sandbox.") watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") @@ -141,6 +142,7 @@ func main() { FileAccess: fsAccess, Overlay: *overlay, Network: netType, + GSO: *gso, LogPackets: *logPackets, Platform: platformType, Strace: *strace, diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index 2ed793333..c0de9a28f 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -6,6 +6,7 @@ go_library( name = "sandbox", srcs = [ "network.go", + "network_unsafe.go", "sandbox.go", ], importpath = "gvisor.googlesource.com/gvisor/runsc/sandbox", @@ -27,5 +28,6 @@ go_library( "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", "@com_github_syndtr_gocapability//capability:go_default_library", "@com_github_vishvananda_netlink//:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index ec0a252d1..be924ae25 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -26,6 +26,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/vishvananda/netlink" + "golang.org/x/sys/unix" "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/urpc" "gvisor.googlesource.com/gvisor/runsc/boot" @@ -67,7 +68,7 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi // Build the path to the net namespace of the sandbox process. // This is what we will copy. 
nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net") - if err := createInterfacesAndRoutesFromNS(conn, nsPath); err != nil { + if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.GSO); err != nil { return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err) } case boot.NetworkHost: @@ -137,7 +138,7 @@ func isRootNS() (bool, error) { // createInterfacesAndRoutesFromNS scrapes the interface and routes from the // net namespace with the given path, creates them in the sandbox, and removes // them from the host. -func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string) error { +func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO bool) error { // Join the network namespace that we will be copying. restore, err := joinNetNS(nsPath) if err != nil { @@ -246,6 +247,19 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string) error { return fmt.Errorf("getting link for interface %q: %v", iface.Name, err) } + if enableGSO { + gso, err := isGSOEnabled(fd, iface.Name) + if err != nil { + return fmt.Errorf("getting GSO for interface %q: %v", iface.Name, err) + } + if gso { + if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil { + return fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err) + } + link.GSOMaxSize = ifaceLink.Attrs().GSOMaxSize + } + } + // Collect the addresses for the interface, enable forwarding, // and remove them from the host. for _, addr := range ip4addrs { diff --git a/runsc/sandbox/network_unsafe.go b/runsc/sandbox/network_unsafe.go new file mode 100644 index 000000000..f7447f002 --- /dev/null +++ b/runsc/sandbox/network_unsafe.go @@ -0,0 +1,56 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sandbox + +import ( + "syscall" + "unsafe" + + "golang.org/x/sys/unix" +) + +type ethtoolValue struct { + cmd uint32 + val uint32 +} + +type ifreq struct { + ifrName [unix.IFNAMSIZ]byte + ifrData *ethtoolValue +} + +const ( + _ETHTOOL_GGSO = 0x00000023 +) + +func isGSOEnabled(fd int, intf string) (bool, error) { + val := ethtoolValue{ + cmd: _ETHTOOL_GGSO, + } + + var name [unix.IFNAMSIZ]byte + copy(name[:], []byte(intf)) + + ifr := ifreq{ + ifrName: name, + ifrData: &val, + } + + if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), unix.SIOCETHTOOL, uintptr(unsafe.Pointer(&ifr))); err != 0 { + return false, err + } + + return val.val != 0, nil +} |