summaryrefslogtreecommitdiffhomepage
path: root/runsc/sandbox
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/sandbox')
-rw-r--r--runsc/sandbox/BUILD2
-rw-r--r--runsc/sandbox/network.go129
-rw-r--r--runsc/sandbox/sandbox.go112
3 files changed, 145 insertions, 98 deletions
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index c0de9a28f..f32da45c1 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -9,7 +9,7 @@ go_library(
"network_unsafe.go",
"sandbox.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/sandbox",
+ importpath = "gvisor.dev/gvisor/runsc/sandbox",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index 0460d5f1a..a965a9dcb 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -27,10 +27,10 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -68,7 +68,7 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi
// Build the path to the net namespace of the sandbox process.
// This is what we will copy.
nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net")
- if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.GSO); err != nil {
+ if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.GSO, conf.NumNetworkChannels); err != nil {
return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err)
}
case boot.NetworkHost:
@@ -138,7 +138,7 @@ func isRootNS() (bool, error) {
// createInterfacesAndRoutesFromNS scrapes the interface and routes from the
// net namespace with the given path, creates them in the sandbox, and removes
// them from the host.
-func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO bool) error {
+func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO bool, numNetworkChannels int) error {
// Join the network namespace that we will be copying.
restore, err := joinNetNS(nsPath)
if err != nil {
@@ -202,25 +202,6 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
continue
}
- // Create the socket.
- const protocol = 0x0300 // htons(ETH_P_ALL)
- fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol)
- if err != nil {
- return fmt.Errorf("unable to create raw socket: %v", err)
- }
- deviceFile := os.NewFile(uintptr(fd), "raw-device-fd")
-
- // Bind to the appropriate device.
- ll := syscall.SockaddrLinklayer{
- Protocol: protocol,
- Ifindex: iface.Index,
- Hatype: 0, // No ARP type.
- Pkttype: syscall.PACKET_OTHERHOST,
- }
- if err := syscall.Bind(fd, &ll); err != nil {
- return fmt.Errorf("unable to bind to %q: %v", iface.Name, err)
- }
-
// Scrape the routes before removing the address, since that
// will remove the routes as well.
routes, def, err := routesForIface(iface)
@@ -236,9 +217,10 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
}
link := boot.FDBasedLink{
- Name: iface.Name,
- MTU: iface.MTU,
- Routes: routes,
+ Name: iface.Name,
+ MTU: iface.MTU,
+ Routes: routes,
+ NumChannels: numNetworkChannels,
}
// Get the link for the interface.
@@ -246,32 +228,25 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
if err != nil {
return fmt.Errorf("getting link for interface %q: %v", iface.Name, err)
}
- link.LinkAddress = []byte(ifaceLink.Attrs().HardwareAddr)
+ link.LinkAddress = ifaceLink.Attrs().HardwareAddr
- if enableGSO {
- gso, err := isGSOEnabled(fd, iface.Name)
+ log.Debugf("Setting up network channels")
+ // Create the socket for the device.
+ for i := 0; i < link.NumChannels; i++ {
+ log.Debugf("Creating Channel %d", i)
+ socketEntry, err := createSocket(iface, ifaceLink, enableGSO)
if err != nil {
- return fmt.Errorf("getting GSO for interface %q: %v", iface.Name, err)
+ return fmt.Errorf("failed to createSocket for %s : %v", iface.Name, err)
}
- if gso {
- if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil {
- return fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err)
- }
- link.GSOMaxSize = ifaceLink.Attrs().GSOMaxSize
+ if i == 0 {
+ link.GSOMaxSize = socketEntry.gsoMaxSize
} else {
- log.Infof("GSO not available in host.")
+ if link.GSOMaxSize != socketEntry.gsoMaxSize {
+ return fmt.Errorf("inconsistent gsoMaxSize %d and %d when creating multiple channels for same interface: %s",
+ link.GSOMaxSize, socketEntry.gsoMaxSize, iface.Name)
+ }
}
- }
-
- // Use SO_RCVBUFFORCE because on linux the receive buffer for an
- // AF_PACKET socket is capped by "net.core.rmem_max". rmem_max
- // defaults to a unusually low value of 208KB. This is too low
- // for gVisor to be able to receive packets at high throughputs
- // without incurring packet drops.
- const rcvBufSize = 4 << 20 // 4MB.
-
- if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, rcvBufSize); err != nil {
- return fmt.Errorf("failed to increase socket rcv buffer to %d: %v", rcvBufSize, err)
+ args.FilePayload.Files = append(args.FilePayload.Files, socketEntry.deviceFile)
}
// Collect the addresses for the interface, enable forwarding,
@@ -285,7 +260,6 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
}
}
- args.FilePayload.Files = append(args.FilePayload.Files, deviceFile)
args.FDBasedLinks = append(args.FDBasedLinks, link)
}
@@ -296,6 +270,61 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
return nil
}
+type socketEntry struct {
+ deviceFile *os.File
+ gsoMaxSize uint32
+}
+
+// createSocket creates an underlying AF_PACKET socket and configures it for use by
+// the sentry and returns an *os.File that wraps the underlying socket fd.
+func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) (*socketEntry, error) {
+ // Create the socket.
+ const protocol = 0x0300 // htons(ETH_P_ALL)
+ fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol)
+ if err != nil {
+ return nil, fmt.Errorf("unable to create raw socket: %v", err)
+ }
+ deviceFile := os.NewFile(uintptr(fd), "raw-device-fd")
+ // Bind to the appropriate device.
+ ll := syscall.SockaddrLinklayer{
+ Protocol: protocol,
+ Ifindex: iface.Index,
+ Hatype: 0, // No ARP type.
+ Pkttype: syscall.PACKET_OTHERHOST,
+ }
+ if err := syscall.Bind(fd, &ll); err != nil {
+ return nil, fmt.Errorf("unable to bind to %q: %v", iface.Name, err)
+ }
+
+ gsoMaxSize := uint32(0)
+ if enableGSO {
+ gso, err := isGSOEnabled(fd, iface.Name)
+ if err != nil {
+ return nil, fmt.Errorf("getting GSO for interface %q: %v", iface.Name, err)
+ }
+ if gso {
+ if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil {
+ return nil, fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err)
+ }
+ gsoMaxSize = ifaceLink.Attrs().GSOMaxSize
+ } else {
+ log.Infof("GSO not available in host.")
+ }
+ }
+
+ // Use SO_RCVBUFFORCE because on linux the receive buffer for an
+ // AF_PACKET socket is capped by "net.core.rmem_max". rmem_max
+ // defaults to a unusually low value of 208KB. This is too low
+ // for gVisor to be able to receive packets at high throughputs
+ // without incurring packet drops.
+ const rcvBufSize = 4 << 20 // 4MB.
+
+ if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, rcvBufSize); err != nil {
+ return nil, fmt.Errorf("failed to increase socket rcv buffer to %d: %v", rcvBufSize, err)
+ }
+ return &socketEntry{deviceFile, gsoMaxSize}, nil
+}
+
// loopbackLinks collects the links for a loopback interface.
func loopbackLinks(iface net.Interface, addrs []net.Addr) ([]boot.LoopbackLink, error) {
var links []boot.LoopbackLink
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 032190636..a19b1d124 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -28,16 +28,16 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/control/client"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/console"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/control/client"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/platform/kvm"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/console"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Sandbox wraps a sandbox process.
@@ -515,46 +515,64 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
log.Infof("Sandbox will be started in new user namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
+ cmd.Args = append(cmd.Args, "--setup-root")
- // Map nobody in the new namespace to nobody in the parent namespace.
- //
- // A sandbox process will construct an empty
- // root for itself, so it has to have the CAP_SYS_ADMIN
- // capability.
- //
- // FIXME(b/122554829): The current implementations of
- // os/exec doesn't allow to set ambient capabilities if
- // a process is started in a new user namespace. As a
- // workaround, we start the sandbox process with the 0
- // UID and then it constructs a chroot and sets UID to
- // nobody. https://github.com/golang/go/issues/2315
- const nobody = 65534
- cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: int(0),
- HostID: int(nobody - 1),
- Size: int(1),
- },
- {
- ContainerID: int(nobody),
- HostID: int(nobody),
- Size: int(1),
- },
- }
- cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: int(nobody),
- HostID: int(nobody),
- Size: int(1),
- },
+ if conf.Rootless {
+ log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: os.Getuid(),
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: os.Getgid(),
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
+
+ } else {
+ // Map nobody in the new namespace to nobody in the parent namespace.
+ //
+ // A sandbox process will construct an empty
+ // root for itself, so it has to have the CAP_SYS_ADMIN
+ // capability.
+ //
+ // FIXME(b/122554829): The current implementations of
+ // os/exec doesn't allow to set ambient capabilities if
+ // a process is started in a new user namespace. As a
+ // workaround, we start the sandbox process with the 0
+ // UID and then it constructs a chroot and sets UID to
+ // nobody. https://github.com/golang/go/issues/2315
+ const nobody = 65534
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: nobody - 1,
+ Size: 1,
+ },
+ {
+ ContainerID: nobody,
+ HostID: nobody,
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: nobody,
+ HostID: nobody,
+ Size: 1,
+ },
+ }
+
+ // Set credentials to run as user and group nobody.
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
}
- // Set credentials to run as user and group nobody.
- cmd.SysProcAttr.Credential = &syscall.Credential{
- Uid: 0,
- Gid: nobody,
- }
- cmd.Args = append(cmd.Args, "--setup-root")
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}