summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/boot/config.go14
-rw-r--r--runsc/boot/controller.go4
-rw-r--r--runsc/boot/filter/config.go10
-rw-r--r--runsc/boot/fs.go16
-rw-r--r--runsc/boot/loader.go2
-rw-r--r--runsc/boot/network.go14
-rw-r--r--runsc/cmd/gofer.go29
-rw-r--r--runsc/container/container.go6
-rw-r--r--runsc/container/container_test.go37
-rw-r--r--runsc/fsgofer/filter/config.go12
-rw-r--r--runsc/fsgofer/fsgofer.go105
-rw-r--r--runsc/main.go32
-rw-r--r--runsc/sandbox/BUILD1
-rw-r--r--runsc/sandbox/network.go12
-rw-r--r--runsc/specutils/BUILD1
-rw-r--r--runsc/specutils/cri.go110
-rw-r--r--runsc/specutils/specutils.go58
-rw-r--r--runsc/testutil/BUILD1
-rw-r--r--runsc/testutil/testutil.go4
19 files changed, 335 insertions, 133 deletions
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 38278d0a2..72a33534f 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -178,8 +178,11 @@ type Config struct {
// capabilities.
EnableRaw bool
- // GSO indicates that generic segmentation offload is enabled.
- GSO bool
+ // HardwareGSO indicates that hardware segmentation offload is enabled.
+ HardwareGSO bool
+
+ // SoftwareGSO indicates that software segmentation offload is enabled.
+ SoftwareGSO bool
// LogPackets indicates that all network packets should be logged.
LogPackets bool
@@ -231,6 +234,10 @@ type Config struct {
// ReferenceLeakMode sets reference leak check mode
ReferenceLeakMode refs.LeakMode
+ // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for
+ // write to workaround overlayfs limitation on kernels before 4.19.
+ OverlayfsStaleRead bool
+
// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
// tests. It allows runsc to start the sandbox process as the current
// user, and without chrooting the sandbox process. This can be
@@ -271,6 +278,9 @@ func (c *Config) ToFlags() []string {
"--rootless=" + strconv.FormatBool(c.Rootless),
"--alsologtostderr=" + strconv.FormatBool(c.AlsoLogToStderr),
"--ref-leak-mode=" + refsLeakModeToString(c.ReferenceLeakMode),
+ "--gso=" + strconv.FormatBool(c.HardwareGSO),
+ "--software-gso=" + strconv.FormatBool(c.SoftwareGSO),
+ "--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead),
}
// Only include these if set since it is never to be used by users.
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index a73c593ea..5f644b57e 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -32,6 +32,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/watchdog"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -237,6 +238,9 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
return fmt.Errorf("start arguments must contain stdin, stderr, and stdout followed by at least one file for the container root gofer")
}
+ // All validation passed, logs the spec for debugging.
+ specutils.LogSpec(args.Spec)
+
err := cm.l.startContainer(args.Spec, args.Conf, args.CID, args.FilePayload.Files)
if err != nil {
log.Debugf("containerManager.Start failed %q: %+v: %v", args.CID, args, err)
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index a2ecc6bcb..5ad108261 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -44,6 +44,7 @@ var allowedSyscalls = seccomp.SyscallRules{
},
syscall.SYS_CLOSE: {},
syscall.SYS_DUP: {},
+ syscall.SYS_DUP2: {},
syscall.SYS_EPOLL_CREATE1: {},
syscall.SYS_EPOLL_CTL: {},
syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{
@@ -242,6 +243,15 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.AllowValue(0),
},
},
+ unix.SYS_SENDMMSG: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.MSG_DONTWAIT),
+ seccomp.AllowValue(0),
+ },
+ },
syscall.SYS_RESTART_SYSCALL: {},
syscall.SYS_RT_SIGACTION: {},
syscall.SYS_RT_SIGPROCMASK: {},
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 393c2a88b..76036c147 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -703,6 +703,14 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*
log.Infof("Mounting root over 9P, ioFD: %d", fd)
p9FS := mustFindFilesystem("9p")
opts := p9MountOptions(fd, conf.FileAccess)
+
+ if conf.OverlayfsStaleRead {
+ // We can't check for overlayfs here because sandbox is chroot'ed and gofer
+ // can only send mount options for specs.Mounts (specs.Root is missing
+ // Options field). So assume root is always on top of overlayfs.
+ opts = append(opts, "overlayfs_stale_read")
+ }
+
rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
if err != nil {
return nil, fmt.Errorf("creating root mount point: %v", err)
@@ -737,7 +745,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
fsName string
opts []string
useOverlay bool
- err error
)
switch m.Type {
@@ -747,7 +754,12 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
fsName = sysfs
case tmpfs:
fsName = m.Type
+
+ var err error
opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...)
+ if err != nil {
+ return "", nil, false, err
+ }
case bind:
fd := c.fds.remove()
@@ -763,7 +775,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
// for now.
log.Warningf("ignoring unknown filesystem type %q", m.Type)
}
- return fsName, opts, useOverlay, err
+ return fsName, opts, useOverlay, nil
}
// mountSubmount mounts volumes inside the container's root. Because mounts may
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index c8e5e86ee..0c0eba99e 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -922,7 +922,7 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
HandleLocal: true,
// Enable raw sockets for users with sufficient
// privileges.
- UnassociatedFactory: raw.EndpointFactory{},
+ RawFactory: raw.EndpointFactory{},
})}
// Enable SACK Recovery.
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 32cba5ac1..f98c5fd36 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -50,12 +50,13 @@ type DefaultRoute struct {
// FDBasedLink configures an fd-based link.
type FDBasedLink struct {
- Name string
- MTU int
- Addresses []net.IP
- Routes []Route
- GSOMaxSize uint32
- LinkAddress net.HardwareAddr
+ Name string
+ MTU int
+ Addresses []net.IP
+ Routes []Route
+ GSOMaxSize uint32
+ SoftwareGSOEnabled bool
+ LinkAddress net.HardwareAddr
// NumChannels controls how many underlying FD's are to be used to
// create this endpoint.
@@ -163,6 +164,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
Address: mac,
PacketDispatchMode: fdbased.RecvMMsg,
GSOMaxSize: link.GSOMaxSize,
+ SoftwareGSOEnabled: link.SoftwareGSOEnabled,
RXChecksumOffload: true,
})
if err != nil {
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 4c2fb80bf..4831210c0 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -27,6 +27,7 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/unet"
@@ -135,7 +136,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
//
// Note that all mount points have been mounted in the proper location in
// setupRootFS().
- cleanMounts, err := resolveMounts(spec.Mounts, root)
+ cleanMounts, err := resolveMounts(conf, spec.Mounts, root)
if err != nil {
Fatalf("Failure to resolve mounts: %v", err)
}
@@ -380,7 +381,7 @@ func setupMounts(mounts []specs.Mount, root string) error {
// Otherwise, it may follow symlinks to locations that would be overwritten
// with another mount point and return the wrong location. In short, make sure
// setupMounts() has been called before.
-func resolveMounts(mounts []specs.Mount, root string) ([]specs.Mount, error) {
+func resolveMounts(conf *boot.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) {
cleanMounts := make([]specs.Mount, 0, len(mounts))
for _, m := range mounts {
if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
@@ -395,8 +396,15 @@ func resolveMounts(mounts []specs.Mount, root string) ([]specs.Mount, error) {
if err != nil {
panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, root, err))
}
+
+ opts, err := adjustMountOptions(conf, filepath.Join(root, relDst), m.Options)
+ if err != nil {
+ return nil, err
+ }
+
cpy := m
cpy.Destination = filepath.Join("/", relDst)
+ cpy.Options = opts
cleanMounts = append(cleanMounts, cpy)
}
return cleanMounts, nil
@@ -453,3 +461,20 @@ func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, erro
}
return base, nil
}
+
+// adjustMountOptions adds 'overlayfs_stale_read' if mounting over overlayfs.
+func adjustMountOptions(conf *boot.Config, path string, opts []string) ([]string, error) {
+ rv := make([]string, len(opts))
+ copy(rv, opts)
+
+ if conf.OverlayfsStaleRead {
+ statfs := syscall.Statfs_t{}
+ if err := syscall.Statfs(path, &statfs); err != nil {
+ return nil, err
+ }
+ if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC {
+ rv = append(rv, "overlayfs_stale_read")
+ }
+ }
+ return rv, nil
+}
diff --git a/runsc/container/container.go b/runsc/container/container.go
index a721c1c31..32510d427 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -1149,7 +1149,7 @@ func maybeLockRootContainer(spec *specs.Spec, rootDir string) (func() error, err
}
func isRoot(spec *specs.Spec) bool {
- return specutils.ShouldCreateSandbox(spec)
+ return specutils.SpecContainerType(spec) != specutils.ContainerTypeContainer
}
// runInCgroup executes fn inside the specified cgroup. If cg is nil, execute
@@ -1198,7 +1198,7 @@ func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool)
// Get the lowest score for all containers.
var lowScore int
scoreFound := false
- if len(containers) == 1 && len(containers[0].Spec.Annotations[specutils.ContainerdContainerTypeAnnotation]) == 0 {
+ if len(containers) == 1 && specutils.SpecContainerType(containers[0].Spec) == specutils.ContainerTypeUnspecified {
// This is a single-container sandbox. Set the oom_score_adj to
// the value specified in the OCI bundle.
if containers[0].Spec.Process.OOMScoreAdj != nil {
@@ -1214,7 +1214,7 @@ func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool)
//
// We will use OOMScoreAdj in the single-container case where the
// containerd container-type annotation is not present.
- if container.Spec.Annotations[specutils.ContainerdContainerTypeAnnotation] == specutils.ContainerdContainerTypeSandbox {
+ if specutils.SpecContainerType(container.Spec) == specutils.ContainerTypeSandbox {
continue
}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 519f5ed9b..c4c56b2e0 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -2074,6 +2074,43 @@ func TestNetRaw(t *testing.T) {
}
}
+// TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works.
+func TestOverlayfsStaleRead(t *testing.T) {
+ conf := testutil.TestConfig()
+ conf.OverlayfsStaleRead = true
+
+ in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in")
+ if err != nil {
+ t.Fatalf("ioutil.TempFile() failed: %v", err)
+ }
+ defer in.Close()
+ if _, err := in.WriteString("stale data"); err != nil {
+ t.Fatalf("in.Write() failed: %v", err)
+ }
+
+ out, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.out")
+ if err != nil {
+ t.Fatalf("ioutil.TempFile() failed: %v", err)
+ }
+ defer out.Close()
+
+ const want = "foobar"
+ cmd := fmt.Sprintf("cat %q && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name())
+ spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd)
+ if err := run(spec, conf); err != nil {
+ t.Fatalf("Error running container: %v", err)
+ }
+
+ gotBytes, err := ioutil.ReadAll(out)
+ if err != nil {
+ t.Fatalf("out.Read() failed: %v", err)
+ }
+ got := strings.TrimSpace(string(gotBytes))
+ if want != got {
+ t.Errorf("Wrong content in out file, got: %q. want: %q", got, want)
+ }
+}
+
// executeSync synchronously executes a new process.
func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) {
pid, err := cont.Execute(args)
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index 0bf7507b7..2ea95f8fb 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -220,6 +220,18 @@ var udsSyscalls = seccomp.SyscallRules{
syscall.SYS_SOCKET: []seccomp.Rule{
{
seccomp.AllowValue(syscall.AF_UNIX),
+ seccomp.AllowValue(syscall.SOCK_STREAM),
+ seccomp.AllowValue(0),
+ },
+ {
+ seccomp.AllowValue(syscall.AF_UNIX),
+ seccomp.AllowValue(syscall.SOCK_DGRAM),
+ seccomp.AllowValue(0),
+ },
+ {
+ seccomp.AllowValue(syscall.AF_UNIX),
+ seccomp.AllowValue(syscall.SOCK_SEQPACKET),
+ seccomp.AllowValue(0),
},
},
syscall.SYS_CONNECT: []seccomp.Rule{
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 29a82138e..3fceecb3d 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -21,7 +21,6 @@
package fsgofer
import (
- "errors"
"fmt"
"io"
"math"
@@ -126,13 +125,6 @@ func NewAttachPoint(prefix string, c Config) (p9.Attacher, error) {
// Attach implements p9.Attacher.
func (a *attachPoint) Attach() (p9.File, error) {
- // dirFD (1st argument) is ignored because 'prefix' is always absolute.
- stat, err := statAt(-1, a.prefix)
- if err != nil {
- return nil, fmt.Errorf("stat file %q, err: %v", a.prefix, err)
- }
-
- // Acquire the attach point lock.
a.attachedMu.Lock()
defer a.attachedMu.Unlock()
@@ -140,47 +132,24 @@ func (a *attachPoint) Attach() (p9.File, error) {
return nil, fmt.Errorf("attach point already attached, prefix: %s", a.prefix)
}
- // Hold the file descriptor we are converting into a p9.File.
- var f *fd.FD
-
- // Apply the S_IFMT bitmask so we can detect file type appropriately.
- switch fmtStat := stat.Mode & syscall.S_IFMT; fmtStat {
- case syscall.S_IFSOCK:
- // Check to see if the CLI option has been set to allow the UDS mount.
- if !a.conf.HostUDS {
- return nil, errors.New("host UDS support is disabled")
- }
-
- // Attempt to open a connection. Bubble up the failures.
- f, err = fd.DialUnix(a.prefix)
- if err != nil {
- return nil, err
- }
-
- default:
- // Default to Read/Write permissions.
- mode := syscall.O_RDWR
-
- // If the configuration is Read Only or the mount point is a directory,
- // set the mode to Read Only.
- if a.conf.ROMount || fmtStat == syscall.S_IFDIR {
- mode = syscall.O_RDONLY
- }
+ f, err := openAnyFile(a.prefix, func(mode int) (*fd.FD, error) {
+ return fd.Open(a.prefix, openFlags|mode, 0)
+ })
+ if err != nil {
+ return nil, fmt.Errorf("unable to open %q: %v", a.prefix, err)
+ }
- // Open the mount point & capture the FD.
- f, err = fd.Open(a.prefix, openFlags|mode, 0)
- if err != nil {
- return nil, fmt.Errorf("unable to open file %q, err: %v", a.prefix, err)
- }
+ stat, err := stat(f.FD())
+ if err != nil {
+ return nil, fmt.Errorf("unable to stat %q: %v", a.prefix, err)
}
- // Return a localFile object to the caller with the UDS FD included.
- rv, err := newLocalFile(a, f, a.prefix, stat)
+ lf, err := newLocalFile(a, f, a.prefix, stat)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("unable to create localFile %q: %v", a.prefix, err)
}
a.attached = true
- return rv, nil
+ return lf, nil
}
// makeQID returns a unique QID for the given stat buffer.
@@ -296,10 +265,10 @@ func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, erro
// actual file open and is customizable by the caller.
func openAnyFile(path string, fn func(mode int) (*fd.FD, error)) (*fd.FD, error) {
// Attempt to open file in the following mode in order:
- // 1. RDONLY | NONBLOCK: for all files, works for directories and ro mounts too.
- // Use non-blocking to prevent getting stuck inside open(2) for FIFOs. This option
- // has no effect on regular files.
- // 2. PATH: for symlinks
+ // 1. RDONLY | NONBLOCK: for all files, directories, ro mounts, FIFOs.
+ // Use non-blocking to prevent getting stuck inside open(2) for
+ // FIFOs. This option has no effect on regular files.
+ // 2. PATH: for symlinks, sockets.
modes := []int{syscall.O_RDONLY | syscall.O_NONBLOCK, unix.O_PATH}
var err error
@@ -1063,12 +1032,48 @@ func (l *localFile) Flush() error {
}
// Connect implements p9.File.
-func (l *localFile) Connect(p9.ConnectFlags) (*fd.FD, error) {
- // Check to see if the CLI option has been set to allow the UDS mount.
+func (l *localFile) Connect(flags p9.ConnectFlags) (*fd.FD, error) {
if !l.attachPoint.conf.HostUDS {
return nil, syscall.ECONNREFUSED
}
- return fd.DialUnix(l.hostPath)
+
+ // TODO(gvisor.dev/issue/1003): Due to different app vs replacement
+ // mappings, the app path may have fit in the sockaddr, but we can't
+ // fit f.path in our sockaddr. We'd need to redirect through a shorter
+ // path in order to actually connect to this socket.
+ if len(l.hostPath) > linux.UnixPathMax {
+ return nil, syscall.ECONNREFUSED
+ }
+
+ var stype int
+ switch flags {
+ case p9.StreamSocket:
+ stype = syscall.SOCK_STREAM
+ case p9.DgramSocket:
+ stype = syscall.SOCK_DGRAM
+ case p9.SeqpacketSocket:
+ stype = syscall.SOCK_SEQPACKET
+ default:
+ return nil, syscall.ENXIO
+ }
+
+ f, err := syscall.Socket(syscall.AF_UNIX, stype, 0)
+ if err != nil {
+ return nil, err
+ }
+
+ if err := syscall.SetNonblock(f, true); err != nil {
+ syscall.Close(f)
+ return nil, err
+ }
+
+ sa := syscall.SockaddrUnix{Name: l.hostPath}
+ if err := syscall.Connect(f, &sa); err != nil {
+ syscall.Close(f)
+ return nil, err
+ }
+
+ return fd.New(f), nil
}
// Close implements p9.File.
diff --git a/runsc/main.go b/runsc/main.go
index 7dce9dc00..ae906c661 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -41,35 +41,37 @@ import (
var (
// Although these flags are not part of the OCI spec, they are used by
// Docker, and thus should not be changed.
- rootDir = flag.String("root", "", "root directory for storage of container state")
- logFilename = flag.String("log", "", "file path where internal debug information is written, default is stdout")
- logFormat = flag.String("log-format", "text", "log format: text (default), json, or json-k8s")
- debug = flag.Bool("debug", false, "enable debug logging")
- showVersion = flag.Bool("version", false, "show version and exit")
+ rootDir = flag.String("root", "", "root directory for storage of container state.")
+ logFilename = flag.String("log", "", "file path where internal debug information is written, default is stdout.")
+ logFormat = flag.String("log-format", "text", "log format: text (default), json, or json-k8s.")
+ debug = flag.Bool("debug", false, "enable debug logging.")
+ showVersion = flag.Bool("version", false, "show version and exit.")
// These flags are unique to runsc, and are used to configure parts of the
// system that are not covered by the runtime spec.
// Debugging flags.
debugLog = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
- logPackets = flag.Bool("log-packets", false, "enable network packet logging")
+ logPackets = flag.Bool("log-packets", false, "enable network packet logging.")
logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.")
debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log-dir' flag is ignored.")
- debugLogFormat = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s")
- alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr")
+ debugLogFormat = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.")
+ alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr.")
// Debugging flags: strace related
- strace = flag.Bool("strace", false, "enable strace")
+ strace = flag.Bool("strace", false, "enable strace.")
straceSyscalls = flag.String("strace-syscalls", "", "comma-separated list of syscalls to trace. If --strace is true and this list is empty, then all syscalls will be traced.")
- straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs")
+ straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs.")
// Flags that control sandbox runtime behavior.
- platformName = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
+ platformName = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm.")
network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
- gso = flag.Bool("gso", true, "enable generic segmenation offload")
+ hardwareGSO = flag.Bool("gso", true, "enable hardware segmentation offload if it is supported by a network device.")
+ softwareGSO = flag.Bool("software-gso", true, "enable software segmentation offload when hardware ofload can't be enabled.")
fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
- fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "Allow the gofer to mount Unix Domain Sockets.")
+ fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
+ overlayfsStaleRead = flag.Bool("overlayfs-stale-read", false, "reopen cached FDs after a file is opened for write to workaround overlayfs limitation on kernels before 4.19.")
watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
@@ -199,7 +201,8 @@ func main() {
FSGoferHostUDS: *fsGoferHostUDS,
Overlay: *overlay,
Network: netType,
- GSO: *gso,
+ HardwareGSO: *hardwareGSO,
+ SoftwareGSO: *softwareGSO,
LogPackets: *logPackets,
Platform: platformType,
Strace: *strace,
@@ -212,6 +215,7 @@ func main() {
Rootless: *rootless,
AlsoLogToStderr: *alsoLogToStderr,
ReferenceLeakMode: refsLeakMode,
+ OverlayfsStaleRead: *overlayfsStaleRead,
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
TestOnlyTestNameEnv: *testOnlyTestNameEnv,
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index 7fdceaab6..27459e6d1 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -19,6 +19,7 @@ go_library(
"//pkg/log",
"//pkg/sentry/control",
"//pkg/sentry/platform",
+ "//pkg/tcpip/stack",
"//pkg/urpc",
"//runsc/boot",
"//runsc/boot/platforms",
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index 5634f0707..d42de0176 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -28,6 +28,7 @@ import (
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/urpc"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/specutils"
@@ -61,7 +62,7 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi
// Build the path to the net namespace of the sandbox process.
// This is what we will copy.
nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net")
- if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.GSO, conf.NumNetworkChannels); err != nil {
+ if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.HardwareGSO, conf.SoftwareGSO, conf.NumNetworkChannels); err != nil {
return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err)
}
case boot.NetworkHost:
@@ -136,7 +137,7 @@ func isRootNS() (bool, error) {
// createInterfacesAndRoutesFromNS scrapes the interface and routes from the
// net namespace with the given path, creates them in the sandbox, and removes
// them from the host.
-func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO bool, numNetworkChannels int) error {
+func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, numNetworkChannels int) error {
// Join the network namespace that we will be copying.
restore, err := joinNetNS(nsPath)
if err != nil {
@@ -232,7 +233,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
// Create the socket for the device.
for i := 0; i < link.NumChannels; i++ {
log.Debugf("Creating Channel %d", i)
- socketEntry, err := createSocket(iface, ifaceLink, enableGSO)
+ socketEntry, err := createSocket(iface, ifaceLink, hardwareGSO)
if err != nil {
return fmt.Errorf("failed to createSocket for %s : %v", iface.Name, err)
}
@@ -246,6 +247,11 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
}
args.FilePayload.Files = append(args.FilePayload.Files, socketEntry.deviceFile)
}
+ if link.GSOMaxSize == 0 && softwareGSO {
+ // Hardware GSO is disabled. Let's enable software GSO.
+ link.GSOMaxSize = stack.SoftwareGSOMaxSize
+ link.SoftwareGSOEnabled = true
+ }
// Collect the addresses for the interface, enable forwarding,
// and remove them from the host.
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD
index fa58313a0..205638803 100644
--- a/runsc/specutils/BUILD
+++ b/runsc/specutils/BUILD
@@ -5,6 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "specutils",
srcs = [
+ "cri.go",
"fs.go",
"namespace.go",
"specutils.go",
diff --git a/runsc/specutils/cri.go b/runsc/specutils/cri.go
new file mode 100644
index 000000000..9c5877cd5
--- /dev/null
+++ b/runsc/specutils/cri.go
@@ -0,0 +1,110 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package specutils
+
+import (
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+const (
+ // ContainerdContainerTypeAnnotation is the OCI annotation set by
+ // containerd to indicate whether the container to create should have
+ // its own sandbox or a container within an existing sandbox.
+ ContainerdContainerTypeAnnotation = "io.kubernetes.cri.container-type"
+ // ContainerdContainerTypeContainer is the container type value
+ // indicating the container should be created in an existing sandbox.
+ ContainerdContainerTypeContainer = "container"
+ // ContainerdContainerTypeSandbox is the container type value
+ // indicating the container should be created in a new sandbox.
+ ContainerdContainerTypeSandbox = "sandbox"
+
+ // ContainerdSandboxIDAnnotation is the OCI annotation set to indicate
+ // which sandbox the container should be created in when the container
+ // is not the first container in the sandbox.
+ ContainerdSandboxIDAnnotation = "io.kubernetes.cri.sandbox-id"
+
+ // CRIOContainerTypeAnnotation is the OCI annotation set by
+ // CRI-O to indicate whether the container to create should have
+ // its own sandbox or a container within an existing sandbox.
+ CRIOContainerTypeAnnotation = "io.kubernetes.cri-o.ContainerType"
+
+ // CRIOContainerTypeContainer is the container type value
+ // indicating the container should be created in an existing sandbox.
+ CRIOContainerTypeContainer = "container"
+ // CRIOContainerTypeSandbox is the container type value
+ // indicating the container should be created in a new sandbox.
+ CRIOContainerTypeSandbox = "sandbox"
+
+ // CRIOSandboxIDAnnotation is the OCI annotation set to indicate
+ // which sandbox the container should be created in when the container
+ // is not the first container in the sandbox.
+ CRIOSandboxIDAnnotation = "io.kubernetes.cri-o.SandboxID"
+)
+
+// ContainerType represents the type of container requested by the calling container manager.
+type ContainerType int
+
+const (
+ // ContainerTypeUnspecified indicates that no known container type
+ // annotation was found in the spec.
+ ContainerTypeUnspecified ContainerType = iota
+ // ContainerTypeUnknown indicates that a container type was specified
+ // but is unknown to us.
+ ContainerTypeUnknown
+ // ContainerTypeSandbox indicates that the container should be run in a
+ // new sandbox.
+ ContainerTypeSandbox
+ // ContainerTypeContainer indicates that the container should be run in
+ // an existing sandbox.
+ ContainerTypeContainer
+)
+
+// SpecContainerType tries to determine the type of container specified by the
+// container manager using well-known container annotations.
+func SpecContainerType(spec *specs.Spec) ContainerType {
+ if t, ok := spec.Annotations[ContainerdContainerTypeAnnotation]; ok {
+ switch t {
+ case ContainerdContainerTypeSandbox:
+ return ContainerTypeSandbox
+ case ContainerdContainerTypeContainer:
+ return ContainerTypeContainer
+ default:
+ return ContainerTypeUnknown
+ }
+ }
+ if t, ok := spec.Annotations[CRIOContainerTypeAnnotation]; ok {
+ switch t {
+ case CRIOContainerTypeSandbox:
+ return ContainerTypeSandbox
+ case CRIOContainerTypeContainer:
+ return ContainerTypeContainer
+ default:
+ return ContainerTypeUnknown
+ }
+ }
+ return ContainerTypeUnspecified
+}
+
+// SandboxID returns the ID of the sandbox to join and whether an ID was found
+// in the spec.
+func SandboxID(spec *specs.Spec) (string, bool) {
+ if id, ok := spec.Annotations[ContainerdSandboxIDAnnotation]; ok {
+ return id, true
+ }
+ if id, ok := spec.Annotations[CRIOSandboxIDAnnotation]; ok {
+ return id, true
+ }
+ return "", false
+}
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 3d9ced1b6..d3c2e4e78 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -108,23 +108,18 @@ func ValidateSpec(spec *specs.Spec) error {
}
}
- // Two annotations are use by containerd to support multi-container pods.
- // "io.kubernetes.cri.container-type"
- // "io.kubernetes.cri.sandbox-id"
- containerType, hasContainerType := spec.Annotations[ContainerdContainerTypeAnnotation]
- _, hasSandboxID := spec.Annotations[ContainerdSandboxIDAnnotation]
- switch {
- // Non-containerd use won't set a container type.
- case !hasContainerType:
- case containerType == ContainerdContainerTypeSandbox:
- // When starting a container in an existing sandbox, the sandbox ID
- // must be set.
- case containerType == ContainerdContainerTypeContainer:
- if !hasSandboxID {
- return fmt.Errorf("spec has container-type of %s, but no sandbox ID set", containerType)
+ // CRI specifies whether a container should start a new sandbox, or run
+ // another container in an existing sandbox.
+ switch SpecContainerType(spec) {
+ case ContainerTypeContainer:
+ // When starting a container in an existing sandbox, the
+ // sandbox ID must be set.
+ if _, ok := SandboxID(spec); !ok {
+ return fmt.Errorf("spec has container-type of container, but no sandbox ID set")
}
+ case ContainerTypeUnknown:
+ return fmt.Errorf("unknown container-type")
default:
- return fmt.Errorf("unknown container-type: %s", containerType)
}
return nil
@@ -338,39 +333,6 @@ func IsSupportedDevMount(m specs.Mount) bool {
return true
}
-const (
- // ContainerdContainerTypeAnnotation is the OCI annotation set by
- // containerd to indicate whether the container to create should have
- // its own sandbox or a container within an existing sandbox.
- ContainerdContainerTypeAnnotation = "io.kubernetes.cri.container-type"
- // ContainerdContainerTypeContainer is the container type value
- // indicating the container should be created in an existing sandbox.
- ContainerdContainerTypeContainer = "container"
- // ContainerdContainerTypeSandbox is the container type value
- // indicating the container should be created in a new sandbox.
- ContainerdContainerTypeSandbox = "sandbox"
-
- // ContainerdSandboxIDAnnotation is the OCI annotation set to indicate
- // which sandbox the container should be created in when the container
- // is not the first container in the sandbox.
- ContainerdSandboxIDAnnotation = "io.kubernetes.cri.sandbox-id"
-)
-
-// ShouldCreateSandbox returns true if the spec indicates that a new sandbox
-// should be created for the container. If false, the container should be
-// started in an existing sandbox.
-func ShouldCreateSandbox(spec *specs.Spec) bool {
- t, ok := spec.Annotations[ContainerdContainerTypeAnnotation]
- return !ok || t == ContainerdContainerTypeSandbox
-}
-
-// SandboxID returns the ID of the sandbox to join and whether an ID was found
-// in the spec.
-func SandboxID(spec *specs.Spec) (string, bool) {
- id, ok := spec.Annotations[ContainerdSandboxIDAnnotation]
- return id, ok
-}
-
// WaitForReady waits for a process to become ready. The process is ready when
// the 'ready' function returns true. It continues to wait if 'ready' returns
// false. It returns error on timeout, if the process stops or if 'ready' fails.
diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD
index d44ebc906..c96ca2eb6 100644
--- a/runsc/testutil/BUILD
+++ b/runsc/testutil/BUILD
@@ -9,6 +9,7 @@ go_library(
importpath = "gvisor.dev/gvisor/runsc/testutil",
visibility = ["//:sandbox"],
deps = [
+ "//pkg/log",
"//runsc/boot",
"//runsc/specutils",
"@com_github_cenkalti_backoff//:go_default_library",
diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go
index edf8b126c..26467bdc7 100644
--- a/runsc/testutil/testutil.go
+++ b/runsc/testutil/testutil.go
@@ -25,7 +25,6 @@ import (
"fmt"
"io"
"io/ioutil"
- "log"
"math"
"math/rand"
"net/http"
@@ -42,6 +41,7 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/specutils"
)
@@ -286,7 +286,7 @@ func WaitForHTTP(port int, timeout time.Duration) error {
url := fmt.Sprintf("http://localhost:%d/", port)
resp, err := c.Get(url)
if err != nil {
- log.Printf("Waiting %s: %v", url, err)
+ log.Infof("Waiting %s: %v", url, err)
return err
}
resp.Body.Close()