summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2019-06-10 22:42:41 +0000
committergVisor bot <gvisor-bot@google.com>2019-06-10 22:42:41 +0000
commit8390a8227b571e82c42e3e90aa28a7b86f7e3f9b (patch)
tree8bfad5169182b7ba1c6ed5f3df0279729cc200b0 /runsc
parent4f56f1bf2248bb17da8b269b4191218d85ce6587 (diff)
parenta00157cc0e216a9829f2659ce35c856a22aa5ba2 (diff)
Merge a00157cc (automated)
Diffstat (limited to 'runsc')
-rw-r--r--runsc/boot/config.go6
-rw-r--r--runsc/boot/filter/config.go4
-rw-r--r--runsc/boot/loader.go5
-rw-r--r--runsc/boot/network.go39
-rw-r--r--runsc/cmd/cmd.go19
-rw-r--r--runsc/cmd/create.go1
-rw-r--r--runsc/cmd/error.go72
-rw-r--r--runsc/cmd/exec.go28
-rw-r--r--runsc/cmd/start.go1
-rw-r--r--runsc/main.go100
-rw-r--r--runsc/sandbox/network.go119
11 files changed, 258 insertions, 136 deletions
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 15f624f9b..8564c502d 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -221,6 +221,11 @@ type Config struct {
// user, and without chrooting the sandbox process. This can be
// necessary in test environments that have limited capabilities.
TestOnlyAllowRunAsCurrentUserWithoutChroot bool
+
+ // NumNetworkChannels controls the number of AF_PACKET sockets that map
+ // to the same underlying network device. This allows netstack to better
+ // scale for high throughput use cases.
+ NumNetworkChannels int
}
// ToFlags returns a slice of flags that correspond to the given Config.
@@ -244,6 +249,7 @@ func (c *Config) ToFlags() []string {
"--panic-signal=" + strconv.Itoa(c.PanicSignal),
"--profile=" + strconv.FormatBool(c.ProfileEnable),
"--net-raw=" + strconv.FormatBool(c.EnableRaw),
+ "--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
}
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
// Only include if set since it is never to be used by users.
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 652da1cef..ef2dbfad2 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -246,6 +246,10 @@ var allowedSyscalls = seccomp.SyscallRules{
},
syscall.SYS_SETITIMER: {},
syscall.SYS_SHUTDOWN: []seccomp.Rule{
+ // Used by fs/host to shutdown host sockets.
+ {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RD)},
+ {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_WR)},
+ // Used by unet to shutdown connections.
{seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
},
syscall.SYS_SIGALTSTACK: {},
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index a997776f8..42bddb2e8 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -29,6 +29,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/cpuid"
"gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/memutil"
"gvisor.googlesource.com/gvisor/pkg/rand"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/control"
@@ -37,7 +38,6 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
"gvisor.googlesource.com/gvisor/pkg/sentry/loader"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memutil"
"gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
@@ -424,6 +424,9 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
return nil, fmt.Errorf("error creating memfd: %v", err)
}
memfile := os.NewFile(uintptr(memfd), memfileName)
+ // We can't enable pgalloc.MemoryFileOpts.UseHostMemcgPressure even if
+ // there are memory cgroups specified, because at this point we're already
+ // in a mount namespace in which the relevant cgroupfs is not visible.
mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{})
if err != nil {
memfile.Close()
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 0a154d90b..82c259f47 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -57,6 +57,10 @@ type FDBasedLink struct {
Routes []Route
GSOMaxSize uint32
LinkAddress []byte
+
+ // NumChannels controls how many underlying FD's are to be used to
+ // create this endpoint.
+ NumChannels int
}
// LoopbackLink configures a loopback li nk.
@@ -68,8 +72,9 @@ type LoopbackLink struct {
// CreateLinksAndRoutesArgs are arguments to CreateLinkAndRoutes.
type CreateLinksAndRoutesArgs struct {
- // FilePayload contains the fds associated with the FDBasedLinks. The
- // two slices must have the same length.
+ // FilePayload contains the fds associated with the FDBasedLinks. The
+ // number of fd's should match the sum of the NumChannels field of the
+ // FDBasedLink entries below.
urpc.FilePayload
LoopbackLinks []LoopbackLink
@@ -95,8 +100,12 @@ func (r *Route) toTcpipRoute(id tcpip.NICID) tcpip.Route {
// CreateLinksAndRoutes creates links and routes in a network stack. It should
// only be called once.
func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct{}) error {
- if len(args.FilePayload.Files) != len(args.FDBasedLinks) {
- return fmt.Errorf("FilePayload must be same length at FDBasedLinks")
+ wantFDs := 0
+ for _, l := range args.FDBasedLinks {
+ wantFDs += l.NumChannels
+ }
+ if got := len(args.FilePayload.Files); got != wantFDs {
+ return fmt.Errorf("args.FilePayload.Files has %d FD's but we need %d entries based on FDBasedLinks", got, wantFDs)
}
var nicID tcpip.NICID
@@ -123,20 +132,26 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
}
}
- for i, link := range args.FDBasedLinks {
+ fdOffset := 0
+ for _, link := range args.FDBasedLinks {
nicID++
nicids[link.Name] = nicID
- // Copy the underlying FD.
- oldFD := args.FilePayload.Files[i].Fd()
- newFD, err := syscall.Dup(int(oldFD))
- if err != nil {
- return fmt.Errorf("failed to dup FD %v: %v", oldFD, err)
+ FDs := []int{}
+ for j := 0; j < link.NumChannels; j++ {
+ // Copy the underlying FD.
+ oldFD := args.FilePayload.Files[fdOffset].Fd()
+ newFD, err := syscall.Dup(int(oldFD))
+ if err != nil {
+ return fmt.Errorf("failed to dup FD %v: %v", oldFD, err)
+ }
+ FDs = append(FDs, newFD)
+ fdOffset++
}
mac := tcpip.LinkAddress(link.LinkAddress)
linkEP, err := fdbased.New(&fdbased.Options{
- FD: newFD,
+ FDs: FDs,
MTU: uint32(link.MTU),
EthernetHeader: true,
Address: mac,
@@ -148,7 +163,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
return err
}
- log.Infof("Enabling interface %q with id %d on addresses %+v (%v)", link.Name, nicID, link.Addresses, mac)
+ log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses, false /* loopback */); err != nil {
return err
}
diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go
index a2fc377d1..5b4cc4a39 100644
--- a/runsc/cmd/cmd.go
+++ b/runsc/cmd/cmd.go
@@ -17,34 +17,15 @@ package cmd
import (
"fmt"
- "os"
"runtime"
"strconv"
"syscall"
- "github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/runsc/specutils"
)
-// Errorf logs to stderr and returns subcommands.ExitFailure.
-func Errorf(s string, args ...interface{}) subcommands.ExitStatus {
- // If runsc is being invoked by docker or cri-o, then we might not have
- // access to stderr, so we log a serious-looking warning in addition to
- // writing to stderr.
- log.Warningf("FATAL ERROR: "+s, args...)
- fmt.Fprintf(os.Stderr, s+"\n", args...)
- // Return an error that is unlikely to be used by the application.
- return subcommands.ExitFailure
-}
-
-// Fatalf logs to stderr and exits with a failure status code.
-func Fatalf(s string, args ...interface{}) {
- Errorf(s, args...)
- os.Exit(128)
-}
-
// intFlags can be used with int flags that appear multiple times.
type intFlags []int
diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go
index 629c198fd..8bf9b7dcf 100644
--- a/runsc/cmd/create.go
+++ b/runsc/cmd/create.go
@@ -16,7 +16,6 @@ package cmd
import (
"context"
-
"flag"
"github.com/google/subcommands"
"gvisor.googlesource.com/gvisor/runsc/boot"
diff --git a/runsc/cmd/error.go b/runsc/cmd/error.go
new file mode 100644
index 000000000..700b19f14
--- /dev/null
+++ b/runsc/cmd/error.go
@@ -0,0 +1,72 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "os"
+ "time"
+
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+// ErrorLogger is where error messages should be written to. These messages are
+// consumed by containerd and show up to users of command line tools,
+// like docker/kubectl.
+var ErrorLogger io.Writer
+
+type jsonError struct {
+ Msg string `json:"msg"`
+ Level string `json:"level"`
+ Time time.Time `json:"time"`
+}
+
+// Errorf logs error to containerd log (--log), to stderr, and debug logs. It
+// returns subcommands.ExitFailure for convenience with subcommand.Execute()
+// methods:
+// return Errorf("Danger! Danger!")
+//
+func Errorf(format string, args ...interface{}) subcommands.ExitStatus {
+ // If runsc is being invoked by docker or cri-o, then we might not have
+ // access to stderr, so we log a serious-looking warning in addition to
+ // writing to stderr.
+ log.Warningf("FATAL ERROR: "+format, args...)
+ fmt.Fprintf(os.Stderr, format+"\n", args...)
+
+ j := jsonError{
+ Msg: fmt.Sprintf(format, args...),
+ Level: "error",
+ Time: time.Now(),
+ }
+ b, err := json.Marshal(j)
+ if err != nil {
+ panic(err)
+ }
+ if ErrorLogger != nil {
+ ErrorLogger.Write(b)
+ }
+
+ return subcommands.ExitFailure
+}
+
+// Fatalf logs the same way as Errorf() does, plus *exits* the process.
+func Fatalf(format string, args ...interface{}) {
+ Errorf(format, args...)
+ // Return an error that is unlikely to be used by the application.
+ os.Exit(128)
+}
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index 8cd070e61..0eeaaadba 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -143,13 +143,16 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// write the child's PID to the pid file. So when the container returns, the
// child process will also return and signal containerd.
if ex.detach {
- return ex.execAndWait(waitStatus)
+ return ex.execChildAndWait(waitStatus)
}
+ return ex.exec(c, e, waitStatus)
+}
+func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *syscall.WaitStatus) subcommands.ExitStatus {
// Start the new process and get it pid.
pid, err := c.Execute(e)
if err != nil {
- Fatalf("executing processes for container: %v", err)
+ return Errorf("executing processes for container: %v", err)
}
if e.StdioIsPty {
@@ -163,29 +166,29 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
if ex.internalPidFile != "" {
pidStr := []byte(strconv.Itoa(int(pid)))
if err := ioutil.WriteFile(ex.internalPidFile, pidStr, 0644); err != nil {
- Fatalf("writing internal pid file %q: %v", ex.internalPidFile, err)
+ return Errorf("writing internal pid file %q: %v", ex.internalPidFile, err)
}
}
- // Generate the pid file after the internal pid file is generated, so that users
- // can safely assume that the internal pid file is ready after `runsc exec -d`
- // returns.
+ // Generate the pid file after the internal pid file is generated, so that
+ // users can safely assume that the internal pid file is ready after
+ // `runsc exec -d` returns.
if ex.pidFile != "" {
if err := ioutil.WriteFile(ex.pidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil {
- Fatalf("writing pid file: %v", err)
+ return Errorf("writing pid file: %v", err)
}
}
// Wait for the process to exit.
ws, err := c.WaitPID(pid)
if err != nil {
- Fatalf("waiting on pid %d: %v", pid, err)
+ return Errorf("waiting on pid %d: %v", pid, err)
}
*waitStatus = ws
return subcommands.ExitSuccess
}
-func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStatus {
+func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStatus {
var args []string
for _, a := range os.Args[1:] {
if !strings.Contains(a, "detach") {
@@ -193,7 +196,7 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat
}
}
- // The command needs to write a pid file so that execAndWait can tell
+ // The command needs to write a pid file so that execChildAndWait can tell
// when it has started. If no pid-file was provided, we should use a
// filename in a temp directory.
pidFile := ex.pidFile
@@ -262,7 +265,10 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat
return false, nil
}
if err := specutils.WaitForReady(cmd.Process.Pid, 10*time.Second, ready); err != nil {
- Fatalf("unexpected error waiting for PID file, err: %v", err)
+ // Don't log fatal error here, otherwise it will override the error logged
+ // by the child process that has failed to start.
+ log.Warningf("Unexpected error waiting for PID file, err: %v", err)
+ return subcommands.ExitFailure
}
*waitStatus = 0
diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go
index 657726251..31e8f42bb 100644
--- a/runsc/cmd/start.go
+++ b/runsc/cmd/start.go
@@ -16,7 +16,6 @@ package cmd
import (
"context"
-
"flag"
"github.com/google/subcommands"
"gvisor.googlesource.com/gvisor/runsc/boot"
diff --git a/runsc/main.go b/runsc/main.go
index 11bc73f75..6f8e6e378 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -48,11 +48,12 @@ var (
// system that are not covered by the runtime spec.
// Debugging flags.
- debugLog = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
- logPackets = flag.Bool("log-packets", false, "enable network packet logging")
- logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.")
- debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log-dir' flag is ignored.")
- debugLogFormat = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s")
+ debugLog = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
+ logPackets = flag.Bool("log-packets", false, "enable network packet logging")
+ logFD = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.")
+ debugLogFD = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log-dir' flag is ignored.")
+ debugLogFormat = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s")
+ alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr")
// Debugging flags: strace related
strace = flag.Bool("strace", false, "enable strace")
@@ -60,16 +61,16 @@ var (
straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs")
// Flags that control sandbox runtime behavior.
- platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
- network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
- gso = flag.Bool("gso", true, "enable generic segmenation offload")
- fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
- overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
- watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
- panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
- profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
- netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
-
+ platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
+ network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
+ gso = flag.Bool("gso", true, "enable generic segmenation offload")
+ fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
+ overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
+ watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
+ panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
+ profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
+ netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
+ numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
)
@@ -117,6 +118,22 @@ func main() {
os.Exit(0)
}
+ var errorLogger io.Writer
+ if *logFD > -1 {
+ errorLogger = os.NewFile(uintptr(*logFD), "error log file")
+
+ } else if *logFilename != "" {
+ // We must set O_APPEND and not O_TRUNC because Docker passes
+ // the same log file for all commands (and also parses these
+ // log files), so we can't destroy them on each command.
+ var err error
+ errorLogger, err = os.OpenFile(*logFilename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
+ if err != nil {
+ cmd.Fatalf("error opening log file %q: %v", *logFilename, err)
+ }
+ }
+ cmd.ErrorLogger = errorLogger
+
platformType, err := boot.MakePlatformType(*platform)
if err != nil {
cmd.Fatalf("%v", err)
@@ -141,6 +158,10 @@ func main() {
cmd.Fatalf("%v", err)
}
+ if *numNetworkChannels <= 0 {
+ cmd.Fatalf("num_network_channels must be > 0, got: %d", *numNetworkChannels)
+ }
+
// Create a new Config from the flags.
conf := &boot.Config{
RootDir: *rootDir,
@@ -162,6 +183,7 @@ func main() {
ProfileEnable: *profile,
EnableRaw: *netRaw,
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
+ NumNetworkChannels: *numNetworkChannels,
}
if len(*straceSyscalls) != 0 {
conf.StraceSyscalls = strings.Split(*straceSyscalls, ",")
@@ -174,24 +196,7 @@ func main() {
subcommand := flag.CommandLine.Arg(0)
- var logFile io.Writer = os.Stderr
- if *logFD > -1 {
- logFile = os.NewFile(uintptr(*logFD), "log file")
- } else if *logFilename != "" {
- // We must set O_APPEND and not O_TRUNC because Docker passes
- // the same log file for all commands (and also parses these
- // log files), so we can't destroy them on each command.
- f, err := os.OpenFile(*logFilename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
- if err != nil {
- cmd.Fatalf("error opening log file %q: %v", *logFilename, err)
- }
- logFile = f
- } else if subcommand == "do" {
- logFile = ioutil.Discard
- }
-
- e := newEmitter(*logFormat, logFile)
-
+ var e log.Emitter
if *debugLogFD > -1 {
f := os.NewFile(uintptr(*debugLogFD), "debug log file")
@@ -201,28 +206,31 @@ func main() {
cmd.Fatalf("flag --debug-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand)
}
- // If we are the boot process, then we own our stdio FDs and
- // can do what we want with them. Since Docker and Containerd
- // both eat boot's stderr, we dup our stderr to the provided
- // log FD so that panics will appear in the logs, rather than
- // just disappear.
+ // If we are the boot process, then we own our stdio FDs and can do what we
+ // want with them. Since Docker and Containerd both eat boot's stderr, we
+ // dup our stderr to the provided log FD so that panics will appear in the
+ // logs, rather than just disappear.
if err := syscall.Dup2(int(f.Fd()), int(os.Stderr.Fd())); err != nil {
cmd.Fatalf("error dup'ing fd %d to stderr: %v", f.Fd(), err)
}
- if logFile == os.Stderr {
- // Suppress logging to stderr when debug log is enabled. Otherwise all
- // messages will be duplicated in the debug log (see Dup2() call above).
- e = newEmitter(*debugLogFormat, f)
- } else {
- e = log.MultiEmitter{e, newEmitter(*debugLogFormat, f)}
- }
+ e = newEmitter(*debugLogFormat, f)
+
} else if *debugLog != "" {
f, err := specutils.DebugLogFile(*debugLog, subcommand)
if err != nil {
cmd.Fatalf("error opening debug log file in %q: %v", *debugLog, err)
}
- e = log.MultiEmitter{e, newEmitter(*debugLogFormat, f)}
+ e = newEmitter(*debugLogFormat, f)
+
+ } else {
+ // Stderr is reserved for the application, just discard the logs if no debug
+ // log is specified.
+ e = newEmitter("text", ioutil.Discard)
+ }
+
+ if *alsoLogToStderr {
+ e = log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)}
}
log.SetTarget(e)
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index 0460d5f1a..1fd091514 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -68,7 +68,7 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi
// Build the path to the net namespace of the sandbox process.
// This is what we will copy.
nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net")
- if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.GSO); err != nil {
+ if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.GSO, conf.NumNetworkChannels); err != nil {
return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err)
}
case boot.NetworkHost:
@@ -138,7 +138,7 @@ func isRootNS() (bool, error) {
// createInterfacesAndRoutesFromNS scrapes the interface and routes from the
// net namespace with the given path, creates them in the sandbox, and removes
// them from the host.
-func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO bool) error {
+func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO bool, numNetworkChannels int) error {
// Join the network namespace that we will be copying.
restore, err := joinNetNS(nsPath)
if err != nil {
@@ -202,25 +202,6 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
continue
}
- // Create the socket.
- const protocol = 0x0300 // htons(ETH_P_ALL)
- fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol)
- if err != nil {
- return fmt.Errorf("unable to create raw socket: %v", err)
- }
- deviceFile := os.NewFile(uintptr(fd), "raw-device-fd")
-
- // Bind to the appropriate device.
- ll := syscall.SockaddrLinklayer{
- Protocol: protocol,
- Ifindex: iface.Index,
- Hatype: 0, // No ARP type.
- Pkttype: syscall.PACKET_OTHERHOST,
- }
- if err := syscall.Bind(fd, &ll); err != nil {
- return fmt.Errorf("unable to bind to %q: %v", iface.Name, err)
- }
-
// Scrape the routes before removing the address, since that
// will remove the routes as well.
routes, def, err := routesForIface(iface)
@@ -236,9 +217,10 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
}
link := boot.FDBasedLink{
- Name: iface.Name,
- MTU: iface.MTU,
- Routes: routes,
+ Name: iface.Name,
+ MTU: iface.MTU,
+ Routes: routes,
+ NumChannels: numNetworkChannels,
}
// Get the link for the interface.
@@ -248,30 +230,23 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
}
link.LinkAddress = []byte(ifaceLink.Attrs().HardwareAddr)
- if enableGSO {
- gso, err := isGSOEnabled(fd, iface.Name)
+ log.Debugf("Setting up network channels")
+ // Create the socket for the device.
+ for i := 0; i < link.NumChannels; i++ {
+ log.Debugf("Creating Channel %d", i)
+ socketEntry, err := createSocket(iface, ifaceLink, enableGSO)
if err != nil {
- return fmt.Errorf("getting GSO for interface %q: %v", iface.Name, err)
+ return fmt.Errorf("failed to createSocket for %s : %v", iface.Name, err)
}
- if gso {
- if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil {
- return fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err)
- }
- link.GSOMaxSize = ifaceLink.Attrs().GSOMaxSize
+ if i == 0 {
+ link.GSOMaxSize = socketEntry.gsoMaxSize
} else {
- log.Infof("GSO not available in host.")
+ if link.GSOMaxSize != socketEntry.gsoMaxSize {
+ return fmt.Errorf("inconsistent gsoMaxSize %d and %d when creating multiple channels for same interface: %s",
+ link.GSOMaxSize, socketEntry.gsoMaxSize, iface.Name)
+ }
}
- }
-
- // Use SO_RCVBUFFORCE because on linux the receive buffer for an
- // AF_PACKET socket is capped by "net.core.rmem_max". rmem_max
- // defaults to a unusually low value of 208KB. This is too low
- // for gVisor to be able to receive packets at high throughputs
- // without incurring packet drops.
- const rcvBufSize = 4 << 20 // 4MB.
-
- if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, rcvBufSize); err != nil {
- return fmt.Errorf("failed to increase socket rcv buffer to %d: %v", rcvBufSize, err)
+ args.FilePayload.Files = append(args.FilePayload.Files, socketEntry.deviceFile)
}
// Collect the addresses for the interface, enable forwarding,
@@ -285,7 +260,6 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
}
}
- args.FilePayload.Files = append(args.FilePayload.Files, deviceFile)
args.FDBasedLinks = append(args.FDBasedLinks, link)
}
@@ -296,6 +270,61 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
return nil
}
+type socketEntry struct {
+ deviceFile *os.File
+ gsoMaxSize uint32
+}
+
+// createSocket creates an underlying AF_PACKET socket and configures it for use by
+// the sentry and returns an *os.File that wraps the underlying socket fd.
+func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) (*socketEntry, error) {
+ // Create the socket.
+ const protocol = 0x0300 // htons(ETH_P_ALL)
+ fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol)
+ if err != nil {
+ return nil, fmt.Errorf("unable to create raw socket: %v", err)
+ }
+ deviceFile := os.NewFile(uintptr(fd), "raw-device-fd")
+ // Bind to the appropriate device.
+ ll := syscall.SockaddrLinklayer{
+ Protocol: protocol,
+ Ifindex: iface.Index,
+ Hatype: 0, // No ARP type.
+ Pkttype: syscall.PACKET_OTHERHOST,
+ }
+ if err := syscall.Bind(fd, &ll); err != nil {
+ return nil, fmt.Errorf("unable to bind to %q: %v", iface.Name, err)
+ }
+
+ gsoMaxSize := uint32(0)
+ if enableGSO {
+ gso, err := isGSOEnabled(fd, iface.Name)
+ if err != nil {
+ return nil, fmt.Errorf("getting GSO for interface %q: %v", iface.Name, err)
+ }
+ if gso {
+ if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil {
+ return nil, fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err)
+ }
+ gsoMaxSize = ifaceLink.Attrs().GSOMaxSize
+ } else {
+ log.Infof("GSO not available in host.")
+ }
+ }
+
+ // Use SO_RCVBUFFORCE because on linux the receive buffer for an
+ // AF_PACKET socket is capped by "net.core.rmem_max". rmem_max
+ // defaults to a unusually low value of 208KB. This is too low
+ // for gVisor to be able to receive packets at high throughputs
+ // without incurring packet drops.
+ const rcvBufSize = 4 << 20 // 4MB.
+
+ if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, rcvBufSize); err != nil {
+ return nil, fmt.Errorf("failed to increase socket rcv buffer to %d: %v", rcvBufSize, err)
+ }
+ return &socketEntry{deviceFile, gsoMaxSize}, nil
+}
+
// loopbackLinks collects the links for a loopback interface.
func loopbackLinks(iface net.Interface, addrs []net.Addr) ([]boot.LoopbackLink, error) {
var links []boot.LoopbackLink