summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/boot/loader.go90
-rw-r--r--runsc/boot/loader_test.go11
-rw-r--r--runsc/cgroup/BUILD24
-rw-r--r--runsc/cgroup/cgroup.go405
-rw-r--r--runsc/cgroup/cgroup_test.go56
-rw-r--r--runsc/cmd/boot.go23
-rw-r--r--runsc/container/container.go16
-rw-r--r--runsc/sandbox/BUILD1
-rw-r--r--runsc/sandbox/sandbox.go57
-rw-r--r--runsc/specutils/BUILD1
-rw-r--r--runsc/specutils/cpu.go90
-rw-r--r--runsc/specutils/specutils.go37
-rw-r--r--runsc/test/integration/BUILD4
-rw-r--r--runsc/test/integration/integration_test.go84
-rw-r--r--runsc/test/testutil/docker.go9
15 files changed, 776 insertions, 132 deletions
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 1ad6b09f4..dc3c6c3d0 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -20,6 +20,7 @@ import (
"math/rand"
"os"
"os/signal"
+ "runtime"
"sync"
"sync/atomic"
"syscall"
@@ -138,14 +139,39 @@ func init() {
kernel.RegisterSyscallTable(slinux.AMD64)
}
+// Args are the arguments for New().
+type Args struct {
+ // ID is the sandbox ID.
+ ID string
+ // Spec is the sandbox specification.
+ Spec *specs.Spec
+ // Conf is the system configuration.
+ Conf *Config
+ // ControllerFD is the FD to the URPC controller.
+ ControllerFD int
+ // DeviceFD is an optional argument that is passed to the platform.
+ DeviceFD int
+ // GoferFDs is an array of FDs used to connect with the Gofer.
+ GoferFDs []int
+ // StdioFDs is the stdio for the application.
+ StdioFDs []int
+ // Console is set to true if using TTY.
+ Console bool
+ // NumCPU is the number of CPUs to create inside the sandbox. When it is 0,
+ // New() uses runtime.NumCPU() instead.
+ NumCPU int
+ // TotalMem is the initial amount of total memory to report back to the
+ // container. New() only applies it when it is greater than 0.
+ TotalMem uint64
+}
+
// New initializes a new kernel loader configured by spec.
// New also handles setting up a kernel for restoring a container.
-func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int, goferFDs []int, stdioFDs []int, console bool) (*Loader, error) {
+func New(args Args) (*Loader, error) {
if err := usage.Init(); err != nil {
return nil, fmt.Errorf("error setting up memory usage: %v", err)
}
// Create kernel and platform.
- p, err := createPlatform(conf, deviceFD)
+ p, err := createPlatform(args.Conf, args.DeviceFD)
if err != nil {
return nil, fmt.Errorf("error creating platform: %v", err)
}
@@ -168,7 +194,7 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int,
}
tk.SetClocks(time.NewCalibratedClocks())
- if err := enableStrace(conf); err != nil {
+ if err := enableStrace(args.Conf); err != nil {
return nil, fmt.Errorf("failed to enable strace: %v", err)
}
@@ -176,35 +202,41 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int,
// this point. Netns is configured before Run() is called. Netstack is
// configured using a control uRPC message. Host network is configured inside
// Run().
- networkStack, err := newEmptyNetworkStack(conf, k)
+ networkStack, err := newEmptyNetworkStack(args.Conf, k)
if err != nil {
return nil, fmt.Errorf("failed to create network: %v", err)
}
// Create capabilities.
- caps, err := specutils.Capabilities(spec.Process.Capabilities)
+ caps, err := specutils.Capabilities(args.Spec.Process.Capabilities)
if err != nil {
return nil, fmt.Errorf("error creating capabilities: %v", err)
}
// Convert the spec's additional GIDs to KGIDs.
- extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids))
- for _, GID := range spec.Process.User.AdditionalGids {
+ extraKGIDs := make([]auth.KGID, 0, len(args.Spec.Process.User.AdditionalGids))
+ for _, GID := range args.Spec.Process.User.AdditionalGids {
extraKGIDs = append(extraKGIDs, auth.KGID(GID))
}
// Create credentials.
creds := auth.NewUserCredentials(
- auth.KUID(spec.Process.User.UID),
- auth.KGID(spec.Process.User.GID),
+ auth.KUID(args.Spec.Process.User.UID),
+ auth.KGID(args.Spec.Process.User.GID),
extraKGIDs,
caps,
auth.NewRootUserNamespace())
- // Get CPU numbers from spec.
- cpuNum, err := specutils.CalculateCPUNumber(spec)
- if err != nil {
- return nil, fmt.Errorf("cannot get cpus from spec: %v", err)
+ if args.NumCPU == 0 {
+ args.NumCPU = runtime.NumCPU()
+ }
+ log.Infof("CPUs: %d", args.NumCPU)
+
+ if args.TotalMem > 0 {
+ // Adjust the total memory returned by the Sentry so that applications that
+ // use /proc/meminfo can make allocations based on this limit.
+ usage.MinimumTotalMemoryBytes = args.TotalMem
+ // Divide by 1<<30 (bytes per GiB). In Go '^' is bitwise XOR, so the
+ // expression "2^30" is the constant 28, not two to the thirtieth power.
+ log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/float64(1<<30))
}
// Initiate the Kernel object, which is required by the Context passed
@@ -214,9 +246,9 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int,
Timekeeper: tk,
RootUserNamespace: creds.UserNamespace,
NetworkStack: networkStack,
- ApplicationCores: uint(cpuNum),
+ ApplicationCores: uint(args.NumCPU),
Vdso: vdso,
- RootUTSNamespace: kernel.NewUTSNamespace(spec.Hostname, "", creds.UserNamespace),
+ RootUTSNamespace: kernel.NewUTSNamespace(args.Spec.Hostname, "", creds.UserNamespace),
RootIPCNamespace: kernel.NewIPCNamespace(creds.UserNamespace),
RootAbstractSocketNamespace: kernel.NewAbstractSocketNamespace(),
}); err != nil {
@@ -224,7 +256,7 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int,
}
// Turn on packet logging if enabled.
- if conf.LogPackets {
+ if args.Conf.LogPackets {
log.Infof("Packet logging enabled")
atomic.StoreUint32(&sniffer.LogPackets, 1)
} else {
@@ -233,7 +265,7 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int,
}
// Create a watchdog.
- watchdog := watchdog.New(k, watchdog.DefaultTimeout, conf.WatchdogAction)
+ watchdog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction)
// Create the control server using the provided FD.
//
@@ -244,7 +276,7 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int,
// misconfigured process will cause an error, and we want the control
// server up before that so that we don't time out trying to connect to
// it.
- ctrl, err := newController(controllerFD, k, watchdog)
+ ctrl, err := newController(args.ControllerFD, k, watchdog)
if err != nil {
return nil, fmt.Errorf("error creating control server: %v", err)
}
@@ -255,20 +287,20 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int,
return nil, fmt.Errorf("failed to ignore child stop signals: %v", err)
}
// Ensure that signals received are forwarded to the emulated kernel.
- ps := syscall.Signal(conf.PanicSignal)
+ ps := syscall.Signal(args.Conf.PanicSignal)
startSignalForwarding := sighandling.PrepareForwarding(k, ps)
- if conf.PanicSignal != -1 {
- // Panics if the sentry receives 'conf.PanicSignal'.
+ if args.Conf.PanicSignal != -1 {
+ // Panics if the sentry receives 'Config.PanicSignal'.
panicChan := make(chan os.Signal, 1)
signal.Notify(panicChan, ps)
go func() { // S/R-SAFE: causes sentry panic.
<-panicChan
panic("Signal-induced panic")
}()
- log.Infof("Panic signal set to %v(%d)", ps, conf.PanicSignal)
+ log.Infof("Panic signal set to %v(%d)", ps, args.Conf.PanicSignal)
}
- procArgs, err := newProcess(id, spec, creds, k)
+ procArgs, err := newProcess(args.ID, args.Spec, creds, k)
if err != nil {
return nil, fmt.Errorf("failed to create root process: %v", err)
}
@@ -276,15 +308,15 @@ func New(id string, spec *specs.Spec, conf *Config, controllerFD, deviceFD int,
l := &Loader{
k: k,
ctrl: ctrl,
- conf: conf,
- console: console,
+ conf: args.Conf,
+ console: args.Console,
watchdog: watchdog,
- spec: spec,
- goferFDs: goferFDs,
- stdioFDs: stdioFDs,
+ spec: args.Spec,
+ goferFDs: args.GoferFDs,
+ stdioFDs: args.StdioFDs,
startSignalForwarding: startSignalForwarding,
rootProcArgs: procArgs,
- sandboxID: id,
+ sandboxID: args.ID,
processes: make(map[execID]*execProcess),
}
ctrl.manager.l = l
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index ea8411a8b..10efa4427 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -102,7 +102,16 @@ func createLoader() (*Loader, func(), error) {
}
stdio := []int{int(os.Stdin.Fd()), int(os.Stdout.Fd()), int(os.Stderr.Fd())}
- l, err := New("foo", spec, conf, fd, -1 /* device fd */, []int{sandEnd}, stdio, false)
+ args := Args{
+ ID: "foo",
+ Spec: spec,
+ Conf: conf,
+ ControllerFD: fd,
+ DeviceFD: -1,
+ GoferFDs: []int{sandEnd},
+ StdioFDs: stdio,
+ }
+ l, err := New(args)
if err != nil {
cleanup()
return nil, nil, err
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD
new file mode 100644
index 000000000..4a535d230
--- /dev/null
+++ b/runsc/cgroup/BUILD
@@ -0,0 +1,24 @@
+package(licenses = ["notice"]) # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+ name = "cgroup",
+ srcs = ["cgroup.go"],
+ importpath = "gvisor.googlesource.com/gvisor/runsc/cgroup",
+ visibility = [
+ "//runsc:__subpackages__",
+ ],
+ deps = [
+ "//pkg/log",
+ "//runsc/specutils",
+ "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ ],
+)
+
+go_test(
+ name = "cgroup_test",
+ size = "small",
+ srcs = ["cgroup_test.go"],
+ embed = [":cgroup"],
+)
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
new file mode 100644
index 000000000..6a0092be8
--- /dev/null
+++ b/runsc/cgroup/cgroup.go
@@ -0,0 +1,405 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package cgroup provides an interface to read and write configuration to
+// cgroup.
+package cgroup
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "syscall"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+const (
+ cgroupRoot = "/sys/fs/cgroup"
+)
+
+// controllers maps the name of each cgroup controller handled by this package
+// to the handler that applies the OCI resource settings for it.
+var controllers = map[string]controller{
+ "blkio": &blockIO{},
+ "cpu": &cpu{},
+ "cpuset": &cpuSet{},
+ "memory": &memory{},
+ "net_cls": &networkClass{},
+ "net_prio": &networkPrio{},
+
+ // These controllers either don't have anything in the OCI spec or are
+ // irrelevant for a sandbox, e.g. pids.
+ "devices": &noop{},
+ "freezer": &noop{},
+ "perf_event": &noop{},
+ "pids": &noop{},
+ "systemd": &noop{},
+}
+
+// setOptionalValueInt writes *val, formatted in base 10, to the file 'name'
+// under 'path'. A nil pointer or a zero value counts as "not set" and nothing
+// is written.
+func setOptionalValueInt(path, name string, val *int64) error {
+	if val != nil && *val != 0 {
+		return setValue(path, name, strconv.FormatInt(*val, 10))
+	}
+	return nil
+}
+
+// setOptionalValueUint writes *val, formatted in base 10, to the file 'name'
+// under 'path'. A nil pointer or a zero value counts as "not set" and nothing
+// is written.
+func setOptionalValueUint(path, name string, val *uint64) error {
+	if val != nil && *val != 0 {
+		return setValue(path, name, strconv.FormatUint(*val, 10))
+	}
+	return nil
+}
+
+// setOptionalValueUint32 is the uint32 flavor of setOptionalValueUint: the
+// value is widened to uint64, formatted in base 10 and written to 'name'
+// under 'path', unless the pointer is nil or the value is zero.
+func setOptionalValueUint32(path, name string, val *uint32) error {
+	if val != nil && *val != 0 {
+		wide := uint64(*val)
+		return setValue(path, name, strconv.FormatUint(wide, 10))
+	}
+	return nil
+}
+
+// setOptionalValueUint16 is the uint16 flavor of setOptionalValueUint: the
+// value is widened to uint64, formatted in base 10 and written to 'name'
+// under 'path', unless the pointer is nil or the value is zero.
+func setOptionalValueUint16(path, name string, val *uint16) error {
+	if val != nil && *val != 0 {
+		wide := uint64(*val)
+		return setValue(path, name, strconv.FormatUint(wide, 10))
+	}
+	return nil
+}
+
+// setValue writes 'data' to the file 'name' located in directory 'path'.
+func setValue(path, name, data string) error {
+	target := filepath.Join(path, name)
+	payload := []byte(data)
+	return ioutil.WriteFile(target, payload, 0700)
+}
+
+// getValue returns the whole content of the file 'name' located in directory
+// 'path'. The content is returned as-is, without trimming.
+func getValue(path, name string) (string, error) {
+	raw, err := ioutil.ReadFile(filepath.Join(path, name))
+	if err == nil {
+		return string(raw), nil
+	}
+	return "", err
+}
+
+// fillFromAncestor makes sure the cgroup file at 'path' has a value and
+// returns that value. A file that already holds non-blank content is left
+// untouched. Otherwise the value comes from the same-named file in the
+// nearest ancestor directory that has one, and is written to 'path' and to
+// every blank file visited on the way up.
+func fillFromAncestor(path string) (string, error) {
+	raw, err := ioutil.ReadFile(path)
+	if err != nil {
+		return "", err
+	}
+	if current := strings.TrimSpace(string(raw)); current != "" {
+		// Already populated, nothing to inherit.
+		return current, nil
+	}
+
+	// Blank: resolve the parent's copy first, then mirror it here.
+	parentFile := filepath.Join(filepath.Dir(filepath.Dir(path)), filepath.Base(path))
+	inherited, err := fillFromAncestor(parentFile)
+	if err != nil {
+		return "", err
+	}
+	if err := ioutil.WriteFile(path, []byte(inherited), 0700); err != nil {
+		return "", err
+	}
+	return inherited, nil
+}
+
+// countCpuset returns how many CPUs are named by 'cpuset', a comma-separated
+// list whose items are either a single CPU number or an inclusive
+// "start-end" range. Any item that cannot be parsed, or a range whose start
+// is greater than its end, makes the whole call fail.
+func countCpuset(cpuset string) (int, error) {
+	total := 0
+	for _, item := range strings.Split(cpuset, ",") {
+		bounds := strings.Split(item, "-")
+		if len(bounds) > 2 {
+			return 0, fmt.Errorf("invalid cpuset: %q", item)
+		}
+
+		first, err := strconv.Atoi(bounds[0])
+		if err != nil {
+			return 0, err
+		}
+		if len(bounds) == 1 {
+			// Single CPU number.
+			total++
+			continue
+		}
+
+		// Inclusive range "first-last".
+		last, err := strconv.Atoi(bounds[1])
+		if err != nil {
+			return 0, err
+		}
+		if first < 0 || last < 0 || first > last {
+			return 0, fmt.Errorf("invalid cpuset: %q", item)
+		}
+		total += last - first + 1
+	}
+	return total, nil
+}
+
+// Cgroup represents a group inside all controllers. For example: Name='/foo/bar'
+// maps to /sys/fs/cgroup/<controller>/foo/bar on all controllers.
+type Cgroup struct {
+ // Name is the cgroup path, joined under each controller's directory.
+ Name string `json:"name"`
+ // Own is set to true by Install() when it creates the cgroup itself.
+ // Uninstall() only removes the directories when Own is true.
+ Own bool `json:"own"`
+}
+
+// New returns a Cgroup named after spec.Linux.CgroupsPath together with true.
+// When the spec has no Linux section or the cgroups path is empty, it returns
+// nil and false.
+func New(spec *specs.Spec) (*Cgroup, bool) {
+	if spec.Linux != nil && spec.Linux.CgroupsPath != "" {
+		return &Cgroup{Name: spec.Linux.CgroupsPath}, true
+	}
+	return nil, false
+}
+
+// Install creates the cgroup directory in every known controller and applies
+// 'res' to each of them. If the cgroup already exists under the "memory"
+// controller, it is assumed to have been pre-configured by the caller:
+// nothing is created and 'res' is ignored. On failure, whatever was created
+// is removed again through Uninstall().
+func (c *Cgroup) Install(res *specs.LinuxResources) error {
+	_, statErr := os.Stat(c.makePath("memory"))
+	if statErr == nil {
+		// The cgroup was set up by the caller. Leave its configuration alone;
+		// the sandbox/gofer simply join it when they start.
+		log.Debugf("Using pre-created cgroup %q", c.Name)
+		return nil
+	}
+
+	// From here on the cgroup directories belong to us.
+	log.Debugf("Creating cgroup %q", c.Name)
+	c.Own = true
+	undo := specutils.MakeCleanup(func() { c.Uninstall() })
+	defer undo.Clean()
+
+	for name, handler := range controllers {
+		dir := c.makePath(name)
+		if err := os.MkdirAll(dir, 0755); err != nil {
+			return err
+		}
+		if res == nil {
+			continue
+		}
+		if err := handler.set(res, dir); err != nil {
+			return err
+		}
+	}
+	undo.Release()
+	return nil
+}
+
+// Uninstall removes the cgroup directory from every known controller. It does
+// nothing when the cgroup is not owned (c.Own is false), i.e. when it already
+// existed at Install() time. Directories that are already gone are skipped;
+// the first other removal error is returned.
+func (c *Cgroup) Uninstall() error {
+	if !c.Own {
+		// Managed by the caller, hands off.
+		return nil
+	}
+	log.Debugf("Deleting cgroup %q", c.Name)
+	for name := range controllers {
+		err := syscall.Rmdir(c.makePath(name))
+		if err == nil || os.IsNotExist(err) {
+			continue
+		}
+		return err
+	}
+	return nil
+}
+
+// Add writes 'pid' to the "cgroup.procs" file of this cgroup in every known
+// controller, stopping at the first write error.
+func (c *Cgroup) Add(pid int) error {
+	pidStr := strconv.Itoa(pid)
+	for name := range controllers {
+		dir := c.makePath(name)
+		if err := setValue(dir, "cgroup.procs", pidStr); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// NumCPU reads 'cpuset.cpus' from this cgroup's cpuset controller directory
+// and returns the number of CPUs it lists.
+func (c *Cgroup) NumCPU() (int, error) {
+	raw, err := getValue(c.makePath("cpuset"), "cpuset.cpus")
+	if err != nil {
+		return 0, err
+	}
+	list := strings.TrimSpace(raw)
+	return countCpuset(list)
+}
+
+// MemoryLimit reads 'memory.limit_in_bytes' from this cgroup's memory
+// controller directory and returns it parsed as an unsigned base-10 number.
+func (c *Cgroup) MemoryLimit() (uint64, error) {
+	raw, err := getValue(c.makePath("memory"), "memory.limit_in_bytes")
+	if err != nil {
+		return 0, err
+	}
+	digits := strings.TrimSpace(raw)
+	return strconv.ParseUint(digits, 10, 64)
+}
+
+// makePath returns this cgroup's directory inside the given controller:
+// <cgroupRoot>/<controllerName>/<c.Name>.
+func (c *Cgroup) makePath(controllerName string) string {
+	parts := []string{cgroupRoot, controllerName, c.Name}
+	return filepath.Join(parts...)
+}
+
+// controller applies OCI resource settings to one cgroup controller.
+type controller interface {
+ // set applies the given resources. Install() passes the cgroup's directory
+ // inside the controller as the string argument.
+ set(*specs.LinuxResources, string) error
+}
+
+// noop is the controller used for cgroup controllers that need no
+// configuration.
+type noop struct{}
+
+// set ignores its arguments and always succeeds.
+func (*noop) set(*specs.LinuxResources, string) error {
+ return nil
+}
+
+// memory configures the "memory" cgroup controller.
+type memory struct{}
+
+// set writes the memory settings found in spec.Memory to the cgroup files
+// under 'path'. Unset (nil or zero) values are skipped, and nothing is done
+// when spec.Memory is nil. The first write error aborts the call.
+func (*memory) set(spec *specs.LinuxResources, path string) error {
+	mem := spec.Memory
+	if mem == nil {
+		return nil
+	}
+
+	// Signed limits, written in this order.
+	limits := []struct {
+		file string
+		val  *int64
+	}{
+		{"memory.limit_in_bytes", mem.Limit},
+		{"memory.soft_limit_in_bytes", mem.Reservation},
+		{"memory.memsw.limit_in_bytes", mem.Swap},
+		{"memory.kmem.limit_in_bytes", mem.Kernel},
+		{"memory.kmem.tcp.limit_in_bytes", mem.KernelTCP},
+	}
+	for _, l := range limits {
+		if err := setOptionalValueInt(path, l.file, l.val); err != nil {
+			return err
+		}
+	}
+	if err := setOptionalValueUint(path, "memory.swappiness", mem.Swappiness); err != nil {
+		return err
+	}
+
+	// The OOM killer is only touched when the spec asks to disable it.
+	if mem.DisableOOMKiller == nil || !*mem.DisableOOMKiller {
+		return nil
+	}
+	return setValue(path, "memory.oom_control", "1")
+}
+
+// cpu configures the "cpu" cgroup controller.
+type cpu struct{}
+
+// set writes shares, CFS quota and CFS period from spec.CPU to the cgroup
+// files under 'path', in that order, skipping unset (nil or zero) values.
+// Nothing is done when spec.CPU is nil.
+func (*cpu) set(spec *specs.LinuxResources, path string) error {
+	cfg := spec.CPU
+	if cfg == nil {
+		return nil
+	}
+	err := setOptionalValueUint(path, "cpu.shares", cfg.Shares)
+	if err == nil {
+		err = setOptionalValueInt(path, "cpu.cfs_quota_us", cfg.Quota)
+	}
+	if err == nil {
+		err = setOptionalValueUint(path, "cpu.cfs_period_us", cfg.Period)
+	}
+	return err
+}
+
+// cpuSet configures the "cpuset" cgroup controller.
+type cpuSet struct{}
+
+// set populates 'cpuset.cpus' and 'cpuset.mems' under 'path'. Both files are
+// required but start out empty on a new cgroup, so each one is either written
+// from the spec or, when the spec leaves it unset, inherited from an ancestor
+// cgroup.
+func (*cpuSet) set(spec *specs.LinuxResources, path string) error {
+	var cpus, mems string
+	if spec.CPU != nil {
+		cpus, mems = spec.CPU.Cpus, spec.CPU.Mems
+	}
+
+	if cpus != "" {
+		if err := setValue(path, "cpuset.cpus", cpus); err != nil {
+			return err
+		}
+	} else if _, err := fillFromAncestor(filepath.Join(path, "cpuset.cpus")); err != nil {
+		return err
+	}
+
+	if mems != "" {
+		return setValue(path, "cpuset.mems", mems)
+	}
+	_, err := fillFromAncestor(filepath.Join(path, "cpuset.mems"))
+	return err
+}
+
+// blockIO configures the "blkio" cgroup controller.
+type blockIO struct{}
+
+// set writes the block I/O settings found in spec.BlockIO to the cgroup files
+// under 'path': the cgroup-wide weights, the per-device weights and the
+// per-device throttle limits. Unset values are skipped and nothing is done
+// when spec.BlockIO is nil. The first write error aborts the call.
+func (*blockIO) set(spec *specs.LinuxResources, path string) error {
+	if spec.BlockIO == nil {
+		return nil
+	}
+
+	if err := setOptionalValueUint16(path, "blkio.weight", spec.BlockIO.Weight); err != nil {
+		return err
+	}
+	if err := setOptionalValueUint16(path, "blkio.leaf_weight", spec.BlockIO.LeafWeight); err != nil {
+		return err
+	}
+
+	for _, dev := range spec.BlockIO.WeightDevice {
+		// Weight and LeafWeight are optional pointers in the OCI spec. They
+		// must be dereferenced: formatting the pointer itself with %d prints
+		// its address instead of the weight. A nil pointer means "not set".
+		if dev.Weight != nil {
+			val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, *dev.Weight)
+			if err := setValue(path, "blkio.weight_device", val); err != nil {
+				return err
+			}
+		}
+		if dev.LeafWeight != nil {
+			val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, *dev.LeafWeight)
+			if err := setValue(path, "blkio.leaf_weight_device", val); err != nil {
+				return err
+			}
+		}
+	}
+	if err := setThrottle(path, "blkio.throttle.read_bps_device", spec.BlockIO.ThrottleReadBpsDevice); err != nil {
+		return err
+	}
+	if err := setThrottle(path, "blkio.throttle.write_bps_device", spec.BlockIO.ThrottleWriteBpsDevice); err != nil {
+		return err
+	}
+	if err := setThrottle(path, "blkio.throttle.read_iops_device", spec.BlockIO.ThrottleReadIOPSDevice); err != nil {
+		return err
+	}
+	return setThrottle(path, "blkio.throttle.write_iops_device", spec.BlockIO.ThrottleWriteIOPSDevice)
+}
+
+// setThrottle writes one "<major>:<minor> <rate>" entry per device in 'devs'
+// to the file 'name' under 'path', stopping at the first write error.
+func setThrottle(path, name string, devs []specs.LinuxThrottleDevice) error {
+	for i := range devs {
+		entry := fmt.Sprintf("%d:%d %d", devs[i].Major, devs[i].Minor, devs[i].Rate)
+		if err := setValue(path, name, entry); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// networkClass configures the "net_cls" cgroup controller.
+type networkClass struct{}
+
+// set writes spec.Network.ClassID to 'net_cls.classid' under 'path' when it
+// is set (non-nil and non-zero). Nothing is done when spec.Network is nil.
+func (*networkClass) set(spec *specs.LinuxResources, path string) error {
+	if net := spec.Network; net != nil {
+		return setOptionalValueUint32(path, "net_cls.classid", net.ClassID)
+	}
+	return nil
+}
+
+// networkPrio configures the "net_prio" cgroup controller.
+type networkPrio struct{}
+
+// set writes one "<interface name> <priority>" entry to 'net_prio.ifpriomap'
+// under 'path' for every item in spec.Network.Priorities, stopping at the
+// first write error. Nothing is done when spec.Network is nil.
+func (*networkPrio) set(spec *specs.LinuxResources, path string) error {
+	if spec.Network == nil {
+		return nil
+	}
+	prios := spec.Network.Priorities
+	for i := range prios {
+		entry := fmt.Sprintf("%s %d", prios[i].Name, prios[i].Priority)
+		if err := setValue(path, "net_prio.ifpriomap", entry); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go
new file mode 100644
index 000000000..cde915329
--- /dev/null
+++ b/runsc/cgroup/cgroup_test.go
@@ -0,0 +1,56 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cgroup
+
+import (
+ "testing"
+)
+
+// TestCountCpuset checks countCpuset against valid cpuset lists (single CPUs,
+// ranges and mixes of both) and against malformed ones that must be rejected.
+func TestCountCpuset(t *testing.T) {
+	type testCase struct {
+		str   string
+		want  int
+		error bool
+	}
+	cases := []testCase{
+		{str: "0", want: 1},
+		{str: "0,1,2,8,9,10", want: 6},
+		{str: "0-1", want: 2},
+		{str: "0-7", want: 8},
+		{str: "0-7,16,32-39,64,65", want: 19},
+		{str: "a", error: true},
+		{str: "5-a", error: true},
+		{str: "a-5", error: true},
+		{str: "-10", error: true},
+		{str: "15-", error: true},
+		{str: "-", error: true},
+		{str: "--", error: true},
+	}
+	for _, tc := range cases {
+		t.Run(tc.str, func(t *testing.T) {
+			got, err := countCpuset(tc.str)
+			if tc.error {
+				// Malformed input: only the failure matters.
+				if err == nil {
+					t.Errorf("countCpuset(%q) should have failed", tc.str)
+				}
+				return
+			}
+			if err != nil {
+				t.Errorf("countCpuset(%q) failed: %v", tc.str, err)
+			}
+			if tc.want != got {
+				t.Errorf("countCpuset(%q) want: %d, got: %d", tc.str, tc.want, got)
+			}
+		})
+	}
+}
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index c6f78f63f..d26e92bcd 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -59,6 +59,13 @@ type Boot struct {
// applyCaps determines if capabilities defined in the spec should be applied
// to the process.
applyCaps bool
+
+ // cpuNum is the number of CPUs to create inside the sandbox.
+ cpuNum int
+
+ // totalMem sets the initial amount of total memory to report back to the
+ // container.
+ totalMem uint64
}
// Name implements subcommands.Command.Name.
@@ -86,6 +93,8 @@ func (b *Boot) SetFlags(f *flag.FlagSet) {
f.Var(&b.stdioFDs, "stdio-fds", "list of FDs containing sandbox stdin, stdout, and stderr in that order")
f.BoolVar(&b.console, "console", false, "set to true if the sandbox should allow terminal ioctl(2) syscalls")
f.BoolVar(&b.applyCaps, "apply-caps", false, "if true, apply capabilities defined in the spec to the process")
+ f.IntVar(&b.cpuNum, "cpu-num", 0, "number of CPUs to create inside the sandbox")
+ f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container")
}
// Execute implements subcommands.Command.Execute. It starts a sandbox in a
@@ -143,7 +152,19 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
// Create the loader.
- l, err := boot.New(f.Arg(0), spec, conf, b.controllerFD, b.deviceFD, b.ioFDs.GetArray(), b.stdioFDs.GetArray(), b.console)
+ bootArgs := boot.Args{
+ ID: f.Arg(0),
+ Spec: spec,
+ Conf: conf,
+ ControllerFD: b.controllerFD,
+ DeviceFD: b.deviceFD,
+ GoferFDs: b.ioFDs.GetArray(),
+ StdioFDs: b.stdioFDs.GetArray(),
+ Console: b.console,
+ NumCPU: b.cpuNum,
+ TotalMem: b.totalMem,
+ }
+ l, err := boot.New(bootArgs)
if err != nil {
Fatalf("error creating loader: %v", err)
}
diff --git a/runsc/container/container.go b/runsc/container/container.go
index f0cdee8d3..eaa62daf1 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -262,6 +262,8 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
Status: Creating,
Owner: os.Getenv("USER"),
}
+ cu := specutils.MakeCleanup(func() { c.Destroy() })
+ defer cu.Clean()
// If the metadata annotations indicate that this container should be
// started in an existing sandbox, we must do so. The metadata will
@@ -276,12 +278,13 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// Start a new sandbox for this container. Any errors after this point
// must destroy the container.
- s, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket, ioFiles)
+ c.Sandbox, err = sandbox.Create(id, spec, conf, bundleDir, consoleSocket, ioFiles)
if err != nil {
- c.Destroy()
return nil, err
}
- c.Sandbox = s
+ if err := c.Sandbox.AddGoferToCgroup(c.GoferPid); err != nil {
+ return nil, err
+ }
} else {
// This is sort of confusing. For a sandbox with a root
// container and a child container in it, runsc sees:
@@ -300,7 +303,6 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// Find the sandbox associated with this ID.
sb, err := Load(conf.RootDir, sbid)
if err != nil {
- c.Destroy()
return nil, err
}
c.Sandbox = sb.Sandbox
@@ -309,7 +311,6 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// Save the metadata file.
if err := c.save(); err != nil {
- c.Destroy()
return nil, err
}
@@ -317,11 +318,11 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// this file is created, so it must be the last thing we do.
if pidFile != "" {
if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(c.SandboxPid())), 0644); err != nil {
- c.Destroy()
return nil, fmt.Errorf("error writing PID file: %v", err)
}
}
+ cu.Release()
return c, nil
}
@@ -358,6 +359,9 @@ func (c *Container) Start(conf *boot.Config) error {
if err := c.Sandbox.Start(c.Spec, conf, c.ID, ioFiles); err != nil {
return err
}
+ if err := c.Sandbox.AddGoferToCgroup(c.GoferPid); err != nil {
+ return err
+ }
}
// "If any poststart hook fails, the runtime MUST log a warning, but
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index 09965dcc0..eb9c4cd76 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -21,6 +21,7 @@ go_library(
"//pkg/sentry/platform/kvm",
"//pkg/urpc",
"//runsc/boot",
+ "//runsc/cgroup",
"//runsc/console",
"//runsc/specutils",
"@com_github_cenkalti_backoff//:go_default_library",
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 847417a15..26d725bdd 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -34,6 +34,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
"gvisor.googlesource.com/gvisor/pkg/urpc"
"gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/cgroup"
"gvisor.googlesource.com/gvisor/runsc/console"
"gvisor.googlesource.com/gvisor/runsc/specutils"
)
@@ -58,12 +59,26 @@ type Sandbox struct {
// Chroot is the path to the chroot directory that the sandbox process
// is running in.
Chroot string `json:"chroot"`
+
+ // Cgroup has the cgroup configuration for the sandbox.
+ Cgroup *cgroup.Cgroup `json:"cgroup"`
}
// Create creates the sandbox process. The caller must call Destroy() on the
// sandbox.
func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket string, ioFiles []*os.File) (*Sandbox, error) {
s := &Sandbox{ID: id}
+ c := specutils.MakeCleanup(func() { s.destroy() })
+ defer c.Clean()
+
+ if cg, ok := cgroup.New(spec); ok {
+ s.Cgroup = cg
+
+ // If there is cgroup config, install it before creating sandbox process.
+ if err := s.Cgroup.Install(spec.Linux.Resources); err != nil {
+ return nil, fmt.Errorf("error configuring cgroup: %v", err)
+ }
+ }
// Create the sandbox process.
if err := s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, ioFiles); err != nil {
@@ -75,6 +90,13 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
return nil, err
}
+ if s.Cgroup != nil {
+ if err := s.Cgroup.Add(s.Pid); err != nil {
+ return nil, fmt.Errorf("error adding sandbox to cgroup: %v", err)
+ }
+ }
+
+ c.Release()
return s, nil
}
@@ -483,6 +505,24 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
}
}
+ if s.Cgroup != nil {
+ cpuNum, err := s.Cgroup.NumCPU()
+ if err != nil {
+ return fmt.Errorf("error getting cpu count from cgroups: %v", err)
+ }
+ cmd.Args = append(cmd.Args, "--cpu-num", strconv.Itoa(cpuNum))
+
+ mem, err := s.Cgroup.MemoryLimit()
+ if err != nil {
+ return fmt.Errorf("error getting memory limit from cgroups: %v", err)
+ }
+ // When memory limit is unset, a "large" number is returned. In that case,
+ // just stick with the default.
+ if mem < 0x7ffffffffffff000 {
+ cmd.Args = append(cmd.Args, "--total-memory", strconv.FormatUint(mem, 10))
+ }
+ }
+
// Add container as the last argument.
cmd.Args = append(cmd.Args, s.ID)
@@ -590,8 +630,15 @@ func (s *Sandbox) destroy() error {
}
}
+ if s.Cgroup != nil {
+ if err := s.Cgroup.Uninstall(); err != nil {
+ return err
+ }
+ }
if s.Chroot != "" {
- return tearDownChroot(s.Chroot)
+ if err := tearDownChroot(s.Chroot); err != nil {
+ return err
+ }
}
return nil
@@ -761,6 +808,14 @@ func (s *Sandbox) waitForStopped() error {
return backoff.Retry(op, b)
}
+// AddGoferToCgroup adds the gofer process 'pid' to the sandbox's cgroup. It
+// is a no-op when the sandbox has no cgroup configured.
+func (s *Sandbox) AddGoferToCgroup(pid int) error {
+	if s.Cgroup == nil {
+		return nil
+	}
+	return s.Cgroup.Add(pid)
+}
+
// deviceFileForPlatform opens the device file for the given platform. If the
// platform does not need a device file, then nil is returned.
func deviceFileForPlatform(p boot.PlatformType) (*os.File, error) {
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD
index f1a99ce48..e73b2293f 100644
--- a/runsc/specutils/BUILD
+++ b/runsc/specutils/BUILD
@@ -5,7 +5,6 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "specutils",
srcs = [
- "cpu.go",
"namespace.go",
"specutils.go",
],
diff --git a/runsc/specutils/cpu.go b/runsc/specutils/cpu.go
deleted file mode 100644
index 9abe26b64..000000000
--- a/runsc/specutils/cpu.go
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2018 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package specutils
-
-import (
- "fmt"
- "runtime"
- "strconv"
- "strings"
-
- specs "github.com/opencontainers/runtime-spec/specs-go"
-)
-
-// CalculateCPUNumber calculates the number of CPUs that should be exposed
-// inside the sandbox.
-func CalculateCPUNumber(spec *specs.Spec) (int, error) {
- // If spec does not contain CPU field, then return the number of host CPUs.
- if spec == nil || spec.Linux == nil || spec.Linux.Resources == nil || spec.Linux.Resources.CPU == nil {
- return runtime.NumCPU(), nil
- }
- cpuSpec := spec.Linux.Resources.CPU
-
- // If cpuSpec.Cpus is specified, then parse and return that. They must be in
- // the list format for cpusets, which is "a comma-separated list of CPU
- // numbers and ranges of numbers, in ASCII decimal." --man 7 cpuset.
- cpus := cpuSpec.Cpus
- if cpus != "" {
- cpuNum := 0
- for _, subs := range strings.Split(cpus, ",") {
- result, err := parseCPUNumber(subs)
- if err != nil {
- return 0, err
- }
- cpuNum += result
- }
- return cpuNum, nil
- }
-
- // If CPU.Quota and CPU.Period are specified, we can divide them to get an
- // approximation of the number of CPUs needed.
- if cpuSpec.Quota != nil && cpuSpec.Period != nil && *cpuSpec.Period != 0 {
- cpuQuota := *cpuSpec.Quota
- cpuPeriod := *cpuSpec.Period
- return int(cpuQuota)/int(cpuPeriod) + 1, nil
- }
-
- // Default to number of host cpus.
- return runtime.NumCPU(), nil
-}
-
-// parseCPUNumber converts a cpuset string into the number of cpus included in
-// the string , e.g. "3-6" -> 4.
-func parseCPUNumber(cpus string) (int, error) {
- switch cpusSlice := strings.Split(cpus, "-"); len(cpusSlice) {
- case 1:
- // cpus is not a range. We must only check that it is a valid number.
- if _, err := strconv.Atoi(cpus); err != nil {
- return 0, fmt.Errorf("invalid individual cpu number %q", cpus)
- }
- return 1, nil
- case 2:
- // cpus is a range. We must check that start and end are valid numbers,
- // and calculate their difference (inclusively).
- first, err := strconv.Atoi(cpusSlice[0])
- if err != nil || first < 0 {
- return 0, fmt.Errorf("invalid first cpu number %q in range %q", cpusSlice[0], cpus)
- }
- last, err := strconv.Atoi(cpusSlice[1])
- if err != nil || last < 0 {
- return 0, fmt.Errorf("invalid last cpu number %q in range %q", cpusSlice[1], cpus)
- }
- cpuNum := last - first + 1
- if cpuNum <= 0 {
- return 0, fmt.Errorf("cpu range %q does not include positive number of cpus", cpus)
- }
- }
- return 0, fmt.Errorf("invalid cpu string %q", cpus)
-}
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index daf10b875..ac017ba2d 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -43,6 +43,13 @@ func LogSpec(spec *specs.Spec) {
log.Debugf("Spec: %+v", spec)
log.Debugf("Spec.Hooks: %+v", spec.Hooks)
log.Debugf("Spec.Linux: %+v", spec.Linux)
+ if spec.Linux != nil && spec.Linux.Resources != nil {
+ res := spec.Linux.Resources
+ log.Debugf("Spec.Linux.Resources.Memory: %+v", res.Memory)
+ log.Debugf("Spec.Linux.Resources.CPU: %+v", res.CPU)
+ log.Debugf("Spec.Linux.Resources.BlockIO: %+v", res.BlockIO)
+ log.Debugf("Spec.Linux.Resources.Network: %+v", res.Network)
+ }
log.Debugf("Spec.Process: %+v", spec.Process)
log.Debugf("Spec.Root: %+v", spec.Root)
}
@@ -402,3 +409,33 @@ func ContainsStr(strs []string, str string) bool {
}
return false
}
+
+// Cleanup allows defers to be aborted when cleanup needs to happen
+// conditionally. Clean runs the function at most once. Usage:
+// c := MakeCleanup(func() { f.Close() })
+// defer c.Clean() // any failure before Release is called will close the file.
+// ...
+// c.Release() // on success, aborts closing the file so it can be returned.
+// return f
+type Cleanup struct {
+	clean    func() // run by Clean; nil is treated as nothing to do.
+	released bool   // true once Clean must no longer invoke clean.
+}
+
+// MakeCleanup creates a new Cleanup object that runs f when Clean is called.
+func MakeCleanup(f func()) Cleanup {
+	return Cleanup{clean: f}
+}
+
+// Clean calls the cleanup function unless it already ran or was released.
+func (c *Cleanup) Clean() {
+	if !c.released && c.clean != nil {
+		c.released = true // set first so a repeated Clean cannot run it twice.
+		c.clean()
+	}
+}
+
+// Release cancels the cleanup: Clean does nothing after this point.
+func (c *Cleanup) Release() {
+	c.released = true
+}
diff --git a/runsc/test/integration/BUILD b/runsc/test/integration/BUILD
index 4407016ad..726ebf49e 100644
--- a/runsc/test/integration/BUILD
+++ b/runsc/test/integration/BUILD
@@ -15,9 +15,7 @@ go_test(
"manual",
"local",
],
- deps = [
- "//runsc/test/testutil",
- ],
+ deps = ["//runsc/test/testutil"],
)
go_library(
diff --git a/runsc/test/integration/integration_test.go b/runsc/test/integration/integration_test.go
index 5f24aeed5..5480c5bbe 100644
--- a/runsc/test/integration/integration_test.go
+++ b/runsc/test/integration/integration_test.go
@@ -26,6 +26,7 @@ import (
"net"
"net/http"
"os"
+ "strconv"
"strings"
"testing"
"time"
@@ -179,6 +180,89 @@ func TestConnectToSelf(t *testing.T) {
}
}
+// TestMemLimit checks that a docker memory limit is reflected in the MemTotal
+// value that the container reads from /proc/meminfo.
+func TestMemLimit(t *testing.T) {
+	if err := testutil.Pull("alpine"); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := testutil.MakeDocker("cgroup-test")
+	// Register cleanup before running so a failed run does not leak the
+	// named container.
+	defer d.CleanUp()
+
+	cmd := "cat /proc/meminfo | grep MemTotal: | awk '{print $2}'"
+	out, err := d.RunFg("--memory=500MB", "alpine", "sh", "-c", cmd)
+	if err != nil {
+		t.Fatal("docker run failed:", err)
+	}
+
+	// Remove warning message that swap isn't present.
+	if strings.HasPrefix(out, "WARNING") {
+		lines := strings.Split(out, "\n")
+		if len(lines) != 3 {
+			t.Fatalf("invalid output: %s", out)
+		}
+		out = lines[1]
+	}
+
+	got, err := strconv.ParseUint(strings.TrimSpace(out), 10, 64)
+	if err != nil {
+		t.Fatalf("failed to parse %q: %v", out, err)
+	}
+	// MemTotal is reported in kB, so a 500MB limit should read as 500 * 1024.
+	if want := uint64(500 * 1024); got != want {
+		t.Errorf("MemTotal got: %d, want: %d", got, want)
+	}
+}
+
+// TestNumCPU checks that restricting the container to a single host CPU makes
+// /proc/cpuinfo inside the container list exactly one processor.
+func TestNumCPU(t *testing.T) {
+	if err := testutil.Pull("alpine"); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := testutil.MakeDocker("cgroup-test")
+	// Register cleanup before running so a failed run does not leak the
+	// named container.
+	defer d.CleanUp()
+
+	cmd := "cat /proc/cpuinfo | grep 'processor.*:' | wc -l"
+	out, err := d.RunFg("--cpuset-cpus=0", "alpine", "sh", "-c", cmd)
+	if err != nil {
+		t.Fatal("docker run failed:", err)
+	}
+
+	got, err := strconv.Atoi(strings.TrimSpace(out))
+	if err != nil {
+		t.Fatalf("failed to parse %q: %v", out, err)
+	}
+	if want := 1; got != want {
+		t.Errorf("NumCPU got: %d, want: %d", got, want)
+	}
+}
+
+// TestCgroup sets cgroup options and checks that container can start.
+// TODO: Verify that these were set to cgroup on the host.
+func TestCgroup(t *testing.T) {
+	if err := testutil.Pull("alpine"); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := testutil.MakeDocker("cgroup-test")
+	// Register cleanup before running so a failed run does not leak the
+	// named container.
+	defer d.CleanUp()
+
+	// Resource flags first, image name last.
+	args := []string{
+		"--cpu-shares=1000",
+		"--cpu-period=2000",
+		"--cpu-quota=3000",
+		"--cpuset-cpus=0",
+		"--cpuset-mems=0",
+		"--kernel-memory=100MB",
+		"--memory=1GB",
+		"--memory-reservation=500MB",
+		"--memory-swap=2GB",
+		"--memory-swappiness=5",
+		"--blkio-weight=750",
+		"hello-world",
+	}
+	if err := d.Run(args...); err != nil {
+		t.Fatal("docker run failed:", err)
+	}
+
+	if _, err := d.WaitForOutput("Hello from Docker!", 5*time.Second); err != nil {
+		t.Fatalf("docker didn't say hello: %v", err)
+	}
+}
+
func TestMain(m *testing.M) {
testutil.EnsureSupportedDockerVersion()
os.Exit(m.Run())
diff --git a/runsc/test/testutil/docker.go b/runsc/test/testutil/docker.go
index d70b4377a..2f15ab818 100644
--- a/runsc/test/testutil/docker.go
+++ b/runsc/test/testutil/docker.go
@@ -198,6 +198,15 @@ func (d *Docker) Run(args ...string) error {
return err
}
+// RunFg invokes 'docker run' in the foreground, selecting d.Runtime and naming
+// the container d.Name before appending the caller's arguments. It blocks
+// until the container exits and returns the command's output.
+func (d *Docker) RunFg(args ...string) (string, error) {
+	// Fixed flags come first so that caller-supplied args follow them.
+	base := []string{"run", "--runtime", d.Runtime, "--name", d.Name}
+	return do(append(base, args...)...)
+}
+
// Logs calls 'docker logs'.
func (d *Docker) Logs() (string, error) {
return do("logs", d.Name)