diff options
author | Nicolas Lacasse <nlacasse@google.com> | 2018-09-07 10:15:34 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-09-07 10:16:39 -0700 |
commit | 210c2520890ea48d551c0c9fffe890a7c60fb802 (patch) | |
tree | 4f431b5737cd9e6a7c8c33e459242c3404eab7c0 | |
parent | 590d8320992d74e54e2c095c68c49abc2b23dcbe (diff) |
runsc: Run sandbox process inside minimal chroot.
We construct a dir with the executable bind-mounted at /runsc, and proc mounted
at /proc. Runsc now executes the sandbox process inside this chroot, thus
limiting access to the host filesystem. The mounts and chroot dir are removed
when the sandbox is destroyed.
Because this requires bind-mounts, we can only do the chroot if we have
CAP_SYS_ADMIN.
PiperOrigin-RevId: 211994001
Change-Id: Ia71c515e26085e0b69b833e71691830148bc70d1
-rw-r--r-- | runsc/boot/config.go | 9 | ||||
-rw-r--r-- | runsc/container/fs.go | 30 | ||||
-rw-r--r-- | runsc/sandbox/BUILD | 1 | ||||
-rw-r--r-- | runsc/sandbox/chroot.go | 120 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 33 | ||||
-rw-r--r-- | runsc/specutils/namespace.go | 12 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 41 | ||||
-rw-r--r-- | runsc/test/testutil/BUILD | 1 | ||||
-rw-r--r-- | runsc/test/testutil/testutil.go | 26 |
9 files changed, 221 insertions, 52 deletions
diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 87a47dd0b..28a1600cd 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -214,10 +214,11 @@ type Config struct { // SIGUSR2(12) to troubleshoot hangs. -1 disables it. PanicSignal int - // TestOnlyAllowRunAsCurrentUser should only be used in tests. It - // allows runsc to start the sandbox process as the current user if we - // do not have capability to set uid/gid to another user. - TestOnlyAllowRunAsCurrentUser bool + // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in + // tests. It allows runsc to start the sandbox process as the current + // user, and without chrooting the sandbox process. This can be + // necessary in test environments that have limited capabilities. + TestOnlyAllowRunAsCurrentUserWithoutChroot bool } // ToFlags returns a slice of flags that correspond to the given Config. diff --git a/runsc/container/fs.go b/runsc/container/fs.go index fb352fc7c..a3c5772ba 100644 --- a/runsc/container/fs.go +++ b/runsc/container/fs.go @@ -77,11 +77,6 @@ func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) error { if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { continue } - src := m.Source - srcfi, err := os.Stat(src) - if err != nil { - return fmt.Errorf("failed to stat() mount source: %v", err) - } // It's possible that 'm.Destination' follows symlinks inside the // container. 
@@ -90,30 +85,13 @@ func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) error { return fmt.Errorf("failed to resolve symlinks: %v", err) } - // Create mount point if it doesn't exits - if _, err := os.Stat(dst); os.IsNotExist(err) { - if srcfi.IsDir() { - if err := os.MkdirAll(dst, 0755); err != nil { - return fmt.Errorf("failed to make mount directory %q: %v", dst, err) - } - } else { - if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil { - return fmt.Errorf("failed to make mount directory for file %q: %v", filepath.Dir(dst), err) - } - f, err := os.OpenFile(dst, os.O_CREATE, 0755) - if err != nil { - return fmt.Errorf("failed to open mount file %q: %v", dst, err) - } - f.Close() - } - } - flags := optionsToFlags(m.Options) flags |= syscall.MS_BIND - log.Infof("Mounting src: %q, dst: %q, flags: %#x", src, dst, flags) - if err := syscall.Mount(src, dst, m.Type, uintptr(flags), ""); err != nil { - return fmt.Errorf("failed to mount src: %q, dst: %q, flags: %#x, err: %v", src, dst, flags, err) + log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) + if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil { + return fmt.Errorf("failed to mount %v: %v", m, err) } + // Make the mount a slave, so that for recursive bind mount, umount won't // propagate to the source. flags = syscall.MS_SLAVE | syscall.MS_REC diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index 9317b1c14..8ebd14c4e 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -5,6 +5,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "sandbox", srcs = [ + "chroot.go", "network.go", "sandbox.go", ], diff --git a/runsc/sandbox/chroot.go b/runsc/sandbox/chroot.go new file mode 100644 index 000000000..a77a186c2 --- /dev/null +++ b/runsc/sandbox/chroot.go @@ -0,0 +1,120 @@ +// Copyright 2018 Google Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sandbox + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +// chrootBinPath is the location inside the chroot where the runsc binary will +// be mounted. +const chrootBinPath = "/runsc" + +// mountInChroot creates the destination mount point in the given chroot and +// mounts the source. +func mountInChroot(chroot, src, dst, typ string, flags uint32) error { + chrootDst := filepath.Join(chroot, dst) + log.Infof("Mounting %q at %q", src, chrootDst) + + return specutils.Mount(src, chrootDst, typ, flags) +} + +// setUpChroot creates an empty directory with runsc mounted at /runsc, proc +// mounted at /proc, and any dev files needed for the platform. +func setUpChroot(platform boot.PlatformType) (string, error) { + // Create the chroot directory and make it accessible to all users. + chroot, err := ioutil.TempDir("", "runsc-sandbox-chroot-") + if err != nil { + return "", fmt.Errorf("TempDir() failed: %v", err) + } + if err := os.Chmod(chroot, 0777); err != nil { + return "", fmt.Errorf("Chmod(%q) failed: %v", chroot, err) + } + log.Infof("Setting up sandbox chroot in %q", chroot) + + // Mount /proc. 
+ if err := mountInChroot(chroot, "proc", "/proc", "proc", 0); err != nil { + return "", fmt.Errorf("error mounting proc in chroot: %v", err) + } + + // Mount runsc at /runsc in the chroot. + binPath, err := specutils.BinPath() + if err != nil { + return "", err + } + if err := mountInChroot(chroot, binPath, chrootBinPath, "bind", syscall.MS_BIND|syscall.MS_RDONLY); err != nil { + return "", fmt.Errorf("error mounting runsc in chroot: %v", err) + } + + // Mount dev files needed for platform. + var devMount string + switch platform { + case boot.PlatformKVM: + devMount = "/dev/kvm" + } + if devMount != "" { + if err := mountInChroot(chroot, devMount, devMount, "bind", syscall.MS_BIND); err != nil { + return "", fmt.Errorf("error mounting platform device in chroot: %v", err) + } + } + + return chroot, nil +} + +// tearDownChroot unmounts /proc and /runsc from the chroot before deleting the +// directory. +func tearDownChroot(chroot string) error { + // Unmount /proc. + proc := filepath.Join(chroot, "proc") + if err := syscall.Unmount(proc, 0); err != nil { + return fmt.Errorf("error unmounting %q: %v", proc, err) + } + + // Unmount /runsc. + exe := filepath.Join(chroot, chrootBinPath) + if err := syscall.Unmount(exe, 0); err != nil { + return fmt.Errorf("error unmounting %q: %v", exe, err) + } + + // Unmount platform dev files. + devFiles := []string{"dev/kvm"} + for _, f := range devFiles { + devPath := filepath.Join(chroot, f) + if _, err := os.Stat(devPath); err != nil { + if os.IsNotExist(err) { + continue + } + return fmt.Errorf("Stat(%q) failed: %v", devPath, err) + } + if err := syscall.Unmount(devPath, 0); err != nil { + return fmt.Errorf("error unmounting %q: %v", devPath, err) + } + } + + // Remove chroot directory. 
+ if err := os.RemoveAll(chroot); err != nil { + return fmt.Errorf("error removing %q: %v", chroot, err) + } + + return nil +} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index dd5a0aa56..f6264d5b2 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -51,6 +51,10 @@ type Sandbox struct { // Pid is the pid of the running sandbox (immutable). May be 0 is the sandbox // is not running. Pid int `json:"pid"` + + // Chroot is the path to the chroot directory that the sandbox process + // is running in. + Chroot string `json:"chroot"` } // Create creates the sandbox process. @@ -392,12 +396,11 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund log.Infof("Sandbox will be started in new user namespace") nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace}) - if conf.TestOnlyAllowRunAsCurrentUser { + // If we have CAP_SETUID and CAP_SETGID, then we can also run + // as user nobody. + if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { log.Warningf("Running sandbox in test mode as current user (uid=%d gid=%d). This is only safe in tests!", os.Getuid(), os.Getgid()) } else if specutils.CanSetUIDGID() { - // If we have CAP_SETUID and CAP_SETGID, then we can also run - // as user nobody. - // Map nobody in the new namespace to nobody in the parent namespace. const nobody = 65534 cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{{ @@ -419,6 +422,23 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund } else { return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID") } + + // If we have CAP_SYS_ADMIN, we can create an empty chroot and + // bind-mount the executable inside it. + if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + log.Warningf("Running sandbox in test mode without chroot. 
This is only safe in tests!") + } else if specutils.HasCapSysAdmin() { + log.Infof("Sandbox will be started in minimal chroot") + chroot, err := setUpChroot(conf.Platform) + if err != nil { + return fmt.Errorf("error setting up chroot: %v", err) + } + cmd.SysProcAttr.Chroot = chroot + cmd.Args[0] = "/runsc" + cmd.Path = "/runsc" + } else { + return fmt.Errorf("can't run sandbox process in minimal chroot since we don't have CAP_SYS_ADMIN") + } } // Log the fds we are donating to the sandbox process. @@ -525,6 +545,11 @@ func (s *Sandbox) Destroy() error { log.Debugf("Killing sandbox %q", s.ID) signalProcess(s.Pid, unix.SIGKILL) } + + if s.Chroot != "" { + return tearDownChroot(s.Chroot) + } + return nil } diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index 356943a65..48a199a77 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -216,3 +216,15 @@ func CanSetUIDGID() bool { return caps.Get(capability.EFFECTIVE, capability.CAP_SETUID) && caps.Get(capability.EFFECTIVE, capability.CAP_SETGID) } + +// HasCapSysAdmin returns true if the user has CAP_SYS_ADMIN capability. +func HasCapSysAdmin() bool { + caps, err := capability.NewPid2(os.Getpid()) + if err != nil { + return false + } + if err := caps.Load(); err != nil { + return false + } + return caps.Get(capability.EFFECTIVE, capability.CAP_SYS_ADMIN) +} diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 551718e9a..f3fa8d129 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -363,3 +363,44 @@ func DebugLogFile(logDir, subcommand string) (*os.File, error) { filename := fmt.Sprintf("runsc.log.%s.%s", time.Now().Format("20060102-150405.000000"), subcommand) return os.OpenFile(filepath.Join(logDir, filename), os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664) } + +// Mount creates the mount point and calls Mount with the given flags. 
+func Mount(src, dst, typ string, flags uint32) error { + // Create the mount point inside. The type must be the same as the + // source (file or directory). + var isDir bool + if typ == "proc" { + // Special case, as there is no source directory for proc + // mounts. + isDir = true + } else if fi, err := os.Stat(src); err != nil { + return fmt.Errorf("Stat(%q) failed: %v", src, err) + } else { + isDir = fi.IsDir() + } + + if isDir { + // Create the destination directory. + if err := os.MkdirAll(dst, 0777); err != nil { + return fmt.Errorf("Mkdir(%q) failed: %v", dst, err) + } + } else { + // Create the parent destination directory. + parent := path.Dir(dst) + if err := os.MkdirAll(parent, 0777); err != nil { + return fmt.Errorf("Mkdir(%q) failed: %v", parent, err) + } + // Create the destination file if it does not exist. + f, err := os.OpenFile(dst, syscall.O_CREAT, 0777) + if err != nil { + return fmt.Errorf("Open(%q) failed: %v", dst, err) + } + f.Close() + } + + // Do the mount. + if err := syscall.Mount(src, dst, typ, uintptr(flags), ""); err != nil { + return fmt.Errorf("Mount(%q, %q, %d) failed: %v", src, dst, flags, err) + } + return nil +} diff --git a/runsc/test/testutil/BUILD b/runsc/test/testutil/BUILD index ca91e07ff..03ab3c4ac 100644 --- a/runsc/test/testutil/BUILD +++ b/runsc/test/testutil/BUILD @@ -18,6 +18,5 @@ go_library( "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", - "@com_github_syndtr_gocapability//capability:go_default_library", ], ) diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index 77bd56912..4f012a8ea 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -32,7 +32,6 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/syndtr/gocapability/capability" "gvisor.googlesource.com/gvisor/runsc/boot" 
"gvisor.googlesource.com/gvisor/runsc/specutils" ) @@ -104,14 +103,14 @@ func FindFile(path string) (string, error) { // TestConfig return the default configuration to use in tests. func TestConfig() *boot.Config { return &boot.Config{ - Debug: true, - LogFormat: "text", - LogPackets: true, - Network: boot.NetworkNone, - Strace: true, - MultiContainer: true, - FileAccess: boot.FileAccessProxyExclusive, - TestOnlyAllowRunAsCurrentUser: true, + Debug: true, + LogFormat: "text", + LogPackets: true, + Network: boot.NetworkNone, + Strace: true, + MultiContainer: true, + FileAccess: boot.FileAccessProxyExclusive, + TestOnlyAllowRunAsCurrentUserWithoutChroot: true, } } @@ -238,14 +237,7 @@ func WaitForHTTP(port int, timeout time.Duration) error { // RunAsRoot ensures the test runs with CAP_SYS_ADMIN. If need it will create // a new user namespace and reexecute the test as root inside of the namespace. func RunAsRoot(m *testing.M) { - caps, err := capability.NewPid2(os.Getpid()) - if err != nil { - panic(err.Error()) - } - if err := caps.Load(); err != nil { - panic(err.Error()) - } - if caps.Get(capability.EFFECTIVE, capability.CAP_SYS_ADMIN) { + if specutils.HasCapSysAdmin() { // Capability: check! Good to run. os.Exit(m.Run()) } |