diff options
author | Andrei Vagin <avagin@google.com> | 2019-01-14 14:07:05 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-01-14 14:08:19 -0800 |
commit | a46b6d453d198b96949342a81750114bfa5a5429 (patch) | |
tree | 71c32eae0ee72b46576226b0266ed63e0bf93463 | |
parent | 7182b9cf52087bc354104ad2a23fcf4c468ab20e (diff) |
runsc: set up a minimal chroot from the sandbox process
In this case, new mounts are not created in the host mount namespace, so
tearDownChroot isn't needed, because the chroot will be destroyed along with
the sandbox mount namespace.
In addition, pivot_root can now be called instead of chroot.
PiperOrigin-RevId: 229250871
Change-Id: I765bdb587d0b8287a6a8efda8747639d37c7e7b6
-rw-r--r-- | runsc/cmd/BUILD | 1 | ||||
-rw-r--r-- | runsc/cmd/boot.go | 31 | ||||
-rw-r--r-- | runsc/cmd/chroot.go | 95 | ||||
-rw-r--r-- | runsc/cmd/cmd.go | 26 | ||||
-rw-r--r-- | runsc/sandbox/BUILD | 1 | ||||
-rw-r--r-- | runsc/sandbox/chroot.go | 97 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 86 | ||||
-rw-r--r-- | runsc/test/root/chroot_test.go | 16 |
8 files changed, 201 insertions, 152 deletions
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 394bb0e1f..a908172af 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -8,6 +8,7 @@ go_library( "boot.go", "capability.go", "checkpoint.go", + "chroot.go", "cmd.go", "create.go", "debug.go", diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index bb3435284..7ca2744bd 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -60,6 +60,9 @@ type Boot struct { // to the process. applyCaps bool + // setUpChroot is set to true if the sandbox is started in an empty root. + setUpRoot bool + // cpuNum number of CPUs to create inside the sandbox. cpuNum int @@ -99,6 +102,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) { f.Var(&b.stdioFDs, "stdio-fds", "list of FDs containing sandbox stdin, stdout, and stderr in that order") f.BoolVar(&b.console, "console", false, "set to true if the sandbox should allow terminal ioctl(2) syscalls") f.BoolVar(&b.applyCaps, "apply-caps", false, "if true, apply capabilities defined in the spec to the process") + f.BoolVar(&b.setUpRoot, "setup-root", false, "if true, set up an empty root for the process") f.IntVar(&b.cpuNum, "cpu-num", 0, "number of CPUs to create inside the sandbox") f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container") f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") @@ -116,6 +120,31 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Ensure that if there is a panic, all goroutine stacks are printed. debug.SetTraceback("all") + if b.setUpRoot { + if err := setUpChroot(); err != nil { + Fatalf("error setting up chroot: %v", err) + } + + specutils.ExePath = "/runsc" + if !b.applyCaps { + // Remove --setup-root arg to call myself. 
+ var args []string + for _, arg := range os.Args { + if !strings.Contains(arg, "setup-root") { + args = append(args, arg) + } + } + // Note that we've already read the spec from the spec FD, and + // we will read it again after the exec call. This works + // because the ReadSpecFromFile function seeks to the beginning + // of the file before reading. + if err := callSelfAsNobody(args); err != nil { + Fatalf("%v", err) + } + panic("callSelfAsNobody must never return success") + } + } + // Get the spec from the specFD. specFile := os.NewFile(uintptr(b.specFD), "spec file") defer specFile.Close() @@ -144,7 +173,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Remove --apply-caps arg to call myself. var args []string for _, arg := range os.Args { - if !strings.Contains(arg, "apply-caps") { + if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") { args = append(args, arg) } } diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go new file mode 100644 index 000000000..b53085934 --- /dev/null +++ b/runsc/cmd/chroot.go @@ -0,0 +1,95 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "fmt" + "os" + "path/filepath" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +// chrootBinPath is the location inside the chroot where the runsc binary will +// be mounted. 
+const chrootBinPath = "/runsc" + +// mountInChroot creates the destination mount point in the given chroot and +// mounts the source. +func mountInChroot(chroot, src, dst, typ string, flags uint32) error { + chrootDst := filepath.Join(chroot, dst) + log.Infof("Mounting %q at %q", src, chrootDst) + + if err := specutils.Mount(src, chrootDst, typ, flags); err != nil { + return fmt.Errorf("error mounting %q at %q: %v", src, chrootDst, err) + } + return nil +} + +// setUpChroot creates an empty directory with runsc mounted at /runsc and proc +// mounted at /proc. +func setUpChroot() error { + // We are a new mount namespace, so we can use /tmp as a directory to + // construct a new root. + chroot := os.TempDir() + + log.Infof("Setting up sandbox chroot in %q", chroot) + + // Convert all shared mounts into slave to be sure that nothing will be + // propagated outside of our namespace. + if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + return fmt.Errorf("error converting mounts: %v", err) + } + + if err := syscall.Mount("runsc-root", chroot, "tmpfs", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC, ""); err != nil { + return fmt.Errorf("error mounting tmpfs in choot: %v", err) + } + + if err := mountInChroot(chroot, "/proc", "/proc", "bind", syscall.MS_BIND|syscall.MS_RDONLY|syscall.MS_REC); err != nil { + return fmt.Errorf("error mounting proc in chroot: %v", err) + } + + if err := mountInChroot(chroot, specutils.ExePath, chrootBinPath, "bind", syscall.MS_BIND|syscall.MS_RDONLY); err != nil { + return fmt.Errorf("error mounting runsc in chroot: %v", err) + } + + if err := os.Chdir(chroot); err != nil { + return fmt.Errorf("error changing working directory: %v", err) + } + + if err := syscall.Mount("", chroot, "", syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_BIND, ""); err != nil { + return fmt.Errorf("error remounting chroot in read-only: %v", err) + } + // pivot_root(new_root, put_old) moves the root filesystem 
(old_root) + // of the calling process to the directory put_old and makes new_root + // the new root filesystem of the calling process. + // + // pivot_root(".", ".") makes a mount of the working directory the new + // root filesystem, so it will be moved in "/" and then the old_root + // will be moved to "/" too. The parent mount of the old_root will be + // new_root, so after umounting the old_root, we will see only + // the new_root in "/". + if err := syscall.PivotRoot(".", "."); err != nil { + return fmt.Errorf("error changing root filesystem: %v", err) + } + + if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil { + return fmt.Errorf("error umounting the old root file system: %v", err) + } + + return nil +} diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go index a1c3491a3..fbfc18fc9 100644 --- a/runsc/cmd/cmd.go +++ b/runsc/cmd/cmd.go @@ -86,6 +86,28 @@ func setCapsAndCallSelf(args []string, caps *specs.LinuxCapabilities) error { } log.Infof("Execve %q again, bye!", binPath) - syscall.Exec(binPath, args, []string{}) - panic("unreachable") + err = syscall.Exec(binPath, args, []string{}) + return fmt.Errorf("error executing %s: %v", binPath, err) +} + +// callSelfAsNobody sets UID and GID to nobody and then execve's itself again. +func callSelfAsNobody(args []string) error { + // Keep thread locked while user/group are changed. 
+ runtime.LockOSThread() + defer runtime.UnlockOSThread() + + const nobody = 65534 + + if _, _, err := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(nobody), 0, 0); err != 0 { + return fmt.Errorf("error setting uid: %v", err) + } + if _, _, err := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(nobody), 0, 0); err != 0 { + return fmt.Errorf("error setting gid: %v", err) + } + + binPath := "/runsc" + + log.Infof("Execve %q again, bye!", binPath) + err := syscall.Exec(binPath, args, []string{}) + return fmt.Errorf("error executing %s: %v", binPath, err) } diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index d6043bcf7..899fd99de 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -5,7 +5,6 @@ package(licenses = ["notice"]) # Apache 2.0 go_library( name = "sandbox", srcs = [ - "chroot.go", "network.go", "sandbox.go", ], diff --git a/runsc/sandbox/chroot.go b/runsc/sandbox/chroot.go deleted file mode 100644 index 354049871..000000000 --- a/runsc/sandbox/chroot.go +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2018 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package sandbox - -import ( - "fmt" - "io/ioutil" - "os" - "path/filepath" - "syscall" - - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/specutils" -) - -// chrootBinPath is the location inside the chroot where the runsc binary will -// be mounted. 
-const chrootBinPath = "/runsc" - -// mountInChroot creates the destination mount point in the given chroot and -// mounts the source. -func mountInChroot(chroot, src, dst, typ string, flags uint32) error { - chrootDst := filepath.Join(chroot, dst) - log.Infof("Mounting %q at %q", src, chrootDst) - - if err := specutils.Mount(src, chrootDst, typ, flags); err != nil { - return fmt.Errorf("error mounting %q at %q: %v", src, chrootDst, err) - } - return nil -} - -// setUpChroot creates an empty directory with runsc mounted at /runsc and proc -// mounted at /proc. -func setUpChroot() (string, error) { - // Create the chroot directory and make it accessible to all users. - chroot, err := ioutil.TempDir("", "runsc-sandbox-chroot-") - if err != nil { - return "", fmt.Errorf("TempDir() failed: %v", err) - } - if err := os.Chmod(chroot, 0777); err != nil { - return "", fmt.Errorf("Chmod(%q) failed: %v", chroot, err) - } - log.Infof("Setting up sandbox chroot in %q", chroot) - - // Mount /proc. - if err := mountInChroot(chroot, "proc", "/proc", "proc", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC); err != nil { - return "", fmt.Errorf("error mounting proc in chroot: %v", err) - } - - // Mount runsc at /runsc in the chroot. - binPath, err := specutils.BinPath() - if err != nil { - return "", err - } - if err := mountInChroot(chroot, binPath, chrootBinPath, "bind", syscall.MS_BIND|syscall.MS_RDONLY); err != nil { - return "", fmt.Errorf("error mounting runsc in chroot: %v", err) - } - - return chroot, nil -} - -// tearDownChroot unmounts /proc and /runsc from the chroot before deleting the -// directory. -func tearDownChroot(chroot string) error { - log.Debugf("Removing chroot mounts %q", chroot) - - // Unmount /proc. - proc := filepath.Join(chroot, "proc") - if err := syscall.Unmount(proc, 0); err != nil { - return fmt.Errorf("error unmounting %q: %v", proc, err) - } - - // Unmount /runsc. 
- exe := filepath.Join(chroot, chrootBinPath) - if err := syscall.Unmount(exe, 0); err != nil { - return fmt.Errorf("error unmounting %q: %v", exe, err) - } - - // Remove chroot directory. - if err := os.RemoveAll(chroot); err != nil { - return fmt.Errorf("error removing %q: %v", chroot, err) - } - - return nil -} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index fe55ddab8..411200793 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -56,10 +56,6 @@ type Sandbox struct { // is not running. Pid int `json:"pid"` - // Chroot is the path to the chroot directory that the sandbox process - // is running in. - Chroot string `json:"chroot"` - // Cgroup has the cgroup configuration for the sandbox. Cgroup *cgroup.Cgroup `json:"cgroup"` @@ -491,6 +487,17 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // rules. cmd.Args = append(cmd.Args, "--apply-caps=true") + // If we have CAP_SYS_ADMIN, we can create an empty chroot and + // bind-mount the executable inside it. + if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!") + + } else if specutils.HasCapabilities(capability.CAP_SYS_ADMIN) { + log.Infof("Sandbox will be started in minimal chroot") + cmd.Args = append(cmd.Args, "--setup-root") + } else { + return fmt.Errorf("can't run sandbox process in minimal chroot since we don't have CAP_SYS_ADMIN") + } } else { log.Infof("Sandbox will be started in new user namespace") nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace}) @@ -499,50 +506,53 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // as user nobody. if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { log.Warningf("Running sandbox in test mode as current user (uid=%d gid=%d). This is only safe in tests!", os.Getuid(), os.Getgid()) + log.Warningf("Running sandbox in test mode without chroot. 
This is only safe in tests!") } else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) { // Map nobody in the new namespace to nobody in the parent namespace. + // + // A sandbox process will construct an empty + // root for itself, so it has to have the CAP_SYS_ADMIN + // capability. + // + // FIXME: The current implementations of + // os/exec doesn't allow to set ambient capabilities if + // a process is started in a new user namespace. As a + // workaround, we start the sandbox process with the 0 + // UID and then it constructs a chroot and sets UID to + // nobody. https://github.com/golang/go/issues/2315 const nobody = 65534 - cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{{ - ContainerID: int(nobody), - HostID: int(nobody), - Size: int(1), - }} - cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{{ - ContainerID: int(nobody), - HostID: int(nobody), - Size: int(1), - }} + cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ + { + ContainerID: int(0), + HostID: int(nobody - 1), + Size: int(1), + }, + { + ContainerID: int(nobody), + HostID: int(nobody), + Size: int(1), + }, + } + cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ + { + ContainerID: int(nobody), + HostID: int(nobody), + Size: int(1), + }, + } // Set credentials to run as user and group nobody. cmd.SysProcAttr.Credential = &syscall.Credential{ - Uid: nobody, + Uid: 0, Gid: nobody, } + cmd.Args = append(cmd.Args, "--setup-root") } else { return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID") } } - // If we have CAP_SYS_ADMIN, we can create an empty chroot and - // bind-mount the executable inside it. - if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - log.Warningf("Running sandbox in test mode without chroot. 
This is only safe in tests!") - - } else if specutils.HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT) { - log.Infof("Sandbox will be started in minimal chroot") - chroot, err := setUpChroot() - if err != nil { - return fmt.Errorf("error setting up chroot: %v", err) - } - s.Chroot = chroot // Remember path so it can cleaned up. - cmd.SysProcAttr.Chroot = chroot - cmd.Dir = "/" - cmd.Args[0] = "/runsc" - cmd.Path = "/runsc" - - } else { - return fmt.Errorf("can't run sandbox process in minimal chroot since we don't have CAP_SYS_ADMIN and CAP_SYS_CHROOT") - } + cmd.Args[0] = "runsc-sandbox" if s.Cgroup != nil { cpuNum, err := s.Cgroup.NumCPU() @@ -676,12 +686,6 @@ func (s *Sandbox) destroy() error { } } - if s.Chroot != "" { - if err := tearDownChroot(s.Chroot); err != nil { - return err - } - } - return nil } diff --git a/runsc/test/root/chroot_test.go b/runsc/test/root/chroot_test.go index 9f705c860..04124703d 100644 --- a/runsc/test/root/chroot_test.go +++ b/runsc/test/root/chroot_test.go @@ -52,12 +52,13 @@ func TestChroot(t *testing.T) { } // Check that sandbox is chroot'ed. 
- chroot, err := filepath.EvalSymlinks(filepath.Join("/proc", strconv.Itoa(pid), "root")) + procRoot := filepath.Join("/proc", strconv.Itoa(pid), "root") + chroot, err := filepath.EvalSymlinks(procRoot) if err != nil { t.Fatalf("error resolving /proc/<pid>/root symlink: %v", err) } - if want := "/tmp/runsc-sandbox-chroot-"; !strings.HasPrefix(chroot, want) { - t.Errorf("sandbox is not chroot'd, it should be inside: %q, got: %q", want, chroot) + if chroot != "/" { + t.Errorf("sandbox is not chroot'd, it should be inside: /, got: %q", chroot) } path, err := filepath.EvalSymlinks(filepath.Join("/proc", strconv.Itoa(pid), "cwd")) @@ -68,12 +69,12 @@ func TestChroot(t *testing.T) { t.Errorf("sandbox current dir is wrong, want: %q, got: %q", chroot, path) } - fi, err := ioutil.ReadDir(chroot) + fi, err := ioutil.ReadDir(procRoot) if err != nil { t.Fatalf("error listing %q: %v", chroot, err) } if want, got := 2, len(fi); want != got { - t.Fatalf("chroot dir got %d entries, want %d", want, got) + t.Fatalf("chroot dir got %d entries, want %d", got, want) } // chroot dir is prepared by runsc and should contains only the executable @@ -85,11 +86,6 @@ func TestChroot(t *testing.T) { } d.CleanUp() - - // Check that chroot directory was cleaned up. - if _, err := os.Stat(chroot); err == nil || !os.IsNotExist(err) { - t.Errorf("chroot directory %q was not deleted: %v", chroot, err) - } } func TestChrootGofer(t *testing.T) { |