diff options
Diffstat (limited to 'runsc/cmd')
-rw-r--r-- | runsc/cmd/BUILD | 1 | ||||
-rw-r--r-- | runsc/cmd/boot.go | 31 | ||||
-rw-r--r-- | runsc/cmd/chroot.go | 95 | ||||
-rw-r--r-- | runsc/cmd/cmd.go | 26 |
4 files changed, 150 insertions, 3 deletions
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 394bb0e1f..a908172af 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -8,6 +8,7 @@ go_library( "boot.go", "capability.go", "checkpoint.go", + "chroot.go", "cmd.go", "create.go", "debug.go", diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index bb3435284..7ca2744bd 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -60,6 +60,9 @@ type Boot struct { // to the process. applyCaps bool + // setUpRoot is set to true if the sandbox is started in an empty root. + setUpRoot bool + // cpuNum number of CPUs to create inside the sandbox. cpuNum int @@ -99,6 +102,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) { f.Var(&b.stdioFDs, "stdio-fds", "list of FDs containing sandbox stdin, stdout, and stderr in that order") f.BoolVar(&b.console, "console", false, "set to true if the sandbox should allow terminal ioctl(2) syscalls") f.BoolVar(&b.applyCaps, "apply-caps", false, "if true, apply capabilities defined in the spec to the process") + f.BoolVar(&b.setUpRoot, "setup-root", false, "if true, set up an empty root for the process") f.IntVar(&b.cpuNum, "cpu-num", 0, "number of CPUs to create inside the sandbox") f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container") f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") @@ -116,6 +120,31 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Ensure that if there is a panic, all goroutine stacks are printed. debug.SetTraceback("all") + if b.setUpRoot { + if err := setUpChroot(); err != nil { + Fatalf("error setting up chroot: %v", err) + } + + specutils.ExePath = "/runsc" + if !b.applyCaps { + // Remove --setup-root arg to call myself. 
+ var args []string + for _, arg := range os.Args { + if !strings.Contains(arg, "setup-root") { + args = append(args, arg) + } + } + // Note that we've already read the spec from the spec FD, and + // we will read it again after the exec call. This works + // because the ReadSpecFromFile function seeks to the beginning + // of the file before reading. + if err := callSelfAsNobody(args); err != nil { + Fatalf("%v", err) + } + panic("callSelfAsNobody must never return success") + } + } + // Get the spec from the specFD. specFile := os.NewFile(uintptr(b.specFD), "spec file") defer specFile.Close() @@ -144,7 +173,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Remove --apply-caps arg to call myself. var args []string for _, arg := range os.Args { - if !strings.Contains(arg, "apply-caps") { + if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") { args = append(args, arg) } } diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go new file mode 100644 index 000000000..b53085934 --- /dev/null +++ b/runsc/cmd/chroot.go @@ -0,0 +1,95 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "fmt" + "os" + "path/filepath" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +// chrootBinPath is the location inside the chroot where the runsc binary will +// be mounted. 
+const chrootBinPath = "/runsc"
+
+// mountInChroot mounts src at dst under the given chroot via specutils.Mount
+// (which is expected to create the mount point), wrapping any error.
+func mountInChroot(chroot, src, dst, typ string, flags uint32) error {
+	chrootDst := filepath.Join(chroot, dst)
+	log.Infof("Mounting %q at %q", src, chrootDst)
+
+	if err := specutils.Mount(src, chrootDst, typ, flags); err != nil {
+		return fmt.Errorf("error mounting %q at %q: %v", src, chrootDst, err)
+	}
+	return nil
+}
+
+// setUpChroot builds a read-only tmpfs root holding only /proc and the runsc
+// binary (at chrootBinPath), then pivot_roots the process into it.
+func setUpChroot() error {
+	// We are in a new mount namespace, so we can use /tmp as a directory to
+	// construct a new root.
+	chroot := os.TempDir()
+
+	log.Infof("Setting up sandbox chroot in %q", chroot)
+
+	// Convert all shared mounts into slave to be sure that nothing will be
+	// propagated outside of our namespace.
+	if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
+		return fmt.Errorf("error converting mounts: %v", err)
+	}
+
+	if err := syscall.Mount("runsc-root", chroot, "tmpfs", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC, ""); err != nil {
+		return fmt.Errorf("error mounting tmpfs in chroot: %v", err)
+	}
+
+	if err := mountInChroot(chroot, "/proc", "/proc", "bind", syscall.MS_BIND|syscall.MS_RDONLY|syscall.MS_REC); err != nil {
+		return fmt.Errorf("error mounting proc in chroot: %v", err)
+	}
+
+	if err := mountInChroot(chroot, specutils.ExePath, chrootBinPath, "bind", syscall.MS_BIND|syscall.MS_RDONLY); err != nil {
+		return fmt.Errorf("error mounting runsc in chroot: %v", err)
+	}
+
+	if err := os.Chdir(chroot); err != nil {
+		return fmt.Errorf("error changing working directory: %v", err)
+	}
+
+	if err := syscall.Mount("", chroot, "", syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_BIND, ""); err != nil {
+		return fmt.Errorf("error remounting chroot in read-only: %v", err)
+	}
+	// pivot_root(new_root, put_old) moves the root filesystem (old_root)
+	// of the calling process to the directory put_old and makes new_root
+	// the new root filesystem of the calling process.
+	//
+	// pivot_root(".", ".") makes a mount of the working directory the new
+	// root filesystem, so it will be moved in "/" and then the old_root
+	// will be moved to "/" too. The parent mount of the old_root will be
+	// new_root, so after umounting the old_root, we will see only
+	// the new_root in "/".
+	if err := syscall.PivotRoot(".", "."); err != nil {
+		return fmt.Errorf("error changing root filesystem: %v", err)
+	}
+
+	if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
+		return fmt.Errorf("error umounting the old root file system: %v", err)
+	}
+
+	return nil
+}
diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go
index a1c3491a3..fbfc18fc9 100644
--- a/runsc/cmd/cmd.go
+++ b/runsc/cmd/cmd.go
@@ -86,6 +86,28 @@ func setCapsAndCallSelf(args []string, caps *specs.LinuxCapabilities) error {
 	}
 
 	log.Infof("Execve %q again, bye!", binPath)
-	syscall.Exec(binPath, args, []string{})
-	panic("unreachable")
+	err = syscall.Exec(binPath, args, []string{})
+	return fmt.Errorf("error executing %s: %v", binPath, err)
+}
+
+// callSelfAsNobody switches to nobody's GID/UID and re-execs runsc; it returns only on error.
+func callSelfAsNobody(args []string) error {
+	// Keep thread locked while user/group are changed.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	const nobody = 65534
+
+	if _, _, err := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(nobody), 0, 0); err != 0 {
+		return fmt.Errorf("error setting gid: %v", err)
+	}
+	if _, _, err := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(nobody), 0, 0); err != 0 {
+		return fmt.Errorf("error setting uid: %v", err)
+	}
+
+	binPath := chrootBinPath
+
+	log.Infof("Execve %q again, bye!", binPath)
+	err := syscall.Exec(binPath, args, []string{})
+	return fmt.Errorf("error executing %s: %v", binPath, err)
 }
|