diff options
Diffstat (limited to 'runsc/specutils')
-rw-r--r-- | runsc/specutils/BUILD | 5 | ||||
-rw-r--r-- | runsc/specutils/fs.go | 40 | ||||
-rw-r--r-- | runsc/specutils/namespace.go | 52 |
3 files changed, 82 insertions, 15 deletions
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD index 15476de6f..0456e4c4f 100644 --- a/runsc/specutils/BUILD +++ b/runsc/specutils/BUILD @@ -10,10 +10,7 @@ go_library( "specutils.go", ], importpath = "gvisor.googlesource.com/gvisor/runsc/specutils", - visibility = [ - "//runsc:__subpackages__", - "//test:__subpackages__", - ], + visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", "//pkg/log", diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go index 1f3afb4e4..6e6902e9f 100644 --- a/runsc/specutils/fs.go +++ b/runsc/specutils/fs.go @@ -16,6 +16,7 @@ package specutils import ( "fmt" + "math/bits" "path" "syscall" @@ -105,22 +106,30 @@ func optionsToFlags(opts []string, source map[string]mapping) uint32 { return rv } -// ValidateMount validates that spec mounts are correct. +// validateMount validates that spec mounts are correct. func validateMount(mnt *specs.Mount) error { if !path.IsAbs(mnt.Destination) { return fmt.Errorf("Mount.Destination must be an absolute path: %v", mnt) } - if mnt.Type == "bind" { - for _, o := range mnt.Options { - if ContainsStr(invalidOptions, o) { - return fmt.Errorf("mount option %q is not supported: %v", o, mnt) - } - _, ok1 := optionsMap[o] - _, ok2 := propOptionsMap[o] - if !ok1 && !ok2 { - return fmt.Errorf("unknown mount option %q", o) - } + return ValidateMountOptions(mnt.Options) + } + return nil +} + +// ValidateMountOptions validates that mount options are correct. +func ValidateMountOptions(opts []string) error { + for _, o := range opts { + if ContainsStr(invalidOptions, o) { + return fmt.Errorf("mount option %q is not supported", o) + } + _, ok1 := optionsMap[o] + _, ok2 := propOptionsMap[o] + if !ok1 && !ok2 { + return fmt.Errorf("unknown mount option %q", o) + } + if err := validatePropagation(o); err != nil { + return err } } return nil @@ -133,5 +142,14 @@ func validateRootfsPropagation(opt string) error { if flags&(syscall.MS_SLAVE|syscall.MS_PRIVATE) == 0 { return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt) } + return validatePropagation(opt) +} + +func validatePropagation(opt string) error { + flags := PropOptionsToFlags([]string{opt}) + exclusive := flags & (syscall.MS_SLAVE | syscall.MS_PRIVATE | syscall.MS_SHARED | syscall.MS_UNBINDABLE) + if bits.OnesCount32(exclusive) > 1 { + return fmt.Errorf("mount propagation options are mutually exclusive: %q", opt) + } return nil } diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index 7d194335c..06c13d1ab 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -220,3 +220,55 @@ func HasCapabilities(cs ...capability.Cap) bool { } return true } + +// MaybeRunAsRoot ensures the process runs with capabilities needed to create a +// sandbox, e.g. CAP_SYS_ADMIN, CAP_SYS_CHROOT, etc. If capabilities are needed, +// it will create a new user namespace and re-execute the process as root +// inside the namespace with the same arguments and environment. +// +// This function returns immediately when no new capability is needed. If +// another process is executed, it returns straight from here with the same exit +// code as the child. +func MaybeRunAsRoot() error { + if HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT, capability.CAP_SETUID, capability.CAP_SETGID) { + return nil + } + + // Current process doesn't have required capabilities, create user namespace + // and run as root inside the namespace to acquire capabilities. + log.Infof("*** Re-running as root in new user namespace ***") + + cmd := exec.Command("/proc/self/exe", os.Args[1:]...) + + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, + // Set current user/group as root inside the namespace. Since we may not + // have CAP_SETUID/CAP_SETGID, just map root to the current user/group. + UidMappings: []syscall.SysProcIDMap{ + {ContainerID: 0, HostID: os.Getuid(), Size: 1}, + }, + GidMappings: []syscall.SysProcIDMap{ + {ContainerID: 0, HostID: os.Getgid(), Size: 1}, + }, + Credential: &syscall.Credential{Uid: 0, Gid: 0}, + GidMappingsEnableSetgroups: false, + } + + cmd.Env = os.Environ() + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + if exit, ok := err.(*exec.ExitError); ok { + if ws, ok := exit.Sys().(syscall.WaitStatus); ok { + os.Exit(ws.ExitStatus()) + } + log.Warningf("No wait status provided, exiting with -1: %v", err) + os.Exit(-1) + } + return fmt.Errorf("re-executing self: %v", err) + } + // Child completed with success. + os.Exit(0) + panic("unreachable") +} |