diff options
author | Fabricio Voznika <fvoznika@google.com> | 2019-03-18 12:29:43 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-03-18 12:30:43 -0700 |
commit | e420cc3e5d2066674d32d16ad885bee6b30da210 (patch) | |
tree | 991b119af7c8816a539318560338b3e5f065a2f8 /runsc/specutils | |
parent | eb69542807a87491fd4e6405bdab1c0f64db536d (diff) |
Add support for mount propagation
Properly handle propagation options for root and mounts. Using the mount
options shared, rshared, or noexec now causes an error at start.
shared/rshared break sandbox=>host isolation. slave, however, can be
supported because changes propagate only from host to sandbox.
Root FS setup moved inside the gofer. Apart from simplifying the code,
this keeps all mounts inside the namespace, so they are torn down when
the namespace is destroyed (DestroyFS is no longer needed).
PiperOrigin-RevId: 239037661
Change-Id: I8b5ee4d50da33c042ea34fa68e56514ebe20e6e0
Diffstat (limited to 'runsc/specutils')
-rw-r--r-- | runsc/specutils/BUILD | 1 | ||||
-rw-r--r-- | runsc/specutils/fs.go | 139 | ||||
-rw-r--r-- | runsc/specutils/namespace.go | 16 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 52 | ||||
-rw-r--r-- | runsc/specutils/specutils_test.go | 31 |
5 files changed, 204 insertions, 35 deletions
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD index 372799850..15476de6f 100644 --- a/runsc/specutils/BUILD +++ b/runsc/specutils/BUILD @@ -5,6 +5,7 @@ package(licenses = ["notice"]) go_library( name = "specutils", srcs = [ + "fs.go", "namespace.go", "specutils.go", ], diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go new file mode 100644 index 000000000..b812a5fbd --- /dev/null +++ b/runsc/specutils/fs.go @@ -0,0 +1,139 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package specutils + +import ( + "fmt" + "path" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/pkg/log" +) + +type mapping struct { + set bool + val uint32 +} + +// optionsMap maps mount propagation-related OCI filesystem options to mount(2) +// syscall flags. 
+var optionsMap = map[string]mapping{ + "acl": {set: true, val: syscall.MS_POSIXACL}, + "async": {set: false, val: syscall.MS_SYNCHRONOUS}, + "atime": {set: false, val: syscall.MS_NOATIME}, + "bind": {set: true, val: syscall.MS_BIND}, + "defaults": {set: true, val: 0}, + "dev": {set: false, val: syscall.MS_NODEV}, + "diratime": {set: false, val: syscall.MS_NODIRATIME}, + "dirsync": {set: true, val: syscall.MS_DIRSYNC}, + "exec": {set: false, val: syscall.MS_NOEXEC}, + "iversion": {set: true, val: syscall.MS_I_VERSION}, + "loud": {set: false, val: syscall.MS_SILENT}, + "mand": {set: true, val: syscall.MS_MANDLOCK}, + "noacl": {set: false, val: syscall.MS_POSIXACL}, + "noatime": {set: true, val: syscall.MS_NOATIME}, + "nodev": {set: true, val: syscall.MS_NODEV}, + "nodiratime": {set: true, val: syscall.MS_NODIRATIME}, + "noiversion": {set: false, val: syscall.MS_I_VERSION}, + "nomand": {set: false, val: syscall.MS_MANDLOCK}, + "norelatime": {set: false, val: syscall.MS_RELATIME}, + "nostrictatime": {set: false, val: syscall.MS_STRICTATIME}, + "nosuid": {set: true, val: syscall.MS_NOSUID}, + "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC}, + "relatime": {set: true, val: syscall.MS_RELATIME}, + "remount": {set: true, val: syscall.MS_REMOUNT}, + "ro": {set: true, val: syscall.MS_RDONLY}, + "rw": {set: false, val: syscall.MS_RDONLY}, + "silent": {set: true, val: syscall.MS_SILENT}, + "strictatime": {set: true, val: syscall.MS_STRICTATIME}, + "suid": {set: false, val: syscall.MS_NOSUID}, + "sync": {set: true, val: syscall.MS_SYNCHRONOUS}, +} + +// propOptionsMap is similar to optionsMap, but it lists propagation options +// that cannot be used together with other flags. 
+var propOptionsMap = map[string]mapping{ + "private": {set: true, val: syscall.MS_PRIVATE}, + "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC}, + "slave": {set: true, val: syscall.MS_SLAVE}, + "rslave": {set: true, val: syscall.MS_SLAVE | syscall.MS_REC}, + "unbindable": {set: true, val: syscall.MS_UNBINDABLE}, + "runbindable": {set: true, val: syscall.MS_UNBINDABLE | syscall.MS_REC}, +} + +// invalidOptions list options not allowed. +// - shared: sandbox must be isolated from the host. Propagating mount changes +// from the sandbox to the host breaks the isolation. +// - noexec: not yet supported. Don't ignore it since it could break +// in-sandbox security. +var invalidOptions = []string{"shared", "rshared", "noexec"} + +// OptionsToFlags converts mount options to syscall flags. +func OptionsToFlags(opts []string) uint32 { + return optionsToFlags(opts, optionsMap) +} + +// PropOptionsToFlags converts propagation mount options to syscall flags. +// Propagation options cannot be set other with other options and must be +// handled separatedly. +func PropOptionsToFlags(opts []string) uint32 { + return optionsToFlags(opts, propOptionsMap) +} + +func optionsToFlags(opts []string, source map[string]mapping) uint32 { + var rv uint32 + for _, opt := range opts { + if m, ok := source[opt]; ok { + if m.set { + rv |= m.val + } else { + rv ^= m.val + } + } + } + return rv +} + +// ValidateMount validates that spec mounts are correct. 
+func validateMount(mnt *specs.Mount) error { + if !path.IsAbs(mnt.Destination) { + return fmt.Errorf("Mount.Destination must be an absolute path: %v", mnt) + } + + if mnt.Type == "bind" { + for _, o := range mnt.Options { + if ContainsStr(invalidOptions, o) { + return fmt.Errorf("mount option %q is not supported: %v", o, mnt) + } + _, ok1 := optionsMap[o] + _, ok2 := propOptionsMap[o] + if !ok1 && !ok2 { + log.Warningf("Ignoring unknown mount option %q", o) + } + } + } + return nil +} + +// ValidateRootfsPropagation validates that rootfs propagation options are +// correct. +func validateRootfsPropagation(opt string) error { + flags := PropOptionsToFlags([]string{opt}) + if flags&(syscall.MS_SLAVE|syscall.MS_PRIVATE) == 0 { + return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt) + } + return nil +} diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index 73fab13e1..35da789f4 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -105,9 +105,9 @@ func FilterNS(filter []specs.LinuxNamespaceType, s *specs.Spec) []specs.LinuxNam return out } -// SetNS sets the namespace of the given type. It must be called with +// setNS sets the namespace of the given type. It must be called with // OSThreadLocked. -func SetNS(fd, nsType uintptr) error { +func setNS(fd, nsType uintptr) error { if _, _, err := syscall.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 { return err } @@ -119,30 +119,30 @@ func SetNS(fd, nsType uintptr) error { // // Preconditions: Must be called with os thread locked. func ApplyNS(ns specs.LinuxNamespace) (func(), error) { - log.Infof("applying namespace %v at path %q", ns.Type, ns.Path) + log.Infof("Applying namespace %v at path %q", ns.Type, ns.Path) newNS, err := os.Open(ns.Path) if err != nil { return nil, fmt.Errorf("error opening %q: %v", ns.Path, err) } defer newNS.Close() - // Store current netns to restore back after child is started. 
+ // Store current namespace to restore back. curPath := nsPath(ns.Type) oldNS, err := os.Open(curPath) if err != nil { return nil, fmt.Errorf("error opening %q: %v", curPath, err) } - // Set netns to the one requested and setup function to restore it back. + // Set namespace to the one requested and setup function to restore it back. flag := nsCloneFlag(ns.Type) - if err := SetNS(newNS.Fd(), flag); err != nil { + if err := setNS(newNS.Fd(), flag); err != nil { oldNS.Close() return nil, fmt.Errorf("error setting namespace of type %v and path %q: %v", ns.Type, ns.Path, err) } return func() { - log.Infof("restoring namespace %v", ns.Type) + log.Infof("Restoring namespace %v", ns.Type) defer oldNS.Close() - if err := SetNS(oldNS.Fd(), flag); err != nil { + if err := setNS(oldNS.Fd(), flag); err != nil { panic(fmt.Sprintf("error restoring namespace: of type %v: %v", ns.Type, err)) } }, nil diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 4e7893ab4..cbf099c64 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -92,9 +92,14 @@ func ValidateSpec(spec *specs.Spec) error { log.Warningf("Seccomp spec is being ignored") } - for i, m := range spec.Mounts { - if !path.IsAbs(m.Destination) { - return fmt.Errorf("Spec.Mounts[%d] Mount.Destination must be an absolute path: %v", i, m) + if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { + if err := validateRootfsPropagation(spec.Linux.RootfsPropagation); err != nil { + return err + } + } + for _, m := range spec.Mounts { + if err := validateMount(&m); err != nil { + return err } } @@ -129,15 +134,19 @@ func absPath(base, rel string) string { return filepath.Join(base, rel) } +// OpenSpec opens an OCI runtime spec from the given bundle directory. +func OpenSpec(bundleDir string) (*os.File, error) { + // The spec file must be named "config.json" inside the bundle directory. 
+ return os.Open(filepath.Join(bundleDir, "config.json")) +} + // ReadSpec reads an OCI runtime spec from the given bundle directory. // ReadSpec also normalizes all potential relative paths into absolute // path, e.g. spec.Root.Path, mount.Source. func ReadSpec(bundleDir string) (*specs.Spec, error) { - // The spec file must be in "config.json" inside the bundle directory. - specPath := filepath.Join(bundleDir, "config.json") - specFile, err := os.Open(specPath) + specFile, err := OpenSpec(bundleDir) if err != nil { - return nil, fmt.Errorf("error opening spec file %q: %v", specPath, err) + return nil, fmt.Errorf("error opening spec file %q: %v", specFile.Name(), err) } defer specFile.Close() return ReadSpecFromFile(bundleDir, specFile) @@ -171,27 +180,17 @@ func ReadSpecFromFile(bundleDir string, specFile *os.File) (*specs.Spec, error) return &spec, nil } -// OpenCleanSpec opens spec file that has destination mount paths resolved to -// their absolute location. -func OpenCleanSpec(bundleDir string) (*os.File, error) { - f, err := os.Open(filepath.Join(bundleDir, "config.clean.json")) +// ReadMounts reads mount list from a file. +func ReadMounts(f *os.File) ([]specs.Mount, error) { + bytes, err := ioutil.ReadAll(f) if err != nil { - return nil, err + return nil, fmt.Errorf("error reading mounts: %v", err) } - if _, err := f.Seek(0, os.SEEK_SET); err != nil { - f.Close() - return nil, fmt.Errorf("error seeking to beginning of file %q: %v", f.Name(), err) - } - return f, nil -} - -// WriteCleanSpec writes a spec file that has destination mount paths resolved. 
-func WriteCleanSpec(bundleDir string, spec *specs.Spec) error { - bytes, err := json.Marshal(spec) - if err != nil { - return err + var mounts []specs.Mount + if err := json.Unmarshal(bytes, &mounts); err != nil { + return nil, fmt.Errorf("error unmarshaling mounts: %v\n %s", err, string(bytes)) } - return ioutil.WriteFile(filepath.Join(bundleDir, "config.clean.json"), bytes, 0755) + return mounts, nil } // Capabilities takes in spec and returns a TaskCapabilities corresponding to @@ -407,8 +406,7 @@ func Mount(src, dst, typ string, flags uint32) error { // source (file or directory). var isDir bool if typ == "proc" { - // Special case, as there is no source directory for proc - // mounts. + // Special case, as there is no source directory for proc mounts. isDir = true } else if fi, err := os.Stat(src); err != nil { return fmt.Errorf("Stat(%q) failed: %v", src, err) diff --git a/runsc/specutils/specutils_test.go b/runsc/specutils/specutils_test.go index b61f1ca62..02af6e6ad 100644 --- a/runsc/specutils/specutils_test.go +++ b/runsc/specutils/specutils_test.go @@ -219,6 +219,37 @@ func TestSpecInvalid(t *testing.T) { }, error: "must be an absolute path", }, + { + name: "invalid mount option", + spec: specs.Spec{ + Root: &specs.Root{Path: "/"}, + Process: &specs.Process{ + Args: []string{"/bin/true"}, + }, + Mounts: []specs.Mount{ + { + Source: "/src", + Destination: "/dst", + Type: "bind", + Options: []string{"shared"}, + }, + }, + }, + error: "is not supported", + }, + { + name: "invalid rootfs propagation", + spec: specs.Spec{ + Root: &specs.Root{Path: "/"}, + Process: &specs.Process{ + Args: []string{"/bin/true"}, + }, + Linux: &specs.Linux{ + RootfsPropagation: "foo", + }, + }, + error: "root mount propagation option must specify private or slave", + }, } { err := ValidateSpec(&test.spec) if len(test.error) == 0 { |