diff options
Diffstat (limited to 'runsc/container/fs.go')
-rw-r--r-- | runsc/container/fs.go | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/runsc/container/fs.go b/runsc/container/fs.go new file mode 100644 index 000000000..652f81bbf --- /dev/null +++ b/runsc/container/fs.go @@ -0,0 +1,198 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package container + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +type mapping struct { + set bool + val uint32 +} + +var optionsMap = map[string]mapping{ + "acl": {set: true, val: syscall.MS_POSIXACL}, + "async": {set: false, val: syscall.MS_SYNCHRONOUS}, + "atime": {set: false, val: syscall.MS_NOATIME}, + "bind": {set: true, val: syscall.MS_BIND}, + "defaults": {set: true, val: 0}, + "dev": {set: false, val: syscall.MS_NODEV}, + "diratime": {set: false, val: syscall.MS_NODIRATIME}, + "dirsync": {set: true, val: syscall.MS_DIRSYNC}, + "exec": {set: false, val: syscall.MS_NOEXEC}, + "iversion": {set: true, val: syscall.MS_I_VERSION}, + "loud": {set: false, val: syscall.MS_SILENT}, + "mand": {set: true, val: syscall.MS_MANDLOCK}, + "noacl": {set: false, val: syscall.MS_POSIXACL}, + "noatime": {set: true, val: syscall.MS_NOATIME}, + "nodev": {set: true, val: syscall.MS_NODEV}, + "nodiratime": {set: true, val: syscall.MS_NODIRATIME}, + "noexec": {set: true, val: syscall.MS_NOEXEC}, + "noiversion": {set: false, val: syscall.MS_I_VERSION}, + "nomand": {set: false, val: syscall.MS_MANDLOCK}, + "norelatime": {set: false, val: syscall.MS_RELATIME}, + "nostrictatime": {set: false, val: syscall.MS_STRICTATIME}, + "nosuid": {set: true, val: syscall.MS_NOSUID}, + "private": {set: true, val: syscall.MS_PRIVATE}, + "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC}, + "relatime": {set: true, val: syscall.MS_RELATIME}, + "remount": {set: true, val: syscall.MS_REMOUNT}, + "ro": {set: true, val: syscall.MS_RDONLY}, + "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC}, + "rw": {set: false, val: syscall.MS_RDONLY}, + "silent": {set: true, val: syscall.MS_SILENT}, + "strictatime": {set: true, val: syscall.MS_STRICTATIME}, + "suid": {set: false, val: syscall.MS_NOSUID}, + "sync": {set: true, val: syscall.MS_SYNCHRONOUS}, +} + +// setupFS creates the container directory structure under 'spec.Root.Path'. +// This allows the gofer serving the containers to be chroot under this +// directory to create an extra layer to security in case the gofer gets +// compromised. +func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) error { + for _, m := range spec.Mounts { + if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + continue + } + src := m.Source + if !filepath.IsAbs(src) { + src = filepath.Join(bundleDir, src) + } + srcfi, err := os.Stat(src) + if err != nil { + return err + } + + // It's possible that 'm.Destination' follows symlinks inside the + // container. + dst, err := resolveSymlinks(spec.Root.Path, m.Destination) + if err != nil { + return err + } + + // Create mount point if it doesn't exits + if _, err := os.Stat(dst); os.IsNotExist(err) { + if srcfi.IsDir() { + if err := os.MkdirAll(dst, 0755); err != nil { + return err + } + } else { + if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil { + return err + } + f, err := os.OpenFile(dst, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() + } + } + + flags := optionsToFlags(m.Options) + flags |= syscall.MS_BIND + log.Infof("Mounting src: %q, dst: %q, flags: %#x", src, dst, flags) + if err := syscall.Mount(src, dst, m.Type, uintptr(flags), ""); err != nil { + return err + } + } + + // Remount root as readonly after setup is done, if requested. + if spec.Root.Readonly { + log.Infof("Remounting root as readonly: %q", spec.Root.Path) + flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC) + return unix.Mount(spec.Root.Path, spec.Root.Path, "bind", flags, "") + } + return nil +} + +// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are +// symlinks, they are evaluated relative to 'root' to ensure the end result is +// the same as if the process was running inside the container. +func resolveSymlinks(root, rel string) (string, error) { + return resolveSymlinksImpl(root, root, rel, 255) +} + +func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) { + if followCount == 0 { + return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel)) + } + + rel = filepath.Clean(rel) + for _, name := range strings.Split(rel, string(filepath.Separator)) { + if name == "" { + continue + } + // Note that Join() resolves things like ".." and returns a clean path. + path := filepath.Join(base, name) + if !strings.HasPrefix(path, root) { + // One cannot '..' their way out of root. + path = root + continue + } + fi, err := os.Lstat(path) + if err != nil { + if !os.IsNotExist(err) { + return "", err + } + // Not found means there is no symlink to check. Just keep walking dirs. + base = path + continue + } + if fi.Mode()&os.ModeSymlink != 0 { + link, err := os.Readlink(path) + if err != nil { + return "", err + } + if filepath.IsAbs(link) { + base = root + } + base, err = resolveSymlinksImpl(root, base, link, followCount-1) + if err != nil { + return "", err + } + continue + } + base = path + } + return base, nil +} + +func optionsToFlags(opts []string) uint32 { + var rv uint32 + for _, opt := range opts { + if m, ok := optionsMap[opt]; ok { + if m.set { + rv |= m.val + } else { + rv ^= m.val + } + } else { + log.Warningf("Ignoring mount option %q", opt) + } + } + return rv +} |