diff options
Diffstat (limited to 'runsc/container/fs.go')
-rw-r--r-- | runsc/container/fs.go | 287 |
1 files changed, 0 insertions, 287 deletions
diff --git a/runsc/container/fs.go b/runsc/container/fs.go deleted file mode 100644 index 998160487..000000000 --- a/runsc/container/fs.go +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright 2018 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package container - -import ( - "bufio" - "fmt" - "os" - "path/filepath" - "strings" - "syscall" - - specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/runsc/boot" - "gvisor.googlesource.com/gvisor/runsc/specutils" -) - -type mapping struct { - set bool - val uint32 -} - -var optionsMap = map[string]mapping{ - "acl": {set: true, val: syscall.MS_POSIXACL}, - "async": {set: false, val: syscall.MS_SYNCHRONOUS}, - "atime": {set: false, val: syscall.MS_NOATIME}, - "bind": {set: true, val: syscall.MS_BIND}, - "defaults": {set: true, val: 0}, - "dev": {set: false, val: syscall.MS_NODEV}, - "diratime": {set: false, val: syscall.MS_NODIRATIME}, - "dirsync": {set: true, val: syscall.MS_DIRSYNC}, - "exec": {set: false, val: syscall.MS_NOEXEC}, - "iversion": {set: true, val: syscall.MS_I_VERSION}, - "loud": {set: false, val: syscall.MS_SILENT}, - "mand": {set: true, val: syscall.MS_MANDLOCK}, - "noacl": {set: false, val: syscall.MS_POSIXACL}, - "noatime": {set: true, val: syscall.MS_NOATIME}, - "nodev": {set: true, val: syscall.MS_NODEV}, - "nodiratime": {set: true, val: syscall.MS_NODIRATIME}, - "noexec": {set: true, val: syscall.MS_NOEXEC}, - "noiversion": {set: false, val: syscall.MS_I_VERSION}, - "nomand": {set: false, val: syscall.MS_MANDLOCK}, - "norelatime": {set: false, val: syscall.MS_RELATIME}, - "nostrictatime": {set: false, val: syscall.MS_STRICTATIME}, - "nosuid": {set: true, val: syscall.MS_NOSUID}, - "private": {set: true, val: syscall.MS_PRIVATE}, - "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC}, - "relatime": {set: true, val: syscall.MS_RELATIME}, - "remount": {set: true, val: syscall.MS_REMOUNT}, - "ro": {set: true, val: syscall.MS_RDONLY}, - "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC}, - "rw": {set: false, val: syscall.MS_RDONLY}, - "silent": {set: true, val: syscall.MS_SILENT}, - "strictatime": {set: true, val: syscall.MS_STRICTATIME}, - "suid": {set: false, val: syscall.MS_NOSUID}, - "sync": {set: true, val: syscall.MS_SYNCHRONOUS}, -} - -// setupFS creates the container directory structure under 'spec.Root.Path'. -// This allows the gofer serving the containers to be chroot under this -// directory to create an extra layer to security in case the gofer gets -// compromised. -// Returns list of mounts equivalent to 'spec.Mounts' with all destination paths -// cleaned and with symlinks resolved. -func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]specs.Mount, error) { - rv := make([]specs.Mount, 0, len(spec.Mounts)) - for _, m := range spec.Mounts { - if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { - rv = append(rv, m) - continue - } - - // It's possible that 'm.Destination' follows symlinks inside the - // container. - dst, err := resolveSymlinks(spec.Root.Path, m.Destination) - if err != nil { - return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) - } - - flags := optionsToFlags(m.Options) - flags |= syscall.MS_BIND - log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) - if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil { - return nil, fmt.Errorf("mounting %v: %v", m, err) - } - - // Make the mount a slave, so that for recursive bind mount, umount won't - // propagate to the source. - flags = syscall.MS_SLAVE | syscall.MS_REC - if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil { - return nil, fmt.Errorf("mount rslave dst: %q, flags: %#x, err: %v", dst, flags, err) - } - - cpy := m - relDst, err := filepath.Rel(spec.Root.Path, dst) - if err != nil { - panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, spec.Root.Path, err)) - } - cpy.Destination = filepath.Join("/", relDst) - rv = append(rv, cpy) - } - - if spec.Process.Cwd != "" { - dst, err := resolveSymlinks(spec.Root.Path, spec.Process.Cwd) - if err != nil { - return nil, fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err) - } - if err := os.MkdirAll(dst, 0755); err != nil { - return nil, err - } - } - - // If root is read only, check if it needs to be remounted as readonly. - if spec.Root.Readonly { - isMountPoint, readonly, err := mountInfo(spec.Root.Path) - if err != nil { - return nil, err - } - if readonly { - return rv, nil - } - if !isMountPoint { - // Readonly root is not a mount point nor read-only. Can't do much other - // than just logging a warning. The gofer will prevent files to be open - // in write mode. - log.Warningf("Mount where root is located is not read-only and cannot be changed: %q", spec.Root.Path) - return rv, nil - } - - // If root is a mount point but not read-only, we can change mount options - // to make it read-only for extra safety. - log.Infof("Remounting root as readonly: %q", spec.Root.Path) - flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC) - src := spec.Root.Path - if err := syscall.Mount(src, src, "bind", flags, ""); err != nil { - return nil, fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", spec.Root.Path, spec.Root.Path, flags, err) - } - } - return rv, nil -} - -// mountInfo returns whether the path is a mount point and whether the mount -// that path belongs to is read-only. -func mountInfo(path string) (bool, bool, error) { - // Mounts are listed by their real paths. - realPath, err := filepath.EvalSymlinks(path) - if err != nil { - return false, false, err - } - f, err := os.Open("/proc/mounts") - if err != nil { - return false, false, err - } - scanner := bufio.NewScanner(f) - - var mountPoint string - var readonly bool - for scanner.Scan() { - line := scanner.Text() - parts := strings.Split(line, " ") - if len(parts) < 4 { - return false, false, fmt.Errorf("invalid /proc/mounts line format %q", line) - } - mp := parts[1] - opts := strings.Split(parts[3], ",") - - // Find the closest submount to the path. - if strings.Contains(realPath, mp) && len(mp) > len(mountPoint) { - mountPoint = mp - readonly = specutils.ContainsStr(opts, "ro") - } - } - if err := scanner.Err(); err != nil { - return false, false, err - } - return mountPoint == realPath, readonly, nil -} - -// destroyFS unmounts mounts done by runsc under `spec.Root.Path`. This -// recovers the container rootfs into the original state. -func destroyFS(spec *specs.Spec) error { - for _, m := range spec.Mounts { - if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { - continue - } - - // It's possible that 'm.Destination' follows symlinks inside the - // container. - dst, err := resolveSymlinks(spec.Root.Path, m.Destination) - if err != nil { - return err - } - - flags := syscall.MNT_DETACH - log.Infof("Unmounting dst: %q, flags: %#x", dst, flags) - // Do not return error if dst is not a mountpoint. - // Based on http://man7.org/linux/man-pages/man2/umount.2.html - // For kernel version 2.6+ and MNT_DETACH flag, EINVAL means - // the dst is not a mount point. - if err := syscall.Unmount(dst, flags); err != nil && - !os.IsNotExist(err) && err != syscall.EINVAL { - return err - } - } - return nil -} - -// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are -// symlinks, they are evaluated relative to 'root' to ensure the end result is -// the same as if the process was running inside the container. -func resolveSymlinks(root, rel string) (string, error) { - return resolveSymlinksImpl(root, root, rel, 255) -} - -func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) { - if followCount == 0 { - return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel)) - } - - rel = filepath.Clean(rel) - for _, name := range strings.Split(rel, string(filepath.Separator)) { - if name == "" { - continue - } - // Note that Join() resolves things like ".." and returns a clean path. - path := filepath.Join(base, name) - if !strings.HasPrefix(path, root) { - // One cannot '..' their way out of root. - path = root - continue - } - fi, err := os.Lstat(path) - if err != nil { - if !os.IsNotExist(err) { - return "", err - } - // Not found means there is no symlink to check. Just keep walking dirs. - base = path - continue - } - if fi.Mode()&os.ModeSymlink != 0 { - link, err := os.Readlink(path) - if err != nil { - return "", err - } - if filepath.IsAbs(link) { - base = root - } - base, err = resolveSymlinksImpl(root, base, link, followCount-1) - if err != nil { - return "", err - } - continue - } - base = path - } - return base, nil -} - -func optionsToFlags(opts []string) uint32 { - var rv uint32 - for _, opt := range opts { - if m, ok := optionsMap[opt]; ok { - if m.set { - rv |= m.val - } else { - rv ^= m.val - } - } else { - log.Warningf("Ignoring mount option %q", opt) - } - } - return rv -} |