summaryrefslogtreecommitdiffhomepage
path: root/runsc/container/fs.go
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/container/fs.go')
-rw-r--r--runsc/container/fs.go198
1 files changed, 198 insertions, 0 deletions
diff --git a/runsc/container/fs.go b/runsc/container/fs.go
new file mode 100644
index 000000000..652f81bbf
--- /dev/null
+++ b/runsc/container/fs.go
@@ -0,0 +1,198 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "syscall"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
// mapping describes the effect a single mount option name has on the mount
// flag bitmask.
type mapping struct {
	// set is true when the option turns the bits in val on, and false when
	// the option is the negated form of the flag (e.g. "rw" for MS_RDONLY).
	set bool

	// val holds the mount flag bit(s) the option refers to.
	val uint32
}
+
+var optionsMap = map[string]mapping{
+ "acl": {set: true, val: syscall.MS_POSIXACL},
+ "async": {set: false, val: syscall.MS_SYNCHRONOUS},
+ "atime": {set: false, val: syscall.MS_NOATIME},
+ "bind": {set: true, val: syscall.MS_BIND},
+ "defaults": {set: true, val: 0},
+ "dev": {set: false, val: syscall.MS_NODEV},
+ "diratime": {set: false, val: syscall.MS_NODIRATIME},
+ "dirsync": {set: true, val: syscall.MS_DIRSYNC},
+ "exec": {set: false, val: syscall.MS_NOEXEC},
+ "iversion": {set: true, val: syscall.MS_I_VERSION},
+ "loud": {set: false, val: syscall.MS_SILENT},
+ "mand": {set: true, val: syscall.MS_MANDLOCK},
+ "noacl": {set: false, val: syscall.MS_POSIXACL},
+ "noatime": {set: true, val: syscall.MS_NOATIME},
+ "nodev": {set: true, val: syscall.MS_NODEV},
+ "nodiratime": {set: true, val: syscall.MS_NODIRATIME},
+ "noexec": {set: true, val: syscall.MS_NOEXEC},
+ "noiversion": {set: false, val: syscall.MS_I_VERSION},
+ "nomand": {set: false, val: syscall.MS_MANDLOCK},
+ "norelatime": {set: false, val: syscall.MS_RELATIME},
+ "nostrictatime": {set: false, val: syscall.MS_STRICTATIME},
+ "nosuid": {set: true, val: syscall.MS_NOSUID},
+ "private": {set: true, val: syscall.MS_PRIVATE},
+ "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC},
+ "relatime": {set: true, val: syscall.MS_RELATIME},
+ "remount": {set: true, val: syscall.MS_REMOUNT},
+ "ro": {set: true, val: syscall.MS_RDONLY},
+ "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC},
+ "rw": {set: false, val: syscall.MS_RDONLY},
+ "silent": {set: true, val: syscall.MS_SILENT},
+ "strictatime": {set: true, val: syscall.MS_STRICTATIME},
+ "suid": {set: false, val: syscall.MS_NOSUID},
+ "sync": {set: true, val: syscall.MS_SYNCHRONOUS},
+}
+
+// setupFS creates the container directory structure under 'spec.Root.Path'.
+// This allows the gofer serving the containers to be chroot under this
+// directory to create an extra layer to security in case the gofer gets
+// compromised.
+func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) error {
+ for _, m := range spec.Mounts {
+ if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
+ continue
+ }
+ src := m.Source
+ if !filepath.IsAbs(src) {
+ src = filepath.Join(bundleDir, src)
+ }
+ srcfi, err := os.Stat(src)
+ if err != nil {
+ return err
+ }
+
+ // It's possible that 'm.Destination' follows symlinks inside the
+ // container.
+ dst, err := resolveSymlinks(spec.Root.Path, m.Destination)
+ if err != nil {
+ return err
+ }
+
+ // Create mount point if it doesn't exits
+ if _, err := os.Stat(dst); os.IsNotExist(err) {
+ if srcfi.IsDir() {
+ if err := os.MkdirAll(dst, 0755); err != nil {
+ return err
+ }
+ } else {
+ if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
+ return err
+ }
+ f, err := os.OpenFile(dst, os.O_CREATE, 0755)
+ if err != nil {
+ return err
+ }
+ f.Close()
+ }
+ }
+
+ flags := optionsToFlags(m.Options)
+ flags |= syscall.MS_BIND
+ log.Infof("Mounting src: %q, dst: %q, flags: %#x", src, dst, flags)
+ if err := syscall.Mount(src, dst, m.Type, uintptr(flags), ""); err != nil {
+ return err
+ }
+ }
+
+ // Remount root as readonly after setup is done, if requested.
+ if spec.Root.Readonly {
+ log.Infof("Remounting root as readonly: %q", spec.Root.Path)
+ flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC)
+ return unix.Mount(spec.Root.Path, spec.Root.Path, "bind", flags, "")
+ }
+ return nil
+}
+
+// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are
+// symlinks, they are evaluated relative to 'root' to ensure the end result is
+// the same as if the process was running inside the container.
+func resolveSymlinks(root, rel string) (string, error) {
+ return resolveSymlinksImpl(root, root, rel, 255)
+}
+
// resolveSymlinksImpl walks 'rel' one path component at a time, starting at
// 'base' and treating 'root' as the root directory, and returns the resulting
// path.
//
//   - root: directory that plays the role of "/"; the walk never leaves it.
//   - base: directory the walk starts from (expected to be 'root' or below).
//   - rel: path to resolve; a leading separator is ignored, i.e. the path is
//     always taken relative to 'base'.
//   - followCount: remaining budget of symlinks that may be followed.
//
// Symlinks are followed recursively: absolute targets restart from 'root',
// relative targets continue from the directory containing the link.
// Components that do not exist are appended unchanged. An error is returned
// when the symlink budget is exhausted or when Lstat/Readlink fail for a
// reason other than the path not existing.
func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) {
	if followCount == 0 {
		return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel))
	}

	sep := string(filepath.Separator)
	rel = filepath.Clean(rel)
	for _, name := range strings.Split(rel, sep) {
		if name == "" {
			continue
		}
		// Note that Join() resolves things like ".." and returns a clean path.
		path := filepath.Join(base, name)

		// Check containment component-wise rather than with a raw string
		// prefix: a prefix test rejects root itself when 'root' carries a
		// trailing separator and accepts siblings such as "/a/bc" for root
		// "/a/b".
		relToRoot, err := filepath.Rel(root, path)
		if err != nil || relToRoot == ".." || strings.HasPrefix(relToRoot, ".."+sep) {
			// One cannot '..' their way out of root.
			base = root
			continue
		}

		fi, err := os.Lstat(path)
		if err != nil {
			if !os.IsNotExist(err) {
				return "", err
			}
			// Not found means there is no symlink to check. Just keep walking dirs.
			base = path
			continue
		}
		if fi.Mode()&os.ModeSymlink != 0 {
			link, err := os.Readlink(path)
			if err != nil {
				return "", err
			}
			if filepath.IsAbs(link) {
				// Absolute targets are relative to the container root, not
				// the host root.
				base = root
			}
			base, err = resolveSymlinksImpl(root, base, link, followCount-1)
			if err != nil {
				return "", err
			}
			continue
		}
		base = path
	}
	return base, nil
}
+
+func optionsToFlags(opts []string) uint32 {
+ var rv uint32
+ for _, opt := range opts {
+ if m, ok := optionsMap[opt]; ok {
+ if m.set {
+ rv |= m.val
+ } else {
+ rv ^= m.val
+ }
+ } else {
+ log.Warningf("Ignoring mount option %q", opt)
+ }
+ }
+ return rv
+}