diff options
author | Fabricio Voznika <fvoznika@google.com> | 2018-08-27 11:09:06 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-08-27 11:10:14 -0700 |
commit | db81c0b02f2f947ae837a3e16471a148a66436eb (patch) | |
tree | d91ef12da80b0a76ef1c69db290665e31cc59860 /runsc/container/fs.go | |
parent | 2524111fc63343fd7372f5ea0266130adea778a5 (diff) |
Put fsgofer inside chroot
Now each container gets its own dedicated gofer that is chroot'd to the
rootfs path. This is done to add an extra layer of security in case the
gofer gets compromised.
PiperOrigin-RevId: 210396476
Change-Id: Iba21360a59dfe90875d61000db103f8609157ca0
Diffstat (limited to 'runsc/container/fs.go')
-rw-r--r-- | runsc/container/fs.go | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/runsc/container/fs.go b/runsc/container/fs.go new file mode 100644 index 000000000..652f81bbf --- /dev/null +++ b/runsc/container/fs.go @@ -0,0 +1,198 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package container + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +type mapping struct { + set bool + val uint32 +} + +var optionsMap = map[string]mapping{ + "acl": {set: true, val: syscall.MS_POSIXACL}, + "async": {set: false, val: syscall.MS_SYNCHRONOUS}, + "atime": {set: false, val: syscall.MS_NOATIME}, + "bind": {set: true, val: syscall.MS_BIND}, + "defaults": {set: true, val: 0}, + "dev": {set: false, val: syscall.MS_NODEV}, + "diratime": {set: false, val: syscall.MS_NODIRATIME}, + "dirsync": {set: true, val: syscall.MS_DIRSYNC}, + "exec": {set: false, val: syscall.MS_NOEXEC}, + "iversion": {set: true, val: syscall.MS_I_VERSION}, + "loud": {set: false, val: syscall.MS_SILENT}, + "mand": {set: true, val: syscall.MS_MANDLOCK}, + "noacl": {set: false, val: syscall.MS_POSIXACL}, + "noatime": {set: true, val: syscall.MS_NOATIME}, + "nodev": {set: true, val: syscall.MS_NODEV}, + "nodiratime": {set: true, val: syscall.MS_NODIRATIME}, + "noexec": {set: true, val: syscall.MS_NOEXEC}, + "noiversion": {set: false, val: syscall.MS_I_VERSION}, + "nomand": {set: false, val: syscall.MS_MANDLOCK}, + "norelatime": {set: false, val: syscall.MS_RELATIME}, + "nostrictatime": {set: false, val: syscall.MS_STRICTATIME}, + "nosuid": {set: true, val: syscall.MS_NOSUID}, + "private": {set: true, val: syscall.MS_PRIVATE}, + "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC}, + "relatime": {set: true, val: syscall.MS_RELATIME}, + "remount": {set: true, val: syscall.MS_REMOUNT}, + "ro": {set: true, val: syscall.MS_RDONLY}, + "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC}, + "rw": {set: false, val: syscall.MS_RDONLY}, + "silent": {set: true, val: syscall.MS_SILENT}, + "strictatime": {set: true, val: syscall.MS_STRICTATIME}, + "suid": {set: false, val: syscall.MS_NOSUID}, + "sync": {set: true, val: syscall.MS_SYNCHRONOUS}, +} + +// setupFS creates the container directory structure under 'spec.Root.Path'. +// This allows the gofer serving the containers to be chroot under this +// directory to create an extra layer to security in case the gofer gets +// compromised. +func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) error { + for _, m := range spec.Mounts { + if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + continue + } + src := m.Source + if !filepath.IsAbs(src) { + src = filepath.Join(bundleDir, src) + } + srcfi, err := os.Stat(src) + if err != nil { + return err + } + + // It's possible that 'm.Destination' follows symlinks inside the + // container. + dst, err := resolveSymlinks(spec.Root.Path, m.Destination) + if err != nil { + return err + } + + // Create mount point if it doesn't exits + if _, err := os.Stat(dst); os.IsNotExist(err) { + if srcfi.IsDir() { + if err := os.MkdirAll(dst, 0755); err != nil { + return err + } + } else { + if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil { + return err + } + f, err := os.OpenFile(dst, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() + } + } + + flags := optionsToFlags(m.Options) + flags |= syscall.MS_BIND + log.Infof("Mounting src: %q, dst: %q, flags: %#x", src, dst, flags) + if err := syscall.Mount(src, dst, m.Type, uintptr(flags), ""); err != nil { + return err + } + } + + // Remount root as readonly after setup is done, if requested. + if spec.Root.Readonly { + log.Infof("Remounting root as readonly: %q", spec.Root.Path) + flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC) + return unix.Mount(spec.Root.Path, spec.Root.Path, "bind", flags, "") + } + return nil +} + +// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are +// symlinks, they are evaluated relative to 'root' to ensure the end result is +// the same as if the process was running inside the container. +func resolveSymlinks(root, rel string) (string, error) { + return resolveSymlinksImpl(root, root, rel, 255) +} + +func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) { + if followCount == 0 { + return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel)) + } + + rel = filepath.Clean(rel) + for _, name := range strings.Split(rel, string(filepath.Separator)) { + if name == "" { + continue + } + // Note that Join() resolves things like ".." and returns a clean path. + path := filepath.Join(base, name) + if !strings.HasPrefix(path, root) { + // One cannot '..' their way out of root. + path = root + continue + } + fi, err := os.Lstat(path) + if err != nil { + if !os.IsNotExist(err) { + return "", err + } + // Not found means there is no symlink to check. Just keep walking dirs. + base = path + continue + } + if fi.Mode()&os.ModeSymlink != 0 { + link, err := os.Readlink(path) + if err != nil { + return "", err + } + if filepath.IsAbs(link) { + base = root + } + base, err = resolveSymlinksImpl(root, base, link, followCount-1) + if err != nil { + return "", err + } + continue + } + base = path + } + return base, nil +} + +func optionsToFlags(opts []string) uint32 { + var rv uint32 + for _, opt := range opts { + if m, ok := optionsMap[opt]; ok { + if m.set { + rv |= m.val + } else { + rv ^= m.val + } + } else { + log.Warningf("Ignoring mount option %q", opt) + } + } + return rv +} |