diff options
author | Fabricio Voznika <fvoznika@google.com> | 2018-08-27 11:09:06 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-08-27 11:10:14 -0700 |
commit | db81c0b02f2f947ae837a3e16471a148a66436eb (patch) | |
tree | d91ef12da80b0a76ef1c69db290665e31cc59860 /runsc/container | |
parent | 2524111fc63343fd7372f5ea0266130adea778a5 (diff) |
Put fsgofer inside chroot
Now each container gets its own dedicated gofer that is chroot'd to the
rootfs path. This is done to add an extra layer of security in case the
gofer gets compromised.
PiperOrigin-RevId: 210396476
Change-Id: Iba21360a59dfe90875d61000db103f8609157ca0
Diffstat (limited to 'runsc/container')
-rw-r--r-- | runsc/container/BUILD | 7 | ||||
-rw-r--r-- | runsc/container/container.go | 96 | ||||
-rw-r--r-- | runsc/container/container_test.go | 7 | ||||
-rw-r--r-- | runsc/container/fs.go | 198 | ||||
-rw-r--r-- | runsc/container/fs_test.go | 158 |
5 files changed, 450 insertions, 16 deletions
diff --git a/runsc/container/BUILD b/runsc/container/BUILD index e40ca4709..cba418d0c 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -13,6 +13,7 @@ go_library( name = "container", srcs = [ "container.go", + "fs.go", "hook.go", "status.go", ], @@ -28,13 +29,17 @@ go_library( "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) go_test( name = "container_test", size = "medium", - srcs = ["container_test.go"], + srcs = [ + "container_test.go", + "fs_test.go", + ], data = [ ":uds_test_app", "//runsc", diff --git a/runsc/container/container.go b/runsc/container/container.go index 8bd47aac1..16af66d3e 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -21,6 +21,7 @@ import ( "fmt" "io/ioutil" "os" + "os/exec" "path/filepath" "regexp" "strconv" @@ -223,15 +224,19 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // init container in the sandbox. if specutils.ShouldCreateSandbox(spec) || !conf.MultiContainer { log.Debugf("Creating new sandbox for container %q", id) + ioFiles, err := c.createGoferProcess(spec, conf, bundleDir) + if err != nil { + return nil, err + } + // Start a new sandbox for this container. Any errors after this point // must destroy the container. - s, goferPid, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket) + s, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket, ioFiles) if err != nil { c.Destroy() return nil, err } c.Sandbox = s - c.GoferPid = goferPid } else { // This is sort of confusing. For a sandbox with a root // container and a child container in it, runsc sees: @@ -254,13 +259,6 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo return nil, err } c.Sandbox = sb.Sandbox - - // Prepare the gofer to serve the container's filesystem. - err = sb.Sandbox.CreateChild(c.ID, bundleDir) - if err != nil { - c.Destroy() - return nil, err - } } c.Status = Created @@ -304,7 +302,12 @@ func (c *Container) Start(conf *boot.Config) error { return err } } else { - if err := c.Sandbox.Start(c.Spec, conf, c.ID); err != nil { + // Create the gofer process. + ioFiles, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) + if err != nil { + return err + } + if err := c.Sandbox.Start(c.Spec, conf, c.ID, ioFiles); err != nil { c.Destroy() return err } @@ -518,6 +521,8 @@ func (c *Container) Destroy() error { log.Warningf("Failed to destroy sandbox %q: %v", c.Sandbox.ID, err) } } + c.Sandbox = nil + if c.GoferPid != 0 { log.Debugf("Killing gofer for container %q, PID: %d", c.ID, c.GoferPid) if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil { @@ -527,9 +532,7 @@ func (c *Container) Destroy() error { } } - c.Sandbox = nil c.Status = Stopped - return nil } @@ -596,3 +599,72 @@ func (c *Container) waitForStopped() error { } return backoff.Retry(op, b) } + +func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, error) { + if conf.FileAccess == boot.FileAccessDirect { + // Don't start a gofer. The sandbox will access host FS directly. + return nil, nil + } + + if err := setupFS(spec, conf, bundleDir); err != nil { + return nil, fmt.Errorf("failed to setup mounts: %v", err) + } + + // Start with the general config flags. + args := conf.ToFlags() + args = append(args, "gofer", "--bundle", bundleDir) + if conf.Overlay { + args = append(args, "--panic-on-write=true") + } + + // Add root mount and then add any other additional mounts. + mountCount := 1 + + // Add additional mounts. + for _, m := range spec.Mounts { + if specutils.Is9PMount(m) { + mountCount++ + } + } + sandEnds := make([]*os.File, 0, mountCount) + goferEnds := make([]*os.File, 0, mountCount) + + // nextFD is the next available file descriptor for the gofer process. + // It starts at 3 because 0-2 are used by stdin/stdout/stderr. + nextFD := 3 + for ; nextFD-3 < mountCount; nextFD++ { + fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) + if err != nil { + return nil, err + } + sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox io fd")) + + goferEnd := os.NewFile(uintptr(fds[1]), "gofer io fd") + defer goferEnd.Close() + goferEnds = append(goferEnds, goferEnd) + + args = append(args, fmt.Sprintf("--io-fds=%d", nextFD)) + } + + binPath, err := specutils.BinPath() + if err != nil { + return nil, err + } + cmd := exec.Command(binPath, args...) + cmd.ExtraFiles = goferEnds + + // Setup any uid/gid mappings, and create or join the configured user + // namespace so the gofer's view of the filesystem aligns with the + // users in the sandbox. + specutils.SetUIDGIDMappings(cmd, spec) + nss := specutils.FilterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec) + + // Start the gofer in the given namespace. + log.Debugf("Starting gofer: %s %v", binPath, args) + if err := specutils.StartInNS(cmd, nss); err != nil { + return nil, err + } + log.Infof("Gofer started, pid: %d", cmd.Process.Pid) + c.GoferPid = cmd.Process.Pid + return sandEnds, nil +} diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 6d84700ce..25aaf3f86 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -1211,9 +1211,6 @@ func TestMountNewDir(t *testing.T) { if err != nil { t.Fatal("ioutil.TempDir() failed:", err) } - if err := os.Chmod(root, 0755); err != nil { - t.Fatalf("os.Chmod(%q) failed: %v", root, err) - } srcDir := path.Join(root, "src", "dir", "anotherdir") if err := os.MkdirAll(srcDir, 0755); err != nil { @@ -1747,3 +1744,7 @@ func TestGoferExits(t *testing.T) { t.Errorf("container shouldn't be running, container: %+v", c) } } + +func TestMain(m *testing.M) { + testutil.RunAsRoot(m) +} diff --git a/runsc/container/fs.go b/runsc/container/fs.go new file mode 100644 index 000000000..652f81bbf --- /dev/null +++ b/runsc/container/fs.go @@ -0,0 +1,198 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package container + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +type mapping struct { + set bool + val uint32 +} + +var optionsMap = map[string]mapping{ + "acl": {set: true, val: syscall.MS_POSIXACL}, + "async": {set: false, val: syscall.MS_SYNCHRONOUS}, + "atime": {set: false, val: syscall.MS_NOATIME}, + "bind": {set: true, val: syscall.MS_BIND}, + "defaults": {set: true, val: 0}, + "dev": {set: false, val: syscall.MS_NODEV}, + "diratime": {set: false, val: syscall.MS_NODIRATIME}, + "dirsync": {set: true, val: syscall.MS_DIRSYNC}, + "exec": {set: false, val: syscall.MS_NOEXEC}, + "iversion": {set: true, val: syscall.MS_I_VERSION}, + "loud": {set: false, val: syscall.MS_SILENT}, + "mand": {set: true, val: syscall.MS_MANDLOCK}, + "noacl": {set: false, val: syscall.MS_POSIXACL}, + "noatime": {set: true, val: syscall.MS_NOATIME}, + "nodev": {set: true, val: syscall.MS_NODEV}, + "nodiratime": {set: true, val: syscall.MS_NODIRATIME}, + "noexec": {set: true, val: syscall.MS_NOEXEC}, + "noiversion": {set: false, val: syscall.MS_I_VERSION}, + "nomand": {set: false, val: syscall.MS_MANDLOCK}, + "norelatime": {set: false, val: syscall.MS_RELATIME}, + "nostrictatime": {set: false, val: syscall.MS_STRICTATIME}, + "nosuid": {set: true, val: syscall.MS_NOSUID}, + "private": {set: true, val: syscall.MS_PRIVATE}, + "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC}, + "relatime": {set: true, val: syscall.MS_RELATIME}, + "remount": {set: true, val: syscall.MS_REMOUNT}, + "ro": {set: true, val: syscall.MS_RDONLY}, + "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC}, + "rw": {set: false, val: syscall.MS_RDONLY}, + "silent": {set: true, val: syscall.MS_SILENT}, + "strictatime": {set: true, val: syscall.MS_STRICTATIME}, + "suid": {set: false, val: syscall.MS_NOSUID}, + "sync": {set: true, val: syscall.MS_SYNCHRONOUS}, +} + +// setupFS creates the container directory structure under 'spec.Root.Path'. +// This allows the gofer serving the containers to be chroot under this +// directory to create an extra layer to security in case the gofer gets +// compromised. +func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) error { + for _, m := range spec.Mounts { + if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + continue + } + src := m.Source + if !filepath.IsAbs(src) { + src = filepath.Join(bundleDir, src) + } + srcfi, err := os.Stat(src) + if err != nil { + return err + } + + // It's possible that 'm.Destination' follows symlinks inside the + // container. + dst, err := resolveSymlinks(spec.Root.Path, m.Destination) + if err != nil { + return err + } + + // Create mount point if it doesn't exits + if _, err := os.Stat(dst); os.IsNotExist(err) { + if srcfi.IsDir() { + if err := os.MkdirAll(dst, 0755); err != nil { + return err + } + } else { + if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil { + return err + } + f, err := os.OpenFile(dst, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() + } + } + + flags := optionsToFlags(m.Options) + flags |= syscall.MS_BIND + log.Infof("Mounting src: %q, dst: %q, flags: %#x", src, dst, flags) + if err := syscall.Mount(src, dst, m.Type, uintptr(flags), ""); err != nil { + return err + } + } + + // Remount root as readonly after setup is done, if requested. + if spec.Root.Readonly { + log.Infof("Remounting root as readonly: %q", spec.Root.Path) + flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC) + return unix.Mount(spec.Root.Path, spec.Root.Path, "bind", flags, "") + } + return nil +} + +// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are +// symlinks, they are evaluated relative to 'root' to ensure the end result is +// the same as if the process was running inside the container. +func resolveSymlinks(root, rel string) (string, error) { + return resolveSymlinksImpl(root, root, rel, 255) +} + +func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) { + if followCount == 0 { + return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel)) + } + + rel = filepath.Clean(rel) + for _, name := range strings.Split(rel, string(filepath.Separator)) { + if name == "" { + continue + } + // Note that Join() resolves things like ".." and returns a clean path. + path := filepath.Join(base, name) + if !strings.HasPrefix(path, root) { + // One cannot '..' their way out of root. + path = root + continue + } + fi, err := os.Lstat(path) + if err != nil { + if !os.IsNotExist(err) { + return "", err + } + // Not found means there is no symlink to check. Just keep walking dirs. + base = path + continue + } + if fi.Mode()&os.ModeSymlink != 0 { + link, err := os.Readlink(path) + if err != nil { + return "", err + } + if filepath.IsAbs(link) { + base = root + } + base, err = resolveSymlinksImpl(root, base, link, followCount-1) + if err != nil { + return "", err + } + continue + } + base = path + } + return base, nil +} + +func optionsToFlags(opts []string) uint32 { + var rv uint32 + for _, opt := range opts { + if m, ok := optionsMap[opt]; ok { + if m.set { + rv |= m.val + } else { + rv ^= m.val + } + } else { + log.Warningf("Ignoring mount option %q", opt) + } + } + return rv +} diff --git a/runsc/container/fs_test.go b/runsc/container/fs_test.go new file mode 100644 index 000000000..84bde18fb --- /dev/null +++ b/runsc/container/fs_test.go @@ -0,0 +1,158 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package container + +import ( + "fmt" + "io/ioutil" + "os" + "path" + "path/filepath" + "testing" + + "gvisor.googlesource.com/gvisor/runsc/test/testutil" +) + +type dir struct { + rel string + link string +} + +func construct(root string, dirs []dir) error { + for _, d := range dirs { + p := path.Join(root, d.rel) + if d.link == "" { + if err := os.MkdirAll(p, 0755); err != nil { + return fmt.Errorf("error creating dir: %v", err) + } + } else { + if err := os.MkdirAll(path.Dir(p), 0755); err != nil { + return fmt.Errorf("error creating dir: %v", err) + } + if err := os.Symlink(d.link, p); err != nil { + return fmt.Errorf("error creating symlink: %v", err) + } + } + } + return nil +} + +func TestResolveSymlinks(t *testing.T) { + root, err := ioutil.TempDir(testutil.TmpDir(), "root") + if err != nil { + t.Fatal("ioutil.TempDir() failed:", err) + } + dirs := []dir{ + {"dir1/dir11/dir111/dir1111", ""}, // Just a boring dir + {"dir1/lnk12", "dir11"}, // Link to sibling + {"dir1/lnk13", "./dir11"}, // Link to sibling through self + {"dir1/lnk14", "../dir1/dir11"}, // Link to sibling through parent + {"dir1/dir15/lnk151", ".."}, // Link to parent + {"dir1/lnk16", "dir11/dir111"}, // Link to child + {"dir1/lnk17", "."}, // Link to self + {"dir1/lnk18", "lnk13"}, // Link to link + {"lnk2", "dir1/lnk13"}, // Link to link to link + {"dir3/dir21/lnk211", "../.."}, // Link to root relative + {"dir3/lnk22", "/"}, // Link to root absolute + {"dir3/lnk23", "/dir1"}, // Link to dir absolute + {"dir3/lnk24", "/dir1/lnk12"}, // Link to link absolute + {"lnk5", "../../.."}, // Link outside root + } + if err := construct(root, dirs); err != nil { + t.Fatal("construct failed:", err) + } + + tests := []struct { + name string + rel string + want string + compareHost bool + }{ + {name: "root", rel: "/", want: "/", compareHost: true}, + {name: "basic dir", rel: "/dir1/dir11/dir111", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "dot 1", rel: "/dir1/dir11/./dir111", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "dot 2", rel: "/dir1/././dir11/./././././dir111/.", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "dotdot 1", rel: "/dir1/dir11/../dir15", want: "/dir1/dir15", compareHost: true}, + {name: "dotdot 2", rel: "/dir1/dir11/dir1111/../..", want: "/dir1", compareHost: true}, + + {name: "link sibling", rel: "/dir1/lnk12", want: "/dir1/dir11", compareHost: true}, + {name: "link sibling + dir", rel: "/dir1/lnk12/dir111", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "link sibling through self", rel: "/dir1/lnk13", want: "/dir1/dir11", compareHost: true}, + {name: "link sibling through parent", rel: "/dir1/lnk14", want: "/dir1/dir11", compareHost: true}, + + {name: "link parent", rel: "/dir1/dir15/lnk151", want: "/dir1", compareHost: true}, + {name: "link parent + dir", rel: "/dir1/dir15/lnk151/dir11", want: "/dir1/dir11", compareHost: true}, + {name: "link child", rel: "/dir1/lnk16", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "link child + dir", rel: "/dir1/lnk16/dir1111", want: "/dir1/dir11/dir111/dir1111", compareHost: true}, + {name: "link self", rel: "/dir1/lnk17", want: "/dir1", compareHost: true}, + {name: "link self + dir", rel: "/dir1/lnk17/dir11", want: "/dir1/dir11", compareHost: true}, + + {name: "link^2", rel: "/dir1/lnk18", want: "/dir1/dir11", compareHost: true}, + {name: "link^2 + dir", rel: "/dir1/lnk18/dir111", want: "/dir1/dir11/dir111", compareHost: true}, + {name: "link^3", rel: "/lnk2", want: "/dir1/dir11", compareHost: true}, + {name: "link^3 + dir", rel: "/lnk2/dir111", want: "/dir1/dir11/dir111", compareHost: true}, + + {name: "link abs", rel: "/dir3/lnk23", want: "/dir1"}, + {name: "link abs + dir", rel: "/dir3/lnk23/dir11", want: "/dir1/dir11"}, + {name: "link^2 abs", rel: "/dir3/lnk24", want: "/dir1/dir11"}, + {name: "link^2 abs + dir", rel: "/dir3/lnk24/dir111", want: "/dir1/dir11/dir111"}, + + {name: "root link rel", rel: "/dir3/dir21/lnk211", want: "/", compareHost: true}, + {name: "root link abs", rel: "/dir3/lnk22", want: "/"}, + {name: "root contain link", rel: "/lnk5/dir1", want: "/dir1"}, + {name: "root contain dotdot", rel: "/dir1/dir11/../../../../../../../..", want: "/"}, + + {name: "crazy", rel: "/dir3/dir21/lnk211/dir3/lnk22/dir1/dir11/../../lnk5/dir3/../dir3/lnk24/dir111/dir1111/..", want: "/dir1/dir11/dir111"}, + } + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + got, err := resolveSymlinks(root, tst.rel) + if err != nil { + t.Errorf("resolveSymlinks(root, %q) failed: %v", tst.rel, err) + } + want := path.Join(root, tst.want) + if got != want { + t.Errorf("resolveSymlinks(root, %q) got: %q, want: %q", tst.rel, got, want) + } + if tst.compareHost { + // Check that host got to the same end result. + host, err := filepath.EvalSymlinks(path.Join(root, tst.rel)) + if err != nil { + t.Errorf("path.EvalSymlinks(root, %q) failed: %v", tst.rel, err) + } + if host != got { + t.Errorf("resolveSymlinks(root, %q) got: %q, want: %q", tst.rel, host, got) + } + } + }) + } +} + +func TestResolveSymlinksLoop(t *testing.T) { + root, err := ioutil.TempDir(testutil.TmpDir(), "root") + if err != nil { + t.Fatal("ioutil.TempDir() failed:", err) + } + dirs := []dir{ + {"loop1", "loop2"}, + {"loop2", "loop1"}, + } + if err := construct(root, dirs); err != nil { + t.Fatal("construct failed:", err) + } + if _, err := resolveSymlinks(root, "loop1"); err == nil { + t.Errorf("resolveSymlinks() should have failed") + } +} |