summaryrefslogtreecommitdiffhomepage
path: root/runsc/container
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/container')
-rw-r--r--runsc/container/BUILD7
-rw-r--r--runsc/container/container.go96
-rw-r--r--runsc/container/container_test.go7
-rw-r--r--runsc/container/fs.go198
-rw-r--r--runsc/container/fs_test.go158
5 files changed, 450 insertions, 16 deletions
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index e40ca4709..cba418d0c 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -13,6 +13,7 @@ go_library(
name = "container",
srcs = [
"container.go",
+ "fs.go",
"hook.go",
"status.go",
],
@@ -28,13 +29,17 @@ go_library(
"//runsc/specutils",
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
go_test(
name = "container_test",
size = "medium",
- srcs = ["container_test.go"],
+ srcs = [
+ "container_test.go",
+ "fs_test.go",
+ ],
data = [
":uds_test_app",
"//runsc",
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 8bd47aac1..16af66d3e 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -21,6 +21,7 @@ import (
"fmt"
"io/ioutil"
"os"
+ "os/exec"
"path/filepath"
"regexp"
"strconv"
@@ -223,15 +224,19 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// init container in the sandbox.
if specutils.ShouldCreateSandbox(spec) || !conf.MultiContainer {
log.Debugf("Creating new sandbox for container %q", id)
+ ioFiles, err := c.createGoferProcess(spec, conf, bundleDir)
+ if err != nil {
+ return nil, err
+ }
+
// Start a new sandbox for this container. Any errors after this point
// must destroy the container.
- s, goferPid, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket)
+ s, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket, ioFiles)
if err != nil {
c.Destroy()
return nil, err
}
c.Sandbox = s
- c.GoferPid = goferPid
} else {
// This is sort of confusing. For a sandbox with a root
// container and a child container in it, runsc sees:
@@ -254,13 +259,6 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
return nil, err
}
c.Sandbox = sb.Sandbox
-
- // Prepare the gofer to serve the container's filesystem.
- err = sb.Sandbox.CreateChild(c.ID, bundleDir)
- if err != nil {
- c.Destroy()
- return nil, err
- }
}
c.Status = Created
@@ -304,7 +302,12 @@ func (c *Container) Start(conf *boot.Config) error {
return err
}
} else {
- if err := c.Sandbox.Start(c.Spec, conf, c.ID); err != nil {
+ // Create the gofer process.
+ ioFiles, err := c.createGoferProcess(c.Spec, conf, c.BundleDir)
+ if err != nil {
+ return err
+ }
+ if err := c.Sandbox.Start(c.Spec, conf, c.ID, ioFiles); err != nil {
c.Destroy()
return err
}
@@ -518,6 +521,8 @@ func (c *Container) Destroy() error {
log.Warningf("Failed to destroy sandbox %q: %v", c.Sandbox.ID, err)
}
}
+ c.Sandbox = nil
+
if c.GoferPid != 0 {
log.Debugf("Killing gofer for container %q, PID: %d", c.ID, c.GoferPid)
if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
@@ -527,9 +532,7 @@ func (c *Container) Destroy() error {
}
}
- c.Sandbox = nil
c.Status = Stopped
-
return nil
}
@@ -596,3 +599,72 @@ func (c *Container) waitForStopped() error {
}
return backoff.Retry(op, b)
}
+
+// createGoferProcess prepares the container's mounts and starts the gofer
+// process that serves the container's filesystem to the sandbox.
+//
+// One socket pair is created for the root mount plus one for every mount in
+// spec.Mounts that specutils.Is9PMount reports as a 9P mount. The gofer side of
+// each pair is donated to the child process; the sandbox side is returned to
+// the caller, who becomes responsible for closing it.
+//
+// When conf.FileAccess is boot.FileAccessDirect no gofer is started and
+// (nil, nil) is returned. On success c.GoferPid is set to the PID of the gofer.
+// On failure every socket created here is closed before returning.
+func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, error) {
+	if conf.FileAccess == boot.FileAccessDirect {
+		// Don't start a gofer. The sandbox will access host FS directly.
+		return nil, nil
+	}
+
+	if err := setupFS(spec, conf, bundleDir); err != nil {
+		return nil, fmt.Errorf("failed to setup mounts: %v", err)
+	}
+
+	// Start with the general config flags.
+	args := conf.ToFlags()
+	args = append(args, "gofer", "--bundle", bundleDir)
+	if conf.Overlay {
+		args = append(args, "--panic-on-write=true")
+	}
+
+	// One connection for the root mount, plus one per additional 9P mount.
+	mountCount := 1
+	for _, m := range spec.Mounts {
+		if specutils.Is9PMount(m) {
+			mountCount++
+		}
+	}
+	sandEnds := make([]*os.File, 0, mountCount)
+	goferEnds := make([]*os.File, 0, mountCount)
+
+	// This process only needs the gofer ends until the child has been started
+	// (or has failed to start), so they are always closed on the way out.
+	defer func() {
+		for _, f := range goferEnds {
+			f.Close()
+		}
+	}()
+
+	// Ownership of the sandbox ends passes to the caller only on success.
+	// On any error path they must be closed here or the descriptors leak.
+	started := false
+	defer func() {
+		if started {
+			return
+		}
+		for _, f := range sandEnds {
+			f.Close()
+		}
+	}()
+
+	// nextFD is the next available file descriptor for the gofer process.
+	// It starts at 3 because 0-2 are used by stdin/stdout/stderr, and entry i
+	// of cmd.ExtraFiles is inherited by the child as descriptor 3+i.
+	nextFD := 3
+	for ; nextFD-3 < mountCount; nextFD++ {
+		fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
+		if err != nil {
+			return nil, err
+		}
+		sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox io fd"))
+		goferEnds = append(goferEnds, os.NewFile(uintptr(fds[1]), "gofer io fd"))
+
+		args = append(args, fmt.Sprintf("--io-fds=%d", nextFD))
+	}
+
+	binPath, err := specutils.BinPath()
+	if err != nil {
+		return nil, err
+	}
+	cmd := exec.Command(binPath, args...)
+	cmd.ExtraFiles = goferEnds
+
+	// Setup any uid/gid mappings, and create or join the configured user
+	// namespace so the gofer's view of the filesystem aligns with the
+	// users in the sandbox.
+	specutils.SetUIDGIDMappings(cmd, spec)
+	nss := specutils.FilterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec)
+
+	// Start the gofer in the given namespace.
+	log.Debugf("Starting gofer: %s %v", binPath, args)
+	if err := specutils.StartInNS(cmd, nss); err != nil {
+		return nil, err
+	}
+	log.Infof("Gofer started, pid: %d", cmd.Process.Pid)
+	c.GoferPid = cmd.Process.Pid
+	started = true
+	return sandEnds, nil
+}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 6d84700ce..25aaf3f86 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -1211,9 +1211,6 @@ func TestMountNewDir(t *testing.T) {
if err != nil {
t.Fatal("ioutil.TempDir() failed:", err)
}
- if err := os.Chmod(root, 0755); err != nil {
- t.Fatalf("os.Chmod(%q) failed: %v", root, err)
- }
srcDir := path.Join(root, "src", "dir", "anotherdir")
if err := os.MkdirAll(srcDir, 0755); err != nil {
@@ -1747,3 +1744,7 @@ func TestGoferExits(t *testing.T) {
t.Errorf("container shouldn't be running, container: %+v", c)
}
}
+
+// TestMain is the entry point of this package's test binary. It hands the
+// test run over to testutil.RunAsRoot instead of calling m.Run directly
+// (presumably so the tests execute with root privileges — confirm in testutil).
+func TestMain(m *testing.M) {
+ testutil.RunAsRoot(m)
+}
diff --git a/runsc/container/fs.go b/runsc/container/fs.go
new file mode 100644
index 000000000..652f81bbf
--- /dev/null
+++ b/runsc/container/fs.go
@@ -0,0 +1,198 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "syscall"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// mapping describes how a single textual mount option affects the mount
+// flags computed by optionsToFlags.
+type mapping struct {
+ // set is true when the option turns the bits in val on, and false when
+ // the option is the negated form of the flag stored in val.
+ set bool
+ // val holds the syscall.MS_* flag bit(s) the option refers to.
+ val uint32
+}
+
+// optionsMap translates the mount option strings accepted in a mount's
+// Options list into the syscall.MS_* flag each one refers to. Entries with
+// set == true enable the flag; entries with set == false are the negated
+// spelling of a flag (for example "rw" refers to MS_RDONLY). Options that are
+// not listed here are ignored, with a warning, by optionsToFlags.
+var optionsMap = map[string]mapping{
+ "acl": {set: true, val: syscall.MS_POSIXACL},
+ "async": {set: false, val: syscall.MS_SYNCHRONOUS},
+ "atime": {set: false, val: syscall.MS_NOATIME},
+ "bind": {set: true, val: syscall.MS_BIND},
+ "defaults": {set: true, val: 0},
+ "dev": {set: false, val: syscall.MS_NODEV},
+ "diratime": {set: false, val: syscall.MS_NODIRATIME},
+ "dirsync": {set: true, val: syscall.MS_DIRSYNC},
+ "exec": {set: false, val: syscall.MS_NOEXEC},
+ "iversion": {set: true, val: syscall.MS_I_VERSION},
+ "loud": {set: false, val: syscall.MS_SILENT},
+ "mand": {set: true, val: syscall.MS_MANDLOCK},
+ "noacl": {set: false, val: syscall.MS_POSIXACL},
+ "noatime": {set: true, val: syscall.MS_NOATIME},
+ "nodev": {set: true, val: syscall.MS_NODEV},
+ "nodiratime": {set: true, val: syscall.MS_NODIRATIME},
+ "noexec": {set: true, val: syscall.MS_NOEXEC},
+ "noiversion": {set: false, val: syscall.MS_I_VERSION},
+ "nomand": {set: false, val: syscall.MS_MANDLOCK},
+ "norelatime": {set: false, val: syscall.MS_RELATIME},
+ "nostrictatime": {set: false, val: syscall.MS_STRICTATIME},
+ "nosuid": {set: true, val: syscall.MS_NOSUID},
+ "private": {set: true, val: syscall.MS_PRIVATE},
+ "rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC},
+ "relatime": {set: true, val: syscall.MS_RELATIME},
+ "remount": {set: true, val: syscall.MS_REMOUNT},
+ "ro": {set: true, val: syscall.MS_RDONLY},
+ "rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC},
+ "rw": {set: false, val: syscall.MS_RDONLY},
+ "silent": {set: true, val: syscall.MS_SILENT},
+ "strictatime": {set: true, val: syscall.MS_STRICTATIME},
+ "suid": {set: false, val: syscall.MS_NOSUID},
+ "sync": {set: true, val: syscall.MS_SYNCHRONOUS},
+}
+
+// setupFS creates the container directory structure under 'spec.Root.Path'.
+// This allows the gofer serving the container to be chrooted under this
+// directory, adding an extra layer of security in case the gofer gets
+// compromised.
+//
+// Every mount in spec.Mounts whose Type is "bind" and that
+// specutils.IsSupportedDevMount accepts is bind-mounted from its source
+// (relative sources are resolved against bundleDir) onto its destination
+// inside the root. Symlinks in the destination are resolved as if running
+// inside the container, and a missing mount point is created first: a
+// directory when the source is a directory, an empty file otherwise. If
+// spec.Root.Readonly is set, the root is remounted read-only once all mounts
+// are in place.
+//
+// The first failure aborts the setup; the returned error names the operation
+// and the path involved.
+func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) error {
+	for _, m := range spec.Mounts {
+		if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
+			continue
+		}
+		src := m.Source
+		if !filepath.IsAbs(src) {
+			src = filepath.Join(bundleDir, src)
+		}
+		srcfi, err := os.Stat(src)
+		if err != nil {
+			return fmt.Errorf("stat of mount source %q: %v", src, err)
+		}
+
+		// It's possible that 'm.Destination' follows symlinks inside the
+		// container.
+		dst, err := resolveSymlinks(spec.Root.Path, m.Destination)
+		if err != nil {
+			return fmt.Errorf("resolving mount destination %q: %v", m.Destination, err)
+		}
+
+		// Create the mount point if it doesn't exist.
+		_, err = os.Stat(dst)
+		switch {
+		case os.IsNotExist(err):
+			if srcfi.IsDir() {
+				if err := os.MkdirAll(dst, 0755); err != nil {
+					return fmt.Errorf("creating mount point directory %q: %v", dst, err)
+				}
+			} else {
+				if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
+					return fmt.Errorf("creating parent of mount point %q: %v", dst, err)
+				}
+				f, err := os.OpenFile(dst, os.O_CREATE, 0755)
+				if err != nil {
+					return fmt.Errorf("creating mount point file %q: %v", dst, err)
+				}
+				f.Close()
+			}
+		case err != nil:
+			// Anything other than "not found" would make the mount below
+			// fail as well; report the real cause instead.
+			return fmt.Errorf("stat of mount point %q: %v", dst, err)
+		}
+
+		// MS_BIND is forced regardless of what the options asked for.
+		flags := optionsToFlags(m.Options)
+		flags |= syscall.MS_BIND
+		log.Infof("Mounting src: %q, dst: %q, flags: %#x", src, dst, flags)
+		if err := syscall.Mount(src, dst, m.Type, uintptr(flags), ""); err != nil {
+			return fmt.Errorf("mounting %q on %q with flags %#x: %v", src, dst, flags, err)
+		}
+	}
+
+	// Remount root as readonly after setup is done, if requested.
+	if spec.Root.Readonly {
+		log.Infof("Remounting root as readonly: %q", spec.Root.Path)
+		flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC)
+		if err := unix.Mount(spec.Root.Path, spec.Root.Path, "bind", flags, ""); err != nil {
+			return fmt.Errorf("remounting root %q as readonly: %v", spec.Root.Path, err)
+		}
+	}
+	return nil
+}
+
+// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are
+// symlinks, they are evaluated relative to 'root' to ensure the end result is
+// the same as if the process was running inside the container.
+//
+// 'root' is cleaned first, so a trailing separator or redundant elements in
+// it do not confuse the containment check. The returned path is always 'root'
+// itself or a path below it. Path components that do not exist are kept
+// verbatim. An error is returned when a component cannot be inspected or
+// when more than 255 nested symlinks would have to be followed (for example
+// a symlink loop).
+func resolveSymlinks(root, rel string) (string, error) {
+	root = filepath.Clean(root)
+	return resolveSymlinksImpl(root, root, rel, 255)
+}
+
+// resolveSymlinksImpl resolves 'rel' one component at a time starting from
+// 'base', never leaving 'root'. 'root' must already be clean. 'followCount'
+// is the number of nested symlinks that may still be followed.
+func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) {
+	if followCount == 0 {
+		return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel))
+	}
+
+	sep := string(filepath.Separator)
+	// rootPrefix always ends in a separator so that a sibling such as
+	// "/root2" is never mistaken for a child of "/root".
+	rootPrefix := strings.TrimSuffix(root, sep) + sep
+
+	rel = filepath.Clean(rel)
+	for _, name := range strings.Split(rel, sep) {
+		if name == "" {
+			continue
+		}
+		// Note that Join() resolves things like ".." and returns a clean path.
+		path := filepath.Join(base, name)
+		if path != root && !strings.HasPrefix(path, rootPrefix) {
+			// One cannot '..' their way out of root.
+			base = root
+			continue
+		}
+		fi, err := os.Lstat(path)
+		if err != nil {
+			if !os.IsNotExist(err) {
+				return "", err
+			}
+			// Not found means there is no symlink to check. Just keep walking dirs.
+			base = path
+			continue
+		}
+		if fi.Mode()&os.ModeSymlink != 0 {
+			link, err := os.Readlink(path)
+			if err != nil {
+				return "", err
+			}
+			// Absolute targets are interpreted relative to the container
+			// root, not the host root.
+			if filepath.IsAbs(link) {
+				base = root
+			}
+			base, err = resolveSymlinksImpl(root, base, link, followCount-1)
+			if err != nil {
+				return "", err
+			}
+			continue
+		}
+		base = path
+	}
+	return base, nil
+}
+
+// optionsToFlags converts a list of mount option strings into the
+// corresponding syscall.MS_* flag bits, using optionsMap.
+//
+// Options are applied in order: a positive option turns its bits on, a
+// negated option (such as "rw" or "exec") turns the bits off, so the last
+// mention of a flag wins. Options missing from optionsMap are ignored after
+// logging a warning.
+func optionsToFlags(opts []string) uint32 {
+	var rv uint32
+	for _, opt := range opts {
+		m, ok := optionsMap[opt]
+		if !ok {
+			log.Warningf("Ignoring mount option %q", opt)
+			continue
+		}
+		if m.set {
+			rv |= m.val
+		} else {
+			// Clear, don't toggle: XOR would switch the flag on whenever it
+			// was not already set (e.g. a lone "rw" would yield MS_RDONLY).
+			rv &^= m.val
+		}
+	}
+	return rv
+}
diff --git a/runsc/container/fs_test.go b/runsc/container/fs_test.go
new file mode 100644
index 000000000..84bde18fb
--- /dev/null
+++ b/runsc/container/fs_test.go
@@ -0,0 +1,158 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path"
+ "path/filepath"
+ "testing"
+
+ "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+)
+
+// dir describes one filesystem entry that construct creates for a test.
+type dir struct {
+ // rel is the path of the entry, relative to the root given to construct.
+ rel string
+ // link is the symlink target. When empty the entry is created as a plain
+ // directory; otherwise it is created as a symlink pointing at link.
+ link string
+}
+
+// construct materialises the entries of 'dirs' below 'root', in order. An
+// entry without a link target becomes a directory (parents included); an
+// entry with a link target becomes a symlink, after its parent directory has
+// been created. It stops at, and returns, the first error.
+func construct(root string, dirs []dir) error {
+	for _, entry := range dirs {
+		target := path.Join(root, entry.rel)
+
+		// For a symlink only the parent has to exist as a directory.
+		toCreate := target
+		if entry.link != "" {
+			toCreate = path.Dir(target)
+		}
+		if err := os.MkdirAll(toCreate, 0755); err != nil {
+			return fmt.Errorf("error creating dir: %v", err)
+		}
+
+		if entry.link == "" {
+			continue
+		}
+		if err := os.Symlink(entry.link, target); err != nil {
+			return fmt.Errorf("error creating symlink: %v", err)
+		}
+	}
+	return nil
+}
+
+// TestResolveSymlinks builds a directory tree full of relative and absolute
+// symlinks under a temporary root and checks that resolveSymlinks resolves a
+// range of paths to the expected location inside that root. For cases marked
+// compareHost the result is additionally compared with what
+// filepath.EvalSymlinks yields for the same path on the host; cases involving
+// absolute links or escapes from the root are excluded from that comparison
+// because the host resolves them against the real filesystem root.
+func TestResolveSymlinks(t *testing.T) {
+	root, err := ioutil.TempDir(testutil.TmpDir(), "root")
+	if err != nil {
+		t.Fatal("ioutil.TempDir() failed:", err)
+	}
+	defer os.RemoveAll(root)
+
+	dirs := []dir{
+		{"dir1/dir11/dir111/dir1111", ""}, // Just a boring dir
+		{"dir1/lnk12", "dir11"},           // Link to sibling
+		{"dir1/lnk13", "./dir11"},         // Link to sibling through self
+		{"dir1/lnk14", "../dir1/dir11"},   // Link to sibling through parent
+		{"dir1/dir15/lnk151", ".."},       // Link to parent
+		{"dir1/lnk16", "dir11/dir111"},    // Link to child
+		{"dir1/lnk17", "."},               // Link to self
+		{"dir1/lnk18", "lnk13"},           // Link to link
+		{"lnk2", "dir1/lnk13"},            // Link to link to link
+		{"dir3/dir21/lnk211", "../.."},    // Link to root relative
+		{"dir3/lnk22", "/"},               // Link to root absolute
+		{"dir3/lnk23", "/dir1"},           // Link to dir absolute
+		{"dir3/lnk24", "/dir1/lnk12"},     // Link to link absolute
+		{"lnk5", "../../.."},              // Link outside root
+	}
+	if err := construct(root, dirs); err != nil {
+		t.Fatal("construct failed:", err)
+	}
+
+	tests := []struct {
+		name        string
+		rel         string
+		want        string
+		compareHost bool
+	}{
+		{name: "root", rel: "/", want: "/", compareHost: true},
+		{name: "basic dir", rel: "/dir1/dir11/dir111", want: "/dir1/dir11/dir111", compareHost: true},
+		{name: "dot 1", rel: "/dir1/dir11/./dir111", want: "/dir1/dir11/dir111", compareHost: true},
+		{name: "dot 2", rel: "/dir1/././dir11/./././././dir111/.", want: "/dir1/dir11/dir111", compareHost: true},
+		{name: "dotdot 1", rel: "/dir1/dir11/../dir15", want: "/dir1/dir15", compareHost: true},
+		{name: "dotdot 2", rel: "/dir1/dir11/dir1111/../..", want: "/dir1", compareHost: true},
+
+		{name: "link sibling", rel: "/dir1/lnk12", want: "/dir1/dir11", compareHost: true},
+		{name: "link sibling + dir", rel: "/dir1/lnk12/dir111", want: "/dir1/dir11/dir111", compareHost: true},
+		{name: "link sibling through self", rel: "/dir1/lnk13", want: "/dir1/dir11", compareHost: true},
+		{name: "link sibling through parent", rel: "/dir1/lnk14", want: "/dir1/dir11", compareHost: true},
+
+		{name: "link parent", rel: "/dir1/dir15/lnk151", want: "/dir1", compareHost: true},
+		{name: "link parent + dir", rel: "/dir1/dir15/lnk151/dir11", want: "/dir1/dir11", compareHost: true},
+		{name: "link child", rel: "/dir1/lnk16", want: "/dir1/dir11/dir111", compareHost: true},
+		{name: "link child + dir", rel: "/dir1/lnk16/dir1111", want: "/dir1/dir11/dir111/dir1111", compareHost: true},
+		{name: "link self", rel: "/dir1/lnk17", want: "/dir1", compareHost: true},
+		{name: "link self + dir", rel: "/dir1/lnk17/dir11", want: "/dir1/dir11", compareHost: true},
+
+		{name: "link^2", rel: "/dir1/lnk18", want: "/dir1/dir11", compareHost: true},
+		{name: "link^2 + dir", rel: "/dir1/lnk18/dir111", want: "/dir1/dir11/dir111", compareHost: true},
+		{name: "link^3", rel: "/lnk2", want: "/dir1/dir11", compareHost: true},
+		{name: "link^3 + dir", rel: "/lnk2/dir111", want: "/dir1/dir11/dir111", compareHost: true},
+
+		{name: "link abs", rel: "/dir3/lnk23", want: "/dir1"},
+		{name: "link abs + dir", rel: "/dir3/lnk23/dir11", want: "/dir1/dir11"},
+		{name: "link^2 abs", rel: "/dir3/lnk24", want: "/dir1/dir11"},
+		{name: "link^2 abs + dir", rel: "/dir3/lnk24/dir111", want: "/dir1/dir11/dir111"},
+
+		{name: "root link rel", rel: "/dir3/dir21/lnk211", want: "/", compareHost: true},
+		{name: "root link abs", rel: "/dir3/lnk22", want: "/"},
+		{name: "root contain link", rel: "/lnk5/dir1", want: "/dir1"},
+		{name: "root contain dotdot", rel: "/dir1/dir11/../../../../../../../..", want: "/"},
+
+		{name: "crazy", rel: "/dir3/dir21/lnk211/dir3/lnk22/dir1/dir11/../../lnk5/dir3/../dir3/lnk24/dir111/dir1111/..", want: "/dir1/dir11/dir111"},
+	}
+	for _, tst := range tests {
+		t.Run(tst.name, func(t *testing.T) {
+			got, err := resolveSymlinks(root, tst.rel)
+			if err != nil {
+				// Nothing meaningful can be compared without a result.
+				t.Fatalf("resolveSymlinks(root, %q) failed: %v", tst.rel, err)
+			}
+			want := path.Join(root, tst.want)
+			if got != want {
+				t.Errorf("resolveSymlinks(root, %q) got: %q, want: %q", tst.rel, got, want)
+			}
+			if tst.compareHost {
+				// Check that host got to the same end result.
+				hostPath := path.Join(root, tst.rel)
+				host, err := filepath.EvalSymlinks(hostPath)
+				if err != nil {
+					t.Fatalf("filepath.EvalSymlinks(%q) failed: %v", hostPath, err)
+				}
+				if host != got {
+					t.Errorf("resolveSymlinks(root, %q) got: %q, but host resolved it to: %q", tst.rel, got, host)
+				}
+			}
+		})
+	}
+}
+
+// TestResolveSymlinksLoop checks that resolveSymlinks gives up with an error,
+// rather than recursing forever, when two symlinks point at each other.
+func TestResolveSymlinksLoop(t *testing.T) {
+	root, err := ioutil.TempDir(testutil.TmpDir(), "root")
+	if err != nil {
+		t.Fatal("ioutil.TempDir() failed:", err)
+	}
+	defer os.RemoveAll(root)
+
+	dirs := []dir{
+		{"loop1", "loop2"},
+		{"loop2", "loop1"},
+	}
+	if err := construct(root, dirs); err != nil {
+		t.Fatal("construct failed:", err)
+	}
+	if _, err := resolveSymlinks(root, "loop1"); err == nil {
+		t.Errorf("resolveSymlinks() should have failed")
+	}
+}