diff options
Diffstat (limited to 'runsc/sandbox')
-rw-r--r-- | runsc/sandbox/BUILD | 2 | ||||
-rw-r--r-- | runsc/sandbox/namespace.go | 204 | ||||
-rw-r--r-- | runsc/sandbox/network.go | 3 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 176 |
4 files changed, 21 insertions, 364 deletions
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index e9a39f797..9317b1c14 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -5,7 +5,6 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "sandbox", srcs = [ - "namespace.go", "network.go", "sandbox.go", ], @@ -21,7 +20,6 @@ go_library( "//pkg/urpc", "//runsc/boot", "//runsc/console", - "//runsc/fsgofer", "//runsc/specutils", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", "@com_github_vishvananda_netlink//:go_default_library", diff --git a/runsc/sandbox/namespace.go b/runsc/sandbox/namespace.go deleted file mode 100644 index 1d3bcfbb5..000000000 --- a/runsc/sandbox/namespace.go +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright 2018 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package sandbox - -import ( - "fmt" - "os" - "os/exec" - "path/filepath" - "runtime" - "syscall" - - specs "github.com/opencontainers/runtime-spec/specs-go" - "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/log" -) - -// nsCloneFlag returns the clone flag that can be used to set a namespace of -// the given type. -func nsCloneFlag(nst specs.LinuxNamespaceType) uintptr { - switch nst { - case specs.IPCNamespace: - return syscall.CLONE_NEWIPC - case specs.MountNamespace: - return syscall.CLONE_NEWNS - case specs.NetworkNamespace: - return syscall.CLONE_NEWNET - case specs.PIDNamespace: - return syscall.CLONE_NEWPID - case specs.UTSNamespace: - return syscall.CLONE_NEWUTS - case specs.UserNamespace: - return syscall.CLONE_NEWUSER - case specs.CgroupNamespace: - panic("cgroup namespace has no associated clone flag") - default: - panic(fmt.Sprintf("unknown namespace %v", nst)) - } -} - -// nsPath returns the path of the namespace for the current process and the -// given namespace. -func nsPath(nst specs.LinuxNamespaceType) string { - base := "/proc/self/ns" - switch nst { - case specs.CgroupNamespace: - return filepath.Join(base, "cgroup") - case specs.IPCNamespace: - return filepath.Join(base, "ipc") - case specs.MountNamespace: - return filepath.Join(base, "mnt") - case specs.NetworkNamespace: - return filepath.Join(base, "net") - case specs.PIDNamespace: - return filepath.Join(base, "pid") - case specs.UserNamespace: - return filepath.Join(base, "user") - case specs.UTSNamespace: - return filepath.Join(base, "uts") - default: - panic(fmt.Sprintf("unknown namespace %v", nst)) - } -} - -// getNS returns true and the namespace with the given type from the slice of -// namespaces in the spec. It returns false if the slice does not contain a -// namespace with the type. -func getNS(nst specs.LinuxNamespaceType, s *specs.Spec) (specs.LinuxNamespace, bool) { - if s.Linux == nil { - return specs.LinuxNamespace{}, false - } - for _, ns := range s.Linux.Namespaces { - if ns.Type == nst { - return ns, true - } - } - return specs.LinuxNamespace{}, false -} - -// filterNS returns a slice of namespaces from the spec with types that match -// those in the `filter` slice. -func filterNS(filter []specs.LinuxNamespaceType, s *specs.Spec) []specs.LinuxNamespace { - if s.Linux == nil { - return nil - } - var out []specs.LinuxNamespace - for _, nst := range filter { - if ns, ok := getNS(nst, s); ok { - out = append(out, ns) - } - } - return out -} - -// setNS sets the namespace of the given type. It must be called with -// OSThreadLocked. -func setNS(fd, nsType uintptr) error { - if _, _, err := syscall.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 { - return err - } - return nil -} - -// applyNS applies the namespace on the current thread and returns a function -// that will restore the namespace to the original value. -// -// Preconditions: Must be called with os thread locked. -func applyNS(ns specs.LinuxNamespace) (func(), error) { - log.Infof("applying namespace %v at path %q", ns.Type, ns.Path) - newNS, err := os.Open(ns.Path) - if err != nil { - return nil, fmt.Errorf("error opening %q: %v", ns.Path, err) - } - defer newNS.Close() - - // Store current netns to restore back after child is started. - curPath := nsPath(ns.Type) - oldNS, err := os.Open(curPath) - if err != nil { - return nil, fmt.Errorf("error opening %q: %v", curPath, err) - } - - // Set netns to the one requested and setup function to restore it back. - flag := nsCloneFlag(ns.Type) - if err := setNS(newNS.Fd(), flag); err != nil { - oldNS.Close() - return nil, fmt.Errorf("error setting namespace of type %v and path %q: %v", ns.Type, ns.Path, err) - } - return func() { - log.Infof("restoring namespace %v", ns.Type) - defer oldNS.Close() - if err := setNS(oldNS.Fd(), flag); err != nil { - panic(fmt.Sprintf("error restoring namespace: of type %v: %v", ns.Type, err)) - } - }, nil -} - -// startInNS joins or creates the given namespaces and calls cmd.Start before -// restoring the namespaces to the original values. -func startInNS(cmd *exec.Cmd, nss []specs.LinuxNamespace) error { - // We are about to setup namespaces, which requires the os thread being - // locked so that Go doesn't change the thread out from under us. - runtime.LockOSThread() - defer runtime.UnlockOSThread() - - if cmd.SysProcAttr == nil { - cmd.SysProcAttr = &syscall.SysProcAttr{} - } - - for _, ns := range nss { - if ns.Path == "" { - // No path. Just set a flag to create a new namespace. - cmd.SysProcAttr.Cloneflags |= nsCloneFlag(ns.Type) - continue - } - // Join the given namespace, and restore the current namespace - // before exiting. - restoreNS, err := applyNS(ns) - if err != nil { - return err - } - defer restoreNS() - } - - return cmd.Start() -} - -// setUIDGIDMappings sets the given uid/gid mappings from the spec on the cmd. -func setUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) { - if s.Linux == nil { - return - } - if cmd.SysProcAttr == nil { - cmd.SysProcAttr = &syscall.SysProcAttr{} - } - for _, idMap := range s.Linux.UIDMappings { - log.Infof("Mapping host uid %d to container uid %d (size=%d)", idMap.HostID, idMap.ContainerID, idMap.Size) - cmd.SysProcAttr.UidMappings = append(cmd.SysProcAttr.UidMappings, syscall.SysProcIDMap{ - ContainerID: int(idMap.ContainerID), - HostID: int(idMap.HostID), - Size: int(idMap.Size), - }) - } - for _, idMap := range s.Linux.GIDMappings { - log.Infof("Mapping host gid %d to container gid %d (size=%d)", idMap.HostID, idMap.ContainerID, idMap.Size) - cmd.SysProcAttr.GidMappings = append(cmd.SysProcAttr.GidMappings, syscall.SysProcIDMap{ - ContainerID: int(idMap.ContainerID), - HostID: int(idMap.HostID), - Size: int(idMap.Size), - }) - } -} diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index d0ce6228b..8694ba755 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -29,6 +29,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/urpc" "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/specutils" ) const ( @@ -132,7 +133,7 @@ func createDefaultLoopbackInterface(conn *urpc.Client) error { func joinNetNS(nsPath string) (func(), error) { runtime.LockOSThread() - restoreNS, err := applyNS(specs.LinuxNamespace{ + restoreNS, err := specutils.ApplyNS(specs.LinuxNamespace{ Type: specs.NetworkNamespace, Path: nsPath, }) diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index e54ba4ba3..f14a2f8c9 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -32,7 +32,6 @@ import ( "gvisor.googlesource.com/gvisor/pkg/urpc" "gvisor.googlesource.com/gvisor/runsc/boot" "gvisor.googlesource.com/gvisor/runsc/console" - "gvisor.googlesource.com/gvisor/runsc/fsgofer" "gvisor.googlesource.com/gvisor/runsc/specutils" ) @@ -55,31 +54,20 @@ type Sandbox struct { } // Create creates the sandbox process. -func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket string) (*Sandbox, int, error) { +func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket string, ioFiles []*os.File) (*Sandbox, error) { s := &Sandbox{ID: id} - binPath, err := specutils.BinPath() - if err != nil { - return nil, 0, err - } - - // Create the gofer process. - goferPid, ioFiles, err := s.createGoferProcess(spec, conf, bundleDir, binPath) - if err != nil { - return nil, 0, err - } - // Create the sandbox process. - if err := s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, binPath, ioFiles); err != nil { - return nil, 0, err + if err := s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, ioFiles); err != nil { + return nil, err } // Wait for the control server to come up (or timeout). if err := s.waitForCreated(10 * time.Second); err != nil { - return nil, 0, err + return nil, err } - return s, goferPid, nil + return s, nil } // StartRoot starts running the root container process inside the sandbox. @@ -105,70 +93,29 @@ func (s *Sandbox) StartRoot(spec *specs.Spec, conf *boot.Config) error { return nil } -// CreateChild creates a non-root container inside the sandbox. -func (s *Sandbox) CreateChild(cid, bundleDir string) error { - log.Debugf("Create non-root container sandbox %q, pid: %d for container %q with bundle directory %q", s.ID, s.Pid, cid, bundleDir) - - // Connect to the gofer and prepare it to serve from bundleDir for this - // container. - goferConn, err := s.goferConnect() - if err != nil { - return fmt.Errorf("couldn't connect to gofer: %v", err) - } - defer goferConn.Close() - goferReq := fsgofer.AddBundleDirsRequest{BundleDirs: map[string]string{cid: bundleDir}} - if err := goferConn.Call(fsgofer.AddBundleDirs, &goferReq, nil); err != nil { - return fmt.Errorf("error serving new filesystem for non-root container %v: %v", goferReq, err) +// Start starts running a non-root container inside the sandbox. +func (s *Sandbox) Start(spec *specs.Spec, conf *boot.Config, cid string, ioFiles []*os.File) error { + for _, f := range ioFiles { + defer f.Close() } - return nil -} - -// Start starts running a non-root container inside the sandbox. -func (s *Sandbox) Start(spec *specs.Spec, conf *boot.Config, cid string) error { log.Debugf("Start non-root container sandbox %q, pid: %d", s.ID, s.Pid) - sandboxConn, err := s.sandboxConnect() if err != nil { return fmt.Errorf("couldn't connect to sandbox: %v", err) } defer sandboxConn.Close() - goferConn, err := s.goferConnect() - if err != nil { - return fmt.Errorf("couldn't connect to gofer: %v", err) - } - defer goferConn.Close() - - // Create socket that connects the sandbox and gofer. - sandEnd, goferEnd, err := createSocketPair() - if err != nil { - return err - } - defer sandEnd.Close() - defer goferEnd.Close() - - // Tell the Gofer about the new filesystem it needs to serve. - goferReq := fsgofer.ServeDirectoryRequest{ - Dir: spec.Root.Path, - IsReadOnly: spec.Root.Readonly, - CID: cid, - FilePayload: urpc.FilePayload{Files: []*os.File{goferEnd}}, - } - if err := goferConn.Call(fsgofer.ServeDirectory, &goferReq, nil); err != nil { - return fmt.Errorf("error serving new filesystem for non-root container %v: %v", goferReq, err) - } // Start running the container. args := boot.StartArgs{ Spec: spec, Conf: conf, CID: cid, - FilePayload: urpc.FilePayload{Files: []*os.File{sandEnd}}, + FilePayload: urpc.FilePayload{Files: ioFiles}, } if err := sandboxConn.Call(boot.ContainerStart, &args, nil); err != nil { return fmt.Errorf("error starting non-root container %v: %v", spec.Process.Args, err) } - return nil } @@ -275,102 +222,13 @@ func (s *Sandbox) sandboxConnect() (*urpc.Client, error) { return conn, nil } -func (s *Sandbox) goferConnect() (*urpc.Client, error) { - log.Debugf("Connecting to gofer for sandbox %q", s.ID) - conn, err := client.ConnectTo(fsgofer.ControlSocketAddr(s.ID)) - if err != nil { - return nil, s.connError(err) - } - return conn, nil -} - func (s *Sandbox) connError(err error) error { return fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err) } -func (s *Sandbox) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir, binPath string) (int, []*os.File, error) { - if conf.FileAccess == boot.FileAccessDirect { - // Don't start a gofer. The sandbox will access host FS directly. - return 0, nil, nil - } - - // Start with the general config flags. - args := conf.ToFlags() - args = append(args, "gofer", "--bundle", bundleDir) - - // Add root mount and then add any other additional mounts. - mountCount := 1 - - // Add additional mounts. - for _, m := range spec.Mounts { - if specutils.Is9PMount(m) { - mountCount++ - } - } - sandEnds := make([]*os.File, 0, mountCount) - goferEnds := make([]*os.File, 0, mountCount) - // nextFD is the next available file descriptor for the gofer process. - // It starts at 3 because 0-2 are used by stdin/stdout/stderr. - var nextFD int - for nextFD = 3; nextFD-3 < mountCount; nextFD++ { - sandEnd, goferEnd, err := createSocketPair() - if err != nil { - return 0, nil, err - } - defer goferEnd.Close() - sandEnds = append(sandEnds, sandEnd) - goferEnds = append(goferEnds, goferEnd) - args = append(args, fmt.Sprintf("--io-fds=%d", nextFD)) - } - - // Create and donate a file descriptor for the control server. - addr := fsgofer.ControlSocketAddr(s.ID) - serverFD, err := server.CreateSocket(addr) - if err != nil { - return 0, nil, fmt.Errorf("error creating control server socket for sandbox %q: %v", s.ID, err) - } - - // Add the control server fd. - args = append(args, "--controller-fd="+strconv.Itoa(nextFD)) - nextFD++ - controllerFile := os.NewFile(uintptr(serverFD), "gofer_control_socket_server") - defer controllerFile.Close() - - cmd := exec.Command(binPath, args...) - cmd.ExtraFiles = goferEnds - cmd.ExtraFiles = append(cmd.ExtraFiles, controllerFile) - - // Setup any uid/gid mappings, and create or join the configured user - // namespace so the gofer's view of the filesystem aligns with the - // users in the sandbox. - setUIDGIDMappings(cmd, spec) - nss := filterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec) - - if conf.Overlay { - args = append(args, "--panic-on-write=true") - } - - // Start the gofer in the given namespace. - log.Debugf("Starting gofer: %s %v", binPath, args) - if err := startInNS(cmd, nss); err != nil { - return 0, nil, err - } - log.Infof("Gofer started, pid: %d", cmd.Process.Pid) - return cmd.Process.Pid, sandEnds, nil -} - -// createSocketPair creates a pair of files wrapping a socket pair. -func createSocketPair() (*os.File, *os.File, error) { - fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) - if err != nil { - return nil, nil, err - } - return os.NewFile(uintptr(fds[0]), "sandbox io fd"), os.NewFile(uintptr(fds[1]), "gofer io fd"), nil -} - // createSandboxProcess starts the sandbox as a subprocess by running the "boot" // command, passing in the bundle dir. -func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, binPath string, ioFiles []*os.File) error { +func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket string, ioFiles []*os.File) error { // nextFD is used to get unused FDs that we can pass to the sandbox. It // starts at 3 because 0, 1, and 2 are taken by stdin/out/err. nextFD := 3 @@ -387,6 +245,10 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund consoleEnabled := consoleSocket != "" + binPath, err := specutils.BinPath() + if err != nil { + return err + } cmd := exec.Command(binPath, conf.ToFlags()...) cmd.SysProcAttr = &syscall.SysProcAttr{} cmd.Args = append(cmd.Args, @@ -464,7 +326,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // Joins the network namespace if network is enabled. the sandbox talks // directly to the host network, which may have been configured in the // namespace. - if ns, ok := getNS(specs.NetworkNamespace, spec); ok && conf.Network != boot.NetworkNone { + if ns, ok := specutils.GetNS(specs.NetworkNamespace, spec); ok && conf.Network != boot.NetworkNone { log.Infof("Sandbox will be started in the container's network namespace: %+v", ns) nss = append(nss, ns) } else { @@ -478,10 +340,10 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund // - Gofer: when using a Gofer, the sandbox process can run isolated in an // empty namespace. if conf.Network == boot.NetworkHost || conf.FileAccess == boot.FileAccessDirect { - if userns, ok := getNS(specs.UserNamespace, spec); ok { + if userns, ok := specutils.GetNS(specs.UserNamespace, spec); ok { log.Infof("Sandbox will be started in container's user namespace: %+v", userns) nss = append(nss, userns) - setUIDGIDMappings(cmd, spec) + specutils.SetUIDGIDMappings(cmd, spec) } else { log.Infof("Sandbox will be started in the current user namespace") } @@ -496,7 +358,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund } log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args) - if err := startInNS(cmd, nss); err != nil { + if err := specutils.StartInNS(cmd, nss); err != nil { return err } s.Pid = cmd.Process.Pid |