From 607be0585fdc659ec3c043c989a8a6f86fcc14db Mon Sep 17 00:00:00 2001
From: praveensastry <sastry.praveen@gmail.com>
Date: Tue, 6 Aug 2019 01:15:48 +1000
Subject: Add option to configure reference leak checking

---
 runsc/boot/config.go | 19 +++++++++++++++++++
 runsc/boot/loader.go |  8 +++-----
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'runsc/boot')

diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 7ae0dd05d..139eb1cce 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -19,6 +19,7 @@ import (
 	"strconv"
 	"strings"
 
+	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/watchdog"
 )
 
@@ -112,6 +113,20 @@ func MakeWatchdogAction(s string) (watchdog.Action, error) {
 	}
 }
 
+// MakeRefsLeakMode converts type from string
+func MakeRefsLeakMode(s string) (refs.LeakMode, error) {
+	switch strings.ToLower(s) {
+	case "nocheck":
+		return refs.NoLeakChecking, nil
+	case "warning":
+		return refs.LeaksLogWarning, nil
+	case "traces":
+		return refs.LeaksLogTraces, nil
+	default:
+		return 0, fmt.Errorf("invalid refs leakmode %q", s)
+	}
+}
+
 // Config holds configuration that is not part of the runtime spec.
 type Config struct {
 	// RootDir is the runtime root directory.
@@ -201,6 +216,9 @@ type Config struct {
 
 	// AlsoLogToStderr allows to send log messages to stderr.
 	AlsoLogToStderr bool
+
+	// ReferenceLeakMode sets reference leak check mode
+	ReferenceLeakMode refs.LeakMode
 }
 
 // ToFlags returns a slice of flags that correspond to the given Config.
@@ -227,6 +245,7 @@ func (c *Config) ToFlags() []string {
 		"--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
 		"--rootless=" + strconv.FormatBool(c.Rootless),
 		"--alsologtostderr=" + strconv.FormatBool(c.AlsoLogToStderr),
+		"--refs-leak-mode=" + c.ReferenceLeakMode.String(),
 	}
 	if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
 		// Only include if set since it is never to be used by users.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 50cac0433..2fce800ae 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -191,6 +191,9 @@ func New(args Args) (*Loader, error) {
 		return nil, fmt.Errorf("setting up memory usage: %v", err)
 	}
 
+	// Sets the refs leak check mode
+	refs.SetLeakMode(args.Conf.ReferenceLeakMode)
+
 	// Create kernel and platform.
 	p, err := createPlatform(args.Conf, args.Device)
 	if err != nil {
@@ -1040,8 +1043,3 @@ func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host
 	}
 	return ep.tg, ep.tty, nil
 }
-
-func init() {
-	// TODO(gvisor.dev/issue/365): Make this configurable.
-	refs.SetLeakMode(refs.NoLeakChecking)
-}
-- 
cgit v1.2.3


From 8d89c0d92b3839eed0839b1a9bc7666e6261d972 Mon Sep 17 00:00:00 2001
From: praveensastry <sastry.praveen@gmail.com>
Date: Tue, 6 Aug 2019 11:57:50 +1000
Subject: Remove traces option for ref leak mode

---
 runsc/boot/config.go | 6 ++----
 runsc/boot/loader.go | 6 +++---
 runsc/main.go        | 2 +-
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'runsc/boot')

diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 139eb1cce..4276a4cc4 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -116,12 +116,10 @@ func MakeWatchdogAction(s string) (watchdog.Action, error) {
 // MakeRefsLeakMode converts type from string
 func MakeRefsLeakMode(s string) (refs.LeakMode, error) {
 	switch strings.ToLower(s) {
-	case "nocheck":
+	case "disabled":
 		return refs.NoLeakChecking, nil
 	case "warning":
 		return refs.LeaksLogWarning, nil
-	case "traces":
-		return refs.LeaksLogTraces, nil
 	default:
 		return 0, fmt.Errorf("invalid refs leakmode %q", s)
 	}
@@ -245,7 +243,7 @@ func (c *Config) ToFlags() []string {
 		"--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
 		"--rootless=" + strconv.FormatBool(c.Rootless),
 		"--alsologtostderr=" + strconv.FormatBool(c.AlsoLogToStderr),
-		"--refs-leak-mode=" + c.ReferenceLeakMode.String(),
+		"--ref-leak-mode=" + c.ReferenceLeakMode.String(),
 	}
 	if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
 		// Only include if set since it is never to be used by users.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 2fce800ae..65ac67dbf 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -181,6 +181,9 @@ type Args struct {
 // New initializes a new kernel loader configured by spec.
 // New also handles setting up a kernel for restoring a container.
 func New(args Args) (*Loader, error) {
+	// Sets the reference leak check mode
+	refs.SetLeakMode(args.Conf.ReferenceLeakMode)
+
 	// We initialize the rand package now to make sure /dev/urandom is pre-opened
 	// on kernels that do not support getrandom(2).
 	if err := rand.Init(); err != nil {
@@ -191,9 +194,6 @@ func New(args Args) (*Loader, error) {
 		return nil, fmt.Errorf("setting up memory usage: %v", err)
 	}
 
-	// Sets the refs leak check mode
-	refs.SetLeakMode(args.Conf.ReferenceLeakMode)
-
 	// Create kernel and platform.
 	p, err := createPlatform(args.Conf, args.Device)
 	if err != nil {
diff --git a/runsc/main.go b/runsc/main.go
index a10138049..8857b96ac 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -73,7 +73,7 @@ var (
 	netRaw             = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
 	numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
 	rootless           = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
-	referenceLeakMode  = flag.String("refs-leak-mode", "nocheck", "sets reference leak check mode: nocheck (default), warning, traces.")
+	referenceLeakMode  = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), warning.")
 
 	// Test flags, not to be used outside tests, ever.
 	testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
-- 
cgit v1.2.3


From 73985c6545d887644d8aa4f0e00491cc903501c7 Mon Sep 17 00:00:00 2001
From: praveensastry <sastry.praveen@gmail.com>
Date: Fri, 9 Aug 2019 17:13:06 +1000
Subject: Fix the Stringer for leak mode

---
 pkg/refs/refcounter.go | 6 +++---
 runsc/boot/config.go   | 2 ++
 runsc/main.go          | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'runsc/boot')

diff --git a/pkg/refs/refcounter.go b/pkg/refs/refcounter.go
index 417f4a2d6..7fe42d8ff 100644
--- a/pkg/refs/refcounter.go
+++ b/pkg/refs/refcounter.go
@@ -235,11 +235,11 @@ const (
 func (l LeakMode) String() string {
 	switch l {
 	case NoLeakChecking:
-		return "NoLeakChecking"
+		return "disabled"
 	case LeaksLogWarning:
-		return "LeaksLogWarning"
+		return "warning"
 	case LeaksLogTraces:
-		return "LeaksLogTraces"
+		return "traces"
 	default:
 		panic(fmt.Sprintf("Invalid leakmode: %d", l))
 	}
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 4276a4cc4..3c0f72e9f 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -120,6 +120,8 @@ func MakeRefsLeakMode(s string) (refs.LeakMode, error) {
 		return refs.NoLeakChecking, nil
 	case "warning":
 		return refs.LeaksLogWarning, nil
+	case "traces":
+		return refs.LeaksLogTraces, nil
 	default:
 		return 0, fmt.Errorf("invalid refs leakmode %q", s)
 	}
diff --git a/runsc/main.go b/runsc/main.go
index 8857b96ac..1b7c1c4b7 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -73,7 +73,7 @@ var (
 	netRaw             = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
 	numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
 	rootless           = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
-	referenceLeakMode  = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), warning.")
+	referenceLeakMode  = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), warning, traces.")
 
 	// Test flags, not to be used outside tests, ever.
 	testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
-- 
cgit v1.2.3


From 7672eaae25eebad650e71ba790e1585736866ccc Mon Sep 17 00:00:00 2001
From: praveensastry <sastry.praveen@gmail.com>
Date: Thu, 22 Aug 2019 22:52:43 +1000
Subject: Add log prefix for better clarity

---
 pkg/refs/refcounter.go | 4 ++--
 runsc/boot/config.go   | 4 ++--
 runsc/main.go          | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'runsc/boot')

diff --git a/pkg/refs/refcounter.go b/pkg/refs/refcounter.go
index 7fe42d8ff..8c3e3d5ab 100644
--- a/pkg/refs/refcounter.go
+++ b/pkg/refs/refcounter.go
@@ -237,9 +237,9 @@ func (l LeakMode) String() string {
 	case NoLeakChecking:
 		return "disabled"
 	case LeaksLogWarning:
-		return "warning"
+		return "log-names"
 	case LeaksLogTraces:
-		return "traces"
+		return "log-traces"
 	default:
 		panic(fmt.Sprintf("Invalid leakmode: %d", l))
 	}
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 3c0f72e9f..6a742f349 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -118,9 +118,9 @@ func MakeRefsLeakMode(s string) (refs.LeakMode, error) {
 	switch strings.ToLower(s) {
 	case "disabled":
 		return refs.NoLeakChecking, nil
-	case "warning":
+	case "log-names":
 		return refs.LeaksLogWarning, nil
-	case "traces":
+	case "log-traces":
 		return refs.LeaksLogTraces, nil
 	default:
 		return 0, fmt.Errorf("invalid refs leakmode %q", s)
diff --git a/runsc/main.go b/runsc/main.go
index 1b7c1c4b7..58e7dd8f3 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -73,7 +73,7 @@ var (
 	netRaw             = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
 	numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
 	rootless           = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
-	referenceLeakMode  = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), warning, traces.")
+	referenceLeakMode  = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), log-names, log-traces.")
 
 	// Test flags, not to be used outside tests, ever.
 	testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
-- 
cgit v1.2.3


From c39564332bdd5030b9031ed3b1a428464fea670e Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Tue, 27 Aug 2019 10:46:06 -0700
Subject: Mount volumes as super user

This used to be the case, but regressed after a recent change.
Also made a few fixes around it and cleaned up the code a bit.

Closes #720

PiperOrigin-RevId: 265717496
---
 pkg/sentry/fs/mounts.go                 |   3 +-
 runsc/boot/fs.go                        | 159 ++++++++++++++++++--------------
 runsc/boot/loader.go                    |  17 ++--
 runsc/boot/loader_test.go               |  17 ++--
 runsc/boot/user_test.go                 |  12 +--
 runsc/container/container_test.go       |  16 +++-
 runsc/container/multi_container_test.go |  55 +++++++++++
 7 files changed, 179 insertions(+), 100 deletions(-)

(limited to 'runsc/boot')

diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go
index 9b713e785..ac0398bd9 100644
--- a/pkg/sentry/fs/mounts.go
+++ b/pkg/sentry/fs/mounts.go
@@ -171,8 +171,6 @@ type MountNamespace struct {
 // NewMountNamespace returns a new MountNamespace, with the provided node at the
 // root, and the given cache size. A root must always be provided.
 func NewMountNamespace(ctx context.Context, root *Inode) (*MountNamespace, error) {
-	creds := auth.CredentialsFromContext(ctx)
-
 	// Set the root dirent and id on the root mount. The reference returned from
 	// NewDirent will be donated to the MountNamespace constructed below.
 	d := NewDirent(ctx, root, "/")
@@ -181,6 +179,7 @@ func NewMountNamespace(ctx context.Context, root *Inode) (*MountNamespace, error
 		d: newRootMount(1, d),
 	}
 
+	creds := auth.CredentialsFromContext(ctx)
 	mns := MountNamespace{
 		userns:  creds.UserNamespace,
 		root:    d,
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index b6eeacf98..34c674840 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -25,19 +25,21 @@ import (
 
 	// Include filesystem types that OCI spec might mount.
 	_ "gvisor.dev/gvisor/pkg/sentry/fs/dev"
-	"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
 	_ "gvisor.dev/gvisor/pkg/sentry/fs/host"
 	_ "gvisor.dev/gvisor/pkg/sentry/fs/proc"
-	"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
 	_ "gvisor.dev/gvisor/pkg/sentry/fs/sys"
 	_ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
 	_ "gvisor.dev/gvisor/pkg/sentry/fs/tty"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/context"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
+	"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
+	"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
@@ -261,6 +263,18 @@ func subtargets(root string, mnts []specs.Mount) []string {
 	return targets
 }
 
+func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+	mns, err := mntr.setupFS(conf, procArgs)
+	if err != nil {
+		return err
+	}
+
+	// Set namespace here so that it can be found in ctx.
+	procArgs.MountNamespace = mns
+
+	return setExecutablePath(ctx, procArgs)
+}
+
 // setExecutablePath sets the procArgs.Filename by searching the PATH for an
 // executable matching the procArgs.Argv[0].
 func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
@@ -500,73 +514,95 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin
 	}
 }
 
-// setupChildContainer is used to set up the file system for non-root containers
-// and amend the procArgs accordingly. This is the main entry point for this
-// rest of functions in this file. procArgs are passed by reference and the
-// FDMap field is modified. It dups stdioFDs.
-func (c *containerMounter) setupChildContainer(conf *Config, procArgs *kernel.CreateProcessArgs) error {
-	// Setup a child container.
-	log.Infof("Creating new process in child container.")
-
-	// Create a new root inode and mount namespace for the container.
-	rootCtx := c.k.SupervisorContext()
-	rootInode, err := c.createRootMount(rootCtx, conf)
-	if err != nil {
-		return fmt.Errorf("creating filesystem for container: %v", err)
+// processHints processes annotations that contain hints about how volumes
+// should be mounted (e.g. a volume shared between containers). It must be
+// called for the root container only.
+func (c *containerMounter) processHints(conf *Config) error {
+	ctx := c.k.SupervisorContext()
+	for _, hint := range c.hints.mounts {
+		log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
+		inode, err := c.mountSharedMaster(ctx, conf, hint)
+		if err != nil {
+			return fmt.Errorf("mounting shared master %q: %v", hint.name, err)
+		}
+		hint.root = inode
 	}
-	mns, err := fs.NewMountNamespace(rootCtx, rootInode)
+	return nil
+}
+
+// setupFS is used to set up the file system for all containers. This is the
+// main entry point method, with most of the others being internal only. It
+// returns the mount namespace that is created for the container.
+func (c *containerMounter) setupFS(conf *Config, procArgs *kernel.CreateProcessArgs) (*fs.MountNamespace, error) {
+	log.Infof("Configuring container's file system")
+
+	// Create context with root credentials to mount the filesystem (the current
+	// user may not be privileged enough).
+	rootProcArgs := *procArgs
+	rootProcArgs.WorkingDirectory = "/"
+	rootProcArgs.Credentials = auth.NewRootCredentials(procArgs.Credentials.UserNamespace)
+	rootProcArgs.Umask = 0022
+	rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals
+	rootCtx := rootProcArgs.NewContext(c.k)
+
+	mns, err := c.createMountNamespace(rootCtx, conf)
 	if err != nil {
-		return fmt.Errorf("creating new mount namespace for container: %v", err)
+		return nil, err
 	}
-	procArgs.MountNamespace = mns
-	root := mns.Root()
-	defer root.DecRef()
 
-	// Mount all submounts.
-	if err := c.mountSubmounts(rootCtx, conf, mns, root); err != nil {
-		return err
+	// Set namespace here so that it can be found in rootCtx.
+	rootProcArgs.MountNamespace = mns
+
+	if err := c.mountSubmounts(rootCtx, conf, mns); err != nil {
+		return nil, err
 	}
-	return c.checkDispenser()
+	return mns, nil
 }
 
-func (c *containerMounter) checkDispenser() error {
-	if !c.fds.empty() {
-		return fmt.Errorf("not all gofer FDs were consumed, remaining: %v", c.fds)
+func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Config) (*fs.MountNamespace, error) {
+	rootInode, err := c.createRootMount(ctx, conf)
+	if err != nil {
+		return nil, fmt.Errorf("creating filesystem for container: %v", err)
 	}
-	return nil
+	mns, err := fs.NewMountNamespace(ctx, rootInode)
+	if err != nil {
+		return nil, fmt.Errorf("creating new mount namespace for container: %v", err)
+	}
+	return mns, nil
 }
 
-// setupRootContainer creates a mount namespace containing the root filesystem
-// and all mounts. 'rootCtx' is used to walk directories to find mount points.
-// The 'setMountNS' callback is called after the mount namespace is created and
-// will get a reference on that namespace. The callback must ensure that the
-// rootCtx has the provided mount namespace.
-func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx context.Context, conf *Config, setMountNS func(*fs.MountNamespace)) error {
-	for _, hint := range c.hints.mounts {
-		log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
-		inode, err := c.mountSharedMaster(rootCtx, conf, hint)
-		if err != nil {
-			return fmt.Errorf("mounting shared master %q: %v", hint.name, err)
+func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace) error {
+	root := mns.Root()
+	defer root.DecRef()
+
+	for _, m := range c.mounts {
+		log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options)
+		if hint := c.hints.findMount(m); hint != nil && hint.isSupported() {
+			if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil {
+				return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, m.Destination, err)
+			}
+		} else {
+			if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
+				return fmt.Errorf("mount submount %q: %v", m.Destination, err)
+			}
 		}
-		hint.root = inode
 	}
 
-	rootInode, err := c.createRootMount(rootCtx, conf)
-	if err != nil {
-		return fmt.Errorf("creating root mount: %v", err)
+	if err := c.mountTmp(ctx, conf, mns, root); err != nil {
+		return fmt.Errorf("mount submount %q: %v", "tmp", err)
 	}
-	mns, err := fs.NewMountNamespace(userCtx, rootInode)
-	if err != nil {
-		return fmt.Errorf("creating root mount namespace: %v", err)
+
+	if err := c.checkDispenser(); err != nil {
+		return err
 	}
-	setMountNS(mns)
+	return nil
+}
 
-	root := mns.Root()
-	defer root.DecRef()
-	if err := c.mountSubmounts(rootCtx, conf, mns, root); err != nil {
-		return fmt.Errorf("mounting submounts: %v", err)
+func (c *containerMounter) checkDispenser() error {
+	if !c.fds.empty() {
+		return fmt.Errorf("not all gofer FDs were consumed, remaining: %v", c.fds)
 	}
-	return c.checkDispenser()
+	return nil
 }
 
 // mountSharedMaster mounts the master of a volume that is shared among
@@ -684,25 +720,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
 	return fsName, opts, useOverlay, err
 }
 
-func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error {
-	for _, m := range c.mounts {
-		if hint := c.hints.findMount(m); hint != nil && hint.isSupported() {
-			if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil {
-				return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, m.Destination, err)
-			}
-		} else {
-			if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
-				return fmt.Errorf("mount submount %q: %v", m.Destination, err)
-			}
-		}
-	}
-
-	if err := c.mountTmp(ctx, conf, mns, root); err != nil {
-		return fmt.Errorf("mount submount %q: %v", "tmp", err)
-	}
-	return nil
-}
-
 // mountSubmount mounts volumes inside the container's root. Because mounts may
 // be readonly, a lower ramfs overlay is added to create the mount point dir.
 // Another overlay is added with tmpfs on top if Config.Overlay is true.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index f91158027..02dd080fe 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -527,14 +527,12 @@ func (l *Loader) run() error {
 
 		// Setup the root container file system.
 		l.startGoferMonitor(l.sandboxID, l.goferFDs)
+
 		mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints)
-		if err := mntr.setupRootContainer(ctx, ctx, l.conf, func(mns *fs.MountNamespace) {
-			l.rootProcArgs.MountNamespace = mns
-		}); err != nil {
+		if err := mntr.processHints(l.conf); err != nil {
 			return err
 		}
-
-		if err := setExecutablePath(ctx, &l.rootProcArgs); err != nil {
+		if err := setupContainerFS(ctx, l.conf, mntr, &l.rootProcArgs); err != nil {
 			return err
 		}
 
@@ -687,13 +685,10 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
 
 	// Setup the child container file system.
 	l.startGoferMonitor(cid, goferFDs)
-	mntr := newContainerMounter(spec, goferFDs, l.k, l.mountHints)
-	if err := mntr.setupChildContainer(conf, &procArgs); err != nil {
-		return fmt.Errorf("configuring container FS: %v", err)
-	}
 
-	if err := setExecutablePath(ctx, &procArgs); err != nil {
-		return fmt.Errorf("setting executable path for %+v: %v", procArgs, err)
+	mntr := newContainerMounter(spec, goferFDs, l.k, l.mountHints)
+	if err := setupContainerFS(ctx, conf, mntr, &procArgs); err != nil {
+		return err
 	}
 
 	// Create and start the new process.
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index e0e32b9d5..147ff7703 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -401,17 +401,16 @@ func TestCreateMountNamespace(t *testing.T) {
 			}
 			defer cleanup()
 
-			// setupRootContainer needs to find root from the context after the
-			// namespace is created.
-			var mns *fs.MountNamespace
-			setMountNS := func(m *fs.MountNamespace) {
-				mns = m
-				ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
-			}
 			mntr := newContainerMounter(&tc.spec, []int{sandEnd}, nil, &podMountHints{})
-			if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
-				t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err)
+			mns, err := mntr.createMountNamespace(ctx, conf)
+			if err != nil {
+				t.Fatalf("failed to create mount namespace: %v", err)
 			}
+			ctx = fs.WithRoot(ctx, mns.Root())
+			if err := mntr.mountSubmounts(ctx, conf, mns); err != nil {
+				t.Fatalf("failed to create mount namespace: %v", err)
+			}
+
 			root := mns.Root()
 			defer root.DecRef()
 			for _, p := range tc.expectedPaths {
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
index 01f666507..906baf3e5 100644
--- a/runsc/boot/user_test.go
+++ b/runsc/boot/user_test.go
@@ -164,13 +164,13 @@ func TestGetExecUserHome(t *testing.T) {
 				},
 			}
 
-			var mns *fs.MountNamespace
-			setMountNS := func(m *fs.MountNamespace) {
-				mns = m
-				ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
-			}
 			mntr := newContainerMounter(spec, []int{sandEnd}, nil, &podMountHints{})
-			if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
+			mns, err := mntr.createMountNamespace(ctx, conf)
+			if err != nil {
+				t.Fatalf("failed to create mount namespace: %v", err)
+			}
+			ctx = fs.WithRoot(ctx, mns.Root())
+			if err := mntr.mountSubmounts(ctx, conf, mns); err != nil {
 				t.Fatalf("failed to create mount namespace: %v", err)
 			}
 
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index af128bf1c..3d4f304f3 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -1310,10 +1310,13 @@ func TestRunNonRoot(t *testing.T) {
 		t.Logf("Running test with conf: %+v", conf)
 
 		spec := testutil.NewSpecWithArgs("/bin/true")
+
+		// Set a random user/group with no access to "blocked" dir.
 		spec.Process.User.UID = 343
 		spec.Process.User.GID = 2401
+		spec.Process.Capabilities = nil
 
-		// User that container runs as can't list '$TMP/blocked' and would fail to
+		// User running inside container can't list '$TMP/blocked' and would fail to
 		// mount it.
 		dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked")
 		if err != nil {
@@ -1327,6 +1330,17 @@ func TestRunNonRoot(t *testing.T) {
 			t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
 		}
 
+		src, err := ioutil.TempDir(testutil.TmpDir(), "src")
+		if err != nil {
+			t.Fatalf("ioutil.TempDir() failed: %v", err)
+		}
+
+		spec.Mounts = append(spec.Mounts, specs.Mount{
+			Destination: dir,
+			Source:      src,
+			Type:        "bind",
+		})
+
 		if err := run(spec, conf); err != nil {
 			t.Fatalf("error running sandbox: %v", err)
 		}
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 2d51fecc6..ae03d24b4 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -1485,3 +1485,58 @@ func TestMultiContainerLoadSandbox(t *testing.T) {
 		t.Errorf("containers not found: %v", wantIDs)
 	}
 }
+
+// TestMultiContainerRunNonRoot checks that child container can be configured
+// when running as non-privileged user.
+func TestMultiContainerRunNonRoot(t *testing.T) {
+	cmdRoot := []string{"/bin/sleep", "100"}
+	cmdSub := []string{"/bin/true"}
+	podSpecs, ids := createSpecs(cmdRoot, cmdSub)
+
+	// User running inside container can't list '$TMP/blocked' and would fail to
+	// mount it.
+	blocked, err := ioutil.TempDir(testutil.TmpDir(), "blocked")
+	if err != nil {
+		t.Fatalf("ioutil.TempDir() failed: %v", err)
+	}
+	if err := os.Chmod(blocked, 0700); err != nil {
+		t.Fatalf("os.Chmod(%q) failed: %v", blocked, err)
+	}
+	dir := path.Join(blocked, "test")
+	if err := os.Mkdir(dir, 0755); err != nil {
+		t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
+	}
+
+	src, err := ioutil.TempDir(testutil.TmpDir(), "src")
+	if err != nil {
+		t.Fatalf("ioutil.TempDir() failed: %v", err)
+	}
+
+	// Set a random user/group with no access to "blocked" dir.
+	podSpecs[1].Process.User.UID = 343
+	podSpecs[1].Process.User.GID = 2401
+	podSpecs[1].Process.Capabilities = nil
+
+	podSpecs[1].Mounts = append(podSpecs[1].Mounts, specs.Mount{
+		Destination: dir,
+		Source:      src,
+		Type:        "bind",
+	})
+
+	conf := testutil.TestConfig()
+	pod, cleanup, err := startContainers(conf, podSpecs, ids)
+	if err != nil {
+		t.Fatalf("error starting containers: %v", err)
+	}
+	defer cleanup()
+
+	// Once all containers are started, wait for the child container to exit.
+	// This means that the volume was mounted properly.
+	ws, err := pod[1].Wait()
+	if err != nil {
+		t.Fatalf("running child container: %v", err)
+	}
+	if !ws.Exited() || ws.ExitStatus() != 0 {
+		t.Fatalf("child container failed, waitStatus: %v", ws)
+	}
+}
-- 
cgit v1.2.3


From 0f5cdc1e00488823f1f7b9884c15b899677362b6 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Wed, 4 Sep 2019 18:55:39 -0700
Subject: Resolve flakes with TestMultiContainerDestroy

Some processes are reparented to the root container depending
on the kill order and the root container would not reap them in time.
So some zombie processes were still present when the test checked.

Fix it by running the second container inside a PID namespace.

PiperOrigin-RevId: 267278591
---
 runsc/boot/loader.go                    | 49 +++++++++++++++------------------
 runsc/container/multi_container_test.go | 14 +++++++---
 2 files changed, 32 insertions(+), 31 deletions(-)

(limited to 'runsc/boot')

diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 19b738705..823a34619 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -760,26 +760,34 @@ func (l *Loader) destroyContainer(cid string) error {
 		if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
 			return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
 		}
+		// Wait for all processes that belong to the container to exit (including
+		// exec'd processes).
+		for _, t := range l.k.TaskSet().Root.Tasks() {
+			if t.ContainerID() == cid {
+				t.ThreadGroup().WaitExited()
+			}
+		}
+
+		// At this point, all processes inside of the container have exited,
+		// releasing all references to the container's MountNamespace and
+		// causing all submounts and overlays to be unmounted.
+		//
+		// Since the container's MountNamespace has been released,
+		// MountNamespace.destroy() will have executed, but that function may
+		// trigger async close operations. We must wait for those to complete
+		// before returning, otherwise the caller may kill the gofer before
+		// they complete, causing a cascade of failing RPCs.
+		fs.AsyncBarrier()
 	}
 
-	// Remove all container thread groups from the map.
+	// No more failures from this point on. Remove all container thread groups
+	// from the map.
 	for key := range l.processes {
 		if key.cid == cid {
 			delete(l.processes, key)
 		}
 	}
 
-	// At this point, all processes inside of the container have exited,
-	// releasing all references to the container's MountNamespace and
-	// causing all submounts and overlays to be unmounted.
-	//
-	// Since the container's MountNamespace has been released,
-	// MountNamespace.destroy() will have executed, but that function may
-	// trigger async close operations. We must wait for those to complete
-	// before returning, otherwise the caller may kill the gofer before
-	// they complete, causing a cascade of failing RPCs.
-	fs.AsyncBarrier()
-
 	log.Debugf("Container destroyed %q", cid)
 	return nil
 }
@@ -1037,21 +1045,8 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
 	// the signal is delivered. This prevents process leaks when SIGKILL is
 	// sent to the entire container.
 	l.k.Pause()
-	if err := l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo}); err != nil {
-		l.k.Unpause()
-		return err
-	}
-	l.k.Unpause()
-
-	// If SIGKILLing all processes, wait for them to exit.
-	if linux.Signal(signo) == linux.SIGKILL {
-		for _, t := range l.k.TaskSet().Root.Tasks() {
-			if t.ContainerID() == cid {
-				t.ThreadGroup().WaitExited()
-			}
-		}
-	}
-	return nil
+	defer l.k.Unpause()
+	return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo})
 }
 
 // threadGroupFromID same as threadGroupFromIDLocked except that it acquires
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 6e5f23ff2..bd45a5118 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -549,10 +549,16 @@ func TestMultiContainerDestroy(t *testing.T) {
 		t.Logf("Running test with conf: %+v", conf)
 
 		// First container will remain intact while the second container is killed.
-		specs, ids := createSpecs(
-			[]string{app, "reaper"},
+		podSpecs, ids := createSpecs(
+			[]string{"sleep", "100"},
 			[]string{app, "fork-bomb"})
-		containers, cleanup, err := startContainers(conf, specs, ids)
+
+		// Run the fork bomb in a PID namespace to prevent its processes from
+		// being re-parented to PID=1 in the root container.
+		podSpecs[1].Linux = &specs.Linux{
+			Namespaces: []specs.LinuxNamespace{{Type: "pid"}},
+		}
+		containers, cleanup, err := startContainers(conf, podSpecs, ids)
 		if err != nil {
 			t.Fatalf("error starting containers: %v", err)
 		}
@@ -580,7 +586,7 @@ func TestMultiContainerDestroy(t *testing.T) {
 		if err != nil {
 			t.Fatalf("error getting process data from sandbox: %v", err)
 		}
-		expectedPL := []*control.Process{{PID: 1, Cmd: "test_app"}}
+		expectedPL := []*control.Process{{PID: 1, Cmd: "sleep"}}
 		if !procListsEqual(pss, expectedPL) {
 			t.Errorf("container got process list: %s, want: %s", procListToString(pss), procListToString(expectedPL))
 		}
-- 
cgit v1.2.3


From fe1f5210774d015d653df164d6f676658863780c Mon Sep 17 00:00:00 2001
From: Ian Gudger <igudger@google.com>
Date: Fri, 6 Sep 2019 17:59:46 -0700
Subject: Remove redundant global tcpip.LinkEndpointID.

PiperOrigin-RevId: 267709597
---
 pkg/tcpip/link/channel/channel.go                  |   6 +-
 pkg/tcpip/link/fdbased/endpoint.go                 |  18 +-
 pkg/tcpip/link/fdbased/endpoint_test.go            |   3 +-
 pkg/tcpip/link/loopback/loopback.go                |   4 +-
 pkg/tcpip/link/muxed/injectable.go                 |   5 +-
 pkg/tcpip/link/muxed/injectable_test.go            |   4 +-
 pkg/tcpip/link/sharedmem/sharedmem.go              |   8 +-
 pkg/tcpip/link/sharedmem/sharedmem_test.go         |   4 +-
 pkg/tcpip/link/sniffer/sniffer.go                  |  18 +-
 pkg/tcpip/link/waitable/waitable.go                |   7 +-
 pkg/tcpip/link/waitable/waitable_test.go           |   6 +-
 pkg/tcpip/network/arp/arp_test.go                  |  10 +-
 pkg/tcpip/network/ipv4/ipv4_test.go                |  22 +-
 pkg/tcpip/network/ipv6/icmp_test.go                |  22 +-
 pkg/tcpip/network/ipv6/ndp_test.go                 |  15 +-
 pkg/tcpip/stack/registration.go                    |  28 ---
 pkg/tcpip/stack/stack.go                           |  27 +-
 pkg/tcpip/stack/stack_test.go                      | 278 ++++++++++-----------
 pkg/tcpip/stack/transport_test.go                  |  24 +-
 pkg/tcpip/tcpip.go                                 |   3 -
 pkg/tcpip/transport/tcp/testing/context/context.go |   9 +-
 pkg/tcpip/transport/udp/udp_test.go                |   9 +-
 runsc/boot/network.go                              |  18 +-
 23 files changed, 250 insertions(+), 298 deletions(-)

(limited to 'runsc/boot')

diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go
index c40744b8e..eec430d0a 100644
--- a/pkg/tcpip/link/channel/channel.go
+++ b/pkg/tcpip/link/channel/channel.go
@@ -44,14 +44,12 @@ type Endpoint struct {
 }
 
 // New creates a new channel endpoint.
-func New(size int, mtu uint32, linkAddr tcpip.LinkAddress) (tcpip.LinkEndpointID, *Endpoint) {
-	e := &Endpoint{
+func New(size int, mtu uint32, linkAddr tcpip.LinkAddress) *Endpoint {
+	return &Endpoint{
 		C:        make(chan PacketInfo, size),
 		mtu:      mtu,
 		linkAddr: linkAddr,
 	}
-
-	return stack.RegisterLinkEndpoint(e), e
 }
 
 // Drain removes all outbound packets from the channel and counts them.
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index 77f988b9f..adcf21371 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -165,7 +165,7 @@ type Options struct {
 //
 // Makes fd non-blocking, but does not take ownership of fd, which must remain
 // open for the lifetime of the returned endpoint.
-func New(opts *Options) (tcpip.LinkEndpointID, error) {
+func New(opts *Options) (stack.LinkEndpoint, error) {
 	caps := stack.LinkEndpointCapabilities(0)
 	if opts.RXChecksumOffload {
 		caps |= stack.CapabilityRXChecksumOffload
@@ -190,7 +190,7 @@ func New(opts *Options) (tcpip.LinkEndpointID, error) {
 	}
 
 	if len(opts.FDs) == 0 {
-		return 0, fmt.Errorf("opts.FD is empty, at least one FD must be specified")
+		return nil, fmt.Errorf("opts.FD is empty, at least one FD must be specified")
 	}
 
 	e := &endpoint{
@@ -207,12 +207,12 @@ func New(opts *Options) (tcpip.LinkEndpointID, error) {
 	for i := 0; i < len(e.fds); i++ {
 		fd := e.fds[i]
 		if err := syscall.SetNonblock(fd, true); err != nil {
-			return 0, fmt.Errorf("syscall.SetNonblock(%v) failed: %v", fd, err)
+			return nil, fmt.Errorf("syscall.SetNonblock(%v) failed: %v", fd, err)
 		}
 
 		isSocket, err := isSocketFD(fd)
 		if err != nil {
-			return 0, err
+			return nil, err
 		}
 		if isSocket {
 			if opts.GSOMaxSize != 0 {
@@ -222,12 +222,12 @@ func New(opts *Options) (tcpip.LinkEndpointID, error) {
 		}
 		inboundDispatcher, err := createInboundDispatcher(e, fd, isSocket)
 		if err != nil {
-			return 0, fmt.Errorf("createInboundDispatcher(...) = %v", err)
+			return nil, fmt.Errorf("createInboundDispatcher(...) = %v", err)
 		}
 		e.inboundDispatchers = append(e.inboundDispatchers, inboundDispatcher)
 	}
 
-	return stack.RegisterLinkEndpoint(e), nil
+	return e, nil
 }
 
 func createInboundDispatcher(e *endpoint, fd int, isSocket bool) (linkDispatcher, error) {
@@ -435,14 +435,12 @@ func (e *InjectableEndpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buf
 }
 
 // NewInjectable creates a new fd-based InjectableEndpoint.
-func NewInjectable(fd int, mtu uint32, capabilities stack.LinkEndpointCapabilities) (tcpip.LinkEndpointID, *InjectableEndpoint) {
+func NewInjectable(fd int, mtu uint32, capabilities stack.LinkEndpointCapabilities) *InjectableEndpoint {
 	syscall.SetNonblock(fd, true)
 
-	e := &InjectableEndpoint{endpoint: endpoint{
+	return &InjectableEndpoint{endpoint: endpoint{
 		fds:  []int{fd},
 		mtu:  mtu,
 		caps: capabilities,
 	}}
-
-	return stack.RegisterLinkEndpoint(e), e
 }
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
index e305252d6..04406bc9a 100644
--- a/pkg/tcpip/link/fdbased/endpoint_test.go
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -68,11 +68,10 @@ func newContext(t *testing.T, opt *Options) *context {
 	}
 
 	opt.FDs = []int{fds[1]}
-	epID, err := New(opt)
+	ep, err := New(opt)
 	if err != nil {
 		t.Fatalf("Failed to create FD endpoint: %v", err)
 	}
-	ep := stack.FindLinkEndpoint(epID).(*endpoint)
 
 	c := &context{
 		t:    t,
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index ab6a53988..e121ea1a5 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -32,8 +32,8 @@ type endpoint struct {
 
 // New creates a new loopback endpoint. This link-layer endpoint just turns
 // outbound packets into inbound packets.
-func New() tcpip.LinkEndpointID {
-	return stack.RegisterLinkEndpoint(&endpoint{})
+func New() stack.LinkEndpoint {
+	return &endpoint{}
 }
 
 // Attach implements stack.LinkEndpoint.Attach. It just saves the stack network-
diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go
index a577a3d52..3ed7b98d1 100644
--- a/pkg/tcpip/link/muxed/injectable.go
+++ b/pkg/tcpip/link/muxed/injectable.go
@@ -105,9 +105,8 @@ func (m *InjectableEndpoint) WriteRawPacket(dest tcpip.Address, packet []byte) *
 }
 
 // NewInjectableEndpoint creates a new multi-endpoint injectable endpoint.
-func NewInjectableEndpoint(routes map[tcpip.Address]stack.InjectableLinkEndpoint) (tcpip.LinkEndpointID, *InjectableEndpoint) {
-	e := &InjectableEndpoint{
+func NewInjectableEndpoint(routes map[tcpip.Address]stack.InjectableLinkEndpoint) *InjectableEndpoint {
+	return &InjectableEndpoint{
 		routes: routes,
 	}
-	return stack.RegisterLinkEndpoint(e), e
 }
diff --git a/pkg/tcpip/link/muxed/injectable_test.go b/pkg/tcpip/link/muxed/injectable_test.go
index 174b9330f..3086fec00 100644
--- a/pkg/tcpip/link/muxed/injectable_test.go
+++ b/pkg/tcpip/link/muxed/injectable_test.go
@@ -87,8 +87,8 @@ func makeTestInjectableEndpoint(t *testing.T) (*InjectableEndpoint, *os.File, tc
 	if err != nil {
 		t.Fatal("Failed to create socket pair:", err)
 	}
-	_, underlyingEndpoint := fdbased.NewInjectable(pair[1], 6500, stack.CapabilityNone)
+	underlyingEndpoint := fdbased.NewInjectable(pair[1], 6500, stack.CapabilityNone)
 	routes := map[tcpip.Address]stack.InjectableLinkEndpoint{dstIP: underlyingEndpoint}
-	_, endpoint := NewInjectableEndpoint(routes)
+	endpoint := NewInjectableEndpoint(routes)
 	return endpoint, os.NewFile(uintptr(pair[0]), "test route end"), dstIP
 }
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index 834ea5c40..ba387af73 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -94,7 +94,7 @@ type endpoint struct {
 
 // New creates a new shared-memory-based endpoint. Buffers will be broken up
 // into buffers of "bufferSize" bytes.
-func New(mtu, bufferSize uint32, addr tcpip.LinkAddress, tx, rx QueueConfig) (tcpip.LinkEndpointID, error) {
+func New(mtu, bufferSize uint32, addr tcpip.LinkAddress, tx, rx QueueConfig) (stack.LinkEndpoint, error) {
 	e := &endpoint{
 		mtu:        mtu,
 		bufferSize: bufferSize,
@@ -102,15 +102,15 @@ func New(mtu, bufferSize uint32, addr tcpip.LinkAddress, tx, rx QueueConfig) (tc
 	}
 
 	if err := e.tx.init(bufferSize, &tx); err != nil {
-		return 0, err
+		return nil, err
 	}
 
 	if err := e.rx.init(bufferSize, &rx); err != nil {
 		e.tx.cleanup()
-		return 0, err
+		return nil, err
 	}
 
-	return stack.RegisterLinkEndpoint(e), nil
+	return e, nil
 }
 
 // Close frees all resources associated with the endpoint.
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index 98036f367..0e9ba0846 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -119,12 +119,12 @@ func newTestContext(t *testing.T, mtu, bufferSize uint32, addr tcpip.LinkAddress
 	initQueue(t, &c.txq, &c.txCfg)
 	initQueue(t, &c.rxq, &c.rxCfg)
 
-	id, err := New(mtu, bufferSize, addr, c.txCfg, c.rxCfg)
+	ep, err := New(mtu, bufferSize, addr, c.txCfg, c.rxCfg)
 	if err != nil {
 		t.Fatalf("New failed: %v", err)
 	}
 
-	c.ep = stack.FindLinkEndpoint(id).(*endpoint)
+	c.ep = ep.(*endpoint)
 	c.ep.Attach(c)
 
 	return c
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index 36c8c46fc..e7b6d7912 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -58,10 +58,10 @@ type endpoint struct {
 
 // New creates a new sniffer link-layer endpoint. It wraps around another
 // endpoint and logs packets and they traverse the endpoint.
-func New(lower tcpip.LinkEndpointID) tcpip.LinkEndpointID {
-	return stack.RegisterLinkEndpoint(&endpoint{
-		lower: stack.FindLinkEndpoint(lower),
-	})
+func New(lower stack.LinkEndpoint) stack.LinkEndpoint {
+	return &endpoint{
+		lower: lower,
+	}
 }
 
 func zoneOffset() (int32, error) {
@@ -102,15 +102,15 @@ func writePCAPHeader(w io.Writer, maxLen uint32) error {
 // snapLen is the maximum amount of a packet to be saved. Packets with a length
 // less than or equal too snapLen will be saved in their entirety. Longer
 // packets will be truncated to snapLen.
-func NewWithFile(lower tcpip.LinkEndpointID, file *os.File, snapLen uint32) (tcpip.LinkEndpointID, error) {
+func NewWithFile(lower stack.LinkEndpoint, file *os.File, snapLen uint32) (stack.LinkEndpoint, error) {
 	if err := writePCAPHeader(file, snapLen); err != nil {
-		return 0, err
+		return nil, err
 	}
-	return stack.RegisterLinkEndpoint(&endpoint{
-		lower:      stack.FindLinkEndpoint(lower),
+	return &endpoint{
+		lower:      lower,
 		file:       file,
 		maxPCAPLen: snapLen,
-	}), nil
+	}, nil
 }
 
 // DeliverNetworkPacket implements the stack.NetworkDispatcher interface. It is
diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go
index 3b6ac2ff7..408cc62f7 100644
--- a/pkg/tcpip/link/waitable/waitable.go
+++ b/pkg/tcpip/link/waitable/waitable.go
@@ -40,11 +40,10 @@ type Endpoint struct {
 // New creates a new waitable link-layer endpoint. It wraps around another
 // endpoint and allows the caller to block new write/dispatch calls and wait for
 // the inflight ones to finish before returning.
-func New(lower tcpip.LinkEndpointID) (tcpip.LinkEndpointID, *Endpoint) {
-	e := &Endpoint{
-		lower: stack.FindLinkEndpoint(lower),
+func New(lower stack.LinkEndpoint) *Endpoint {
+	return &Endpoint{
+		lower: lower,
 	}
-	return stack.RegisterLinkEndpoint(e), e
 }
 
 // DeliverNetworkPacket implements stack.NetworkDispatcher.DeliverNetworkPacket.
diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go
index 56e18ecb0..1031438b1 100644
--- a/pkg/tcpip/link/waitable/waitable_test.go
+++ b/pkg/tcpip/link/waitable/waitable_test.go
@@ -72,7 +72,7 @@ func (e *countedEndpoint) WritePacket(r *stack.Route, _ *stack.GSO, hdr buffer.P
 
 func TestWaitWrite(t *testing.T) {
 	ep := &countedEndpoint{}
-	_, wep := New(stack.RegisterLinkEndpoint(ep))
+	wep := New(ep)
 
 	// Write and check that it goes through.
 	wep.WritePacket(nil, nil /* gso */, buffer.Prependable{}, buffer.VectorisedView{}, 0)
@@ -97,7 +97,7 @@ func TestWaitWrite(t *testing.T) {
 
 func TestWaitDispatch(t *testing.T) {
 	ep := &countedEndpoint{}
-	_, wep := New(stack.RegisterLinkEndpoint(ep))
+	wep := New(ep)
 
 	// Check that attach happens.
 	wep.Attach(ep)
@@ -139,7 +139,7 @@ func TestOtherMethods(t *testing.T) {
 		hdrLen:       hdrLen,
 		linkAddr:     linkAddr,
 	}
-	_, wep := New(stack.RegisterLinkEndpoint(ep))
+	wep := New(ep)
 
 	if v := wep.MTU(); v != mtu {
 		t.Fatalf("Unexpected mtu: got=%v, want=%v", v, mtu)
diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go
index 4c4b54469..387fca96e 100644
--- a/pkg/tcpip/network/arp/arp_test.go
+++ b/pkg/tcpip/network/arp/arp_test.go
@@ -47,11 +47,13 @@ func newTestContext(t *testing.T) *testContext {
 	s := stack.New([]string{ipv4.ProtocolName, arp.ProtocolName}, []string{icmp.ProtocolName4}, stack.Options{})
 
 	const defaultMTU = 65536
-	id, linkEP := channel.New(256, defaultMTU, stackLinkAddr)
+	ep := channel.New(256, defaultMTU, stackLinkAddr)
+	wep := stack.LinkEndpoint(ep)
+
 	if testing.Verbose() {
-		id = sniffer.New(id)
+		wep = sniffer.New(ep)
 	}
-	if err := s.CreateNIC(1, id); err != nil {
+	if err := s.CreateNIC(1, wep); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 
@@ -73,7 +75,7 @@ func newTestContext(t *testing.T) *testContext {
 	return &testContext{
 		t:      t,
 		s:      s,
-		linkEP: linkEP,
+		linkEP: ep,
 	}
 }
 
diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go
index 1b5a55bea..ae827ca27 100644
--- a/pkg/tcpip/network/ipv4/ipv4_test.go
+++ b/pkg/tcpip/network/ipv4/ipv4_test.go
@@ -36,11 +36,11 @@ func TestExcludeBroadcast(t *testing.T) {
 	s := stack.New([]string{ipv4.ProtocolName}, []string{udp.ProtocolName}, stack.Options{})
 
 	const defaultMTU = 65536
-	id, _ := channel.New(256, defaultMTU, "")
+	ep := stack.LinkEndpoint(channel.New(256, defaultMTU, ""))
 	if testing.Verbose() {
-		id = sniffer.New(id)
+		ep = sniffer.New(ep)
 	}
-	if err := s.CreateNIC(1, id); err != nil {
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 
@@ -184,15 +184,12 @@ type errorChannel struct {
 // newErrorChannel creates a new errorChannel endpoint. Each call to WritePacket
 // will return successive errors from packetCollectorErrors until the list is
 // empty and then return nil each time.
-func newErrorChannel(size int, mtu uint32, linkAddr tcpip.LinkAddress, packetCollectorErrors []*tcpip.Error) (tcpip.LinkEndpointID, *errorChannel) {
-	_, e := channel.New(size, mtu, linkAddr)
-	ec := errorChannel{
-		Endpoint:              e,
+func newErrorChannel(size int, mtu uint32, linkAddr tcpip.LinkAddress, packetCollectorErrors []*tcpip.Error) *errorChannel {
+	return &errorChannel{
+		Endpoint:              channel.New(size, mtu, linkAddr),
 		Ch:                    make(chan packetInfo, size),
 		packetCollectorErrors: packetCollectorErrors,
 	}
-
-	return stack.RegisterLinkEndpoint(e), &ec
 }
 
 // packetInfo holds all the information about an outbound packet.
@@ -242,9 +239,8 @@ type context struct {
 func buildContext(t *testing.T, packetCollectorErrors []*tcpip.Error, mtu uint32) context {
 	// Make the packet and write it.
 	s := stack.New([]string{ipv4.ProtocolName}, []string{}, stack.Options{})
-	_, linkEP := newErrorChannel(100 /* Enough for all tests. */, mtu, "", packetCollectorErrors)
-	linkEPId := stack.RegisterLinkEndpoint(linkEP)
-	s.CreateNIC(1, linkEPId)
+	ep := newErrorChannel(100 /* Enough for all tests. */, mtu, "", packetCollectorErrors)
+	s.CreateNIC(1, ep)
 	const (
 		src = "\x10\x00\x00\x01"
 		dst = "\x10\x00\x00\x02"
@@ -266,7 +262,7 @@ func buildContext(t *testing.T, packetCollectorErrors []*tcpip.Error, mtu uint32
 	}
 	return context{
 		Route:  r,
-		linkEP: linkEP,
+		linkEP: ep,
 	}
 }
 
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index 227a65cf2..a6a1a5232 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -83,8 +83,7 @@ func (*stubLinkAddressCache) AddLinkAddress(tcpip.NICID, tcpip.Address, tcpip.Li
 func TestICMPCounts(t *testing.T) {
 	s := stack.New([]string{ProtocolName}, []string{icmp.ProtocolName6}, stack.Options{})
 	{
-		id := stack.RegisterLinkEndpoint(&stubLinkEndpoint{})
-		if err := s.CreateNIC(1, id); err != nil {
+		if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil {
 			t.Fatalf("CreateNIC(_) = %s", err)
 		}
 		if err := s.AddAddress(1, ProtocolNumber, lladdr0); err != nil {
@@ -211,14 +210,13 @@ func newTestContext(t *testing.T) *testContext {
 	}
 
 	const defaultMTU = 65536
-	_, linkEP0 := channel.New(256, defaultMTU, linkAddr0)
-	c.linkEP0 = linkEP0
-	wrappedEP0 := endpointWithResolutionCapability{LinkEndpoint: linkEP0}
-	id0 := stack.RegisterLinkEndpoint(wrappedEP0)
+	c.linkEP0 = channel.New(256, defaultMTU, linkAddr0)
+
+	wrappedEP0 := stack.LinkEndpoint(endpointWithResolutionCapability{LinkEndpoint: c.linkEP0})
 	if testing.Verbose() {
-		id0 = sniffer.New(id0)
+		wrappedEP0 = sniffer.New(wrappedEP0)
 	}
-	if err := c.s0.CreateNIC(1, id0); err != nil {
+	if err := c.s0.CreateNIC(1, wrappedEP0); err != nil {
 		t.Fatalf("CreateNIC s0: %v", err)
 	}
 	if err := c.s0.AddAddress(1, ProtocolNumber, lladdr0); err != nil {
@@ -228,11 +226,9 @@ func newTestContext(t *testing.T) *testContext {
 		t.Fatalf("AddAddress sn lladdr0: %v", err)
 	}
 
-	_, linkEP1 := channel.New(256, defaultMTU, linkAddr1)
-	c.linkEP1 = linkEP1
-	wrappedEP1 := endpointWithResolutionCapability{LinkEndpoint: linkEP1}
-	id1 := stack.RegisterLinkEndpoint(wrappedEP1)
-	if err := c.s1.CreateNIC(1, id1); err != nil {
+	c.linkEP1 = channel.New(256, defaultMTU, linkAddr1)
+	wrappedEP1 := stack.LinkEndpoint(endpointWithResolutionCapability{LinkEndpoint: c.linkEP1})
+	if err := c.s1.CreateNIC(1, wrappedEP1); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 	if err := c.s1.AddAddress(1, ProtocolNumber, lladdr1); err != nil {
diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go
index 8e4cf0e74..571915d3f 100644
--- a/pkg/tcpip/network/ipv6/ndp_test.go
+++ b/pkg/tcpip/network/ipv6/ndp_test.go
@@ -32,15 +32,14 @@ func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address) (*stack
 	t.Helper()
 
 	s := stack.New([]string{ProtocolName}, []string{icmp.ProtocolName6}, stack.Options{})
-	{
-		id := stack.RegisterLinkEndpoint(&stubLinkEndpoint{})
-		if err := s.CreateNIC(1, id); err != nil {
-			t.Fatalf("CreateNIC(_) = %s", err)
-		}
-		if err := s.AddAddress(1, ProtocolNumber, llladdr); err != nil {
-			t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, llladdr, err)
-		}
+
+	if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil {
+		t.Fatalf("CreateNIC(_) = %s", err)
+	}
+	if err := s.AddAddress(1, ProtocolNumber, llladdr); err != nil {
+		t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, llladdr, err)
 	}
+
 	{
 		subnet, err := tcpip.NewSubnet(rlladdr, tcpip.AddressMask(strings.Repeat("\xff", len(rlladdr))))
 		if err != nil {
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index 67b70b2ee..88a698b18 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -15,8 +15,6 @@
 package stack
 
 import (
-	"sync"
-
 	"gvisor.dev/gvisor/pkg/sleep"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
@@ -379,10 +377,6 @@ var (
 	networkProtocols   = make(map[string]NetworkProtocolFactory)
 
 	unassociatedFactory UnassociatedEndpointFactory
-
-	linkEPMu           sync.RWMutex
-	nextLinkEndpointID tcpip.LinkEndpointID = 1
-	linkEndpoints                           = make(map[tcpip.LinkEndpointID]LinkEndpoint)
 )
 
 // RegisterTransportProtocolFactory registers a new transport protocol factory
@@ -406,28 +400,6 @@ func RegisterUnassociatedFactory(f UnassociatedEndpointFactory) {
 	unassociatedFactory = f
 }
 
-// RegisterLinkEndpoint register a link-layer protocol endpoint and returns an
-// ID that can be used to refer to it.
-func RegisterLinkEndpoint(linkEP LinkEndpoint) tcpip.LinkEndpointID {
-	linkEPMu.Lock()
-	defer linkEPMu.Unlock()
-
-	v := nextLinkEndpointID
-	nextLinkEndpointID++
-
-	linkEndpoints[v] = linkEP
-
-	return v
-}
-
-// FindLinkEndpoint finds the link endpoint associated with the given ID.
-func FindLinkEndpoint(id tcpip.LinkEndpointID) LinkEndpoint {
-	linkEPMu.RLock()
-	defer linkEPMu.RUnlock()
-
-	return linkEndpoints[id]
-}
-
 // GSOType is the type of GSO segments.
 //
 // +stateify savable
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 6beca6ae8..a961e8ebe 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -620,12 +620,7 @@ func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network
 
 // createNIC creates a NIC with the provided id and link-layer endpoint, and
 // optionally enable it.
-func (s *Stack) createNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID, enabled, loopback bool) *tcpip.Error {
-	ep := FindLinkEndpoint(linkEP)
-	if ep == nil {
-		return tcpip.ErrBadLinkEndpoint
-	}
-
+func (s *Stack) createNIC(id tcpip.NICID, name string, ep LinkEndpoint, enabled, loopback bool) *tcpip.Error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
@@ -645,33 +640,33 @@ func (s *Stack) createNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpoint
 }
 
 // CreateNIC creates a NIC with the provided id and link-layer endpoint.
-func (s *Stack) CreateNIC(id tcpip.NICID, linkEP tcpip.LinkEndpointID) *tcpip.Error {
-	return s.createNIC(id, "", linkEP, true, false)
+func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) *tcpip.Error {
+	return s.createNIC(id, "", ep, true, false)
 }
 
 // CreateNamedNIC creates a NIC with the provided id and link-layer endpoint,
 // and a human-readable name.
-func (s *Stack) CreateNamedNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID) *tcpip.Error {
-	return s.createNIC(id, name, linkEP, true, false)
+func (s *Stack) CreateNamedNIC(id tcpip.NICID, name string, ep LinkEndpoint) *tcpip.Error {
+	return s.createNIC(id, name, ep, true, false)
 }
 
 // CreateNamedLoopbackNIC creates a NIC with the provided id and link-layer
 // endpoint, and a human-readable name.
-func (s *Stack) CreateNamedLoopbackNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID) *tcpip.Error {
-	return s.createNIC(id, name, linkEP, true, true)
+func (s *Stack) CreateNamedLoopbackNIC(id tcpip.NICID, name string, ep LinkEndpoint) *tcpip.Error {
+	return s.createNIC(id, name, ep, true, true)
 }
 
 // CreateDisabledNIC creates a NIC with the provided id and link-layer endpoint,
 // but leave it disable. Stack.EnableNIC must be called before the link-layer
 // endpoint starts delivering packets to it.
-func (s *Stack) CreateDisabledNIC(id tcpip.NICID, linkEP tcpip.LinkEndpointID) *tcpip.Error {
-	return s.createNIC(id, "", linkEP, false, false)
+func (s *Stack) CreateDisabledNIC(id tcpip.NICID, ep LinkEndpoint) *tcpip.Error {
+	return s.createNIC(id, "", ep, false, false)
 }
 
 // CreateDisabledNamedNIC is a combination of CreateNamedNIC and
 // CreateDisabledNIC.
-func (s *Stack) CreateDisabledNamedNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID) *tcpip.Error {
-	return s.createNIC(id, name, linkEP, false, false)
+func (s *Stack) CreateDisabledNamedNIC(id tcpip.NICID, name string, ep LinkEndpoint) *tcpip.Error {
+	return s.createNIC(id, name, ep, false, false)
 }
 
 // EnableNIC enables the given NIC so that the link-layer endpoint can start
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index c6a8160af..0c26c9911 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -60,11 +60,11 @@ type fakeNetworkEndpoint struct {
 	prefixLen  int
 	proto      *fakeNetworkProtocol
 	dispatcher stack.TransportDispatcher
-	linkEP     stack.LinkEndpoint
+	ep         stack.LinkEndpoint
 }
 
 func (f *fakeNetworkEndpoint) MTU() uint32 {
-	return f.linkEP.MTU() - uint32(f.MaxHeaderLength())
+	return f.ep.MTU() - uint32(f.MaxHeaderLength())
 }
 
 func (f *fakeNetworkEndpoint) NICID() tcpip.NICID {
@@ -108,7 +108,7 @@ func (f *fakeNetworkEndpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedV
 }
 
 func (f *fakeNetworkEndpoint) MaxHeaderLength() uint16 {
-	return f.linkEP.MaxHeaderLength() + fakeNetHeaderLen
+	return f.ep.MaxHeaderLength() + fakeNetHeaderLen
 }
 
 func (f *fakeNetworkEndpoint) PseudoHeaderChecksum(protocol tcpip.TransportProtocolNumber, dstAddr tcpip.Address) uint16 {
@@ -116,7 +116,7 @@ func (f *fakeNetworkEndpoint) PseudoHeaderChecksum(protocol tcpip.TransportProto
 }
 
 func (f *fakeNetworkEndpoint) Capabilities() stack.LinkEndpointCapabilities {
-	return f.linkEP.Capabilities()
+	return f.ep.Capabilities()
 }
 
 func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.TransportProtocolNumber, _ uint8, loop stack.PacketLooping) *tcpip.Error {
@@ -141,7 +141,7 @@ func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr bu
 		return nil
 	}
 
-	return f.linkEP.WritePacket(r, gso, hdr, payload, fakeNetNumber)
+	return f.ep.WritePacket(r, gso, hdr, payload, fakeNetNumber)
 }
 
 func (*fakeNetworkEndpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.VectorisedView, loop stack.PacketLooping) *tcpip.Error {
@@ -189,14 +189,14 @@ func (*fakeNetworkProtocol) ParseAddresses(v buffer.View) (src, dst tcpip.Addres
 	return tcpip.Address(v[1:2]), tcpip.Address(v[0:1])
 }
 
-func (f *fakeNetworkProtocol) NewEndpoint(nicid tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint) (stack.NetworkEndpoint, *tcpip.Error) {
+func (f *fakeNetworkProtocol) NewEndpoint(nicid tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, ep stack.LinkEndpoint) (stack.NetworkEndpoint, *tcpip.Error) {
 	return &fakeNetworkEndpoint{
 		nicid:      nicid,
 		id:         stack.NetworkEndpointID{LocalAddress: addrWithPrefix.Address},
 		prefixLen:  addrWithPrefix.PrefixLen,
 		proto:      f,
 		dispatcher: dispatcher,
-		linkEP:     linkEP,
+		ep:         ep,
 	}, nil
 }
 
@@ -225,9 +225,9 @@ func (f *fakeNetworkProtocol) Option(option interface{}) *tcpip.Error {
 func TestNetworkReceive(t *testing.T) {
 	// Create a stack with the fake network protocol, one nic, and two
 	// addresses attached to it: 1 & 2.
-	id, linkEP := channel.New(10, defaultMTU, "")
+	ep := channel.New(10, defaultMTU, "")
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
-	if err := s.CreateNIC(1, id); err != nil {
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -245,7 +245,7 @@ func TestNetworkReceive(t *testing.T) {
 
 	// Make sure packet with wrong address is not delivered.
 	buf[0] = 3
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
+	ep.Inject(fakeNetNumber, buf.ToVectorisedView())
 	if fakeNet.packetCount[1] != 0 {
 		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 0)
 	}
@@ -255,7 +255,7 @@ func TestNetworkReceive(t *testing.T) {
 
 	// Make sure packet is delivered to first endpoint.
 	buf[0] = 1
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
+	ep.Inject(fakeNetNumber, buf.ToVectorisedView())
 	if fakeNet.packetCount[1] != 1 {
 		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1)
 	}
@@ -265,7 +265,7 @@ func TestNetworkReceive(t *testing.T) {
 
 	// Make sure packet is delivered to second endpoint.
 	buf[0] = 2
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
+	ep.Inject(fakeNetNumber, buf.ToVectorisedView())
 	if fakeNet.packetCount[1] != 1 {
 		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1)
 	}
@@ -274,7 +274,7 @@ func TestNetworkReceive(t *testing.T) {
 	}
 
 	// Make sure packet is not delivered if protocol number is wrong.
-	linkEP.Inject(fakeNetNumber-1, buf.ToVectorisedView())
+	ep.Inject(fakeNetNumber-1, buf.ToVectorisedView())
 	if fakeNet.packetCount[1] != 1 {
 		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1)
 	}
@@ -284,7 +284,7 @@ func TestNetworkReceive(t *testing.T) {
 
 	// Make sure packet that is too small is dropped.
 	buf.CapLength(2)
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
+	ep.Inject(fakeNetNumber, buf.ToVectorisedView())
 	if fakeNet.packetCount[1] != 1 {
 		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1)
 	}
@@ -307,59 +307,59 @@ func send(r stack.Route, payload buffer.View) *tcpip.Error {
 	return r.WritePacket(nil /* gso */, hdr, payload.ToVectorisedView(), fakeTransNumber, 123)
 }
 
-func testSendTo(t *testing.T, s *stack.Stack, addr tcpip.Address, linkEP *channel.Endpoint, payload buffer.View) {
+func testSendTo(t *testing.T, s *stack.Stack, addr tcpip.Address, ep *channel.Endpoint, payload buffer.View) {
 	t.Helper()
-	linkEP.Drain()
+	ep.Drain()
 	if err := sendTo(s, addr, payload); err != nil {
 		t.Error("sendTo failed:", err)
 	}
-	if got, want := linkEP.Drain(), 1; got != want {
+	if got, want := ep.Drain(), 1; got != want {
 		t.Errorf("sendTo packet count: got = %d, want %d", got, want)
 	}
 }
 
-func testSend(t *testing.T, r stack.Route, linkEP *channel.Endpoint, payload buffer.View) {
+func testSend(t *testing.T, r stack.Route, ep *channel.Endpoint, payload buffer.View) {
 	t.Helper()
-	linkEP.Drain()
+	ep.Drain()
 	if err := send(r, payload); err != nil {
 		t.Error("send failed:", err)
 	}
-	if got, want := linkEP.Drain(), 1; got != want {
+	if got, want := ep.Drain(), 1; got != want {
 		t.Errorf("send packet count: got = %d, want %d", got, want)
 	}
 }
 
-func testFailingSend(t *testing.T, r stack.Route, linkEP *channel.Endpoint, payload buffer.View, wantErr *tcpip.Error) {
+func testFailingSend(t *testing.T, r stack.Route, ep *channel.Endpoint, payload buffer.View, wantErr *tcpip.Error) {
 	t.Helper()
 	if gotErr := send(r, payload); gotErr != wantErr {
 		t.Errorf("send failed: got = %s, want = %s ", gotErr, wantErr)
 	}
 }
 
-func testFailingSendTo(t *testing.T, s *stack.Stack, addr tcpip.Address, linkEP *channel.Endpoint, payload buffer.View, wantErr *tcpip.Error) {
+func testFailingSendTo(t *testing.T, s *stack.Stack, addr tcpip.Address, ep *channel.Endpoint, payload buffer.View, wantErr *tcpip.Error) {
 	t.Helper()
 	if gotErr := sendTo(s, addr, payload); gotErr != wantErr {
 		t.Errorf("sendto failed: got = %s, want = %s ", gotErr, wantErr)
 	}
 }
 
-func testRecv(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, linkEP *channel.Endpoint, buf buffer.View) {
+func testRecv(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, ep *channel.Endpoint, buf buffer.View) {
 	t.Helper()
 	// testRecvInternal injects one packet, and we expect to receive it.
 	want := fakeNet.PacketCount(localAddrByte) + 1
-	testRecvInternal(t, fakeNet, localAddrByte, linkEP, buf, want)
+	testRecvInternal(t, fakeNet, localAddrByte, ep, buf, want)
 }
 
-func testFailingRecv(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, linkEP *channel.Endpoint, buf buffer.View) {
+func testFailingRecv(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, ep *channel.Endpoint, buf buffer.View) {
 	t.Helper()
 	// testRecvInternal injects one packet, and we do NOT expect to receive it.
 	want := fakeNet.PacketCount(localAddrByte)
-	testRecvInternal(t, fakeNet, localAddrByte, linkEP, buf, want)
+	testRecvInternal(t, fakeNet, localAddrByte, ep, buf, want)
 }
 
-func testRecvInternal(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, linkEP *channel.Endpoint, buf buffer.View, want int) {
+func testRecvInternal(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, ep *channel.Endpoint, buf buffer.View, want int) {
 	t.Helper()
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
+	ep.Inject(fakeNetNumber, buf.ToVectorisedView())
 	if got := fakeNet.PacketCount(localAddrByte); got != want {
 		t.Errorf("receive packet count: got = %d, want %d", got, want)
 	}
@@ -369,9 +369,9 @@ func TestNetworkSend(t *testing.T) {
 	// Create a stack with the fake network protocol, one nic, and one
 	// address: 1. The route table sends all packets through the only
 	// existing nic.
-	id, linkEP := channel.New(10, defaultMTU, "")
+	ep := channel.New(10, defaultMTU, "")
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
-	if err := s.CreateNIC(1, id); err != nil {
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("NewNIC failed:", err)
 	}
 
@@ -388,7 +388,7 @@ func TestNetworkSend(t *testing.T) {
 	}
 
 	// Make sure that the link-layer endpoint received the outbound packet.
-	testSendTo(t, s, "\x03", linkEP, nil)
+	testSendTo(t, s, "\x03", ep, nil)
 }
 
 func TestNetworkSendMultiRoute(t *testing.T) {
@@ -397,8 +397,8 @@ func TestNetworkSendMultiRoute(t *testing.T) {
 	// even addresses.
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id1, linkEP1 := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id1); err != nil {
+	ep1 := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep1); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -410,8 +410,8 @@ func TestNetworkSendMultiRoute(t *testing.T) {
 		t.Fatal("AddAddress failed:", err)
 	}
 
-	id2, linkEP2 := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(2, id2); err != nil {
+	ep2 := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(2, ep2); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -442,10 +442,10 @@ func TestNetworkSendMultiRoute(t *testing.T) {
 	}
 
 	// Send a packet to an odd destination.
-	testSendTo(t, s, "\x05", linkEP1, nil)
+	testSendTo(t, s, "\x05", ep1, nil)
 
 	// Send a packet to an even destination.
-	testSendTo(t, s, "\x06", linkEP2, nil)
+	testSendTo(t, s, "\x06", ep2, nil)
 }
 
 func testRoute(t *testing.T, s *stack.Stack, nic tcpip.NICID, srcAddr, dstAddr, expectedSrcAddr tcpip.Address) {
@@ -478,8 +478,8 @@ func TestRoutes(t *testing.T) {
 	// even addresses.
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id1, _ := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id1); err != nil {
+	ep1 := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep1); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -491,8 +491,8 @@ func TestRoutes(t *testing.T) {
 		t.Fatal("AddAddress failed:", err)
 	}
 
-	id2, _ := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(2, id2); err != nil {
+	ep2 := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(2, ep2); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -556,8 +556,8 @@ func TestAddressRemoval(t *testing.T) {
 
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id, linkEP := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -578,15 +578,15 @@ func TestAddressRemoval(t *testing.T) {
 
 	// Send and receive packets, and verify they are received.
 	buf[0] = localAddrByte
-	testRecv(t, fakeNet, localAddrByte, linkEP, buf)
-	testSendTo(t, s, remoteAddr, linkEP, nil)
+	testRecv(t, fakeNet, localAddrByte, ep, buf)
+	testSendTo(t, s, remoteAddr, ep, nil)
 
 	// Remove the address, then check that send/receive doesn't work anymore.
 	if err := s.RemoveAddress(1, localAddr); err != nil {
 		t.Fatal("RemoveAddress failed:", err)
 	}
-	testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
-	testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
+	testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
+	testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute)
 
 	// Check that removing the same address fails.
 	if err := s.RemoveAddress(1, localAddr); err != tcpip.ErrBadLocalAddress {
@@ -601,9 +601,9 @@ func TestAddressRemovalWithRouteHeld(t *testing.T) {
 
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id, linkEP := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id); err != nil {
-		t.Fatal("CreateNIC failed:", err)
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep); err != nil {
+		t.Fatalf("CreateNIC failed: %v", err)
 	}
 	fakeNet := s.NetworkProtocolInstance(fakeNetNumber).(*fakeNetworkProtocol)
 	buf := buffer.NewView(30)
@@ -626,17 +626,17 @@ func TestAddressRemovalWithRouteHeld(t *testing.T) {
 
 	// Send and receive packets, and verify they are received.
 	buf[0] = localAddrByte
-	testRecv(t, fakeNet, localAddrByte, linkEP, buf)
-	testSend(t, r, linkEP, nil)
-	testSendTo(t, s, remoteAddr, linkEP, nil)
+	testRecv(t, fakeNet, localAddrByte, ep, buf)
+	testSend(t, r, ep, nil)
+	testSendTo(t, s, remoteAddr, ep, nil)
 
 	// Remove the address, then check that send/receive doesn't work anymore.
 	if err := s.RemoveAddress(1, localAddr); err != nil {
 		t.Fatal("RemoveAddress failed:", err)
 	}
-	testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
-	testFailingSend(t, r, linkEP, nil, tcpip.ErrInvalidEndpointState)
-	testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
+	testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
+	testFailingSend(t, r, ep, nil, tcpip.ErrInvalidEndpointState)
+	testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute)
 
 	// Check that removing the same address fails.
 	if err := s.RemoveAddress(1, localAddr); err != tcpip.ErrBadLocalAddress {
@@ -690,8 +690,8 @@ func TestEndpointExpiration(t *testing.T) {
 			t.Run(fmt.Sprintf("promiscuous=%t spoofing=%t", promiscuous, spoofing), func(t *testing.T) {
 				s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-				id, linkEP := channel.New(10, defaultMTU, "")
-				if err := s.CreateNIC(nicid, id); err != nil {
+				ep := channel.New(10, defaultMTU, "")
+				if err := s.CreateNIC(nicid, ep); err != nil {
 					t.Fatal("CreateNIC failed:", err)
 				}
 
@@ -724,15 +724,15 @@ func TestEndpointExpiration(t *testing.T) {
 				//-----------------------
 				verifyAddress(t, s, nicid, noAddr)
 				if promiscuous {
-					testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+					testRecv(t, fakeNet, localAddrByte, ep, buf)
 				} else {
-					testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+					testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
 				}
 				if spoofing {
 					// FIXME(b/139841518):Spoofing doesn't work if there is no primary address.
-					// testSendTo(t, s, remoteAddr, linkEP, nil)
+					// testSendTo(t, s, remoteAddr, ep, nil)
 				} else {
-					testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
+					testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute)
 				}
 
 				// 2. Add Address, everything should work.
@@ -741,8 +741,8 @@ func TestEndpointExpiration(t *testing.T) {
 					t.Fatal("AddAddress failed:", err)
 				}
 				verifyAddress(t, s, nicid, localAddr)
-				testRecv(t, fakeNet, localAddrByte, linkEP, buf)
-				testSendTo(t, s, remoteAddr, linkEP, nil)
+				testRecv(t, fakeNet, localAddrByte, ep, buf)
+				testSendTo(t, s, remoteAddr, ep, nil)
 
 				// 3. Remove the address, send should only work for spoofing, receive
 				// for promiscuous mode.
@@ -752,15 +752,15 @@ func TestEndpointExpiration(t *testing.T) {
 				}
 				verifyAddress(t, s, nicid, noAddr)
 				if promiscuous {
-					testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+					testRecv(t, fakeNet, localAddrByte, ep, buf)
 				} else {
-					testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+					testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
 				}
 				if spoofing {
 					// FIXME(b/139841518):Spoofing doesn't work if there is no primary address.
-					// testSendTo(t, s, remoteAddr, linkEP, nil)
+					// testSendTo(t, s, remoteAddr, ep, nil)
 				} else {
-					testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
+					testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute)
 				}
 
 				// 4. Add Address back, everything should work again.
@@ -769,8 +769,8 @@ func TestEndpointExpiration(t *testing.T) {
 					t.Fatal("AddAddress failed:", err)
 				}
 				verifyAddress(t, s, nicid, localAddr)
-				testRecv(t, fakeNet, localAddrByte, linkEP, buf)
-				testSendTo(t, s, remoteAddr, linkEP, nil)
+				testRecv(t, fakeNet, localAddrByte, ep, buf)
+				testSendTo(t, s, remoteAddr, ep, nil)
 
 				// 5. Take a reference to the endpoint by getting a route. Verify that
 				// we can still send/receive, including sending using the route.
@@ -779,9 +779,9 @@ func TestEndpointExpiration(t *testing.T) {
 				if err != nil {
 					t.Fatal("FindRoute failed:", err)
 				}
-				testRecv(t, fakeNet, localAddrByte, linkEP, buf)
-				testSendTo(t, s, remoteAddr, linkEP, nil)
-				testSend(t, r, linkEP, nil)
+				testRecv(t, fakeNet, localAddrByte, ep, buf)
+				testSendTo(t, s, remoteAddr, ep, nil)
+				testSend(t, r, ep, nil)
 
 				// 6. Remove the address. Send should only work for spoofing, receive
 				// for promiscuous mode.
@@ -791,16 +791,16 @@ func TestEndpointExpiration(t *testing.T) {
 				}
 				verifyAddress(t, s, nicid, noAddr)
 				if promiscuous {
-					testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+					testRecv(t, fakeNet, localAddrByte, ep, buf)
 				} else {
-					testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+					testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
 				}
 				if spoofing {
-					testSend(t, r, linkEP, nil)
-					testSendTo(t, s, remoteAddr, linkEP, nil)
+					testSend(t, r, ep, nil)
+					testSendTo(t, s, remoteAddr, ep, nil)
 				} else {
-					testFailingSend(t, r, linkEP, nil, tcpip.ErrInvalidEndpointState)
-					testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
+					testFailingSend(t, r, ep, nil, tcpip.ErrInvalidEndpointState)
+					testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute)
 				}
 
 				// 7. Add Address back, everything should work again.
@@ -809,16 +809,16 @@ func TestEndpointExpiration(t *testing.T) {
 					t.Fatal("AddAddress failed:", err)
 				}
 				verifyAddress(t, s, nicid, localAddr)
-				testRecv(t, fakeNet, localAddrByte, linkEP, buf)
-				testSendTo(t, s, remoteAddr, linkEP, nil)
-				testSend(t, r, linkEP, nil)
+				testRecv(t, fakeNet, localAddrByte, ep, buf)
+				testSendTo(t, s, remoteAddr, ep, nil)
+				testSend(t, r, ep, nil)
 
 				// 8. Remove the route, sendTo/recv should still work.
 				//-----------------------
 				r.Release()
 				verifyAddress(t, s, nicid, localAddr)
-				testRecv(t, fakeNet, localAddrByte, linkEP, buf)
-				testSendTo(t, s, remoteAddr, linkEP, nil)
+				testRecv(t, fakeNet, localAddrByte, ep, buf)
+				testSendTo(t, s, remoteAddr, ep, nil)
 
 				// 9. Remove the address. Send should only work for spoofing, receive
 				// for promiscuous mode.
@@ -828,15 +828,15 @@ func TestEndpointExpiration(t *testing.T) {
 				}
 				verifyAddress(t, s, nicid, noAddr)
 				if promiscuous {
-					testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+					testRecv(t, fakeNet, localAddrByte, ep, buf)
 				} else {
-					testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+					testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
 				}
 				if spoofing {
 					// FIXME(b/139841518):Spoofing doesn't work if there is no primary address.
-					// testSendTo(t, s, remoteAddr, linkEP, nil)
+					// testSendTo(t, s, remoteAddr, ep, nil)
 				} else {
-					testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
+					testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute)
 				}
 			})
 		}
@@ -846,8 +846,8 @@ func TestEndpointExpiration(t *testing.T) {
 func TestPromiscuousMode(t *testing.T) {
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id, linkEP := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -867,13 +867,13 @@ func TestPromiscuousMode(t *testing.T) {
 	// have a matching endpoint.
 	const localAddrByte byte = 0x01
 	buf[0] = localAddrByte
-	testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+	testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
 
 	// Set promiscuous mode, then check that packet is delivered.
 	if err := s.SetPromiscuousMode(1, true); err != nil {
 		t.Fatal("SetPromiscuousMode failed:", err)
 	}
-	testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+	testRecv(t, fakeNet, localAddrByte, ep, buf)
 
 	// Check that we can't get a route as there is no local address.
 	_, err := s.FindRoute(0, "", "\x02", fakeNetNumber, false /* multicastLoop */)
@@ -886,7 +886,7 @@ func TestPromiscuousMode(t *testing.T) {
 	if err := s.SetPromiscuousMode(1, false); err != nil {
 		t.Fatal("SetPromiscuousMode failed:", err)
 	}
-	testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+	testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
 }
 
 func TestSpoofingWithAddress(t *testing.T) {
@@ -896,8 +896,8 @@ func TestSpoofingWithAddress(t *testing.T) {
 
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id, linkEP := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -936,8 +936,8 @@ func TestSpoofingWithAddress(t *testing.T) {
 		t.Errorf("Route has wrong remote address: got %v, wanted %v", r.RemoteAddress, dstAddr)
 	}
 	// Sending a packet works.
-	testSendTo(t, s, dstAddr, linkEP, nil)
-	testSend(t, r, linkEP, nil)
+	testSendTo(t, s, dstAddr, ep, nil)
+	testSend(t, r, ep, nil)
 
 	// FindRoute should also work with a local address that exists on the NIC.
 	r, err = s.FindRoute(0, localAddr, dstAddr, fakeNetNumber, false /* multicastLoop */)
@@ -951,7 +951,7 @@ func TestSpoofingWithAddress(t *testing.T) {
 		t.Errorf("Route has wrong remote address: got %v, wanted %v", r.RemoteAddress, dstAddr)
 	}
 	// Sending a packet using the route works.
-	testSend(t, r, linkEP, nil)
+	testSend(t, r, ep, nil)
 }
 
 func TestSpoofingNoAddress(t *testing.T) {
@@ -960,8 +960,8 @@ func TestSpoofingNoAddress(t *testing.T) {
 
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id, linkEP := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -980,7 +980,7 @@ func TestSpoofingNoAddress(t *testing.T) {
 		t.Errorf("FindRoute succeeded with route %+v when it should have failed", r)
 	}
 	// Sending a packet fails.
-	testFailingSendTo(t, s, dstAddr, linkEP, nil, tcpip.ErrNoRoute)
+	testFailingSendTo(t, s, dstAddr, ep, nil, tcpip.ErrNoRoute)
 
 	// With address spoofing enabled, FindRoute permits any address to be used
 	// as the source.
@@ -999,14 +999,14 @@ func TestSpoofingNoAddress(t *testing.T) {
 	}
 	// Sending a packet works.
 	// FIXME(b/139841518):Spoofing doesn't work if there is no primary address.
-	// testSendTo(t, s, remoteAddr, linkEP, nil)
+	// testSendTo(t, s, remoteAddr, ep, nil)
 }
 
 func TestBroadcastNeedsNoRoute(t *testing.T) {
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id, _ := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 	s.SetRouteTable([]tcpip.Route{})
@@ -1076,8 +1076,8 @@ func TestMulticastOrIPv6LinkLocalNeedsNoRoute(t *testing.T) {
 		t.Run(tc.name, func(t *testing.T) {
 			s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-			id, _ := channel.New(10, defaultMTU, "")
-			if err := s.CreateNIC(1, id); err != nil {
+			ep := channel.New(10, defaultMTU, "")
+			if err := s.CreateNIC(1, ep); err != nil {
 				t.Fatal("CreateNIC failed:", err)
 			}
 
@@ -1132,8 +1132,8 @@ func TestMulticastOrIPv6LinkLocalNeedsNoRoute(t *testing.T) {
 func TestAddressRangeAcceptsMatchingPacket(t *testing.T) {
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id, linkEP := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -1159,7 +1159,7 @@ func TestAddressRangeAcceptsMatchingPacket(t *testing.T) {
 		t.Fatal("AddAddressRange failed:", err)
 	}
 
-	testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+	testRecv(t, fakeNet, localAddrByte, ep, buf)
 }
 
 func testNicForAddressRange(t *testing.T, nicID tcpip.NICID, s *stack.Stack, subnet tcpip.Subnet, rangeExists bool) {
@@ -1198,8 +1198,8 @@ func TestCheckLocalAddressForSubnet(t *testing.T) {
 	const nicID tcpip.NICID = 1
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id, _ := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(nicID, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(nicID, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -1236,8 +1236,8 @@ func TestCheckLocalAddressForSubnet(t *testing.T) {
 func TestAddressRangeRejectsNonmatchingPacket(t *testing.T) {
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id, linkEP := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -1262,7 +1262,7 @@ func TestAddressRangeRejectsNonmatchingPacket(t *testing.T) {
 	if err := s.AddAddressRange(1, fakeNetNumber, subnet); err != nil {
 		t.Fatal("AddAddressRange failed:", err)
 	}
-	testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+	testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
 }
 
 func TestNetworkOptions(t *testing.T) {
@@ -1320,8 +1320,8 @@ func stackContainsAddressRange(s *stack.Stack, id tcpip.NICID, addrRange tcpip.S
 
 func TestAddresRangeAddRemove(t *testing.T) {
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
-	id, _ := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -1361,8 +1361,8 @@ func TestGetMainNICAddressAddPrimaryNonPrimary(t *testing.T) {
 					for never := 0; never < 3; never++ {
 						t.Run(fmt.Sprintf("never=%d", never), func(t *testing.T) {
 							s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
-							id, _ := channel.New(10, defaultMTU, "")
-							if err := s.CreateNIC(1, id); err != nil {
+							ep := channel.New(10, defaultMTU, "")
+							if err := s.CreateNIC(1, ep); err != nil {
 								t.Fatal("CreateNIC failed:", err)
 							}
 							// Insert <canBe> primary and <never> never-primary addresses.
@@ -1426,8 +1426,8 @@ func TestGetMainNICAddressAddPrimaryNonPrimary(t *testing.T) {
 
 func TestGetMainNICAddressAddRemove(t *testing.T) {
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
-	id, _ := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -1501,8 +1501,8 @@ func verifyAddresses(t *testing.T, expectedAddresses, gotAddresses []tcpip.Proto
 func TestAddAddress(t *testing.T) {
 	const nicid = 1
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
-	id, _ := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(nicid, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(nicid, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -1526,8 +1526,8 @@ func TestAddAddress(t *testing.T) {
 func TestAddProtocolAddress(t *testing.T) {
 	const nicid = 1
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
-	id, _ := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(nicid, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(nicid, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -1558,8 +1558,8 @@ func TestAddProtocolAddress(t *testing.T) {
 func TestAddAddressWithOptions(t *testing.T) {
 	const nicid = 1
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
-	id, _ := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(nicid, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(nicid, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -1587,8 +1587,8 @@ func TestAddAddressWithOptions(t *testing.T) {
 func TestAddProtocolAddressWithOptions(t *testing.T) {
 	const nicid = 1
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
-	id, _ := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(nicid, id); err != nil {
+	ep := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(nicid, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
@@ -1621,8 +1621,8 @@ func TestAddProtocolAddressWithOptions(t *testing.T) {
 
 func TestNICStats(t *testing.T) {
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
-	id1, linkEP1 := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id1); err != nil {
+	ep1 := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep1); err != nil {
 		t.Fatal("CreateNIC failed: ", err)
 	}
 	if err := s.AddAddress(1, fakeNetNumber, "\x01"); err != nil {
@@ -1639,7 +1639,7 @@ func TestNICStats(t *testing.T) {
 
 	// Send a packet to address 1.
 	buf := buffer.NewView(30)
-	linkEP1.Inject(fakeNetNumber, buf.ToVectorisedView())
+	ep1.Inject(fakeNetNumber, buf.ToVectorisedView())
 	if got, want := s.NICInfo()[1].Stats.Rx.Packets.Value(), uint64(1); got != want {
 		t.Errorf("got Rx.Packets.Value() = %d, want = %d", got, want)
 	}
@@ -1653,9 +1653,9 @@ func TestNICStats(t *testing.T) {
 	if err := sendTo(s, "\x01", payload); err != nil {
 		t.Fatal("sendTo failed: ", err)
 	}
-	want := uint64(linkEP1.Drain())
+	want := uint64(ep1.Drain())
 	if got := s.NICInfo()[1].Stats.Tx.Packets.Value(); got != want {
-		t.Errorf("got Tx.Packets.Value() = %d, linkEP1.Drain() = %d", got, want)
+		t.Errorf("got Tx.Packets.Value() = %d, ep1.Drain() = %d", got, want)
 	}
 
 	if got, want := s.NICInfo()[1].Stats.Tx.Bytes.Value(), uint64(len(payload)); got != want {
@@ -1669,16 +1669,16 @@ func TestNICForwarding(t *testing.T) {
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 	s.SetForwarding(true)
 
-	id1, linkEP1 := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(1, id1); err != nil {
+	ep1 := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, ep1); err != nil {
 		t.Fatal("CreateNIC #1 failed:", err)
 	}
 	if err := s.AddAddress(1, fakeNetNumber, "\x01"); err != nil {
 		t.Fatal("AddAddress #1 failed:", err)
 	}
 
-	id2, linkEP2 := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(2, id2); err != nil {
+	ep2 := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(2, ep2); err != nil {
 		t.Fatal("CreateNIC #2 failed:", err)
 	}
 	if err := s.AddAddress(2, fakeNetNumber, "\x02"); err != nil {
@@ -1697,10 +1697,10 @@ func TestNICForwarding(t *testing.T) {
 	// Send a packet to address 3.
 	buf := buffer.NewView(30)
 	buf[0] = 3
-	linkEP1.Inject(fakeNetNumber, buf.ToVectorisedView())
+	ep1.Inject(fakeNetNumber, buf.ToVectorisedView())
 
 	select {
-	case <-linkEP2.C:
+	case <-ep2.C:
 	default:
 		t.Fatal("Packet not forwarded")
 	}
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index ca185279e..87d1e0d0d 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -278,9 +278,9 @@ func (f *fakeTransportProtocol) Option(option interface{}) *tcpip.Error {
 }
 
 func TestTransportReceive(t *testing.T) {
-	id, linkEP := channel.New(10, defaultMTU, "")
+	linkEP := channel.New(10, defaultMTU, "")
 	s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}, stack.Options{})
-	if err := s.CreateNIC(1, id); err != nil {
+	if err := s.CreateNIC(1, linkEP); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 
@@ -340,9 +340,9 @@ func TestTransportReceive(t *testing.T) {
 }
 
 func TestTransportControlReceive(t *testing.T) {
-	id, linkEP := channel.New(10, defaultMTU, "")
+	linkEP := channel.New(10, defaultMTU, "")
 	s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}, stack.Options{})
-	if err := s.CreateNIC(1, id); err != nil {
+	if err := s.CreateNIC(1, linkEP); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 
@@ -408,9 +408,9 @@ func TestTransportControlReceive(t *testing.T) {
 }
 
 func TestTransportSend(t *testing.T) {
-	id, _ := channel.New(10, defaultMTU, "")
+	linkEP := channel.New(10, defaultMTU, "")
 	s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}, stack.Options{})
-	if err := s.CreateNIC(1, id); err != nil {
+	if err := s.CreateNIC(1, linkEP); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 
@@ -497,16 +497,16 @@ func TestTransportForwarding(t *testing.T) {
 	s.SetForwarding(true)
 
 	// TODO(b/123449044): Change this to a channel NIC.
-	id1 := loopback.New()
-	if err := s.CreateNIC(1, id1); err != nil {
+	ep1 := loopback.New()
+	if err := s.CreateNIC(1, ep1); err != nil {
 		t.Fatalf("CreateNIC #1 failed: %v", err)
 	}
 	if err := s.AddAddress(1, fakeNetNumber, "\x01"); err != nil {
 		t.Fatalf("AddAddress #1 failed: %v", err)
 	}
 
-	id2, linkEP2 := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(2, id2); err != nil {
+	ep2 := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(2, ep2); err != nil {
 		t.Fatalf("CreateNIC #2 failed: %v", err)
 	}
 	if err := s.AddAddress(2, fakeNetNumber, "\x02"); err != nil {
@@ -545,7 +545,7 @@ func TestTransportForwarding(t *testing.T) {
 	req[0] = 1
 	req[1] = 3
 	req[2] = byte(fakeTransNumber)
-	linkEP2.Inject(fakeNetNumber, req.ToVectorisedView())
+	ep2.Inject(fakeNetNumber, req.ToVectorisedView())
 
 	aep, _, err := ep.Accept()
 	if err != nil || aep == nil {
@@ -559,7 +559,7 @@ func TestTransportForwarding(t *testing.T) {
 
 	var p channel.PacketInfo
 	select {
-	case p = <-linkEP2.C:
+	case p = <-ep2.C:
 	default:
 		t.Fatal("Response packet not forwarded")
 	}
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 418e771d2..ebf8a2d04 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -600,9 +600,6 @@ func (r Route) String() string {
 	return out.String()
 }
 
-// LinkEndpointID represents a data link layer endpoint.
-type LinkEndpointID uint64
-
 // TransportProtocolNumber is the number of a transport protocol.
 type TransportProtocolNumber uint32
 
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 18c707a57..16783e716 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -150,11 +150,12 @@ func New(t *testing.T, mtu uint32) *Context {
 
 	// Some of the congestion control tests send up to 640 packets, we so
 	// set the channel size to 1000.
-	id, linkEP := channel.New(1000, mtu, "")
+	ep := channel.New(1000, mtu, "")
+	wep := stack.LinkEndpoint(ep)
 	if testing.Verbose() {
-		id = sniffer.New(id)
+		wep = sniffer.New(ep)
 	}
-	if err := s.CreateNIC(1, id); err != nil {
+	if err := s.CreateNIC(1, wep); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 
@@ -180,7 +181,7 @@ func New(t *testing.T, mtu uint32) *Context {
 	return &Context{
 		t:           t,
 		s:           s,
-		linkEP:      linkEP,
+		linkEP:      ep,
 		WindowScale: uint8(tcp.FindWndScale(tcp.DefaultReceiveBufferSize)),
 	}
 }
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index 995d6e8a1..c6deab892 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -275,12 +275,13 @@ func newDualTestContext(t *testing.T, mtu uint32) *testContext {
 	t.Helper()
 
 	s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{udp.ProtocolName}, stack.Options{})
+	ep := channel.New(256, mtu, "")
+	wep := stack.LinkEndpoint(ep)
 
-	id, linkEP := channel.New(256, mtu, "")
 	if testing.Verbose() {
-		id = sniffer.New(id)
+		wep = sniffer.New(ep)
 	}
-	if err := s.CreateNIC(1, id); err != nil {
+	if err := s.CreateNIC(1, wep); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 
@@ -306,7 +307,7 @@ func newDualTestContext(t *testing.T, mtu uint32) *testContext {
 	return &testContext{
 		t:      t,
 		s:      s,
-		linkEP: linkEP,
+		linkEP: ep,
 	}
 }
 
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index ea0d9f790..32cba5ac1 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -121,10 +121,10 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
 		nicID++
 		nicids[link.Name] = nicID
 
-		linkEP := loopback.New()
+		ep := loopback.New()
 
 		log.Infof("Enabling loopback interface %q with id %d on addresses %+v", link.Name, nicID, link.Addresses)
-		if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses, true /* loopback */); err != nil {
+		if err := n.createNICWithAddrs(nicID, link.Name, ep, link.Addresses, true /* loopback */); err != nil {
 			return err
 		}
 
@@ -156,7 +156,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
 		}
 
 		mac := tcpip.LinkAddress(link.LinkAddress)
-		linkEP, err := fdbased.New(&fdbased.Options{
+		ep, err := fdbased.New(&fdbased.Options{
 			FDs:                FDs,
 			MTU:                uint32(link.MTU),
 			EthernetHeader:     true,
@@ -170,7 +170,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
 		}
 
 		log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
-		if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses, false /* loopback */); err != nil {
+		if err := n.createNICWithAddrs(nicID, link.Name, ep, link.Addresses, false /* loopback */); err != nil {
 			return err
 		}
 
@@ -203,14 +203,14 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
 
 // createNICWithAddrs creates a NIC in the network stack and adds the given
 // addresses.
-func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID, addrs []net.IP, loopback bool) error {
+func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, ep stack.LinkEndpoint, addrs []net.IP, loopback bool) error {
 	if loopback {
-		if err := n.Stack.CreateNamedLoopbackNIC(id, name, sniffer.New(linkEP)); err != nil {
-			return fmt.Errorf("CreateNamedLoopbackNIC(%v, %v, %v) failed: %v", id, name, linkEP, err)
+		if err := n.Stack.CreateNamedLoopbackNIC(id, name, sniffer.New(ep)); err != nil {
+			return fmt.Errorf("CreateNamedLoopbackNIC(%v, %v) failed: %v", id, name, err)
 		}
 	} else {
-		if err := n.Stack.CreateNamedNIC(id, name, sniffer.New(linkEP)); err != nil {
-			return fmt.Errorf("CreateNamedNIC(%v, %v, %v) failed: %v", id, name, linkEP, err)
+		if err := n.Stack.CreateNamedNIC(id, name, sniffer.New(ep)); err != nil {
+			return fmt.Errorf("CreateNamedNIC(%v, %v) failed: %v", id, name, err)
 		}
 	}
 
-- 
cgit v1.2.3


From a8834fc555539bd6b0b46936c4a79817812658ff Mon Sep 17 00:00:00 2001
From: Adin Scannell <ascannell@google.com>
Date: Thu, 12 Sep 2019 23:36:18 -0700
Subject: Update p9 to support flipcall.

PiperOrigin-RevId: 268845090
---
 pkg/p9/BUILD                   |   3 +
 pkg/p9/client.go               | 280 ++++++++++++++++++++++++++++++++++++++---
 pkg/p9/client_test.go          |  50 +++++++-
 pkg/p9/handlers.go             |  53 +++++++-
 pkg/p9/messages.go             | 103 +++++++++++----
 pkg/p9/p9.go                   |   2 +
 pkg/p9/p9test/p9test.go        |   2 +-
 pkg/p9/server.go               | 178 +++++++++++++++++++++-----
 pkg/p9/transport.go            |   5 +-
 pkg/p9/transport_flipcall.go   | 254 +++++++++++++++++++++++++++++++++++++
 pkg/p9/transport_test.go       |   4 +-
 pkg/p9/version.go              |   9 +-
 runsc/boot/filter/config.go    |  14 ++-
 runsc/fsgofer/filter/BUILD     |   1 +
 runsc/fsgofer/filter/config.go |  36 +++++-
 15 files changed, 905 insertions(+), 89 deletions(-)
 create mode 100644 pkg/p9/transport_flipcall.go

(limited to 'runsc/boot')

diff --git a/pkg/p9/BUILD b/pkg/p9/BUILD
index 6bc4d3bc7..f32244c69 100644
--- a/pkg/p9/BUILD
+++ b/pkg/p9/BUILD
@@ -20,11 +20,14 @@ go_library(
         "pool.go",
         "server.go",
         "transport.go",
+        "transport_flipcall.go",
         "version.go",
     ],
     importpath = "gvisor.dev/gvisor/pkg/p9",
     deps = [
         "//pkg/fd",
+        "//pkg/fdchannel",
+        "//pkg/flipcall",
         "//pkg/log",
         "//pkg/unet",
         "@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/p9/client.go b/pkg/p9/client.go
index 7dc20aeef..123f54e29 100644
--- a/pkg/p9/client.go
+++ b/pkg/p9/client.go
@@ -20,6 +20,8 @@ import (
 	"sync"
 	"syscall"
 
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/flipcall"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/unet"
 )
@@ -77,6 +79,45 @@ type Client struct {
 	// fidPool is the collection of available fids.
 	fidPool pool
 
+	// messageSize is the maximum total size of a message.
+	messageSize uint32
+
+	// payloadSize is the maximum payload size of a read or write.
+	//
+	// For large reads and writes this means that the read or write is
+	// broken up into buffer-size/payloadSize requests.
+	payloadSize uint32
+
+	// version is the agreed upon version X of 9P2000.L.Google.X.
+	// version 0 implies 9P2000.L.
+	version uint32
+
+	// sendRecv is the transport function.
+	//
+	// This is determined dynamically based on whether or not the server
+	// supports flipcall channels (preferred as it is faster and more
+	// efficient, and does not require tags).
+	sendRecv func(message, message) error
+
+	// -- below corresponds to sendRecvChannel --
+
+	// channelsMu protects channels.
+	channelsMu sync.Mutex
+
+	// channelsWg is a wait group for active clients.
+	channelsWg sync.WaitGroup
+
+	// channels are the set of initialized IPC channels.
+	channels []*channel
+
+	// inuse is set when the channels are actually in use.
+	//
+	// This is a fixed-size slice, and the entries will be nil when the
+	// corresponding channel is available.
+	inuse []*channel
+
+	// -- below corresponds to sendRecvLegacy --
+
 	// pending is the set of pending messages.
 	pending   map[Tag]*response
 	pendingMu sync.Mutex
@@ -89,19 +130,6 @@ type Client struct {
 	// Whoever writes to this channel is permitted to call recv. When
 	// finished calling recv, this channel should be emptied.
 	recvr chan bool
-
-	// messageSize is the maximum total size of a message.
-	messageSize uint32
-
-	// payloadSize is the maximum payload size of a read or write
-	// request.  For large reads and writes this means that the
-	// read or write is broken up into buffer-size/payloadSize
-	// requests.
-	payloadSize uint32
-
-	// version is the agreed upon version X of 9P2000.L.Google.X.
-	// version 0 implies 9P2000.L.
-	version uint32
 }
 
 // NewClient creates a new client.  It performs a Tversion exchange with
@@ -138,8 +166,15 @@ func NewClient(socket *unet.Socket, messageSize uint32, version string) (*Client
 		return nil, ErrBadVersionString
 	}
 	for {
+		// Always exchange the version using the legacy version of the
+		// protocol. If the protocol supports flipcall, then we switch
+		// our sendRecv function to use that functionality.  Otherwise,
+		// we stick to sendRecvLegacy.
 		rversion := Rversion{}
-		err := c.sendRecv(&Tversion{Version: versionString(requested), MSize: messageSize}, &rversion)
+		err := c.sendRecvLegacy(&Tversion{
+			Version: versionString(requested),
+			MSize:   messageSize,
+		}, &rversion)
 
 		// The server told us to try again with a lower version.
 		if err == syscall.EAGAIN {
@@ -165,9 +200,125 @@ func NewClient(socket *unet.Socket, messageSize uint32, version string) (*Client
 		c.version = version
 		break
 	}
+
+	// Can we switch to use the more advanced channels and create
+	// independent channels for communication? Prefer it if possible.
+	if versionSupportsFlipcall(c.version) {
+		// Attempt to initialize IPC-based communication.
+		for i := 0; i < channelsPerClient; i++ {
+			if err := c.openChannel(i); err != nil {
+				log.Warningf("error opening flipcall channel: %v", err)
+				break // Stop.
+			}
+		}
+		if len(c.channels) >= 1 {
+			// At least one channel created.
+			c.sendRecv = c.sendRecvChannel
+
+			// If we are using channels for communication, then we must poll
+			// for shutdown events on the main socket. If the socket happens
+			// to shutdown, then we will close the channels as well. This is
+			// necessary because channels can hang forever if the server dies
+			// while we're expecting a response.
+			go c.watch(socket) // S/R-SAFE: not relevant.
+		} else {
+			// Channel setup failed; fallback.
+			c.sendRecv = c.sendRecvLegacy
+		}
+	} else {
+		// No channels available: use the legacy mechanism.
+		c.sendRecv = c.sendRecvLegacy
+	}
+
 	return c, nil
 }
 
+// watch watches the given socket and calls Close on hang up events.
+//
+// This is intended to be called as a goroutine.
+func (c *Client) watch(socket *unet.Socket) {
+	events := []unix.PollFd{
+		unix.PollFd{
+			Fd:     int32(socket.FD()),
+			Events: unix.POLLHUP | unix.POLLRDHUP,
+		},
+	}
+
+	for {
+		// Wait for a shutdown event.
+		n, err := unix.Ppoll(events, nil, nil)
+		if n == 0 || err == syscall.EAGAIN {
+			continue
+		}
+		break
+	}
+
+	// Close everything down: this will kick all active clients off any
+	// pending requests. Note that Close must be safe to call concurrently,
+	// and multiple times (see Close below).
+	c.Close()
+}
+
+// openChannel attempts to open a client channel.
+//
+// Note that this function returns naked errors which should not be propagated
+// directly to a caller. It is expected that the errors will be logged and a
+// fallback path will be used instead.
+func (c *Client) openChannel(id int) error {
+	var (
+		rchannel0 Rchannel
+		rchannel1 Rchannel
+		res       = new(channel)
+	)
+
+	// Open the data channel.
+	if err := c.sendRecvLegacy(&Tchannel{
+		ID:      uint32(id),
+		Control: 0,
+	}, &rchannel0); err != nil {
+		return fmt.Errorf("error handling Tchannel message: %v", err)
+	}
+	if rchannel0.FilePayload() == nil {
+		return fmt.Errorf("missing file descriptor on primary channel")
+	}
+
+	// We don't need to hold this.
+	defer rchannel0.FilePayload().Close()
+
+	// Open the channel for file descriptors.
+	if err := c.sendRecvLegacy(&Tchannel{
+		ID:      uint32(id),
+		Control: 1,
+	}, &rchannel1); err != nil {
+		return err
+	}
+	if rchannel1.FilePayload() == nil {
+		return fmt.Errorf("missing file descriptor on file descriptor channel")
+	}
+
+	// Construct the endpoints.
+	res.desc = flipcall.PacketWindowDescriptor{
+		FD:     rchannel0.FilePayload().FD(),
+		Offset: int64(rchannel0.Offset),
+		Length: int(rchannel0.Length),
+	}
+	if err := res.data.Init(flipcall.ClientSide, res.desc); err != nil {
+		rchannel1.FilePayload().Close()
+		return err
+	}
+
+	// The fds channel owns the control payload, and it will be closed when
+	// the channel object is closed.
+	res.fds.Init(rchannel1.FilePayload().Release())
+
+	// Save the channel.
+	c.channelsMu.Lock()
+	defer c.channelsMu.Unlock()
+	c.channels = append(c.channels, res)
+	c.inuse = append(c.inuse, nil)
+	return nil
+}
+
 // handleOne handles a single incoming message.
 //
 // This should only be called with the token from recvr. Note that the received
@@ -247,10 +398,10 @@ func (c *Client) waitAndRecv(done chan error) error {
 	}
 }
 
-// sendRecv performs a roundtrip message exchange.
+// sendRecvLegacy performs a roundtrip message exchange.
 //
 // This is called by internal functions.
-func (c *Client) sendRecv(t message, r message) error {
+func (c *Client) sendRecvLegacy(t message, r message) error {
 	tag, ok := c.tagPool.Get()
 	if !ok {
 		return ErrOutOfTags
@@ -296,12 +447,107 @@ func (c *Client) sendRecv(t message, r message) error {
 	return nil
 }
 
+// sendRecvChannel uses channels to send a message.
+func (c *Client) sendRecvChannel(t message, r message) error {
+	c.channelsMu.Lock()
+	if len(c.channels) == 0 {
+		// No channel available.
+		c.channelsMu.Unlock()
+		return c.sendRecvLegacy(t, r)
+	}
+
+	// Find the last used channel.
+	//
+	// Note that we must add one to the wait group while holding the
+	// channel mutex, in order for the Wait operation to be race-free
+	// below. The Wait operation shuts down all in use channels and
+	// waits for them to return, but must do so holding the mutex.
+	idx := len(c.channels) - 1
+	ch := c.channels[idx]
+	c.channels = c.channels[:idx]
+	c.inuse[idx] = ch
+	c.channelsWg.Add(1)
+	c.channelsMu.Unlock()
+
+	// Ensure that it's connected.
+	if !ch.connected {
+		ch.connected = true
+		if err := ch.data.Connect(); err != nil {
+			// The channel is unusable, so don't return it.
+			ch.Close()
+			c.channelsWg.Done()
+			return err
+		}
+	}
+
+	// Send the message.
+	err := ch.sendRecv(c, t, r)
+	if err != nil {
+		// On shutdown, we'll see ENOENT. This is a normal situation, and
+		// we shouldn't generate a spurious warning message in that case.
+		log.Debugf("error calling sendRecvChannel: %v", err)
+	}
+	c.channelsWg.Done()
+
+	// Return the channel.
+	//
+	// Note that we check the channel from the inuse slice here. This
+	// prevents a race where Close is called, which clears inuse, and
+	// means that we will not actually return the closed channel.
+	c.channelsMu.Lock()
+	if c.inuse[idx] != nil {
+		c.channels = append(c.channels, ch)
+		c.inuse[idx] = nil
+	}
+	c.channelsMu.Unlock()
+
+	return err
+}
+
 // Version returns the negotiated 9P2000.L.Google version number.
 func (c *Client) Version() uint32 {
 	return c.version
 }
 
-// Close closes the underlying socket.
+// Close closes the underlying socket and channels.
+//
+// Because Close may be called asynchronously from watch, it must be
+// safe to call concurrently and multiple times.
 func (c *Client) Close() error {
+	c.channelsMu.Lock()
+	defer c.channelsMu.Unlock()
+
+	// Close all inactive channels.
+	for _, ch := range c.channels {
+		ch.Shutdown()
+		ch.Close()
+	}
+	// Close all active channels.
+	for _, ch := range c.inuse {
+		if ch != nil {
+			log.Debugf("shutting down active channel@%p...", ch)
+			ch.Shutdown()
+		}
+	}
+
+	// Wait for active users.
+	c.channelsWg.Wait()
+
+	// Close all previously active channels.
+	for i, ch := range c.inuse {
+		if ch != nil {
+			ch.Close()
+
+			// Clear the inuse entry here so that it will not be returned
+			// to the channel slice, which is cleared below. See the
+			// comment at the end of sendRecvChannel.
+			c.inuse[i] = nil
+		}
+	}
+	c.channels = nil // Prevent use again.
+
+	// Close the main socket. Note that this operation is safe to call
+	// multiple times, unlike the channel Close operations above, which
+	// we are careful to ensure aren't called twice.
 	return c.socket.Close()
 }
diff --git a/pkg/p9/client_test.go b/pkg/p9/client_test.go
index 87b2dd61e..29a0afadf 100644
--- a/pkg/p9/client_test.go
+++ b/pkg/p9/client_test.go
@@ -35,23 +35,23 @@ func TestVersion(t *testing.T) {
 	go s.Handle(serverSocket)
 
 	// NewClient does a Tversion exchange, so this is our test for success.
-	c, err := NewClient(clientSocket, 1024*1024 /* 1M message size */, HighestVersionString())
+	c, err := NewClient(clientSocket, DefaultMessageSize, HighestVersionString())
 	if err != nil {
 		t.Fatalf("got %v, expected nil", err)
 	}
 
 	// Check a bogus version string.
-	if err := c.sendRecv(&Tversion{Version: "notokay", MSize: 1024 * 1024}, &Rversion{}); err != syscall.EINVAL {
+	if err := c.sendRecv(&Tversion{Version: "notokay", MSize: DefaultMessageSize}, &Rversion{}); err != syscall.EINVAL {
 		t.Errorf("got %v expected %v", err, syscall.EINVAL)
 	}
 
 	// Check a bogus version number.
-	if err := c.sendRecv(&Tversion{Version: "9P1000.L", MSize: 1024 * 1024}, &Rversion{}); err != syscall.EINVAL {
+	if err := c.sendRecv(&Tversion{Version: "9P1000.L", MSize: DefaultMessageSize}, &Rversion{}); err != syscall.EINVAL {
 		t.Errorf("got %v expected %v", err, syscall.EINVAL)
 	}
 
 	// Check a too high version number.
-	if err := c.sendRecv(&Tversion{Version: versionString(highestSupportedVersion + 1), MSize: 1024 * 1024}, &Rversion{}); err != syscall.EAGAIN {
+	if err := c.sendRecv(&Tversion{Version: versionString(highestSupportedVersion + 1), MSize: DefaultMessageSize}, &Rversion{}); err != syscall.EAGAIN {
 		t.Errorf("got %v expected %v", err, syscall.EAGAIN)
 	}
 
@@ -60,3 +60,45 @@ func TestVersion(t *testing.T) {
 		t.Errorf("got %v expected %v", err, syscall.EINVAL)
 	}
 }
+
+func benchmarkSendRecv(b *testing.B, fn func(c *Client) func(message, message) error) {
+	// See above.
+	serverSocket, clientSocket, err := unet.SocketPair(false)
+	if err != nil {
+		b.Fatalf("socketpair got err %v expected nil", err)
+	}
+	defer clientSocket.Close()
+
+	// See above.
+	s := NewServer(nil)
+	go s.Handle(serverSocket)
+
+	// See above.
+	c, err := NewClient(clientSocket, DefaultMessageSize, HighestVersionString())
+	if err != nil {
+		b.Fatalf("got %v, expected nil", err)
+	}
+
+	// Initialize messages.
+	sendRecv := fn(c)
+	tversion := &Tversion{
+		Version: versionString(highestSupportedVersion),
+		MSize:   DefaultMessageSize,
+	}
+	rversion := new(Rversion)
+
+	// Run in a loop.
+	for i := 0; i < b.N; i++ {
+		if err := sendRecv(tversion, rversion); err != nil {
+			b.Fatalf("got unexpected err: %v", err)
+		}
+	}
+}
+
+func BenchmarkSendRecvLegacy(b *testing.B) {
+	benchmarkSendRecv(b, func(c *Client) func(message, message) error { return c.sendRecvLegacy })
+}
+
+func BenchmarkSendRecvChannel(b *testing.B) {
+	benchmarkSendRecv(b, func(c *Client) func(message, message) error { return c.sendRecvChannel })
+}
diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go
index 999b4f684..ba9a55d6d 100644
--- a/pkg/p9/handlers.go
+++ b/pkg/p9/handlers.go
@@ -305,7 +305,9 @@ func (t *Tlopen) handle(cs *connState) message {
 	ref.opened = true
 	ref.openFlags = t.Flags
 
-	return &Rlopen{QID: qid, IoUnit: ioUnit, File: osFile}
+	rlopen := &Rlopen{QID: qid, IoUnit: ioUnit}
+	rlopen.SetFilePayload(osFile)
+	return rlopen
 }
 
 func (t *Tlcreate) do(cs *connState, uid UID) (*Rlcreate, error) {
@@ -364,7 +366,9 @@ func (t *Tlcreate) do(cs *connState, uid UID) (*Rlcreate, error) {
 	// Replace the FID reference.
 	cs.InsertFID(t.FID, newRef)
 
-	return &Rlcreate{Rlopen: Rlopen{QID: qid, IoUnit: ioUnit, File: osFile}}, nil
+	rlcreate := &Rlcreate{Rlopen: Rlopen{QID: qid, IoUnit: ioUnit}}
+	rlcreate.SetFilePayload(osFile)
+	return rlcreate, nil
 }
 
 // handle implements handler.handle.
@@ -1287,5 +1291,48 @@ func (t *Tlconnect) handle(cs *connState) message {
 		return newErr(err)
 	}
 
-	return &Rlconnect{File: osFile}
+	rlconnect := &Rlconnect{}
+	rlconnect.SetFilePayload(osFile)
+	return rlconnect
+}
+
+// handle implements handler.handle.
+func (t *Tchannel) handle(cs *connState) message {
+	// Ensure that channels are enabled.
+	if err := cs.initializeChannels(); err != nil {
+		return newErr(err)
+	}
+
+	// Lookup the given channel.
+	ch := cs.lookupChannel(t.ID)
+	if ch == nil {
+		return newErr(syscall.ENOSYS)
+	}
+
+	// Return the payload. Note that we need to duplicate the file
+	// descriptor for the channel allocator, because sending is a
+	// destructive operation between sendRecvLegacy (and now the newer
+	// channel send operations). Same goes for the client FD.
+	rchannel := &Rchannel{
+		Offset: uint64(ch.desc.Offset),
+		Length: uint64(ch.desc.Length),
+	}
+	switch t.Control {
+	case 0:
+		// Open the main data channel.
+		mfd, err := syscall.Dup(int(cs.channelAlloc.FD()))
+		if err != nil {
+			return newErr(err)
+		}
+		rchannel.SetFilePayload(fd.New(mfd))
+	case 1:
+		cfd, err := syscall.Dup(ch.client.FD())
+		if err != nil {
+			return newErr(err)
+		}
+		rchannel.SetFilePayload(fd.New(cfd))
+	default:
+		return newErr(syscall.EINVAL)
+	}
+	return rchannel
 }
diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go
index fd9eb1c5d..ffdd7e8c6 100644
--- a/pkg/p9/messages.go
+++ b/pkg/p9/messages.go
@@ -64,6 +64,21 @@ type filer interface {
 	SetFilePayload(*fd.FD)
 }
 
+// filePayload embeds a File object.
+type filePayload struct {
+	File *fd.FD
+}
+
+// FilePayload returns the file payload.
+func (f *filePayload) FilePayload() *fd.FD {
+	return f.File
+}
+
+// SetFilePayload sets the received file.
+func (f *filePayload) SetFilePayload(file *fd.FD) {
+	f.File = file
+}
+
 // Tversion is a version request.
 type Tversion struct {
 	// MSize is the message size to use.
@@ -524,10 +539,7 @@ type Rlopen struct {
 	// IoUnit is the recommended I/O unit.
 	IoUnit uint32
 
-	// File may be attached via the socket.
-	//
-	// This is an extension specific to this package.
-	File *fd.FD
+	filePayload
 }
 
 // Decode implements encoder.Decode.
@@ -547,16 +559,6 @@ func (*Rlopen) Type() MsgType {
 	return MsgRlopen
 }
 
-// FilePayload returns the file payload.
-func (r *Rlopen) FilePayload() *fd.FD {
-	return r.File
-}
-
-// SetFilePayload sets the received file.
-func (r *Rlopen) SetFilePayload(file *fd.FD) {
-	r.File = file
-}
-
 // String implements fmt.Stringer.
 func (r *Rlopen) String() string {
 	return fmt.Sprintf("Rlopen{QID: %s, IoUnit: %d, File: %v}", r.QID, r.IoUnit, r.File)
@@ -2171,8 +2173,7 @@ func (t *Tlconnect) String() string {
 
 // Rlconnect is a connect response.
 type Rlconnect struct {
-	// File is a host socket.
-	File *fd.FD
+	filePayload
 }
 
 // Decode implements encoder.Decode.
@@ -2186,19 +2187,71 @@ func (*Rlconnect) Type() MsgType {
 	return MsgRlconnect
 }
 
-// FilePayload returns the file payload.
-func (r *Rlconnect) FilePayload() *fd.FD {
-	return r.File
+// String implements fmt.Stringer.
+func (r *Rlconnect) String() string {
+	return fmt.Sprintf("Rlconnect{File: %v}", r.File)
 }
 
-// SetFilePayload sets the received file.
-func (r *Rlconnect) SetFilePayload(file *fd.FD) {
-	r.File = file
+// Tchannel creates a new channel.
+type Tchannel struct {
+	// ID is the channel ID.
+	ID uint32
+
+	// Control is 0 if the Rchannel response should provide the flipcall
+	// component of the channel, and 1 if the Rchannel response should
+	// provide the fdchannel component of the channel.
+	Control uint32
+}
+
+// Decode implements encoder.Decode.
+func (t *Tchannel) Decode(b *buffer) {
+	t.ID = b.Read32()
+	t.Control = b.Read32()
+}
+
+// Encode implements encoder.Encode.
+func (t *Tchannel) Encode(b *buffer) {
+	b.Write32(t.ID)
+	b.Write32(t.Control)
+}
+
+// Type implements message.Type.
+func (*Tchannel) Type() MsgType {
+	return MsgTchannel
 }
 
 // String implements fmt.Stringer.
-func (r *Rlconnect) String() string {
-	return fmt.Sprintf("Rlconnect{File: %v}", r.File)
+func (t *Tchannel) String() string {
+	return fmt.Sprintf("Tchannel{ID: %d, Control: %d}", t.ID, t.Control)
+}
+
+// Rchannel is the channel response.
+type Rchannel struct {
+	Offset uint64
+	Length uint64
+	filePayload
+}
+
+// Decode implements encoder.Decode.
+func (r *Rchannel) Decode(b *buffer) {
+	r.Offset = b.Read64()
+	r.Length = b.Read64()
+}
+
+// Encode implements encoder.Encode.
+func (r *Rchannel) Encode(b *buffer) {
+	b.Write64(r.Offset)
+	b.Write64(r.Length)
+}
+
+// Type implements message.Type.
+func (*Rchannel) Type() MsgType {
+	return MsgRchannel
+}
+
+// String implements fmt.Stringer.
+func (r *Rchannel) String() string {
+	return fmt.Sprintf("Rchannel{Offset: %d, Length: %d}", r.Offset, r.Length)
 }
 
 const maxCacheSize = 3
@@ -2356,4 +2409,6 @@ func init() {
 	msgRegistry.register(MsgRlconnect, func() message { return &Rlconnect{} })
 	msgRegistry.register(MsgTallocate, func() message { return &Tallocate{} })
 	msgRegistry.register(MsgRallocate, func() message { return &Rallocate{} })
+	msgRegistry.register(MsgTchannel, func() message { return &Tchannel{} })
+	msgRegistry.register(MsgRchannel, func() message { return &Rchannel{} })
 }
diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go
index e12831dbd..25530adca 100644
--- a/pkg/p9/p9.go
+++ b/pkg/p9/p9.go
@@ -378,6 +378,8 @@ const (
 	MsgRlconnect            = 137
 	MsgTallocate            = 138
 	MsgRallocate            = 139
+	MsgTchannel             = 250
+	MsgRchannel             = 251
 )
 
 // QIDType represents the file type for QIDs.
diff --git a/pkg/p9/p9test/p9test.go b/pkg/p9/p9test/p9test.go
index 95846e5f7..9d74638bb 100644
--- a/pkg/p9/p9test/p9test.go
+++ b/pkg/p9/p9test/p9test.go
@@ -315,7 +315,7 @@ func NewHarness(t *testing.T) (*Harness, *p9.Client) {
 	}()
 
 	// Create the client.
-	client, err := p9.NewClient(clientSocket, 1024, p9.HighestVersionString())
+	client, err := p9.NewClient(clientSocket, p9.DefaultMessageSize, p9.HighestVersionString())
 	if err != nil {
 		serverSocket.Close()
 		clientSocket.Close()
diff --git a/pkg/p9/server.go b/pkg/p9/server.go
index b294efbb0..69c886a5d 100644
--- a/pkg/p9/server.go
+++ b/pkg/p9/server.go
@@ -21,6 +21,9 @@ import (
 	"sync/atomic"
 	"syscall"
 
+	"gvisor.dev/gvisor/pkg/fd"
+	"gvisor.dev/gvisor/pkg/fdchannel"
+	"gvisor.dev/gvisor/pkg/flipcall"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/unet"
 )
@@ -45,7 +48,6 @@ type Server struct {
 }
 
 // NewServer returns a new server.
-//
 func NewServer(attacher Attacher) *Server {
 	return &Server{
 		attacher: attacher,
@@ -85,6 +87,8 @@ type connState struct {
 	// version 0 implies 9P2000.L.
 	version uint32
 
+	// -- below relates to the legacy handler --
+
 	// recvOkay indicates that a receive may start.
 	recvOkay chan bool
 
@@ -93,6 +97,20 @@ type connState struct {
 
 	// sendDone is signalled when a send is finished.
 	sendDone chan error
+
+	// -- below relates to the flipcall handler --
+
+	// channelMu protects below.
+	channelMu sync.Mutex
+
+	// channelWg represents active workers.
+	channelWg sync.WaitGroup
+
+	// channelAlloc allocates channel memory.
+	channelAlloc *flipcall.PacketWindowAllocator
+
+	// channels are the set of initialized channels.
+	channels []*channel
 }
 
 // fidRef wraps a node and tracks references.
@@ -386,6 +404,99 @@ func (cs *connState) WaitTag(t Tag) {
 	<-ch
 }
 
+// initializeChannels initializes all channels.
+//
+// This is a no-op if channels are already initialized.
+func (cs *connState) initializeChannels() (err error) {
+	cs.channelMu.Lock()
+	defer cs.channelMu.Unlock()
+
+	// Initialize our channel allocator.
+	if cs.channelAlloc == nil {
+		alloc, err := flipcall.NewPacketWindowAllocator()
+		if err != nil {
+			return err
+		}
+		cs.channelAlloc = alloc
+	}
+
+	// Create all the channels.
+	for len(cs.channels) < channelsPerClient {
+		res := &channel{
+			done: make(chan struct{}),
+		}
+
+		res.desc, err = cs.channelAlloc.Allocate(channelSize)
+		if err != nil {
+			return err
+		}
+		if err := res.data.Init(flipcall.ServerSide, res.desc); err != nil {
+			return err
+		}
+
+		socks, err := fdchannel.NewConnectedSockets()
+		if err != nil {
+			res.data.Destroy() // Cleanup.
+			return err
+		}
+		res.fds.Init(socks[0])
+		res.client = fd.New(socks[1])
+
+		cs.channels = append(cs.channels, res)
+
+		// Start servicing the channel.
+		//
+		// When we call stop, we will close all the channels and these
+		// routines should finish. We need the wait group to ensure
+		// that active handlers are actually finished before cleanup.
+		cs.channelWg.Add(1)
+		go func() { // S/R-SAFE: Server side.
+			defer cs.channelWg.Done()
+			res.service(cs)
+		}()
+	}
+
+	return nil
+}
+
+// lookupChannel looks up the channel with given id.
+//
+// The function returns nil if no such channel is available.
+func (cs *connState) lookupChannel(id uint32) *channel {
+	cs.channelMu.Lock()
+	defer cs.channelMu.Unlock()
+	if id >= uint32(len(cs.channels)) {
+		return nil
+	}
+	return cs.channels[id]
+}
+
+// handle handles a single message.
+func (cs *connState) handle(m message) (r message) {
+	defer func() {
+		if r == nil {
+			// Don't allow a panic to propagate.
+			recover()
+
+			// Include a useful log message.
+			log.Warningf("panic in handler: %s", debug.Stack())
+
+			// Wrap in an EFAULT error; we don't really have a
+			// better way to describe this kind of error. It will
+			// usually manifest as a result of the test framework.
+			r = newErr(syscall.EFAULT)
+		}
+	}()
+	if handler, ok := m.(handler); ok {
+		// Call the message handler.
+		r = handler.handle(cs)
+	} else {
+		// Produce an ENOSYS error.
+		r = newErr(syscall.ENOSYS)
+	}
+	return
+}
+
 // handleRequest handles a single request.
 //
 // The recvDone channel is signaled when recv is done (with a error if
@@ -428,41 +539,20 @@ func (cs *connState) handleRequest() {
 	}
 
 	// Handle the message.
-	var r message // r is the response.
-	defer func() {
-		if r == nil {
-			// Don't allow a panic to propagate.
-			recover()
+	r := cs.handle(m)
 
-			// Include a useful log message.
-			log.Warningf("panic in handler: %s", debug.Stack())
+	// Clear the tag before sending. That's because as soon as this hits
+	// the wire, the client can legally send the same tag.
+	cs.ClearTag(tag)
 
-			// Wrap in an EFAULT error; we don't really have a
-			// better way to describe this kind of error. It will
-			// usually manifest as a result of the test framework.
-			r = newErr(syscall.EFAULT)
-		}
+	// Send back the result.
+	cs.sendMu.Lock()
+	err = send(cs.conn, tag, r)
+	cs.sendMu.Unlock()
+	cs.sendDone <- err
 
-		// Clear the tag before sending. That's because as soon as this
-		// hits the wire, the client can legally send another message
-		// with the same tag.
-		cs.ClearTag(tag)
-
-		// Send back the result.
-		cs.sendMu.Lock()
-		err = send(cs.conn, tag, r)
-		cs.sendMu.Unlock()
-		cs.sendDone <- err
-	}()
-	if handler, ok := m.(handler); ok {
-		// Call the message handler.
-		r = handler.handle(cs)
-	} else {
-		// Produce an ENOSYS error.
-		r = newErr(syscall.ENOSYS)
-	}
+	// Return the message to the cache.
 	msgRegistry.put(m)
-	m = nil // 'm' should not be touched after this point.
 }
 
 func (cs *connState) handleRequests() {
@@ -477,7 +567,27 @@ func (cs *connState) stop() {
 	close(cs.recvDone)
 	close(cs.sendDone)
 
-	for _, fidRef := range cs.fids {
+	// Free the channels.
+	cs.channelMu.Lock()
+	for _, ch := range cs.channels {
+		ch.Shutdown()
+	}
+	cs.channelWg.Wait()
+	for _, ch := range cs.channels {
+		ch.Close()
+	}
+	cs.channels = nil // Clear.
+	cs.channelMu.Unlock()
+
+	// Free the channel memory.
+	if cs.channelAlloc != nil {
+		cs.channelAlloc.Destroy()
+	}
+
+	// Close all remaining fids.
+	for fid, fidRef := range cs.fids {
+		delete(cs.fids, fid)
+
 		// Drop final reference in the FID table. Note this should
 		// always close the file, since we've ensured that there are no
 		// handlers running via the wait for Pending => 0 below.
@@ -510,7 +620,7 @@ func (cs *connState) service() error {
 				for i := 0; i < pending; i++ {
 					<-cs.sendDone
 				}
-				return err
+				return nil
 			}
 
 			// This handler is now pending.
diff --git a/pkg/p9/transport.go b/pkg/p9/transport.go
index 5648df589..6e8b4bbcd 100644
--- a/pkg/p9/transport.go
+++ b/pkg/p9/transport.go
@@ -54,7 +54,10 @@ const (
 	headerLength uint32 = 7
 
 	// maximumLength is the largest possible message.
-	maximumLength uint32 = 4 * 1024 * 1024
+	maximumLength uint32 = 1 << 20
+
+	// DefaultMessageSize is a sensible default.
+	DefaultMessageSize uint32 = 64 << 10
 
 	// initialBufferLength is the initial data buffer we allocate.
 	initialBufferLength uint32 = 64
diff --git a/pkg/p9/transport_flipcall.go b/pkg/p9/transport_flipcall.go
new file mode 100644
index 000000000..aebb54959
--- /dev/null
+++ b/pkg/p9/transport_flipcall.go
@@ -0,0 +1,254 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package p9
+
+import (
+	"runtime"
+	"syscall"
+
+	"gvisor.dev/gvisor/pkg/fd"
+	"gvisor.dev/gvisor/pkg/fdchannel"
+	"gvisor.dev/gvisor/pkg/flipcall"
+	"gvisor.dev/gvisor/pkg/log"
+)
+
+// channelsPerClient is the number of channels to create per client.
+//
+// While the client and server will generally agree on this number, in reality
+// it's completely up to the server. We simply define a minimum of 2, and a
+// maximum of 4, and select the number of available processors as a tie-breaker.
+// Note that we don't want the number of channels to be too large, because each
+// will account for channelSize memory used, which can be large.
+var channelsPerClient = func() int {
+	n := runtime.NumCPU()
+	if n < 2 {
+		return 2
+	}
+	if n > 4 {
+		return 4
+	}
+	return n
+}()
+
+// channelSize is the channel size to create.
+//
+// We simply ensure that this is larger than the largest possible message size,
+// plus the flipcall packet header, plus the two bytes we write below.
+const channelSize = int(2 + flipcall.PacketHeaderBytes + 2 + maximumLength)
+
+// channel is a fast IPC channel.
+//
+// The same object is used by both the server and client implementations. In
+// general, the client will use only the send and recv methods.
+type channel struct {
+	desc flipcall.PacketWindowDescriptor
+	data flipcall.Endpoint
+	fds  fdchannel.Endpoint
+	buf  buffer
+
+	// -- client only --
+	connected bool
+
+	// -- server only --
+	client *fd.FD
+	done   chan struct{}
+}
+
+// reset resets the channel buffer.
+func (ch *channel) reset(sz uint32) {
+	ch.buf.data = ch.data.Data()[:sz]
+}
+
+// service services the channel.
+func (ch *channel) service(cs *connState) error {
+	rsz, err := ch.data.RecvFirst()
+	if err != nil {
+		return err
+	}
+	for rsz > 0 {
+		m, err := ch.recv(nil, rsz)
+		if err != nil {
+			return err
+		}
+		r := cs.handle(m)
+		msgRegistry.put(m)
+		rsz, err = ch.send(r)
+		if err != nil {
+			return err
+		}
+	}
+	return nil // Done.
+}
+
+// Shutdown shuts down the channel.
+//
+// This must be called before Close.
+func (ch *channel) Shutdown() {
+	ch.data.Shutdown()
+}
+
+// Close closes the channel.
+//
+// This must only be called once, and cannot return an error. Note that
+// synchronization for this method is provided at a high-level, depending on
+// whether it is the client or server. This cannot be called while there are
+// active callers in either service or sendRecv.
+//
+// Precondition: the channel should be shutdown.
+func (ch *channel) Close() error {
+	// Close all backing transports.
+	ch.fds.Destroy()
+	ch.data.Destroy()
+	if ch.client != nil {
+		ch.client.Close()
+	}
+	return nil
+}
+
+// send sends the given message.
+//
+// The return value is the size of the received response. Note that in the
+// server case, this is the size of the next request.
+func (ch *channel) send(m message) (uint32, error) {
+	if log.IsLogging(log.Debug) {
+		log.Debugf("send [channel @%p] %s", ch, m.String())
+	}
+
+	// Send any file payload.
+	sentFD := false
+	if filer, ok := m.(filer); ok {
+		if f := filer.FilePayload(); f != nil {
+			if err := ch.fds.SendFD(f.FD()); err != nil {
+				return 0, syscall.EIO // Map everything to EIO.
+			}
+			f.Close()     // Per sendRecvLegacy.
+			sentFD = true // To mark below.
+		}
+	}
+
+	// Encode the message.
+	//
+	// Note that IPC itself encodes the length of messages, so we don't
+	// need to encode a standard 9P header. We write only the message type.
+	ch.reset(0)
+
+	ch.buf.WriteMsgType(m.Type())
+	if sentFD {
+		ch.buf.Write8(1) // Incoming FD.
+	} else {
+		ch.buf.Write8(0) // No incoming FD.
+	}
+	m.Encode(&ch.buf)
+	ssz := uint32(len(ch.buf.data)) // Updated below.
+
+	// Is there a payload?
+	if payloader, ok := m.(payloader); ok {
+		p := payloader.Payload()
+		copy(ch.data.Data()[ssz:], p)
+		ssz += uint32(len(p))
+	}
+
+	// Perform the one-shot communication.
+	n, err := ch.data.SendRecv(ssz)
+	if err != nil {
+		if n > 0 {
+			return n, nil
+		}
+		return 0, syscall.EIO // See above.
+	}
+
+	return n, nil
+}
+
+// recv decodes a message that exists on the channel.
+//
+// If the passed r is non-nil, then the type must match or an error will be
+// generated. If the passed r is nil, then a new message will be created and
+// returned.
+func (ch *channel) recv(r message, rsz uint32) (message, error) {
+	// Decode the response from the inline buffer.
+	ch.reset(rsz)
+	t := ch.buf.ReadMsgType()
+	hasFD := ch.buf.Read8() != 0
+	if t == MsgRlerror {
+		// Change the message type. We check for this special case
+		// after decoding below, and transform into an error.
+		r = &Rlerror{}
+	} else if r == nil {
+		nr, err := msgRegistry.get(0, t)
+		if err != nil {
+			return nil, err
+		}
+		r = nr // New message.
+	} else if t != r.Type() {
+		// Not an error and not the expected response; propagate.
+		return nil, &ErrBadResponse{Got: t, Want: r.Type()}
+	}
+
+	// Is there a payload? Set to the latter portion.
+	if payloader, ok := r.(payloader); ok {
+		fs := payloader.FixedSize()
+		payloader.SetPayload(ch.buf.data[fs:])
+		ch.buf.data = ch.buf.data[:fs]
+	}
+
+	r.Decode(&ch.buf)
+	if ch.buf.isOverrun() {
+		// Nothing valid was available.
+		log.Debugf("recv [got %d bytes, needed more]", rsz)
+		return nil, ErrNoValidMessage
+	}
+
+	// Read any FD result.
+	if hasFD {
+		if rfd, err := ch.fds.RecvFDNonblock(); err == nil {
+			f := fd.New(rfd)
+			if filer, ok := r.(filer); ok {
+				// Set the payload.
+				filer.SetFilePayload(f)
+			} else {
+				// Don't want the FD.
+				f.Close()
+			}
+		} else {
+			// The header bit was set but nothing came in.
+			log.Warningf("expected FD, got err: %v", err)
+		}
+	}
+
+	// Log a message.
+	if log.IsLogging(log.Debug) {
+		log.Debugf("recv [channel @%p] %s", ch, r.String())
+	}
+
+	// Convert errors appropriately; see above.
+	if rlerr, ok := r.(*Rlerror); ok {
+		return nil, syscall.Errno(rlerr.Error)
+	}
+
+	return r, nil
+}
+
+// sendRecv sends the given message over the channel.
+//
+// This is used by the client.
+func (ch *channel) sendRecv(c *Client, m, r message) error {
+	rsz, err := ch.send(m)
+	if err != nil {
+		return err
+	}
+	_, err = ch.recv(r, rsz)
+	return err
+}
diff --git a/pkg/p9/transport_test.go b/pkg/p9/transport_test.go
index cdb3bc841..2f50ff3ea 100644
--- a/pkg/p9/transport_test.go
+++ b/pkg/p9/transport_test.go
@@ -124,7 +124,9 @@ func TestSendRecvWithFile(t *testing.T) {
 		t.Fatalf("unable to create file: %v", err)
 	}
 
-	if err := send(client, Tag(1), &Rlopen{File: f}); err != nil {
+	rlopen := &Rlopen{}
+	rlopen.SetFilePayload(f)
+	if err := send(client, Tag(1), rlopen); err != nil {
 		t.Fatalf("send got err %v expected nil", err)
 	}
 
diff --git a/pkg/p9/version.go b/pkg/p9/version.go
index c2a2885ae..f1ffdd23a 100644
--- a/pkg/p9/version.go
+++ b/pkg/p9/version.go
@@ -26,7 +26,7 @@ const (
 	//
 	// Clients are expected to start requesting this version number and
 	// to continuously decrement it until a Tversion request succeeds.
-	highestSupportedVersion uint32 = 7
+	highestSupportedVersion uint32 = 8
 
 	// lowestSupportedVersion is the lowest supported version X in a
 	// version string of the format 9P2000.L.Google.X.
@@ -148,3 +148,10 @@ func VersionSupportsMultiUser(v uint32) bool {
 func versionSupportsTallocate(v uint32) bool {
 	return v >= 7
 }
+
+// versionSupportsFlipcall returns true if version v supports IPC channels from
+// the flipcall package. Note that these must be negotiated, but this version
+// string indicates that such a facility exists.
+func versionSupportsFlipcall(v uint32) bool {
+	return v >= 8
+}
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 7ca776b3a..a2ecc6bcb 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -88,14 +88,24 @@ var allowedSyscalls = seccomp.SyscallRules{
 			seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
 			seccomp.AllowAny{},
 			seccomp.AllowAny{},
-			seccomp.AllowValue(0),
 		},
 		{
 			seccomp.AllowAny{},
 			seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
 			seccomp.AllowAny{},
+		},
+		// Non-private variants are included for flipcall support. They are otherwise
+		// unnecessary, as the sentry will use only private futexes internally.
+		{
+			seccomp.AllowAny{},
+			seccomp.AllowValue(linux.FUTEX_WAIT),
+			seccomp.AllowAny{},
+			seccomp.AllowAny{},
+		},
+		{
+			seccomp.AllowAny{},
+			seccomp.AllowValue(linux.FUTEX_WAKE),
 			seccomp.AllowAny{},
-			seccomp.AllowValue(0),
 		},
 	},
 	syscall.SYS_GETPID: {},
diff --git a/runsc/fsgofer/filter/BUILD b/runsc/fsgofer/filter/BUILD
index e2318a978..02168ad1b 100644
--- a/runsc/fsgofer/filter/BUILD
+++ b/runsc/fsgofer/filter/BUILD
@@ -17,6 +17,7 @@ go_library(
     ],
     deps = [
         "//pkg/abi/linux",
+        "//pkg/flipcall",
         "//pkg/log",
         "//pkg/seccomp",
         "@org_golang_x_sys//unix:go_default_library",
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index 8ddfa77d6..2f3f2039a 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -83,6 +83,11 @@ var allowedSyscalls = seccomp.SyscallRules{
 			seccomp.AllowAny{},
 			seccomp.AllowValue(syscall.F_GETFD),
 		},
+		// Used by flipcall.PacketWindowAllocator.Init().
+		{
+			seccomp.AllowAny{},
+			seccomp.AllowValue(unix.F_ADD_SEALS),
+		},
 	},
 	syscall.SYS_FSTAT:     {},
 	syscall.SYS_FSTATFS:   {},
@@ -103,6 +108,19 @@ var allowedSyscalls = seccomp.SyscallRules{
 			seccomp.AllowAny{},
 			seccomp.AllowValue(0),
 		},
+		// Non-private futex used for flipcall.
+		seccomp.Rule{
+			seccomp.AllowAny{},
+			seccomp.AllowValue(linux.FUTEX_WAIT),
+			seccomp.AllowAny{},
+			seccomp.AllowAny{},
+		},
+		seccomp.Rule{
+			seccomp.AllowAny{},
+			seccomp.AllowValue(linux.FUTEX_WAKE),
+			seccomp.AllowAny{},
+			seccomp.AllowAny{},
+		},
 	},
 	syscall.SYS_GETDENTS64:   {},
 	syscall.SYS_GETPID:       {},
@@ -112,6 +130,7 @@ var allowedSyscalls = seccomp.SyscallRules{
 	syscall.SYS_LINKAT:       {},
 	syscall.SYS_LSEEK:        {},
 	syscall.SYS_MADVISE:      {},
+	unix.SYS_MEMFD_CREATE:    {}, /// Used by flipcall.PacketWindowAllocator.Init().
 	syscall.SYS_MKDIRAT:      {},
 	syscall.SYS_MMAP: []seccomp.Rule{
 		{
@@ -160,6 +179,13 @@ var allowedSyscalls = seccomp.SyscallRules{
 	syscall.SYS_RT_SIGPROCMASK:  {},
 	syscall.SYS_SCHED_YIELD:     {},
 	syscall.SYS_SENDMSG: []seccomp.Rule{
+		// Used by fdchannel.Endpoint.SendFD().
+		{
+			seccomp.AllowAny{},
+			seccomp.AllowAny{},
+			seccomp.AllowValue(0),
+		},
+		// Used by unet.SocketWriter.WriteVec().
 		{
 			seccomp.AllowAny{},
 			seccomp.AllowAny{},
@@ -170,7 +196,15 @@ var allowedSyscalls = seccomp.SyscallRules{
 		{seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
 	},
 	syscall.SYS_SIGALTSTACK: {},
-	syscall.SYS_SYMLINKAT:   {},
+	// Used by fdchannel.NewConnectedSockets().
+	syscall.SYS_SOCKETPAIR: {
+		{
+			seccomp.AllowValue(syscall.AF_UNIX),
+			seccomp.AllowValue(syscall.SOCK_SEQPACKET | syscall.SOCK_CLOEXEC),
+			seccomp.AllowValue(0),
+		},
+	},
+	syscall.SYS_SYMLINKAT: {},
 	syscall.SYS_TGKILL: []seccomp.Rule{
 		{
 			seccomp.AllowValue(uint64(os.Getpid())),
-- 
cgit v1.2.3


From 010b0932583711ab3f6a88b1136cf8d87c2a53d2 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Mon, 16 Sep 2019 08:15:40 -0700
Subject: Bring back to life features lost in recent refactor

- Sandbox logs are generated when running tests
- Kokoro uploads the sandbox logs
- Supports multiple parallel runs
- Revive script to install locally built runsc with docker

PiperOrigin-RevId: 269337274
---
 CONTRIBUTING.md                | 32 +++++++++++++++++++
 runsc/boot/config.go           | 26 ++++++++++-----
 runsc/container/container.go   |  9 +++++-
 runsc/dockerutil/dockerutil.go | 15 ++++++---
 runsc/main.go                  |  4 ++-
 runsc/sandbox/sandbox.go       | 10 +++++-
 runsc/specutils/specutils.go   | 16 +++++++++-
 scripts/common.sh              | 59 +++++++++++++++++++++++++++++++++-
 scripts/common_bazel.sh        | 34 ++++++++++++++------
 scripts/dev.sh                 | 72 ++++++++++++++++++++++++++++++++++++++++++
 scripts/docker_tests.sh        |  6 ++--
 scripts/go.sh                  |  2 +-
 scripts/hostnet_tests.sh       |  5 ++-
 scripts/kvm_tests.sh           |  5 ++-
 scripts/overlay_tests.sh       |  5 ++-
 scripts/root_tests.sh          |  6 ++--
 test/root/main_test.go         |  5 +--
 17 files changed, 265 insertions(+), 46 deletions(-)
 create mode 100755 scripts/dev.sh

(limited to 'runsc/boot')

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 638942a42..5d46168bc 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -83,6 +83,8 @@ Rules:
 
 ### Code reviews
 
+Before sending code reviews, run `bazel test ...` to ensure tests are passing.
+
 Code changes are accepted via [pull request][github].
 
 When approved, the change will be submitted by a team member and automatically
@@ -100,6 +102,36 @@ form `b/1234`. These correspond to bugs in our internal bug tracker. Eventually
 these bugs will be moved to the GitHub Issues, but until then they can simply be
 ignored.
 
+### Build and test with Docker
+
+`scripts/dev.sh` is a convenient script that builds and installs `runsc` as a
+new Docker runtime for you. The script tries to extract the runtime name from
+your local environment and will print it at the end. You can also customize it.
+The script creates one regular runtime and another with debug flags enabled.
+Here are a few examples:
+
+```bash
+# Default case (inside branch my-branch)
+$ scripts/dev.sh
+...
+Runtimes my-branch and my-branch-d (debug enabled) setup.
+Use --runtime=my-branch with your Docker command.
+  docker run --rm --runtime=my-branch --rm hello-world
+
+If you rebuild, use scripts/dev.sh --refresh.
+Logs are in: /tmp/my-branch/logs
+
+# --refresh just updates the runtime binary and doesn't restart docker.
+$ git/my_branch> scripts/dev.sh --refresh
+
+# Using a custom runtime name
+$ git/my_branch> scripts/dev.sh my-runtime
+...
+Runtimes my-runtime and my-runtime-d (debug enabled) setup.
+Use --runtime=my-runtime with your Docker command.
+  docker run --rm --runtime=my-runtime --rm hello-world
+```
+
 ### The small print
 
 Contributions made by corporations are covered by a different agreement than the
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 05b8f8761..31103367d 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -211,12 +211,6 @@ type Config struct {
 	// RestoreFile is the path to the saved container image
 	RestoreFile string
 
-	// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
-	// tests. It allows runsc to start the sandbox process as the current
-	// user, and without chrooting the sandbox process. This can be
-	// necessary in test environments that have limited capabilities.
-	TestOnlyAllowRunAsCurrentUserWithoutChroot bool
-
 	// NumNetworkChannels controls the number of AF_PACKET sockets that map
 	// to the same underlying network device. This allows netstack to better
 	// scale for high throughput use cases.
@@ -233,6 +227,19 @@ type Config struct {
 
 	// ReferenceLeakMode sets reference leak check mode
 	ReferenceLeakMode refs.LeakMode
+
+	// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
+	// tests. It allows runsc to start the sandbox process as the current
+	// user, and without chrooting the sandbox process. This can be
+	// necessary in test environments that have limited capabilities.
+	TestOnlyAllowRunAsCurrentUserWithoutChroot bool
+
+	// TestOnlyTestNameEnv should only be used in tests. It looks up the
+	// test name in the container environment variables and adds it to the debug
+	// log file name. This is done to help identify the log with the test when
+	// multiple tests are run in parallel, since there is no way to pass
+	// parameters to the runtime from docker.
+	TestOnlyTestNameEnv string
 }
 
 // ToFlags returns a slice of flags that correspond to the given Config.
@@ -261,9 +268,12 @@ func (c *Config) ToFlags() []string {
 		"--alsologtostderr=" + strconv.FormatBool(c.AlsoLogToStderr),
 		"--ref-leak-mode=" + refsLeakModeToString(c.ReferenceLeakMode),
 	}
+	// Only include these if set since they are never to be used by users.
 	if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
-		// Only include if set since it is never to be used by users.
-		f = append(f, "-TESTONLY-unsafe-nonroot=true")
+		f = append(f, "--TESTONLY-unsafe-nonroot=true")
+	}
+	if len(c.TestOnlyTestNameEnv) != 0 {
+		f = append(f, "--TESTONLY-test-name-env="+c.TestOnlyTestNameEnv)
 	}
 	return f
 }
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 00f1b1de9..a721c1c31 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -946,7 +946,14 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
 	}
 
 	if conf.DebugLog != "" {
-		debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "gofer")
+		test := ""
+		if len(conf.TestOnlyTestNameEnv) != 0 {
+			// Fetch test name if one is provided and the test only flag was set.
+			if t, ok := specutils.EnvVar(spec.Process.Env, conf.TestOnlyTestNameEnv); ok {
+				test = t
+			}
+		}
+		debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "gofer", test)
 		if err != nil {
 			return nil, nil, fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err)
 		}
diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go
index 41f5fe1e8..c073d8f75 100644
--- a/runsc/dockerutil/dockerutil.go
+++ b/runsc/dockerutil/dockerutil.go
@@ -240,7 +240,7 @@ func (d *Docker) Stop() error {
 // Run calls 'docker run' with the arguments provided. The container starts
 // running in the background and the call returns immediately.
 func (d *Docker) Run(args ...string) error {
-	a := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-d"}
+	a := d.runArgs("-d")
 	a = append(a, args...)
 	_, err := do(a...)
 	if err == nil {
@@ -251,7 +251,7 @@ func (d *Docker) Run(args ...string) error {
 
 // RunWithPty is like Run but with an attached pty.
 func (d *Docker) RunWithPty(args ...string) (*exec.Cmd, *os.File, error) {
-	a := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-it"}
+	a := d.runArgs("-it")
 	a = append(a, args...)
 	return doWithPty(a...)
 }
@@ -259,8 +259,7 @@ func (d *Docker) RunWithPty(args ...string) (*exec.Cmd, *os.File, error) {
 // RunFg calls 'docker run' with the arguments provided in the foreground. It
 // blocks until the container exits and returns the output.
 func (d *Docker) RunFg(args ...string) (string, error) {
-	a := []string{"run", "--runtime", d.Runtime, "--name", d.Name}
-	a = append(a, args...)
+	a := d.runArgs(args...)
 	out, err := do(a...)
 	if err == nil {
 		d.logDockerID()
@@ -268,6 +267,14 @@ func (d *Docker) RunFg(args ...string) (string, error) {
 	return string(out), err
 }
 
+func (d *Docker) runArgs(args ...string) []string {
+	// Environment variable RUNSC_TEST_NAME is picked up by the runtime and added
+	// to the log name, so one can easily identify the corresponding logs for
+	// this test.
+	rv := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-e", "RUNSC_TEST_NAME=" + d.Name}
+	return append(rv, args...)
+}
+
 // Logs calls 'docker logs'.
 func (d *Docker) Logs() (string, error) {
 	return do("logs", d.Name)
diff --git a/runsc/main.go b/runsc/main.go
index 0ff68160d..ff74c0a3d 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -79,6 +79,7 @@ var (
 
 	// Test flags, not to be used outside tests, ever.
 	testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
+	testOnlyTestNameEnv                        = flag.String("TESTONLY-test-name-env", "", "TEST ONLY; do not ever use! Used for automated tests to improve logging.")
 )
 
 func main() {
@@ -211,6 +212,7 @@ func main() {
 		ReferenceLeakMode:  refsLeakMode,
 
 		TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
+		TestOnlyTestNameEnv:                        *testOnlyTestNameEnv,
 	}
 	if len(*straceSyscalls) != 0 {
 		conf.StraceSyscalls = strings.Split(*straceSyscalls, ",")
@@ -244,7 +246,7 @@ func main() {
 		e = newEmitter(*debugLogFormat, f)
 
 	} else if *debugLog != "" {
-		f, err := specutils.DebugLogFile(*debugLog, subcommand)
+		f, err := specutils.DebugLogFile(*debugLog, subcommand, "" /* name */)
 		if err != nil {
 			cmd.Fatalf("error opening debug log file in %q: %v", *debugLog, err)
 		}
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index df3c0c5ef..4c6c83fbd 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -351,7 +351,15 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		nextFD++
 	}
 	if conf.DebugLog != "" {
-		debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "boot")
+		test := ""
+		if len(conf.TestOnlyTestNameEnv) != 0 {
+			// Look up the test name only when the test-only flag names an env var.
+			if t, ok := specutils.EnvVar(args.Spec.Process.Env, conf.TestOnlyTestNameEnv); ok {
+				test = t
+			}
+		}
+
+		debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "boot", test)
 		if err != nil {
 			return fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err)
 		}
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index df435f88d..cb9e58dfb 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -399,13 +399,15 @@ func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) er
 //   - %TIMESTAMP%: is replaced with a timestamp using the following format:
 //			<yyyymmdd-hhmmss.uuuuuu>
 //	 - %COMMAND%: is replaced with 'command'
-func DebugLogFile(logPattern, command string) (*os.File, error) {
+//	 - %TEST%: is replaced with 'test' (omitted by default)
+func DebugLogFile(logPattern, command, test string) (*os.File, error) {
 	if strings.HasSuffix(logPattern, "/") {
 		// Default format: <debug-log>/runsc.log.<yyyymmdd-hhmmss.uuuuuu>.<command>
 		logPattern += "runsc.log.%TIMESTAMP%.%COMMAND%"
 	}
 	logPattern = strings.Replace(logPattern, "%TIMESTAMP%", time.Now().Format("20060102-150405.000000"), -1)
 	logPattern = strings.Replace(logPattern, "%COMMAND%", command, -1)
+	logPattern = strings.Replace(logPattern, "%TEST%", test, -1)
 
 	dir := filepath.Dir(logPattern)
 	if err := os.MkdirAll(dir, 0775); err != nil {
@@ -542,3 +544,15 @@ func GetParentPid(pid int) (int, error) {
 
 	return ppid, nil
 }
+
+// EnvVar returns the value of the first "NAME=VALUE" entry in env whose NAME
+// equals name, and whether such an entry was found.
+func EnvVar(env []string, name string) (string, bool) {
+	prefix := name + "="
+	for _, e := range env {
+		if strings.HasPrefix(e, prefix) {
+			return strings.TrimPrefix(e, prefix), true
+		}
+	}
+	return "", false
+}
diff --git a/scripts/common.sh b/scripts/common.sh
index f2b9e24d8..6dabad141 100755
--- a/scripts/common.sh
+++ b/scripts/common.sh
@@ -14,10 +14,67 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-set -xeo pipefail
+set -xeou pipefail
 
 if [[ -f $(dirname $0)/common_google.sh ]]; then
   source $(dirname $0)/common_google.sh
 else
   source $(dirname $0)/common_bazel.sh
 fi
+
+# Ensure it attempts to collect logs in all cases.
+trap collect_logs EXIT
+
+function set_runtime() {
+  RUNTIME=${1:-runsc}
+  RUNSC_BIN=/tmp/"${RUNTIME}"/runsc
+  RUNSC_LOGS_DIR="$(dirname ${RUNSC_BIN})"/logs
+  RUNSC_LOGS="${RUNSC_LOGS_DIR}"/runsc.log.%TEST%.%TIMESTAMP%.%COMMAND%
+}
+
+function test_runsc() {
+  test --test_arg=--runtime=${RUNTIME} "$@"
+}
+
+function install_runsc_for_test() {
+  local -r test_name=${1:-}  # Default to empty so "set -u" doesn't abort first.
+  if [[ -z "${test_name}" ]]; then
+    echo "Missing mandatory test name"
+    exit 1
+  fi
+  shift
+
+  # Add test to the name, so it doesn't conflict with other runtimes.
+  set_runtime "$(find_branch_name)_${test_name}"
+
+  # ${RUNSC_TEST_NAME} is set by tests (see dockerutil) to pass the test name
+  # down to the runtime.
+  install_runsc "${RUNTIME}" \
+      --TESTONLY-test-name-env=RUNSC_TEST_NAME \
+      --debug \
+      --strace \
+      --log-packets \
+      "$@"
+}
+
+# Installs the runsc with given runtime name. set_runtime must have been called
+# to set runtime and logs location.
+function install_runsc() {
+  local -r runtime=$1
+  shift
+
+  # Prepare the runtime binary.
+  local -r output=$(build //runsc)
+  mkdir -p "$(dirname ${RUNSC_BIN})"
+  cp -f "${output}" "${RUNSC_BIN}"
+  chmod 0755 "${RUNSC_BIN}"
+
+  # Install the runtime.
+  sudo "${RUNSC_BIN}" install --experimental=true --runtime="${runtime}" -- --debug-log "${RUNSC_LOGS}" "$@"
+
+  # Clear old logs files that may exist.
+  sudo rm -f "${RUNSC_LOGS_DIR}"/*
+
+  # Restart docker to pick up the new runtime configuration.
+  sudo systemctl restart docker
+}
diff --git a/scripts/common_bazel.sh b/scripts/common_bazel.sh
index dc0e2041d..dde0b51ed 100755
--- a/scripts/common_bazel.sh
+++ b/scripts/common_bazel.sh
@@ -53,16 +53,7 @@ function build() {
 }
 
 function test() {
-  (bazel test "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" && rc=0) || rc=$?
-
-  # Zip out everything into a convenient form.
-  if [[ -v KOKORO_ARTIFACTS_DIR ]] && [[ -e bazel-testlogs ]]; then
-    find -L "bazel-testlogs" -name "test.xml" -o -name "test.log" -o -name "outputs.zip" |
-      tar --create --files-from - --transform 's/test\./sponge_log./' |
-      tar --extract --directory ${KOKORO_ARTIFACTS_DIR}
-  fi
-
-  return $rc
+  bazel test "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@"
 }
 
 function run() {
@@ -76,3 +67,26 @@ function run_as_root() {
   shift
   bazel run --run_under="sudo" "${binary}" -- "$@"
 }
+
+function collect_logs() {
+  # Copy bazel test logs and sandbox logs into the Kokoro artifacts directory.
+  if [[ -v KOKORO_ARTIFACTS_DIR ]] && [[ -e bazel-testlogs ]]; then
+    # Move test logs to Kokoro directory. tar is used to conveniently perform
+    # renames while moving files.
+    find -L "bazel-testlogs" -name "test.xml" -o -name "test.log" -o -name "outputs.zip" |
+      tar --create --files-from - --transform 's/test\./sponge_log./' |
+      tar --extract --directory "${KOKORO_ARTIFACTS_DIR}"
+
+    # Collect sentry logs, but only when the logs directory is non-empty.
+    if [[ -v RUNSC_LOGS_DIR ]] && [[ -d "${RUNSC_LOGS_DIR}" ]]; then
+      local -r logs=$(ls "${RUNSC_LOGS_DIR}")
+      if [[ -n "${logs}" ]]; then
+        tar --create --gzip --file="${KOKORO_ARTIFACTS_DIR}/${RUNTIME}.tar.gz" -C "${RUNSC_LOGS_DIR}" .
+      fi
+    fi
+  fi
+}
+
+function find_branch_name() {
+  git branch --show-current || git rev-parse HEAD || bazel info workspace | xargs basename
+}
diff --git a/scripts/dev.sh b/scripts/dev.sh
new file mode 100755
index 000000000..64151c558
--- /dev/null
+++ b/scripts/dev.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+# Copyright 2019 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source $(dirname $0)/common.sh
+
+# common.sh sets '-x', but it's annoying to see so much output.
+set +x
+
+# Defaults
+declare -i REFRESH=0
+declare NAME=$(find_branch_name)
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --refresh)
+      REFRESH=1
+      ;;
+    --help)
+      echo "Use this script to build and install runsc with Docker."
+      echo
+      echo "usage: $0 [--refresh] [runtime_name]"
+      exit 1
+      ;;
+    *)
+      NAME=$1
+      ;;
+  esac
+  shift
+done
+
+set_runtime "${NAME}"
+echo
+echo "Using runtime=${RUNTIME}"
+echo
+
+echo Building runsc...
+# Build first and fail on error. $() prevents "set -e" from reporting errors.
+build //runsc
+declare OUTPUT="$(build //runsc)"
+
+if [[ ${REFRESH} -eq 0 ]]; then
+  install_runsc "${RUNTIME}"   --net-raw
+  install_runsc "${RUNTIME}-d" --net-raw --debug --strace --log-packets
+
+  echo
+  echo "Runtimes ${RUNTIME} and ${RUNTIME}-d (debug enabled) setup."
+  echo "Use --runtime=${RUNTIME} with your Docker command."
+  echo "  docker run --rm --runtime=${RUNTIME} --rm hello-world"
+  echo
+  echo "If you rebuild, use $0 --refresh."
+
+else
+  cp -f "${OUTPUT}" "${RUNSC_BIN}"  # Refresh only replaces the installed binary.
+
+  echo
+  echo "Runtime ${RUNTIME} refreshed."
+fi
+
+echo "Logs are in: ${RUNSC_LOGS_DIR}"
diff --git a/scripts/docker_tests.sh b/scripts/docker_tests.sh
index d6b18a35b..72ba05260 100755
--- a/scripts/docker_tests.sh
+++ b/scripts/docker_tests.sh
@@ -16,7 +16,5 @@
 
 source $(dirname $0)/common.sh
 
-# Install the runtime and perform basic tests.
-run_as_root //runsc install --experimental=true -- --debug --strace --log-packets
-sudo systemctl restart docker
-test //test/image:image_test //test/e2e:integration_test
+install_runsc_for_test docker
+test_runsc //test/image:image_test //test/e2e:integration_test
diff --git a/scripts/go.sh b/scripts/go.sh
index f24fad04c..0dbfb7747 100755
--- a/scripts/go.sh
+++ b/scripts/go.sh
@@ -29,7 +29,7 @@ git checkout go && git clean -f
 go build ./...
 
 # Push, if required.
-if [[ "${KOKORO_GO_PUSH}" == "true" ]]; then
+if [[ -v KOKORO_GO_PUSH ]] && [[ "${KOKORO_GO_PUSH}" == "true" ]]; then
   if [[ -v KOKORO_GITHUB_ACCESS_TOKEN ]]; then
     git config --global credential.helper cache
     git credential approve <<EOF
diff --git a/scripts/hostnet_tests.sh b/scripts/hostnet_tests.sh
index 0631c5510..41298293d 100755
--- a/scripts/hostnet_tests.sh
+++ b/scripts/hostnet_tests.sh
@@ -17,6 +17,5 @@
 source $(dirname $0)/common.sh
 
 # Install the runtime and perform basic tests.
-run_as_root //runsc install --experimental=true -- --debug --strace --log-packets --network=host
-sudo systemctl restart docker
-test --test_arg=-checkpoint=false //test/image:image_test //test/e2e:integration_test
+install_runsc_for_test hostnet --network=host
+test_runsc --test_arg=-checkpoint=false //test/image:image_test //test/e2e:integration_test
diff --git a/scripts/kvm_tests.sh b/scripts/kvm_tests.sh
index b6d787f0f..5662401df 100755
--- a/scripts/kvm_tests.sh
+++ b/scripts/kvm_tests.sh
@@ -24,6 +24,5 @@ sudo chmod a+rw /dev/kvm
 run_as_root //pkg/sentry/platform/kvm:kvm_test
 
 # Install the KVM runtime and run all integration tests.
-run_as_root //runsc install --experimental=true -- --debug --strace --log-packets --platform=kvm
-sudo systemctl restart docker
-test //test/image:image_test //test/e2e:integration_test
+install_runsc_for_test kvm --platform=kvm
+test_runsc //test/image:image_test //test/e2e:integration_test
diff --git a/scripts/overlay_tests.sh b/scripts/overlay_tests.sh
index 651a51f70..2a1f12c0b 100755
--- a/scripts/overlay_tests.sh
+++ b/scripts/overlay_tests.sh
@@ -17,6 +17,5 @@
 source $(dirname $0)/common.sh
 
 # Install the runtime and perform basic tests.
-run_as_root //runsc install --experimental=true -- --debug --strace --log-packets --overlay
-sudo systemctl restart docker
-test //test/image:image_test //test/e2e:integration_test
+install_runsc_for_test overlay --overlay
+test_runsc //test/image:image_test //test/e2e:integration_test
diff --git a/scripts/root_tests.sh b/scripts/root_tests.sh
index e42c0e3ec..4e4fcc76b 100755
--- a/scripts/root_tests.sh
+++ b/scripts/root_tests.sh
@@ -26,6 +26,6 @@ chmod +x ${shim_path}
 sudo mv ${shim_path} /usr/local/bin/gvisor-containerd-shim
 
 # Run the tests that require root.
-run_as_root //runsc install --experimental=true -- --debug --strace --log-packets
-sudo systemctl restart docker
-run_as_root //test/root:root_test
+install_runsc_for_test root
+run_as_root //test/root:root_test --runtime=${RUNTIME}
+
diff --git a/test/root/main_test.go b/test/root/main_test.go
index a3a2a91d9..d74dec85f 100644
--- a/test/root/main_test.go
+++ b/test/root/main_test.go
@@ -29,13 +29,15 @@ import (
 // supported docker version, required capabilities, and configures the executable
 // path for runsc.
 func TestMain(m *testing.M) {
-	dockerutil.EnsureSupportedDockerVersion()
+	flag.Parse()
 
 	if !specutils.HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_DAC_OVERRIDE) {
 		fmt.Println("Test requires sysadmin privileges to run. Try again with sudo.")
 		os.Exit(1)
 	}
 
+	dockerutil.EnsureSupportedDockerVersion()
+
 	// Configure exe for tests.
 	path, err := dockerutil.RuntimePath()
 	if err != nil {
@@ -43,6 +45,5 @@ func TestMain(m *testing.M) {
 	}
 	specutils.ExePath = path
 
-	flag.Parse()
 	os.Exit(m.Run())
 }
-- 
cgit v1.2.3


From ac38a7ead0870118d27d570a8a98a90a7a225a12 Mon Sep 17 00:00:00 2001
From: Robert Tonic <btonic@users.noreply.github.com>
Date: Thu, 19 Sep 2019 12:37:15 -0400
Subject: Place the host UDS mounting behind --fsgofer-host-uds-allowed.

This commit allows the use of the `--fsgofer-host-uds-allowed` flag to
enable mounting sockets and add the appropriate seccomp filters.
---
 runsc/boot/config.go           |  3 ++
 runsc/cmd/gofer.go             | 25 +++++++++++-----
 runsc/container/container.go   |  5 ++++
 runsc/fsgofer/filter/config.go | 23 ++++++++-------
 runsc/fsgofer/filter/filter.go | 12 ++++++++
 runsc/fsgofer/fsgofer.go       | 18 +++++++++---
 runsc/main.go                  | 66 ++++++++++++++++++++++--------------------
 7 files changed, 98 insertions(+), 54 deletions(-)

(limited to 'runsc/boot')

diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 7ae0dd05d..954ad2c2a 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -138,6 +138,9 @@ type Config struct {
 	// Overlay is whether to wrap the root filesystem in an overlay.
 	Overlay bool
 
+	// fsGoferHostUDSAllowed enables the gofer to mount a host UDS
+	FSGoferHostUDSAllowed bool
+
 	// Network indicates what type of network to use.
 	Network NetworkType
 
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 9faabf494..8e63c80e0 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -56,10 +56,11 @@ var goferCaps = &specs.LinuxCapabilities{
 // Gofer implements subcommands.Command for the "gofer" command, which starts a
 // filesystem gofer.  This command should not be called directly.
 type Gofer struct {
-	bundleDir string
-	ioFDs     intFlags
-	applyCaps bool
-	setUpRoot bool
+	bundleDir      string
+	ioFDs          intFlags
+	applyCaps      bool
+	hostUDSAllowed bool
+	setUpRoot      bool
 
 	panicOnWrite bool
 	specFD       int
@@ -86,6 +87,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) {
 	f.StringVar(&g.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory")
 	f.Var(&g.ioFDs, "io-fds", "list of FDs to connect 9P servers. They must follow this order: root first, then mounts as defined in the spec")
 	f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do")
+	f.BoolVar(&g.hostUDSAllowed, "host-uds-allowed", false, "if true, allow the Gofer to mount a host UDS")
 	f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected")
 	f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process")
 	f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
@@ -180,8 +182,9 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	for _, m := range spec.Mounts {
 		if specutils.Is9PMount(m) {
 			cfg := fsgofer.Config{
-				ROMount:      isReadonlyMount(m.Options),
-				PanicOnWrite: g.panicOnWrite,
+				ROMount:        isReadonlyMount(m.Options),
+				PanicOnWrite:   g.panicOnWrite,
+				HostUDSAllowed: g.hostUDSAllowed,
 			}
 			ap, err := fsgofer.NewAttachPoint(m.Destination, cfg)
 			if err != nil {
@@ -200,8 +203,14 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		Fatalf("too many FDs passed for mounts. mounts: %d, FDs: %d", mountIdx, len(g.ioFDs))
 	}
 
-	if err := filter.Install(); err != nil {
-		Fatalf("installing seccomp filters: %v", err)
+	if g.hostUDSAllowed {
+		if err := filter.InstallUDS(); err != nil {
+			Fatalf("installing UDS seccomp filters: %v", err)
+		}
+	} else {
+		if err := filter.Install(); err != nil {
+			Fatalf("installing seccomp filters: %v", err)
+		}
 	}
 
 	runServers(ats, g.ioFDs)
diff --git a/runsc/container/container.go b/runsc/container/container.go
index bbb364214..ceadb38aa 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -941,6 +941,11 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
 		args = append(args, "--panic-on-write=true")
 	}
 
+	// Add support for mounting host UDS in the gofer
+	if conf.FSGoferHostUDSAllowed {
+		args = append(args, "--host-uds-allowed=true")
+	}
+
 	// Open the spec file to donate to the sandbox.
 	specFile, err := specutils.OpenSpec(bundleDir)
 	if err != nil {
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index 73407383d..8989cdb2f 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -26,16 +26,6 @@ import (
 // allowedSyscalls is the set of syscalls executed by the gofer.
 var allowedSyscalls = seccomp.SyscallRules{
 	syscall.SYS_ACCEPT: {},
-	syscall.SYS_SOCKET: []seccomp.Rule{
-		{
-			seccomp.AllowValue(syscall.AF_UNIX),
-		},
-	},
-	syscall.SYS_CONNECT: []seccomp.Rule{
-		{
-			seccomp.AllowAny{},
-		},
-	},
 	syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
 		{seccomp.AllowValue(linux.ARCH_GET_FS)},
 		{seccomp.AllowValue(linux.ARCH_SET_FS)},
@@ -194,3 +184,16 @@ var allowedSyscalls = seccomp.SyscallRules{
 	syscall.SYS_UTIMENSAT: {},
 	syscall.SYS_WRITE:     {},
 }
+
+var udsSyscalls = seccomp.SyscallRules{
+	syscall.SYS_SOCKET: []seccomp.Rule{
+		{
+			seccomp.AllowValue(syscall.AF_UNIX),
+		},
+	},
+	syscall.SYS_CONNECT: []seccomp.Rule{
+		{
+			seccomp.AllowAny{},
+		},
+	},
+}
diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go
index 65053415f..12ef19d18 100644
--- a/runsc/fsgofer/filter/filter.go
+++ b/runsc/fsgofer/filter/filter.go
@@ -31,3 +31,15 @@ func Install() error {
 
 	return seccomp.Install(s)
 }
+
+// InstallUDS installs the standard Gofer seccomp filters along with filters
+// allowing the gofer to connect to a host UDS.
+func InstallUDS() error {
+	// Use the base syscall
+	s := allowedSyscalls
+
+	// Add additional filters required for connecting to the host's sockets.
+	s.Merge(udsSyscalls)
+
+	return seccomp.Install(s)
+}
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 89171c811..d9f3ba8d6 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -85,6 +85,9 @@ type Config struct {
 
 	// PanicOnWrite panics on attempts to write to RO mounts.
 	PanicOnWrite bool
+
+	// HostUDS prevents
+	HostUDSAllowed bool
 }
 
 type attachPoint struct {
@@ -128,12 +131,21 @@ func (a *attachPoint) Attach() (p9.File, error) {
 		return nil, fmt.Errorf("stat file %q, err: %v", a.prefix, err)
 	}
 
+	// Acquire the attach point lock
+	a.attachedMu.Lock()
+	defer a.attachedMu.Unlock()
+
 	// Hold the file descriptor we are converting into a p9.File
 	var f *fd.FD
 
 	// Apply the S_IFMT bitmask so we can detect file type appropriately
 	switch fmtStat := stat.Mode & syscall.S_IFMT; {
 	case fmtStat == syscall.S_IFSOCK:
+		// Check to see if the CLI option has been set to allow the UDS mount
+		if !a.conf.HostUDSAllowed {
+			return nil, fmt.Errorf("host UDS support is disabled")
+		}
+
 		// Attempt to open a connection. Bubble up the failures.
 		f, err = fd.OpenUnix(a.prefix)
 		if err != nil {
@@ -144,7 +156,7 @@ func (a *attachPoint) Attach() (p9.File, error) {
 		// Default to Read/Write permissions.
 		mode := syscall.O_RDWR
 
-		// If the configuration is Read Only & the mount point is a directory,
+		// If the configuration is Read Only or the mount point is a directory,
 		// set the mode to Read Only.
 		if a.conf.ROMount || fmtStat == syscall.S_IFDIR {
 			mode = syscall.O_RDONLY
@@ -157,9 +169,7 @@ func (a *attachPoint) Attach() (p9.File, error) {
 		}
 	}
 
-	// Close the connection if the UDS is already attached.
-	a.attachedMu.Lock()
-	defer a.attachedMu.Unlock()
+	// Close the connection if already attached.
 	if a.attached {
 		f.Close()
 		return nil, fmt.Errorf("attach point already attached, prefix: %s", a.prefix)
diff --git a/runsc/main.go b/runsc/main.go
index c61583441..5eba949f6 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -63,17 +63,18 @@ var (
 	straceLogSize  = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs")
 
 	// Flags that control sandbox runtime behavior.
-	platformName       = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
-	network            = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
-	gso                = flag.Bool("gso", true, "enable generic segmenation offload")
-	fileAccess         = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
-	overlay            = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
-	watchdogAction     = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
-	panicSignal        = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
-	profile            = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
-	netRaw             = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
-	numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
-	rootless           = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
+	platformName          = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
+	network               = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
+	gso                   = flag.Bool("gso", true, "enable generic segmenation offload")
+	fileAccess            = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
+	fsGoferHostUDSAllowed = flag.Bool("fsgofer-host-uds-allowed", false, "Allow the gofer to mount Unix Domain Sockets.")
+	overlay               = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
+	watchdogAction        = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
+	panicSignal           = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
+	profile               = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
+	netRaw                = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
+	numNetworkChannels    = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
+	rootless              = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
 
 	// Test flags, not to be used outside tests, ever.
 	testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
@@ -171,27 +172,28 @@ func main() {
 
 	// Create a new Config from the flags.
 	conf := &boot.Config{
-		RootDir:            *rootDir,
-		Debug:              *debug,
-		LogFilename:        *logFilename,
-		LogFormat:          *logFormat,
-		DebugLog:           *debugLog,
-		DebugLogFormat:     *debugLogFormat,
-		FileAccess:         fsAccess,
-		Overlay:            *overlay,
-		Network:            netType,
-		GSO:                *gso,
-		LogPackets:         *logPackets,
-		Platform:           platformType,
-		Strace:             *strace,
-		StraceLogSize:      *straceLogSize,
-		WatchdogAction:     wa,
-		PanicSignal:        *panicSignal,
-		ProfileEnable:      *profile,
-		EnableRaw:          *netRaw,
-		NumNetworkChannels: *numNetworkChannels,
-		Rootless:           *rootless,
-		AlsoLogToStderr:    *alsoLogToStderr,
+		RootDir:               *rootDir,
+		Debug:                 *debug,
+		LogFilename:           *logFilename,
+		LogFormat:             *logFormat,
+		DebugLog:              *debugLog,
+		DebugLogFormat:        *debugLogFormat,
+		FileAccess:            fsAccess,
+		FSGoferHostUDSAllowed: *fsGoferHostUDSAllowed,
+		Overlay:               *overlay,
+		Network:               netType,
+		GSO:                   *gso,
+		LogPackets:            *logPackets,
+		Platform:              platformType,
+		Strace:                *strace,
+		StraceLogSize:         *straceLogSize,
+		WatchdogAction:        wa,
+		PanicSignal:           *panicSignal,
+		ProfileEnable:         *profile,
+		EnableRaw:             *netRaw,
+		NumNetworkChannels:    *numNetworkChannels,
+		Rootless:              *rootless,
+		AlsoLogToStderr:       *alsoLogToStderr,
 
 		TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
 	}
-- 
cgit v1.2.3


From 46beb919121f02d8bd110a54fb8f6de5dfd2891e Mon Sep 17 00:00:00 2001
From: Robert Tonic <btonic@users.noreply.github.com>
Date: Thu, 19 Sep 2019 17:10:50 -0400
Subject: Fix documentation, clean up seccomp filter installation, rename
 helpers.

Filter installation has been streamlined and functions renamed.
Documentation has been fixed to be standards compliant, and missing
documentation added. gofmt has also been applied to modified files.
---
 pkg/fd/fd.go                   |  6 +++---
 runsc/boot/config.go           |  2 +-
 runsc/cmd/gofer.go             | 12 +++++-------
 runsc/fsgofer/filter/filter.go | 19 ++++++-------------
 runsc/fsgofer/fsgofer.go       | 21 +++++++++++----------
 5 files changed, 26 insertions(+), 34 deletions(-)

(limited to 'runsc/boot')

diff --git a/pkg/fd/fd.go b/pkg/fd/fd.go
index 7f1f9d984..24e959944 100644
--- a/pkg/fd/fd.go
+++ b/pkg/fd/fd.go
@@ -17,12 +17,12 @@ package fd
 
 import (
 	"fmt"
+	"gvisor.dev/gvisor/pkg/unet"
 	"io"
 	"os"
 	"runtime"
 	"sync/atomic"
 	"syscall"
-	"gvisor.dev/gvisor/pkg/unet"
 )
 
 // ReadWriter implements io.ReadWriter, io.ReaderAt, and io.WriterAt for fd. It
@@ -186,8 +186,8 @@ func OpenAt(dir *FD, path string, flags int, mode uint32) (*FD, error) {
 	return New(f), nil
 }
 
-// OpenUnix Open a Unix Domain Socket and return the file descriptor for it.
-func OpenUnix(path string) (*FD, error) {
+// DialUnix connects to a Unix Domain Socket and returns the file descriptor.
+func DialUnix(path string) (*FD, error) {
 	socket, err := unet.Connect(path, false)
 	return New(socket.FD()), err
 }
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 954ad2c2a..f1adaba01 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -138,7 +138,7 @@ type Config struct {
 	// Overlay is whether to wrap the root filesystem in an overlay.
 	Overlay bool
 
-	// fsGoferHostUDSAllowed enables the gofer to mount a host UDS
+	// FSGoferHostUDSAllowed enables the gofer to mount a host UDS.
 	FSGoferHostUDSAllowed bool
 
 	// Network indicates what type of network to use.
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 8e63c80e0..fa4f0034d 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -204,13 +204,11 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	}
 
 	if g.hostUDSAllowed {
-		if err := filter.InstallUDS(); err != nil {
-			Fatalf("installing UDS seccomp filters: %v", err)
-		}
-	} else {
-		if err := filter.Install(); err != nil {
-			Fatalf("installing seccomp filters: %v", err)
-		}
+		filter.InstallUDSFilters()
+	}
+
+	if err := filter.Install(); err != nil {
+		Fatalf("installing seccomp filters: %v", err)
 	}
 
 	runServers(ats, g.ioFDs)
diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go
index 12ef19d18..8d4ec9c24 100644
--- a/runsc/fsgofer/filter/filter.go
+++ b/runsc/fsgofer/filter/filter.go
@@ -23,23 +23,16 @@ import (
 
 // Install installs seccomp filters.
 func Install() error {
-	s := allowedSyscalls
-
 	// Set of additional filters used by -race and -msan. Returns empty
 	// when not enabled.
-	s.Merge(instrumentationFilters())
+	allowedSyscalls.Merge(instrumentationFilters())
 
-	return seccomp.Install(s)
+	return seccomp.Install(allowedSyscalls)
 }
 
-// InstallUDS installs the standard Gofer seccomp filters along with filters
-// allowing the gofer to connect to a host UDS.
-func InstallUDS() error {
-	// Use the base syscall
-	s := allowedSyscalls
-
+// InstallUDSFilters installs the seccomp filters required to let the gofer connect
+// to a host UDS.
+func InstallUDSFilters() {
 	// Add additional filters required for connecting to the host's sockets.
-	s.Merge(udsSyscalls)
-
-	return seccomp.Install(s)
+	allowedSyscalls.Merge(udsSyscalls)
 }
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index d9f3ba8d6..357d712c6 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -21,6 +21,7 @@
 package fsgofer
 
 import (
+	"errors"
 	"fmt"
 	"io"
 	"math"
@@ -86,7 +87,7 @@ type Config struct {
 	// PanicOnWrite panics on attempts to write to RO mounts.
 	PanicOnWrite bool
 
-	// HostUDS prevents
+	// HostUDSAllowed signals whether the gofer can mount a host's UDS.
 	HostUDSAllowed bool
 }
 
@@ -131,23 +132,23 @@ func (a *attachPoint) Attach() (p9.File, error) {
 		return nil, fmt.Errorf("stat file %q, err: %v", a.prefix, err)
 	}
 
-	// Acquire the attach point lock
+	// Acquire the attach point lock.
 	a.attachedMu.Lock()
 	defer a.attachedMu.Unlock()
 
-	// Hold the file descriptor we are converting into a p9.File
+	// Hold the file descriptor we are converting into a p9.File.
 	var f *fd.FD
 
-	// Apply the S_IFMT bitmask so we can detect file type appropriately
-	switch fmtStat := stat.Mode & syscall.S_IFMT; {
-	case fmtStat == syscall.S_IFSOCK:
-		// Check to see if the CLI option has been set to allow the UDS mount
+	// Apply the S_IFMT bitmask so we can detect file type appropriately.
+	switch fmtStat := stat.Mode & syscall.S_IFMT; fmtStat {
+	case syscall.S_IFSOCK:
+		// Check to see if the CLI option has been set to allow the UDS mount.
 		if !a.conf.HostUDSAllowed {
-			return nil, fmt.Errorf("host UDS support is disabled")
+			return nil, errors.New("host UDS support is disabled")
 		}
 
 		// Attempt to open a connection. Bubble up the failures.
-		f, err = fd.OpenUnix(a.prefix)
+		f, err = fd.DialUnix(a.prefix)
 		if err != nil {
 			return nil, err
 		}
@@ -1058,7 +1059,7 @@ func (l *localFile) Flush() error {
 
 // Connect implements p9.File.
 func (l *localFile) Connect(p9.ConnectFlags) (*fd.FD, error) {
-	return fd.OpenUnix(l.hostPath)
+	return fd.DialUnix(l.hostPath)
 }
 
 // Close implements p9.File.
-- 
cgit v1.2.3


From f2ea8e6b249d729d4616ee219c0472bfff93a575 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Mon, 23 Sep 2019 17:04:45 -0700
Subject: Always set HOME env var with `runsc exec`.

We already do this for `runsc run`, but need to do the same for `runsc exec`.

PiperOrigin-RevId: 270793459
---
 runsc/boot/BUILD               |  1 +
 runsc/boot/loader.go           | 32 +++++++++++++++-----------------
 runsc/boot/user.go             | 28 ++++++++++++++++++++++++++--
 runsc/boot/user_test.go        |  3 ++-
 runsc/cmd/exec.go              |  1 +
 runsc/dockerutil/dockerutil.go |  8 ++++++++
 test/e2e/exec_test.go          | 42 ++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 95 insertions(+), 20 deletions(-)

(limited to 'runsc/boot')

diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 588bb8851..54d1ab129 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -109,6 +109,7 @@ go_test(
         "//pkg/sentry/arch:registers_go_proto",
         "//pkg/sentry/context/contexttest",
         "//pkg/sentry/fs",
+        "//pkg/sentry/kernel/auth",
         "//pkg/unet",
         "//runsc/fsgofer",
         "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 823a34619..d824d7dc5 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -20,7 +20,6 @@ import (
 	mrand "math/rand"
 	"os"
 	"runtime"
-	"strings"
 	"sync"
 	"sync/atomic"
 	"syscall"
@@ -535,23 +534,12 @@ func (l *Loader) run() error {
 			return err
 		}
 
-		// Read /etc/passwd for the user's HOME directory and set the HOME
-		// environment variable as required by POSIX if it is not overridden by
-		// the user.
-		hasHomeEnvv := false
-		for _, envv := range l.rootProcArgs.Envv {
-			if strings.HasPrefix(envv, "HOME=") {
-				hasHomeEnvv = true
-			}
-		}
-		if !hasHomeEnvv {
-			homeDir, err := getExecUserHome(ctx, l.rootProcArgs.MountNamespace, uint32(l.rootProcArgs.Credentials.RealKUID))
-			if err != nil {
-				return fmt.Errorf("error reading exec user: %v", err)
-			}
-
-			l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir)
+		// Add the HOME environment variable if it is not already set.
+		envv, err := maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+		if err != nil {
+			return err
 		}
+		l.rootProcArgs.Envv = envv
 
 		// Create the root container init task. It will begin running
 		// when the kernel is started.
@@ -815,6 +803,16 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
 	})
 	defer args.MountNamespace.DecRef()
 
+	// Add the HOME environment variable if it is not already set.
+	root := args.MountNamespace.Root()
+	defer root.DecRef()
+	ctx := fs.WithRoot(l.k.SupervisorContext(), root)
+	envv, err := maybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
+	if err != nil {
+		return 0, err
+	}
+	args.Envv = envv
+
 	// Start the process.
 	proc := control.Proc{Kernel: l.k}
 	args.PIDNamespace = tg.PIDNamespace()
diff --git a/runsc/boot/user.go b/runsc/boot/user.go
index d1d423a5c..56cc12ee0 100644
--- a/runsc/boot/user.go
+++ b/runsc/boot/user.go
@@ -16,6 +16,7 @@ package boot
 
 import (
 	"bufio"
+	"fmt"
 	"io"
 	"strconv"
 	"strings"
@@ -23,6 +24,7 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/sentry/context"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/usermem"
 )
 
@@ -42,7 +44,7 @@ func (r *fileReader) Read(buf []byte) (int, error) {
 
 // getExecUserHome returns the home directory of the executing user read from
 // /etc/passwd as read from the container filesystem.
-func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32) (string, error) {
+func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.KUID) (string, error) {
 	// The default user home directory to return if no user matching the user
 	// if found in the /etc/passwd found in the image.
 	const defaultHome = "/"
@@ -82,7 +84,7 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32
 		File: f,
 	}
 
-	homeDir, err := findHomeInPasswd(uid, r, defaultHome)
+	homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome)
 	if err != nil {
 		return "", err
 	}
@@ -90,6 +92,28 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32
 	return homeDir, nil
 }
 
+// maybeAddExecUserHome returns a new slice with the HOME environment variable
+// set if the slice does not already contain it, otherwise it returns the
+// original slice unmodified.
+func maybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) {
+	// Check if the envv already contains HOME.
+	for _, env := range envv {
+		if strings.HasPrefix(env, "HOME=") {
+			// We have it. Return the original slice unmodified.
+			return envv, nil
+		}
+	}
+
+	// Read /etc/passwd for the user's HOME directory and set the HOME
+	// environment variable as required by POSIX if it is not overridden by
+	// the user.
+	homeDir, err := getExecUserHome(ctx, mns, uid)
+	if err != nil {
+		return nil, fmt.Errorf("error reading exec user: %v", err)
+	}
+	return append(envv, "HOME="+homeDir), nil
+}
+
 // findHomeInPasswd parses a passwd file and returns the given user's home
 // directory. This function does it's best to replicate the runc's behavior.
 func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) {
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
index 906baf3e5..9aee2ad07 100644
--- a/runsc/boot/user_test.go
+++ b/runsc/boot/user_test.go
@@ -25,6 +25,7 @@ import (
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 )
 
 func setupTempDir() (string, error) {
@@ -68,7 +69,7 @@ func setupPasswd(contents string, perms os.FileMode) func() (string, error) {
 // TestGetExecUserHome tests the getExecUserHome function.
 func TestGetExecUserHome(t *testing.T) {
 	tests := map[string]struct {
-		uid        uint32
+		uid        auth.KUID
 		createRoot func() (string, error)
 		expected   string
 	}{
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index e817eff77..bf1225e1c 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -127,6 +127,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 			Fatalf("getting environment variables: %v", err)
 		}
 	}
+
 	if e.Capabilities == nil {
 		// enableRaw is set to true to prevent the filtering out of
 		// CAP_NET_RAW. This is the opposite of Create() because exec
diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go
index c073d8f75..e37ec0ffd 100644
--- a/runsc/dockerutil/dockerutil.go
+++ b/runsc/dockerutil/dockerutil.go
@@ -287,6 +287,14 @@ func (d *Docker) Exec(args ...string) (string, error) {
 	return do(a...)
 }
 
+// ExecAsUser calls 'docker exec' as the given user with the arguments
+// provided.
+func (d *Docker) ExecAsUser(user string, args ...string) (string, error) {
+	a := []string{"exec", "--user", user, d.Name}
+	a = append(a, args...)
+	return do(a...)
+}
+
 // ExecWithTerminal calls 'docker exec -it' with the arguments provided and
 // attaches a pty to stdio.
 func (d *Docker) ExecWithTerminal(args ...string) (*exec.Cmd, *os.File, error) {
diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go
index 267679268..7238c2afe 100644
--- a/test/e2e/exec_test.go
+++ b/test/e2e/exec_test.go
@@ -177,3 +177,45 @@ func TestExecEnv(t *testing.T) {
 		t.Errorf("wanted exec output to contain %q, got %q", want, got)
 	}
 }
+
+// Test that exec always has HOME environment set, even when not set in run.
+func TestExecEnvHasHome(t *testing.T) {
+	// Base alpine image does not have any environment variables set.
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatalf("docker pull failed: %v", err)
+	}
+	d := dockerutil.MakeDocker("exec-env-test")
+
+	// We will check that HOME is set for root user, and also for a new
+	// non-root user we will create.
+	newUID := 1234
+	newHome := "/foo/bar"
+
+	// Create a new user with a home directory, and then sleep.
+	script := fmt.Sprintf(`
+	mkdir -p -m 777 %s && \
+	adduser foo -D -u %d -h %s && \
+	sleep 1000`, newHome, newUID, newHome)
+	if err := d.Run("alpine", "/bin/sh", "-c", script); err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+	defer d.CleanUp()
+
+	// Exec "echo $HOME", and expect to see "/root".
+	got, err := d.Exec("/bin/sh", "-c", "echo $HOME")
+	if err != nil {
+		t.Fatalf("docker exec failed: %v", err)
+	}
+	if want := "/root"; !strings.Contains(got, want) {
+		t.Errorf("wanted exec output to contain %q, got %q", want, got)
+	}
+
+	// Execute the same as uid 1234 (newUID) and expect newHome.
+	got, err = d.ExecAsUser(strconv.Itoa(newUID), "/bin/sh", "-c", "echo $HOME")
+	if err != nil {
+		t.Fatalf("docker exec failed: %v", err)
+	}
+	if want := newHome; !strings.Contains(got, want) {
+		t.Errorf("wanted exec output to contain %q, got %q", want, got)
+	}
+}
-- 
cgit v1.2.3


From 7810b30983ec4d3a706df01163c29814cd21d6ca Mon Sep 17 00:00:00 2001
From: Robert Tonic <btonic@users.noreply.github.com>
Date: Tue, 24 Sep 2019 18:24:10 -0400
Subject: Refactor command line options and remove the allowed terminology for
 uds

---
 runsc/boot/config.go         |  5 ++--
 runsc/cmd/gofer.go           | 18 ++++++------
 runsc/container/container.go |  5 ----
 runsc/fsgofer/fsgofer.go     | 10 +++++--
 runsc/main.go                | 68 ++++++++++++++++++++++----------------------
 5 files changed, 52 insertions(+), 54 deletions(-)

(limited to 'runsc/boot')

diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index f1adaba01..b76b0e574 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -138,8 +138,8 @@ type Config struct {
 	// Overlay is whether to wrap the root filesystem in an overlay.
 	Overlay bool
 
-	// FSGoferHostUDSAllowed enables the gofer to mount a host UDS.
-	FSGoferHostUDSAllowed bool
+	// FSGoferHostUDS enables the gofer to mount a host UDS.
+	FSGoferHostUDS bool
 
 	// Network indicates what type of network to use.
 	Network NetworkType
@@ -217,6 +217,7 @@ func (c *Config) ToFlags() []string {
 		"--debug-log-format=" + c.DebugLogFormat,
 		"--file-access=" + c.FileAccess.String(),
 		"--overlay=" + strconv.FormatBool(c.Overlay),
+		"--fsgofer-host-uds=" + strconv.FormatBool(c.FSGoferHostUDS),
 		"--network=" + c.Network.String(),
 		"--log-packets=" + strconv.FormatBool(c.LogPackets),
 		"--platform=" + c.Platform,
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index fa4f0034d..fbd579fb8 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -56,11 +56,10 @@ var goferCaps = &specs.LinuxCapabilities{
 // Gofer implements subcommands.Command for the "gofer" command, which starts a
 // filesystem gofer.  This command should not be called directly.
 type Gofer struct {
-	bundleDir      string
-	ioFDs          intFlags
-	applyCaps      bool
-	hostUDSAllowed bool
-	setUpRoot      bool
+	bundleDir string
+	ioFDs     intFlags
+	applyCaps bool
+	setUpRoot bool
 
 	panicOnWrite bool
 	specFD       int
@@ -87,7 +86,6 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) {
 	f.StringVar(&g.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory")
 	f.Var(&g.ioFDs, "io-fds", "list of FDs to connect 9P servers. They must follow this order: root first, then mounts as defined in the spec")
 	f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do")
-	f.BoolVar(&g.hostUDSAllowed, "host-uds-allowed", false, "if true, allow the Gofer to mount a host UDS")
 	f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected")
 	f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process")
 	f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
@@ -182,9 +180,9 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	for _, m := range spec.Mounts {
 		if specutils.Is9PMount(m) {
 			cfg := fsgofer.Config{
-				ROMount:        isReadonlyMount(m.Options),
-				PanicOnWrite:   g.panicOnWrite,
-				HostUDSAllowed: g.hostUDSAllowed,
+				ROMount:      isReadonlyMount(m.Options),
+				PanicOnWrite: g.panicOnWrite,
+				HostUDS:      conf.FSGoferHostUDS,
 			}
 			ap, err := fsgofer.NewAttachPoint(m.Destination, cfg)
 			if err != nil {
@@ -203,7 +201,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		Fatalf("too many FDs passed for mounts. mounts: %d, FDs: %d", mountIdx, len(g.ioFDs))
 	}
 
-	if g.hostUDSAllowed {
+	if conf.FSGoferHostUDS {
 		filter.InstallUDSFilters()
 	}
 
diff --git a/runsc/container/container.go b/runsc/container/container.go
index ceadb38aa..bbb364214 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -941,11 +941,6 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
 		args = append(args, "--panic-on-write=true")
 	}
 
-	// Add support for mounting host UDS in the gofer
-	if conf.FSGoferHostUDSAllowed {
-		args = append(args, "--host-uds-allowed=true")
-	}
-
 	// Open the spec file to donate to the sandbox.
 	specFile, err := specutils.OpenSpec(bundleDir)
 	if err != nil {
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 357d712c6..507d52b50 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -87,8 +87,8 @@ type Config struct {
 	// PanicOnWrite panics on attempts to write to RO mounts.
 	PanicOnWrite bool
 
-	// HostUDSAllowed signals whether the gofer can mount a host's UDS.
-	HostUDSAllowed bool
+	// HostUDS signals whether the gofer can mount a host's UDS.
+	HostUDS bool
 }
 
 type attachPoint struct {
@@ -143,7 +143,7 @@ func (a *attachPoint) Attach() (p9.File, error) {
 	switch fmtStat := stat.Mode & syscall.S_IFMT; fmtStat {
 	case syscall.S_IFSOCK:
 		// Check to see if the CLI option has been set to allow the UDS mount.
-		if !a.conf.HostUDSAllowed {
+		if !a.conf.HostUDS {
 			return nil, errors.New("host UDS support is disabled")
 		}
 
@@ -1059,6 +1059,10 @@ func (l *localFile) Flush() error {
 
 // Connect implements p9.File.
 func (l *localFile) Connect(p9.ConnectFlags) (*fd.FD, error) {
+	// Refuse to dial the host socket unless host UDS support has been enabled.
+	if !l.attachPoint.conf.HostUDS {
+		return nil, errors.New("host UDS support is disabled")
+	}
 	return fd.DialUnix(l.hostPath)
 }
 
diff --git a/runsc/main.go b/runsc/main.go
index 5eba949f6..b788b1f76 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -63,18 +63,18 @@ var (
 	straceLogSize  = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs")
 
 	// Flags that control sandbox runtime behavior.
-	platformName          = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
-	network               = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
-	gso                   = flag.Bool("gso", true, "enable generic segmenation offload")
-	fileAccess            = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
-	fsGoferHostUDSAllowed = flag.Bool("fsgofer-host-uds-allowed", false, "Allow the gofer to mount Unix Domain Sockets.")
-	overlay               = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
-	watchdogAction        = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
-	panicSignal           = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
-	profile               = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
-	netRaw                = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
-	numNetworkChannels    = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
-	rootless              = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
+	platformName       = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
+	network            = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
+	gso                = flag.Bool("gso", true, "enable generic segmenation offload")
+	fileAccess         = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
+	fsGoferHostUDS     = flag.Bool("fsgofer-host-uds", false, "Allow the gofer to mount Unix Domain Sockets.")
+	overlay            = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
+	watchdogAction     = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
+	panicSignal        = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
+	profile            = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
+	netRaw             = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
+	numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
+	rootless           = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
 
 	// Test flags, not to be used outside tests, ever.
 	testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
@@ -172,28 +172,28 @@ func main() {
 
 	// Create a new Config from the flags.
 	conf := &boot.Config{
-		RootDir:               *rootDir,
-		Debug:                 *debug,
-		LogFilename:           *logFilename,
-		LogFormat:             *logFormat,
-		DebugLog:              *debugLog,
-		DebugLogFormat:        *debugLogFormat,
-		FileAccess:            fsAccess,
-		FSGoferHostUDSAllowed: *fsGoferHostUDSAllowed,
-		Overlay:               *overlay,
-		Network:               netType,
-		GSO:                   *gso,
-		LogPackets:            *logPackets,
-		Platform:              platformType,
-		Strace:                *strace,
-		StraceLogSize:         *straceLogSize,
-		WatchdogAction:        wa,
-		PanicSignal:           *panicSignal,
-		ProfileEnable:         *profile,
-		EnableRaw:             *netRaw,
-		NumNetworkChannels:    *numNetworkChannels,
-		Rootless:              *rootless,
-		AlsoLogToStderr:       *alsoLogToStderr,
+		RootDir:            *rootDir,
+		Debug:              *debug,
+		LogFilename:        *logFilename,
+		LogFormat:          *logFormat,
+		DebugLog:           *debugLog,
+		DebugLogFormat:     *debugLogFormat,
+		FileAccess:         fsAccess,
+		FSGoferHostUDS:     *fsGoferHostUDS,
+		Overlay:            *overlay,
+		Network:            netType,
+		GSO:                *gso,
+		LogPackets:         *logPackets,
+		Platform:           platformType,
+		Strace:             *strace,
+		StraceLogSize:      *straceLogSize,
+		WatchdogAction:     wa,
+		PanicSignal:        *panicSignal,
+		ProfileEnable:      *profile,
+		EnableRaw:          *netRaw,
+		NumNetworkChannels: *numNetworkChannels,
+		Rootless:           *rootless,
+		AlsoLogToStderr:    *alsoLogToStderr,
 
 		TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
 	}
-- 
cgit v1.2.3


From 59ccbb10446063f5347fb026e35549bc2f677971 Mon Sep 17 00:00:00 2001
From: Kevin Krakauer <krakauer@google.com>
Date: Wed, 25 Sep 2019 12:56:00 -0700
Subject: Remove centralized registration of protocols.

Also removes the need for protocol names.

PiperOrigin-RevId: 271186030
---
 pkg/tcpip/adapters/gonet/gonet_test.go             |   5 +-
 pkg/tcpip/network/arp/arp.go                       |  16 ++-
 pkg/tcpip/network/arp/arp_test.go                  |   5 +-
 pkg/tcpip/network/ip_test.go                       |  10 +-
 pkg/tcpip/network/ipv4/ipv4.go                     |  44 +++-----
 pkg/tcpip/network/ipv4/ipv4_test.go                |   9 +-
 pkg/tcpip/network/ipv6/icmp_test.go                |  15 ++-
 pkg/tcpip/network/ipv6/ipv6.go                     |  24 ++---
 pkg/tcpip/network/ipv6/ipv6_test.go                |  34 ++++---
 pkg/tcpip/network/ipv6/ndp_test.go                 |   5 +-
 pkg/tcpip/sample/tun_tcp_connect/main.go           |   5 +-
 pkg/tcpip/sample/tun_tcp_echo/main.go              |   5 +-
 pkg/tcpip/stack/registration.go                    |  36 -------
 pkg/tcpip/stack/stack.go                           |  45 ++++-----
 pkg/tcpip/stack/stack_test.go                      | 111 +++++++++++++++------
 pkg/tcpip/stack/transport_test.go                  |  35 +++++--
 pkg/tcpip/transport/icmp/protocol.go               |  27 ++---
 pkg/tcpip/transport/raw/protocol.go                |   9 +-
 pkg/tcpip/transport/tcp/protocol.go                |  22 ++--
 pkg/tcpip/transport/tcp/tcp_test.go                |  21 ++--
 pkg/tcpip/transport/tcp/testing/context/context.go |   5 +-
 pkg/tcpip/transport/udp/protocol.go                |  12 +--
 pkg/tcpip/transport/udp/udp_test.go                |   5 +-
 runsc/boot/BUILD                                   |   1 +
 runsc/boot/loader.go                               |  17 ++--
 25 files changed, 278 insertions(+), 245 deletions(-)

(limited to 'runsc/boot')

diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go
index 672f026b2..8ced960bb 100644
--- a/pkg/tcpip/adapters/gonet/gonet_test.go
+++ b/pkg/tcpip/adapters/gonet/gonet_test.go
@@ -60,7 +60,10 @@ func TestTimeouts(t *testing.T) {
 
 func newLoopbackStack() (*stack.Stack, *tcpip.Error) {
 	// Create the stack and add a NIC.
-	s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{tcp.ProtocolName, udp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol()},
+	})
 
 	if err := s.CreateNIC(NICID, loopback.New()); err != nil {
 		return nil, err
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index fd6395fc1..26cf1c528 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -16,9 +16,9 @@
 // IPv4 addresses into link-local MAC addresses, and advertises IPv4
 // addresses of its stack with the local network.
 //
-// To use it in the networking stack, pass arp.ProtocolName as one of the
-// network protocols when calling stack.New. Then add an "arp" address to
-// every NIC on the stack that should respond to ARP requests. That is:
+// To use it in the networking stack, pass arp.NewProtocol() as one of the
+// network protocols when calling stack.New. Then add an "arp" address to every
+// NIC on the stack that should respond to ARP requests. That is:
 //
 //	if err := s.AddAddress(1, arp.ProtocolNumber, "arp"); err != nil {
 //		// handle err
@@ -33,9 +33,6 @@ import (
 )
 
 const (
-	// ProtocolName is the string representation of the ARP protocol name.
-	ProtocolName = "arp"
-
 	// ProtocolNumber is the ARP protocol number.
 	ProtocolNumber = header.ARPProtocolNumber
 
@@ -200,8 +197,7 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
 
 var broadcastMAC = tcpip.LinkAddress([]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff})
 
-func init() {
-	stack.RegisterNetworkProtocolFactory(ProtocolName, func() stack.NetworkProtocol {
-		return &protocol{}
-	})
+// NewProtocol returns an ARP network protocol.
+func NewProtocol() stack.NetworkProtocol {
+	return &protocol{}
 }
diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go
index 387fca96e..88b57ec03 100644
--- a/pkg/tcpip/network/arp/arp_test.go
+++ b/pkg/tcpip/network/arp/arp_test.go
@@ -44,7 +44,10 @@ type testContext struct {
 }
 
 func newTestContext(t *testing.T) *testContext {
-	s := stack.New([]string{ipv4.ProtocolName, arp.ProtocolName}, []string{icmp.ProtocolName4}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), arp.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol4()},
+	})
 
 	const defaultMTU = 65536
 	ep := channel.New(256, defaultMTU, stackLinkAddr)
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index 6a40e7ee3..a9741622e 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -172,7 +172,10 @@ func (t *testObject) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prepen
 }
 
 func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
-	s := stack.New([]string{ipv4.ProtocolName}, []string{udp.ProtocolName, tcp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{udp.NewProtocol(), tcp.NewProtocol()},
+	})
 	s.CreateNIC(1, loopback.New())
 	s.AddAddress(1, ipv4.ProtocolNumber, local)
 	s.SetRouteTable([]tcpip.Route{{
@@ -185,7 +188,10 @@ func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
 }
 
 func buildIPv6Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
-	s := stack.New([]string{ipv6.ProtocolName}, []string{udp.ProtocolName, tcp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv6.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{udp.NewProtocol(), tcp.NewProtocol()},
+	})
 	s.CreateNIC(1, loopback.New())
 	s.AddAddress(1, ipv6.ProtocolNumber, local)
 	s.SetRouteTable([]tcpip.Route{{
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index b7a06f525..b7b07a6c1 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -14,9 +14,9 @@
 
 // Package ipv4 contains the implementation of the ipv4 network protocol. To use
 // it in the networking stack, this package must be added to the project, and
-// activated on the stack by passing ipv4.ProtocolName (or "ipv4") as one of the
-// network protocols when calling stack.New(). Then endpoints can be created
-// by passing ipv4.ProtocolNumber as the network protocol number when calling
+// activated on the stack by passing ipv4.NewProtocol() as one of the network
+// protocols when calling stack.New(). Then endpoints can be created by passing
+// ipv4.ProtocolNumber as the network protocol number when calling
 // Stack.NewEndpoint().
 package ipv4
 
@@ -32,9 +32,6 @@ import (
 )
 
 const (
-	// ProtocolName is the string representation of the ipv4 protocol name.
-	ProtocolName = "ipv4"
-
 	// ProtocolNumber is the ipv4 protocol number.
 	ProtocolNumber = header.IPv4ProtocolNumber
 
@@ -53,6 +50,7 @@ type endpoint struct {
 	linkEP        stack.LinkEndpoint
 	dispatcher    stack.TransportDispatcher
 	fragmentation *fragmentation.Fragmentation
+	protocol      *protocol
 }
 
 // NewEndpoint creates a new ipv4 endpoint.
@@ -64,6 +62,7 @@ func (p *protocol) NewEndpoint(nicid tcpip.NICID, addrWithPrefix tcpip.AddressWi
 		linkEP:        linkEP,
 		dispatcher:    dispatcher,
 		fragmentation: fragmentation.NewFragmentation(fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout),
+		protocol:      p,
 	}
 
 	return e, nil
@@ -204,7 +203,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
 	if length > header.IPv4MaximumHeaderSize+8 {
 		// Packets of 68 bytes or less are required by RFC 791 to not be
 		// fragmented, so we only assign ids to larger packets.
-		id = atomic.AddUint32(&ids[hashRoute(r, protocol)%buckets], 1)
+		id = atomic.AddUint32(&e.protocol.ids[hashRoute(r, protocol, e.protocol.hashIV)%buckets], 1)
 	}
 	ip.Encode(&header.IPv4Fields{
 		IHL:         header.IPv4MinimumSize,
@@ -267,7 +266,7 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.Vect
 		if payload.Size() > header.IPv4MaximumHeaderSize+8 {
 			// Packets of 68 bytes or less are required by RFC 791 to not be
 			// fragmented, so we only assign ids to larger packets.
-			id = atomic.AddUint32(&ids[hashRoute(r, 0 /* protocol */)%buckets], 1)
+			id = atomic.AddUint32(&e.protocol.ids[hashRoute(r, 0 /* protocol */, e.protocol.hashIV)%buckets], 1)
 		}
 		ip.SetID(uint16(id))
 	}
@@ -325,14 +324,9 @@ func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) {
 // Close cleans up resources associated with the endpoint.
 func (e *endpoint) Close() {}
 
-type protocol struct{}
-
-// NewProtocol creates a new protocol ipv4 protocol descriptor. This is exported
-// only for tests that short-circuit the stack. Regular use of the protocol is
-// done via the stack, which gets a protocol descriptor from the init() function
-// below.
-func NewProtocol() stack.NetworkProtocol {
-	return &protocol{}
+type protocol struct {
+	ids    []uint32
+	hashIV uint32
 }
 
 // Number returns the ipv4 protocol number.
@@ -378,7 +372,7 @@ func calculateMTU(mtu uint32) uint32 {
 // hashRoute calculates a hash value for the given route. It uses the source &
 // destination address, the transport protocol number, and a random initial
 // value (generated once on initialization) to generate the hash.
-func hashRoute(r *stack.Route, protocol tcpip.TransportProtocolNumber) uint32 {
+func hashRoute(r *stack.Route, protocol tcpip.TransportProtocolNumber, hashIV uint32) uint32 {
 	t := r.LocalAddress
 	a := uint32(t[0]) | uint32(t[1])<<8 | uint32(t[2])<<16 | uint32(t[3])<<24
 	t = r.RemoteAddress
@@ -386,22 +380,16 @@ func hashRoute(r *stack.Route, protocol tcpip.TransportProtocolNumber) uint32 {
 	return hash.Hash3Words(a, b, uint32(protocol), hashIV)
 }
 
-var (
-	ids    []uint32
-	hashIV uint32
-)
-
-func init() {
-	ids = make([]uint32, buckets)
+// NewProtocol returns an IPv4 network protocol.
+func NewProtocol() stack.NetworkProtocol {
+	ids := make([]uint32, buckets)
 
 	// Randomly initialize hashIV and the ids.
 	r := hash.RandN32(1 + buckets)
 	for i := range ids {
 		ids[i] = r[i]
 	}
-	hashIV = r[buckets]
+	hashIV := r[buckets]
 
-	stack.RegisterNetworkProtocolFactory(ProtocolName, func() stack.NetworkProtocol {
-		return &protocol{}
-	})
+	return &protocol{ids: ids, hashIV: hashIV}
 }
diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go
index ae827ca27..b6641ccc3 100644
--- a/pkg/tcpip/network/ipv4/ipv4_test.go
+++ b/pkg/tcpip/network/ipv4/ipv4_test.go
@@ -33,7 +33,10 @@ import (
 )
 
 func TestExcludeBroadcast(t *testing.T) {
-	s := stack.New([]string{ipv4.ProtocolName}, []string{udp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+	})
 
 	const defaultMTU = 65536
 	ep := stack.LinkEndpoint(channel.New(256, defaultMTU, ""))
@@ -238,7 +241,9 @@ type context struct {
 
 func buildContext(t *testing.T, packetCollectorErrors []*tcpip.Error, mtu uint32) context {
 	// Make the packet and write it.
-	s := stack.New([]string{ipv4.ProtocolName}, []string{}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
+	})
 	ep := newErrorChannel(100 /* Enough for all tests. */, mtu, "", packetCollectorErrors)
 	s.CreateNIC(1, ep)
 	const (
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index 653d984e9..01f5a17ec 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -81,7 +81,10 @@ func (*stubLinkAddressCache) AddLinkAddress(tcpip.NICID, tcpip.Address, tcpip.Li
 }
 
 func TestICMPCounts(t *testing.T) {
-	s := stack.New([]string{ProtocolName}, []string{icmp.ProtocolName6}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+	})
 	{
 		if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil {
 			t.Fatalf("CreateNIC(_) = %s", err)
@@ -205,8 +208,14 @@ func (e endpointWithResolutionCapability) Capabilities() stack.LinkEndpointCapab
 
 func newTestContext(t *testing.T) *testContext {
 	c := &testContext{
-		s0: stack.New([]string{ProtocolName}, []string{icmp.ProtocolName6}, stack.Options{}),
-		s1: stack.New([]string{ProtocolName}, []string{icmp.ProtocolName6}, stack.Options{}),
+		s0: stack.New(stack.Options{
+			NetworkProtocols:   []stack.NetworkProtocol{NewProtocol()},
+			TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+		}),
+		s1: stack.New(stack.Options{
+			NetworkProtocols:   []stack.NetworkProtocol{NewProtocol()},
+			TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+		}),
 	}
 
 	const defaultMTU = 65536
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 331a8bdaa..7de6a4546 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -14,9 +14,9 @@
 
 // Package ipv6 contains the implementation of the ipv6 network protocol. To use
 // it in the networking stack, this package must be added to the project, and
-// activated on the stack by passing ipv6.ProtocolName (or "ipv6") as one of the
-// network protocols when calling stack.New(). Then endpoints can be created
-// by passing ipv6.ProtocolNumber as the network protocol number when calling
+// activated on the stack by passing ipv6.NewProtocol() as one of the network
+// protocols when calling stack.New(). Then endpoints can be created by passing
+// ipv6.ProtocolNumber as the network protocol number when calling
 // Stack.NewEndpoint().
 package ipv6
 
@@ -28,9 +28,6 @@ import (
 )
 
 const (
-	// ProtocolName is the string representation of the ipv6 protocol name.
-	ProtocolName = "ipv6"
-
 	// ProtocolNumber is the ipv6 protocol number.
 	ProtocolNumber = header.IPv6ProtocolNumber
 
@@ -160,14 +157,6 @@ func (*endpoint) Close() {}
 
 type protocol struct{}
 
-// NewProtocol creates a new protocol ipv6 protocol descriptor. This is exported
-// only for tests that short-circuit the stack. Regular use of the protocol is
-// done via the stack, which gets a protocol descriptor from the init() function
-// below.
-func NewProtocol() stack.NetworkProtocol {
-	return &protocol{}
-}
-
 // Number returns the ipv6 protocol number.
 func (p *protocol) Number() tcpip.NetworkProtocolNumber {
 	return ProtocolNumber
@@ -221,8 +210,7 @@ func calculateMTU(mtu uint32) uint32 {
 	return maxPayloadSize
 }
 
-func init() {
-	stack.RegisterNetworkProtocolFactory(ProtocolName, func() stack.NetworkProtocol {
-		return &protocol{}
-	})
+// NewProtocol returns an IPv6 network protocol.
+func NewProtocol() stack.NetworkProtocol {
+	return &protocol{}
 }
diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go
index 57bcd5455..78c674c2c 100644
--- a/pkg/tcpip/network/ipv6/ipv6_test.go
+++ b/pkg/tcpip/network/ipv6/ipv6_test.go
@@ -124,17 +124,20 @@ func testReceiveUDP(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst
 // UDP packets destined to the IPv6 link-local all-nodes multicast address.
 func TestReceiveOnAllNodesMulticastAddr(t *testing.T) {
 	tests := []struct {
-		name         string
-		protocolName string
-		rxf          func(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst tcpip.Address, want uint64)
+		name            string
+		protocolFactory stack.TransportProtocol
+		rxf             func(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst tcpip.Address, want uint64)
 	}{
-		{"ICMP", icmp.ProtocolName6, testReceiveICMP},
-		{"UDP", udp.ProtocolName, testReceiveUDP},
+		{"ICMP", icmp.NewProtocol6(), testReceiveICMP},
+		{"UDP", udp.NewProtocol(), testReceiveUDP},
 	}
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			s := stack.New([]string{ProtocolName}, []string{test.protocolName}, stack.Options{})
+			s := stack.New(stack.Options{
+				NetworkProtocols:   []stack.NetworkProtocol{NewProtocol()},
+				TransportProtocols: []stack.TransportProtocol{test.protocolFactory},
+			})
 			e := channel.New(10, 1280, linkAddr1)
 			if err := s.CreateNIC(1, e); err != nil {
 				t.Fatalf("CreateNIC(_) = %s", err)
@@ -152,19 +155,22 @@ func TestReceiveOnAllNodesMulticastAddr(t *testing.T) {
 // address.
 func TestReceiveOnSolicitedNodeAddr(t *testing.T) {
 	tests := []struct {
-		name         string
-		protocolName string
-		rxf          func(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst tcpip.Address, want uint64)
+		name            string
+		protocolFactory stack.TransportProtocol
+		rxf             func(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst tcpip.Address, want uint64)
 	}{
-		{"ICMP", icmp.ProtocolName6, testReceiveICMP},
-		{"UDP", udp.ProtocolName, testReceiveUDP},
+		{"ICMP", icmp.NewProtocol6(), testReceiveICMP},
+		{"UDP", udp.NewProtocol(), testReceiveUDP},
 	}
 
 	snmc := header.SolicitedNodeAddr(addr2)
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			s := stack.New([]string{ProtocolName}, []string{test.protocolName}, stack.Options{})
+			s := stack.New(stack.Options{
+				NetworkProtocols:   []stack.NetworkProtocol{NewProtocol()},
+				TransportProtocols: []stack.TransportProtocol{test.protocolFactory},
+			})
 			e := channel.New(10, 1280, linkAddr1)
 			if err := s.CreateNIC(1, e); err != nil {
 				t.Fatalf("CreateNIC(_) = %s", err)
@@ -237,7 +243,9 @@ func TestAddIpv6Address(t *testing.T) {
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			s := stack.New([]string{ProtocolName}, nil, stack.Options{})
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocol{NewProtocol()},
+			})
 			if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil {
 				t.Fatalf("CreateNIC(_) = %s", err)
 			}
diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go
index 571915d3f..e30791fe3 100644
--- a/pkg/tcpip/network/ipv6/ndp_test.go
+++ b/pkg/tcpip/network/ipv6/ndp_test.go
@@ -31,7 +31,10 @@ import (
 func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address) (*stack.Stack, stack.NetworkEndpoint) {
 	t.Helper()
 
-	s := stack.New([]string{ProtocolName}, []string{icmp.ProtocolName6}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol6()},
+	})
 
 	if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil {
 		t.Fatalf("CreateNIC(_) = %s", err)
diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go
index f12189580..2239c1e66 100644
--- a/pkg/tcpip/sample/tun_tcp_connect/main.go
+++ b/pkg/tcpip/sample/tun_tcp_connect/main.go
@@ -126,7 +126,10 @@ func main() {
 
 	// Create the stack with ipv4 and tcp protocols, then add a tun-based
 	// NIC and ipv4 address.
-	s := stack.New([]string{ipv4.ProtocolName}, []string{tcp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+	})
 
 	mtu, err := rawfile.GetMTU(tunName)
 	if err != nil {
diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go
index 329941775..bca73cbb1 100644
--- a/pkg/tcpip/sample/tun_tcp_echo/main.go
+++ b/pkg/tcpip/sample/tun_tcp_echo/main.go
@@ -111,7 +111,10 @@ func main() {
 
 	// Create the stack with ip and tcp protocols, then add a tun-based
 	// NIC and address.
-	s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName, arp.ProtocolName}, []string{tcp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+	})
 
 	mtu, err := rawfile.GetMTU(tunName)
 	if err != nil {
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index 07e4c770d..80101d4bb 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -366,14 +366,6 @@ type LinkAddressCache interface {
 	RemoveWaker(nicid tcpip.NICID, addr tcpip.Address, waker *sleep.Waker)
 }
 
-// TransportProtocolFactory functions are used by the stack to instantiate
-// transport protocols.
-type TransportProtocolFactory func() TransportProtocol
-
-// NetworkProtocolFactory provides methods to be used by the stack to
-// instantiate network protocols.
-type NetworkProtocolFactory func() NetworkProtocol
-
 // UnassociatedEndpointFactory produces endpoints for writing packets not
 // associated with a particular transport protocol. Such endpoints can be used
 // to write arbitrary packets that include the IP header.
@@ -381,34 +373,6 @@ type UnassociatedEndpointFactory interface {
 	NewUnassociatedRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
 }
 
-var (
-	transportProtocols = make(map[string]TransportProtocolFactory)
-	networkProtocols   = make(map[string]NetworkProtocolFactory)
-
-	unassociatedFactory UnassociatedEndpointFactory
-)
-
-// RegisterTransportProtocolFactory registers a new transport protocol factory
-// with the stack so that it becomes available to users of the stack. This
-// function is intended to be called by init() functions of the protocols.
-func RegisterTransportProtocolFactory(name string, p TransportProtocolFactory) {
-	transportProtocols[name] = p
-}
-
-// RegisterNetworkProtocolFactory registers a new network protocol factory with
-// the stack so that it becomes available to users of the stack. This function
-// is intended to be called by init() functions of the protocols.
-func RegisterNetworkProtocolFactory(name string, p NetworkProtocolFactory) {
-	networkProtocols[name] = p
-}
-
-// RegisterUnassociatedFactory registers a factory to produce endpoints not
-// associated with any particular transport protocol. This function is intended
-// to be called by init() functions of the protocols.
-func RegisterUnassociatedFactory(f UnassociatedEndpointFactory) {
-	unassociatedFactory = f
-}
-
 // GSOType is the type of GSO segments.
 //
 // +stateify savable
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index f7ba3cb0f..18d1704a5 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -17,11 +17,6 @@
 //
 // For consumers, the only function of interest is New(), everything else is
 // provided by the tcpip/public package.
-//
-// For protocol implementers, RegisterTransportProtocolFactory() and
-// RegisterNetworkProtocolFactory() are used to register protocol factories with
-// the stack, which will then be used to instantiate protocol objects when
-// consumers interact with the stack.
 package stack
 
 import (
@@ -351,6 +346,9 @@ type Stack struct {
 	networkProtocols   map[tcpip.NetworkProtocolNumber]NetworkProtocol
 	linkAddrResolvers  map[tcpip.NetworkProtocolNumber]LinkAddressResolver
 
+	// unassociatedFactory creates unassociated endpoints. If nil, raw
+	// endpoints are disabled. It is set during Stack creation and is
+	// immutable.
 	unassociatedFactory UnassociatedEndpointFactory
 
 	demux *transportDemuxer
@@ -359,10 +357,6 @@ type Stack struct {
 
 	linkAddrCache *linkAddrCache
 
-	// raw indicates whether raw sockets may be created. It is set during
-	// Stack creation and is immutable.
-	raw bool
-
 	mu         sync.RWMutex
 	nics       map[tcpip.NICID]*NIC
 	forwarding bool
@@ -398,6 +392,12 @@ type Stack struct {
 
 // Options contains optional Stack configuration.
 type Options struct {
+	// NetworkProtocols lists the network protocols to enable.
+	NetworkProtocols []NetworkProtocol
+
+	// TransportProtocols lists the transport protocols to enable.
+	TransportProtocols []TransportProtocol
+
 	// Clock is an optional clock source used for timestampping packets.
 	//
 	// If no Clock is specified, the clock source will be time.Now.
@@ -411,8 +411,9 @@ type Options struct {
 	// stack (false).
 	HandleLocal bool
 
-	// Raw indicates whether raw sockets may be created.
-	Raw bool
+	// UnassociatedFactory produces unassociated raw endpoints.
+	// Raw endpoints are enabled only if this is non-nil.
+	UnassociatedFactory UnassociatedEndpointFactory
 }
 
 // New allocates a new networking stack with only the requested networking and
@@ -422,7 +423,7 @@ type Options struct {
 // SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the
 // stack. Please refer to individual protocol implementations as to what options
 // are supported.
-func New(network []string, transport []string, opts Options) *Stack {
+func New(opts Options) *Stack {
 	clock := opts.Clock
 	if clock == nil {
 		clock = &tcpip.StdClock{}
@@ -438,17 +439,11 @@ func New(network []string, transport []string, opts Options) *Stack {
 		clock:              clock,
 		stats:              opts.Stats.FillIn(),
 		handleLocal:        opts.HandleLocal,
-		raw:                opts.Raw,
 		icmpRateLimiter:    NewICMPRateLimiter(),
 	}
 
 	// Add specified network protocols.
-	for _, name := range network {
-		netProtoFactory, ok := networkProtocols[name]
-		if !ok {
-			continue
-		}
-		netProto := netProtoFactory()
+	for _, netProto := range opts.NetworkProtocols {
 		s.networkProtocols[netProto.Number()] = netProto
 		if r, ok := netProto.(LinkAddressResolver); ok {
 			s.linkAddrResolvers[r.LinkAddressProtocol()] = r
@@ -456,18 +451,14 @@ func New(network []string, transport []string, opts Options) *Stack {
 	}
 
 	// Add specified transport protocols.
-	for _, name := range transport {
-		transProtoFactory, ok := transportProtocols[name]
-		if !ok {
-			continue
-		}
-		transProto := transProtoFactory()
+	for _, transProto := range opts.TransportProtocols {
 		s.transportProtocols[transProto.Number()] = &transportProtocolState{
 			proto: transProto,
 		}
 	}
 
-	s.unassociatedFactory = unassociatedFactory
+	// Add the factory for unassociated endpoints, if present.
+	s.unassociatedFactory = opts.UnassociatedFactory
 
 	// Create the global transport demuxer.
 	s.demux = newTransportDemuxer(s)
@@ -602,7 +593,7 @@ func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcp
 // protocol. Raw endpoints receive all traffic for a given protocol regardless
 // of address.
 func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) {
-	if !s.raw {
+	if s.unassociatedFactory == nil {
 		return nil, tcpip.ErrNotPermitted
 	}
 
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 7aa10bce9..d2dede8a9 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -222,11 +222,17 @@ func (f *fakeNetworkProtocol) Option(option interface{}) *tcpip.Error {
 	}
 }
 
+func fakeNetFactory() stack.NetworkProtocol {
+	return &fakeNetworkProtocol{}
+}
+
 func TestNetworkReceive(t *testing.T) {
 	// Create a stack with the fake network protocol, one nic, and two
 	// addresses attached to it: 1 & 2.
 	ep := channel.New(10, defaultMTU, "")
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
@@ -370,7 +376,9 @@ func TestNetworkSend(t *testing.T) {
 	// address: 1. The route table sends all packets through the only
 	// existing nic.
 	ep := channel.New(10, defaultMTU, "")
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("NewNIC failed:", err)
 	}
@@ -395,7 +403,9 @@ func TestNetworkSendMultiRoute(t *testing.T) {
 	// Create a stack with the fake network protocol, two nics, and two
 	// addresses per nic, the first nic has odd address, the second one has
 	// even addresses.
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep1 := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep1); err != nil {
@@ -476,7 +486,9 @@ func TestRoutes(t *testing.T) {
 	// Create a stack with the fake network protocol, two nics, and two
 	// addresses per nic, the first nic has odd address, the second one has
 	// even addresses.
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep1 := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep1); err != nil {
@@ -554,7 +566,9 @@ func TestAddressRemoval(t *testing.T) {
 	localAddr := tcpip.Address([]byte{localAddrByte})
 	remoteAddr := tcpip.Address("\x02")
 
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep); err != nil {
@@ -599,7 +613,9 @@ func TestAddressRemovalWithRouteHeld(t *testing.T) {
 	localAddr := tcpip.Address([]byte{localAddrByte})
 	remoteAddr := tcpip.Address("\x02")
 
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep); err != nil {
@@ -688,7 +704,9 @@ func TestEndpointExpiration(t *testing.T) {
 	for _, promiscuous := range []bool{true, false} {
 		for _, spoofing := range []bool{true, false} {
 			t.Run(fmt.Sprintf("promiscuous=%t spoofing=%t", promiscuous, spoofing), func(t *testing.T) {
-				s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+				s := stack.New(stack.Options{
+					NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+				})
 
 				ep := channel.New(10, defaultMTU, "")
 				if err := s.CreateNIC(nicid, ep); err != nil {
@@ -844,7 +862,9 @@ func TestEndpointExpiration(t *testing.T) {
 }
 
 func TestPromiscuousMode(t *testing.T) {
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep); err != nil {
@@ -894,7 +914,9 @@ func TestSpoofingWithAddress(t *testing.T) {
 	nonExistentLocalAddr := tcpip.Address("\x02")
 	dstAddr := tcpip.Address("\x03")
 
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep); err != nil {
@@ -958,7 +980,9 @@ func TestSpoofingNoAddress(t *testing.T) {
 	nonExistentLocalAddr := tcpip.Address("\x01")
 	dstAddr := tcpip.Address("\x02")
 
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep); err != nil {
@@ -1003,7 +1027,9 @@ func TestSpoofingNoAddress(t *testing.T) {
 }
 
 func TestBroadcastNeedsNoRoute(t *testing.T) {
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep); err != nil {
@@ -1074,7 +1100,9 @@ func TestMulticastOrIPv6LinkLocalNeedsNoRoute(t *testing.T) {
 		{"IPv6 Unicast Not Link-Local 7", true, "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"},
 	} {
 		t.Run(tc.name, func(t *testing.T) {
-			s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+			})
 
 			ep := channel.New(10, defaultMTU, "")
 			if err := s.CreateNIC(1, ep); err != nil {
@@ -1130,7 +1158,9 @@ func TestMulticastOrIPv6LinkLocalNeedsNoRoute(t *testing.T) {
 
 // Add a range of addresses, then check that a packet is delivered.
 func TestAddressRangeAcceptsMatchingPacket(t *testing.T) {
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep); err != nil {
@@ -1196,7 +1226,9 @@ func testNicForAddressRange(t *testing.T, nicID tcpip.NICID, s *stack.Stack, sub
 // existent.
 func TestCheckLocalAddressForSubnet(t *testing.T) {
 	const nicID tcpip.NICID = 1
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(nicID, ep); err != nil {
@@ -1234,7 +1266,9 @@ func TestCheckLocalAddressForSubnet(t *testing.T) {
 // Set a range of addresses, then send a packet to a destination outside the
 // range and then check it doesn't get delivered.
 func TestAddressRangeRejectsNonmatchingPacket(t *testing.T) {
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep); err != nil {
@@ -1266,7 +1300,10 @@ func TestAddressRangeRejectsNonmatchingPacket(t *testing.T) {
 }
 
 func TestNetworkOptions(t *testing.T) {
-	s := stack.New([]string{"fakeNet"}, []string{}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{fakeNetFactory()},
+		TransportProtocols: []stack.TransportProtocol{},
+	})
 
 	// Try an unsupported network protocol.
 	if err := s.SetNetworkProtocolOption(tcpip.NetworkProtocolNumber(99999), fakeNetGoodOption(false)); err != tcpip.ErrUnknownProtocol {
@@ -1319,7 +1356,9 @@ func stackContainsAddressRange(s *stack.Stack, id tcpip.NICID, addrRange tcpip.S
 }
 
 func TestAddresRangeAddRemove(t *testing.T) {
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
@@ -1360,7 +1399,9 @@ func TestGetMainNICAddressAddPrimaryNonPrimary(t *testing.T) {
 				t.Run(fmt.Sprintf("canBe=%d", canBe), func(t *testing.T) {
 					for never := 0; never < 3; never++ {
 						t.Run(fmt.Sprintf("never=%d", never), func(t *testing.T) {
-							s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+							s := stack.New(stack.Options{
+								NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+							})
 							ep := channel.New(10, defaultMTU, "")
 							if err := s.CreateNIC(1, ep); err != nil {
 								t.Fatal("CreateNIC failed:", err)
@@ -1425,7 +1466,9 @@ func TestGetMainNICAddressAddPrimaryNonPrimary(t *testing.T) {
 }
 
 func TestGetMainNICAddressAddRemove(t *testing.T) {
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
@@ -1508,7 +1551,9 @@ func verifyAddresses(t *testing.T, expectedAddresses, gotAddresses []tcpip.Proto
 
 func TestAddAddress(t *testing.T) {
 	const nicid = 1
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(nicid, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
@@ -1533,7 +1578,9 @@ func TestAddAddress(t *testing.T) {
 
 func TestAddProtocolAddress(t *testing.T) {
 	const nicid = 1
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(nicid, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
@@ -1565,7 +1612,9 @@ func TestAddProtocolAddress(t *testing.T) {
 
 func TestAddAddressWithOptions(t *testing.T) {
 	const nicid = 1
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(nicid, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
@@ -1594,7 +1643,9 @@ func TestAddAddressWithOptions(t *testing.T) {
 
 func TestAddProtocolAddressWithOptions(t *testing.T) {
 	const nicid = 1
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 	ep := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(nicid, ep); err != nil {
 		t.Fatal("CreateNIC failed:", err)
@@ -1628,7 +1679,9 @@ func TestAddProtocolAddressWithOptions(t *testing.T) {
 }
 
 func TestNICStats(t *testing.T) {
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 	ep1 := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, ep1); err != nil {
 		t.Fatal("CreateNIC failed: ", err)
@@ -1674,7 +1727,9 @@ func TestNICStats(t *testing.T) {
 func TestNICForwarding(t *testing.T) {
 	// Create a stack with the fake network protocol, two NICs, each with
 	// an address.
-	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{fakeNetFactory()},
+	})
 	s.SetForwarding(true)
 
 	ep1 := channel.New(10, defaultMTU, "")
@@ -1722,9 +1777,3 @@ func TestNICForwarding(t *testing.T) {
 		t.Errorf("got Tx.Bytes.Value() = %d, want = %d", got, want)
 	}
 }
-
-func init() {
-	stack.RegisterNetworkProtocolFactory("fakeNet", func() stack.NetworkProtocol {
-		return &fakeNetworkProtocol{}
-	})
-}
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index 0e69ac7c8..56e8a5d9b 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -282,9 +282,16 @@ func (f *fakeTransportProtocol) Option(option interface{}) *tcpip.Error {
 	}
 }
 
+func fakeTransFactory() stack.TransportProtocol {
+	return &fakeTransportProtocol{}
+}
+
 func TestTransportReceive(t *testing.T) {
 	linkEP := channel.New(10, defaultMTU, "")
-	s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{fakeNetFactory()},
+		TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
+	})
 	if err := s.CreateNIC(1, linkEP); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
@@ -346,7 +353,10 @@ func TestTransportReceive(t *testing.T) {
 
 func TestTransportControlReceive(t *testing.T) {
 	linkEP := channel.New(10, defaultMTU, "")
-	s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{fakeNetFactory()},
+		TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
+	})
 	if err := s.CreateNIC(1, linkEP); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
@@ -414,7 +424,10 @@ func TestTransportControlReceive(t *testing.T) {
 
 func TestTransportSend(t *testing.T) {
 	linkEP := channel.New(10, defaultMTU, "")
-	s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{fakeNetFactory()},
+		TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
+	})
 	if err := s.CreateNIC(1, linkEP); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
@@ -457,7 +470,10 @@ func TestTransportSend(t *testing.T) {
 }
 
 func TestTransportOptions(t *testing.T) {
-	s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{fakeNetFactory()},
+		TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
+	})
 
 	// Try an unsupported transport protocol.
 	if err := s.SetTransportProtocolOption(tcpip.TransportProtocolNumber(99999), fakeTransportGoodOption(false)); err != tcpip.ErrUnknownProtocol {
@@ -498,7 +514,10 @@ func TestTransportOptions(t *testing.T) {
 }
 
 func TestTransportForwarding(t *testing.T) {
-	s := stack.New([]string{"fakeNet"}, []string{"fakeTrans"}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{fakeNetFactory()},
+		TransportProtocols: []stack.TransportProtocol{fakeTransFactory()},
+	})
 	s.SetForwarding(true)
 
 	// TODO(b/123449044): Change this to a channel NIC.
@@ -576,9 +595,3 @@ func TestTransportForwarding(t *testing.T) {
 		t.Errorf("Response packet has incorrect source addresss: got = %d, want = 3", src)
 	}
 }
-
-func init() {
-	stack.RegisterTransportProtocolFactory("fakeTrans", func() stack.TransportProtocol {
-		return &fakeTransportProtocol{}
-	})
-}
diff --git a/pkg/tcpip/transport/icmp/protocol.go b/pkg/tcpip/transport/icmp/protocol.go
index 1eb790932..bfb16f7c3 100644
--- a/pkg/tcpip/transport/icmp/protocol.go
+++ b/pkg/tcpip/transport/icmp/protocol.go
@@ -14,10 +14,9 @@
 
 // Package icmp contains the implementation of the ICMP and IPv6-ICMP transport
 // protocols for use in ping. To use it in the networking stack, this package
-// must be added to the project, and
-// activated on the stack by passing icmp.ProtocolName (or "icmp") and/or
-// icmp.ProtocolName6 (or "icmp6") as one of the transport protocols when
-// calling stack.New(). Then endpoints can be created by passing
+// must be added to the project, and activated on the stack by passing
+// icmp.NewProtocol4() and/or icmp.NewProtocol6() as one of the transport
+// protocols when calling stack.New(). Then endpoints can be created by passing
 // icmp.ProtocolNumber or icmp.ProtocolNumber6 as the transport protocol number
 // when calling Stack.NewEndpoint().
 package icmp
@@ -34,15 +33,9 @@ import (
 )
 
 const (
-	// ProtocolName4 is the string representation of the icmp protocol name.
-	ProtocolName4 = "icmp4"
-
 	// ProtocolNumber4 is the ICMP protocol number.
 	ProtocolNumber4 = header.ICMPv4ProtocolNumber
 
-	// ProtocolName6 is the string representation of the icmp protocol name.
-	ProtocolName6 = "icmp6"
-
 	// ProtocolNumber6 is the IPv6-ICMP protocol number.
 	ProtocolNumber6 = header.ICMPv6ProtocolNumber
 )
@@ -125,12 +118,12 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
-func init() {
-	stack.RegisterTransportProtocolFactory(ProtocolName4, func() stack.TransportProtocol {
-		return &protocol{ProtocolNumber4}
-	})
+// NewProtocol4 returns an ICMPv4 transport protocol.
+func NewProtocol4() stack.TransportProtocol {
+	return &protocol{ProtocolNumber4}
+}
 
-	stack.RegisterTransportProtocolFactory(ProtocolName6, func() stack.TransportProtocol {
-		return &protocol{ProtocolNumber6}
-	})
+// NewProtocol6 returns an ICMPv6 transport protocol.
+func NewProtocol6() stack.TransportProtocol {
+	return &protocol{ProtocolNumber6}
 }
diff --git a/pkg/tcpip/transport/raw/protocol.go b/pkg/tcpip/transport/raw/protocol.go
index 783c21e6b..a2512d666 100644
--- a/pkg/tcpip/transport/raw/protocol.go
+++ b/pkg/tcpip/transport/raw/protocol.go
@@ -20,13 +20,10 @@ import (
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
-type factory struct{}
+// EndpointFactory implements stack.UnassociatedEndpointFactory.
+type EndpointFactory struct{}
 
 // NewUnassociatedRawEndpoint implements stack.UnassociatedEndpointFactory.
-func (factory) NewUnassociatedRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+func (EndpointFactory) NewUnassociatedRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
 	return newEndpoint(stack, netProto, transProto, waiterQueue, false /* associated */)
 }
-
-func init() {
-	stack.RegisterUnassociatedFactory(factory{})
-}
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index 2a13b2022..d5d8ab96a 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -14,7 +14,7 @@
 
 // Package tcp contains the implementation of the TCP transport protocol. To use
 // it in the networking stack, this package must be added to the project, and
-// activated on the stack by passing tcp.ProtocolName (or "tcp") as one of the
+// activated on the stack by passing tcp.NewProtocol() as one of the
 // transport protocols when calling stack.New(). Then endpoints can be created
 // by passing tcp.ProtocolNumber as the transport protocol number when calling
 // Stack.NewEndpoint().
@@ -34,9 +34,6 @@ import (
 )
 
 const (
-	// ProtocolName is the string representation of the tcp protocol name.
-	ProtocolName = "tcp"
-
 	// ProtocolNumber is the tcp protocol number.
 	ProtocolNumber = header.TCPProtocolNumber
 
@@ -254,13 +251,12 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
 	}
 }
 
-func init() {
-	stack.RegisterTransportProtocolFactory(ProtocolName, func() stack.TransportProtocol {
-		return &protocol{
-			sendBufferSize:             SendBufferSizeOption{MinBufferSize, DefaultSendBufferSize, MaxBufferSize},
-			recvBufferSize:             ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize},
-			congestionControl:          ccReno,
-			availableCongestionControl: []string{ccReno, ccCubic},
-		}
-	})
+// NewProtocol returns a TCP transport protocol.
+func NewProtocol() stack.TransportProtocol {
+	return &protocol{
+		sendBufferSize:             SendBufferSizeOption{MinBufferSize, DefaultSendBufferSize, MaxBufferSize},
+		recvBufferSize:             ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize},
+		congestionControl:          ccReno,
+		availableCongestionControl: []string{ccReno, ccCubic},
+	}
 }
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 7fa5cfb6e..2be094876 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -2873,7 +2873,10 @@ func checkSendBufferSize(t *testing.T, ep tcpip.Endpoint, v int) {
 }
 
 func TestDefaultBufferSizes(t *testing.T) {
-	s := stack.New([]string{ipv4.ProtocolName}, []string{tcp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+	})
 
 	// Check the default values.
 	ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
@@ -2919,7 +2922,10 @@ func TestDefaultBufferSizes(t *testing.T) {
 }
 
 func TestMinMaxBufferSizes(t *testing.T) {
-	s := stack.New([]string{ipv4.ProtocolName}, []string{tcp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+	})
 
 	// Check the default values.
 	ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
@@ -2965,10 +2971,13 @@ func TestMinMaxBufferSizes(t *testing.T) {
 }
 
 func makeStack() (*stack.Stack, *tcpip.Error) {
-	s := stack.New([]string{
-		ipv4.ProtocolName,
-		ipv6.ProtocolName,
-	}, []string{tcp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocol{
+			ipv4.NewProtocol(),
+			ipv6.NewProtocol(),
+		},
+		TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+	})
 
 	id := loopback.New()
 	if testing.Verbose() {
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 78eff5c3a..d3f1d2cdf 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -137,7 +137,10 @@ type Context struct {
 // New allocates and initializes a test context containing a new
 // stack and a link-layer endpoint.
 func New(t *testing.T, mtu uint32) *Context {
-	s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{tcp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
+	})
 
 	// Allow minimum send/receive buffer sizes to be 1 during tests.
 	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize, 10 * tcp.DefaultSendBufferSize}); err != nil {
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index 068d9a272..f5cc932dd 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -14,7 +14,7 @@
 
 // Package udp contains the implementation of the UDP transport protocol. To use
 // it in the networking stack, this package must be added to the project, and
-// activated on the stack by passing udp.ProtocolName (or "udp") as one of the
+// activated on the stack by passing udp.NewProtocol() as one of the
 // transport protocols when calling stack.New(). Then endpoints can be created
 // by passing udp.ProtocolNumber as the transport protocol number when calling
 // Stack.NewEndpoint().
@@ -30,9 +30,6 @@ import (
 )
 
 const (
-	// ProtocolName is the string representation of the udp protocol name.
-	ProtocolName = "udp"
-
 	// ProtocolNumber is the udp protocol number.
 	ProtocolNumber = header.UDPProtocolNumber
 )
@@ -182,8 +179,7 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
 	return tcpip.ErrUnknownProtocolOption
 }
 
-func init() {
-	stack.RegisterTransportProtocolFactory(ProtocolName, func() stack.TransportProtocol {
-		return &protocol{}
-	})
+// NewProtocol returns a UDP transport protocol.
+func NewProtocol() stack.TransportProtocol {
+	return &protocol{}
 }
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index c6deab892..2ec27be4d 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -274,7 +274,10 @@ type testContext struct {
 func newDualTestContext(t *testing.T, mtu uint32) *testContext {
 	t.Helper()
 
-	s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{udp.ProtocolName}, stack.Options{})
+	s := stack.New(stack.Options{
+		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
+		TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+	})
 	ep := channel.New(256, mtu, "")
 	wep := stack.LinkEndpoint(ep)
 
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 54d1ab129..d90381c0f 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -80,6 +80,7 @@ go_library(
         "//pkg/tcpip/network/ipv6",
         "//pkg/tcpip/stack",
         "//pkg/tcpip/transport/icmp",
+        "//pkg/tcpip/transport/raw",
         "//pkg/tcpip/transport/tcp",
         "//pkg/tcpip/transport/udp",
         "//pkg/urpc",
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index d824d7dc5..adf345490 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -54,6 +54,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
+	"gvisor.dev/gvisor/pkg/tcpip/transport/raw"
 	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
 	"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
 	"gvisor.dev/gvisor/runsc/boot/filter"
@@ -911,15 +912,17 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
 
 	case NetworkNone, NetworkSandbox:
 		// NetworkNone sets up loopback using netstack.
-		netProtos := []string{ipv4.ProtocolName, ipv6.ProtocolName, arp.ProtocolName}
-		protoNames := []string{tcp.ProtocolName, udp.ProtocolName, icmp.ProtocolName4}
-		s := epsocket.Stack{stack.New(netProtos, protoNames, stack.Options{
-			Clock:       clock,
-			Stats:       epsocket.Metrics,
-			HandleLocal: true,
+		netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()}
+		transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()}
+		s := epsocket.Stack{stack.New(stack.Options{
+			NetworkProtocols:   netProtos,
+			TransportProtocols: transProtos,
+			Clock:              clock,
+			Stats:              epsocket.Metrics,
+			HandleLocal:        true,
 			// Enable raw sockets for users with sufficient
 			// privileges.
-			Raw: true,
+			UnassociatedFactory: raw.EndpointFactory{},
 		})}
 
 		// Enable SACK Recovery.
-- 
cgit v1.2.3