58 files changed, 1334 insertions, 649 deletions
diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go
index 10ba2f5f0..40f2c1cad 100644
--- a/pkg/sentry/fs/gofer/socket.go
+++ b/pkg/sentry/fs/gofer/socket.go
@@ -47,6 +47,8 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport.
 	return &endpoint{inode, i.fileState.file.file, path}
 }
 
+// LINT.IfChange
+
 // endpoint is a Gofer-backed transport.BoundEndpoint.
 //
 // An endpoint's lifetime is the time between when InodeOperations.BoundEndpoint()
@@ -146,3 +148,5 @@ func (e *endpoint) Release() {
 func (e *endpoint) Passcred() bool {
 	return false
 }
+
+// LINT.ThenChange(../../fsimpl/gofer/socket.go)
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index 06fc2d80a..b6e94583e 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -37,6 +37,8 @@ import (
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
+// LINT.IfChange
+
 // maxSendBufferSize is the maximum host send buffer size allowed for endpoint.
 //
 // N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max).
@@ -388,3 +390,5 @@ func (c *ConnectedEndpoint) Release() {
 
 // CloseUnread implements transport.ConnectedEndpoint.CloseUnread.
 func (c *ConnectedEndpoint) CloseUnread() {}
+
+// LINT.ThenChange(../../fsimpl/host/socket.go)
diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go
index af6955675..5c18dbd5e 100644
--- a/pkg/sentry/fs/host/socket_iovec.go
+++ b/pkg/sentry/fs/host/socket_iovec.go
@@ -21,6 +21,8 @@ import (
 	"gvisor.dev/gvisor/pkg/syserror"
 )
 
+// LINT.IfChange
+
 // maxIovs is the maximum number of iovecs to pass to the host.
 var maxIovs = linux.UIO_MAXIOV
 
@@ -111,3 +113,5 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec
 
 	return total, iovecs, nil, err
 }
+
+// LINT.ThenChange(../../fsimpl/host/socket_iovec.go)
diff --git a/pkg/sentry/fs/host/socket_unsafe.go b/pkg/sentry/fs/host/socket_unsafe.go
index f3bbed7ea..5d4f312cf 100644
--- a/pkg/sentry/fs/host/socket_unsafe.go
+++ b/pkg/sentry/fs/host/socket_unsafe.go
@@ -19,6 +19,8 @@ import (
 	"unsafe"
 )
 
+// LINT.IfChange
+
 // fdReadVec receives from fd to bufs.
 //
 // If the total length of bufs is > maxlen, fdReadVec will do a partial read
@@ -99,3 +101,5 @@ func fdWriteVec(fd int, bufs [][]byte, maxlen int64, truncate bool) (int64, int6
 
 	return int64(n), length, err
 }
+
+// LINT.ThenChange(../../fsimpl/host/socket_unsafe.go)
diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go
index 2c22a04af..6c882d0b6 100644
--- a/pkg/sentry/fsimpl/ext/filesystem.go
+++ b/pkg/sentry/fsimpl/ext/filesystem.go
@@ -485,7 +485,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
 }
 
 // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt.
-func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath) (transport.BoundEndpoint, error) {
+func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
 	_, _, err := fs.walk(rp, false)
 	if err != nil {
 		return nil, err
diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD
index b9c4beee4..5ce82b793 100644
--- a/pkg/sentry/fsimpl/gofer/BUILD
+++ b/pkg/sentry/fsimpl/gofer/BUILD
@@ -38,6 +38,7 @@ go_library(
         "p9file.go",
         "pagemath.go",
         "regular_file.go",
+        "socket.go",
         "special_file.go",
         "symlink.go",
         "time.go",
@@ -52,18 +53,25 @@ go_library(
         "//pkg/p9",
         "//pkg/safemem",
         "//pkg/sentry/fs/fsutil",
+        "//pkg/sentry/fsimpl/host",
         "//pkg/sentry/hostfd",
+        "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/kernel/pipe",
         "//pkg/sentry/kernel/time",
         "//pkg/sentry/memmap",
         "//pkg/sentry/pgalloc",
         "//pkg/sentry/platform",
+        "//pkg/sentry/socket/control",
+        "//pkg/sentry/socket/unix",
         "//pkg/sentry/socket/unix/transport",
         "//pkg/sentry/usage",
         "//pkg/sentry/vfs",
+        "//pkg/syserr",
         "//pkg/syserror",
         "//pkg/unet",
         "//pkg/usermem",
+        "//pkg/waiter",
     ],
 )
 
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index 55f9ed911..b98218753 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -15,6 +15,7 @@
 package gofer
 
 import (
+	"fmt"
 	"sync"
 	"sync/atomic"
 
@@ -22,6 +23,8 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
+	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -59,28 +62,52 @@ func (d *dentry) cacheNegativeLookupLocked(name string) {
 	d.children[name] = nil
 }
 
-// createSyntheticDirectory creates a synthetic directory with the given name
+type createSyntheticOpts struct {
+	name string
+	mode linux.FileMode
+	kuid auth.KUID
+	kgid auth.KGID
+
+	// The endpoint for a synthetic socket. endpoint should be nil if the file
+	// being created is not a socket.
+	endpoint transport.BoundEndpoint
+
+	// pipe should be nil if the file being created is not a pipe.
+	pipe *pipe.VFSPipe
+}
+
+// createSyntheticChildLocked creates a synthetic file with the given name
 // in d.
 //
 // Preconditions: d.dirMu must be locked. d.isDir(). d does not already contain
 // a child with the given name.
-func (d *dentry) createSyntheticDirectoryLocked(name string, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) {
+func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) {
 	d2 := &dentry{
 		refs:      1, // held by d
 		fs:        d.fs,
-		mode:      uint32(mode) | linux.S_IFDIR,
-		uid:       uint32(kuid),
-		gid:       uint32(kgid),
+		mode:      uint32(opts.mode),
+		uid:       uint32(opts.kuid),
+		gid:       uint32(opts.kgid),
 		blockSize: usermem.PageSize, // arbitrary
 		handle: handle{
 			fd: -1,
 		},
 		nlink: uint32(2),
 	}
+	switch opts.mode.FileType() {
+	case linux.S_IFDIR:
+		// Nothing else needs to be done.
+	case linux.S_IFSOCK:
+		d2.endpoint = opts.endpoint
+	case linux.S_IFIFO:
+		d2.pipe = opts.pipe
+	default:
+		panic(fmt.Sprintf("failed to create synthetic file of unrecognized type: %v", opts.mode.FileType()))
+	}
 	d2.pf.dentry = d2
 	d2.vfsd.Init(d2)
 
-	d.cacheNewChildLocked(d2, name)
+	d.cacheNewChildLocked(d2, opts.name)
 	d.syntheticChildren++
 }
 
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 98ccb42fd..4a8411371 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -22,9 +22,11 @@ import (
 	"gvisor.dev/gvisor/pkg/fspath"
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
 	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/usermem"
 )
 
 // Sync implements vfs.FilesystemImpl.Sync.
@@ -652,7 +654,12 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
 				return err
 			}
 			ctx.Infof("Failed to create remote directory %q: %v; falling back to synthetic directory", name, err)
-			parent.createSyntheticDirectoryLocked(name, opts.Mode, creds.EffectiveKUID, creds.EffectiveKGID)
+			parent.createSyntheticChildLocked(&createSyntheticOpts{
+				name: name,
+				mode: linux.S_IFDIR | opts.Mode,
+				kuid: creds.EffectiveKUID,
+				kgid: creds.EffectiveKGID,
+			})
 		}
 		if fs.opts.interop != InteropModeShared {
 			parent.incLinks()
@@ -663,7 +670,12 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
 			// Can't create non-synthetic files in synthetic directories.
 			return syserror.EPERM
 		}
-		parent.createSyntheticDirectoryLocked(name, opts.Mode, creds.EffectiveKUID, creds.EffectiveKGID)
+		parent.createSyntheticChildLocked(&createSyntheticOpts{
+			name: name,
+			mode: linux.S_IFDIR | opts.Mode,
+			kuid: creds.EffectiveKUID,
+			kgid: creds.EffectiveKGID,
+		})
 		parent.incLinks()
 		return nil
 	})
@@ -674,6 +686,28 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
 	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error {
 		creds := rp.Credentials()
 		_, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
+		if err == syserror.EPERM {
+			switch opts.Mode.FileType() {
+			case linux.S_IFSOCK:
+				parent.createSyntheticChildLocked(&createSyntheticOpts{
+					name:     name,
+					mode:     opts.Mode,
+					kuid:     creds.EffectiveKUID,
+					kgid:     creds.EffectiveKGID,
+					endpoint: opts.Endpoint,
+				})
+				return nil
+			case linux.S_IFIFO:
+				parent.createSyntheticChildLocked(&createSyntheticOpts{
+					name: name,
+					mode: opts.Mode,
+					kuid: creds.EffectiveKUID,
+					kgid: creds.EffectiveKGID,
+					pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize),
+				})
+				return nil
+			}
+		}
 		return err
 	}, nil)
 }
@@ -756,20 +790,21 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
 		return nil, err
 	}
 	mnt := rp.Mount()
-	filetype := d.fileType()
-	switch {
-	case filetype == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD:
-		if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0); err != nil {
-			return nil, err
-		}
-		fd := &regularFileFD{}
-		if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
-			AllowDirectIO: true,
-		}); err != nil {
-			return nil, err
+	switch d.fileType() {
+	case linux.S_IFREG:
+		if !d.fs.opts.regularFilesUseSpecialFileFD {
+			if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0); err != nil {
+				return nil, err
+			}
+			fd := &regularFileFD{}
+			if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
+				AllowDirectIO: true,
+			}); err != nil {
+				return nil, err
+			}
+			return &fd.vfsfd, nil
 		}
-		return &fd.vfsfd, nil
-	case filetype == linux.S_IFDIR:
+	case linux.S_IFDIR:
 		// Can't open directories with O_CREAT.
 		if opts.Flags&linux.O_CREAT != 0 {
 			return nil, syserror.EISDIR
@@ -791,26 +826,40 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
 			return nil, err
 		}
 		return &fd.vfsfd, nil
-	case filetype == linux.S_IFLNK:
+	case linux.S_IFLNK:
 		// Can't open symlinks without O_PATH (which is unimplemented).
 		return nil, syserror.ELOOP
-	default:
-		if opts.Flags&linux.O_DIRECT != 0 {
-			return nil, syserror.EINVAL
-		}
-		h, err := openHandle(ctx, d.file, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0)
-		if err != nil {
-			return nil, err
-		}
-		fd := &specialFileFD{
-			handle: h,
+	case linux.S_IFSOCK:
+		if d.isSynthetic() {
+			return nil, syserror.ENXIO
 		}
-		if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
-			h.close(ctx)
-			return nil, err
+	case linux.S_IFIFO:
+		if d.isSynthetic() {
+			return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags)
 		}
-		return &fd.vfsfd, nil
 	}
+	return d.openSpecialFileLocked(ctx, mnt, opts)
+}
+
+func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
+	ats := vfs.AccessTypesForOpenFlags(opts)
+	// Treat as a special file. This is done for non-synthetic pipes as well as
+	// regular files when d.fs.opts.regularFilesUseSpecialFileFD is true.
+	if opts.Flags&linux.O_DIRECT != 0 {
+		return nil, syserror.EINVAL
+	}
+	h, err := openHandle(ctx, d.file, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0)
+	if err != nil {
+		return nil, err
+	}
+	fd := &specialFileFD{
+		handle: h,
+	}
+	if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+		h.close(ctx)
+		return nil, err
+	}
+	return &fd.vfsfd, nil
 }
 
 // Preconditions: d.fs.renameMu must be locked. d.dirMu must be locked.
@@ -1196,15 +1245,28 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
 }
 
 // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt.
-func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath) (transport.BoundEndpoint, error) {
+func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
 	defer fs.renameMuRUnlockAndCheckCaching(&ds)
-	_, err := fs.resolveLocked(ctx, rp, &ds)
+	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return nil, err
 	}
-	// TODO(gvisor.dev/issue/1476): Implement BoundEndpointAt.
+	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
+		return nil, err
+	}
+	if d.isSocket() {
+		if !d.isSynthetic() {
+			d.IncRef()
+			return &endpoint{
+				dentry: d,
+				file:   d.file.file,
+				path:   opts.Addr,
+			}, nil
+		}
+		return d.endpoint, nil
+	}
 	return nil, syserror.ECONNREFUSED
 }
 
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 8b4e91d17..3235816c2 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -46,9 +46,11 @@ import (
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
+	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/unet"
@@ -585,6 +587,14 @@ type dentry struct {
 	// and target are protected by dataMu.
 	haveTarget bool
 	target     string
+
+	// If this dentry represents a synthetic socket file, endpoint is the
+	// transport endpoint bound to this file.
+	endpoint transport.BoundEndpoint
+
+	// If this dentry represents a synthetic named pipe, pipe is the pipe
+	// endpoint bound to this file.
+	pipe *pipe.VFSPipe
 }
 
 // dentryAttrMask returns a p9.AttrMask enabling all attributes used by the
@@ -791,10 +801,21 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
 		setLocalAtime = stat.Mask&linux.STATX_ATIME != 0
 		setLocalMtime = stat.Mask&linux.STATX_MTIME != 0
 		stat.Mask &^= linux.STATX_ATIME | linux.STATX_MTIME
-		if !setLocalMtime && (stat.Mask&linux.STATX_SIZE != 0) {
-			// Truncate updates mtime.
-			setLocalMtime = true
-			stat.Mtime.Nsec = linux.UTIME_NOW
+
+		// Prepare for truncate.
+		if stat.Mask&linux.STATX_SIZE != 0 {
+			switch d.mode & linux.S_IFMT {
+			case linux.S_IFREG:
+				if !setLocalMtime {
+					// Truncate updates mtime.
+					setLocalMtime = true
+					stat.Mtime.Nsec = linux.UTIME_NOW
+				}
+			case linux.S_IFDIR:
+				return syserror.EISDIR
+			default:
+				return syserror.EINVAL
+			}
 		}
 	}
 	d.metadataMu.Lock()
@@ -1049,6 +1070,7 @@ func (d *dentry) destroyLocked() {
 		d.handle.close(ctx)
 	}
 	d.handleMu.Unlock()
+
 	if !d.file.isNil() {
 		d.file.close(ctx)
 		d.file = p9file{}
@@ -1134,7 +1156,7 @@ func (d *dentry) removexattr(ctx context.Context, creds *auth.Credentials, name
 	return d.file.removeXattr(ctx, name)
 }
 
-// Preconditions: !d.file.isNil(). d.isRegularFile() || d.isDirectory().
+// Preconditions: !d.isSynthetic(). d.isRegularFile() || d.isDirectory().
 func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool) error {
 	// O_TRUNC unconditionally requires us to obtain a new handle (opened with
 	// O_TRUNC).
diff --git a/pkg/sentry/fsimpl/gofer/socket.go b/pkg/sentry/fsimpl/gofer/socket.go
new file mode 100644
index 000000000..73835df91
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/socket.go
@@ -0,0 +1,141 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+	"syscall"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/p9"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
+	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+	"gvisor.dev/gvisor/pkg/syserr"
+	"gvisor.dev/gvisor/pkg/waiter"
+)
+
+func (d *dentry) isSocket() bool {
+	return d.fileType() == linux.S_IFSOCK
+}
+
+// endpoint is a Gofer-backed transport.BoundEndpoint.
+//
+// An endpoint's lifetime is the time between when filesystem.BoundEndpointAt()
+// is called and either BoundEndpoint.BidirectionalConnect or
+// BoundEndpoint.UnidirectionalConnect is called.
+type endpoint struct {
+	// dentry is the filesystem dentry which produced this endpoint.
+	dentry *dentry
+
+	// file is the p9 file that contains a single unopened fid.
+	file p9.File
+
+	// path is the sentry path where this endpoint is bound.
+	path string
+}
+
+func sockTypeToP9(t linux.SockType) (p9.ConnectFlags, bool) {
+	switch t {
+	case linux.SOCK_STREAM:
+		return p9.StreamSocket, true
+	case linux.SOCK_SEQPACKET:
+		return p9.SeqpacketSocket, true
+	case linux.SOCK_DGRAM:
+		return p9.DgramSocket, true
+	}
+	return 0, false
+}
+
+// BidirectionalConnect implements ConnectableEndpoint.BidirectionalConnect.
+func (e *endpoint) BidirectionalConnect(ctx context.Context, ce transport.ConnectingEndpoint, returnConnect func(transport.Receiver, transport.ConnectedEndpoint)) *syserr.Error {
+	cf, ok := sockTypeToP9(ce.Type())
+	if !ok {
+		return syserr.ErrConnectionRefused
+	}
+
+	// No lock ordering required as only the ConnectingEndpoint has a mutex.
+	ce.Lock()
+
+	// Check connecting state.
+	if ce.Connected() {
+		ce.Unlock()
+		return syserr.ErrAlreadyConnected
+	}
+	if ce.Listening() {
+		ce.Unlock()
+		return syserr.ErrInvalidEndpointState
+	}
+
+	c, err := e.newConnectedEndpoint(ctx, cf, ce.WaiterQueue())
+	if err != nil {
+		ce.Unlock()
+		return err
+	}
+
+	returnConnect(c, c)
+	ce.Unlock()
+	c.Init()
+
+	return nil
+}
+
+// UnidirectionalConnect implements
+// transport.BoundEndpoint.UnidirectionalConnect.
+func (e *endpoint) UnidirectionalConnect(ctx context.Context) (transport.ConnectedEndpoint, *syserr.Error) {
+	c, err := e.newConnectedEndpoint(ctx, p9.DgramSocket, &waiter.Queue{})
+	if err != nil {
+		return nil, err
+	}
+	c.Init()
+
+	// We don't need the receiver.
+	c.CloseRecv()
+	c.Release()
+
+	return c, nil
+}
+
+func (e *endpoint) newConnectedEndpoint(ctx context.Context, flags p9.ConnectFlags, queue *waiter.Queue) (*host.SCMConnectedEndpoint, *syserr.Error) {
+	hostFile, err := e.file.Connect(flags)
+	if err != nil {
+		return nil, syserr.ErrConnectionRefused
+	}
+	// Dup the fd so that the new endpoint can manage its lifetime.
+	hostFD, err := syscall.Dup(hostFile.FD())
+	if err != nil {
+		log.Warningf("Could not dup host socket fd %d: %v", hostFile.FD(), err)
+		return nil, syserr.FromError(err)
+	}
+	// After duplicating, we no longer need hostFile.
+	hostFile.Close()
+
+	c, serr := host.NewSCMEndpoint(ctx, hostFD, queue, e.path)
+	if serr != nil {
+		log.Warningf("Gofer returned invalid host socket for BidirectionalConnect; file %+v flags %+v: %v", e.file, flags, serr)
+		return nil, serr
+	}
+	return c, nil
+}
+
+// Release implements transport.BoundEndpoint.Release.
+func (e *endpoint) Release() {
+	e.dentry.DecRef()
+}
+
+// Passcred implements transport.BoundEndpoint.Passcred.
+func (e *endpoint) Passcred() bool {
+	return false
+}
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index 2dcb03a73..e1c56d89b 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -8,6 +8,9 @@ go_library(
         "control.go",
         "host.go",
         "ioctl_unsafe.go",
+        "socket.go",
+        "socket_iovec.go",
+        "socket_unsafe.go",
         "tty.go",
         "util.go",
         "util_unsafe.go",
@@ -16,6 +19,7 @@ go_library(
     deps = [
         "//pkg/abi/linux",
         "//pkg/context",
+        "//pkg/fdnotifier",
         "//pkg/log",
         "//pkg/refs",
         "//pkg/sentry/arch",
@@ -25,12 +29,18 @@ go_library(
         "//pkg/sentry/kernel/auth",
         "//pkg/sentry/memmap",
         "//pkg/sentry/socket/control",
+        "//pkg/sentry/socket/unix",
         "//pkg/sentry/socket/unix/transport",
         "//pkg/sentry/unimpl",
+        "//pkg/sentry/uniqueid",
         "//pkg/sentry/vfs",
         "//pkg/sync",
+        "//pkg/syserr",
         "//pkg/syserror",
+        "//pkg/tcpip",
+        "//pkg/unet",
         "//pkg/usermem",
+        "//pkg/waiter",
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 1e53b5c1b..05538ea42 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -17,7 +17,6 @@
 package host
 
 import (
-	"errors"
 	"fmt"
 	"math"
 	"syscall"
@@ -31,6 +30,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/hostfd"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
+	unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
@@ -156,7 +156,7 @@ type inode struct {
 
 // Note that these flags may become out of date, since they can be modified
 // on the host, e.g. with fcntl.
-func fileFlagsFromHostFD(fd int) (int, error) {
+func fileFlagsFromHostFD(fd int) (uint32, error) {
 	flags, err := unix.FcntlInt(uintptr(fd), syscall.F_GETFL, 0)
 	if err != nil {
 		log.Warningf("Failed to get file flags for donated FD %d: %v", fd, err)
@@ -164,7 +164,7 @@ func fileFlagsFromHostFD(fd int) (int, error) {
 	}
 	// TODO(gvisor.dev/issue/1672): implement behavior corresponding to these allowed flags.
 	flags &= syscall.O_ACCMODE | syscall.O_DIRECT | syscall.O_NONBLOCK | syscall.O_DSYNC | syscall.O_SYNC | syscall.O_APPEND
-	return flags, nil
+	return uint32(flags), nil
 }
 
 // CheckPermissions implements kernfs.Inode.
@@ -361,6 +361,10 @@ func (i *inode) Destroy() {
 
 // Open implements kernfs.Inode.
 func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+	// Once created, we cannot re-open a socket fd through /proc/[pid]/fd/.
+	if i.Mode().FileType() == linux.S_IFSOCK {
+		return nil, syserror.ENXIO
+	}
 	return i.open(ctx, vfsd, rp.Mount())
 }
 
@@ -370,42 +374,45 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount) (*vfs.F
 		return nil, err
 	}
 	fileType := s.Mode & linux.FileTypeMask
+	flags, err := fileFlagsFromHostFD(i.hostFD)
+	if err != nil {
+		return nil, err
+	}
+
 	if fileType == syscall.S_IFSOCK {
 		if i.isTTY {
-			return nil, errors.New("cannot use host socket as TTY")
+			log.Warningf("cannot use host socket fd %d as TTY", i.hostFD)
+			return nil, syserror.ENOTTY
 		}
-		// TODO(gvisor.dev/issue/1672): support importing sockets.
-		return nil, errors.New("importing host sockets not supported")
+
+		ep, err := newEndpoint(ctx, i.hostFD)
+		if err != nil {
+			return nil, err
+		}
+		// Currently, we only allow Unix sockets to be imported.
+		return unixsocket.NewFileDescription(ep, ep.Type(), flags, mnt, d)
 	}
 
 	// TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that
 	// we don't allow importing arbitrary file types without proper support.
-	var (
-		vfsfd  *vfs.FileDescription
-		fdImpl vfs.FileDescriptionImpl
-	)
 	if i.isTTY {
 		fd := &ttyFD{
 			fileDescription: fileDescription{inode: i},
 			termios:         linux.DefaultSlaveTermios,
 		}
-		vfsfd = &fd.vfsfd
-		fdImpl = fd
-	} else {
-		// For simplicity, set offset to 0. Technically, we should
-		// only set to 0 on files that are not seekable (sockets, pipes, etc.),
-		// and use the offset from the host fd otherwise.
-		fd := &fileDescription{inode: i}
-		vfsfd = &fd.vfsfd
-		fdImpl = fd
-	}
-
-	flags, err := fileFlagsFromHostFD(i.hostFD)
-	if err != nil {
-		return nil, err
+		vfsfd := &fd.vfsfd
+		if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
+			return nil, err
+		}
+		return vfsfd, nil
 	}
 
-	if err := vfsfd.Init(fdImpl, uint32(flags), mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
+	// For simplicity, set offset to 0. Technically, we should
+	// only set to 0 on files that are not seekable (sockets, pipes, etc.),
+	// and use the offset from the host fd otherwise.
+	fd := &fileDescription{inode: i}
+	vfsfd := &fd.vfsfd
+	if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
 		return nil, err
 	}
 	return vfsfd, nil
diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go
new file mode 100644
index 000000000..39dd624a0
--- /dev/null
+++ b/pkg/sentry/fsimpl/host/socket.go
@@ -0,0 +1,397 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package host
+
+import (
+	"fmt"
+	"syscall"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/fdnotifier"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/refs"
+	"gvisor.dev/gvisor/pkg/sentry/socket/control"
+	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+	"gvisor.dev/gvisor/pkg/sentry/uniqueid"
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/syserr"
+	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/unet"
+	"gvisor.dev/gvisor/pkg/waiter"
+)
+
+// Create a new host-backed endpoint from the given fd.
+func newEndpoint(ctx context.Context, hostFD int) (transport.Endpoint, error) {
+	// Set up an external transport.Endpoint using the host fd.
+	addr := fmt.Sprintf("hostfd:[%d]", hostFD)
+	var q waiter.Queue
+	e, err := NewConnectedEndpoint(ctx, hostFD, &q, addr, true /* saveable */)
+	if err != nil {
+		return nil, err.ToError()
+	}
+	e.Init()
+	ep := transport.NewExternal(ctx, e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e)
+	return ep, nil
+}
+
+// maxSendBufferSize is the maximum host send buffer size allowed for endpoint.
+//
+// N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max).
+const maxSendBufferSize = 8 << 20
+
+// ConnectedEndpoint is an implementation of transport.ConnectedEndpoint and
+// transport.Receiver. It is backed by a host fd that was imported at sentry
+// startup. This fd is shared with a hostfs inode, which retains ownership of
+// it.
+//
+// ConnectedEndpoint is saveable, since we expect that the host will provide
+// the same fd upon restore.
+//
+// As of this writing, we only allow Unix sockets to be imported.
+//
+// +stateify savable
+type ConnectedEndpoint struct {
+	// ref keeps track of references to a ConnectedEndpoint.
+	ref refs.AtomicRefCount
+
+	// mu protects fd below.
+	mu sync.RWMutex `state:"nosave"`
+
+	// fd is the host fd backing this endpoint.
+	fd int
+
+	// addr is the address at which this endpoint is bound.
+	addr string
+
+	queue *waiter.Queue
+
+	// sndbuf is the size of the send buffer.
+	//
+	// N.B. When this is smaller than the host size, we present it via
+	// GetSockOpt and message splitting/rejection in SendMsg, but do not
+	// prevent lots of small messages from filling the real send buffer
+	// size on the host.
+	sndbuf int64 `state:"nosave"`
+
+	// stype is the type of Unix socket.
+	stype linux.SockType
+}
+
+// init performs initialization required for creating new ConnectedEndpoints and
+// for restoring them.
+func (c *ConnectedEndpoint) init() *syserr.Error {
+	family, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_DOMAIN)
+	if err != nil {
+		return syserr.FromError(err)
+	}
+
+	if family != syscall.AF_UNIX {
+		// We only allow Unix sockets.
+		return syserr.ErrInvalidEndpointState
+	}
+
+	stype, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_TYPE)
+	if err != nil {
+		return syserr.FromError(err)
+	}
+
+	if err := syscall.SetNonblock(c.fd, true); err != nil {
+		return syserr.FromError(err)
+	}
+
+	sndbuf, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF)
+	if err != nil {
+		return syserr.FromError(err)
+	}
+	if sndbuf > maxSendBufferSize {
+		log.Warningf("Socket send buffer too large: %d", sndbuf)
+		return syserr.ErrInvalidEndpointState
+	}
+
+	c.stype = linux.SockType(stype)
+	c.sndbuf = int64(sndbuf)
+
+	return nil
+}
+
+// NewConnectedEndpoint creates a new ConnectedEndpoint backed by a host fd
+// imported at sentry startup,
+//
+// The caller is responsible for calling Init(). Additionaly, Release needs to
+// be called twice because ConnectedEndpoint is both a transport.Receiver and
+// transport.ConnectedEndpoint.
+func NewConnectedEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue, addr string, saveable bool) (*ConnectedEndpoint, *syserr.Error) {
+	e := ConnectedEndpoint{
+		fd:    hostFD,
+		addr:  addr,
+		queue: queue,
+	}
+
+	if err := e.init(); err != nil {
+		return nil, err
+	}
+
+	// AtomicRefCounters start off with a single reference. We need two.
+	e.ref.IncRef()
+	e.ref.EnableLeakCheck("host.ConnectedEndpoint")
+	return &e, nil
+}
+
+// Init will do the initialization required without holding other locks.
+func (c *ConnectedEndpoint) Init() {
+	if err := fdnotifier.AddFD(int32(c.fd), c.queue); err != nil {
+		panic(err)
+	}
+}
+
+// Send implements transport.ConnectedEndpoint.Send.
+func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	if !controlMessages.Empty() {
+		return 0, false, syserr.ErrInvalidEndpointState
+	}
+
+	// Since stream sockets don't preserve message boundaries, we can write
+	// only as much of the message as fits in the send buffer.
+	truncate := c.stype == linux.SOCK_STREAM
+
+	n, totalLen, err := fdWriteVec(c.fd, data, c.sndbuf, truncate)
+	if n < totalLen && err == nil {
+		// The host only returns a short write if it would otherwise
+		// block (and only for stream sockets).
+		err = syserror.EAGAIN
+	}
+	if n > 0 && err != syserror.EAGAIN {
+		// The caller may need to block to send more data, but
+		// otherwise there isn't anything that can be done about an
+		// error with a partial write.
+		err = nil
+	}
+
+	// There is no need for the callee to call SendNotify because fdWriteVec
+	// uses the host's sendmsg(2) and the host kernel's queue.
+	return n, false, syserr.FromError(err)
+}
+
+// SendNotify implements transport.ConnectedEndpoint.SendNotify.
+func (c *ConnectedEndpoint) SendNotify() {}
+
+// CloseSend implements transport.ConnectedEndpoint.CloseSend.
+func (c *ConnectedEndpoint) CloseSend() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if err := syscall.Shutdown(c.fd, syscall.SHUT_WR); err != nil {
+		// A well-formed UDS shutdown can't fail. See
+		// net/unix/af_unix.c:unix_shutdown.
+		panic(fmt.Sprintf("failed write shutdown on host socket %+v: %v", c, err))
+	}
+}
+
+// CloseNotify implements transport.ConnectedEndpoint.CloseNotify.
+func (c *ConnectedEndpoint) CloseNotify() {}
+
+// Writable implements transport.ConnectedEndpoint.Writable.
+func (c *ConnectedEndpoint) Writable() bool {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	return fdnotifier.NonBlockingPoll(int32(c.fd), waiter.EventOut)&waiter.EventOut != 0
+}
+
+// Passcred implements transport.ConnectedEndpoint.Passcred.
+func (c *ConnectedEndpoint) Passcred() bool {
+	// We don't support credential passing for host sockets.
+	return false
+}
+
+// GetLocalAddress implements transport.ConnectedEndpoint.GetLocalAddress.
+func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+	return tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, nil
+}
+
+// EventUpdate implements transport.ConnectedEndpoint.EventUpdate.
+func (c *ConnectedEndpoint) EventUpdate() {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+	if c.fd != -1 {
+		fdnotifier.UpdateFD(int32(c.fd))
+	}
+}
+
+// Recv implements transport.Receiver.Recv.
+func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	var cm unet.ControlMessage
+	if numRights > 0 {
+		cm.EnableFDs(int(numRights))
+	}
+
+	// N.B. Unix sockets don't have a receive buffer, the send buffer
+	// serves both purposes.
+	rl, ml, cl, cTrunc, err := fdReadVec(c.fd, data, []byte(cm), peek, c.sndbuf)
+	if rl > 0 && err != nil {
+		// We got some data, so all we need to do on error is return
+		// the data that we got. Short reads are fine, no need to
+		// block.
+		err = nil
+	}
+	if err != nil {
+		return 0, 0, transport.ControlMessages{}, false, tcpip.FullAddress{}, false, syserr.FromError(err)
+	}
+
+	// There is no need for the callee to call RecvNotify because fdReadVec uses
+	// the host's recvmsg(2) and the host kernel's queue.
+
+	// Trim the control data if we received less than the full amount.
+	if cl < uint64(len(cm)) {
+		cm = cm[:cl]
+	}
+
+	// Avoid extra allocations in the case where there isn't any control data.
+	if len(cm) == 0 {
+		return rl, ml, transport.ControlMessages{}, cTrunc, tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, false, nil
+	}
+
+	fds, err := cm.ExtractFDs()
+	if err != nil {
+		return 0, 0, transport.ControlMessages{}, false, tcpip.FullAddress{}, false, syserr.FromError(err)
+	}
+
+	if len(fds) == 0 {
+		return rl, ml, transport.ControlMessages{}, cTrunc, tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, false, nil
+	}
+	return rl, ml, control.NewVFS2(nil, nil, newSCMRights(fds)), cTrunc, tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, false, nil
+}
+
+// RecvNotify implements transport.Receiver.RecvNotify.
+func (c *ConnectedEndpoint) RecvNotify() {}
+
+// CloseRecv implements transport.Receiver.CloseRecv.
+func (c *ConnectedEndpoint) CloseRecv() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if err := syscall.Shutdown(c.fd, syscall.SHUT_RD); err != nil {
+		// A well-formed UDS shutdown can't fail. See
+		// net/unix/af_unix.c:unix_shutdown.
+		panic(fmt.Sprintf("failed read shutdown on host socket %+v: %v", c, err))
+	}
+}
+
+// Readable implements transport.Receiver.Readable.
+func (c *ConnectedEndpoint) Readable() bool {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	return fdnotifier.NonBlockingPoll(int32(c.fd), waiter.EventIn)&waiter.EventIn != 0
+}
+
+// SendQueuedSize implements transport.Receiver.SendQueuedSize.
+func (c *ConnectedEndpoint) SendQueuedSize() int64 {
+	// TODO(gvisor.dev/issue/273): SendQueuedSize isn't supported for host
+	// sockets because we don't allow the sentry to call ioctl(2).
+	return -1
+}
+
+// RecvQueuedSize implements transport.Receiver.RecvQueuedSize.
+func (c *ConnectedEndpoint) RecvQueuedSize() int64 {
+	// TODO(gvisor.dev/issue/273): RecvQueuedSize isn't supported for host
+	// sockets because we don't allow the sentry to call ioctl(2).
+	return -1
+}
+
+// SendMaxQueueSize implements transport.Receiver.SendMaxQueueSize.
+func (c *ConnectedEndpoint) SendMaxQueueSize() int64 {
+	return int64(c.sndbuf)
+}
+
+// RecvMaxQueueSize implements transport.Receiver.RecvMaxQueueSize.
+func (c *ConnectedEndpoint) RecvMaxQueueSize() int64 {
+	// N.B. Unix sockets don't use the receive buffer. We'll claim it is
+	// the same size as the send buffer.
+	return int64(c.sndbuf)
+}
+
+func (c *ConnectedEndpoint) destroyLocked() {
+	fdnotifier.RemoveFD(int32(c.fd))
+	c.fd = -1
+}
+
+// Release implements transport.ConnectedEndpoint.Release and
+// transport.Receiver.Release.
+func (c *ConnectedEndpoint) Release() {
+	c.ref.DecRefWithDestructor(func() {
+		c.mu.Lock()
+		c.destroyLocked()
+		c.mu.Unlock()
+	})
+}
+
+// CloseUnread implements transport.ConnectedEndpoint.CloseUnread.
+func (c *ConnectedEndpoint) CloseUnread() {}
+
+// SCMConnectedEndpoint represents an endpoint backed by a host fd that was
+// passed through a gofer Unix socket. It is almost the same as
+// ConnectedEndpoint, with the following differences:
+// - SCMConnectedEndpoint is not saveable, because the host cannot guarantee
+// the same descriptor number across S/R.
+// - SCMConnectedEndpoint holds ownership of its fd and is responsible for
+// closing it.
+type SCMConnectedEndpoint struct {
+	ConnectedEndpoint
+}
+
+// Release implements transport.ConnectedEndpoint.Release and
+// transport.Receiver.Release.
+func (e *SCMConnectedEndpoint) Release() {
+	e.ref.DecRefWithDestructor(func() {
+		e.mu.Lock()
+		if err := syscall.Close(e.fd); err != nil {
+			log.Warningf("Failed to close host fd %d: %v", err)
+		}
+		e.destroyLocked()
+		e.mu.Unlock()
+	})
+}
+
+// NewSCMEndpoint creates a new SCMConnectedEndpoint backed by a host fd that
+// was passed through a Unix socket.
+//
+// The caller is responsible for calling Init(). Additionaly, Release needs to
+// be called twice because ConnectedEndpoint is both a transport.Receiver and
+// transport.ConnectedEndpoint.
+func NewSCMEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue, addr string) (*SCMConnectedEndpoint, *syserr.Error) {
+	e := SCMConnectedEndpoint{ConnectedEndpoint{
+		fd:    hostFD,
+		addr:  addr,
+		queue: queue,
+	}}
+
+	if err := e.init(); err != nil {
+		return nil, err
+	}
+
+	// AtomicRefCounters start off with a single reference. We need two.
+	e.ref.IncRef()
+	e.ref.EnableLeakCheck("host.SCMConnectedEndpoint")
+	return &e, nil
+}
diff --git a/pkg/sentry/fsimpl/host/socket_iovec.go b/pkg/sentry/fsimpl/host/socket_iovec.go
new file mode 100644
index 000000000..584c247d2
--- /dev/null
+++ b/pkg/sentry/fsimpl/host/socket_iovec.go
@@ -0,0 +1,113 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package host
+
+import (
+	"syscall"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// maxIovs is the maximum number of iovecs to pass to the host.
+var maxIovs = linux.UIO_MAXIOV
+
+// copyToMulti copies as many bytes from src to dst as possible.
+func copyToMulti(dst [][]byte, src []byte) {
+	for _, d := range dst {
+		done := copy(d, src)
+		src = src[done:]
+		if len(src) == 0 {
+			break
+		}
+	}
+}
+
+// copyFromMulti copies as many bytes from src to dst as possible.
+func copyFromMulti(dst []byte, src [][]byte) {
+	for _, s := range src {
+		done := copy(dst, s)
+		dst = dst[done:]
+		if len(dst) == 0 {
+			break
+		}
+	}
+}
+
+// buildIovec builds an iovec slice from the given []byte slice.
+//
+// If truncate, truncate bufs > maxlen. Otherwise, immediately return an error.
+//
+// If length < the total length of bufs, err indicates why, even when returning
+// a truncated iovec.
+//
+// If intermediate != nil, iovecs references intermediate rather than bufs and
+// the caller must copy to/from bufs as necessary.
+func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovecs []syscall.Iovec, intermediate []byte, err error) {
+	var iovsRequired int
+	for _, b := range bufs {
+		length += int64(len(b))
+		if len(b) > 0 {
+			iovsRequired++
+		}
+	}
+
+	stopLen := length
+	if length > maxlen {
+		if truncate {
+			stopLen = maxlen
+			err = syserror.EAGAIN
+		} else {
+			return 0, nil, nil, syserror.EMSGSIZE
+		}
+	}
+
+	if iovsRequired > maxIovs {
+		// The kernel will reject our call if we pass this many iovs.
+		// Use a single intermediate buffer instead.
+		b := make([]byte, stopLen)
+
+		return stopLen, []syscall.Iovec{{
+			Base: &b[0],
+			Len:  uint64(stopLen),
+		}}, b, err
+	}
+
+	var total int64
+	iovecs = make([]syscall.Iovec, 0, iovsRequired)
+	for i := range bufs {
+		l := len(bufs[i])
+		if l == 0 {
+			continue
+		}
+
+		stop := int64(l)
+		if total+stop > stopLen {
+			stop = stopLen - total
+		}
+
+		iovecs = append(iovecs, syscall.Iovec{
+			Base: &bufs[i][0],
+			Len:  uint64(stop),
+		})
+
+		total += stop
+		if total >= stopLen {
+			break
+		}
+	}
+
+	return total, iovecs, nil, err
+}
diff --git a/pkg/sentry/fsimpl/host/socket_unsafe.go b/pkg/sentry/fsimpl/host/socket_unsafe.go
new file mode 100644
index 000000000..35ded24bc
--- /dev/null
+++ b/pkg/sentry/fsimpl/host/socket_unsafe.go
@@ -0,0 +1,101 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package host
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// fdReadVec receives from fd to bufs.
+//
+// If the total length of bufs is > maxlen, fdReadVec will do a partial read
+// and err will indicate why the message was truncated.
+func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int64) (readLen int64, msgLen int64, controlLen uint64, controlTrunc bool, err error) {
+	flags := uintptr(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC)
+	if peek {
+		flags |= syscall.MSG_PEEK
+	}
+
+	// Always truncate the receive buffer. All socket types will truncate
+	// received messages.
+	length, iovecs, intermediate, err := buildIovec(bufs, maxlen, true)
+	if err != nil && len(iovecs) == 0 {
+		// No partial write to do, return error immediately.
+		return 0, 0, 0, false, err
+	}
+
+	var msg syscall.Msghdr
+	if len(control) != 0 {
+		msg.Control = &control[0]
+		msg.Controllen = uint64(len(control))
+	}
+
+	if len(iovecs) != 0 {
+		msg.Iov = &iovecs[0]
+		msg.Iovlen = uint64(len(iovecs))
+	}
+
+	rawN, _, e := syscall.RawSyscall(syscall.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), flags)
+	if e != 0 {
+		// N.B. prioritize the syscall error over the buildIovec error.
+		return 0, 0, 0, false, e
+	}
+	n := int64(rawN)
+
+	// Copy data back to bufs.
+	if intermediate != nil {
+		copyToMulti(bufs, intermediate)
+	}
+
+	controlTrunc = msg.Flags&syscall.MSG_CTRUNC == syscall.MSG_CTRUNC
+
+	if n > length {
+		return length, n, msg.Controllen, controlTrunc, err
+	}
+
+	return n, n, msg.Controllen, controlTrunc, err
+}
+
+// fdWriteVec sends from bufs to fd.
+//
+// If the total length of bufs is > maxlen && truncate, fdWriteVec will do a
+// partial write and err will indicate why the message was truncated.
+func fdWriteVec(fd int, bufs [][]byte, maxlen int64, truncate bool) (int64, int64, error) {
+	length, iovecs, intermediate, err := buildIovec(bufs, maxlen, truncate)
+	if err != nil && len(iovecs) == 0 {
+		// No partial write to do, return error immediately.
+		return 0, length, err
+	}
+
+	// Copy data to intermediate buf.
+	if intermediate != nil {
+		copyFromMulti(intermediate, bufs)
+	}
+
+	var msg syscall.Msghdr
+	if len(iovecs) > 0 {
+		msg.Iov = &iovecs[0]
+		msg.Iovlen = uint64(len(iovecs))
+	}
+
+	n, _, e := syscall.RawSyscall(syscall.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), syscall.MSG_DONTWAIT|syscall.MSG_NOSIGNAL)
+	if e != 0 {
+		// N.B. prioritize the syscall error over the buildIovec error.
+		return 0, length, e
+	}
+
+	return int64(n), length, err
+}
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 9e8d80414..40c1b2104 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -766,7 +766,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
 }
 
 // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt.
-func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath) (transport.BoundEndpoint, error) {
+func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
 	fs.mu.RLock()
 	_, _, err := fs.walkExistingLocked(ctx, rp)
 	fs.mu.RUnlock()
diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go
index 5ce50625b..271134af8 100644
--- a/pkg/sentry/fsimpl/sockfs/sockfs.go
+++ b/pkg/sentry/fsimpl/sockfs/sockfs.go
@@ -73,26 +73,14 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
 	return nil, syserror.ENXIO
 }
 
-// InitSocket initializes a socket FileDescription, with a corresponding
-// Dentry in mnt.
-//
-// fd should be the FileDescription associated with socketImpl, i.e. its first
-// field. mnt should be the global socket mount, Kernel.socketMount.
-func InitSocket(socketImpl vfs.FileDescriptionImpl, fd *vfs.FileDescription, mnt *vfs.Mount, creds *auth.Credentials) error {
-	fsimpl := mnt.Filesystem().Impl()
-	fs := fsimpl.(*kernfs.Filesystem)
-
+// NewDentry constructs and returns a sockfs dentry.
+func NewDentry(creds *auth.Credentials, ino uint64) *vfs.Dentry {
 	// File mode matches net/socket.c:sock_alloc.
 	filemode := linux.FileMode(linux.S_IFSOCK | 0600)
 	i := &inode{}
-	i.Init(creds, fs.NextIno(), filemode)
+	i.Init(creds, ino, filemode)
 
 	d := &kernfs.Dentry{}
 	d.Init(i)
-
-	opts := &vfs.FileDescriptionOptions{UseDentryMetadata: true}
-	if err := fd.Init(socketImpl, linux.O_RDWR, mnt, d.VFSDentry(), opts); err != nil {
-		return err
-	}
-	return nil
+	return d.VFSDentry()
 }
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 5b62f9ebb..388b98bef 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -697,7 +697,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
 }
 
 // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt.
-func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath) (transport.BoundEndpoint, error) {
+func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
 	fs.mu.RLock()
 	defer fs.mu.RUnlock()
 	d, err := resolveLocked(rp)
diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go
index 55c0f04f3..ff1cfd8f6 100644
--- a/pkg/sentry/socket/netfilter/tcp_matcher.go
+++ b/pkg/sentry/socket/netfilter/tcp_matcher.go
@@ -121,13 +121,12 @@ func (tm *TCPMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interfaceNa
 		tcpHeader = header.TCP(pkt.TransportHeader)
 	} else {
 		// The TCP header hasn't been parsed yet. We have to do it here.
-		hdr, ok := pkt.Data.PullUp(header.TCPMinimumSize)
-		if !ok {
+		if len(pkt.Data.First()) < header.TCPMinimumSize {
 			// There's no valid TCP header here, so we hotdrop the
 			// packet.
 			return false, true
 		}
-		tcpHeader = header.TCP(hdr)
+		tcpHeader = header.TCP(pkt.Data.First())
 	}
 
 	// Check whether the source and destination ports are within the
diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go
index 04d03d494..3359418c1 100644
--- a/pkg/sentry/socket/netfilter/udp_matcher.go
+++ b/pkg/sentry/socket/netfilter/udp_matcher.go
@@ -120,13 +120,12 @@ func (um *UDPMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interfaceNa
 		udpHeader = header.UDP(pkt.TransportHeader)
 	} else {
 		// The UDP header hasn't been parsed yet. We have to do it here.
-		hdr, ok := pkt.Data.PullUp(header.UDPMinimumSize)
-		if !ok {
+		if len(pkt.Data.First()) < header.UDPMinimumSize {
 			// There's no valid UDP header here, so we hotdrop the
 			// packet.
 			return false, true
 		}
-		udpHeader = header.UDP(hdr)
+		udpHeader = header.UDP(pkt.Data.First())
 	}
 
 	// Check whether the source and destination ports are within the
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index de2cc4bdf..941a91097 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -21,6 +21,7 @@ go_library(
         "//pkg/sentry/device",
         "//pkg/sentry/fs",
         "//pkg/sentry/fs/fsutil",
+        "//pkg/sentry/fsimpl/kernfs",
         "//pkg/sentry/fsimpl/sockfs",
         "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/time",
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 7c64f30fa..9aad41c9e 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -373,7 +373,7 @@ func extractEndpoint(t *kernel.Task, sockaddr []byte) (transport.BoundEndpoint,
 			Path:               p,
 			FollowFinalSymlink: true,
 		}
-		ep, e := t.Kernel().VFS().BoundEndpointAt(t, t.Credentials(), &pop)
+		ep, e := t.Kernel().VFS().BoundEndpointAt(t, t.Credentials(), &pop, &vfs.BoundEndpointOptions{path})
 		root.DecRef()
 		if relPath {
 			start.DecRef()
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index 3e54d49c4..315dc274f 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -19,6 +19,7 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/fspath"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/socket/control"
@@ -42,30 +43,44 @@ type SocketVFS2 struct {
 	socketOpsCommon
 }
 
-// NewVFS2File creates and returns a new vfs.FileDescription for a unix socket.
-func NewVFS2File(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) {
-	sock := NewFDImpl(ep, stype)
-	vfsfd := &sock.vfsfd
-	if err := sockfs.InitSocket(sock, vfsfd, t.Kernel().SocketMount(), t.Credentials()); err != nil {
+// NewSockfsFile creates a new socket file in the global sockfs mount and
+// returns a corresponding file description.
+func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) {
+	mnt := t.Kernel().SocketMount()
+	fs := mnt.Filesystem().Impl().(*kernfs.Filesystem)
+	d := sockfs.NewDentry(t.Credentials(), fs.NextIno())
+
+	fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d)
+	if err != nil {
 		return nil, syserr.FromError(err)
 	}
-	return vfsfd, nil
+	return fd, nil
 }
 
-// NewFDImpl creates and returns a new SocketVFS2.
-func NewFDImpl(ep transport.Endpoint, stype linux.SockType) *SocketVFS2 {
+// NewFileDescription creates and returns a socket file description
+// corresponding to the given mount and dentry.
+func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry) (*vfs.FileDescription, error) {
 	// You can create AF_UNIX, SOCK_RAW sockets. They're the same as
 	// SOCK_DGRAM and don't require CAP_NET_RAW.
 	if stype == linux.SOCK_RAW {
 		stype = linux.SOCK_DGRAM
 	}
 
-	return &SocketVFS2{
+	sock := &SocketVFS2{
 		socketOpsCommon: socketOpsCommon{
 			ep:    ep,
 			stype: stype,
 		},
 	}
+	vfsfd := &sock.vfsfd
+	if err := vfsfd.Init(sock, flags, mnt, d, &vfs.FileDescriptionOptions{
+		DenyPRead:         true,
+		DenyPWrite:        true,
+		UseDentryMetadata: true,
+	}); err != nil {
+		return nil, err
+	}
+	return vfsfd, nil
 }
 
 // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
@@ -112,8 +127,7 @@ func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, block
 		}
 	}
 
-	// We expect this to be a FileDescription here.
-	ns, err := NewVFS2File(t, ep, s.stype)
+	ns, err := NewSockfsFile(t, ep, s.stype)
 	if err != nil {
 		return 0, nil, 0, err
 	}
@@ -307,7 +321,7 @@ func (*providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int)
 		return nil, syserr.ErrInvalidArgument
 	}
 
-	f, err := NewVFS2File(t, ep, stype)
+	f, err := NewSockfsFile(t, ep, stype)
 	if err != nil {
 		ep.Close()
 		return nil, err
@@ -331,13 +345,13 @@ func (*providerVFS2) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*
 
 	// Create the endpoints and sockets.
 	ep1, ep2 := transport.NewPair(t, stype, t.Kernel())
-	s1, err := NewVFS2File(t, ep1, stype)
+	s1, err := NewSockfsFile(t, ep1, stype)
 	if err != nil {
 		ep1.Close()
 		ep2.Close()
 		return nil, nil, err
 	}
-	s2, err := NewVFS2File(t, ep2, stype)
+	s2, err := NewSockfsFile(t, ep2, stype)
 	if err != nil {
 		s1.DecRef()
 		ep2.Close()
diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go
index a64d86122..b1a998590 100644
--- a/pkg/sentry/vfs/anonfs.go
+++ b/pkg/sentry/vfs/anonfs.go
@@ -237,7 +237,7 @@ func (fs *anonFilesystem) UnlinkAt(ctx context.Context, rp *ResolvingPath) error
 }
 
 // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt.
-func (fs *anonFilesystem) BoundEndpointAt(ctx context.Context, rp *ResolvingPath) (transport.BoundEndpoint, error) {
+func (fs *anonFilesystem) BoundEndpointAt(ctx context.Context, rp *ResolvingPath, opts BoundEndpointOptions) (transport.BoundEndpoint, error) {
 	if !rp.Final() {
 		return nil, syserror.ENOTDIR
 	}
diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go
index 20e5bb072..70385a21f 100644
--- a/pkg/sentry/vfs/filesystem.go
+++ b/pkg/sentry/vfs/filesystem.go
@@ -495,7 +495,7 @@ type FilesystemImpl interface {
 	// BoundEndpointAt returns the Unix socket endpoint bound at the path rp.
 	//
 	// - If a non-socket file exists at rp, then BoundEndpointAt returns ECONNREFUSED.
-	BoundEndpointAt(ctx context.Context, rp *ResolvingPath) (transport.BoundEndpoint, error)
+	BoundEndpointAt(ctx context.Context, rp *ResolvingPath, opts BoundEndpointOptions) (transport.BoundEndpoint, error)
 
 	// PrependPath prepends a path from vd to vd.Mount().Root() to b.
 	//
diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go
index 022bac127..53d364c5c 100644
--- a/pkg/sentry/vfs/options.go
+++ b/pkg/sentry/vfs/options.go
@@ -151,6 +151,24 @@ type SetStatOptions struct {
 	Stat linux.Statx
 }
 
+// BoundEndpointOptions contains options to VirtualFilesystem.BoundEndpointAt()
+// and FilesystemImpl.BoundEndpointAt().
+type BoundEndpointOptions struct {
+	// Addr is the path of the file whose socket endpoint is being retrieved.
+	// It is generally irrelevant: most endpoints are stored at a dentry that
+	// was created through a bind syscall, so the path can be stored on creation.
+	// However, if the endpoint was created in FilesystemImpl.BoundEndpointAt(),
+	// then we may not know what the original bind address was.
+	//
+	// For example, if connect(2) is called with address "foo" which corresponds
+	// a remote named socket in goferfs, we need to generate an endpoint wrapping
+	// that file. In this case, we can use Addr to set the endpoint address to
+	// "foo". Note that Addr is only a best-effort attempt--we still do not know
+	// the exact address that was used on the remote fs to bind the socket (it
+	// may have been "foo", "./foo", etc.).
+	Addr string
+}
+
 // GetxattrOptions contains options to VirtualFilesystem.GetxattrAt(),
 // FilesystemImpl.GetxattrAt(), FileDescription.Getxattr(), and
 // FileDescriptionImpl.Getxattr().
diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index 9015f2cc1..8d7f8f8af 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -351,33 +351,6 @@ func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentia
 	}
 }
 
-// BoundEndpointAt gets the bound endpoint at the given path, if one exists.
-func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (transport.BoundEndpoint, error) {
-	if !pop.Path.Begin.Ok() {
-		if pop.Path.Absolute {
-			return nil, syserror.ECONNREFUSED
-		}
-		return nil, syserror.ENOENT
-	}
-	rp := vfs.getResolvingPath(creds, pop)
-	for {
-		bep, err := rp.mount.fs.impl.BoundEndpointAt(ctx, rp)
-		if err == nil {
-			vfs.putResolvingPath(rp)
-			return bep, nil
-		}
-		if checkInvariants {
-			if rp.canHandleError(err) && rp.Done() {
-				panic(fmt.Sprintf("%T.BoundEndpointAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
-			}
-		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
-			return nil, err
-		}
-	}
-}
-
 // OpenAt returns a FileDescription providing access to the file at the given
 // path. A reference is taken on the returned FileDescription.
 func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *OpenOptions) (*FileDescription, error) {
@@ -675,6 +648,33 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti
 	}
 }
 
+// BoundEndpointAt gets the bound endpoint at the given path, if one exists.
+func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *BoundEndpointOptions) (transport.BoundEndpoint, error) {
+	if !pop.Path.Begin.Ok() {
+		if pop.Path.Absolute {
+			return nil, syserror.ECONNREFUSED
+		}
+		return nil, syserror.ENOENT
+	}
+	rp := vfs.getResolvingPath(creds, pop)
+	for {
+		bep, err := rp.mount.fs.impl.BoundEndpointAt(ctx, rp, *opts)
+		if err == nil {
+			vfs.putResolvingPath(rp)
+			return bep, nil
+		}
+		if checkInvariants {
+			if rp.canHandleError(err) && rp.Done() {
+				panic(fmt.Sprintf("%T.BoundEndpointAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
+			}
+		}
+		if !rp.handleError(err) {
+			vfs.putResolvingPath(rp)
+			return nil, err
+		}
+	}
+}
+
 // ListxattrAt returns all extended attribute names for the file at the given
 // path.
 func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, size uint64) ([]string, error) {
diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go
index fcc46420f..101497ed6 100644
--- a/pkg/sentry/watchdog/watchdog.go
+++ b/pkg/sentry/watchdog/watchdog.go
@@ -255,7 +255,7 @@ func (w *Watchdog) runTurn() {
 	case <-done:
 	case <-time.After(w.TaskTimeout):
 		// Report if the watchdog is not making progress.
-		// No one is wathching the watchdog watcher though.
+		// No one is watching the watchdog watcher though.
 		w.reportStuckWatchdog()
 		<-done
 	}
@@ -317,10 +317,8 @@ func (w *Watchdog) report(offenders map[*kernel.Task]*offender, newTaskFound boo
 
 	buf.WriteString("Search for '(*Task).run(0x..., 0x<tid>)' in the stack dump to find the offending goroutine")
 
-	// Dump stack only if a new task is detected or if it sometime has
-	// passed since the last time a stack dump was generated.
-	showStack := newTaskFound || time.Since(w.lastStackDump) >= stackDumpSameTaskPeriod
-	w.doAction(w.TaskTimeoutAction, showStack, &buf)
+	// Force stack dump only if a new task is detected.
+	w.doAction(w.TaskTimeoutAction, newTaskFound, &buf)
 }
 
 func (w *Watchdog) reportStuckWatchdog() {
@@ -329,12 +327,15 @@ func (w *Watchdog) reportStuckWatchdog() {
 	w.doAction(w.TaskTimeoutAction, false, &buf)
 }
 
-// doAction will take the given action. If the action is LogWarning and
-// showStack is false, then the stack printing will be skipped.
-func (w *Watchdog) doAction(action Action, showStack bool, msg *bytes.Buffer) {
+// doAction will take the given action. If the action is LogWarning, the stack
+// is not always dumpped to the log to prevent log flooding. "forceStack"
+// guarantees that the stack will be dumped regarless.
+func (w *Watchdog) doAction(action Action, forceStack bool, msg *bytes.Buffer) {
 	switch action {
 	case LogWarning:
-		if !showStack {
+		// Dump stack only if forced or sometime has passed since the last time a
+		// stack dump was generated.
+		if !forceStack && time.Since(w.lastStackDump) < stackDumpSameTaskPeriod {
 			msg.WriteString("\n...[stack dump skipped]...")
 			log.Warningf(msg.String())
 			return
@@ -359,6 +360,7 @@ func (w *Watchdog) doAction(action Action, showStack bool, msg *bytes.Buffer) {
 		case <-time.After(1 * time.Second):
 		}
 		panic(fmt.Sprintf("%s\nStack for running G's are skipped while panicking.", msg.String()))
+
 	default:
 		panic(fmt.Sprintf("Unknown watchdog action %v", action))
 
diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go
index f01217c91..8ec5d5d5c 100644
--- a/pkg/tcpip/buffer/view.go
+++ b/pkg/tcpip/buffer/view.go
@@ -77,8 +77,7 @@ func NewVectorisedView(size int, views []View) VectorisedView {
 	return VectorisedView{views: views, size: size}
 }
 
-// TrimFront removes the first "count" bytes of the vectorised view. It panics
-// if count > vv.Size().
+// TrimFront removes the first "count" bytes of the vectorised view.
 func (vv *VectorisedView) TrimFront(count int) {
 	for count > 0 && len(vv.views) > 0 {
 		if count < len(vv.views[0]) {
@@ -87,7 +86,7 @@ func (vv *VectorisedView) TrimFront(count int) {
 			return
 		}
 		count -= len(vv.views[0])
-		vv.removeFirst()
+		vv.RemoveFirst()
 	}
 }
 
@@ -105,7 +104,7 @@ func (vv *VectorisedView) Read(v View) (copied int, err error) {
 		count -= len(vv.views[0])
 		copy(v[copied:], vv.views[0])
 		copied += len(vv.views[0])
-		vv.removeFirst()
+		vv.RemoveFirst()
 	}
 	if copied == 0 {
 		return 0, io.EOF
@@ -127,7 +126,7 @@ func (vv *VectorisedView) ReadToVV(dstVV *VectorisedView, count int) (copied int
 		count -= len(vv.views[0])
 		dstVV.AppendView(vv.views[0])
 		copied += len(vv.views[0])
-		vv.removeFirst()
+		vv.RemoveFirst()
 	}
 	return copied
 }
@@ -163,37 +162,22 @@ func (vv *VectorisedView) Clone(buffer []View) VectorisedView {
 	return VectorisedView{views: append(buffer[:0], vv.views...), size: vv.size}
 }
 
-// PullUp returns the first "count" bytes of the vectorised view. If those
-// bytes aren't already contiguous inside the vectorised view, PullUp will
-// reallocate as needed to make them contiguous. PullUp fails and returns false
-// when count > vv.Size().
-func (vv *VectorisedView) PullUp(count int) (View, bool) {
+// First returns the first view of the vectorised view.
+func (vv *VectorisedView) First() View {
 	if len(vv.views) == 0 {
-		return nil, count == 0
-	}
-	if count <= len(vv.views[0]) {
-		return vv.views[0][:count], true
-	}
-	if count > vv.size {
-		return nil, false
+		return nil
 	}
+	return vv.views[0]
+}
 
-	newFirst := NewView(count)
-	i := 0
-	for offset := 0; offset < count; i++ {
-		copy(newFirst[offset:], vv.views[i])
-		if count-offset < len(vv.views[i]) {
-			vv.views[i].TrimFront(count - offset)
-			break
-		}
-		offset += len(vv.views[i])
-		vv.views[i] = nil
+// RemoveFirst removes the first view of the vectorised view.
+func (vv *VectorisedView) RemoveFirst() {
+	if len(vv.views) == 0 {
+		return
 	}
-	// We're guaranteed that i > 0, since count is too large for the first
-	// view.
-	vv.views[i-1] = newFirst
-	vv.views = vv.views[i-1:]
-	return newFirst, true
+	vv.size -= len(vv.views[0])
+	vv.views[0] = nil
+	vv.views = vv.views[1:]
 }
 
 // Size returns the size in bytes of the entire content stored in the vectorised view.
@@ -241,10 +225,3 @@ func (vv *VectorisedView) Readers() []bytes.Reader {
 	}
 	return readers
 }
-
-// removeFirst panics when len(vv.views) < 1.
-func (vv *VectorisedView) removeFirst() {
-	vv.size -= len(vv.views[0])
-	vv.views[0] = nil
-	vv.views = vv.views[1:]
-}
diff --git a/pkg/tcpip/buffer/view_test.go b/pkg/tcpip/buffer/view_test.go
index c56795c7b..106e1994c 100644
--- a/pkg/tcpip/buffer/view_test.go
+++ b/pkg/tcpip/buffer/view_test.go
@@ -16,7 +16,6 @@
 package buffer
 
 import (
-	"bytes"
 	"reflect"
 	"testing"
 )
@@ -371,115 +370,3 @@ func TestVVRead(t *testing.T) {
 		})
 	}
 }
-
-var pullUpTestCases = []struct {
-	comment string
-	in      VectorisedView
-	count   int
-	want    []byte
-	result  VectorisedView
-	ok      bool
-}{
-	{
-		comment: "simple case",
-		in:      vv(2, "12"),
-		count:   1,
-		want:    []byte("1"),
-		result:  vv(2, "12"),
-		ok:      true,
-	},
-	{
-		comment: "entire View",
-		in:      vv(2, "1", "2"),
-		count:   1,
-		want:    []byte("1"),
-		result:  vv(2, "1", "2"),
-		ok:      true,
-	},
-	{
-		comment: "spanning across two Views",
-		in:      vv(3, "1", "23"),
-		count:   2,
-		want:    []byte("12"),
-		result:  vv(3, "12", "3"),
-		ok:      true,
-	},
-	{
-		comment: "spanning across all Views",
-		in:      vv(5, "1", "23", "45"),
-		count:   5,
-		want:    []byte("12345"),
-		result:  vv(5, "12345"),
-		ok:      true,
-	},
-	{
-		comment: "count = 0",
-		in:      vv(1, "1"),
-		count:   0,
-		want:    []byte{},
-		result:  vv(1, "1"),
-		ok:      true,
-	},
-	{
-		comment: "count = size",
-		in:      vv(1, "1"),
-		count:   1,
-		want:    []byte("1"),
-		result:  vv(1, "1"),
-		ok:      true,
-	},
-	{
-		comment: "count too large",
-		in:      vv(3, "1", "23"),
-		count:   4,
-		want:    nil,
-		result:  vv(3, "1", "23"),
-		ok:      false,
-	},
-	{
-		comment: "empty vv",
-		in:      vv(0, ""),
-		count:   1,
-		want:    nil,
-		result:  vv(0, ""),
-		ok:      false,
-	},
-	{
-		comment: "empty vv, count = 0",
-		in:      vv(0, ""),
-		count:   0,
-		want:    nil,
-		result:  vv(0, ""),
-		ok:      true,
-	},
-	{
-		comment: "empty views",
-		in:      vv(3, "", "1", "", "23"),
-		count:   2,
-		want:    []byte("12"),
-		result:  vv(3, "12", "3"),
-		ok:      true,
-	},
-}
-
-func TestPullUp(t *testing.T) {
-	for _, c := range pullUpTestCases {
-		got, ok := c.in.PullUp(c.count)
-
-		// Is the return value right?
-		if ok != c.ok {
-			t.Errorf("Test %q failed when calling PullUp(%d) on %v. Got an ok of %t. Want %t",
-				c.comment, c.count, c.in, ok, c.ok)
-		}
-		if bytes.Compare(got, View(c.want)) != 0 {
-			t.Errorf("Test %q failed when calling PullUp(%d) on %v. Got %v. Want %v",
-				c.comment, c.count, c.in, got, c.want)
-		}
-
-		// Is the underlying structure right?
-		if !reflect.DeepEqual(c.in, c.result) {
-			t.Errorf("Test %q failed when calling PullUp(%d). Got vv with structure %v. Wanted %v",
-				c.comment, c.count, c.in, c.result)
-		}
-	}
-}
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index 073c84ef9..1e2255bfa 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -98,13 +98,13 @@ func (e *endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList
 
 // WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
 func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	// There should be an ethernet header at the beginning of vv.
-	hdr, ok := vv.PullUp(header.EthernetMinimumSize)
-	if !ok {
-		// Reject the packet if it's shorter than an ethernet header.
+	// Reject the packet if it's shorter than an ethernet header.
+	if vv.Size() < header.EthernetMinimumSize {
 		return tcpip.ErrBadAddress
 	}
-	linkHeader := header.Ethernet(hdr)
+
+	// There should be an ethernet header at the beginning of vv.
+	linkHeader := header.Ethernet(vv.First()[:header.EthernetMinimumSize])
 	vv.TrimFront(len(linkHeader))
 	e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, linkHeader.Type(), stack.PacketBuffer{
 		Data:       vv,
diff --git a/pkg/tcpip/link/rawfile/BUILD b/pkg/tcpip/link/rawfile/BUILD
index 9cc08d0e2..14b527bc2 100644
--- a/pkg/tcpip/link/rawfile/BUILD
+++ b/pkg/tcpip/link/rawfile/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library")
 
 package(licenses = ["notice"])
 
@@ -18,10 +18,3 @@ go_library(
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
-
-go_test(
-    name = "rawfile_test",
-    size = "small",
-    srcs = ["rawfile_test.go"],
-    library = ":rawfile",
-)
diff --git a/pkg/tcpip/link/rawfile/rawfile_test.go b/pkg/tcpip/link/rawfile/rawfile_test.go
deleted file mode 100644
index 8f14ba761..000000000
--- a/pkg/tcpip/link/rawfile/rawfile_test.go
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build linux
-
-package rawfile
-
-import (
-	"syscall"
-	"testing"
-)
-
-func TestNonBlockingWrite3ZeroLength(t *testing.T) {
-	fd, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
-	if err != nil {
-		t.Fatalf("failed to open /dev/null: %v", err)
-	}
-	defer syscall.Close(fd)
-
-	if err := NonBlockingWrite3(fd, []byte{}, []byte{0}, nil); err != nil {
-		t.Fatalf("failed to write: %v", err)
-	}
-}
-
-func TestNonBlockingWrite3Nil(t *testing.T) {
-	fd, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
-	if err != nil {
-		t.Fatalf("failed to open /dev/null: %v", err)
-	}
-	defer syscall.Close(fd)
-
-	if err := NonBlockingWrite3(fd, nil, []byte{0}, nil); err != nil {
-		t.Fatalf("failed to write: %v", err)
-	}
-}
diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
index 92efd0bf8..44e25d475 100644
--- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go
+++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
@@ -76,13 +76,9 @@ func NonBlockingWrite3(fd int, b1, b2, b3 []byte) *tcpip.Error {
 
 	// We have two buffers. Build the iovec that represents them and issue
 	// a writev syscall.
-	var base *byte
-	if len(b1) > 0 {
-		base = &b1[0]
-	}
 	iovec := [3]syscall.Iovec{
 		{
-			Base: base,
+			Base: &b1[0],
 			Len:  uint64(len(b1)),
 		},
 		{
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index 33f640b85..27ea3f531 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -674,7 +674,7 @@ func TestSimpleReceive(t *testing.T) {
 		// Wait for packet to be received, then check it.
 		c.waitForPackets(1, time.After(5*time.Second), "Timeout waiting for packet")
 		c.mu.Lock()
-		rcvd := []byte(c.packets[0].vv.ToView())
+		rcvd := []byte(c.packets[0].vv.First())
 		c.packets = c.packets[:0]
 		c.mu.Unlock()
 
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index 0799c8f4d..be2537a82 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -171,7 +171,11 @@ func (e *endpoint) GSOMaxSize() uint32 {
 func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
 	writer := e.writer
 	if writer == nil && atomic.LoadUint32(&LogPackets) == 1 {
-		logPacket(prefix, protocol, pkt, gso)
+		first := pkt.Header.View()
+		if len(first) == 0 {
+			first = pkt.Data.First()
+		}
+		logPacket(prefix, protocol, first, gso)
 	}
 	if writer != nil && atomic.LoadUint32(&LogPacketsToPCAP) == 1 {
 		totalLength := pkt.Header.UsedLength() + pkt.Data.Size()
@@ -234,7 +238,7 @@ func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
 // Wait implements stack.LinkEndpoint.Wait.
 func (e *endpoint) Wait() { e.lower.Wait() }
 
-func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer, gso *stack.GSO) {
+func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.View, gso *stack.GSO) {
 	// Figure out the network layer info.
 	var transProto uint8
 	src := tcpip.Address("unknown")
@@ -243,49 +247,28 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 	size := uint16(0)
 	var fragmentOffset uint16
 	var moreFragments bool
-
-	// Create a clone of pkt, including any headers if present. Avoid allocating
-	// backing memory for the clone.
-	views := [8]buffer.View{}
-	vv := buffer.NewVectorisedView(0, views[:0])
-	vv.AppendView(pkt.Header.View())
-	vv.Append(pkt.Data)
-
 	switch protocol {
 	case header.IPv4ProtocolNumber:
-		hdr, ok := vv.PullUp(header.IPv4MinimumSize)
-		if !ok {
-			return
-		}
-		ipv4 := header.IPv4(hdr)
+		ipv4 := header.IPv4(b)
 		fragmentOffset = ipv4.FragmentOffset()
 		moreFragments = ipv4.Flags()&header.IPv4FlagMoreFragments == header.IPv4FlagMoreFragments
 		src = ipv4.SourceAddress()
 		dst = ipv4.DestinationAddress()
 		transProto = ipv4.Protocol()
 		size = ipv4.TotalLength() - uint16(ipv4.HeaderLength())
-		vv.TrimFront(int(ipv4.HeaderLength()))
+		b = b[ipv4.HeaderLength():]
 		id = int(ipv4.ID())
 
 	case header.IPv6ProtocolNumber:
-		hdr, ok := vv.PullUp(header.IPv6MinimumSize)
-		if !ok {
-			return
-		}
-		ipv6 := header.IPv6(hdr)
+		ipv6 := header.IPv6(b)
 		src = ipv6.SourceAddress()
 		dst = ipv6.DestinationAddress()
 		transProto = ipv6.NextHeader()
 		size = ipv6.PayloadLength()
-		vv.TrimFront(header.IPv6MinimumSize)
+		b = b[header.IPv6MinimumSize:]
 
 	case header.ARPProtocolNumber:
-		hdr, ok := vv.PullUp(header.ARPSize)
-		if !ok {
-			return
-		}
-		vv.TrimFront(header.ARPSize)
-		arp := header.ARP(hdr)
+		arp := header.ARP(b)
 		log.Infof(
 			"%s arp %v (%v) -> %v (%v) valid:%v",
 			prefix,
@@ -301,7 +284,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 
 	// We aren't guaranteed to have a transport header - it's possible for
 	// writes via raw endpoints to contain only network headers.
-	if minSize, ok := transportProtocolMinSizes[tcpip.TransportProtocolNumber(transProto)]; ok && vv.Size() < minSize {
+	if minSize, ok := transportProtocolMinSizes[tcpip.TransportProtocolNumber(transProto)]; ok && len(b) < minSize {
 		log.Infof("%s %v -> %v transport protocol: %d, but no transport header found (possible raw packet)", prefix, src, dst, transProto)
 		return
 	}
@@ -314,11 +297,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 	switch tcpip.TransportProtocolNumber(transProto) {
 	case header.ICMPv4ProtocolNumber:
 		transName = "icmp"
-		hdr, ok := vv.PullUp(header.ICMPv4MinimumSize)
-		if !ok {
-			break
-		}
-		icmp := header.ICMPv4(hdr)
+		icmp := header.ICMPv4(b)
 		icmpType := "unknown"
 		if fragmentOffset == 0 {
 			switch icmp.Type() {
@@ -351,11 +330,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 
 	case header.ICMPv6ProtocolNumber:
 		transName = "icmp"
-		hdr, ok := vv.PullUp(header.ICMPv6MinimumSize)
-		if !ok {
-			break
-		}
-		icmp := header.ICMPv6(hdr)
+		icmp := header.ICMPv6(b)
 		icmpType := "unknown"
 		switch icmp.Type() {
 		case header.ICMPv6DstUnreachable:
@@ -386,11 +361,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 
 	case header.UDPProtocolNumber:
 		transName = "udp"
-		hdr, ok := vv.PullUp(header.UDPMinimumSize)
-		if !ok {
-			break
-		}
-		udp := header.UDP(hdr)
+		udp := header.UDP(b)
 		if fragmentOffset == 0 && len(udp) >= header.UDPMinimumSize {
 			srcPort = udp.SourcePort()
 			dstPort = udp.DestinationPort()
@@ -400,11 +371,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 
 	case header.TCPProtocolNumber:
 		transName = "tcp"
-		hdr, ok := vv.PullUp(header.TCPMinimumSize)
-		if !ok {
-			break
-		}
-		tcp := header.TCP(hdr)
+		tcp := header.TCP(b)
 		if fragmentOffset == 0 && len(tcp) >= header.TCPMinimumSize {
 			offset := int(tcp.DataOffset())
 			if offset < header.TCPMinimumSize {
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index cf73a939e..7acbfa0a8 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -93,10 +93,7 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuf
 }
 
 func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
-	v, ok := pkt.Data.PullUp(header.ARPSize)
-	if !ok {
-		return
-	}
+	v := pkt.Data.First()
 	h := header.ARP(v)
 	if !h.IsValid() {
 		return
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 4cbefe5ab..c4bf1ba5c 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -25,11 +25,7 @@ import (
 // used to find out which transport endpoint must be notified about the ICMP
 // packet.
 func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) {
-	h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
-	if !ok {
-		return
-	}
-	hdr := header.IPv4(h)
+	h := header.IPv4(pkt.Data.First())
 
 	// We don't use IsValid() here because ICMP only requires that the IP
 	// header plus 8 bytes of the transport header be included. So it's
@@ -38,12 +34,12 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.
 	//
 	// Drop packet if it doesn't have the basic IPv4 header or if the
 	// original source address doesn't match the endpoint's address.
-	if hdr.SourceAddress() != e.id.LocalAddress {
+	if len(h) < header.IPv4MinimumSize || h.SourceAddress() != e.id.LocalAddress {
 		return
 	}
 
-	hlen := int(hdr.HeaderLength())
-	if pkt.Data.Size() < hlen || hdr.FragmentOffset() != 0 {
+	hlen := int(h.HeaderLength())
+	if pkt.Data.Size() < hlen || h.FragmentOffset() != 0 {
 		// We won't be able to handle this if it doesn't contain the
 		// full IPv4 header, or if it's a fragment not at offset 0
 		// (because it won't have the transport header).
@@ -52,15 +48,15 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.
 
 	// Skip the ip header, then deliver control message.
 	pkt.Data.TrimFront(hlen)
-	p := hdr.TransportProtocol()
-	e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
+	p := h.TransportProtocol()
+	e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, h.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
 }
 
 func (e *endpoint) handleICMP(r *stack.Route, pkt stack.PacketBuffer) {
 	stats := r.Stats()
 	received := stats.ICMP.V4PacketsReceived
-	v, ok := pkt.Data.PullUp(header.ICMPv4MinimumSize)
-	if !ok {
+	v := pkt.Data.First()
+	if len(v) < header.ICMPv4MinimumSize {
 		received.Invalid.Increment()
 		return
 	}
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 17202cc7a..104aafbed 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -328,11 +328,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuffer) *tcpip.Error {
 	// The packet already has an IP header, but there are a few required
 	// checks.
-	h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
-	if !ok {
-		return tcpip.ErrInvalidOptionValue
-	}
-	ip := header.IPv4(h)
+	ip := header.IPv4(pkt.Data.First())
 	if !ip.IsValid(pkt.Data.Size()) {
 		return tcpip.ErrInvalidOptionValue
 	}
@@ -382,11 +378,7 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuf
 // HandlePacket is called by the link layer when new ipv4 packets arrive for
 // this endpoint.
 func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
-	headerView, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
-	if !ok {
-		r.Stats().IP.MalformedPacketsReceived.Increment()
-		return
-	}
+	headerView := pkt.Data.First()
 	h := header.IPv4(headerView)
 	if !h.IsValid(pkt.Data.Size()) {
 		r.Stats().IP.MalformedPacketsReceived.Increment()
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index bdf3a0d25..b68983d10 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -28,11 +28,7 @@ import (
 // used to find out which transport endpoint must be notified about the ICMP
 // packet.
 func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) {
-	h, ok := pkt.Data.PullUp(header.IPv6MinimumSize)
-	if !ok {
-		return
-	}
-	hdr := header.IPv6(h)
+	h := header.IPv6(pkt.Data.First())
 
 	// We don't use IsValid() here because ICMP only requires that up to
 	// 1280 bytes of the original packet be included. So it's likely that it
@@ -40,21 +36,17 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.
 	//
 	// Drop packet if it doesn't have the basic IPv6 header or if the
 	// original source address doesn't match the endpoint's address.
-	if hdr.SourceAddress() != e.id.LocalAddress {
+	if len(h) < header.IPv6MinimumSize || h.SourceAddress() != e.id.LocalAddress {
 		return
 	}
 
 	// Skip the IP header, then handle the fragmentation header if there
 	// is one.
 	pkt.Data.TrimFront(header.IPv6MinimumSize)
-	p := hdr.TransportProtocol()
+	p := h.TransportProtocol()
 	if p == header.IPv6FragmentHeader {
-		f, ok := pkt.Data.PullUp(header.IPv6FragmentHeaderSize)
-		if !ok {
-			return
-		}
-		fragHdr := header.IPv6Fragment(f)
-		if !fragHdr.IsValid() || fragHdr.FragmentOffset() != 0 {
+		f := header.IPv6Fragment(pkt.Data.First())
+		if !f.IsValid() || f.FragmentOffset() != 0 {
 			// We can't handle fragments that aren't at offset 0
 			// because they don't have the transport headers.
 			return
@@ -63,19 +55,19 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.
 		// Skip fragmentation header and find out the actual protocol
 		// number.
 		pkt.Data.TrimFront(header.IPv6FragmentHeaderSize)
-		p = fragHdr.TransportProtocol()
+		p = f.TransportProtocol()
 	}
 
 	// Deliver the control packet to the transport endpoint.
-	e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
+	e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, h.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
 }
 
 func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.PacketBuffer, hasFragmentHeader bool) {
 	stats := r.Stats().ICMP
 	sent := stats.V6PacketsSent
 	received := stats.V6PacketsReceived
-	v, ok := pkt.Data.PullUp(header.ICMPv6HeaderSize)
-	if !ok {
+	v := pkt.Data.First()
+	if len(v) < header.ICMPv6MinimumSize {
 		received.Invalid.Increment()
 		return
 	}
@@ -84,9 +76,11 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
 
 	// Validate ICMPv6 checksum before processing the packet.
 	//
+	// Only the first view in vv is accounted for by h. To account for the
+	// rest of vv, a shallow copy is made and the first view is removed.
 	// This copy is used as extra payload during the checksum calculation.
 	payload := pkt.Data.Clone(nil)
-	payload.TrimFront(len(h))
+	payload.RemoveFirst()
 	if got, want := h.Checksum(), header.ICMPv6Checksum(h, iph.SourceAddress(), iph.DestinationAddress(), payload); got != want {
 		received.Invalid.Increment()
 		return
@@ -107,40 +101,34 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
 	switch h.Type() {
 	case header.ICMPv6PacketTooBig:
 		received.PacketTooBig.Increment()
-		hdr, ok := pkt.Data.PullUp(header.ICMPv6PacketTooBigMinimumSize)
-		if !ok {
+		if len(v) < header.ICMPv6PacketTooBigMinimumSize {
 			received.Invalid.Increment()
 			return
 		}
 		pkt.Data.TrimFront(header.ICMPv6PacketTooBigMinimumSize)
-		mtu := header.ICMPv6(hdr).MTU()
+		mtu := h.MTU()
 		e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), pkt)
 
 	case header.ICMPv6DstUnreachable:
 		received.DstUnreachable.Increment()
-		hdr, ok := pkt.Data.PullUp(header.ICMPv6DstUnreachableMinimumSize)
-		if !ok {
+		if len(v) < header.ICMPv6DstUnreachableMinimumSize {
 			received.Invalid.Increment()
 			return
 		}
 		pkt.Data.TrimFront(header.ICMPv6DstUnreachableMinimumSize)
-		switch header.ICMPv6(hdr).Code() {
+		switch h.Code() {
 		case header.ICMPv6PortUnreachable:
 			e.handleControl(stack.ControlPortUnreachable, 0, pkt)
 		}
 
 	case header.ICMPv6NeighborSolicit:
 		received.NeighborSolicit.Increment()
-		if pkt.Data.Size() < header.ICMPv6NeighborSolicitMinimumSize || !isNDPValid() {
+		if len(v) < header.ICMPv6NeighborSolicitMinimumSize || !isNDPValid() {
 			received.Invalid.Increment()
 			return
 		}
 
-		// The remainder of payload must be only the neighbor solicitation, so
-		// payload.ToView() always returns the solicitation. Per RFC 6980 section 5,
-		// NDP messages cannot be fragmented. Also note that in the common case NDP
-		// datagrams are very small and ToView() will not incur allocations.
-		ns := header.NDPNeighborSolicit(payload.ToView())
+		ns := header.NDPNeighborSolicit(h.NDPPayload())
 		it, err := ns.Options().Iter(true)
 		if err != nil {
 			// If we have a malformed NDP NS option, drop the packet.
@@ -298,16 +286,12 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
 
 	case header.ICMPv6NeighborAdvert:
 		received.NeighborAdvert.Increment()
-		if pkt.Data.Size() < header.ICMPv6NeighborAdvertSize || !isNDPValid() {
+		if len(v) < header.ICMPv6NeighborAdvertSize || !isNDPValid() {
 			received.Invalid.Increment()
 			return
 		}
 
-		// The remainder of payload must be only the neighbor advertisement, so
-		// payload.ToView() always returns the advertisement. Per RFC 6980 section
-		// 5, NDP messages cannot be fragmented. Also note that in the common case
-		// NDP datagrams are very small and ToView() will not incur allocations.
-		na := header.NDPNeighborAdvert(payload.ToView())
+		na := header.NDPNeighborAdvert(h.NDPPayload())
 		it, err := na.Options().Iter(true)
 		if err != nil {
 			// If we have a malformed NDP NA option, drop the packet.
@@ -379,15 +363,14 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
 
 	case header.ICMPv6EchoRequest:
 		received.EchoRequest.Increment()
-		icmpHdr, ok := pkt.Data.PullUp(header.ICMPv6EchoMinimumSize)
-		if !ok {
+		if len(v) < header.ICMPv6EchoMinimumSize {
 			received.Invalid.Increment()
 			return
 		}
 		pkt.Data.TrimFront(header.ICMPv6EchoMinimumSize)
 		hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv6EchoMinimumSize)
 		packet := header.ICMPv6(hdr.Prepend(header.ICMPv6EchoMinimumSize))
-		copy(packet, icmpHdr)
+		copy(packet, h)
 		packet.SetType(header.ICMPv6EchoReply)
 		packet.SetChecksum(header.ICMPv6Checksum(packet, r.LocalAddress, r.RemoteAddress, pkt.Data))
 		if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, stack.PacketBuffer{
@@ -401,7 +384,7 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
 
 	case header.ICMPv6EchoReply:
 		received.EchoReply.Increment()
-		if pkt.Data.Size() < header.ICMPv6EchoMinimumSize {
+		if len(v) < header.ICMPv6EchoMinimumSize {
 			received.Invalid.Increment()
 			return
 		}
@@ -423,9 +406,8 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
 	case header.ICMPv6RouterAdvert:
 		received.RouterAdvert.Increment()
 
-		// Is the NDP payload of sufficient size to hold a Router
-		// Advertisement?
-		if pkt.Data.Size()-header.ICMPv6HeaderSize < header.NDPRAMinimumSize || !isNDPValid() {
+		p := h.NDPPayload()
+		if len(p) < header.NDPRAMinimumSize || !isNDPValid() {
 			received.Invalid.Increment()
 			return
 		}
@@ -443,11 +425,7 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
 			return
 		}
 
-		// The remainder of payload must be only the router advertisement, so
-		// payload.ToView() always returns the advertisement. Per RFC 6980 section
-		// 5, NDP messages cannot be fragmented. Also note that in the common case
-		// NDP datagrams are very small and ToView() will not incur allocations.
-		ra := header.NDPRouterAdvert(payload.ToView())
+		ra := header.NDPRouterAdvert(p)
 		opts := ra.Options()
 
 		// Are options valid as per the wire format?
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index d412ff688..bd099a7f8 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -166,8 +166,7 @@ func TestICMPCounts(t *testing.T) {
 		},
 		{
 			typ:  header.ICMPv6NeighborSolicit,
-			size: header.ICMPv6NeighborSolicitMinimumSize,
-		},
+			size: header.ICMPv6NeighborSolicitMinimumSize},
 		{
 			typ:       header.ICMPv6NeighborAdvert,
 			size:      header.ICMPv6NeighborAdvertMinimumSize,
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 486725131..331b0817b 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -171,11 +171,7 @@ func (*endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuffe
 // HandlePacket is called by the link layer when new ipv6 packets arrive for
 // this endpoint.
 func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
-	headerView, ok := pkt.Data.PullUp(header.IPv6MinimumSize)
-	if !ok {
-		r.Stats().IP.MalformedPacketsReceived.Increment()
-		return
-	}
+	headerView := pkt.Data.First()
 	h := header.IPv6(headerView)
 	if !h.IsValid(pkt.Data.Size()) {
 		r.Stats().IP.MalformedPacketsReceived.Increment()
diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go
index c7c663498..e9c652042 100644
--- a/pkg/tcpip/stack/forwarder_test.go
+++ b/pkg/tcpip/stack/forwarder_test.go
@@ -70,10 +70,7 @@ func (f *fwdTestNetworkEndpoint) ID() *NetworkEndpointID {
 
 func (f *fwdTestNetworkEndpoint) HandlePacket(r *Route, pkt PacketBuffer) {
 	// Consume the network header.
-	b, ok := pkt.Data.PullUp(fwdTestNetHeaderLen)
-	if !ok {
-		return
-	}
+	b := pkt.Data.First()
 	pkt.Data.TrimFront(fwdTestNetHeaderLen)
 
 	// Dispatch the packet to the transport protocol.
@@ -476,7 +473,7 @@ func TestForwardingWithFakeResolverPartialTimeout(t *testing.T) {
 		t.Fatal("packet not forwarded")
 	}
 
-	b := p.Pkt.Data.ToView()
+	b := p.Pkt.Header.View()
 	if b[0] != 3 {
 		t.Fatalf("got b[0] = %d, want = 3", b[0])
 	}
@@ -520,7 +517,7 @@ func TestForwardingWithFakeResolverTwoPackets(t *testing.T) {
 			t.Fatal("packet not forwarded")
 		}
 
-		b := p.Pkt.Data.ToView()
+		b := p.Pkt.Header.View()
 		if b[0] != 3 {
 			t.Fatalf("got b[0] = %d, want = 3", b[0])
 		}
@@ -567,7 +564,7 @@ func TestForwardingWithFakeResolverManyPackets(t *testing.T) {
 			t.Fatal("packet not forwarded")
 		}
 
-		b := p.Pkt.Data.ToView()
+		b := p.Pkt.Header.View()
 		if b[0] != 3 {
 			t.Fatalf("got b[0] = %d, want = 3", b[0])
 		}
@@ -622,7 +619,7 @@ func TestForwardingWithFakeResolverManyResolutions(t *testing.T) {
 
 		// The first 5 packets (address 3 to 7) should not be forwarded
 		// because their address resolutions are interrupted.
-		b := p.Pkt.Data.ToView()
+		b := p.Pkt.Header.View()
 		if b[0] < 8 {
 			t.Fatalf("got b[0] = %d, want b[0] >= 8", b[0])
 		}
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 6b91159d4..6c0a4b24d 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -212,11 +212,6 @@ func (it *IPTables) Check(hook Hook, pkt PacketBuffer) bool {
 // CheckPackets runs pkts through the rules for hook and returns a map of packets that
 // should not go forward.
 //
-// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-//
-// TODO(gvisor.dev/issue/170): pk.NetworkHeader will always be set as a
-// precondition.
-//
 // NOTE: unlike the Check API the returned map contains packets that should be
 // dropped.
 func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList) (drop map[*PacketBuffer]struct{}) {
@@ -231,9 +226,7 @@ func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList) (drop map[*Pa
 	return drop
 }
 
-// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-// TODO(gvisor.dev/issue/170): pk.NetworkHeader will always be set as a
-// precondition.
+// Precondition: pkt.NetworkHeader is set.
 func (it *IPTables) checkChain(hook Hook, pkt PacketBuffer, table Table, ruleIdx int) chainVerdict {
 	// Start from ruleIdx and walk the list of rules until a rule gives us
 	// a verdict.
@@ -278,21 +271,14 @@ func (it *IPTables) checkChain(hook Hook, pkt PacketBuffer, table Table, ruleIdx
 	return chainDrop
 }
 
-// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-// TODO(gvisor.dev/issue/170): pk.NetworkHeader will always be set as a
-// precondition.
+// Precondition: pk.NetworkHeader is set.
 func (it *IPTables) checkRule(hook Hook, pkt PacketBuffer, table Table, ruleIdx int) (RuleVerdict, int) {
 	rule := table.Rules[ruleIdx]
 
 	// If pkt.NetworkHeader hasn't been set yet, it will be contained in
-	// pkt.Data.
+	// pkt.Data.First().
 	if pkt.NetworkHeader == nil {
-		var ok bool
-		pkt.NetworkHeader, ok = pkt.Data.PullUp(header.IPv4MinimumSize)
-		if !ok {
-			// Precondition has been violated.
-			panic(fmt.Sprintf("iptables checks require IPv4 headers of at least %d bytes", header.IPv4MinimumSize))
-		}
+		pkt.NetworkHeader = pkt.Data.First()
 	}
 
 	// Check whether the packet matches the IP header filter.
diff --git a/pkg/tcpip/stack/iptables_targets.go b/pkg/tcpip/stack/iptables_targets.go
index 8be61f4b1..7b4543caf 100644
--- a/pkg/tcpip/stack/iptables_targets.go
+++ b/pkg/tcpip/stack/iptables_targets.go
@@ -96,12 +96,9 @@ func (rt RedirectTarget) Action(pkt PacketBuffer) (RuleVerdict, int) {
 	newPkt := pkt.Clone()
 
 	// Set network header.
-	headerView, ok := newPkt.Data.PullUp(header.IPv4MinimumSize)
-	if !ok {
-		return RuleDrop, 0
-	}
+	headerView := newPkt.Data.First()
 	netHeader := header.IPv4(headerView)
-	newPkt.NetworkHeader = headerView
+	newPkt.NetworkHeader = headerView[:header.IPv4MinimumSize]
 
 	hlen := int(netHeader.HeaderLength())
 	tlen := int(netHeader.TotalLength())
@@ -120,14 +117,10 @@ func (rt RedirectTarget) Action(pkt PacketBuffer) (RuleVerdict, int) {
 		if newPkt.TransportHeader != nil {
 			udpHeader = header.UDP(newPkt.TransportHeader)
 		} else {
-			if pkt.Data.Size() < header.UDPMinimumSize {
-				return RuleDrop, 0
-			}
-			hdr, ok := newPkt.Data.PullUp(header.UDPMinimumSize)
-			if !ok {
+			if len(pkt.Data.First()) < header.UDPMinimumSize {
 				return RuleDrop, 0
 			}
-			udpHeader = header.UDP(hdr)
+			udpHeader = header.UDP(newPkt.Data.First())
 		}
 		udpHeader.SetDestinationPort(rt.MinPort)
 	case header.TCPProtocolNumber:
@@ -135,14 +128,10 @@ func (rt RedirectTarget) Action(pkt PacketBuffer) (RuleVerdict, int) {
 		if newPkt.TransportHeader != nil {
 			tcpHeader = header.TCP(newPkt.TransportHeader)
 		} else {
-			if pkt.Data.Size() < header.TCPMinimumSize {
+			if len(pkt.Data.First()) < header.TCPMinimumSize {
 				return RuleDrop, 0
 			}
-			hdr, ok := newPkt.Data.PullUp(header.TCPMinimumSize)
-			if !ok {
-				return RuleDrop, 0
-			}
-			tcpHeader = header.TCP(hdr)
+			tcpHeader = header.TCP(newPkt.TransportHeader)
 		}
 		// TODO(gvisor.dev/issue/170): Need to recompute checksum
 		// and implement nat connection tracking to support TCP.
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 0c2b1f36a..016dbe15e 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -1203,12 +1203,12 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link
 		n.stack.stats.IP.PacketsReceived.Increment()
 	}
 
-	netHeader, ok := pkt.Data.PullUp(netProto.MinimumPacketSize())
-	if !ok {
+	if len(pkt.Data.First()) < netProto.MinimumPacketSize() {
 		n.stack.stats.MalformedRcvdPackets.Increment()
 		return
 	}
-	src, dst := netProto.ParseAddresses(netHeader)
+
+	src, dst := netProto.ParseAddresses(pkt.Data.First())
 
 	if n.stack.handleLocal && !n.isLoopback() && n.getRef(protocol, src) != nil {
 		// The source address is one of our own, so we never should have gotten a
@@ -1289,8 +1289,22 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link
 
 func (n *NIC) forwardPacket(r *Route, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) {
 	// TODO(b/143425874) Decrease the TTL field in forwarded packets.
-	if linkHeaderLen := int(n.linkEP.MaxHeaderLength()); linkHeaderLen != 0 {
-		pkt.Header = buffer.NewPrependable(linkHeaderLen)
+
+	firstData := pkt.Data.First()
+	pkt.Data.RemoveFirst()
+
+	if linkHeaderLen := int(n.linkEP.MaxHeaderLength()); linkHeaderLen == 0 {
+		pkt.Header = buffer.NewPrependableFromView(firstData)
+	} else {
+		firstDataLen := len(firstData)
+
+		// pkt.Header should have enough capacity to hold n.linkEP's headers.
+		pkt.Header = buffer.NewPrependable(firstDataLen + linkHeaderLen)
+
+		// TODO(b/151227689): avoid copying the packet when forwarding
+		if n := copy(pkt.Header.Prepend(firstDataLen), firstData); n != firstDataLen {
+			panic(fmt.Sprintf("copied %d bytes, expected %d", n, firstDataLen))
+		}
 	}
 
 	if err := n.linkEP.WritePacket(r, nil /* gso */, protocol, pkt); err != nil {
@@ -1318,13 +1332,12 @@ func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolN
 	// validly formed.
 	n.stack.demux.deliverRawPacket(r, protocol, pkt)
 
-	transHeader, ok := pkt.Data.PullUp(transProto.MinimumPacketSize())
-	if !ok {
+	if len(pkt.Data.First()) < transProto.MinimumPacketSize() {
 		n.stack.stats.MalformedRcvdPackets.Increment()
 		return
 	}
 
-	srcPort, dstPort, err := transProto.ParsePorts(transHeader)
+	srcPort, dstPort, err := transProto.ParsePorts(pkt.Data.First())
 	if err != nil {
 		n.stack.stats.MalformedRcvdPackets.Increment()
 		return
@@ -1362,12 +1375,11 @@ func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcp
 	// ICMPv4 only guarantees that 8 bytes of the transport protocol will
 	// be present in the payload. We know that the ports are within the
 	// first 8 bytes for all known transport protocols.
-	transHeader, ok := pkt.Data.PullUp(8)
-	if !ok {
+	if len(pkt.Data.First()) < 8 {
 		return
 	}
 
-	srcPort, dstPort, err := transProto.ParsePorts(transHeader)
+	srcPort, dstPort, err := transProto.ParsePorts(pkt.Data.First())
 	if err != nil {
 		return
 	}
diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go
index 7d36f8e84..dc125f25e 100644
--- a/pkg/tcpip/stack/packet_buffer.go
+++ b/pkg/tcpip/stack/packet_buffer.go
@@ -37,13 +37,7 @@ type PacketBuffer struct {
 	Data buffer.VectorisedView
 
 	// Header holds the headers of outbound packets. As a packet is passed
-	// down the stack, each layer adds to Header. Note that forwarded
-	// packets don't populate Headers on their way out -- their headers and
-	// payload are never parsed out and remain in Data.
-	//
-	// TODO(gvisor.dev/issue/170): Forwarded packets don't currently
-	// populate Header, but should. This will be doable once early parsing
-	// (https://github.com/google/gvisor/pull/1995) is supported.
+	// down the stack, each layer adds to Header.
 	Header buffer.Prependable
 
 	// These fields are used by both inbound and outbound packets. They
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index d45d2cc1f..c7634ceb1 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -95,18 +95,16 @@ func (f *fakeNetworkEndpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffe
 	f.proto.packetCount[int(f.id.LocalAddress[0])%len(f.proto.packetCount)]++
 
 	// Consume the network header.
-	b, ok := pkt.Data.PullUp(fakeNetHeaderLen)
-	if !ok {
-		return
-	}
+	b := pkt.Data.First()
 	pkt.Data.TrimFront(fakeNetHeaderLen)
 
 	// Handle control packets.
 	if b[2] == uint8(fakeControlProtocol) {
-		nb, ok := pkt.Data.PullUp(fakeNetHeaderLen)
-		if !ok {
+		nb := pkt.Data.First()
+		if len(nb) < fakeNetHeaderLen {
 			return
 		}
+
 		pkt.Data.TrimFront(fakeNetHeaderLen)
 		f.dispatcher.DeliverTransportControlPacket(tcpip.Address(nb[1:2]), tcpip.Address(nb[0:1]), fakeNetNumber, tcpip.TransportProtocolNumber(nb[2]), stack.ControlPortUnreachable, 0, pkt)
 		return
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index a611e44ab..3084e6593 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -642,11 +642,10 @@ func TestTransportForwarding(t *testing.T) {
 		t.Fatal("Response packet not forwarded")
 	}
 
-	hdrs := p.Pkt.Data.ToView()
-	if dst := hdrs[0]; dst != 3 {
+	if dst := p.Pkt.Header.View()[0]; dst != 3 {
 		t.Errorf("Response packet has incorrect destination addresss: got = %d, want = 3", dst)
 	}
-	if src := hdrs[1]; src != 1 {
+	if src := p.Pkt.Header.View()[1]; src != 1 {
 		t.Errorf("Response packet has incorrect source addresss: got = %d, want = 3", src)
 	}
 }
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index b1d820372..feef8dca0 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -747,15 +747,15 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 	// Only accept echo replies.
 	switch e.NetProto {
 	case header.IPv4ProtocolNumber:
-		h, ok := pkt.Data.PullUp(header.ICMPv4MinimumSize)
-		if !ok || header.ICMPv4(h).Type() != header.ICMPv4EchoReply {
+		h := header.ICMPv4(pkt.Data.First())
+		if h.Type() != header.ICMPv4EchoReply {
 			e.stack.Stats().DroppedPackets.Increment()
 			e.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
 			return
 		}
 	case header.IPv6ProtocolNumber:
-		h, ok := pkt.Data.PullUp(header.ICMPv6MinimumSize)
-		if !ok || header.ICMPv6(h).Type() != header.ICMPv6EchoReply {
+		h := header.ICMPv6(pkt.Data.First())
+		if h.Type() != header.ICMPv6EchoReply {
 			e.stack.Stats().DroppedPackets.Increment()
 			e.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
 			return
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index 7712ce652..40461fd31 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -144,11 +144,7 @@ func (s *segment) logicalLen() seqnum.Size {
 // TCP checksum and stores the checksum and result of checksum verification in
 // the csum and csumValid fields of the segment.
 func (s *segment) parse() bool {
-	h, ok := s.data.PullUp(header.TCPMinimumSize)
-	if !ok {
-		return false
-	}
-	hdr := header.TCP(h)
+	h := header.TCP(s.data.First())
 
 	// h is the header followed by the payload. We check that the offset to
 	// the data respects the following constraints:
@@ -160,16 +156,12 @@ func (s *segment) parse() bool {
 	// N.B. The segment has already been validated as having at least the
 	//      minimum TCP size before reaching here, so it's safe to read the
 	//      fields.
-	offset := int(hdr.DataOffset())
-	if offset < header.TCPMinimumSize {
-		return false
-	}
-	hdrWithOpts, ok := s.data.PullUp(offset)
-	if !ok {
+	offset := int(h.DataOffset())
+	if offset < header.TCPMinimumSize || offset > len(h) {
 		return false
 	}
 
-	s.options = []byte(hdrWithOpts[header.TCPMinimumSize:])
+	s.options = []byte(h[header.TCPMinimumSize:offset])
 	s.parsedOptions = header.ParseTCPOptions(s.options)
 
 	// Query the link capabilities to decide if checksum validation is
@@ -181,19 +173,18 @@ func (s *segment) parse() bool {
 		s.data.TrimFront(offset)
 	}
 	if verifyChecksum {
-		hdr = header.TCP(hdrWithOpts)
-		s.csum = hdr.Checksum()
+		s.csum = h.Checksum()
 		xsum := s.route.PseudoHeaderChecksum(ProtocolNumber, uint16(s.data.Size()))
-		xsum = hdr.CalculateChecksum(xsum)
+		xsum = h.CalculateChecksum(xsum)
 		s.data.TrimFront(offset)
 		xsum = header.ChecksumVV(s.data, xsum)
 		s.csumValid = xsum == 0xffff
 	}
 
-	s.sequenceNumber = seqnum.Value(hdr.SequenceNumber())
-	s.ackNumber = seqnum.Value(hdr.AckNumber())
-	s.flags = hdr.Flags()
-	s.window = seqnum.Size(hdr.WindowSize())
+	s.sequenceNumber = seqnum.Value(h.SequenceNumber())
+	s.ackNumber = seqnum.Value(h.AckNumber())
+	s.flags = h.Flags()
+	s.window = seqnum.Size(h.WindowSize())
 	return true
 }
 
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index 1dd63dd61..ace79b7b2 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -28,6 +28,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
 	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
+	"gvisor.dev/gvisor/pkg/test/testutil"
 )
 
 // createConnectedWithSACKPermittedOption creates and connects c.ep with the
@@ -439,21 +440,28 @@ func TestSACKRecovery(t *testing.T) {
 	// Receive the retransmitted packet.
 	c.ReceiveAndCheckPacketWithOptions(data, rtxOffset, maxPayload, tsOptionSize)
 
-	tcpStats := c.Stack().Stats().TCP
-	stats := []struct {
-		stat *tcpip.StatCounter
-		name string
-		want uint64
-	}{
-		{tcpStats.FastRetransmit, "stats.TCP.FastRetransmit", 1},
-		{tcpStats.Retransmits, "stats.TCP.Retransmits", 1},
-		{tcpStats.SACKRecovery, "stats.TCP.SACKRecovery", 1},
-		{tcpStats.FastRecovery, "stats.TCP.FastRecovery", 0},
-	}
-	for _, s := range stats {
-		if got, want := s.stat.Value(), s.want; got != want {
-			t.Errorf("got %s.Value() = %v, want = %v", s.name, got, want)
+	metricPollFn := func() error {
+		tcpStats := c.Stack().Stats().TCP
+		stats := []struct {
+			stat *tcpip.StatCounter
+			name string
+			want uint64
+		}{
+			{tcpStats.FastRetransmit, "stats.TCP.FastRetransmit", 1},
+			{tcpStats.Retransmits, "stats.TCP.Retransmits", 1},
+			{tcpStats.SACKRecovery, "stats.TCP.SACKRecovery", 1},
+			{tcpStats.FastRecovery, "stats.TCP.FastRecovery", 0},
+		}
+		for _, s := range stats {
+			if got, want := s.stat.Value(), s.want; got != want {
+				return fmt.Errorf("got %s.Value() = %v, want = %v", s.name, got, want)
+			}
 		}
+		return nil
+	}
+
+	if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
+		t.Error(err)
 	}
 
 	// Now send 7 mode duplicate ACKs. In SACK TCP dupAcks do not cause
@@ -517,22 +525,28 @@ func TestSACKRecovery(t *testing.T) {
 		bytesRead += maxPayload
 	}
 
-	// In SACK recovery only the first segment is fast retransmitted when
-	// entering recovery.
-	if got, want := c.Stack().Stats().TCP.FastRetransmit.Value(), uint64(1); got != want {
-		t.Errorf("got stats.TCP.FastRetransmit.Value = %v, want = %v", got, want)
-	}
+	metricPollFn = func() error {
+		// In SACK recovery only the first segment is fast retransmitted when
+		// entering recovery.
+		if got, want := c.Stack().Stats().TCP.FastRetransmit.Value(), uint64(1); got != want {
+			return fmt.Errorf("got stats.TCP.FastRetransmit.Value = %v, want = %v", got, want)
+		}
 
-	if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.FastRetransmit.Value(), uint64(1); got != want {
-		t.Errorf("got EP stats SendErrors.FastRetransmit = %v, want = %v", got, want)
-	}
+		if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.FastRetransmit.Value(), uint64(1); got != want {
+			return fmt.Errorf("got EP stats SendErrors.FastRetransmit = %v, want = %v", got, want)
+		}
 
-	if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(4); got != want {
-		t.Errorf("got stats.TCP.Retransmits.Value = %v, want = %v", got, want)
-	}
+		if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(4); got != want {
+			return fmt.Errorf("got stats.TCP.Retransmits.Value = %v, want = %v", got, want)
+		}
 
-	if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.Retransmits.Value(), uint64(4); got != want {
-		t.Errorf("got EP stats Stats.SendErrors.Retransmits = %v, want = %v", got, want)
+		if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.Retransmits.Value(), uint64(4); got != want {
+			return fmt.Errorf("got EP stats Stats.SendErrors.Retransmits = %v, want = %v", got, want)
+		}
+		return nil
+	}
+	if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
+		t.Error(err)
 	}
 
 	c.CheckNoPacketTimeout("More packets received than expected during recovery after partial ack for this cwnd.", 50*time.Millisecond)
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 286c66cf5..7e574859b 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -35,6 +35,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
 	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
+	"gvisor.dev/gvisor/pkg/test/testutil"
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
@@ -209,8 +210,15 @@ func TestTCPResetsSentIncrement(t *testing.T) {
 	c.SendPacket(nil, ackHeaders)
 
 	c.GetPacket()
-	if got := stats.TCP.ResetsSent.Value(); got != want {
-		t.Errorf("got stats.TCP.ResetsSent.Value() = %v, want = %v", got, want)
+
+	metricPollFn := func() error {
+		if got := stats.TCP.ResetsSent.Value(); got != want {
+			return fmt.Errorf("got stats.TCP.ResetsSent.Value() = %v, want = %v", got, want)
+		}
+		return nil
+	}
+	if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
+		t.Error(err)
 	}
 }
 
@@ -3548,7 +3556,7 @@ func TestReceivedInvalidSegmentCountIncrement(t *testing.T) {
 		AckNum:  c.IRS.Add(1),
 		RcvWnd:  30000,
 	})
-	tcpbuf := vv.ToView()[header.IPv4MinimumSize:]
+	tcpbuf := vv.First()[header.IPv4MinimumSize:]
 	tcpbuf[header.TCPDataOffset] = ((header.TCPMinimumSize - 1) / 4) << 4
 
 	c.SendSegment(vv)
@@ -3575,7 +3583,7 @@ func TestReceivedIncorrectChecksumIncrement(t *testing.T) {
 		AckNum:  c.IRS.Add(1),
 		RcvWnd:  30000,
 	})
-	tcpbuf := vv.ToView()[header.IPv4MinimumSize:]
+	tcpbuf := vv.First()[header.IPv4MinimumSize:]
 	// Overwrite a byte in the payload which should cause checksum
 	// verification to fail.
 	tcpbuf[(tcpbuf[header.TCPDataOffset]>>4)*4] = 0x4
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 756ab913a..edb54f0be 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -1250,8 +1250,8 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 // endpoint.
 func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) {
 	// Get the header then trim it from the view.
-	hdr, ok := pkt.Data.PullUp(header.UDPMinimumSize)
-	if !ok || int(header.UDP(hdr).Length()) > pkt.Data.Size() {
+	hdr := header.UDP(pkt.Data.First())
+	if int(hdr.Length()) > pkt.Data.Size() {
 		// Malformed packet.
 		e.stack.Stats().UDP.MalformedPacketsReceived.Increment()
 		e.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
@@ -1286,7 +1286,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 		senderAddress: tcpip.FullAddress{
 			NIC:  r.NICID(),
 			Addr: id.RemoteAddress,
-			Port: header.UDP(hdr).SourcePort(),
+			Port: hdr.SourcePort(),
 		},
 	}
 	packet.data = pkt.Data
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index 52af6de22..6e31a9bac 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -68,13 +68,8 @@ func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) {
 // that don't match any existing endpoint.
 func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) bool {
 	// Get the header then trim it from the view.
-	h, ok := pkt.Data.PullUp(header.UDPMinimumSize)
-	if !ok {
-		// Malformed packet.
-		r.Stack().Stats().UDP.MalformedPacketsReceived.Increment()
-		return true
-	}
-	if int(header.UDP(h).Length()) > pkt.Data.Size() {
+	hdr := header.UDP(pkt.Data.First())
+	if int(hdr.Length()) > pkt.Data.Size() {
 		// Malformed packet.
 		r.Stack().Stats().UDP.MalformedPacketsReceived.Increment()
 		return true
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index b184bd402..7d1310c96 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -166,15 +166,33 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
 		return Errorf("Error write spec: %v", err)
 	}
 
-	runArgs := container.Args{
+	containerArgs := container.Args{
 		ID:        cid,
 		Spec:      spec,
 		BundleDir: tmpDir,
 		Attached:  true,
 	}
-	ws, err := container.Run(conf, runArgs)
+	ct, err := container.New(conf, containerArgs)
 	if err != nil {
-		return Errorf("running container: %v", err)
+		return Errorf("creating container: %v", err)
+	}
+	defer ct.Destroy()
+
+	if err := ct.Start(conf); err != nil {
+		return Errorf("starting container: %v", err)
+	}
+
+	// Forward signals to init in the container. Thus if we get SIGINT from
+	// ^C, the container gracefully exit, and we can clean up.
+	//
+	// N.B. There is a still a window before this where a signal may kill
+	// this process, skipping cleanup.
+	stopForwarding := ct.ForwardSignals(0 /* pid */, false /* fgProcess */)
+	defer stopForwarding()
+
+	ws, err := ct.Wait()
+	if err != nil {
+		return Errorf("waiting for container: %v", err)
 	}
 
 	*waitStatus = ws
@@ -237,20 +255,27 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) {
 	for _, cmd := range cmds {
 		log.Debugf("Run %q", cmd)
 		args := strings.Split(cmd, " ")
-		c := exec.Command(args[0], args[1:]...)
-		if err := c.Run(); err != nil {
+		cmd := exec.Command(args[0], args[1:]...)
+		if err := cmd.Run(); err != nil {
+			c.cleanupNet(cid, dev, "", "", "")
 			return nil, fmt.Errorf("failed to run %q: %v", cmd, err)
 		}
 	}
 
-	if err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec); err != nil {
+	resolvPath, err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec)
+	if err != nil {
+		c.cleanupNet(cid, dev, "", "", "")
 		return nil, err
 	}
-	if err := makeFile("/etc/hostname", cid+"\n", spec); err != nil {
+	hostnamePath, err := makeFile("/etc/hostname", cid+"\n", spec)
+	if err != nil {
+		c.cleanupNet(cid, dev, resolvPath, "", "")
 		return nil, err
 	}
 	hosts := fmt.Sprintf("127.0.0.1\tlocalhost\n%s\t%s\n", c.ip, cid)
-	if err := makeFile("/etc/hosts", hosts, spec); err != nil {
+	hostsPath, err := makeFile("/etc/hosts", hosts, spec)
+	if err != nil {
+		c.cleanupNet(cid, dev, resolvPath, hostnamePath, "")
 		return nil, err
 	}
 
@@ -263,19 +288,22 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) {
 	}
 	spec.Linux.Namespaces = append(spec.Linux.Namespaces, netns)
 
-	return func() { c.cleanNet(cid, dev) }, nil
+	return func() { c.cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath) }, nil
 }
 
-func (c *Do) cleanNet(cid, dev string) {
-	veth, peer := deviceNames(cid)
+// cleanupNet tries to cleanup the network setup in setupNet.
+//
+// It may be called when setupNet is only partially complete, in which case it
+// will cleanup as much as possible, logging warnings for the rest.
+//
+// Unfortunately none of this can be automatically cleaned up on process exit,
+// we must do so explicitly.
+func (c *Do) cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath string) {
+	_, peer := deviceNames(cid)
 
 	cmds := []string{
 		fmt.Sprintf("ip link delete %s", peer),
 		fmt.Sprintf("ip netns delete %s", cid),
-
-		fmt.Sprintf("iptables -t nat -D POSTROUTING -s %s/24 -o %s -j MASQUERADE", c.ip, dev),
-		fmt.Sprintf("iptables -D FORWARD -i %s -o %s -j ACCEPT", dev, veth),
-		fmt.Sprintf("iptables -D FORWARD -o %s -i %s -j ACCEPT", dev, veth),
 	}
 
 	for _, cmd := range cmds {
@@ -286,6 +314,10 @@ func (c *Do) cleanNet(cid, dev string) {
 			log.Warningf("Failed to run %q: %v", cmd, err)
 		}
 	}
+
+	tryRemove(resolvPath)
+	tryRemove(hostnamePath)
+	tryRemove(hostsPath)
 }
 
 func deviceNames(cid string) (string, string) {
@@ -306,13 +338,16 @@ func defaultDevice() (string, error) {
 	return parts[4], nil
 }
 
-func makeFile(dest, content string, spec *specs.Spec) error {
+func makeFile(dest, content string, spec *specs.Spec) (string, error) {
 	tmpFile, err := ioutil.TempFile("", filepath.Base(dest))
 	if err != nil {
-		return err
+		return "", err
 	}
 	if _, err := tmpFile.WriteString(content); err != nil {
-		return err
+		if err := os.Remove(tmpFile.Name()); err != nil {
+			log.Warningf("Failed to remove %q: %v", tmpFile, err)
+		}
+		return "", err
 	}
 	spec.Mounts = append(spec.Mounts, specs.Mount{
 		Source:      tmpFile.Name(),
@@ -320,7 +355,17 @@ func makeFile(dest, content string, spec *specs.Spec) error {
 		Type:        "bind",
 		Options:     []string{"ro"},
 	})
-	return nil
+	return tmpFile.Name(), nil
+}
+
+func tryRemove(path string) {
+	if path == "" {
+		return
+	}
+
+	if err := os.Remove(path); err != nil {
+		log.Warningf("Failed to remove %q: %v", path, err)
+	}
 }
 
 func calculatePeerIP(ip string) (string, error) {
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 347d9b5f4..8539f252d 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -422,7 +422,7 @@ func (c *Container) Start(conf *boot.Config) error {
 			return err
 		}
 	} else {
-		// Join cgroup to strt gofer process to ensure it's part of the cgroup from
+		// Join cgroup to start gofer process to ensure it's part of the cgroup from
 		// the start (and all their children processes).
 		if err := runInCgroup(c.Sandbox.Cgroup, func() error {
 			// Create the gofer process.
diff --git a/runsc/main.go b/runsc/main.go
index 2baba90f8..8e594c58e 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -330,7 +330,7 @@ func main() {
 		log.Infof("Exiting with status: %v", ws)
 		if ws.Signaled() {
 			// No good way to return it, emulate what the shell does. Maybe raise
-			// signall to self?
+			// signal to self?
 			os.Exit(128 + int(ws.Signal()))
 		}
 		os.Exit(ws.ExitStatus())