40 files changed, 1468 insertions, 198 deletions
diff --git a/pkg/fdchannel/BUILD b/pkg/fdchannel/BUILD
new file mode 100644
index 000000000..e54e7371c
--- /dev/null
+++ b/pkg/fdchannel/BUILD
@@ -0,0 +1,17 @@
+load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "fdchannel",
+    srcs = ["fdchannel_unsafe.go"],
+    importpath = "gvisor.dev/gvisor/pkg/fdchannel",
+    visibility = ["//visibility:public"],
+)
+
+go_test(
+    name = "fdchannel_test",
+    size = "small",
+    srcs = ["fdchannel_test.go"],
+    embed = [":fdchannel"],
+)
diff --git a/pkg/fdchannel/fdchannel_test.go b/pkg/fdchannel/fdchannel_test.go
new file mode 100644
index 000000000..5d01dc636
--- /dev/null
+++ b/pkg/fdchannel/fdchannel_test.go
@@ -0,0 +1,131 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fdchannel
+
+import (
+	"io/ioutil"
+	"os"
+	"sync"
+	"syscall"
+	"testing"
+	"time"
+)
+
+func TestSendRecvFD(t *testing.T) {
+	sendFile, err := ioutil.TempFile("", "fdchannel_test_")
+	if err != nil {
+		t.Fatalf("failed to create temporary file: %v", err)
+	}
+	defer sendFile.Close()
+
+	chanFDs, err := NewConnectedSockets()
+	if err != nil {
+		t.Fatalf("failed to create fdchannel sockets: %v", err)
+	}
+	sendEP := NewEndpoint(chanFDs[0])
+	defer sendEP.Destroy()
+	recvEP := NewEndpoint(chanFDs[1])
+	defer recvEP.Destroy()
+	// Nothing has been sent yet, so a non-blocking receive must not succeed.
+	recvFD, err := recvEP.RecvFDNonblock()
+	if err != syscall.EAGAIN && err != syscall.EWOULDBLOCK {
+		t.Errorf("RecvFDNonblock before SendFD: got (%d, %v), wanted (<unspecified>, EAGAIN or EWOULDBLOCK)", recvFD, err)
+	}
+	// Pass the temporary file's descriptor across the socket pair.
+	if err := sendEP.SendFD(int(sendFile.Fd())); err != nil {
+		t.Fatalf("SendFD failed: %v", err)
+	}
+	recvFD, err = recvEP.RecvFD()
+	if err != nil {
+		t.Fatalf("RecvFD failed: %v", err)
+	}
+	recvFile := os.NewFile(uintptr(recvFD), "received file")
+	defer recvFile.Close()
+	// Compare device and inode numbers of the sent and received files.
+	sendInfo, err := sendFile.Stat()
+	if err != nil {
+		t.Fatalf("failed to stat sent file: %v", err)
+	}
+	sendInfoSys := sendInfo.Sys()
+	sendStat, ok := sendInfoSys.(*syscall.Stat_t)
+	if !ok {
+		t.Fatalf("sent file's FileInfo is backed by unknown type %T", sendInfoSys)
+	}
+
+	recvInfo, err := recvFile.Stat()
+	if err != nil {
+		t.Fatalf("failed to stat received file: %v", err)
+	}
+	recvInfoSys := recvInfo.Sys()
+	recvStat, ok := recvInfoSys.(*syscall.Stat_t)
+	if !ok {
+		t.Fatalf("received file's FileInfo is backed by unknown type %T", recvInfoSys)
+	}
+
+	if sendStat.Dev != recvStat.Dev || sendStat.Ino != recvStat.Ino {
+		t.Errorf("sent file (dev=%d, ino=%d) does not match received file (dev=%d, ino=%d)", sendStat.Dev, sendStat.Ino, recvStat.Dev, recvStat.Ino)
+	}
+}
+
+func TestShutdownThenRecvFD(t *testing.T) {
+	sendFile, err := ioutil.TempFile("", "fdchannel_test_")
+	if err != nil {
+		t.Fatalf("failed to create temporary file: %v", err)
+	}
+	defer sendFile.Close()
+
+	chanFDs, err := NewConnectedSockets()
+	if err != nil {
+		t.Fatalf("failed to create fdchannel sockets: %v", err)
+	}
+	sendEP := NewEndpoint(chanFDs[0])
+	defer sendEP.Destroy()
+	recvEP := NewEndpoint(chanFDs[1])
+	defer recvEP.Destroy()
+
+	recvEP.Shutdown()
+	if _, err := recvEP.RecvFD(); err == nil {
+		t.Error("RecvFD succeeded unexpectedly")
+	}
+}
+
+func TestRecvFDThenShutdown(t *testing.T) {
+	sendFile, err := ioutil.TempFile("", "fdchannel_test_")
+	if err != nil {
+		t.Fatalf("failed to create temporary file: %v", err)
+	}
+	defer sendFile.Close()
+
+	chanFDs, err := NewConnectedSockets()
+	if err != nil {
+		t.Fatalf("failed to create fdchannel sockets: %v", err)
+	}
+	sendEP := NewEndpoint(chanFDs[0])
+	defer sendEP.Destroy()
+	recvEP := NewEndpoint(chanFDs[1])
+	defer recvEP.Destroy()
+
+	var receiverWG sync.WaitGroup
+	receiverWG.Add(1)
+	go func() {
+		defer receiverWG.Done()
+		if _, err := recvEP.RecvFD(); err == nil {
+			t.Error("RecvFD succeeded unexpectedly")
+		}
+	}()
+	defer receiverWG.Wait()
+	time.Sleep(time.Second) // to ensure recvEP.RecvFD() has blocked
+	recvEP.Shutdown()
+}
diff --git a/pkg/fdchannel/fdchannel_unsafe.go b/pkg/fdchannel/fdchannel_unsafe.go
new file mode 100644
index 000000000..367235be5
--- /dev/null
+++ b/pkg/fdchannel/fdchannel_unsafe.go
@@ -0,0 +1,146 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
+
+// Package fdchannel implements passing file descriptors between processes over
+// Unix domain sockets.
+package fdchannel
+
+import (
+	"fmt"
+	"reflect"
+	"sync/atomic"
+	"syscall"
+	"unsafe"
+)
+
+// int32 is the real type of a file descriptor.
+const sizeofInt32 = int(unsafe.Sizeof(int32(0)))
+
+// NewConnectedSockets returns a pair of file descriptors, owned by the caller,
+// representing connected sockets that may be passed to separate calls to
+// NewEndpoint to create connected Endpoints.
+func NewConnectedSockets() ([2]int, error) {
+	return syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
+}
+
+// Endpoint sends file descriptors to, and receives them from, another
+// connected Endpoint.
+//
+// Endpoint is not copyable or movable by value.
+type Endpoint struct {
+	sockfd int32 // accessed using atomic memory operations
+	msghdr syscall.Msghdr
+	cmsg   *syscall.Cmsghdr // followed by sizeofInt32 bytes of data
+}
+
+// Init must be called on zero-value Endpoints before first use. sockfd must be
+// a blocking AF_UNIX SOCK_SEQPACKET socket.
+func (ep *Endpoint) Init(sockfd int) {
+	// "Datagram sockets in various domains (e.g., the UNIX and Internet
+	// domains) permit zero-length datagrams." - recv(2). Experimentally,
+	// sendmsg+recvmsg for a zero-length datagram is slightly faster than
+	// sendmsg+recvmsg for a single byte over a stream socket.
+	cmsgSlice := make([]byte, syscall.CmsgSpace(sizeofInt32))
+	cmsgReflect := (*reflect.SliceHeader)((unsafe.Pointer)(&cmsgSlice))
+	ep.sockfd = int32(sockfd)
+	ep.msghdr.Control = (*byte)((unsafe.Pointer)(cmsgReflect.Data))
+	ep.cmsg = (*syscall.Cmsghdr)((unsafe.Pointer)(cmsgReflect.Data))
+	// ep.msghdr.Controllen and ep.cmsg.* are mutated by recvmsg(2), so they're
+	// set before calling sendmsg/recvmsg.
+}
+
+// NewEndpoint is a convenience function that returns an initialized Endpoint
+// allocated on the heap.
+func NewEndpoint(sockfd int) *Endpoint {
+	ep := &Endpoint{}
+	ep.Init(sockfd)
+	return ep
+}
+
+// Destroy releases resources owned by ep. No other Endpoint methods may be
+// called after Destroy.
+func (ep *Endpoint) Destroy() {
+	// These need not use sync/atomic since there must not be any concurrent
+	// calls to Endpoint methods.
+	if ep.sockfd >= 0 {
+		syscall.Close(int(ep.sockfd))
+		ep.sockfd = -1
+	}
+}
+
+// Shutdown causes concurrent and future calls to ep.SendFD(), ep.RecvFD(), and
+// ep.RecvFDNonblock(), as well as the same calls in the connected Endpoint, to
+// unblock and return errors. It does not wait for concurrent calls to return.
+//
+// Shutdown is the only Endpoint method that may be called concurrently with
+// other methods.
+func (ep *Endpoint) Shutdown() {
+	if sockfd := int(atomic.SwapInt32(&ep.sockfd, -1)); sockfd >= 0 {
+		syscall.Shutdown(sockfd, syscall.SHUT_RDWR)
+		syscall.Close(sockfd)
+	}
+}
+
+// SendFD sends the open file description represented by the given file
+// descriptor to the connected Endpoint.
+func (ep *Endpoint) SendFD(fd int) error {
+	cmsgLen := syscall.CmsgLen(sizeofInt32)
+	ep.cmsg.Level = syscall.SOL_SOCKET
+	ep.cmsg.Type = syscall.SCM_RIGHTS
+	ep.cmsg.SetLen(cmsgLen)
+	*ep.cmsgData() = int32(fd)
+	ep.msghdr.SetControllen(cmsgLen)
+	_, _, e := syscall.Syscall(syscall.SYS_SENDMSG, uintptr(atomic.LoadInt32(&ep.sockfd)), uintptr((unsafe.Pointer)(&ep.msghdr)), 0)
+	if e != 0 {
+		return e
+	}
+	return nil
+}
+
+// RecvFD receives an open file description from the connected Endpoint and
+// returns a file descriptor representing it, owned by the caller.
+func (ep *Endpoint) RecvFD() (int, error) {
+	return ep.recvFD(0)
+}
+
+// RecvFDNonblock receives an open file description from the connected Endpoint
+// and returns a file descriptor representing it, owned by the caller. If there
+// are no pending receivable open file descriptions, RecvFDNonblock returns
+// (<unspecified>, EAGAIN or EWOULDBLOCK).
+func (ep *Endpoint) RecvFDNonblock() (int, error) {
+	return ep.recvFD(syscall.MSG_DONTWAIT)
+}
+
+func (ep *Endpoint) recvFD(flags uintptr) (int, error) {
+	cmsgLen := syscall.CmsgLen(sizeofInt32)
+	ep.msghdr.SetControllen(cmsgLen)
+	_, _, e := syscall.Syscall(syscall.SYS_RECVMSG, uintptr(atomic.LoadInt32(&ep.sockfd)), uintptr((unsafe.Pointer)(&ep.msghdr)), flags|syscall.MSG_TRUNC)
+	if e != 0 {
+		return -1, e
+	}
+	if int(ep.msghdr.Controllen) != cmsgLen {
+		return -1, fmt.Errorf("received control message has incorrect length: got %d, wanted %d", ep.msghdr.Controllen, cmsgLen)
+	}
+	if ep.cmsg.Level != syscall.SOL_SOCKET || ep.cmsg.Type != syscall.SCM_RIGHTS {
+		return -1, fmt.Errorf("received control message has incorrect (level, type): got (%v, %v), wanted (%v, %v)", ep.cmsg.Level, ep.cmsg.Type, syscall.SOL_SOCKET, syscall.SCM_RIGHTS)
+	}
+	return int(*ep.cmsgData()), nil
+}
+
+func (ep *Endpoint) cmsgData() *int32 {
+	// syscall.CmsgLen(0) == syscall.cmsgAlignOf(syscall.SizeofCmsghdr)
+	return (*int32)((unsafe.Pointer)(uintptr((unsafe.Pointer)(ep.cmsg)) + uintptr(syscall.CmsgLen(0))))
+}
diff --git a/pkg/log/log.go b/pkg/log/log.go
index 0765a1963..ab9ad01ef 100644
--- a/pkg/log/log.go
+++ b/pkg/log/log.go
@@ -50,6 +50,19 @@ const (
 	Debug
 )
 
+func (l Level) String() string {
+	switch l {
+	case Warning:
+		return "Warning"
+	case Info:
+		return "Info"
+	case Debug:
+		return "Debug"
+	default:
+		return fmt.Sprintf("Invalid level: %d", l)
+	}
+}
+
 // Emitter is the final destination for logs.
 type Emitter interface {
 	// Emit emits the given log statement. This allows for control over the
diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD
index 15a1fe8a9..5dccb8e3c 100644
--- a/pkg/sentry/control/BUILD
+++ b/pkg/sentry/control/BUILD
@@ -6,6 +6,7 @@ go_library(
     name = "control",
     srcs = [
         "control.go",
+        "logging.go",
         "pprof.go",
         "proc.go",
         "state.go",
@@ -26,8 +27,10 @@ go_library(
         "//pkg/sentry/kernel/time",
         "//pkg/sentry/limits",
         "//pkg/sentry/state",
+        "//pkg/sentry/strace",
         "//pkg/sentry/usage",
         "//pkg/sentry/watchdog",
+        "//pkg/tcpip/link/sniffer",
         "//pkg/urpc",
     ],
 )
diff --git a/pkg/sentry/control/logging.go b/pkg/sentry/control/logging.go
new file mode 100644
index 000000000..811f24324
--- /dev/null
+++ b/pkg/sentry/control/logging.go
@@ -0,0 +1,136 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package control
+
+import (
+	"fmt"
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/strace"
+	"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+)
+
+// LoggingArgs are the arguments to use for changing the logging
+// level and strace list.
+type LoggingArgs struct {
+	// SetLevel is a flag used to indicate that we should update
+	// the logging level. We should be able to change the strace
+	// list without affecting the logging level and vice versa.
+	SetLevel bool
+
+	// Level is the log level that will be set if SetLevel is true.
+	Level log.Level
+
+	// SetLogPackets indicates that we should update the log packets flag.
+	SetLogPackets bool
+
+	// LogPackets is the actual value to set for LogPackets.
+	// SetLogPackets must be enabled to indicate that we're changing
+	// the value.
+	LogPackets bool
+
+	// SetStrace is a flag used to indicate that strace related
+	// arguments were passed in.
+	SetStrace bool
+
+	// EnableStrace is a flag from the CLI that specifies whether to
+	// enable strace at all. If this flag is false then a completely
+	// pristine copy of the syscall table will be swapped in. This
+	// approach is used to remain consistent with an empty strace
+	// whitelist meaning trace all system calls.
+	EnableStrace bool
+
+	// StraceWhitelist is the whitelist of syscalls to trace to log. If
+	// it is empty and EnableStrace is set, all system calls are traced.
+	StraceWhitelist []string
+
+	// SetEventStrace is a flag used to indicate that event strace
+	// related arguments were passed in.
+	SetEventStrace bool
+
+	// StraceEventWhitelist is the whitelist of syscalls to trace
+	// to event log.
+	StraceEventWhitelist []string
+}
+
+// Logging provides functions related to logging.
+type Logging struct{}
+
+// Change will change the log level, packet logging and strace arguments.
+// It returns an error only if the strace or event strace configuration
+// fails; code is never written. The signature is required by the URPC
+// interface. Additionally, it may look odd that this is the only method
+// attached to an empty struct but this is also part of how
+// URPC dispatches.
+func (l *Logging) Change(args *LoggingArgs, code *int) error {
+	if args.SetLevel {
+		// Logging uses an atomic for the level so this is thread safe.
+		log.SetLevel(args.Level)
+	}
+
+	if args.SetLogPackets {
+		if args.LogPackets {
+			atomic.StoreUint32(&sniffer.LogPackets, 1)
+		} else {
+			atomic.StoreUint32(&sniffer.LogPackets, 0)
+		}
+		log.Infof("LogPackets set to: %v", atomic.LoadUint32(&sniffer.LogPackets))
+	}
+
+	if args.SetStrace {
+		if err := l.configureStrace(args); err != nil {
+			return fmt.Errorf("error configuring strace: %v", err)
+		}
+	}
+
+	if args.SetEventStrace {
+		if err := l.configureEventStrace(args); err != nil {
+			return fmt.Errorf("error configuring event strace: %v", err)
+		}
+	}
+
+	return nil
+}
+
+func (l *Logging) configureStrace(args *LoggingArgs) error {
+	if args.EnableStrace {
+		// Install the whitelist specified.
+		if len(args.StraceWhitelist) > 0 {
+			if err := strace.Enable(args.StraceWhitelist, strace.SinkTypeLog); err != nil {
+				return err
+			}
+		} else {
+			// For convenience, if strace is enabled but whitelist
+			// is empty, enable everything to log.
+			strace.EnableAll(strace.SinkTypeLog)
+		}
+	} else {
+		// Uninstall all strace functions.
+		strace.Disable(strace.SinkTypeLog)
+	}
+	return nil
+}
+
+func (l *Logging) configureEventStrace(args *LoggingArgs) error {
+	if len(args.StraceEventWhitelist) > 0 {
+		if err := strace.Enable(args.StraceEventWhitelist, strace.SinkTypeEvent); err != nil {
+			return err
+		}
+	} else {
+		strace.Disable(strace.SinkTypeEvent)
+	}
+	return nil
+}
diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go
index 55ffe6c0c..8e1f5674d 100644
--- a/pkg/sentry/fs/file.go
+++ b/pkg/sentry/fs/file.go
@@ -310,9 +310,11 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
 		return 0, syserror.ErrInterrupted
 	}
 
+	unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
 	// Handle append mode.
 	if f.Flags().Append {
 		if err := f.offsetForAppend(ctx, &f.offset); err != nil {
+			unlockAppendMu()
 			f.mu.Unlock()
 			return 0, err
 		}
@@ -322,6 +324,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
 	limit, ok := f.checkLimit(ctx, f.offset)
 	switch {
 	case ok && limit == 0:
+		unlockAppendMu()
 		f.mu.Unlock()
 		return 0, syserror.ErrExceedsFileSizeLimit
 	case ok:
@@ -333,6 +336,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
 	if n >= 0 && !f.flags.NonSeekable {
 		atomic.StoreInt64(&f.offset, f.offset+n)
 	}
+	unlockAppendMu()
 	f.mu.Unlock()
 	return n, err
 }
@@ -348,13 +352,11 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
 	// However, on Linux, if a file is opened with O_APPEND,  pwrite()
 	// appends data to the end of the file, regardless of the value of
 	// offset."
+	unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
+	defer unlockAppendMu()
+
 	if f.Flags().Append {
-		if !f.mu.Lock(ctx) {
-			return 0, syserror.ErrInterrupted
-		}
-		defer f.mu.Unlock()
 		if err := f.offsetForAppend(ctx, &offset); err != nil {
-			f.mu.Unlock()
 			return 0, err
 		}
 	}
@@ -373,7 +375,7 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
 
 // offsetForAppend sets the given offset to the end of the file.
 //
-// Precondition: the underlying file mutex should be held.
+// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing.
 func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
 	uattr, err := f.Dirent.Inode.UnstableAttr(ctx)
 	if err != nil {
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index a889586aa..e4aae1135 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -15,6 +15,8 @@
 package fs
 
 import (
+	"sync"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/metric"
@@ -55,6 +57,12 @@ type Inode struct {
 
 	// overlay is the overlay entry for this Inode.
 	overlay *overlayEntry
+
+	// appendMu is used to synchronize write operations into files which
+	// have been opened with O_APPEND. Operations which change a file size
+	// have to take this lock for read. Write operations to files with
+	// O_APPEND have to take this lock for write.
+	appendMu sync.RWMutex `state:"nosave"`
 }
 
 // LockCtx is an Inode's lock context and contains different personalities of locks; both
@@ -337,6 +345,8 @@ func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error {
 	if i.overlay != nil {
 		return overlayTruncate(ctx, i.overlay, d, size)
 	}
+	i.appendMu.RLock()
+	defer i.appendMu.RUnlock()
 	return i.InodeOperations.Truncate(ctx, i, size)
 }
 
@@ -438,3 +448,12 @@ func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool {
 	}
 	return creds.HasCapability(cp)
 }
+
+func (i *Inode) lockAppendMu(appendMode bool) func() {
+	if appendMode {
+		i.appendMu.Lock()
+		return i.appendMu.Unlock
+	}
+	i.appendMu.RLock()
+	return i.appendMu.RUnlock
+}
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index 57b8b14e3..920d86042 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -537,12 +537,6 @@ func overlayCheck(ctx context.Context, o *overlayEntry, p PermMask) error {
 	if o.upper != nil {
 		err = o.upper.check(ctx, p)
 	} else {
-		if p.Write {
-			// Since writes will be redirected to the upper filesystem, the lower
-			// filesystem need not be writable, but must be readable for copy-up.
-			p.Write = false
-			p.Read = true
-		}
 		err = o.lower.check(ctx, p)
 	}
 	o.copyMu.RUnlock()
diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD
index b70c583f3..da41a10ab 100644
--- a/pkg/sentry/fs/proc/BUILD
+++ b/pkg/sentry/fs/proc/BUILD
@@ -31,6 +31,7 @@ go_library(
     visibility = ["//pkg/sentry:internal"],
     deps = [
         "//pkg/abi/linux",
+        "//pkg/binary",
         "//pkg/log",
         "//pkg/sentry/context",
         "//pkg/sentry/fs",
diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go
index caa1a5c4d..37694620c 100644
--- a/pkg/sentry/fs/proc/net.go
+++ b/pkg/sentry/fs/proc/net.go
@@ -20,6 +20,7 @@ import (
 	"time"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/binary"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/context"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -55,9 +56,8 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo
 			"psched": newStaticProcInode(ctx, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))),
 			"ptype":  newStaticProcInode(ctx, msrc, []byte("Type Device      Function")),
 			"route":  newStaticProcInode(ctx, msrc, []byte("Iface   Destination     Gateway         Flags   RefCnt  Use     Metric  Mask            MTU     Window  IRTT")),
-			"tcp":    newStaticProcInode(ctx, msrc, []byte("  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode")),
-
-			"udp": newStaticProcInode(ctx, msrc, []byte("  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops")),
+			"tcp":    seqfile.NewSeqFileInode(ctx, &netTCP{k: k}, msrc),
+			"udp":    newStaticProcInode(ctx, msrc, []byte("  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops")),
 
 			"unix": seqfile.NewSeqFileInode(ctx, &netUnix{k: k}, msrc),
 		}
@@ -210,10 +210,6 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
 	}
 
 	var buf bytes.Buffer
-	// Header
-	fmt.Fprintf(&buf, "Num       RefCount Protocol Flags    Type St Inode Path\n")
-
-	// Entries
 	for _, se := range n.k.ListSockets() {
 		s := se.Sock.Get()
 		if s == nil {
@@ -222,6 +218,7 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
 		}
 		sfile := s.(*fs.File)
 		if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX {
+			s.DecRef()
 			// Not a unix socket.
 			continue
 		}
@@ -281,12 +278,160 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
 		}
 		fmt.Fprintf(&buf, "\n")
 
-		sfile.DecRef()
+		s.DecRef()
+	}
+
+	data := []seqfile.SeqData{
+		{
+			Buf:    []byte("Num       RefCount Protocol Flags    Type St Inode Path\n"),
+			Handle: n,
+		},
+		{
+			Buf:    buf.Bytes(),
+			Handle: n,
+		},
+	}
+	return data, 0
+}
+
+// netTCP implements seqfile.SeqSource for /proc/net/tcp.
+//
+// +stateify savable
+type netTCP struct {
+	k *kernel.Kernel
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
+func (*netTCP) NeedsUpdate(generation int64) bool {
+	return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+	t := kernel.TaskFromContext(ctx)
+
+	if h != nil {
+		return nil, 0
+	}
+
+	var buf bytes.Buffer
+	for _, se := range n.k.ListSockets() {
+		s := se.Sock.Get()
+		if s == nil {
+			log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock)
+			continue
+		}
+		sfile := s.(*fs.File)
+		sops, ok := sfile.FileOperations.(socket.Socket)
+		if !ok {
+			panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
+		}
+		if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) {
+			s.DecRef()
+			// Not tcp4 sockets.
+			continue
+		}
+
+		// Linux's documentation for the fields below can be found at
+		// https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
+		// For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
+		// Note that the header doesn't contain labels for all the fields.
+
+		// Field: sl; entry number.
+		fmt.Fprintf(&buf, "%4d: ", se.ID)
+
+		portBuf := make([]byte, 2)
+
+		// Field: local_address.
+		var localAddr linux.SockAddrInet
+		if local, _, err := sops.GetSockName(t); err == nil {
+			localAddr = local.(linux.SockAddrInet)
+		}
+		binary.LittleEndian.PutUint16(portBuf, localAddr.Port)
+		fmt.Fprintf(&buf, "%08X:%04X ",
+			binary.LittleEndian.Uint32(localAddr.Addr[:]),
+			portBuf)
+
+		// Field: rem_address.
+		var remoteAddr linux.SockAddrInet
+		if remote, _, err := sops.GetPeerName(t); err == nil {
+			remoteAddr = remote.(linux.SockAddrInet)
+		}
+		binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port)
+		fmt.Fprintf(&buf, "%08X:%04X ",
+			binary.LittleEndian.Uint32(remoteAddr.Addr[:]),
+			portBuf)
+
+		// Field: state; socket state.
+		fmt.Fprintf(&buf, "%02X ", sops.State())
+
+		// Field: tx_queue, rx_queue; number of packets in the transmit and
+		// receive queue. Unimplemented.
+		fmt.Fprintf(&buf, "%08X:%08X ", 0, 0)
+
+		// Field: tr, tm->when; timer active state and number of jiffies
+		// until timer expires. Unimplemented.
+		fmt.Fprintf(&buf, "%02X:%08X ", 0, 0)
+
+		// Field: retrnsmt; number of unrecovered RTO timeouts.
+		// Unimplemented.
+		fmt.Fprintf(&buf, "%08X ", 0)
+
+		// Field: uid.
+		uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx)
+		if err != nil {
+			log.Warningf("Failed to retrieve unstable attr for socket file: %v", err)
+			fmt.Fprintf(&buf, "%5d ", 0)
+		} else {
+			fmt.Fprintf(&buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()))
+		}
+
+		// Field: timeout; number of unanswered 0-window probes.
+		// Unimplemented.
+		fmt.Fprintf(&buf, "%8d ", 0)
+
+		// Field: inode.
+		fmt.Fprintf(&buf, "%8d ", sfile.InodeID())
+
+		// Field: refcount. Don't count the ref we obtain while dereferencing
+		// the weakref to this socket.
+		fmt.Fprintf(&buf, "%d ", sfile.ReadRefs()-1)
+
+		// Field: Socket struct address. Redacted due to the same reason as
+		// the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
+		fmt.Fprintf(&buf, "%#016p ", (*socket.Socket)(nil))
+
+		// Field: retransmit timeout. Unimplemented.
+		fmt.Fprintf(&buf, "%d ", 0)
+
+		// Field: predicted tick of soft clock (delayed ACK control data).
+		// Unimplemented.
+		fmt.Fprintf(&buf, "%d ", 0)
+
+		// Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
+		fmt.Fprintf(&buf, "%d ", 0)
+
+		// Field: sending congestion window, Unimplemented.
+		fmt.Fprintf(&buf, "%d ", 0)
+
+		// Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
+		// Unimplemented, report as large threshold.
+		fmt.Fprintf(&buf, "%d", -1)
+
+		fmt.Fprintf(&buf, "\n")
+
+		s.DecRef()
 	}
 
-	data := []seqfile.SeqData{{
-		Buf:    buf.Bytes(),
-		Handle: (*netUnix)(nil),
-	}}
+	data := []seqfile.SeqData{
+		{
+			Buf:    []byte("  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode                                                     \n"),
+			Handle: n,
+		},
+		{
+			Buf:    buf.Bytes(),
+			Handle: n,
+		},
+	}
 	return data, 0
 }
diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go
index 978dc679b..eed1c2854 100644
--- a/pkg/sentry/fs/splice.go
+++ b/pkg/sentry/fs/splice.go
@@ -88,6 +88,8 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64,
 
 	// Check append-only mode and the limit.
 	if !dstPipe {
+		unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append)
+		defer unlock()
 		if dst.Flags().Append {
 			if opts.DstOffset {
 				// We need to acquire the lock.
diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go
index a5fcdf969..881dd89b0 100644
--- a/pkg/sentry/fs/tmpfs/fs.go
+++ b/pkg/sentry/fs/tmpfs/fs.go
@@ -133,6 +133,9 @@ func (f *Filesystem) Mount(ctx context.Context, device string, flags fs.MountSou
 	}
 
 	// Construct a mount which will follow the cache options provided.
+	//
+	// TODO(gvisor.dev/issue/179): There should be no reason to disable
+	// caching once bind mounts are properly supported.
 	var msrc *fs.MountSource
 	switch options[cacheKey] {
 	case "", cacheAll:
diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD
index 37cb8c8b9..42779baa9 100644
--- a/pkg/sentry/kernel/auth/BUILD
+++ b/pkg/sentry/kernel/auth/BUILD
@@ -4,6 +4,17 @@ load("//tools/go_generics:defs.bzl", "go_template_instance")
 load("//tools/go_stateify:defs.bzl", "go_library")
 
 go_template_instance(
+    name = "atomicptr_credentials",
+    out = "atomicptr_credentials.go",
+    package = "auth",
+    suffix = "Credentials",
+    template = "//third_party/gvsync:generic_atomicptr",
+    types = {
+        "Value": "Credentials",
+    },
+)
+
+go_template_instance(
     name = "id_map_range",
     out = "id_map_range.go",
     package = "auth",
@@ -34,6 +45,7 @@ go_template_instance(
 go_library(
     name = "auth",
     srcs = [
+        "atomicptr_credentials.go",
         "auth.go",
         "capability_set.go",
         "context.go",
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index c297c5973..2e3a39d3b 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -386,10 +386,11 @@ type Task struct {
 
 	// creds is the task's credentials.
 	//
-	// creds is protected by mu, however the value itself is immutable and can
-	// only be changed by a copy. After reading the pointer, access will
-	// proceed outside the scope of mu. creds is owned by the task goroutine.
-	creds *auth.Credentials
+	// creds.Load() may be called without synchronization. creds.Store() is
+	// serialized by mu. creds is owned by the task goroutine. All
+	// auth.Credentials objects that creds may point to, or have pointed to
+	// in the past, must be treated as immutable.
+	creds auth.AtomicPtrCredentials
 
 	// utsns is the task's UTS namespace.
 	//
@@ -597,7 +598,7 @@ func (t *Task) Value(key interface{}) interface{} {
 	case CtxTask:
 		return t
 	case auth.CtxCredentials:
-		return t.creds
+		return t.Credentials()
 	case context.CtxThreadGroupID:
 		return int32(t.ThreadGroup().ID())
 	case fs.CtxRoot:
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index 0e621f0d1..b5cc3860d 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -425,6 +425,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
 	if opts.NewAddressSpace || opts.NewSignalHandlers {
 		return syserror.EINVAL
 	}
+	creds := t.Credentials()
 	if opts.NewThreadGroup {
 		t.tg.signalHandlers.mu.Lock()
 		if t.tg.tasksCount != 1 {
@@ -439,8 +440,6 @@ func (t *Task) Unshare(opts *SharingOptions) error {
 		if t.IsChrooted() {
 			return syserror.EPERM
 		}
-		// This temporary is needed because Go.
-		creds := t.Credentials()
 		newUserNS, err := creds.NewChildUserNamespace()
 		if err != nil {
 			return err
@@ -449,6 +448,8 @@ func (t *Task) Unshare(opts *SharingOptions) error {
 		if err != nil {
 			return err
 		}
+		// Need to reload creds, because t.SetUserNamespace() changed task credentials.
+		creds = t.Credentials()
 	}
 	haveCapSysAdmin := t.HasCapability(linux.CAP_SYS_ADMIN)
 	if opts.NewPIDNamespace {
@@ -473,7 +474,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
 		}
 		// Note that this must happen after NewUserNamespace, so the
 		// new user namespace is used if there is one.
-		t.utsns = t.utsns.Clone(t.creds.UserNamespace)
+		t.utsns = t.utsns.Clone(creds.UserNamespace)
 	}
 	if opts.NewIPCNamespace {
 		if !haveCapSysAdmin {
@@ -482,7 +483,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
 		}
 		// Note that "If CLONE_NEWIPC is set, then create the process in a new IPC
 		// namespace"
-		t.ipcns = NewIPCNamespace(t.creds.UserNamespace)
+		t.ipcns = NewIPCNamespace(creds.UserNamespace)
 	}
 	var oldfds *FDMap
 	if opts.NewFiles {
diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go
index 39c138925..78ff14b20 100644
--- a/pkg/sentry/kernel/task_identity.go
+++ b/pkg/sentry/kernel/task_identity.go
@@ -25,30 +25,22 @@ import (
 //
 // This value must be considered immutable.
 func (t *Task) Credentials() *auth.Credentials {
-	t.mu.Lock()
-	defer t.mu.Unlock()
-	return t.creds
+	return t.creds.Load()
 }
 
 // UserNamespace returns the user namespace associated with the task.
 func (t *Task) UserNamespace() *auth.UserNamespace {
-	t.mu.Lock()
-	defer t.mu.Unlock()
-	return t.creds.UserNamespace
+	return t.Credentials().UserNamespace
 }
 
 // HasCapabilityIn checks if the task has capability cp in user namespace ns.
 func (t *Task) HasCapabilityIn(cp linux.Capability, ns *auth.UserNamespace) bool {
-	t.mu.Lock()
-	defer t.mu.Unlock()
-	return t.creds.HasCapabilityIn(cp, ns)
+	return t.Credentials().HasCapabilityIn(cp, ns)
 }
 
 // HasCapability checks if the task has capability cp in its user namespace.
 func (t *Task) HasCapability(cp linux.Capability) bool {
-	t.mu.Lock()
-	defer t.mu.Unlock()
-	return t.creds.HasCapability(cp)
+	return t.Credentials().HasCapability(cp)
 }
 
 // SetUID implements the semantics of setuid(2).
@@ -57,9 +49,12 @@ func (t *Task) SetUID(uid auth.UID) error {
 	if !uid.Ok() {
 		return syserror.EINVAL
 	}
+
 	t.mu.Lock()
 	defer t.mu.Unlock()
-	kuid := t.creds.UserNamespace.MapToKUID(uid)
+
+	creds := t.Credentials()
+	kuid := creds.UserNamespace.MapToKUID(uid)
 	if !kuid.Ok() {
 		return syserror.EINVAL
 	}
@@ -67,17 +62,17 @@ func (t *Task) SetUID(uid auth.UID) error {
 	// effective UID of the caller is root (more precisely: if the caller has
 	// the CAP_SETUID capability), the real UID and saved set-user-ID are also
 	// set." - setuid(2)
-	if t.creds.HasCapability(linux.CAP_SETUID) {
+	if creds.HasCapability(linux.CAP_SETUID) {
 		t.setKUIDsUncheckedLocked(kuid, kuid, kuid)
 		return nil
 	}
 	// "EPERM: The user is not privileged (Linux: does not have the CAP_SETUID
 	// capability) and uid does not match the real UID or saved set-user-ID of
 	// the calling process."
-	if kuid != t.creds.RealKUID && kuid != t.creds.SavedKUID {
+	if kuid != creds.RealKUID && kuid != creds.SavedKUID {
 		return syserror.EPERM
 	}
-	t.setKUIDsUncheckedLocked(t.creds.RealKUID, kuid, t.creds.SavedKUID)
+	t.setKUIDsUncheckedLocked(creds.RealKUID, kuid, creds.SavedKUID)
 	return nil
 }
 
@@ -87,37 +82,38 @@ func (t *Task) SetREUID(r, e auth.UID) error {
 	defer t.mu.Unlock()
 	// "Supplying a value of -1 for either the real or effective user ID forces
 	// the system to leave that ID unchanged." - setreuid(2)
-	newR := t.creds.RealKUID
+	creds := t.Credentials()
+	newR := creds.RealKUID
 	if r.Ok() {
-		newR = t.creds.UserNamespace.MapToKUID(r)
+		newR = creds.UserNamespace.MapToKUID(r)
 		if !newR.Ok() {
 			return syserror.EINVAL
 		}
 	}
-	newE := t.creds.EffectiveKUID
+	newE := creds.EffectiveKUID
 	if e.Ok() {
-		newE = t.creds.UserNamespace.MapToKUID(e)
+		newE = creds.UserNamespace.MapToKUID(e)
 		if !newE.Ok() {
 			return syserror.EINVAL
 		}
 	}
-	if !t.creds.HasCapability(linux.CAP_SETUID) {
+	if !creds.HasCapability(linux.CAP_SETUID) {
 		// "Unprivileged processes may only set the effective user ID to the
 		// real user ID, the effective user ID, or the saved set-user-ID."
-		if newE != t.creds.RealKUID && newE != t.creds.EffectiveKUID && newE != t.creds.SavedKUID {
+		if newE != creds.RealKUID && newE != creds.EffectiveKUID && newE != creds.SavedKUID {
 			return syserror.EPERM
 		}
 		// "Unprivileged users may only set the real user ID to the real user
 		// ID or the effective user ID."
-		if newR != t.creds.RealKUID && newR != t.creds.EffectiveKUID {
+		if newR != creds.RealKUID && newR != creds.EffectiveKUID {
 			return syserror.EPERM
 		}
 	}
 	// "If the real user ID is set (i.e., ruid is not -1) or the effective user
 	// ID is set to a value not equal to the previous real user ID, the saved
 	// set-user-ID will be set to the new effective user ID."
-	newS := t.creds.SavedKUID
-	if r.Ok() || (e.Ok() && newE != t.creds.EffectiveKUID) {
+	newS := creds.SavedKUID
+	if r.Ok() || (e.Ok() && newE != creds.EffectiveKUID) {
 		newS = newE
 	}
 	t.setKUIDsUncheckedLocked(newR, newE, newS)
@@ -136,23 +132,24 @@ func (t *Task) SetRESUID(r, e, s auth.UID) error {
 	// arguments equals -1, the corresponding value is not changed." -
 	// setresuid(2)
 	var err error
-	newR := t.creds.RealKUID
+	creds := t.Credentials()
+	newR := creds.RealKUID
 	if r.Ok() {
-		newR, err = t.creds.UseUID(r)
+		newR, err = creds.UseUID(r)
 		if err != nil {
 			return err
 		}
 	}
-	newE := t.creds.EffectiveKUID
+	newE := creds.EffectiveKUID
 	if e.Ok() {
-		newE, err = t.creds.UseUID(e)
+		newE, err = creds.UseUID(e)
 		if err != nil {
 			return err
 		}
 	}
-	newS := t.creds.SavedKUID
+	newS := creds.SavedKUID
 	if s.Ok() {
-		newS, err = t.creds.UseUID(s)
+		newS, err = creds.UseUID(s)
 		if err != nil {
 			return err
 		}
@@ -163,10 +160,10 @@ func (t *Task) SetRESUID(r, e, s auth.UID) error {
 
 // Preconditions: t.mu must be locked.
 func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
-	root := t.creds.UserNamespace.MapToKUID(auth.RootUID)
-	oldR, oldE, oldS := t.creds.RealKUID, t.creds.EffectiveKUID, t.creds.SavedKUID
-	t.creds = t.creds.Fork() // See doc for creds.
-	t.creds.RealKUID, t.creds.EffectiveKUID, t.creds.SavedKUID = newR, newE, newS
+	creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
+	root := creds.UserNamespace.MapToKUID(auth.RootUID)
+	oldR, oldE, oldS := creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID
+	creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID = newR, newE, newS
 
 	// "1. If one or more of the real, effective or saved set user IDs was
 	// previously 0, and as a result of the UID changes all of these IDs have a
@@ -184,9 +181,9 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
 		// being cleared." (A thread's effective capability set is always
 		// cleared when such a credential change is made,
 		// regardless of the setting of the "keep capabilities" flag.)
-		if !t.creds.KeepCaps {
-			t.creds.PermittedCaps = 0
-			t.creds.EffectiveCaps = 0
+		if !creds.KeepCaps {
+			creds.PermittedCaps = 0
+			creds.EffectiveCaps = 0
 		}
 	}
 	// """
@@ -197,9 +194,9 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
 	// permitted set is copied to the effective set.
 	// """
 	if oldE == root && newE != root {
-		t.creds.EffectiveCaps = 0
+		creds.EffectiveCaps = 0
 	} else if oldE != root && newE == root {
-		t.creds.EffectiveCaps = t.creds.PermittedCaps
+		creds.EffectiveCaps = creds.PermittedCaps
 	}
 	// "4. If the filesystem user ID is changed from 0 to nonzero (see
 	// setfsuid(2)), then the following capabilities are cleared from the
@@ -220,6 +217,7 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
 		// Not documented, but compare Linux's kernel/cred.c:commit_creds().
 		t.parentDeathSignal = 0
 	}
+	t.creds.Store(creds)
 }
 
 // SetGID implements the semantics of setgid(2).
@@ -227,20 +225,23 @@ func (t *Task) SetGID(gid auth.GID) error {
 	if !gid.Ok() {
 		return syserror.EINVAL
 	}
+
 	t.mu.Lock()
 	defer t.mu.Unlock()
-	kgid := t.creds.UserNamespace.MapToKGID(gid)
+
+	creds := t.Credentials()
+	kgid := creds.UserNamespace.MapToKGID(gid)
 	if !kgid.Ok() {
 		return syserror.EINVAL
 	}
-	if t.creds.HasCapability(linux.CAP_SETGID) {
+	if creds.HasCapability(linux.CAP_SETGID) {
 		t.setKGIDsUncheckedLocked(kgid, kgid, kgid)
 		return nil
 	}
-	if kgid != t.creds.RealKGID && kgid != t.creds.SavedKGID {
+	if kgid != creds.RealKGID && kgid != creds.SavedKGID {
 		return syserror.EPERM
 	}
-	t.setKGIDsUncheckedLocked(t.creds.RealKGID, kgid, t.creds.SavedKGID)
+	t.setKGIDsUncheckedLocked(creds.RealKGID, kgid, creds.SavedKGID)
 	return nil
 }
 
@@ -248,30 +249,32 @@ func (t *Task) SetGID(gid auth.GID) error {
 func (t *Task) SetREGID(r, e auth.GID) error {
 	t.mu.Lock()
 	defer t.mu.Unlock()
-	newR := t.creds.RealKGID
+
+	creds := t.Credentials()
+	newR := creds.RealKGID
 	if r.Ok() {
-		newR = t.creds.UserNamespace.MapToKGID(r)
+		newR = creds.UserNamespace.MapToKGID(r)
 		if !newR.Ok() {
 			return syserror.EINVAL
 		}
 	}
-	newE := t.creds.EffectiveKGID
+	newE := creds.EffectiveKGID
 	if e.Ok() {
-		newE = t.creds.UserNamespace.MapToKGID(e)
+		newE = creds.UserNamespace.MapToKGID(e)
 		if !newE.Ok() {
 			return syserror.EINVAL
 		}
 	}
-	if !t.creds.HasCapability(linux.CAP_SETGID) {
-		if newE != t.creds.RealKGID && newE != t.creds.EffectiveKGID && newE != t.creds.SavedKGID {
+	if !creds.HasCapability(linux.CAP_SETGID) {
+		if newE != creds.RealKGID && newE != creds.EffectiveKGID && newE != creds.SavedKGID {
 			return syserror.EPERM
 		}
-		if newR != t.creds.RealKGID && newR != t.creds.EffectiveKGID {
+		if newR != creds.RealKGID && newR != creds.EffectiveKGID {
 			return syserror.EPERM
 		}
 	}
-	newS := t.creds.SavedKGID
-	if r.Ok() || (e.Ok() && newE != t.creds.EffectiveKGID) {
+	newS := creds.SavedKGID
+	if r.Ok() || (e.Ok() && newE != creds.EffectiveKGID) {
 		newS = newE
 	}
 	t.setKGIDsUncheckedLocked(newR, newE, newS)
@@ -280,26 +283,29 @@ func (t *Task) SetREGID(r, e auth.GID) error {
 
 // SetRESGID implements the semantics of the setresgid(2) syscall.
 func (t *Task) SetRESGID(r, e, s auth.GID) error {
+	var err error
+
 	t.mu.Lock()
 	defer t.mu.Unlock()
-	var err error
-	newR := t.creds.RealKGID
+
+	creds := t.Credentials()
+	newR := creds.RealKGID
 	if r.Ok() {
-		newR, err = t.creds.UseGID(r)
+		newR, err = creds.UseGID(r)
 		if err != nil {
 			return err
 		}
 	}
-	newE := t.creds.EffectiveKGID
+	newE := creds.EffectiveKGID
 	if e.Ok() {
-		newE, err = t.creds.UseGID(e)
+		newE, err = creds.UseGID(e)
 		if err != nil {
 			return err
 		}
 	}
-	newS := t.creds.SavedKGID
+	newS := creds.SavedKGID
 	if s.Ok() {
-		newS, err = t.creds.UseGID(s)
+		newS, err = creds.UseGID(s)
 		if err != nil {
 			return err
 		}
@@ -309,9 +315,9 @@ func (t *Task) SetRESGID(r, e, s auth.GID) error {
 }
 
 func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
-	oldE := t.creds.EffectiveKGID
-	t.creds = t.creds.Fork() // See doc for creds.
-	t.creds.RealKGID, t.creds.EffectiveKGID, t.creds.SavedKGID = newR, newE, newS
+	creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
+	oldE := creds.EffectiveKGID
+	creds.RealKGID, creds.EffectiveKGID, creds.SavedKGID = newR, newE, newS
 
 	if oldE != newE {
 		// "[dumpability] is reset to the current value contained in
@@ -327,6 +333,7 @@ func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
 		// kernel/cred.c:commit_creds().
 		t.parentDeathSignal = 0
 	}
+	t.creds.Store(creds)
 }
 
 // SetExtraGIDs attempts to change t's supplemental groups. All IDs are
@@ -334,19 +341,21 @@ func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
 func (t *Task) SetExtraGIDs(gids []auth.GID) error {
 	t.mu.Lock()
 	defer t.mu.Unlock()
-	if !t.creds.HasCapability(linux.CAP_SETGID) {
+	creds := t.Credentials()
+	if !creds.HasCapability(linux.CAP_SETGID) {
 		return syserror.EPERM
 	}
 	kgids := make([]auth.KGID, len(gids))
 	for i, gid := range gids {
-		kgid := t.creds.UserNamespace.MapToKGID(gid)
+		kgid := creds.UserNamespace.MapToKGID(gid)
 		if !kgid.Ok() {
 			return syserror.EINVAL
 		}
 		kgids[i] = kgid
 	}
-	t.creds = t.creds.Fork() // See doc for creds.
-	t.creds.ExtraKGIDs = kgids
+	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+	creds.ExtraKGIDs = kgids
+	t.creds.Store(creds)
 	return nil
 }
 
@@ -360,27 +369,29 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili
 	if effective & ^permitted != 0 {
 		return syserror.EPERM
 	}
+	creds := t.Credentials()
 	// "It is also a limiting superset for the capabilities that may be added
 	// to the inheritable set by a thread that does not have the CAP_SETPCAP
 	// capability in its effective set."
-	if !t.creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(t.creds.InheritableCaps|t.creds.PermittedCaps) != 0) {
+	if !creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(creds.InheritableCaps|creds.PermittedCaps) != 0) {
 		return syserror.EPERM
 	}
 	// "If a thread drops a capability from its permitted set, it can never
 	// reacquire that capability (unless it execve(2)s ..."
-	if permitted & ^t.creds.PermittedCaps != 0 {
+	if permitted & ^creds.PermittedCaps != 0 {
 		return syserror.EPERM
 	}
 	// "... if a capability is not in the bounding set, then a thread can't add
 	// this capability to its inheritable set, even if it was in its permitted
 	// capabilities ..."
-	if inheritable & ^(t.creds.InheritableCaps|t.creds.BoundingCaps) != 0 {
+	if inheritable & ^(creds.InheritableCaps|creds.BoundingCaps) != 0 {
 		return syserror.EPERM
 	}
-	t.creds = t.creds.Fork() // See doc for creds.
-	t.creds.PermittedCaps = permitted
-	t.creds.InheritableCaps = inheritable
-	t.creds.EffectiveCaps = effective
+	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+	creds.PermittedCaps = permitted
+	creds.InheritableCaps = inheritable
+	creds.EffectiveCaps = effective
+	t.creds.Store(creds)
 	return nil
 }
 
@@ -389,11 +400,13 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili
 func (t *Task) DropBoundingCapability(cp linux.Capability) error {
 	t.mu.Lock()
 	defer t.mu.Unlock()
-	if !t.creds.HasCapability(linux.CAP_SETPCAP) {
+	creds := t.Credentials()
+	if !creds.HasCapability(linux.CAP_SETPCAP) {
 		return syserror.EPERM
 	}
-	t.creds = t.creds.Fork() // See doc for creds.
-	t.creds.BoundingCaps &^= auth.CapabilitySetOf(cp)
+	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+	creds.BoundingCaps &^= auth.CapabilitySetOf(cp)
+	t.creds.Store(creds)
 	return nil
 }
 
@@ -402,31 +415,33 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error {
 	t.mu.Lock()
 	defer t.mu.Unlock()
 
+	creds := t.Credentials()
 	// "A process reassociating itself with a user namespace must have the
 	// CAP_SYS_ADMIN capability in the target user namespace." - setns(2)
 	//
 	// If t just created ns, then t.creds is guaranteed to have CAP_SYS_ADMIN
 	// in ns (by rule 3 in auth.Credentials.HasCapability).
-	if !t.creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) {
+	if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) {
 		return syserror.EPERM
 	}
 
-	t.creds = t.creds.Fork() // See doc for creds.
-	t.creds.UserNamespace = ns
+	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+	creds.UserNamespace = ns
 	// "The child process created by clone(2) with the CLONE_NEWUSER flag
 	// starts out with a complete set of capabilities in the new user
 	// namespace. Likewise, a process that creates a new user namespace using
 	// unshare(2) or joins an existing user namespace using setns(2) gains a
 	// full set of capabilities in that namespace."
-	t.creds.PermittedCaps = auth.AllCapabilities
-	t.creds.InheritableCaps = 0
-	t.creds.EffectiveCaps = auth.AllCapabilities
-	t.creds.BoundingCaps = auth.AllCapabilities
+	creds.PermittedCaps = auth.AllCapabilities
+	creds.InheritableCaps = 0
+	creds.EffectiveCaps = auth.AllCapabilities
+	creds.BoundingCaps = auth.AllCapabilities
 	// "A call to clone(2), unshare(2), or setns(2) using the CLONE_NEWUSER
 	// flag sets the "securebits" flags (see capabilities(7)) to their default
 	// values (all flags disabled) in the child (for clone(2)) or caller (for
 	// unshare(2), or setns(2)." - user_namespaces(7)
-	t.creds.KeepCaps = false
+	creds.KeepCaps = false
+	t.creds.Store(creds)
 
 	return nil
 }
@@ -435,8 +450,9 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error {
 func (t *Task) SetKeepCaps(k bool) {
 	t.mu.Lock()
 	defer t.mu.Unlock()
-	t.creds = t.creds.Fork() // See doc for creds.
-	t.creds.KeepCaps = k
+	creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
+	creds.KeepCaps = k
+	t.creds.Store(creds)
 }
 
 // updateCredsForExec updates t.creds to reflect an execve().
@@ -512,15 +528,16 @@ func (t *Task) updateCredsForExecLocked() {
 	// the effective user ID.
 	var newPermitted auth.CapabilitySet // since F(inheritable) == F(permitted) == 0
 	fileEffective := false
-	root := t.creds.UserNamespace.MapToKUID(auth.RootUID)
-	if t.creds.EffectiveKUID == root || t.creds.RealKUID == root {
-		newPermitted = t.creds.InheritableCaps | t.creds.BoundingCaps
-		if t.creds.EffectiveKUID == root {
+	creds := t.Credentials()
+	root := creds.UserNamespace.MapToKUID(auth.RootUID)
+	if creds.EffectiveKUID == root || creds.RealKUID == root {
+		newPermitted = creds.InheritableCaps | creds.BoundingCaps
+		if creds.EffectiveKUID == root {
 			fileEffective = true
 		}
 	}
 
-	t.creds = t.creds.Fork() // See doc for creds.
+	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
 
 	// Now we enter poorly-documented, somewhat confusing territory. (The
 	// accompanying comment in Linux's security/commoncap.c:cap_bprm_set_creds
@@ -562,27 +579,28 @@ func (t *Task) updateCredsForExecLocked() {
 	// But since no_new_privs is always set (A3 is always true), this becomes
 	// much simpler. If B1 and B2 are false, C2 is a no-op. If B3 is false, C1
 	// is a no-op. So we can just do C1 and C2 unconditionally.
-	if t.creds.EffectiveKUID != t.creds.RealKUID || t.creds.EffectiveKGID != t.creds.RealKGID {
-		t.creds.EffectiveKUID = t.creds.RealKUID
-		t.creds.EffectiveKGID = t.creds.RealKGID
+	if creds.EffectiveKUID != creds.RealKUID || creds.EffectiveKGID != creds.RealKGID {
+		creds.EffectiveKUID = creds.RealKUID
+		creds.EffectiveKGID = creds.RealKGID
 		t.parentDeathSignal = 0
 	}
 	// (Saved set-user-ID is always set to the new effective user ID, and saved
 	// set-group-ID is always set to the new effective group ID, regardless of
 	// the above.)
-	t.creds.SavedKUID = t.creds.RealKUID
-	t.creds.SavedKGID = t.creds.RealKGID
-	t.creds.PermittedCaps &= newPermitted
+	creds.SavedKUID = creds.RealKUID
+	creds.SavedKGID = creds.RealKGID
+	creds.PermittedCaps &= newPermitted
 	if fileEffective {
-		t.creds.EffectiveCaps = t.creds.PermittedCaps
+		creds.EffectiveCaps = creds.PermittedCaps
 	} else {
-		t.creds.EffectiveCaps = 0
+		creds.EffectiveCaps = 0
 	}
 
 	// prctl(2): The "keep capabilities" value will be reset to 0 on subsequent
 	// calls to execve(2).
-	t.creds.KeepCaps = false
+	creds.KeepCaps = false
 
 	// "The bounding set is inherited at fork(2) from the thread's parent, and
 	// is preserved across an execve(2)". So we're done.
+	t.creds.Store(creds)
 }
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index 9458f5c2a..72caae537 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -119,7 +119,6 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
 		ptraceTracees:   make(map[*Task]struct{}),
 		allowedCPUMask:  cfg.AllowedCPUMask.Copy(),
 		ioUsage:         &usage.IO{},
-		creds:           cfg.Credentials,
 		niceness:        cfg.Niceness,
 		netns:           cfg.NetworkNamespaced,
 		utsns:           cfg.UTSNamespace,
@@ -129,6 +128,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
 		futexWaiter:     futex.NewWaiter(),
 		containerID:     cfg.ContainerID,
 	}
+	t.creds.Store(cfg.Credentials)
 	t.endStopCond.L = &t.tg.signalHandlers.mu
 	t.ptraceTracer.Store((*Task)(nil))
 	// We don't construct t.blockingTimer until Task.run(); see that function
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index dca8e4c0e..f15b3415a 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -370,13 +370,16 @@ func (t *thread) destroy() {
 
 // init initializes trace options.
 func (t *thread) init() {
-	// Set our TRACESYSGOOD option to differeniate real SIGTRAP.
+	// Set our TRACESYSGOOD option to differentiate real SIGTRAP. We also
+	// set PTRACE_O_EXITKILL to ensure that the unexpected exit of the
+	// sentry will immediately kill the associated stubs.
+	const PTRACE_O_EXITKILL = 0x100000
 	_, _, errno := syscall.RawSyscall6(
 		syscall.SYS_PTRACE,
 		syscall.PTRACE_SETOPTIONS,
 		uintptr(t.tid),
 		0,
-		syscall.PTRACE_O_TRACESYSGOOD,
+		syscall.PTRACE_O_TRACESYSGOOD|syscall.PTRACE_O_TRACEEXIT|PTRACE_O_EXITKILL,
 		0, 0)
 	if errno != 0 {
 		panic(fmt.Sprintf("ptrace set options failed: %v", errno))
@@ -419,7 +422,7 @@ func (t *thread) syscall(regs *syscall.PtraceRegs) (uintptr, error) {
 	// between syscall-enter-stop and syscall-exit-stop; it happens *after*
 	// syscall-exit-stop.)" - ptrace(2), "Syscall-stops"
 	if sig := t.wait(stopped); sig != (syscallEvent | syscall.SIGTRAP) {
-		panic(fmt.Sprintf("wait failed: expected SIGTRAP, got %v [%d]", sig, sig))
+		t.dumpAndPanic(fmt.Sprintf("wait failed: expected SIGTRAP, got %v [%d]", sig, sig))
 	}
 
 	// Grab registers.
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 2a41e8176..7f18b1ac8 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -379,7 +379,7 @@ var AMD64 = &kernel.SyscallTable{
 		326: syscalls.ErrorWithEvent("copy_file_range", syscall.ENOSYS, "", nil),
 		327: syscalls.Undocumented("preadv2", Preadv2),
 		328: syscalls.Undocumented("pwritev2", Pwritev2),
-		397: syscalls.Undocumented("statx", Statx),
+		332: syscalls.Supported("statx", Statx),
 	},
 
 	Emulate: map[usermem.Addr]uintptr{
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index d9ed02c99..04962726a 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -304,44 +304,100 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod
 		return 0, syserror.ENOENT
 	}
 
-	err = fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, remainingTraversals uint) error {
-		if !fs.IsDir(d.Inode.StableAttr) {
-			return syserror.ENOTDIR
-		}
+	fileFlags := linuxToFlags(flags)
+	// Linux always adds the O_LARGEFILE flag when running in 64-bit mode.
+	fileFlags.LargeFile = true
+
+	err = fileOpAt(t, dirFD, path, func(root *fs.Dirent, parent *fs.Dirent, name string, remainingTraversals uint) error {
+		// Resolve the name to see if it exists, and follow any
+		// symlinks along the way. We must do the symlink resolution
+		// manually because if the symlink target does not exist, we
+		// must create the target (and not the symlink itself).
+		var (
+			found *fs.Dirent
+			err   error
+		)
+		for {
+			if !fs.IsDir(parent.Inode.StableAttr) {
+				return syserror.ENOTDIR
+			}
 
-		fileFlags := linuxToFlags(flags)
-		// Linux always adds the O_LARGEFILE flag when running in 64-bit mode.
-		fileFlags.LargeFile = true
+			// Start by looking up the dirent at 'name'.
+			found, err = t.MountNamespace().FindLink(t, root, parent, name, &remainingTraversals)
+			if err != nil {
+				break
+			}
+
+			// We found something (possibly a symlink). If the
+			// O_EXCL flag was passed, then we can immediately
+			// return EEXIST.
+			if flags&linux.O_EXCL != 0 {
+				return syserror.EEXIST
+			}
+
+			// If we have a non-symlink, then we can proceed.
+			if !fs.IsSymlink(found.Inode.StableAttr) {
+				break
+			}
+
+			// If O_NOFOLLOW was passed, then don't try to resolve
+			// anything.
+			if flags&linux.O_NOFOLLOW != 0 {
+				return syserror.ELOOP
+			}
+
+			// Try to resolve the symlink directly to a Dirent.
+			resolved, err := found.Inode.Getlink(t)
+			if err == nil || err != fs.ErrResolveViaReadlink {
+				// No more resolution necessary.
+				found.DecRef()
+				found = resolved
+				break
+			}
+
+			// Resolve the symlink to a path via Readlink.
+			path, err := found.Inode.Readlink(t)
+			if err != nil {
+				break
+			}
+			remainingTraversals--
+
+			// Get the new parent from the target path.
+			newParentPath, newName := fs.SplitLast(path)
+			newParent, err := t.MountNamespace().FindInode(t, root, parent, newParentPath, &remainingTraversals)
+			if err != nil {
+				break
+			}
+
+			// Repeat the process with the parent and name of the
+			// symlink target.
+			parent.DecRef()
+			parent = newParent
+			name = newName
+		}
 
-		// Does this file exist already?
-		targetDirent, err := t.MountNamespace().FindInode(t, root, d, name, &remainingTraversals)
 		var newFile *fs.File
 		switch err {
 		case nil:
 			// The file existed.
-			defer targetDirent.DecRef()
-
-			// Check if we wanted to create.
-			if flags&linux.O_EXCL != 0 {
-				return syserror.EEXIST
-			}
+			defer found.DecRef()
 
 			// Like sys_open, check for a few things about the
 			// filesystem before trying to get a reference to the
 			// fs.File. The same constraints on Check apply.
-			if err := targetDirent.Inode.CheckPermission(t, flagsToPermissions(flags)); err != nil {
+			if err := found.Inode.CheckPermission(t, flagsToPermissions(flags)); err != nil {
 				return err
 			}
 
 			// Should we truncate the file?
 			if flags&linux.O_TRUNC != 0 {
-				if err := targetDirent.Inode.Truncate(t, targetDirent, 0); err != nil {
+				if err := found.Inode.Truncate(t, found, 0); err != nil {
 					return err
 				}
 			}
 
 			// Create a new fs.File.
-			newFile, err = targetDirent.Inode.GetFile(t, targetDirent, fileFlags)
+			newFile, err = found.Inode.GetFile(t, found, fileFlags)
 			if err != nil {
 				return syserror.ConvertIntr(err, kernel.ERESTARTSYS)
 			}
@@ -350,19 +406,19 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod
 			// File does not exist. Proceed with creation.
 
 			// Do we have write permissions on the parent?
-			if err := d.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil {
+			if err := parent.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil {
 				return err
 			}
 
 			// Attempt a creation.
 			perms := fs.FilePermsFromMode(mode &^ linux.FileMode(t.FSContext().Umask()))
-			newFile, err = d.Create(t, root, name, fileFlags, perms)
+			newFile, err = parent.Create(t, root, name, fileFlags, perms)
 			if err != nil {
 				// No luck, bail.
 				return err
 			}
 			defer newFile.DecRef()
-			targetDirent = newFile.Dirent
+			found = newFile.Dirent
 		default:
 			return err
 		}
@@ -378,10 +434,10 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod
 		fd = uintptr(newFD)
 
 		// Queue the open inotify event. The creation event is
-		// automatically queued when the dirent is targetDirent. The
-		// open events are implemented at the syscall layer so we need
-		// to manually queue one here.
-		targetDirent.InotifyEvent(linux.IN_OPEN, 0)
+		// automatically queued when the dirent is created. The open
+		// events are implemented at the syscall layer so we need to
+		// manually queue one here.
+		found.InotifyEvent(linux.IN_OPEN, 0)
 
 		return nil
 	})
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 7f41a9c53..d79aaff60 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -96,8 +96,10 @@ const (
 
 	// SandboxStacks collects sandbox stacks for debugging.
 	SandboxStacks = "debug.Stacks"
+)
 
-	// Profiling related commands (see pprof.go for more details).
+// Profiling related commands (see pprof.go for more details).
+const (
 	StartCPUProfile = "Profile.StartCPUProfile"
 	StopCPUProfile  = "Profile.StopCPUProfile"
 	HeapProfile     = "Profile.HeapProfile"
@@ -105,6 +107,11 @@ const (
 	StopTrace       = "Profile.StopTrace"
 )
 
+// Logging related commands (see logging.go for more details).
+const (
+	ChangeLogging = "Logging.Change"
+)
+
 // ControlSocketAddr generates an abstract unix socket name for the given ID.
 func ControlSocketAddr(id string) string {
 	return fmt.Sprintf("\x00runsc-sandbox.%s", id)
@@ -143,6 +150,7 @@ func newController(fd int, l *Loader) (*controller, error) {
 	}
 
 	srv.Register(&debug{})
+	srv.Register(&control.Logging{})
 	if l.conf.ProfileEnable {
 		srv.Register(&control.Profile{})
 	}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 67a286212..5c2220d83 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -85,6 +85,19 @@ func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string,
 	if err != nil {
 		return nil, fmt.Errorf("creating tmpfs overlay: %v", err)
 	}
+
+	// Replicate permissions and owner from lower to upper mount point.
+	attr, err := lower.UnstableAttr(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("reading attributes from lower mount point: %v", err)
+	}
+	if !upper.InodeOperations.SetPermissions(ctx, upper, attr.Perms) {
+		return nil, fmt.Errorf("error setting permission to upper mount point")
+	}
+	if err := upper.InodeOperations.SetOwner(ctx, upper, attr.Owner); err != nil {
+		return nil, fmt.Errorf("setting owner to upper mount point: %v", err)
+	}
+
 	return fs.NewOverlayRoot(ctx, upper, lower, upperFlags)
 }
 
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 4af45bfcc..eca592e5b 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -37,6 +37,9 @@ import (
 func init() {
 	log.SetLevel(log.Debug)
 	rand.Seed(time.Now().UnixNano())
+	if err := fsgofer.OpenProcSelfFD(); err != nil {
+		panic(err)
+	}
 }
 
 func testConfig() *Config {
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 30a69acf0..7313e473f 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -17,12 +17,15 @@ package cmd
 import (
 	"context"
 	"os"
+	"strconv"
+	"strings"
 	"syscall"
 	"time"
 
 	"flag"
 	"github.com/google/subcommands"
 	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/control"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/container"
 )
@@ -36,6 +39,9 @@ type Debug struct {
 	profileCPU   string
 	profileDelay int
 	trace        string
+	strace       string
+	logLevel     string
+	logPackets   string
 }
 
 // Name implements subcommands.Command.
@@ -62,6 +68,9 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
 	f.IntVar(&d.profileDelay, "profile-delay", 5, "amount of time to wait before stoping CPU profile")
 	f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
 	f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
+	f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all`)
+	f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).")
+	f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.")
 }
 
 // Execute implements subcommands.Command.Execute.
@@ -78,7 +87,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		var err error
 		c, err = container.Load(conf.RootDir, f.Arg(0))
 		if err != nil {
-			Fatalf("loading container %q: %v", f.Arg(0), err)
+			return Errorf("loading container %q: %v", f.Arg(0), err)
 		}
 	} else {
 		if f.NArg() != 0 {
@@ -88,12 +97,12 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		// Go over all sandboxes and find the one that matches PID.
 		ids, err := container.List(conf.RootDir)
 		if err != nil {
-			Fatalf("listing containers: %v", err)
+			return Errorf("listing containers: %v", err)
 		}
 		for _, id := range ids {
 			candidate, err := container.Load(conf.RootDir, id)
 			if err != nil {
-				Fatalf("loading container %q: %v", id, err)
+				return Errorf("loading container %q: %v", id, err)
 			}
 			if candidate.SandboxPid() == d.pid {
 				c = candidate
@@ -101,38 +110,38 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 			}
 		}
 		if c == nil {
-			Fatalf("container with PID %d not found", d.pid)
+			return Errorf("container with PID %d not found", d.pid)
 		}
 	}
 
 	if c.Sandbox == nil || !c.Sandbox.IsRunning() {
-		Fatalf("container sandbox is not running")
+		return Errorf("container sandbox is not running")
 	}
 	log.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Pid)
 
 	if d.signal > 0 {
 		log.Infof("Sending signal %d to process: %d", d.signal, c.Sandbox.Pid)
 		if err := syscall.Kill(c.Sandbox.Pid, syscall.Signal(d.signal)); err != nil {
-			Fatalf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid)
+			return Errorf("failed to send signal %d to process %d: %v", d.signal, c.Sandbox.Pid, err)
 		}
 	}
 	if d.stacks {
 		log.Infof("Retrieving sandbox stacks")
 		stacks, err := c.Sandbox.Stacks()
 		if err != nil {
-			Fatalf("retrieving stacks: %v", err)
+			return Errorf("retrieving stacks: %v", err)
 		}
 		log.Infof("     *** Stack dump ***\n%s", stacks)
 	}
 	if d.profileHeap != "" {
 		f, err := os.Create(d.profileHeap)
 		if err != nil {
-			Fatalf(err.Error())
+			return Errorf("%v", err)
 		}
 		defer f.Close()
 
 		if err := c.Sandbox.HeapProfile(f); err != nil {
-			Fatalf(err.Error())
+			return Errorf("%v", err)
 		}
 		log.Infof("Heap profile written to %q", d.profileHeap)
 	}
@@ -142,7 +151,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		delay = true
 		f, err := os.Create(d.profileCPU)
 		if err != nil {
-			Fatalf(err.Error())
+			return Errorf("%v", err)
 		}
 		defer func() {
 			f.Close()
@@ -152,7 +161,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 			log.Infof("CPU profile written to %q", d.profileCPU)
 		}()
 		if err := c.Sandbox.StartCPUProfile(f); err != nil {
-			Fatalf(err.Error())
+			return Errorf("%v", err)
 		}
 		log.Infof("CPU profile started for %d sec, writing to %q", d.profileDelay, d.profileCPU)
 	}
@@ -160,7 +169,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		delay = true
 		f, err := os.Create(d.trace)
 		if err != nil {
-			Fatalf(err.Error())
+			return Errorf("%v", err)
 		}
 		defer func() {
 			f.Close()
@@ -170,15 +179,71 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 			log.Infof("Trace written to %q", d.trace)
 		}()
 		if err := c.Sandbox.StartTrace(f); err != nil {
-			Fatalf(err.Error())
+			return Errorf("%v", err)
 		}
 		log.Infof("Tracing started for %d sec, writing to %q", d.profileDelay, d.trace)
 
 	}
 
+	if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 {
+		args := control.LoggingArgs{}
+		switch strings.ToLower(d.strace) {
+		case "":
+			// strace not set, nothing to do here.
+
+		case "off":
+			log.Infof("Disabling strace")
+			args.SetStrace = true
+
+		case "all":
+			log.Infof("Enabling all straces")
+			args.SetStrace = true
+			args.EnableStrace = true
+
+		default:
+			log.Infof("Enabling strace for syscalls: %s", d.strace)
+			args.SetStrace = true
+			args.EnableStrace = true
+			args.StraceWhitelist = strings.Split(d.strace, ",")
+		}
+
+		if len(d.logLevel) != 0 {
+			args.SetLevel = true
+			switch strings.ToLower(d.logLevel) {
+			case "warning", "0":
+				args.Level = log.Warning
+			case "info", "1":
+				args.Level = log.Info
+			case "debug", "2":
+				args.Level = log.Debug
+			default:
+				return Errorf("invalid log level %q", d.logLevel)
+			}
+			log.Infof("Setting log level %v", args.Level)
+		}
+
+		if len(d.logPackets) != 0 {
+			args.SetLogPackets = true
+			lp, err := strconv.ParseBool(d.logPackets)
+			if err != nil {
+				return Errorf("invalid value for log-packets %q", d.logPackets)
+			}
+			args.LogPackets = lp
+			if args.LogPackets {
+				log.Infof("Enabling packet logging")
+			} else {
+				log.Infof("Disabling packet logging")
+			}
+		}
+
+		if err := c.Sandbox.ChangeLogging(args); err != nil {
+			return Errorf("%v", err)
+		}
+		log.Infof("Logging options changed")
+	}
+
 	if delay {
 		time.Sleep(time.Duration(d.profileDelay) * time.Second)
-
 	}
 
 	return subcommands.ExitSuccess
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index 7adc23a77..e817eff77 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -235,7 +235,11 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi
 		cmd.SysProcAttr = &syscall.SysProcAttr{
 			Setsid:  true,
 			Setctty: true,
-			Ctty:    int(tty.Fd()),
+			// The Ctty FD must be the FD in the child process's FD
+			// table. Since we set cmd.Stdin/Stdout/Stderr to the
+			// tty FD, we can use any of 0, 1, or 2 here.
+			// See https://github.com/golang/go/issues/29458.
+			Ctty: 0,
 		}
 	}
 
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 52609a57a..9faabf494 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -152,6 +152,10 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	// modes exactly as sent by the sandbox, which will have applied its own umask.
 	syscall.Umask(0)
 
+	if err := fsgofer.OpenProcSelfFD(); err != nil {
+		Fatalf("failed to open /proc/self/fd: %v", err)
+	}
+
 	if err := syscall.Chroot(root); err != nil {
 		Fatalf("failed to chroot to %q: %v", root, err)
 	}
diff --git a/runsc/console/BUILD b/runsc/console/BUILD
index 2d71cd371..e623c1a0f 100644
--- a/runsc/console/BUILD
+++ b/runsc/console/BUILD
@@ -4,7 +4,9 @@ package(licenses = ["notice"])
 
 go_library(
     name = "console",
-    srcs = ["console.go"],
+    srcs = [
+        "console.go",
+    ],
     importpath = "gvisor.dev/gvisor/runsc/console",
     visibility = [
         "//runsc:__subpackages__",
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 8f50af780..f970ce88d 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -28,6 +28,7 @@ import (
 	"path"
 	"path/filepath"
 	"runtime"
+	"strconv"
 	"sync"
 	"syscall"
 
@@ -223,6 +224,28 @@ type localFile struct {
 	lastDirentOffset uint64
 }
 
+// procSelfFD is the open /proc/self/fd directory; nil until OpenProcSelfFD succeeds.
+var procSelfFD *fd.FD
+
+// OpenProcSelfFD opens /proc/self/fd, which is then used to reopen file descriptors.
+func OpenProcSelfFD() error {
+	d, err := syscall.Open("/proc/self/fd", syscall.O_RDONLY|syscall.O_DIRECTORY, 0)
+	if err != nil {
+		return fmt.Errorf("error opening /proc/self/fd: %v", err)
+	}
+	procSelfFD = fd.New(d)
+	return nil
+}
+
+// reopenProcFd reopens f via its /proc/self/fd entry (a symlink, so O_NOFOLLOW is cleared).
+func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) {
+	d, err := syscall.Openat(procSelfFD.FD(), strconv.Itoa(f.FD()), mode&^syscall.O_NOFOLLOW, 0)
+	if err != nil {
+		return nil, err
+	}
+	return fd.New(d), nil
+}
+
 func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, error) {
 	path := path.Join(parent.hostPath, name)
 	f, err := openAnyFile(path, func(mode int) (*fd.FD, error) {
@@ -348,7 +371,7 @@ func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
 		// name_to_handle_at and open_by_handle_at aren't supported by overlay2.
 		log.Debugf("Open reopening file, mode: %v, %q", mode, l.hostPath)
 		var err error
-		newFile, err = fd.Open(l.hostPath, openFlags|mode.OSFlags(), 0)
+		newFile, err = reopenProcFd(l.file, openFlags|mode.OSFlags())
 		if err != nil {
 			return nil, p9.QID{}, 0, extractErrno(err)
 		}
@@ -477,7 +500,7 @@ func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) {
 	// Duplicate current file if 'names' is empty.
 	if len(names) == 0 {
 		newFile, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) {
-			return fd.Open(l.hostPath, openFlags|mode, 0)
+			return reopenProcFd(l.file, openFlags|mode)
 		})
 		if err != nil {
 			return nil, nil, extractErrno(err)
@@ -635,7 +658,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
 	f := l.file
 	if l.ft == regular && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite {
 		var err error
-		f, err = fd.Open(l.hostPath, openFlags|syscall.O_WRONLY, 0)
+		f, err = reopenProcFd(l.file, openFlags|os.O_WRONLY)
 		if err != nil {
 			return extractErrno(err)
 		}
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index 68267df1b..0a162bb8a 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -31,6 +31,10 @@ func init() {
 
 	allConfs = append(allConfs, rwConfs...)
 	allConfs = append(allConfs, roConfs...)
+
+	if err := OpenProcSelfFD(); err != nil {
+		panic(err)
+	}
 }
 
 func assertPanic(t *testing.T, f func()) {
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 3bd0291c0..6bebf0737 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -437,10 +437,10 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		defer tty.Close()
 
 		// Set the TTY as a controlling TTY on the sandbox process.
-		// Note that the Ctty field must be the FD of the TTY in the
-		// *new* process, not this process. Since we are about to
-		// assign the TTY to nextFD, we can use that value here.
 		cmd.SysProcAttr.Setctty = true
+		// The Ctty FD must be the FD in the child process's FD table,
+		// which will be nextFD in this case.
+		// See https://github.com/golang/go/issues/29458.
 		cmd.SysProcAttr.Ctty = nextFD
 
 		// Pass the tty as all stdio fds to sandbox.
@@ -960,7 +960,7 @@ func (s *Sandbox) StartTrace(f *os.File) error {
 	return nil
 }
 
-// StopTrace stops a previously started trace..
+// StopTrace stops a previously started trace.
 func (s *Sandbox) StopTrace() error {
 	log.Debugf("Trace stop %q", s.ID)
 	conn, err := s.sandboxConnect()
@@ -975,6 +975,21 @@ func (s *Sandbox) StopTrace() error {
 	return nil
 }
 
+// ChangeLogging sends args to the sandbox through the boot.ChangeLogging call.
+func (s *Sandbox) ChangeLogging(args control.LoggingArgs) error {
+	log.Debugf("Change logging start %q", s.ID)
+	conn, err := s.sandboxConnect()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	if err := conn.Call(boot.ChangeLogging, &args, nil); err != nil {
+		return fmt.Errorf("changing sandbox %q logging: %v", s.ID, err)
+	}
+	return nil
+}
+
 // DestroyContainer destroys the given container. If it is the root container,
 // then the entire sandbox is destroyed.
 func (s *Sandbox) DestroyContainer(cid string) error {
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 731e2aa85..b06e46c03 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -240,7 +240,7 @@ syscall_test(
 syscall_test(test = "//test/syscalls/linux:munmap_test")
 
 syscall_test(
-    add_overlay = False,  # TODO(gvisor.dev/issue/316): enable when fixed.
+    add_overlay = True,
     test = "//test/syscalls/linux:open_create_test",
 )
 
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 0618fea58..8a24d8c0b 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -1177,6 +1177,7 @@ cc_binary(
         "//test/util:temp_path",
         "//test/util:test_main",
         "//test/util:test_util",
+        "//test/util:thread_util",
         "@com_google_absl//absl/strings",
         "@com_google_googletest//:gtest",
     ],
@@ -2940,6 +2941,8 @@ cc_binary(
     testonly = 1,
     srcs = ["tcp_socket.cc"],
     linkstatic = 1,
+    # FIXME(b/135470853)
+    tags = ["flaky"],
     deps = [
         ":socket_test_util",
         "//test/util:file_descriptor",
@@ -3340,3 +3343,18 @@ cc_binary(
         "@com_google_googletest//:gtest",
     ],
 )
+
+cc_binary(
+    name = "proc_net_tcp_test",
+    testonly = 1,
+    srcs = ["proc_net_tcp.cc"],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        "//test/util:file_descriptor",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_googletest//:gtest",
+    ],
+)
diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc
index 42646bb02..e0525f386 100644
--- a/test/syscalls/linux/open.cc
+++ b/test/syscalls/linux/open.cc
@@ -28,6 +28,7 @@
 #include "test/util/fs_util.h"
 #include "test/util/temp_path.h"
 #include "test/util/test_util.h"
+#include "test/util/thread_util.h"
 
 namespace gvisor {
 namespace testing {
@@ -214,6 +215,42 @@ TEST_F(OpenTest, AppendOnly) {
               SyscallSucceedsWithValue(kBufSize * 3));
 }
 
+TEST_F(OpenTest, AppendConcurrentWrite) {
+  constexpr int kThreadCount = 5;
+  constexpr int kBytesPerThread = 10000;
+  std::unique_ptr<ScopedThread> threads[kThreadCount];
+
+  // In case of the uncached policy, we expect that a file system can be changed
+  // externally, so we create a new inode each time when we open a file and we
+  // can't guarantee that writes to files with O_APPEND will work correctly.
+  SKIP_IF(getenv("GVISOR_GOFER_UNCACHED"));
+
+  EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds());
+
+  std::string filename = test_file_name_;
+  DisableSave ds;  // Too many syscalls.
+  // Start kThreadCount threads which will write concurrently into the same
+  // file.
+  for (int i = 0; i < kThreadCount; i++) {
+    threads[i] = absl::make_unique<ScopedThread>([filename]() {
+      const FileDescriptor fd =
+          ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDWR | O_APPEND));
+
+      for (int j = 0; j < kBytesPerThread; j++) {
+        EXPECT_THAT(WriteFd(fd.get(), &j, 1), SyscallSucceedsWithValue(1));
+      }
+    });
+  }
+  for (int i = 0; i < kThreadCount; i++) {
+    threads[i]->Join();
+  }
+
+  // Check that the size of the file is correct.
+  struct stat st;
+  EXPECT_THAT(stat(test_file_name_.c_str(), &st), SyscallSucceeds());
+  EXPECT_EQ(st.st_size, kThreadCount * kBytesPerThread);
+}
+
 TEST_F(OpenTest, Truncate) {
   {
     // First write some data to the new file and close it.
diff --git a/test/syscalls/linux/proc_net_tcp.cc b/test/syscalls/linux/proc_net_tcp.cc
new file mode 100644
index 000000000..578b20680
--- /dev/null
+++ b/test/syscalls/linux/proc_net_tcp.cc
@@ -0,0 +1,281 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_split.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using absl::StrCat;
+using absl::StrSplit;
+
+constexpr char kProcNetTCPHeader[] =
+    "  sl  local_address rem_address   st tx_queue rx_queue tr tm->when "
+    "retrnsmt   uid  timeout inode                                             "
+    "        ";
+
+// Possible values of the "st" field in a /proc/net/tcp entry. Source: Linux
+// kernel, include/net/tcp_states.h.
+enum {
+  TCP_ESTABLISHED = 1,
+  TCP_SYN_SENT,
+  TCP_SYN_RECV,
+  TCP_FIN_WAIT1,
+  TCP_FIN_WAIT2,
+  TCP_TIME_WAIT,
+  TCP_CLOSE,
+  TCP_CLOSE_WAIT,
+  TCP_LAST_ACK,
+  TCP_LISTEN,
+  TCP_CLOSING,
+  TCP_NEW_SYN_RECV,
+
+  TCP_MAX_STATES
+};
+
+// TCPEntry represents a single entry from /proc/net/tcp.
+struct TCPEntry {
+  uint32_t local_addr;
+  uint16_t local_port;
+
+  uint32_t remote_addr;
+  uint16_t remote_port;
+
+  uint64_t state;
+  uint64_t uid;
+  uint64_t inode;
+};
+
+uint32_t IP(const struct sockaddr* addr) {
+  auto* in_addr = reinterpret_cast<const struct sockaddr_in*>(addr);
+  return in_addr->sin_addr.s_addr;
+}
+
+uint16_t Port(const struct sockaddr* addr) {
+  auto* in_addr = reinterpret_cast<const struct sockaddr_in*>(addr);
+  return ntohs(in_addr->sin_port);
+}
+
+// Finds the first entry in 'entries' for which 'predicate' returns true.
+// Returns true on match, and copies the matching entry into '*match'.
+bool FindBy(const std::vector<TCPEntry>& entries, TCPEntry* match,
+            std::function<bool(const TCPEntry&)> predicate) {
+  for (size_t i = 0; i < entries.size(); ++i) {
+    if (predicate(entries[i])) {
+      *match = entries[i];
+      return true;
+    }
+  }
+  return false;
+}
+
+bool FindByLocalAddr(const std::vector<TCPEntry>& entries, TCPEntry* match,
+                     const struct sockaddr* addr) {
+  uint32_t host = IP(addr);
+  uint16_t port = Port(addr);
+  return FindBy(entries, match, [host, port](const TCPEntry& e) {
+    return (e.local_addr == host && e.local_port == port);
+  });
+}
+
+bool FindByRemoteAddr(const std::vector<TCPEntry>& entries, TCPEntry* match,
+                      const struct sockaddr* addr) {
+  uint32_t host = IP(addr);
+  uint16_t port = Port(addr);
+  return FindBy(entries, match, [host, port](const TCPEntry& e) {
+    return (e.remote_addr == host && e.remote_port == port);
+  });
+}
+
+// Returns a parsed representation of /proc/net/tcp entries.
+PosixErrorOr<std::vector<TCPEntry>> ProcNetTCPEntries() {
+  std::string content;
+  RETURN_IF_ERRNO(GetContents("/proc/net/tcp", &content));
+
+  bool found_header = false;
+  std::vector<TCPEntry> entries;
+  std::vector<std::string> lines = StrSplit(content, '\n');
+  std::cerr << "<contents of /proc/net/tcp>" << std::endl;
+  for (const std::string& line : lines) {
+    std::cerr << line << std::endl;
+
+    if (!found_header) {
+      EXPECT_EQ(line, kProcNetTCPHeader);
+      found_header = true;
+      continue;
+    }
+    if (line.empty()) {
+      continue;
+    }
+
+    // Parse a single entry from /proc/net/tcp.
+    //
+    // Example entries:
+    //
+    // clang-format off
+    //
+    //  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
+    //   0: 00000000:006F 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 1968 1 0000000000000000 100 0 0 10 0
+    //   1: 0100007F:7533 00000000:0000 0A 00000000:00000000 00:00000000 00000000   120        0 10684 1 0000000000000000 100 0 0 10 0
+    //   ^     ^       ^     ^       ^   ^     ^       ^      ^     ^        ^       ^         ^   ^   ^      ^            ^  ^ ^  ^ ^
+    //   0     1       2     3       4   5     6       7      8     9       10       11       12  13  14     15           16 17 18 19 20
+    //
+    // clang-format on
+
+    TCPEntry entry;
+    std::vector<std::string> fields =
+        StrSplit(line, absl::ByAnyChar(": "), absl::SkipEmpty());
+    if (fields.size() < 14) {  // fields[13] (inode) is the last index read.
+      return PosixError(EINVAL, StrCat("malformed entry: ", line));
+    }
+    ASSIGN_OR_RETURN_ERRNO(entry.local_addr, AtoiBase(fields[1], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.local_port, AtoiBase(fields[2], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.remote_addr, AtoiBase(fields[3], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi<uint64_t>(fields[11]));
+    ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi<uint64_t>(fields[13]));
+
+    entries.push_back(entry);
+  }
+  std::cerr << "<end of /proc/net/tcp>" << std::endl;
+
+  return entries;
+}
+
+TEST(ProcNetTCP, Exists) {
+  const std::string content =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/tcp"));
+  const std::string header_line = StrCat(kProcNetTCPHeader, "\n");
+  if (IsRunningOnGvisor()) {
+    // Should be just the header since we don't have any tcp sockets yet.
+    EXPECT_EQ(content, header_line);
+  } else {
+    // On a general linux machine, we could have arbitrary sockets on the
+    // system, so just check the header.
+    EXPECT_THAT(content, ::testing::StartsWith(header_line));
+  }
+}
+
+TEST(ProcNetTCP, EntryUID) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create());
+  std::vector<TCPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+  TCPEntry e;
+  ASSERT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr()));
+  EXPECT_EQ(e.uid, geteuid());
+  ASSERT_TRUE(FindByRemoteAddr(entries, &e, sockets->first_addr()));
+  EXPECT_EQ(e.uid, geteuid());
+}
+
+TEST(ProcNetTCP, BindAcceptConnect) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create());
+  std::vector<TCPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+  // We can only make assertions about the total number of entries if we control
+  // the entire "machine".
+  if (IsRunningOnGvisor()) {
+    EXPECT_EQ(entries.size(), 2);
+  }
+
+  TCPEntry e;
+  EXPECT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr()));
+  EXPECT_TRUE(FindByRemoteAddr(entries, &e, sockets->first_addr()));
+}
+
+TEST(ProcNetTCP, InodeReasonable) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create());
+  std::vector<TCPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+
+  TCPEntry accepted_entry;
+  ASSERT_TRUE(FindByLocalAddr(entries, &accepted_entry, sockets->first_addr()));
+  EXPECT_NE(accepted_entry.inode, 0);
+
+  TCPEntry client_entry;
+  ASSERT_TRUE(FindByRemoteAddr(entries, &client_entry, sockets->first_addr()));
+  EXPECT_NE(client_entry.inode, 0);
+  EXPECT_NE(accepted_entry.inode, client_entry.inode);
+}
+
+TEST(ProcNetTCP, State) {
+  std::unique_ptr<FileDescriptor> server =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPUnboundSocket(0).Create());
+
+  auto test_addr = V4Loopback();
+  ASSERT_THAT(
+      bind(server->get(), reinterpret_cast<struct sockaddr*>(&test_addr.addr),
+           test_addr.addr_len),
+      SyscallSucceeds());
+
+  struct sockaddr addr;
+  socklen_t addrlen = sizeof(struct sockaddr);
+  ASSERT_THAT(getsockname(server->get(), &addr, &addrlen), SyscallSucceeds());
+  ASSERT_EQ(addrlen, sizeof(struct sockaddr));
+
+  ASSERT_THAT(listen(server->get(), 10), SyscallSucceeds());
+  std::vector<TCPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+  TCPEntry listen_entry;
+  ASSERT_TRUE(FindByLocalAddr(entries, &listen_entry, &addr));
+  EXPECT_EQ(listen_entry.state, TCP_LISTEN);
+
+  std::unique_ptr<FileDescriptor> client =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPUnboundSocket(0).Create());
+  ASSERT_THAT(connect(client->get(), &addr, addrlen), SyscallSucceeds());
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+  ASSERT_TRUE(FindByLocalAddr(entries, &listen_entry, &addr));
+  EXPECT_EQ(listen_entry.state, TCP_LISTEN);
+  TCPEntry client_entry;
+  ASSERT_TRUE(FindByRemoteAddr(entries, &client_entry, &addr));
+  EXPECT_EQ(client_entry.state, TCP_ESTABLISHED);
+
+  FileDescriptor accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(server->get(), nullptr, nullptr));
+
+  const uint32_t accepted_local_host = IP(&addr);
+  const uint16_t accepted_local_port = Port(&addr);
+
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+  TCPEntry accepted_entry;
+  ASSERT_TRUE(FindBy(entries, &accepted_entry,
+                     [client_entry, accepted_local_host,
+                      accepted_local_port](const TCPEntry& e) {
+                       return e.local_addr == accepted_local_host &&
+                              e.local_port == accepted_local_port &&
+                              e.remote_addr == client_entry.local_addr &&
+                              e.remote_port == client_entry.local_port;
+                     }));
+  EXPECT_EQ(accepted_entry.state, TCP_ESTABLISHED);
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/proc_net_unix.cc b/test/syscalls/linux/proc_net_unix.cc
index 82d325c17..74acbe92c 100644
--- a/test/syscalls/linux/proc_net_unix.cc
+++ b/test/syscalls/linux/proc_net_unix.cc
@@ -162,7 +162,7 @@ PosixErrorOr<std::vector<UnixEntry>> ProcNetUnixEntries() {
 // Finds the first entry in 'entries' for which 'predicate' returns true.
 // Returns true on match, and sets 'match' to point to the matching entry.
 bool FindBy(std::vector<UnixEntry> entries, UnixEntry* match,
-            std::function<bool(UnixEntry)> predicate) {
+            std::function<bool(const UnixEntry&)> predicate) {
   for (int i = 0; i < entries.size(); ++i) {
     if (predicate(entries[i])) {
       *match = entries[i];
@@ -174,7 +174,8 @@ bool FindBy(std::vector<UnixEntry> entries, UnixEntry* match,
 
 bool FindByPath(std::vector<UnixEntry> entries, UnixEntry* match,
                 const std::string& path) {
-  return FindBy(entries, match, [path](UnixEntry e) { return e.path == path; });
+  return FindBy(entries, match,
+                [path](const UnixEntry& e) { return e.path == path; });
 }
 
 TEST(ProcNetUnix, Exists) {
diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc
index 0e914215d..510f7bee5 100644
--- a/test/syscalls/linux/stat.cc
+++ b/test/syscalls/linux/stat.cc
@@ -558,7 +558,7 @@ TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) {
 
 #ifndef SYS_statx
 #if defined(__x86_64__)
-#define SYS_statx 397
+#define SYS_statx 332
 #else
 #error "Unknown architecture"
 #endif
@@ -607,7 +607,8 @@ int statx(int dirfd, const char *pathname, int flags, unsigned int mask,
 }
 
 TEST_F(StatTest, StatxAbsPath) {
-  SKIP_IF(statx(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+          errno == ENOSYS);
 
   struct kernel_statx stx;
   EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, STATX_ALL, &stx),
@@ -616,7 +617,8 @@ TEST_F(StatTest, StatxAbsPath) {
 }
 
 TEST_F(StatTest, StatxRelPathDirFD) {
-  SKIP_IF(statx(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+          errno == ENOSYS);
 
   struct kernel_statx stx;
   auto const dirfd =
@@ -629,7 +631,8 @@ TEST_F(StatTest, StatxRelPathDirFD) {
 }
 
 TEST_F(StatTest, StatxRelPathCwd) {
-  SKIP_IF(statx(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+          errno == ENOSYS);
 
   ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
   auto filename = std::string(Basename(test_file_name_));
@@ -640,7 +643,8 @@ TEST_F(StatTest, StatxRelPathCwd) {
 }
 
 TEST_F(StatTest, StatxEmptyPath) {
-  SKIP_IF(statx(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+          errno == ENOSYS);
 
   const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
   struct kernel_statx stx;
diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc
index 494072a9b..dce8de9ec 100644
--- a/test/syscalls/linux/symlink.cc
+++ b/test/syscalls/linux/symlink.cc
@@ -272,6 +272,77 @@ TEST(SymlinkTest, ChmodSymlink) {
   EXPECT_EQ(FilePermission(newpath), 0777);
 }
 
+class ParamSymlinkTest : public ::testing::TestWithParam<std::string> {};
+
+// Test that creating an existing symlink with creat will create the target.
+TEST_P(ParamSymlinkTest, CreatLinkCreatesTarget) {
+  const std::string target = GetParam();
+  const std::string linkpath = NewTempAbsPath();
+
+  ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+  int fd;
+  ASSERT_THAT(fd = creat(linkpath.c_str(), 0666), SyscallSucceeds());
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+
+  ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+  struct stat st;
+  EXPECT_THAT(stat(target.c_str(), &st), SyscallSucceeds());
+
+  ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+  ASSERT_THAT(unlink(target.c_str()), SyscallSucceeds());
+}
+
+// Test that opening an existing symlink with O_CREAT will create the target.
+TEST_P(ParamSymlinkTest, OpenLinkCreatesTarget) {
+  const std::string target = GetParam();
+  const std::string linkpath = NewTempAbsPath();
+
+  ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+  int fd;
+  ASSERT_THAT(fd = open(linkpath.c_str(), O_CREAT, 0666), SyscallSucceeds());
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+
+  ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+  struct stat st;
+  EXPECT_THAT(stat(target.c_str(), &st), SyscallSucceeds());
+
+  ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+  ASSERT_THAT(unlink(target.c_str()), SyscallSucceeds());
+}
+
+// Test that opening an existing symlink with O_CREAT|O_EXCL will fail with
+// EEXIST.
+TEST_P(ParamSymlinkTest, OpenLinkExclFails) {
+  const std::string target = GetParam();
+  const std::string linkpath = NewTempAbsPath();
+
+  ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+  EXPECT_THAT(open(linkpath.c_str(), O_CREAT | O_EXCL, 0666),
+              SyscallFailsWithErrno(EEXIST));
+
+  ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+}
+
+// Test that opening an existing symlink with O_CREAT|O_NOFOLLOW will fail with
+// ELOOP.
+TEST_P(ParamSymlinkTest, OpenLinkNoFollowFails) {
+  const std::string target = GetParam();
+  const std::string linkpath = NewTempAbsPath();
+
+  ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+  EXPECT_THAT(open(linkpath.c_str(), O_CREAT | O_NOFOLLOW, 0666),
+              SyscallFailsWithErrno(ELOOP));
+
+  ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+}
+
+INSTANTIATE_TEST_SUITE_P(AbsAndRelTarget, ParamSymlinkTest,
+                         ::testing::Values(NewTempAbsPath(), NewTempRelPath()));
+
 }  // namespace
 
 }  // namespace testing
diff --git a/third_party/gvsync/atomicptr_unsafe.go b/third_party/gvsync/atomicptr_unsafe.go
index 53a943282..525c4beed 100644
--- a/third_party/gvsync/atomicptr_unsafe.go
+++ b/third_party/gvsync/atomicptr_unsafe.go
@@ -21,8 +21,18 @@ type Value struct{}
 // Note that copying AtomicPtr by value performs a non-atomic read of the
 // stored pointer, which is unsafe if Store() can be called concurrently; in
 // this case, do `dst.Store(src.Load())` instead.
+//
+// +stateify savable
 type AtomicPtr struct {
-	ptr unsafe.Pointer
+	ptr unsafe.Pointer `state:".(*Value)"`
+}
+
+func (p *AtomicPtr) savePtr() *Value {
+	return p.Load()
+}
+
+func (p *AtomicPtr) loadPtr(v *Value) {
+	p.Store(v)
 }
 
 // Load returns the value set by the most recent Store. It returns nil if there
diff --git a/tools/go_generics/generics.go b/tools/go_generics/generics.go
index 4e5cc53a2..22c714c13 100644
--- a/tools/go_generics/generics.go
+++ b/tools/go_generics/generics.go
@@ -222,7 +222,11 @@ func main() {
 				// Modify the state tag appropriately.
 				if m := stateTagRegexp.FindStringSubmatch(ident.Name); m != nil {
 					if t := identifierRegexp.FindStringSubmatch(m[2]); t != nil {
-						ident.Name = m[1] + `state:".(` + t[1] + *prefix + t[2] + *suffix + t[3] + `)"` + m[3]
+						typeName := *prefix + t[2] + *suffix
+						if n, ok := types[t[2]]; ok {
+							typeName = n
+						}
+						ident.Name = m[1] + `state:".(` + t[1] + typeName + t[3] + `)"` + m[3]
 					}
 				}
 			}