145 files changed, 5606 insertions, 2053 deletions
diff --git a/README.md b/README.md
index aa8b8c068..3d380350f 100644
--- a/README.md
+++ b/README.md
@@ -61,8 +61,9 @@ Make sure the following dependencies are installed:
 Build and install the `runsc` binary:
 
 ```sh
-make runsc
-sudo cp ./bazel-bin/runsc/linux_amd64_pure_stripped/runsc /usr/local/bin
+mkdir -p bin
+make copy TARGETS=runsc DESTINATION=bin/
+sudo cp ./bin/runsc /usr/local/bin
 ```
 
 ### Testing
diff --git a/WORKSPACE b/WORKSPACE
index b00c09682..6dc647292 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -42,10 +42,10 @@ http_archive(
         # binaries of symbols, which we don't want.
         "//tools:rules_go_symbols.patch",
     ],
-    sha256 = "69de5c704a05ff37862f7e0f5534d4f479418afc21806c887db544a316f3cb6b",
+    sha256 = "8e968b5fcea1d2d64071872b12737bbb5514524ee5f0a4f54f5920266c261acb",
     urls = [
-        "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.27.0/rules_go-v0.27.0.tar.gz",
-        "https://github.com/bazelbuild/rules_go/releases/download/v0.27.0/rules_go-v0.27.0.tar.gz",
+        "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.28.0/rules_go-v0.28.0.zip",
+        "https://github.com/bazelbuild/rules_go/releases/download/v0.28.0/rules_go-v0.28.0.zip",
     ],
 )
 
@@ -69,7 +69,7 @@ load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains", "go_rules_depe
 
 go_rules_dependencies()
 
-go_register_toolchains(go_version = "1.16.2")
+go_register_toolchains(go_version = "1.16.8")
 
 load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies", "go_repository")
 
@@ -113,11 +113,11 @@ cc_crosstool(name = "crosstool")
 # Load protobuf dependencies.
 http_archive(
     name = "rules_proto",
-    sha256 = "2a20fd8af3cad3fbab9fd3aec4a137621e0c31f858af213a7ae0f997723fc4a9",
-    strip_prefix = "rules_proto-a0761ed101b939e19d83b2da5f59034bffc19c12",
+    sha256 = "66bfdf8782796239d3875d37e7de19b1d94301e8972b3cbd2446b332429b4df1",
+    strip_prefix = "rules_proto-4.0.0",
     urls = [
-        "https://mirror.bazel.build/github.com/bazelbuild/rules_proto/archive/a0761ed101b939e19d83b2da5f59034bffc19c12.tar.gz",
-        "https://github.com/bazelbuild/rules_proto/archive/a0761ed101b939e19d83b2da5f59034bffc19c12.tar.gz",
+        "https://mirror.bazel.build/github.com/bazelbuild/rules_proto/archive/refs/tags/4.0.0.tar.gz",
+        "https://github.com/bazelbuild/rules_proto/archive/refs/tags/4.0.0.tar.gz",
     ],
 )
 
@@ -1256,7 +1256,7 @@ http_archive(
     strip_prefix = "abseil-cpp-278e0a071885a22dcd2fd1b5576cc44757299343",
     urls = [
         "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/278e0a071885a22dcd2fd1b5576cc44757299343.tar.gz",
-        "https://github.com/abseil/abseil-cpp/archive/278e0a071885a22dcd2fd1b5576cc44757299343.tar.gz"
+        "https://github.com/abseil/abseil-cpp/archive/278e0a071885a22dcd2fd1b5576cc44757299343.tar.gz",
     ],
 )
 
@@ -1278,7 +1278,6 @@ load("@com_github_grpc_grpc//bazel:grpc_extra_deps.bzl", "grpc_extra_deps")
 
 grpc_extra_deps()
 
-
 http_archive(
     name = "com_google_googletest",
     sha256 = "0a10bea96d8670e5eef948d79d824162b1577bb7889539e49ec786bfc3e48912",
@@ -1305,7 +1304,9 @@ http_archive(
     strip_prefix = "protobuf-3.17.3",
     urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.17.3.zip"],
 )
+
 load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")
+
 protobuf_deps()
 
 # Schemas for testing.
@@ -1635,7 +1636,7 @@ go_repository(
     importpath = "honnef.co/go/tools",
     sum = "h1:Tyybiul3hjaq0dkv+kcf5/MPTfo+ZBiEWrkhgxMPH54=",
     version = "v0.3.0-0.dev.0.20210801021341-453cb28c0b15",
- )
+)
 
 go_repository(
     name = "com_github_burntsushi_toml",
diff --git a/g3doc/user_guide/compatibility.md b/g3doc/user_guide/compatibility.md
index 76e879a01..ef50c0147 100644
--- a/g3doc/user_guide/compatibility.md
+++ b/g3doc/user_guide/compatibility.md
@@ -42,6 +42,9 @@ Most common utilities work. Note that:
 
 *   Some tools, such as `tcpdump` and old versions of `ping`, require explicitly
     enabling raw sockets via the unsafe `--net-raw` runsc flag.
+    *   In the case of `tcpdump`, the following invocations work:
+        *   `tcpdump -i any`
+        *   `tcpdump -i <device-name> -p` (`-p` disables promiscuous mode)
 *   Different Docker images can behave differently. For example, Alpine Linux
     and Ubuntu have different `ip` binaries.
 
@@ -82,7 +85,7 @@ Most common utilities work. Note that:
 | sshd       | Partially working. Job control [in progress](https://gvisor.dev/issue/154).                         |
 | strace     | Working.                                                                                            |
 | tar        | Working.                                                                                            |
-| tcpdump    | Working. [Promiscuous mode in progress](https://gvisor.dev/issue/3333).                             |
+| tcpdump    | Working [only with libpcap versions < 1.10](https://github.com/google/gvisor/issues/6699). [Promiscuous mode in progress](https://gvisor.dev/issue/3333).                             |
 | top        | Working.                                                                                            |
 | uptime     | Working.                                                                                            |
 | vim        | Working.                                                                                            |
diff --git a/images/benchmarks/node/package-lock.json b/images/benchmarks/node/package-lock.json
index 580e68aa5..9f59a3a71 100644
--- a/images/benchmarks/node/package-lock.json
+++ b/images/benchmarks/node/package-lock.json
@@ -1,63 +1,687 @@
 {
   "name": "nodedum",
   "version": "1.0.0",
-  "lockfileVersion": 1,
+  "lockfileVersion": 2,
   "requires": true,
-  "dependencies": {
-    "accepts": {
-      "version": "1.3.5",
-      "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz",
-      "integrity": "sha1-63d99gEXI6OxTopywIBcjoZ0a9I=",
-      "requires": {
-        "mime-types": "~2.1.18",
-        "negotiator": "0.6.1"
+  "packages": {
+    "": {
+      "name": "nodedum",
+      "version": "1.0.0",
+      "license": "ISC",
+      "dependencies": {
+        "express": "^4.16.4",
+        "hbs": "^4.0.4",
+        "redis": "^3.1.2",
+        "redis-commands": "^1.2.0",
+        "redis-parser": "^2.6.0",
+        "secure-random-string": "^1.1.0"
       }
     },
-    "array-flatten": {
+    "node_modules/accepts": {
+      "version": "1.3.7",
+      "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz",
+      "integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==",
+      "dependencies": {
+        "mime-types": "~2.1.24",
+        "negotiator": "0.6.2"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/array-flatten": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
       "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI="
     },
-    "async": {
+    "node_modules/body-parser": {
+      "version": "1.19.0",
+      "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz",
+      "integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==",
+      "dependencies": {
+        "bytes": "3.1.0",
+        "content-type": "~1.0.4",
+        "debug": "2.6.9",
+        "depd": "~1.1.2",
+        "http-errors": "1.7.2",
+        "iconv-lite": "0.4.24",
+        "on-finished": "~2.3.0",
+        "qs": "6.7.0",
+        "raw-body": "2.4.0",
+        "type-is": "~1.6.17"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/bytes": {
+      "version": "3.1.0",
+      "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz",
+      "integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/content-disposition": {
+      "version": "0.5.3",
+      "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz",
+      "integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==",
+      "dependencies": {
+        "safe-buffer": "5.1.2"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/content-type": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz",
+      "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/cookie": {
+      "version": "0.4.0",
+      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz",
+      "integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg==",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/cookie-signature": {
+      "version": "1.0.6",
+      "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz",
+      "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw="
+    },
+    "node_modules/debug": {
+      "version": "2.6.9",
+      "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
+      "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
+      "dependencies": {
+        "ms": "2.0.0"
+      }
+    },
+    "node_modules/denque": {
+      "version": "1.5.1",
+      "resolved": "https://registry.npmjs.org/denque/-/denque-1.5.1.tgz",
+      "integrity": "sha512-XwE+iZ4D6ZUB7mfYRMb5wByE8L74HCn30FBN7sWnXksWc1LO1bPDl67pBR9o/kC4z/xSNAwkMYcGgqDV3BE3Hw==",
+      "engines": {
+        "node": ">=0.10"
+      }
+    },
+    "node_modules/depd": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz",
+      "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/destroy": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz",
+      "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA="
+    },
+    "node_modules/ee-first": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
+      "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0="
+    },
+    "node_modules/encodeurl": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
+      "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/escape-html": {
+      "version": "1.0.3",
+      "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
+      "integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg="
+    },
+    "node_modules/etag": {
+      "version": "1.8.1",
+      "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
+      "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/express": {
+      "version": "4.17.1",
+      "resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz",
+      "integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==",
+      "dependencies": {
+        "accepts": "~1.3.7",
+        "array-flatten": "1.1.1",
+        "body-parser": "1.19.0",
+        "content-disposition": "0.5.3",
+        "content-type": "~1.0.4",
+        "cookie": "0.4.0",
+        "cookie-signature": "1.0.6",
+        "debug": "2.6.9",
+        "depd": "~1.1.2",
+        "encodeurl": "~1.0.2",
+        "escape-html": "~1.0.3",
+        "etag": "~1.8.1",
+        "finalhandler": "~1.1.2",
+        "fresh": "0.5.2",
+        "merge-descriptors": "1.0.1",
+        "methods": "~1.1.2",
+        "on-finished": "~2.3.0",
+        "parseurl": "~1.3.3",
+        "path-to-regexp": "0.1.7",
+        "proxy-addr": "~2.0.5",
+        "qs": "6.7.0",
+        "range-parser": "~1.2.1",
+        "safe-buffer": "5.1.2",
+        "send": "0.17.1",
+        "serve-static": "1.14.1",
+        "setprototypeof": "1.1.1",
+        "statuses": "~1.5.0",
+        "type-is": "~1.6.18",
+        "utils-merge": "1.0.1",
+        "vary": "~1.1.2"
+      },
+      "engines": {
+        "node": ">= 0.10.0"
+      }
+    },
+    "node_modules/finalhandler": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz",
+      "integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==",
+      "dependencies": {
+        "debug": "2.6.9",
+        "encodeurl": "~1.0.2",
+        "escape-html": "~1.0.3",
+        "on-finished": "~2.3.0",
+        "parseurl": "~1.3.3",
+        "statuses": "~1.5.0",
+        "unpipe": "~1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/foreachasync": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/foreachasync/-/foreachasync-3.0.0.tgz",
+      "integrity": "sha1-VQKYfchxS+M5IJfzLgBxyd7gfPY="
+    },
+    "node_modules/forwarded": {
+      "version": "0.2.0",
+      "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
+      "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/fresh": {
+      "version": "0.5.2",
+      "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
+      "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/handlebars": {
+      "version": "4.7.7",
+      "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.7.tgz",
+      "integrity": "sha512-aAcXm5OAfE/8IXkcZvCepKU3VzW1/39Fb5ZuqMtgI/hT8X2YgoMvBY5dLhq/cpOvw7Lk1nK/UF71aLG/ZnVYRA==",
+      "dependencies": {
+        "minimist": "^1.2.5",
+        "neo-async": "^2.6.0",
+        "source-map": "^0.6.1",
+        "uglify-js": "^3.1.4",
+        "wordwrap": "^1.0.0"
+      },
+      "bin": {
+        "handlebars": "bin/handlebars"
+      },
+      "engines": {
+        "node": ">=0.4.7"
+      },
+      "optionalDependencies": {
+        "uglify-js": "^3.1.4"
+      }
+    },
+    "node_modules/hbs": {
+      "version": "4.1.2",
+      "resolved": "https://registry.npmjs.org/hbs/-/hbs-4.1.2.tgz",
+      "integrity": "sha512-WfBnQbozbdiTLjJu6P6Wturgvy0FN8xtRmIjmP0ebX9OGQrt+2S6UC7xX0IebHTCS1sXe20zfTzQ7yhjrEvrfQ==",
+      "dependencies": {
+        "handlebars": "4.7.7",
+        "walk": "2.3.14"
+      },
+      "engines": {
+        "node": ">= 0.8",
+        "npm": "1.2.8000 || >= 1.4.16"
+      }
+    },
+    "node_modules/http-errors": {
+      "version": "1.7.2",
+      "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz",
+      "integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==",
+      "dependencies": {
+        "depd": "~1.1.2",
+        "inherits": "2.0.3",
+        "setprototypeof": "1.1.1",
+        "statuses": ">= 1.5.0 < 2",
+        "toidentifier": "1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/iconv-lite": {
+      "version": "0.4.24",
+      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
+      "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
+      "dependencies": {
+        "safer-buffer": ">= 2.1.2 < 3"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/inherits": {
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz",
+      "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4="
+    },
+    "node_modules/ipaddr.js": {
+      "version": "1.9.1",
+      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
+      "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
+      "engines": {
+        "node": ">= 0.10"
+      }
+    },
+    "node_modules/media-typer": {
+      "version": "0.3.0",
+      "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
+      "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/merge-descriptors": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
+      "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E="
+    },
+    "node_modules/methods": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
+      "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/mime": {
+      "version": "1.6.0",
+      "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
+      "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==",
+      "bin": {
+        "mime": "cli.js"
+      },
+      "engines": {
+        "node": ">=4"
+      }
+    },
+    "node_modules/mime-db": {
+      "version": "1.49.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.49.0.tgz",
+      "integrity": "sha512-CIc8j9URtOVApSFCQIF+VBkX1RwXp/oMMOrqdyXSBXq5RWNEsRfyj1kiRnQgmNXmHxPoFIxOroKA3zcU9P+nAA==",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/mime-types": {
+      "version": "2.1.32",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.32.tgz",
+      "integrity": "sha512-hJGaVS4G4c9TSMYh2n6SQAGrC4RnfU+daP8G7cSCmaqNjiOoUY0VHCMS42pxnQmVF1GWwFhbHWn3RIxCqTmZ9A==",
+      "dependencies": {
+        "mime-db": "1.49.0"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/minimist": {
+      "version": "1.2.5",
+      "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
+      "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw=="
+    },
+    "node_modules/ms": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
+      "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
+    },
+    "node_modules/negotiator": {
+      "version": "0.6.2",
+      "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz",
+      "integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/neo-async": {
       "version": "2.6.2",
-      "resolved": "https://registry.npmjs.org/async/-/async-2.6.2.tgz",
-      "integrity": "sha512-H1qVYh1MYhEEFLsP97cVKqCGo7KfCyTt6uEWqsTBr9SO84oK9Uwbyd/yCW+6rKJLHksBNUVWZDAjfS+Ccx0Bbg==",
+      "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz",
+      "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw=="
+    },
+    "node_modules/on-finished": {
+      "version": "2.3.0",
+      "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz",
+      "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=",
+      "dependencies": {
+        "ee-first": "1.1.1"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/parseurl": {
+      "version": "1.3.3",
+      "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
+      "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/path-to-regexp": {
+      "version": "0.1.7",
+      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz",
+      "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w="
+    },
+    "node_modules/proxy-addr": {
+      "version": "2.0.7",
+      "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
+      "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==",
+      "dependencies": {
+        "forwarded": "0.2.0",
+        "ipaddr.js": "1.9.1"
+      },
+      "engines": {
+        "node": ">= 0.10"
+      }
+    },
+    "node_modules/qs": {
+      "version": "6.7.0",
+      "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz",
+      "integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ==",
+      "engines": {
+        "node": ">=0.6"
+      }
+    },
+    "node_modules/range-parser": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
+      "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/raw-body": {
+      "version": "2.4.0",
+      "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz",
+      "integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==",
+      "dependencies": {
+        "bytes": "3.1.0",
+        "http-errors": "1.7.2",
+        "iconv-lite": "0.4.24",
+        "unpipe": "1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/redis": {
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/redis/-/redis-3.1.2.tgz",
+      "integrity": "sha512-grn5KoZLr/qrRQVwoSkmzdbw6pwF+/rwODtrOr6vuBRiR/f3rjSTGupbF90Zpqm2oenix8Do6RV7pYEkGwlKkw==",
+      "dependencies": {
+        "denque": "^1.5.0",
+        "redis-commands": "^1.7.0",
+        "redis-errors": "^1.2.0",
+        "redis-parser": "^3.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/node-redis"
+      }
+    },
+    "node_modules/redis-commands": {
+      "version": "1.7.0",
+      "resolved": "https://registry.npmjs.org/redis-commands/-/redis-commands-1.7.0.tgz",
+      "integrity": "sha512-nJWqw3bTFy21hX/CPKHth6sfhZbdiHP6bTawSgQBlKOVRG7EZkfHbbHwQJnrE4vsQf0CMNE+3gJ4Fmm16vdVlQ=="
+    },
+    "node_modules/redis-errors": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/redis-errors/-/redis-errors-1.2.0.tgz",
+      "integrity": "sha1-62LSrbFeTq9GEMBK/hUpOEJQq60=",
+      "engines": {
+        "node": ">=4"
+      }
+    },
+    "node_modules/redis-parser": {
+      "version": "2.6.0",
+      "resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-2.6.0.tgz",
+      "integrity": "sha1-Uu0J2srBCPGmMcB+m2mUHnoZUEs=",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/redis/node_modules/redis-parser": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-3.0.0.tgz",
+      "integrity": "sha1-tm2CjNyv5rS4pCin3vTGvKwxyLQ=",
+      "dependencies": {
+        "redis-errors": "^1.0.0"
+      },
+      "engines": {
+        "node": ">=4"
+      }
+    },
+    "node_modules/safe-buffer": {
+      "version": "5.1.2",
+      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
+      "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
+    },
+    "node_modules/safer-buffer": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
+      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
+    },
+    "node_modules/secure-random-string": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/secure-random-string/-/secure-random-string-1.1.3.tgz",
+      "integrity": "sha512-298HxkJJp5mjpPhxDsN26S/2JmMaUIrQ4PxDI/F4fXKRBTOKendQ5i6JCkc+a8F8koLh0vdfwSCw8+RJkY7N6A=="
+    },
+    "node_modules/send": {
+      "version": "0.17.1",
+      "resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz",
+      "integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==",
+      "dependencies": {
+        "debug": "2.6.9",
+        "depd": "~1.1.2",
+        "destroy": "~1.0.4",
+        "encodeurl": "~1.0.2",
+        "escape-html": "~1.0.3",
+        "etag": "~1.8.1",
+        "fresh": "0.5.2",
+        "http-errors": "~1.7.2",
+        "mime": "1.6.0",
+        "ms": "2.1.1",
+        "on-finished": "~2.3.0",
+        "range-parser": "~1.2.1",
+        "statuses": "~1.5.0"
+      },
+      "engines": {
+        "node": ">= 0.8.0"
+      }
+    },
+    "node_modules/send/node_modules/ms": {
+      "version": "2.1.1",
+      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
+      "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg=="
+    },
+    "node_modules/serve-static": {
+      "version": "1.14.1",
+      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz",
+      "integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==",
+      "dependencies": {
+        "encodeurl": "~1.0.2",
+        "escape-html": "~1.0.3",
+        "parseurl": "~1.3.3",
+        "send": "0.17.1"
+      },
+      "engines": {
+        "node": ">= 0.8.0"
+      }
+    },
+    "node_modules/setprototypeof": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz",
+      "integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw=="
+    },
+    "node_modules/source-map": {
+      "version": "0.6.1",
+      "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
+      "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/statuses": {
+      "version": "1.5.0",
+      "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz",
+      "integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/toidentifier": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz",
+      "integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw==",
+      "engines": {
+        "node": ">=0.6"
+      }
+    },
+    "node_modules/type-is": {
+      "version": "1.6.18",
+      "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
+      "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==",
+      "dependencies": {
+        "media-typer": "0.3.0",
+        "mime-types": "~2.1.24"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/uglify-js": {
+      "version": "3.14.2",
+      "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.14.2.tgz",
+      "integrity": "sha512-rtPMlmcO4agTUfz10CbgJ1k6UAoXM2gWb3GoMPPZB/+/Ackf8lNWk11K4rYi2D0apgoFRLtQOZhb+/iGNJq26A==",
+      "optional": true,
+      "bin": {
+        "uglifyjs": "bin/uglifyjs"
+      },
+      "engines": {
+        "node": ">=0.8.0"
+      }
+    },
+    "node_modules/unpipe": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
+      "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/utils-merge": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
+      "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=",
+      "engines": {
+        "node": ">= 0.4.0"
+      }
+    },
+    "node_modules/vary": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
+      "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/walk": {
+      "version": "2.3.14",
+      "resolved": "https://registry.npmjs.org/walk/-/walk-2.3.14.tgz",
+      "integrity": "sha512-5skcWAUmySj6hkBdH6B6+3ddMjVQYH5Qy9QGbPmN8kVmLteXk+yVXg+yfk1nbX30EYakahLrr8iPcCxJQSCBeg==",
+      "dependencies": {
+        "foreachasync": "^3.0.0"
+      }
+    },
+    "node_modules/wordwrap": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz",
+      "integrity": "sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus="
+    }
+  },
+  "dependencies": {
+    "accepts": {
+      "version": "1.3.7",
+      "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz",
+      "integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==",
       "requires": {
-        "lodash": "^4.17.11"
+        "mime-types": "~2.1.24",
+        "negotiator": "0.6.2"
       }
     },
+    "array-flatten": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
+      "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI="
+    },
     "body-parser": {
-      "version": "1.18.3",
-      "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.18.3.tgz",
-      "integrity": "sha1-WykhmP/dVTs6DyDe0FkrlWlVyLQ=",
+      "version": "1.19.0",
+      "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz",
+      "integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==",
       "requires": {
-        "bytes": "3.0.0",
+        "bytes": "3.1.0",
         "content-type": "~1.0.4",
         "debug": "2.6.9",
         "depd": "~1.1.2",
-        "http-errors": "~1.6.3",
-        "iconv-lite": "0.4.23",
+        "http-errors": "1.7.2",
+        "iconv-lite": "0.4.24",
         "on-finished": "~2.3.0",
-        "qs": "6.5.2",
-        "raw-body": "2.3.3",
-        "type-is": "~1.6.16"
+        "qs": "6.7.0",
+        "raw-body": "2.4.0",
+        "type-is": "~1.6.17"
       }
     },
     "bytes": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz",
-      "integrity": "sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg="
-    },
-    "commander": {
-      "version": "2.20.0",
-      "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.0.tgz",
-      "integrity": "sha512-7j2y+40w61zy6YC2iRNpUe/NwhNyoXrYpHMrSunaMG64nRnaf96zO/KMQR4OyN/UnE5KLyEBnKHd4aG3rskjpQ==",
-      "optional": true
+      "version": "3.1.0",
+      "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz",
+      "integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg=="
     },
     "content-disposition": {
-      "version": "0.5.2",
-      "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.2.tgz",
-      "integrity": "sha1-DPaLud318r55YcOoUXjLhdunjLQ="
+      "version": "0.5.3",
+      "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz",
+      "integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==",
+      "requires": {
+        "safe-buffer": "5.1.2"
+      }
     },
     "content-type": {
       "version": "1.0.4",
@@ -65,9 +689,9 @@
       "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA=="
     },
     "cookie": {
-      "version": "0.3.1",
-      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.3.1.tgz",
-      "integrity": "sha1-5+Ch+e9DtMi6klxcWpboBtFoc7s="
+      "version": "0.4.0",
+      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz",
+      "integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg=="
     },
     "cookie-signature": {
       "version": "1.0.6",
@@ -82,6 +706,11 @@
         "ms": "2.0.0"
       }
     },
+    "denque": {
+      "version": "1.5.1",
+      "resolved": "https://registry.npmjs.org/denque/-/denque-1.5.1.tgz",
+      "integrity": "sha512-XwE+iZ4D6ZUB7mfYRMb5wByE8L74HCn30FBN7sWnXksWc1LO1bPDl67pBR9o/kC4z/xSNAwkMYcGgqDV3BE3Hw=="
+    },
     "depd": {
       "version": "1.1.2",
       "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz",
@@ -92,11 +721,6 @@
       "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz",
       "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA="
     },
-    "double-ended-queue": {
-      "version": "2.1.0-0",
-      "resolved": "https://registry.npmjs.org/double-ended-queue/-/double-ended-queue-2.1.0-0.tgz",
-      "integrity": "sha1-ED01J/0xUo9AGIEwyEHv3XgmTlw="
-    },
     "ee-first": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
@@ -118,53 +742,53 @@
       "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc="
     },
     "express": {
-      "version": "4.16.4",
-      "resolved": "https://registry.npmjs.org/express/-/express-4.16.4.tgz",
-      "integrity": "sha512-j12Uuyb4FMrd/qQAm6uCHAkPtO8FDTRJZBDd5D2KOL2eLaz1yUNdUB/NOIyq0iU4q4cFarsUCrnFDPBcnksuOg==",
+      "version": "4.17.1",
+      "resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz",
+      "integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==",
       "requires": {
-        "accepts": "~1.3.5",
+        "accepts": "~1.3.7",
         "array-flatten": "1.1.1",
-        "body-parser": "1.18.3",
-        "content-disposition": "0.5.2",
+        "body-parser": "1.19.0",
+        "content-disposition": "0.5.3",
         "content-type": "~1.0.4",
-        "cookie": "0.3.1",
+        "cookie": "0.4.0",
         "cookie-signature": "1.0.6",
         "debug": "2.6.9",
         "depd": "~1.1.2",
         "encodeurl": "~1.0.2",
         "escape-html": "~1.0.3",
         "etag": "~1.8.1",
-        "finalhandler": "1.1.1",
+        "finalhandler": "~1.1.2",
         "fresh": "0.5.2",
         "merge-descriptors": "1.0.1",
         "methods": "~1.1.2",
         "on-finished": "~2.3.0",
-        "parseurl": "~1.3.2",
+        "parseurl": "~1.3.3",
         "path-to-regexp": "0.1.7",
-        "proxy-addr": "~2.0.4",
-        "qs": "6.5.2",
-        "range-parser": "~1.2.0",
+        "proxy-addr": "~2.0.5",
+        "qs": "6.7.0",
+        "range-parser": "~1.2.1",
         "safe-buffer": "5.1.2",
-        "send": "0.16.2",
-        "serve-static": "1.13.2",
-        "setprototypeof": "1.1.0",
-        "statuses": "~1.4.0",
-        "type-is": "~1.6.16",
+        "send": "0.17.1",
+        "serve-static": "1.14.1",
+        "setprototypeof": "1.1.1",
+        "statuses": "~1.5.0",
+        "type-is": "~1.6.18",
         "utils-merge": "1.0.1",
         "vary": "~1.1.2"
       }
     },
     "finalhandler": {
-      "version": "1.1.1",
-      "resolved": "http://registry.npmjs.org/finalhandler/-/finalhandler-1.1.1.tgz",
-      "integrity": "sha512-Y1GUDo39ez4aHAw7MysnUD5JzYX+WaIj8I57kO3aEPT1fFRL4sr7mjei97FgnwhAyyzRYmQZaTHb2+9uZ1dPtg==",
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz",
+      "integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==",
       "requires": {
         "debug": "2.6.9",
         "encodeurl": "~1.0.2",
         "escape-html": "~1.0.3",
         "on-finished": "~2.3.0",
-        "parseurl": "~1.3.2",
-        "statuses": "~1.4.0",
+        "parseurl": "~1.3.3",
+        "statuses": "~1.5.0",
         "unpipe": "~1.0.0"
       }
     },
@@ -174,9 +798,9 @@
       "integrity": "sha1-VQKYfchxS+M5IJfzLgBxyd7gfPY="
     },
     "forwarded": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz",
-      "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ="
+      "version": "0.2.0",
+      "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
+      "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow=="
     },
     "fresh": {
       "version": "0.5.2",
@@ -184,40 +808,42 @@
       "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac="
     },
     "handlebars": {
-      "version": "4.0.14",
-      "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.0.14.tgz",
-      "integrity": "sha512-E7tDoyAA8ilZIV3xDJgl18sX3M8xB9/fMw8+mfW4msLW8jlX97bAnWgT3pmaNXuvzIEgSBMnAHfuXsB2hdzfow==",
+      "version": "4.7.7",
+      "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.7.tgz",
+      "integrity": "sha512-aAcXm5OAfE/8IXkcZvCepKU3VzW1/39Fb5ZuqMtgI/hT8X2YgoMvBY5dLhq/cpOvw7Lk1nK/UF71aLG/ZnVYRA==",
       "requires": {
-        "async": "^2.5.0",
-        "optimist": "^0.6.1",
+        "minimist": "^1.2.5",
+        "neo-async": "^2.6.0",
         "source-map": "^0.6.1",
-        "uglify-js": "^3.1.4"
+        "uglify-js": "^3.1.4",
+        "wordwrap": "^1.0.0"
       }
     },
     "hbs": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/hbs/-/hbs-4.0.4.tgz",
-      "integrity": "sha512-esVlyV/V59mKkwFai5YmPRSNIWZzhqL5YMN0++ueMxyK1cCfPa5f6JiHtapPKAIVAhQR6rpGxow0troav9WMEg==",
+      "version": "4.1.2",
+      "resolved": "https://registry.npmjs.org/hbs/-/hbs-4.1.2.tgz",
+      "integrity": "sha512-WfBnQbozbdiTLjJu6P6Wturgvy0FN8xtRmIjmP0ebX9OGQrt+2S6UC7xX0IebHTCS1sXe20zfTzQ7yhjrEvrfQ==",
       "requires": {
-        "handlebars": "4.0.14",
-        "walk": "2.3.9"
+        "handlebars": "4.7.7",
+        "walk": "2.3.14"
       }
     },
     "http-errors": {
-      "version": "1.6.3",
-      "resolved": "http://registry.npmjs.org/http-errors/-/http-errors-1.6.3.tgz",
-      "integrity": "sha1-i1VoC7S+KDoLW/TqLjhYC+HZMg0=",
+      "version": "1.7.2",
+      "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz",
+      "integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==",
       "requires": {
         "depd": "~1.1.2",
         "inherits": "2.0.3",
-        "setprototypeof": "1.1.0",
-        "statuses": ">= 1.4.0 < 2"
+        "setprototypeof": "1.1.1",
+        "statuses": ">= 1.5.0 < 2",
+        "toidentifier": "1.0.0"
       }
     },
     "iconv-lite": {
-      "version": "0.4.23",
-      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.23.tgz",
-      "integrity": "sha512-neyTUVFtahjf0mB3dZT77u+8O0QB89jFdnBkd5P1JgYPbPaia3gXXOVL2fq8VyU2gMMD7SaN7QukTB/pmXYvDA==",
+      "version": "0.4.24",
+      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
+      "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
       "requires": {
         "safer-buffer": ">= 2.1.2 < 3"
       }
@@ -228,18 +854,13 @@
       "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4="
     },
     "ipaddr.js": {
-      "version": "1.8.0",
-      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.8.0.tgz",
-      "integrity": "sha1-6qM9bd16zo9/b+DJygRA5wZzix4="
-    },
-    "lodash": {
-      "version": "4.17.15",
-      "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
-      "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
+      "version": "1.9.1",
+      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
+      "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="
     },
     "media-typer": {
       "version": "0.3.0",
-      "resolved": "http://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
+      "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
       "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g="
     },
     "merge-descriptors": {
@@ -253,27 +874,27 @@
       "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4="
     },
     "mime": {
-      "version": "1.4.1",
-      "resolved": "https://registry.npmjs.org/mime/-/mime-1.4.1.tgz",
-      "integrity": "sha512-KI1+qOZu5DcW6wayYHSzR/tXKCDC5Om4s1z2QJjDULzLcmf3DvzS7oluY4HCTrc+9FiKmWUgeNLg7W3uIQvxtQ=="
+      "version": "1.6.0",
+      "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
+      "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="
     },
     "mime-db": {
-      "version": "1.37.0",
-      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.37.0.tgz",
-      "integrity": "sha512-R3C4db6bgQhlIhPU48fUtdVmKnflq+hRdad7IyKhtFj06VPNVdk2RhiYL3UjQIlso8L+YxAtFkobT0VK+S/ybg=="
+      "version": "1.49.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.49.0.tgz",
+      "integrity": "sha512-CIc8j9URtOVApSFCQIF+VBkX1RwXp/oMMOrqdyXSBXq5RWNEsRfyj1kiRnQgmNXmHxPoFIxOroKA3zcU9P+nAA=="
     },
     "mime-types": {
-      "version": "2.1.21",
-      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.21.tgz",
-      "integrity": "sha512-3iL6DbwpyLzjR3xHSFNFeb9Nz/M8WDkX33t1GFQnFOllWk8pOrh/LSrB5OXlnlW5P9LH73X6loW/eogc+F5lJg==",
+      "version": "2.1.32",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.32.tgz",
+      "integrity": "sha512-hJGaVS4G4c9TSMYh2n6SQAGrC4RnfU+daP8G7cSCmaqNjiOoUY0VHCMS42pxnQmVF1GWwFhbHWn3RIxCqTmZ9A==",
       "requires": {
-        "mime-db": "~1.37.0"
+        "mime-db": "1.49.0"
       }
     },
     "minimist": {
-      "version": "0.0.10",
-      "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.10.tgz",
-      "integrity": "sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8="
+      "version": "1.2.5",
+      "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
+      "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw=="
     },
     "ms": {
       "version": "2.0.0",
@@ -281,9 +902,14 @@
       "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
     },
     "negotiator": {
-      "version": "0.6.1",
-      "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.1.tgz",
-      "integrity": "sha1-KzJxhOiZIQEXeyhWP7XnECrNDKk="
+      "version": "0.6.2",
+      "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz",
+      "integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw=="
+    },
+    "neo-async": {
+      "version": "2.6.2",
+      "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz",
+      "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw=="
     },
     "on-finished": {
       "version": "2.3.0",
@@ -293,19 +919,10 @@
         "ee-first": "1.1.1"
       }
     },
-    "optimist": {
-      "version": "0.6.1",
-      "resolved": "https://registry.npmjs.org/optimist/-/optimist-0.6.1.tgz",
-      "integrity": "sha1-2j6nRob6IaGaERwybpDrFaAZZoY=",
-      "requires": {
-        "minimist": "~0.0.1",
-        "wordwrap": "~0.0.2"
-      }
-    },
     "parseurl": {
-      "version": "1.3.2",
-      "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.2.tgz",
-      "integrity": "sha1-/CidTtiZMRlGDBViUyYs3I3mW/M="
+      "version": "1.3.3",
+      "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
+      "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="
     },
     "path-to-regexp": {
       "version": "0.1.7",
@@ -313,61 +930,65 @@
       "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w="
     },
     "proxy-addr": {
-      "version": "2.0.4",
-      "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.4.tgz",
-      "integrity": "sha512-5erio2h9jp5CHGwcybmxmVqHmnCBZeewlfJ0pex+UW7Qny7OOZXTtH56TGNyBizkgiOwhJtMKrVzDTeKcySZwA==",
+      "version": "2.0.7",
+      "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
+      "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==",
       "requires": {
-        "forwarded": "~0.1.2",
-        "ipaddr.js": "1.8.0"
+        "forwarded": "0.2.0",
+        "ipaddr.js": "1.9.1"
       }
     },
     "qs": {
-      "version": "6.5.2",
-      "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz",
-      "integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA=="
+      "version": "6.7.0",
+      "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz",
+      "integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ=="
     },
     "range-parser": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.0.tgz",
-      "integrity": "sha1-9JvmtIeJTdxA3MlKMi9hEJLgDV4="
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
+      "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg=="
     },
     "raw-body": {
-      "version": "2.3.3",
-      "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.3.3.tgz",
-      "integrity": "sha512-9esiElv1BrZoI3rCDuOuKCBRbuApGGaDPQfjSflGxdy4oyzqghxu6klEkkVIvBje+FF0BX9coEv8KqW6X/7njw==",
+      "version": "2.4.0",
+      "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz",
+      "integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==",
       "requires": {
-        "bytes": "3.0.0",
-        "http-errors": "1.6.3",
-        "iconv-lite": "0.4.23",
+        "bytes": "3.1.0",
+        "http-errors": "1.7.2",
+        "iconv-lite": "0.4.24",
         "unpipe": "1.0.0"
       }
     },
     "redis": {
-      "version": "2.8.0",
-      "resolved": "https://registry.npmjs.org/redis/-/redis-2.8.0.tgz",
-      "integrity": "sha512-M1OkonEQwtRmZv4tEWF2VgpG0JWJ8Fv1PhlgT5+B+uNq2cA3Rt1Yt/ryoR+vQNOQcIEgdCdfH0jr3bDpihAw1A==",
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/redis/-/redis-3.1.2.tgz",
+      "integrity": "sha512-grn5KoZLr/qrRQVwoSkmzdbw6pwF+/rwODtrOr6vuBRiR/f3rjSTGupbF90Zpqm2oenix8Do6RV7pYEkGwlKkw==",
       "requires": {
-        "double-ended-queue": "^2.1.0-0",
-        "redis-commands": "^1.2.0",
-        "redis-parser": "^2.6.0"
+        "denque": "^1.5.0",
+        "redis-commands": "^1.7.0",
+        "redis-errors": "^1.2.0",
+        "redis-parser": "^3.0.0"
       },
       "dependencies": {
-        "redis-commands": {
-          "version": "1.4.0",
-          "resolved": "https://registry.npmjs.org/redis-commands/-/redis-commands-1.4.0.tgz",
-          "integrity": "sha512-cu8EF+MtkwI4DLIT0x9P8qNTLFhQD4jLfxLR0cCNkeGzs87FN6879JOJwNQR/1zD7aSYNbU0hgsV9zGY71Itvw=="
-        },
         "redis-parser": {
-          "version": "2.6.0",
-          "resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-2.6.0.tgz",
-          "integrity": "sha1-Uu0J2srBCPGmMcB+m2mUHnoZUEs="
+          "version": "3.0.0",
+          "resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-3.0.0.tgz",
+          "integrity": "sha1-tm2CjNyv5rS4pCin3vTGvKwxyLQ=",
+          "requires": {
+            "redis-errors": "^1.0.0"
+          }
         }
       }
     },
     "redis-commands": {
-      "version": "1.5.0",
-      "resolved": "https://registry.npmjs.org/redis-commands/-/redis-commands-1.5.0.tgz",
-      "integrity": "sha512-6KxamqpZ468MeQC3bkWmCB1fp56XL64D4Kf0zJSwDZbVLLm7KFkoIcHrgRvQ+sk8dnhySs7+yBg94yIkAK7aJg=="
+      "version": "1.7.0",
+      "resolved": "https://registry.npmjs.org/redis-commands/-/redis-commands-1.7.0.tgz",
+      "integrity": "sha512-nJWqw3bTFy21hX/CPKHth6sfhZbdiHP6bTawSgQBlKOVRG7EZkfHbbHwQJnrE4vsQf0CMNE+3gJ4Fmm16vdVlQ=="
+    },
+    "redis-errors": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/redis-errors/-/redis-errors-1.2.0.tgz",
+      "integrity": "sha1-62LSrbFeTq9GEMBK/hUpOEJQq60="
     },
     "redis-parser": {
       "version": "2.6.0",
@@ -385,14 +1006,14 @@
       "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
     },
     "secure-random-string": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/secure-random-string/-/secure-random-string-1.1.0.tgz",
-      "integrity": "sha512-V/h8jqoz58zklNGybVhP++cWrxEPXlLM/6BeJ4e0a8zlb4BsbYRzFs16snrxByPa5LUxCVTD3M6EYIVIHR1fAg=="
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/secure-random-string/-/secure-random-string-1.1.3.tgz",
+      "integrity": "sha512-298HxkJJp5mjpPhxDsN26S/2JmMaUIrQ4PxDI/F4fXKRBTOKendQ5i6JCkc+a8F8koLh0vdfwSCw8+RJkY7N6A=="
     },
     "send": {
-      "version": "0.16.2",
-      "resolved": "https://registry.npmjs.org/send/-/send-0.16.2.tgz",
-      "integrity": "sha512-E64YFPUssFHEFBvpbbjr44NCLtI1AohxQ8ZSiJjQLskAdKuriYEP6VyGEsRDH8ScozGpkaX1BGvhanqCwkcEZw==",
+      "version": "0.17.1",
+      "resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz",
+      "integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==",
       "requires": {
         "debug": "2.6.9",
         "depd": "~1.1.2",
@@ -401,29 +1022,36 @@
         "escape-html": "~1.0.3",
         "etag": "~1.8.1",
         "fresh": "0.5.2",
-        "http-errors": "~1.6.2",
-        "mime": "1.4.1",
-        "ms": "2.0.0",
+        "http-errors": "~1.7.2",
+        "mime": "1.6.0",
+        "ms": "2.1.1",
         "on-finished": "~2.3.0",
-        "range-parser": "~1.2.0",
-        "statuses": "~1.4.0"
+        "range-parser": "~1.2.1",
+        "statuses": "~1.5.0"
+      },
+      "dependencies": {
+        "ms": {
+          "version": "2.1.1",
+          "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
+          "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg=="
+        }
       }
     },
     "serve-static": {
-      "version": "1.13.2",
-      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.13.2.tgz",
-      "integrity": "sha512-p/tdJrO4U387R9oMjb1oj7qSMaMfmOyd4j9hOFoxZe2baQszgHcSWjuya/CiT5kgZZKRudHNOA0pYXOl8rQ5nw==",
+      "version": "1.14.1",
+      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz",
+      "integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==",
       "requires": {
         "encodeurl": "~1.0.2",
         "escape-html": "~1.0.3",
-        "parseurl": "~1.3.2",
-        "send": "0.16.2"
+        "parseurl": "~1.3.3",
+        "send": "0.17.1"
       }
     },
     "setprototypeof": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.0.tgz",
-      "integrity": "sha512-BvE/TwpZX4FXExxOxZyRGQQv651MSwmWKZGqvmPcRIjDqWub67kTKuIMx43cZZrS/cBBzwBcNDWoFxt2XEFIpQ=="
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz",
+      "integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw=="
     },
     "source-map": {
       "version": "0.6.1",
@@ -431,28 +1059,29 @@
       "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g=="
     },
     "statuses": {
-      "version": "1.4.0",
-      "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.4.0.tgz",
-      "integrity": "sha512-zhSCtt8v2NDrRlPQpCNtw/heZLtfUDqxBM1udqikb/Hbk52LK4nQSwr10u77iopCW5LsyHpuXS0GnEc48mLeew=="
+      "version": "1.5.0",
+      "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz",
+      "integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow="
+    },
+    "toidentifier": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz",
+      "integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw=="
     },
     "type-is": {
-      "version": "1.6.16",
-      "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.16.tgz",
-      "integrity": "sha512-HRkVv/5qY2G6I8iab9cI7v1bOIdhm94dVjQCPFElW9W+3GeDOSHmy2EBYe4VTApuzolPcmgFTN3ftVJRKR2J9Q==",
+      "version": "1.6.18",
+      "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
+      "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==",
       "requires": {
         "media-typer": "0.3.0",
-        "mime-types": "~2.1.18"
+        "mime-types": "~2.1.24"
       }
     },
     "uglify-js": {
-      "version": "3.5.9",
-      "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.5.9.tgz",
-      "integrity": "sha512-WpT0RqsDtAWPNJK955DEnb6xjymR8Fn0OlK4TT4pS0ASYsVPqr5ELhgwOwLCP5J5vHeJ4xmMmz3DEgdqC10JeQ==",
-      "optional": true,
-      "requires": {
-        "commander": "~2.20.0",
-        "source-map": "~0.6.1"
-      }
+      "version": "3.14.2",
+      "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.14.2.tgz",
+      "integrity": "sha512-rtPMlmcO4agTUfz10CbgJ1k6UAoXM2gWb3GoMPPZB/+/Ackf8lNWk11K4rYi2D0apgoFRLtQOZhb+/iGNJq26A==",
+      "optional": true
     },
     "unpipe": {
       "version": "1.0.0",
@@ -470,17 +1099,17 @@
       "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw="
     },
     "walk": {
-      "version": "2.3.9",
-      "resolved": "https://registry.npmjs.org/walk/-/walk-2.3.9.tgz",
-      "integrity": "sha1-MbTbZnjyrgHDnqn7hyWpAx5Vins=",
+      "version": "2.3.14",
+      "resolved": "https://registry.npmjs.org/walk/-/walk-2.3.14.tgz",
+      "integrity": "sha512-5skcWAUmySj6hkBdH6B6+3ddMjVQYH5Qy9QGbPmN8kVmLteXk+yVXg+yfk1nbX30EYakahLrr8iPcCxJQSCBeg==",
       "requires": {
         "foreachasync": "^3.0.0"
       }
     },
     "wordwrap": {
-      "version": "0.0.3",
-      "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-0.0.3.tgz",
-      "integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc="
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz",
+      "integrity": "sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus="
     }
   }
 }
diff --git a/images/benchmarks/node/package.json b/images/benchmarks/node/package.json
index 7dcadd523..a46adc22a 100644
--- a/images/benchmarks/node/package.json
+++ b/images/benchmarks/node/package.json
@@ -11,7 +11,7 @@
   "dependencies": {
     "express": "^4.16.4",
     "hbs": "^4.0.4",
-    "redis": "^2.8.0",
+    "redis": "^3.1.2",
     "redis-commands": "^1.2.0",
     "redis-parser": "^2.6.0",
     "secure-random-string": "^1.1.0"
diff --git a/pkg/atomicbitops/atomicbitops_amd64.s b/pkg/atomicbitops/atomicbitops_amd64.s
index cbaf716bb..6b9a67adc 100644
--- a/pkg/atomicbitops/atomicbitops_amd64.s
+++ b/pkg/atomicbitops/atomicbitops_amd64.s
@@ -16,28 +16,28 @@
 
 #include "textflag.h"
 
-TEXT ·AndUint32(SB),$0-12
+TEXT ·AndUint32(SB),NOSPLIT,$0-12
   MOVQ  addr+0(FP), BX
   MOVL  val+8(FP), AX
   LOCK
   ANDL   AX, 0(BX)
   RET
 
-TEXT ·OrUint32(SB),$0-12
+TEXT ·OrUint32(SB),NOSPLIT,$0-12
   MOVQ  addr+0(FP), BX
   MOVL  val+8(FP), AX
   LOCK
   ORL   AX, 0(BX)
   RET
 
-TEXT ·XorUint32(SB),$0-12
+TEXT ·XorUint32(SB),NOSPLIT,$0-12
   MOVQ  addr+0(FP), BX
   MOVL  val+8(FP), AX
   LOCK
   XORL   AX, 0(BX)
   RET
 
-TEXT ·CompareAndSwapUint32(SB),$0-20
+TEXT ·CompareAndSwapUint32(SB),NOSPLIT,$0-20
   MOVQ  addr+0(FP), DI
   MOVL  old+8(FP), AX
   MOVL  new+12(FP), DX
@@ -46,28 +46,28 @@ TEXT ·CompareAndSwapUint32(SB),$0-20
   MOVL  AX, ret+16(FP)
   RET
 
-TEXT ·AndUint64(SB),$0-16
+TEXT ·AndUint64(SB),NOSPLIT,$0-16
   MOVQ  addr+0(FP), BX
   MOVQ  val+8(FP), AX
   LOCK
   ANDQ   AX, 0(BX)
   RET
 
-TEXT ·OrUint64(SB),$0-16
+TEXT ·OrUint64(SB),NOSPLIT,$0-16
   MOVQ  addr+0(FP), BX
   MOVQ  val+8(FP), AX
   LOCK
   ORQ   AX, 0(BX)
   RET
 
-TEXT ·XorUint64(SB),$0-16
+TEXT ·XorUint64(SB),NOSPLIT,$0-16
   MOVQ  addr+0(FP), BX
   MOVQ  val+8(FP), AX
   LOCK
   XORQ   AX, 0(BX)
   RET
 
-TEXT ·CompareAndSwapUint64(SB),$0-32
+TEXT ·CompareAndSwapUint64(SB),NOSPLIT,$0-32
   MOVQ  addr+0(FP), DI
   MOVQ  old+8(FP), AX
   MOVQ  new+16(FP), DX
diff --git a/pkg/atomicbitops/atomicbitops_arm64.s b/pkg/atomicbitops/atomicbitops_arm64.s
index 5c780851b..644a6bca5 100644
--- a/pkg/atomicbitops/atomicbitops_arm64.s
+++ b/pkg/atomicbitops/atomicbitops_arm64.s
@@ -16,7 +16,7 @@
 
 #include "textflag.h"
 
-TEXT ·AndUint32(SB),$0-12
+TEXT ·AndUint32(SB),NOSPLIT,$0-12
   MOVD    ptr+0(FP), R0
   MOVW    val+8(FP), R1
 again:
@@ -26,7 +26,7 @@ again:
   CBNZ    R3, again
   RET
 
-TEXT ·OrUint32(SB),$0-12
+TEXT ·OrUint32(SB),NOSPLIT,$0-12
   MOVD    ptr+0(FP), R0
   MOVW    val+8(FP), R1
 again:
@@ -36,7 +36,7 @@ again:
   CBNZ    R3, again
   RET
 
-TEXT ·XorUint32(SB),$0-12
+TEXT ·XorUint32(SB),NOSPLIT,$0-12
   MOVD    ptr+0(FP), R0
   MOVW    val+8(FP), R1
 again:
@@ -46,7 +46,7 @@ again:
   CBNZ    R3, again
   RET
 
-TEXT ·CompareAndSwapUint32(SB),$0-20
+TEXT ·CompareAndSwapUint32(SB),NOSPLIT,$0-20
   MOVD addr+0(FP), R0
   MOVW old+8(FP), R1
   MOVW new+12(FP), R2
@@ -60,7 +60,7 @@ done:
   MOVW R3, prev+16(FP)
   RET
 
-TEXT ·AndUint64(SB),$0-16
+TEXT ·AndUint64(SB),NOSPLIT,$0-16
   MOVD    ptr+0(FP), R0
   MOVD    val+8(FP), R1
 again:
@@ -70,7 +70,7 @@ again:
   CBNZ    R3, again
   RET
 
-TEXT ·OrUint64(SB),$0-16
+TEXT ·OrUint64(SB),NOSPLIT,$0-16
   MOVD    ptr+0(FP), R0
   MOVD    val+8(FP), R1
 again:
@@ -80,7 +80,7 @@ again:
   CBNZ    R3, again
   RET
 
-TEXT ·XorUint64(SB),$0-16
+TEXT ·XorUint64(SB),NOSPLIT,$0-16
   MOVD    ptr+0(FP), R0
   MOVD    val+8(FP), R1
 again:
@@ -90,7 +90,7 @@ again:
   CBNZ    R3, again
   RET
 
-TEXT ·CompareAndSwapUint64(SB),$0-32
+TEXT ·CompareAndSwapUint64(SB),NOSPLIT,$0-32
   MOVD addr+0(FP), R0
   MOVD old+8(FP), R1
   MOVD new+16(FP), R2
diff --git a/pkg/atomicbitops/atomicbitops_noasm.go b/pkg/atomicbitops/atomicbitops_noasm.go
index 474c0c815..af6b1362e 100644
--- a/pkg/atomicbitops/atomicbitops_noasm.go
+++ b/pkg/atomicbitops/atomicbitops_noasm.go
@@ -21,6 +21,7 @@ import (
 	"sync/atomic"
 )
 
+//go:nosplit
 func AndUint32(addr *uint32, val uint32) {
 	for {
 		o := atomic.LoadUint32(addr)
@@ -31,6 +32,7 @@ func AndUint32(addr *uint32, val uint32) {
 	}
 }
 
+//go:nosplit
 func OrUint32(addr *uint32, val uint32) {
 	for {
 		o := atomic.LoadUint32(addr)
@@ -41,6 +43,7 @@ func OrUint32(addr *uint32, val uint32) {
 	}
 }
 
+//go:nosplit
 func XorUint32(addr *uint32, val uint32) {
 	for {
 		o := atomic.LoadUint32(addr)
@@ -51,6 +54,7 @@ func XorUint32(addr *uint32, val uint32) {
 	}
 }
 
+//go:nosplit
 func CompareAndSwapUint32(addr *uint32, old, new uint32) (prev uint32) {
 	for {
 		prev = atomic.LoadUint32(addr)
@@ -63,6 +67,7 @@ func CompareAndSwapUint32(addr *uint32, old, new uint32) (prev uint32) {
 	}
 }
 
+//go:nosplit
 func AndUint64(addr *uint64, val uint64) {
 	for {
 		o := atomic.LoadUint64(addr)
@@ -73,6 +78,7 @@ func AndUint64(addr *uint64, val uint64) {
 	}
 }
 
+//go:nosplit
 func OrUint64(addr *uint64, val uint64) {
 	for {
 		o := atomic.LoadUint64(addr)
@@ -83,6 +89,7 @@ func OrUint64(addr *uint64, val uint64) {
 	}
 }
 
+//go:nosplit
 func XorUint64(addr *uint64, val uint64) {
 	for {
 		o := atomic.LoadUint64(addr)
@@ -93,6 +100,7 @@ func XorUint64(addr *uint64, val uint64) {
 	}
 }
 
+//go:nosplit
 func CompareAndSwapUint64(addr *uint64, old, new uint64) (prev uint64) {
 	for {
 		prev = atomic.LoadUint64(addr)
diff --git a/pkg/buffer/view_test.go b/pkg/buffer/view_test.go
index 796efa240..59784eacb 100644
--- a/pkg/buffer/view_test.go
+++ b/pkg/buffer/view_test.go
@@ -509,6 +509,24 @@ func TestView(t *testing.T) {
 	}
 }
 
+func TestViewClone(t *testing.T) {
+	const (
+		originalSize  = 90
+		bytesToDelete = 30
+	)
+	var v View
+	v.AppendOwned(bytes.Repeat([]byte{originalSize}, originalSize))
+
+	clonedV := v.Clone()
+	v.TrimFront(bytesToDelete)
+	if got, want := int(v.Size()), originalSize-bytesToDelete; got != want {
+		t.Errorf("original packet was not changed: size expected = %d, got = %d", want, got)
+	}
+	if got := clonedV.Size(); got != originalSize {
+		t.Errorf("cloned packet should not be modified: expected size = %d, got = %d", originalSize, got)
+	}
+}
+
 func TestViewPullUp(t *testing.T) {
 	for _, tc := range []struct {
 		desc   string
diff --git a/pkg/eventfd/BUILD b/pkg/eventfd/BUILD
new file mode 100644
index 000000000..02407cb99
--- /dev/null
+++ b/pkg/eventfd/BUILD
@@ -0,0 +1,22 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "eventfd",
+    srcs = [
+        "eventfd.go",
+    ],
+    visibility = ["//:sandbox"],
+    deps = [
+        "//pkg/hostarch",
+        "//pkg/tcpip/link/rawfile",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
+
+go_test(
+    name = "eventfd_test",
+    srcs = ["eventfd_test.go"],
+    library = ":eventfd",
+)
diff --git a/pkg/eventfd/eventfd.go b/pkg/eventfd/eventfd.go
new file mode 100644
index 000000000..acdac01b8
--- /dev/null
+++ b/pkg/eventfd/eventfd.go
@@ -0,0 +1,115 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package eventfd wraps Linux's eventfd(2) syscall.
+package eventfd
+
+import (
+	"fmt"
+	"io"
+
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/hostarch"
+	"gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+)
+
+const sizeofUint64 = 8
+
+// Eventfd represents a Linux eventfd object.
+type Eventfd struct {
+	fd int
+}
+
+// Create returns an initialized eventfd.
+func Create() (Eventfd, error) {
+	fd, _, err := unix.RawSyscall(unix.SYS_EVENTFD2, 0, 0, 0)
+	if err != 0 {
+		return Eventfd{}, fmt.Errorf("failed to create eventfd: %v", error(err))
+	}
+	if err := unix.SetNonblock(int(fd), true); err != nil {
+		unix.Close(int(fd))
+		return Eventfd{}, err
+	}
+	return Eventfd{int(fd)}, nil
+}
+
+// Wrap returns an initialized Eventfd using the provided fd.
+func Wrap(fd int) Eventfd {
+	return Eventfd{fd}
+}
+
+// Close closes the eventfd, after which it should not be used.
+func (ev Eventfd) Close() error {
+	return unix.Close(ev.fd)
+}
+
+// Dup copies the eventfd, calling dup(2) on the underlying file descriptor.
+func (ev Eventfd) Dup() (Eventfd, error) {
+	other, err := unix.Dup(ev.fd)
+	if err != nil {
+		return Eventfd{}, fmt.Errorf("failed to dup: %v", err)
+	}
+	return Eventfd{other}, nil
+}
+
+// Notify alerts other users of the eventfd. Users can receive alerts by
+// calling Wait or Read.
+func (ev Eventfd) Notify() error {
+	return ev.Write(1)
+}
+
+// Write writes a specific value to the eventfd, retrying if interrupted (EINTR).
+func (ev Eventfd) Write(val uint64) error {
+	var buf [sizeofUint64]byte
+	hostarch.ByteOrder.PutUint64(buf[:], val)
+	for {
+		n, err := unix.Write(ev.fd, buf[:])
+		if err == unix.EINTR {
+			continue
+		}
+		if err == nil && n != sizeofUint64 { // a failed write is returned below, not reported as short
+			panic(fmt.Sprintf("short write to eventfd: got %d bytes, wanted %d", n, sizeofUint64))
+		}
+		return err
+	}
+}
+
+// Wait blocks until eventfd is non-zero (i.e. someone calls Notify or Write).
+func (ev Eventfd) Wait() error {
+	_, err := ev.Read()
+	return err
+}
+
+// Read blocks until eventfd is non-zero (i.e. someone calls Notify or Write)
+// and returns the value read.
+func (ev Eventfd) Read() (uint64, error) {
+	var tmp [sizeofUint64]byte
+	n, err := rawfile.BlockingReadUntranslated(ev.fd, tmp[:])
+	if err != 0 {
+		return 0, err
+	}
+	if n == 0 {
+		return 0, io.EOF
+	}
+	if n != sizeofUint64 {
+		panic(fmt.Sprintf("short read from eventfd: got %d bytes, wanted %d", n, sizeofUint64))
+	}
+	return hostarch.ByteOrder.Uint64(tmp[:]), nil
+}
+
+// FD returns the underlying file descriptor. Use with care, as this breaks the
+// Eventfd abstraction.
+func (ev Eventfd) FD() int {
+	return ev.fd
+}
diff --git a/pkg/eventfd/eventfd_test.go b/pkg/eventfd/eventfd_test.go
new file mode 100644
index 000000000..96998d530
--- /dev/null
+++ b/pkg/eventfd/eventfd_test.go
@@ -0,0 +1,75 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package eventfd
+
+import (
+	"testing"
+	"time"
+)
+
+func TestReadWrite(t *testing.T) {
+	efd, err := Create()
+	if err != nil {
+		t.Fatalf("failed to Create(): %v", err)
+	}
+	defer efd.Close()
+
+	// Make sure a written value is read back unchanged.
+	const want = 343
+	if err := efd.Write(want); err != nil {
+		t.Fatalf("failed to write value %d: %v", want, err)
+	}
+
+	got, err := efd.Read()
+	if err != nil {
+		t.Fatalf("failed to read value: %v", err)
+	}
+	if got != want {
+		t.Fatalf("Read(): got %d, but wanted %d", got, want)
+	}
+}
+
+func TestWait(t *testing.T) {
+	efd, err := Create()
+	if err != nil {
+		t.Fatalf("failed to Create(): %v", err)
+	}
+	defer efd.Close()
+
+	// There's no way to test with certainty that Wait() blocks indefinitely, but
+	// as a best-effort we can wait a bit on it.
+	errCh := make(chan error, 1) // Buffered so the goroutine can exit even if nobody receives.
+	go func() {
+		errCh <- efd.Wait()
+	}()
+	select {
+	case err := <-errCh:
+		t.Fatalf("Wait() returned without a call to Notify(): %v", err)
+	case <-time.After(500 * time.Millisecond):
+	}
+
+	// Notify and check that Wait() returned.
+	if err := efd.Notify(); err != nil {
+		t.Fatalf("Notify() failed: %v", err)
+	}
+	select {
+	case err := <-errCh:
+		if err != nil {
+			t.Fatalf("Wait() failed: %v", err)
+		}
+	case <-time.After(5 * time.Second):
+		t.Fatalf("Wait() did not return after Notify()")
+	}
+}
diff --git a/pkg/ring0/defs.go b/pkg/ring0/defs.go
index b6e2012e8..38ce9be1e 100644
--- a/pkg/ring0/defs.go
+++ b/pkg/ring0/defs.go
@@ -77,6 +77,9 @@ type CPU struct {
 	// calls and exceptions via the Registers function.
 	registers arch.Registers
 
+	// floatingPointState holds floating point state.
+	floatingPointState fpu.State
+
 	// hooks are kernel hooks.
 	hooks Hooks
 }
@@ -90,6 +93,15 @@ func (c *CPU) Registers() *arch.Registers {
 	return &c.registers
 }
 
+// FloatingPointState returns a pointer to the kernel floating point state held in c.
+//
+// This is explicitly safe to call during KernelException and KernelSyscall.
+//
+//go:nosplit
+func (c *CPU) FloatingPointState() *fpu.State {
+	return &c.floatingPointState
+}
+
 // SwitchOpts are passed to the Switch function.
 type SwitchOpts struct {
 	// Registers are the user register state.
diff --git a/pkg/ring0/defs_amd64.go b/pkg/ring0/defs_amd64.go
index 24f6e4cde..81e90dbf7 100644
--- a/pkg/ring0/defs_amd64.go
+++ b/pkg/ring0/defs_amd64.go
@@ -116,6 +116,11 @@ type CPUArchState struct {
 	errorType uintptr
 
 	*kernelEntry
+
+	// Copies of global variables, stored in CPU so that they can be used by
+	// syscall and exception handlers (in the upper address space).
+	hasXSAVE    bool
+	hasXSAVEOPT bool
 }
 
 // ErrorCode returns the last error code.
diff --git a/pkg/ring0/entry_amd64.go b/pkg/ring0/entry_amd64.go
index afd646b0b..13ad4e4df 100644
--- a/pkg/ring0/entry_amd64.go
+++ b/pkg/ring0/entry_amd64.go
@@ -39,11 +39,6 @@ func sysenter()
 // assembly to get the ABI0 (i.e., primary) address.
 func addrOfSysenter() uintptr
 
-// swapgs swaps the current GS value.
-//
-// This must be called prior to sysret/iret.
-func swapgs()
-
 // jumpToKernel jumps to the kernel version of the current RIP.
 func jumpToKernel()
 
diff --git a/pkg/ring0/entry_amd64.s b/pkg/ring0/entry_amd64.s
index 520bd9f57..d2913f190 100644
--- a/pkg/ring0/entry_amd64.s
+++ b/pkg/ring0/entry_amd64.s
@@ -142,8 +142,103 @@ TEXT ·jumpToUser(SB),NOSPLIT,$0
 	MOVQ AX, 0(SP)
 	RET
 
+// doSwitchToUser: see the Go declaration in kernel_amd64.go.
+//
+// The 16-byte frame size is for the saved values of MXCSR and the x87 control
+// word.
+TEXT ·doSwitchToUser(SB),NOSPLIT,$16-48
+	// We are passed pointers to heap objects, but do not store them in our
+	// local frame.
+	NO_LOCAL_POINTERS
+
+	// MXCSR and the x87 control word are the only floating point state
+	// that is callee-save and thus we must save.
+	STMXCSR mxcsr-0(SP)
+	FSTCW cw-8(SP)
+
+	// Restore application floating point state.
+	MOVQ cpu+0(FP), SI
+	MOVQ fpState+16(FP), DI
+	MOVB ·hasXSAVE(SB), BX
+	TESTB BX, BX
+	JZ no_xrstor
+	// Use xrstor to restore all available fp state. For now, we restore
+	// everything unconditionally by setting the implicit operand edx:eax
+	// (the "requested feature bitmap") to all 1's.
+	MOVL $0xffffffff, AX
+	MOVL $0xffffffff, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x2f // XRSTOR64 0(DI)
+	JMP fprestore_done
+no_xrstor:
+	// Fall back to fxrstor if xsave is not available.
+	FXRSTOR64 0(DI)
+fprestore_done:
+
+	// Set application GS.
+	MOVQ regs+8(FP), R8
+	SWAP_GS()
+	MOVQ PTRACE_GS_BASE(R8), AX
+	PUSHQ AX // addr argument for writeGS.
+	CALL ·writeGS(SB)
+	POPQ AX
+
+	// Call sysret() or iret().
+	MOVQ userCR3+24(FP), CX
+	MOVQ needIRET+32(FP), R9
+	ADDQ $-32, SP // Room for the callee's three arguments and its result.
+	MOVQ SI, 0(SP)  // cpu
+	MOVQ R8, 8(SP)  // regs
+	MOVQ CX, 16(SP) // userCR3
+	TESTQ R9, R9
+	JNZ do_iret
+	CALL ·sysret(SB)
+	JMP done_sysret_or_iret
+do_iret:
+	CALL ·iret(SB)
+done_sysret_or_iret:
+	MOVQ 24(SP), AX // vector
+	ADDQ $32, SP
+	MOVQ AX, vector+40(FP)
+
+	// Save application floating point state.
+	MOVQ fpState+16(FP), DI
+	MOVB ·hasXSAVE(SB), BX
+	MOVB ·hasXSAVEOPT(SB), CX
+	TESTB BX, BX
+	JZ no_xsave
+	// Use xsave/xsaveopt to save all extended state.
+	// We save everything unconditionally by setting RFBM to all 1's.
+	MOVL $0xffffffff, AX
+	MOVL $0xffffffff, DX
+	TESTB CX, CX
+	JZ no_xsaveopt
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
+	JMP fpsave_done
+no_xsaveopt:
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
+	JMP fpsave_done
+no_xsave:
+	FXSAVE64 0(DI)
+fpsave_done:
+
+	// Restore MXCSR and the x87 control word after one of the two floating
+	// point save cases above, to ensure the application versions are saved
+	// before being clobbered here.
+	LDMXCSR mxcsr-0(SP)
+
+	// FLDCW is a "waiting" x87 instruction, meaning it checks for pending
+	// unmasked exceptions before executing. Thus if userspace has unmasked
+	// an exception and has one pending, it can be raised by FLDCW even
+	// though the new control word will mask exceptions. To prevent this,
+	// we must first clear pending exceptions (which will be restored by
+	// XRSTOR, et al).
+	BYTE $0xDB; BYTE $0xE2; // FNCLEX
+	FLDCW cw-8(SP)
+
+	RET
+
 // See entry_amd64.go.
-TEXT ·sysret(SB),NOSPLIT,$0-24
+TEXT ·sysret(SB),NOSPLIT,$0-32
 	// Set application FS. We can't do this in Go because Go code needs FS.
 	MOVQ regs+8(FP), AX
 	MOVQ PTRACE_FS_BASE(AX), AX
@@ -182,9 +277,11 @@ TEXT ·sysret(SB),NOSPLIT,$0-24
 	POPQ AX                             // Restore AX.
 	POPQ SP                             // Restore SP.
 	SYSRET64()
+	// sysenter or exception will write our return value and return to our
+	// caller.
 
 // See entry_amd64.go.
-TEXT ·iret(SB),NOSPLIT,$0-24
+TEXT ·iret(SB),NOSPLIT,$0-32
 	// Set application FS. We can't do this in Go because Go code needs FS.
 	MOVQ regs+8(FP), AX
 	MOVQ PTRACE_FS_BASE(AX), AX
@@ -220,6 +317,8 @@ TEXT ·iret(SB),NOSPLIT,$0-24
 	WRITE_CR3()                         // Switch to userCR3.
 	POPQ AX                             // Restore AX.
 	IRET()
+	// sysenter or exception will write our return value and return to our
+	// caller.
 
 // See entry_amd64.go.
 TEXT ·resume(SB),NOSPLIT,$0
@@ -324,11 +423,39 @@ kernel:
 	MOVQ $0,  CPU_ERROR_CODE(AX)                // Clear error code.
 	MOVQ $0,  CPU_ERROR_TYPE(AX)                // Set error type to kernel.
 
+	// Save floating point state. CPU.floatingPointState is a slice, so the
+	// first word of CPU.floatingPointState is a pointer to the destination
+	// array.
+	MOVQ CPU_FPU_STATE(AX), DI
+	MOVB CPU_HAS_XSAVE(AX), BX
+	MOVB CPU_HAS_XSAVEOPT(AX), CX
+	TESTB BX, BX
+	JZ no_xsave
+	// Use xsave/xsaveopt to save all extended state.
+	// We save everything unconditionally by setting RFBM to all 1's.
+	MOVL $0xffffffff, AX
+	MOVL $0xffffffff, DX
+	TESTB CX, CX
+	JZ no_xsaveopt
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
+	JMP fpsave_done
+no_xsaveopt:
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
+	JMP fpsave_done
+no_xsave:
+	FXSAVE64 0(DI)
+fpsave_done:
+
 	// Call the syscall trampoline.
 	LOAD_KERNEL_STACK(GS)
-	PUSHQ AX                // First argument (vCPU).
-	CALL ·kernelSyscall(SB) // Call the trampoline.
-	POPQ AX                 // Pop vCPU.
+	MOVQ ENTRY_CPU_SELF(GS), AX // AX contains the vCPU.
+	PUSHQ AX                    // First argument (vCPU).
+	CALL ·kernelSyscall(SB)     // Call the trampoline.
+	POPQ AX                     // Pop vCPU.
+
+	// We only trigger a bluepill entry in the bluepill function, and can
+	// therefore be guaranteed that there is no floating point state to be
+	// loaded on resuming from halt.
 	JMP ·resume(SB)
 
 ADDR_OF_FUNC(·addrOfSysenter(SB), ·sysenter(SB));
@@ -416,15 +543,43 @@ kernel:
 	MOVQ 8(SP), BX              // Load the error code.
 	MOVQ BX, CPU_ERROR_CODE(AX) // Copy out to the CPU.
 	MOVQ $0, CPU_ERROR_TYPE(AX) // Set error type to kernel.
-	MOVQ 0(SP), BX              // BX contains the vector.
+
+	// Save floating point state. CPU.floatingPointState is a slice, so the
+	// first word of CPU.floatingPointState is a pointer to the destination
+	// array.
+	MOVQ CPU_FPU_STATE(AX), DI
+	MOVB CPU_HAS_XSAVE(AX), BX
+	MOVB CPU_HAS_XSAVEOPT(AX), CX
+	TESTB BX, BX
+	JZ no_xsave
+	// Use xsave/xsaveopt to save all extended state.
+	// We save everything unconditionally by setting RFBM to all 1's.
+	MOVL $0xffffffff, AX
+	MOVL $0xffffffff, DX
+	TESTB CX, CX
+	JZ no_xsaveopt
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
+	JMP fpsave_done
+no_xsaveopt:
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
+	JMP fpsave_done
+no_xsave:
+	FXSAVE64 0(DI)
+fpsave_done:
 
 	// Call the exception trampoline.
+	MOVQ 0(SP), BX              // BX contains the vector.
 	LOAD_KERNEL_STACK(GS)
-	PUSHQ BX                  // Second argument (vector).
-	PUSHQ AX                  // First argument (vCPU).
-	CALL ·kernelException(SB) // Call the trampoline.
-	POPQ BX                   // Pop vector.
-	POPQ AX                   // Pop vCPU.
+	MOVQ ENTRY_CPU_SELF(GS), AX // AX contains the vCPU.
+	PUSHQ BX                    // Second argument (vector).
+	PUSHQ AX                    // First argument (vCPU).
+	CALL ·kernelException(SB)   // Call the trampoline.
+	POPQ BX                     // Pop vector.
+	POPQ AX                     // Pop vCPU.
+
+	// We only trigger a bluepill entry in the bluepill function, and can
+	// therefore be guaranteed that there is no floating point state to be
+	// loaded on resuming from halt.
 	JMP ·resume(SB)
 
 #define EXCEPTION_WITH_ERROR(value, symbol, addr) \
diff --git a/pkg/ring0/kernel.go b/pkg/ring0/kernel.go
index 292f9d0cc..e7dd84929 100644
--- a/pkg/ring0/kernel.go
+++ b/pkg/ring0/kernel.go
@@ -14,6 +14,10 @@
 
 package ring0
 
+import (
+	"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
+)
+
 // Init initializes a new kernel.
 //
 //go:nosplit
@@ -80,6 +84,7 @@ func (c *CPU) Init(k *Kernel, cpuID int, hooks Hooks) {
 	c.self = c    // Set self reference.
 	c.kernel = k  // Set kernel reference.
 	c.init(cpuID) // Perform architectural init.
+	c.floatingPointState = fpu.NewState()
 
 	// Require hooks.
 	if hooks != nil {
diff --git a/pkg/ring0/kernel_amd64.go b/pkg/ring0/kernel_amd64.go
index 4a4c0ae26..7e55011b5 100644
--- a/pkg/ring0/kernel_amd64.go
+++ b/pkg/ring0/kernel_amd64.go
@@ -143,6 +143,9 @@ func (c *CPU) init(cpuID int) {
 
 	// Set mandatory flags.
 	c.registers.Eflags = KernelFlagsSet
+
+	c.hasXSAVE = hasXSAVE
+	c.hasXSAVEOPT = hasXSAVEOPT
 }
 
 // StackTop returns the kernel's stack address.
@@ -248,19 +251,21 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
 	regs.Ss = uint64(Udata)   // Ditto.
 
 	// Perform the switch.
-	swapgs()                                                       // GS will be swapped on return.
-	WriteGS(uintptr(regs.Gs_base))                                 // escapes: no. Set application GS.
-	LoadFloatingPoint(switchOpts.FloatingPointState.BytePointer()) // escapes: no. Copy in floating point.
+	needIRET := uint64(0)
 	if switchOpts.FullRestore {
-		vector = iret(c, regs, uintptr(userCR3))
-	} else {
-		vector = sysret(c, regs, uintptr(userCR3))
+		needIRET = 1
 	}
-	SaveFloatingPoint(switchOpts.FloatingPointState.BytePointer()) // escapes: no. Copy out floating point.
-	RestoreKernelFPState()                                         // escapes: no. Restore kernel MXCSR.
+	vector = doSwitchToUser(c, regs, switchOpts.FloatingPointState.BytePointer(), userCR3, needIRET) // escapes: no.
 	return
 }
 
+func doSwitchToUser( // Implemented in assembly; see entry_amd64.s.
+	cpu *CPU, // +0(FP)
+	regs *arch.Registers, // +8(FP)
+	fpState *byte, // +16(FP)
+	userCR3 uint64, // +24(FP)
+	needIRET uint64) Vector // +32(FP), +40(FP)
+
 var (
 	sentryXCR0     uintptr
 	sentryXCR0Once sync.Once
@@ -287,7 +292,7 @@ func initSentryXCR0() {
 //go:nosplit
 func startGo(c *CPU) {
 	// Save per-cpu.
-	WriteGS(kernelAddr(c.kernelEntry))
+	writeGS(kernelAddr(c.kernelEntry))
 
 	//
 	// TODO(mpratt): Note that per the note above, this should be done
diff --git a/pkg/ring0/lib_amd64.go b/pkg/ring0/lib_amd64.go
index 05c394ff5..c42a5b205 100644
--- a/pkg/ring0/lib_amd64.go
+++ b/pkg/ring0/lib_amd64.go
@@ -21,29 +21,6 @@ import (
 	"gvisor.dev/gvisor/pkg/cpuid"
 )
 
-// LoadFloatingPoint loads floating point state by the most efficient mechanism
-// available (set by Init).
-var LoadFloatingPoint func(*byte)
-
-// SaveFloatingPoint saves floating point state by the most efficient mechanism
-// available (set by Init).
-var SaveFloatingPoint func(*byte)
-
-// fxrstor uses fxrstor64 to load floating point state.
-func fxrstor(*byte)
-
-// xrstor uses xrstor to load floating point state.
-func xrstor(*byte)
-
-// fxsave uses fxsave64 to save floating point state.
-func fxsave(*byte)
-
-// xsave uses xsave to save floating point state.
-func xsave(*byte)
-
-// xsaveopt uses xsaveopt to save floating point state.
-func xsaveopt(*byte)
-
 // writeFS sets the FS base address (selects one of wrfsbase or wrfsmsr).
 func writeFS(addr uintptr)
 
@@ -53,8 +30,8 @@ func wrfsbase(addr uintptr)
 // wrfsmsr writes to the GS_BASE MSR.
 func wrfsmsr(addr uintptr)
 
-// WriteGS sets the GS address (set by init).
-var WriteGS func(addr uintptr)
+// writeGS sets the GS address (selects one of wrgsbase or wrgsmsr).
+func writeGS(addr uintptr)
 
 // wrgsbase writes to the GS base address.
 func wrgsbase(addr uintptr)
@@ -106,19 +83,4 @@ func Init(featureSet *cpuid.FeatureSet) {
 	hasXSAVE = featureSet.UseXsave()
 	hasFSGSBASE = featureSet.HasFeature(cpuid.X86FeatureFSGSBase)
 	validXCR0Mask = uintptr(featureSet.ValidXCR0Mask())
-	if hasXSAVEOPT {
-		SaveFloatingPoint = xsaveopt
-		LoadFloatingPoint = xrstor
-	} else if hasXSAVE {
-		SaveFloatingPoint = xsave
-		LoadFloatingPoint = xrstor
-	} else {
-		SaveFloatingPoint = fxsave
-		LoadFloatingPoint = fxrstor
-	}
-	if hasFSGSBASE {
-		WriteGS = wrgsbase
-	} else {
-		WriteGS = wrgsmsr
-	}
 }
diff --git a/pkg/ring0/lib_amd64.s b/pkg/ring0/lib_amd64.s
index 8ed98fc84..0f283aaae 100644
--- a/pkg/ring0/lib_amd64.s
+++ b/pkg/ring0/lib_amd64.s
@@ -128,6 +128,29 @@ TEXT ·wrfsmsr(SB),NOSPLIT,$0-8
 	BYTE $0x0f; BYTE $0x30;
 	RET
 
+// writeGS writes addr to the GS base, via wrgsbase if hasFSGSBASE is 1 and wrgsmsr otherwise.
+//
+// This is written in assembly because it must be callable from assembly (ABI0)
+// without an intermediate transition to ABIInternal.
+//
+// Preconditions: must be running in the lower address space, as it accesses
+// global data.
+TEXT ·writeGS(SB),NOSPLIT,$8-8
+	MOVQ addr+0(FP), AX
+
+	CMPB ·hasFSGSBASE(SB), $1
+	JNE msr
+
+	PUSHQ AX // addr argument for wrgsbase.
+	CALL ·wrgsbase(SB)
+	POPQ AX
+	RET
+msr:
+	PUSHQ AX // addr argument for wrgsmsr.
+	CALL ·wrgsmsr(SB)
+	POPQ AX
+	RET
+
 // wrgsbase writes to the GS base.
 //
 // The code corresponds to:
diff --git a/pkg/ring0/offsets_amd64.go b/pkg/ring0/offsets_amd64.go
index 75f6218b3..38fe27c35 100644
--- a/pkg/ring0/offsets_amd64.go
+++ b/pkg/ring0/offsets_amd64.go
@@ -35,6 +35,9 @@ func Emit(w io.Writer) {
 	fmt.Fprintf(w, "#define CPU_ERROR_CODE       0x%02x\n", reflect.ValueOf(&c.errorCode).Pointer()-reflect.ValueOf(c).Pointer())
 	fmt.Fprintf(w, "#define CPU_ERROR_TYPE       0x%02x\n", reflect.ValueOf(&c.errorType).Pointer()-reflect.ValueOf(c).Pointer())
 	fmt.Fprintf(w, "#define CPU_ENTRY            0x%02x\n", reflect.ValueOf(&c.kernelEntry).Pointer()-reflect.ValueOf(c).Pointer())
+	fmt.Fprintf(w, "#define CPU_HAS_XSAVE        0x%02x\n", reflect.ValueOf(&c.hasXSAVE).Pointer()-reflect.ValueOf(c).Pointer())
+	fmt.Fprintf(w, "#define CPU_HAS_XSAVEOPT     0x%02x\n", reflect.ValueOf(&c.hasXSAVEOPT).Pointer()-reflect.ValueOf(c).Pointer())
+	fmt.Fprintf(w, "#define CPU_FPU_STATE        0x%02x\n", reflect.ValueOf(&c.floatingPointState).Pointer()-reflect.ValueOf(c).Pointer())
 
 	e := &kernelEntry{}
 	fmt.Fprintf(w, "\n// CPU entry offsets.\n")
diff --git a/pkg/safecopy/BUILD b/pkg/safecopy/BUILD
index 0a045fc8e..2a1602e2b 100644
--- a/pkg/safecopy/BUILD
+++ b/pkg/safecopy/BUILD
@@ -18,9 +18,9 @@ go_library(
     ],
     visibility = ["//:sandbox"],
     deps = [
-        "//pkg/abi/linux",
         "//pkg/errors",
         "//pkg/errors/linuxerr",
+        "//pkg/sighandling",
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/pkg/safecopy/safecopy.go b/pkg/safecopy/safecopy.go
index a9711e63d..0dd0aea83 100644
--- a/pkg/safecopy/safecopy.go
+++ b/pkg/safecopy/safecopy.go
@@ -23,6 +23,7 @@ import (
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/errors"
 	"gvisor.dev/gvisor/pkg/errors/linuxerr"
+	"gvisor.dev/gvisor/pkg/sighandling"
 )
 
 // SegvError is returned when a safecopy function receives SIGSEGV.
@@ -132,10 +133,10 @@ func initializeAddresses() {
 
 func init() {
 	initializeAddresses()
-	if err := ReplaceSignalHandler(unix.SIGSEGV, addrOfSignalHandler(), &savedSigSegVHandler); err != nil {
+	if err := sighandling.ReplaceSignalHandler(unix.SIGSEGV, addrOfSignalHandler(), &savedSigSegVHandler); err != nil {
 		panic(fmt.Sprintf("Unable to set handler for SIGSEGV: %v", err))
 	}
-	if err := ReplaceSignalHandler(unix.SIGBUS, addrOfSignalHandler(), &savedSigBusHandler); err != nil {
+	if err := sighandling.ReplaceSignalHandler(unix.SIGBUS, addrOfSignalHandler(), &savedSigBusHandler); err != nil {
 		panic(fmt.Sprintf("Unable to set handler for SIGBUS: %v", err))
 	}
 	linuxerr.AddErrorUnwrapper(func(e error) (*errors.Error, bool) {
diff --git a/pkg/safecopy/safecopy_unsafe.go b/pkg/safecopy/safecopy_unsafe.go
index 2365b2c0d..15f84abea 100644
--- a/pkg/safecopy/safecopy_unsafe.go
+++ b/pkg/safecopy/safecopy_unsafe.go
@@ -20,7 +20,6 @@ import (
 	"unsafe"
 
 	"golang.org/x/sys/unix"
-	"gvisor.dev/gvisor/pkg/abi/linux"
 )
 
 // maxRegisterSize is the maximum register size used in memcpy and memclr. It
@@ -332,39 +331,3 @@ func errorFromFaultSignal(addr uintptr, sig int32) error {
 		panic(fmt.Sprintf("safecopy got unexpected signal %d at address %#x", sig, addr))
 	}
 }
-
-// ReplaceSignalHandler replaces the existing signal handler for the provided
-// signal with the one that handles faults in safecopy-protected functions.
-//
-// It stores the value of the previously set handler in previous.
-//
-// This function will be called on initialization in order to install safecopy
-// handlers for appropriate signals. These handlers will call the previous
-// handler however, and if this is function is being used externally then the
-// same courtesy is expected.
-func ReplaceSignalHandler(sig unix.Signal, handler uintptr, previous *uintptr) error {
-	var sa linux.SigAction
-	const maskLen = 8
-
-	// Get the existing signal handler information, and save the current
-	// handler. Once we replace it, we will use this pointer to fall back to
-	// it when we receive other signals.
-	if _, _, e := unix.RawSyscall6(unix.SYS_RT_SIGACTION, uintptr(sig), 0, uintptr(unsafe.Pointer(&sa)), maskLen, 0, 0); e != 0 {
-		return e
-	}
-
-	// Fail if there isn't a previous handler.
-	if sa.Handler == 0 {
-		return fmt.Errorf("previous handler for signal %x isn't set", sig)
-	}
-
-	*previous = uintptr(sa.Handler)
-
-	// Install our own handler.
-	sa.Handler = uint64(handler)
-	if _, _, e := unix.RawSyscall6(unix.SYS_RT_SIGACTION, uintptr(sig), uintptr(unsafe.Pointer(&sa)), 0, maskLen, 0, 0); e != 0 {
-		return e
-	}
-
-	return nil
-}
diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go
index 4370cce33..d2eb03bb7 100644
--- a/pkg/sentry/fs/fdpipe/pipe.go
+++ b/pkg/sentry/fs/fdpipe/pipe.go
@@ -45,7 +45,8 @@ type pipeOperations struct {
 	fsutil.FileNoIoctl              `state:"nosave"`
 	fsutil.FileNoSplice             `state:"nosave"`
 	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
-	waiter.Queue                    `state:"nosave"`
+
+	waiter.Queue
 
 	// flags are the flags used to open the pipe.
 	flags fs.FileFlags `state:".(fs.FileFlags)"`
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index 92d58e3e9..99c37291e 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -70,7 +70,7 @@ type inodeFileState struct {
 	descriptor *descriptor `state:"wait"`
 
 	// Event queue for blocking operations.
-	queue waiter.Queue `state:"zerovalue"`
+	queue waiter.Queue
 
 	// sattr is used to restore the inodeOperations.
 	sattr fs.StableAttr `state:"wait"`
diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go
index 51cd6cd37..941f37116 100644
--- a/pkg/sentry/fs/inotify.go
+++ b/pkg/sentry/fs/inotify.go
@@ -43,7 +43,7 @@ type Inotify struct {
 	// user, since we may aggressively reuse an id on S/R.
 	id uint64
 
-	waiter.Queue `state:"nosave"`
+	waiter.Queue
 
 	// evMu *only* protects the events list. We need a separate lock because
 	// while queuing events, a watch needs to lock the event queue, and using mu
diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go
index 7d7a207cc..e39d340fe 100644
--- a/pkg/sentry/fs/lock/lock.go
+++ b/pkg/sentry/fs/lock/lock.go
@@ -132,7 +132,7 @@ type Locks struct {
 	locks LockSet
 
 	// blockedQueue is the queue of waiters that are waiting on a lock.
-	blockedQueue waiter.Queue `state:"zerovalue"`
+	blockedQueue waiter.Queue
 }
 
 // Blocker is the interface used for blocking locks. Passing a nil Blocker
diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go
index 085aa6d61..443b9a94c 100644
--- a/pkg/sentry/fs/proc/sys.go
+++ b/pkg/sentry/fs/proc/sys.go
@@ -109,6 +109,9 @@ func (p *proc) newKernelDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode
 		"shmall":   newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMALL, 10))),
 		"shmmax":   newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMMAX, 10))),
 		"shmmni":   newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMMNI, 10))),
+		"msgmni":   newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.MSGMNI, 10))),
+		"msgmax":   newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.MSGMAX, 10))),
+		"msgmnb":   newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.MSGMNB, 10))),
 	}
 
 	d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555))
diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go
index 1c8518d71..ca8be8683 100644
--- a/pkg/sentry/fs/timerfd/timerfd.go
+++ b/pkg/sentry/fs/timerfd/timerfd.go
@@ -43,7 +43,7 @@ type TimerOperations struct {
 	fsutil.FileNoopFlush            `state:"nosave"`
 	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
 
-	events waiter.Queue `state:"zerovalue"`
+	events waiter.Queue
 	timer  *ktime.Timer
 
 	// val is the number of timer expirations since the last successful call to
diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go
index f9fca6d8e..f2c9e9668 100644
--- a/pkg/sentry/fs/tty/line_discipline.go
+++ b/pkg/sentry/fs/tty/line_discipline.go
@@ -102,10 +102,10 @@ type lineDiscipline struct {
 	column int
 
 	// masterWaiter is used to wait on the master end of the TTY.
-	masterWaiter waiter.Queue `state:"zerovalue"`
+	masterWaiter waiter.Queue
 
 	// replicaWaiter is used to wait on the replica end of the TTY.
-	replicaWaiter waiter.Queue `state:"zerovalue"`
+	replicaWaiter waiter.Queue
 }
 
 func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline {
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 7bef8242f..b98825e26 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1595,7 +1595,10 @@ func (d *dentry) checkXattrPermissions(creds *auth.Credentials, name string, ats
 	// (b/148380782). Allow all other extended attributes to be passed through
 	// to the remote filesystem. This is inconsistent with Linux's 9p client,
 	// but consistent with other filesystems (e.g. FUSE).
-	if strings.HasPrefix(name, linux.XATTR_SECURITY_PREFIX) || strings.HasPrefix(name, linux.XATTR_SYSTEM_PREFIX) {
+	//
+	// NOTE(b/202533394): Also disallow "trusted" namespace for now. This is
+	// consistent with the VFS1 gofer client.
+	if strings.HasPrefix(name, linux.XATTR_SECURITY_PREFIX) || strings.HasPrefix(name, linux.XATTR_SYSTEM_PREFIX) || strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX) {
 		return linuxerr.EOPNOTSUPP
 	}
 	mode := linux.FileMode(atomic.LoadUint32(&d.mode))
@@ -2046,16 +2049,7 @@ func (d *dentry) listXattr(ctx context.Context, size uint64) ([]string, error) {
 	}
 
 	if d.fs.opts.lisaEnabled {
-		xattrs, err := d.controlFDLisa.ListXattr(ctx, size)
-		if err != nil {
-			return nil, err
-		}
-
-		res := make([]string, 0, len(xattrs))
-		for _, xattr := range xattrs {
-			res = append(res, xattr)
-		}
-		return res, nil
+		return d.controlFDLisa.ListXattr(ctx, size)
 	}
 
 	xattrMap, err := d.file.listXattr(ctx, size)
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index 26d44744b..7b0be9c14 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -268,6 +268,6 @@ func cpuInfoData(k *kernel.Kernel) string {
 	return buf.String()
 }
 
-func shmData(v uint64) dynamicInode {
+func ipcData(v uint64) dynamicInode {
 	return newStaticFile(strconv.FormatUint(v, 10))
 }
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index 99f64a9d8..82e2857b3 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -47,9 +47,12 @@ func (fs *filesystem) newSysDir(ctx context.Context, root *auth.Credentials, k *
 		"kernel": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{
 			"hostname": fs.newInode(ctx, root, 0444, &hostnameData{}),
 			"sem":      fs.newInode(ctx, root, 0444, newStaticFile(fmt.Sprintf("%d\t%d\t%d\t%d\n", linux.SEMMSL, linux.SEMMNS, linux.SEMOPM, linux.SEMMNI))),
-			"shmall":   fs.newInode(ctx, root, 0444, shmData(linux.SHMALL)),
-			"shmmax":   fs.newInode(ctx, root, 0444, shmData(linux.SHMMAX)),
-			"shmmni":   fs.newInode(ctx, root, 0444, shmData(linux.SHMMNI)),
+			"shmall":   fs.newInode(ctx, root, 0444, ipcData(linux.SHMALL)),
+			"shmmax":   fs.newInode(ctx, root, 0444, ipcData(linux.SHMMAX)),
+			"shmmni":   fs.newInode(ctx, root, 0444, ipcData(linux.SHMMNI)),
+			"msgmni":   fs.newInode(ctx, root, 0444, ipcData(linux.MSGMNI)),
+			"msgmax":   fs.newInode(ctx, root, 0444, ipcData(linux.MSGMAX)),
+			"msgmnb":   fs.newInode(ctx, root, 0444, ipcData(linux.MSGMNB)),
 			"yama": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{
 				"ptrace_scope": fs.newYAMAPtraceScopeFile(ctx, k, root),
 			}),
diff --git a/pkg/sentry/hostmm/BUILD b/pkg/sentry/hostmm/BUILD
index 66fa1ad40..03c8e2f38 100644
--- a/pkg/sentry/hostmm/BUILD
+++ b/pkg/sentry/hostmm/BUILD
@@ -12,8 +12,7 @@ go_library(
     visibility = ["//pkg/sentry:internal"],
     deps = [
         "//pkg/abi/linux",
-        "//pkg/fd",
-        "//pkg/hostarch",
+        "//pkg/eventfd",
         "//pkg/log",
         "@org_golang_x_sys//unix:go_default_library",
     ],
diff --git a/pkg/sentry/hostmm/hostmm.go b/pkg/sentry/hostmm/hostmm.go
index 285ea9050..5df06a60f 100644
--- a/pkg/sentry/hostmm/hostmm.go
+++ b/pkg/sentry/hostmm/hostmm.go
@@ -21,9 +21,7 @@ import (
 	"os"
 	"path"
 
-	"golang.org/x/sys/unix"
-	"gvisor.dev/gvisor/pkg/fd"
-	"gvisor.dev/gvisor/pkg/hostarch"
+	"gvisor.dev/gvisor/pkg/eventfd"
 	"gvisor.dev/gvisor/pkg/log"
 )
 
@@ -54,7 +52,7 @@ func NotifyCurrentMemcgPressureCallback(f func(), level string) (func(), error)
 	}
 	defer eventControlFile.Close()
 
-	eventFD, err := newEventFD()
+	eventFD, err := eventfd.Create()
 	if err != nil {
 		return nil, err
 	}
@@ -75,20 +73,11 @@ func NotifyCurrentMemcgPressureCallback(f func(), level string) (func(), error)
 	const stopVal = 1 << 63
 	stopCh := make(chan struct{})
 	go func() { // S/R-SAFE: f provides synchronization if necessary
-		rw := fd.NewReadWriter(eventFD.FD())
-		var buf [sizeofUint64]byte
 		for {
-			n, err := rw.Read(buf[:])
+			val, err := eventFD.Read()
 			if err != nil {
-				if err == unix.EINTR {
-					continue
-				}
 				panic(fmt.Sprintf("failed to read from memory pressure level eventfd: %v", err))
 			}
-			if n != sizeofUint64 {
-				panic(fmt.Sprintf("short read from memory pressure level eventfd: got %d bytes, wanted %d", n, sizeofUint64))
-			}
-			val := hostarch.ByteOrder.Uint64(buf[:])
 			if val >= stopVal {
 				// Assume this was due to the notifier's "destructor" (the
 				// function returned by NotifyCurrentMemcgPressureCallback
@@ -101,30 +90,11 @@ func NotifyCurrentMemcgPressureCallback(f func(), level string) (func(), error)
 		}
 	}()
 	return func() {
-		rw := fd.NewReadWriter(eventFD.FD())
-		var buf [sizeofUint64]byte
-		hostarch.ByteOrder.PutUint64(buf[:], stopVal)
-		for {
-			n, err := rw.Write(buf[:])
-			if err != nil {
-				if err == unix.EINTR {
-					continue
-				}
-				panic(fmt.Sprintf("failed to write to memory pressure level eventfd: %v", err))
-			}
-			if n != sizeofUint64 {
-				panic(fmt.Sprintf("short write to memory pressure level eventfd: got %d bytes, wanted %d", n, sizeofUint64))
-			}
-			break
-		}
+		// Don't drop a failed stop request: the reader goroutine would never
+		// see stopVal, so the receive on stopCh below would block forever.
+		if err := eventFD.Write(stopVal); err != nil {
+			panic(fmt.Sprintf("failed to write to memory pressure level eventfd: %v", err))
+		}
 		<-stopCh
 	}, nil
 }
-
-func newEventFD() (*fd.FD, error) {
-	f, _, e := unix.Syscall(unix.SYS_EVENTFD2, 0, 0, 0)
-	if e != 0 {
-		return nil, fmt.Errorf("failed to create eventfd: %v", e)
-	}
-	return fd.New(int(f)), nil
-}
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index c0f13bf52..53a21e1e2 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -255,7 +255,6 @@ go_library(
         "//pkg/sentry/hostcpu",
         "//pkg/sentry/inet",
         "//pkg/sentry/kernel/auth",
-        "//pkg/sentry/kernel/epoll",
         "//pkg/sentry/kernel/futex",
         "//pkg/sentry/kernel/msgqueue",
         "//pkg/sentry/kernel/sched",
diff --git a/pkg/sentry/kernel/epoll/epoll.go b/pkg/sentry/kernel/epoll/epoll.go
index 6006c46a9..8d0a21baf 100644
--- a/pkg/sentry/kernel/epoll/epoll.go
+++ b/pkg/sentry/kernel/epoll/epoll.go
@@ -66,7 +66,7 @@ type pollEntry struct {
 	file     *refs.WeakRef  `state:"manual"`
 	id       FileIdentifier `state:"wait"`
 	userData [2]int32
-	waiter   waiter.Entry `state:"manual"`
+	waiter   waiter.Entry
 	mask     waiter.EventMask
 	flags    EntryFlags
 
@@ -102,7 +102,7 @@ type EventPoll struct {
 
 	// Wait queue is used to notify interested parties when the event poll
 	// object itself becomes readable or writable.
-	waiter.Queue `state:"zerovalue"`
+	waiter.Queue
 
 	// files is the map of all the files currently being observed, it is
 	// protected by mu.
@@ -454,14 +454,3 @@ func (e *EventPoll) RemoveEntry(ctx context.Context, id FileIdentifier) error {
 
 	return nil
 }
-
-// UnregisterEpollWaiters removes the epoll waiter objects from the waiting
-// queues. This is different from Release() as the file is not dereferenced.
-func (e *EventPoll) UnregisterEpollWaiters() {
-	e.mu.Lock()
-	defer e.mu.Unlock()
-
-	for _, entry := range e.files {
-		entry.id.File.EventUnregister(&entry.waiter)
-	}
-}
diff --git a/pkg/sentry/kernel/epoll/epoll_state.go b/pkg/sentry/kernel/epoll/epoll_state.go
index e08d6287f..135a6d72c 100644
--- a/pkg/sentry/kernel/epoll/epoll_state.go
+++ b/pkg/sentry/kernel/epoll/epoll_state.go
@@ -21,9 +21,7 @@ import (
 
 // afterLoad is invoked by stateify.
 func (p *pollEntry) afterLoad() {
-	p.waiter.Callback = p
 	p.file = refs.NewWeakRef(p.id.File, p)
-	p.id.File.EventRegister(&p.waiter, p.mask)
 }
 
 // afterLoad is invoked by stateify.
diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go
index 5ea44a2c2..bf625dede 100644
--- a/pkg/sentry/kernel/eventfd/eventfd.go
+++ b/pkg/sentry/kernel/eventfd/eventfd.go
@@ -54,7 +54,7 @@ type EventOperations struct {
 
 	// Queue is used to notify interested parties when the event object
 	// becomes readable or writable.
-	wq waiter.Queue `state:"zerovalue"`
+	wq waiter.Queue
 
 	// val is the current value of the event counter.
 	val uint64
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index df5160b67..5dc821a48 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -57,7 +57,6 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/hostcpu"
 	"gvisor.dev/gvisor/pkg/sentry/inet"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/sched"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
@@ -78,11 +77,19 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip"
 )
 
-// VFS2Enabled is set to true when VFS2 is enabled. Added as a global for allow
-// easy access everywhere. To be removed once VFS2 becomes the default.
+// VFS2Enabled is set to true when VFS2 is enabled. Added as a global to allow
+// easy access everywhere.
+//
+// TODO(gvisor.dev/issue/1624): Remove when VFS1 is no longer used.
 var VFS2Enabled = false
 
-// FUSEEnabled is set to true when FUSE is enabled. Added as a global for allow
+// LISAFSEnabled is set to true when the lisafs protocol is enabled. Added as a
+// global to allow easy access everywhere.
+//
+// TODO(gvisor.dev/issue/6319): Remove when lisafs is default.
+var LISAFSEnabled = false
+
+// FUSEEnabled is set to true when FUSE is enabled. Added as a global to allow
 // easy access everywhere. To be removed once FUSE is completed.
 var FUSEEnabled = false
 
@@ -478,11 +485,6 @@ func (k *Kernel) SaveTo(ctx context.Context, w wire.Writer) error {
 			return err
 		}
 
-		// Remove all epoll waiter objects from underlying wait queues.
-		// NOTE: for programs to resume execution in future snapshot scenarios,
-		// we will need to re-establish these waiter objects after saving.
-		k.tasks.unregisterEpollWaiters(ctx)
-
 		// Clear the dirent cache before saving because Dirents must be Loaded in a
 		// particular order (parents before children), and Loading dirents from a cache
 		// breaks that order.
@@ -615,32 +617,6 @@ func (k *Kernel) flushWritesToFiles(ctx context.Context) error {
 	})
 }
 
-// Preconditions: !VFS2Enabled.
-func (ts *TaskSet) unregisterEpollWaiters(ctx context.Context) {
-	ts.mu.RLock()
-	defer ts.mu.RUnlock()
-
-	// Tasks that belong to the same process could potentially point to the
-	// same FDTable. So we retain a map of processed ones to avoid
-	// processing the same FDTable multiple times.
-	processed := make(map[*FDTable]struct{})
-	for t := range ts.Root.tids {
-		// We can skip locking Task.mu here since the kernel is paused.
-		if t.fdTable == nil {
-			continue
-		}
-		if _, ok := processed[t.fdTable]; ok {
-			continue
-		}
-		t.fdTable.forEach(ctx, func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) {
-			if e, ok := file.FileOperations.(*epoll.EventPoll); ok {
-				e.UnregisterEpollWaiters()
-			}
-		})
-		processed[t.fdTable] = struct{}{}
-	}
-}
-
 // Preconditions: The kernel must be paused.
 func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error {
 	invalidated := make(map[*mm.MemoryManager]struct{})
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index 86beee6fe..8345473f3 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -55,7 +55,7 @@ const (
 //
 // +stateify savable
 type Pipe struct {
-	waiter.Queue `state:"nosave"`
+	waiter.Queue
 
 	// isNamed indicates whether this is a named pipe.
 	//
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index b0004482c..1ea3c1bf7 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -158,7 +158,7 @@ type Task struct {
 	// signalQueue is protected by the signalMutex. Note that the task does
 	// not implement all queue methods, specifically the readiness checks.
 	// The task only broadcast a notification on signal delivery.
-	signalQueue waiter.Queue `state:"zerovalue"`
+	signalQueue waiter.Queue
 
 	// If groupStopPending is true, the task should participate in a group
 	// stop in the interrupt path.
diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go
index c5b099559..f0c168ecc 100644
--- a/pkg/sentry/kernel/task_log.go
+++ b/pkg/sentry/kernel/task_log.go
@@ -191,9 +191,11 @@ const (
 //
 // Preconditions: The task's owning TaskSet.mu must be locked.
 func (t *Task) updateInfoLocked() {
-	// Use the task's TID in the root PID namespace for logging.
+	// Use the task's TID and PID in the root PID namespace for logging.
+	pid := t.tg.pidns.owner.Root.tgids[t.tg]
 	tid := t.tg.pidns.owner.Root.tids[t]
-	t.logPrefix.Store(fmt.Sprintf("[% 4d] ", tid))
+	t.logPrefix.Store(fmt.Sprintf("[% 4d:% 4d] ", pid, tid))
+
 	t.rebuildTraceContext(tid)
 }
 
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index 77ad62445..e38b723ce 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -324,11 +324,7 @@ type threadGroupNode struct {
 	// eventQueue is notified whenever a event of interest to Task.Wait occurs
 	// in a child of this thread group, or a ptrace tracee of a task in this
 	// thread group. Events are defined in task_exit.go.
-	//
-	// Note that we cannot check and save this wait queue similarly to other
-	// wait queues, as the queue will not be empty by the time of saving, due
-	// to the wait sourced from Exec().
-	eventQueue waiter.Queue `state:"nosave"`
+	eventQueue waiter.Queue
 
 	// leader is the thread group's leader, which is the oldest task in the
 	// thread group; usually the last task in the thread group to call
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index a26f54269..834d72408 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -63,7 +63,6 @@ go_library(
         "//pkg/procid",
         "//pkg/ring0",
         "//pkg/ring0/pagetables",
-        "//pkg/safecopy",
         "//pkg/seccomp",
         "//pkg/sentry/arch",
         "//pkg/sentry/arch/fpu",
@@ -71,6 +70,7 @@ go_library(
         "//pkg/sentry/platform",
         "//pkg/sentry/platform/interrupt",
         "//pkg/sentry/time",
+        "//pkg/sighandling",
         "//pkg/sync",
         "@org_golang_x_sys//unix:go_default_library",
     ],
diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go
index 826997e77..5be2215ed 100644
--- a/pkg/sentry/platform/kvm/bluepill.go
+++ b/pkg/sentry/platform/kvm/bluepill.go
@@ -19,8 +19,8 @@ import (
 
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/ring0"
-	"gvisor.dev/gvisor/pkg/safecopy"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
+	"gvisor.dev/gvisor/pkg/sighandling"
 )
 
 // bluepill enters guest mode.
@@ -97,7 +97,7 @@ func (c *vCPU) die(context *arch.SignalContext64, msg string) {
 
 func init() {
 	// Install the handler.
-	if err := safecopy.ReplaceSignalHandler(bluepillSignal, addrOfSighandler(), &savedHandler); err != nil {
+	if err := sighandling.ReplaceSignalHandler(bluepillSignal, addrOfSighandler(), &savedHandler); err != nil {
 		panic(fmt.Sprintf("Unable to set handler for signal %d: %v", bluepillSignal, err))
 	}
 
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go
index 0567c8d32..b2db2bb9f 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64.go
@@ -71,10 +71,6 @@ func (c *vCPU) KernelSyscall() {
 	if regs.Rax != ^uint64(0) {
 		regs.Rip -= 2 // Rewind.
 	}
-	// We only trigger a bluepill entry in the bluepill function, and can
-	// therefore be guaranteed that there is no floating point state to be
-	// loaded on resuming from halt. We only worry about saving on exit.
-	ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no.
 	// N.B. Since KernelSyscall is called when the kernel makes a syscall,
 	// FS_BASE is already set for correct execution of this function.
 	//
@@ -112,8 +108,6 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
 		regs.Rip = 0
 	}
 	// See above.
-	ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no.
-	// See above.
 	ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment.
 }
 
@@ -144,5 +138,5 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
 	// Set the context pointer to the saved floating point state. This is
 	// where the guest data has been serialized, the kernel will restore
 	// from this new pointer value.
-	context.Fpstate = uint64(uintptrValue(c.floatingPointState.BytePointer()))
+	context.Fpstate = uint64(uintptrValue(c.FloatingPointState().BytePointer())) // escapes: no.
 }
diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go
index acb0cb05f..df772d620 100644
--- a/pkg/sentry/platform/kvm/bluepill_arm64.go
+++ b/pkg/sentry/platform/kvm/bluepill_arm64.go
@@ -70,7 +70,7 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
 
 	lazyVfp := c.GetLazyVFP()
 	if lazyVfp != 0 {
-		fpsimd := fpsimdPtr(c.floatingPointState.BytePointer())
+		fpsimd := fpsimdPtr(c.FloatingPointState().BytePointer()) // escapes: no
 		context.Fpsimd64.Fpsr = fpsimd.Fpsr
 		context.Fpsimd64.Fpcr = fpsimd.Fpcr
 		context.Fpsimd64.Vregs = fpsimd.Vregs
@@ -90,12 +90,12 @@ func (c *vCPU) KernelSyscall() {
 
 	fpDisableTrap := ring0.CPACREL1()
 	if fpDisableTrap != 0 {
-		fpsimd := fpsimdPtr(c.floatingPointState.BytePointer())
+		fpsimd := fpsimdPtr(c.FloatingPointState().BytePointer()) // escapes: no
 		fpcr := ring0.GetFPCR()
 		fpsr := ring0.GetFPSR()
 		fpsimd.Fpcr = uint32(fpcr)
 		fpsimd.Fpsr = uint32(fpsr)
-		ring0.SaveVRegs(c.floatingPointState.BytePointer())
+		ring0.SaveVRegs(c.FloatingPointState().BytePointer()) // escapes: no
 	}
 
 	ring0.Halt()
@@ -114,12 +114,12 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
 
 	fpDisableTrap := ring0.CPACREL1()
 	if fpDisableTrap != 0 {
-		fpsimd := fpsimdPtr(c.floatingPointState.BytePointer())
+		fpsimd := fpsimdPtr(c.FloatingPointState().BytePointer()) // escapes: no
 		fpcr := ring0.GetFPCR()
 		fpsr := ring0.GetFPSR()
 		fpsimd.Fpcr = uint32(fpcr)
 		fpsimd.Fpsr = uint32(fpsr)
-		ring0.SaveVRegs(c.floatingPointState.BytePointer())
+		ring0.SaveVRegs(c.FloatingPointState().BytePointer()) // escapes: no
 	}
 
 	ring0.Halt()
diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go
index aac0fdffe..ad6863646 100644
--- a/pkg/sentry/platform/kvm/kvm.go
+++ b/pkg/sentry/platform/kvm/kvm.go
@@ -77,7 +77,11 @@ var (
 
 // OpenDevice opens the KVM device at /dev/kvm and returns the File.
 func OpenDevice() (*os.File, error) {
-	f, err := os.OpenFile("/dev/kvm", unix.O_RDWR, 0)
+	dev, ok := os.LookupEnv("GVISOR_KVM_DEV")
+	if !ok {
+		dev = "/dev/kvm"
+	}
+	f, err := os.OpenFile(dev, unix.O_RDWR, 0)
 	if err != nil {
 		return nil, fmt.Errorf("error opening /dev/kvm: %v", err)
 	}
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index dcf34015d..f1f7e4ea4 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -28,9 +28,9 @@ import (
 	"gvisor.dev/gvisor/pkg/procid"
 	"gvisor.dev/gvisor/pkg/ring0"
 	"gvisor.dev/gvisor/pkg/ring0/pagetables"
-	"gvisor.dev/gvisor/pkg/safecopy"
 	"gvisor.dev/gvisor/pkg/seccomp"
 	ktime "gvisor.dev/gvisor/pkg/sentry/time"
+	"gvisor.dev/gvisor/pkg/sighandling"
 	"gvisor.dev/gvisor/pkg/sync"
 )
 
@@ -723,7 +723,7 @@ func addrOfSigsysHandler() uintptr
 func seccompMmapRules(m *machine) {
 	seccompMmapRulesOnce.Do(func() {
 		// Install the handler.
-		if err := safecopy.ReplaceSignalHandler(unix.SIGSYS, addrOfSigsysHandler(), &savedSigsysHandler); err != nil {
+		if err := sighandling.ReplaceSignalHandler(unix.SIGSYS, addrOfSigsysHandler(), &savedSigsysHandler); err != nil {
 			panic(fmt.Sprintf("Unable to set handler for signal %d: %v", bluepillSignal, err))
 		}
 		rules := []seccomp.RuleSet{}
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index ab1e036b7..5bc023899 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -29,7 +29,6 @@ import (
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/ring0"
 	"gvisor.dev/gvisor/pkg/ring0/pagetables"
-	"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
 	ktime "gvisor.dev/gvisor/pkg/sentry/time"
 )
@@ -72,10 +71,6 @@ type vCPUArchState struct {
 	//
 	// This starts above fixedKernelPCID.
 	PCIDs *pagetables.PCIDs
-
-	// floatingPointState is the floating point state buffer used in guest
-	// to host transitions. See usage in bluepill_amd64.go.
-	floatingPointState fpu.State
 }
 
 const (
@@ -152,12 +147,6 @@ func (c *vCPU) initArchState() error {
 		return fmt.Errorf("error setting user registers: %v", errno)
 	}
 
-	// Allocate some floating point state save area for the local vCPU.
-	// This will be saved prior to leaving the guest, and we restore from
-	// this always. We cannot use the pointer in the context alone because
-	// we don't know how large the area there is in reality.
-	c.floatingPointState = fpu.NewState()
-
 	// Set the time offset to the host native time.
 	return c.setSystemTime()
 }
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index 08d98c479..31998a600 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -26,7 +26,6 @@ import (
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/ring0"
 	"gvisor.dev/gvisor/pkg/ring0/pagetables"
-	"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
 )
 
@@ -40,10 +39,6 @@ type vCPUArchState struct {
 	//
 	// This starts above fixedKernelPCID.
 	PCIDs *pagetables.PCIDs
-
-	// floatingPointState is the floating point state buffer used in guest
-	// to host transitions. See usage in bluepill_arm64.go.
-	floatingPointState fpu.State
 }
 
 const (
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index 7e8e19dcb..e73d5c544 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -28,7 +28,6 @@ import (
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/ring0"
 	"gvisor.dev/gvisor/pkg/ring0/pagetables"
-	"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
 	ktime "gvisor.dev/gvisor/pkg/sentry/time"
 )
@@ -159,8 +158,6 @@ func (c *vCPU) initArchState() error {
 		c.PCIDs = pagetables.NewPCIDs(fixedKernelPCID+1, poolPCIDs)
 	}
 
-	c.floatingPointState = fpu.NewState()
-
 	return c.setSystemTime()
 }
 
diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD
index 7ee89a735..00f925166 100644
--- a/pkg/sentry/socket/BUILD
+++ b/pkg/sentry/socket/BUILD
@@ -4,7 +4,10 @@ package(licenses = ["notice"])
 
 go_library(
     name = "socket",
-    srcs = ["socket.go"],
+    srcs = [
+        "socket.go",
+        "socket_state.go",
+    ],
     visibility = ["//pkg/sentry:internal"],
     deps = [
         "//pkg/abi/linux",
diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go
index f9a5b0df1..6077b2150 100644
--- a/pkg/sentry/socket/control/control.go
+++ b/pkg/sentry/socket/control/control.go
@@ -29,10 +29,9 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/socket"
 	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+	"time"
 )
 
-const maxInt = int(^uint(0) >> 1)
-
 // SCMCredentials represents a SCM_CREDENTIALS socket control message.
 type SCMCredentials interface {
 	transport.CredentialsControlMessage
@@ -78,7 +77,7 @@ func NewSCMRights(t *kernel.Task, fds []int32) (SCMRights, error) {
 }
 
 // Files implements SCMRights.Files.
-func (fs *RightsFiles) Files(ctx context.Context, max int) (RightsFiles, bool) {
+func (fs *RightsFiles) Files(_ context.Context, max int) (RightsFiles, bool) {
 	n := max
 	var trunc bool
 	if l := len(*fs); n > l {
@@ -124,7 +123,7 @@ func rightsFDs(t *kernel.Task, rights SCMRights, cloexec bool, max int) ([]int32
 			break
 		}
 
-		fds = append(fds, int32(fd))
+		fds = append(fds, fd)
 	}
 	return fds, trunc
 }
@@ -300,8 +299,8 @@ func alignSlice(buf []byte, align uint) []byte {
 }
 
 // PackTimestamp packs a SO_TIMESTAMP socket control message.
-func PackTimestamp(t *kernel.Task, timestamp int64, buf []byte) []byte {
-	timestampP := linux.NsecToTimeval(timestamp)
+func PackTimestamp(t *kernel.Task, timestamp time.Time, buf []byte) []byte {
+	timestampP := linux.NsecToTimeval(timestamp.UnixNano())
 	return putCmsgStruct(
 		buf,
 		linux.SOL_SOCKET,
@@ -545,7 +544,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint)
 				}
 				var ts linux.Timeval
 				ts.UnmarshalUnsafe(buf[i : i+linux.SizeOfTimeval])
-				cmsgs.IP.Timestamp = ts.ToNsecCapped()
+				cmsgs.IP.Timestamp = ts.ToTime()
 				cmsgs.IP.HasTimestamp = true
 				i += bits.AlignUp(length, width)
 
diff --git a/pkg/sentry/socket/control/control_test.go b/pkg/sentry/socket/control/control_test.go
index 7e28a0cef..1b04e1bbc 100644
--- a/pkg/sentry/socket/control/control_test.go
+++ b/pkg/sentry/socket/control/control_test.go
@@ -50,7 +50,7 @@ func TestParse(t *testing.T) {
 	want := socket.ControlMessages{
 		IP: socket.IPControlMessages{
 			HasTimestamp: true,
-			Timestamp:    ts.ToNsecCapped(),
+			Timestamp:    ts.ToTime(),
 		},
 	}
 	if diff := cmp.Diff(want, cmsg); diff != "" {
diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go
index 1c1e501ba..6e2318f75 100644
--- a/pkg/sentry/socket/hostinet/socket.go
+++ b/pkg/sentry/socket/hostinet/socket.go
@@ -111,7 +111,7 @@ func (s *socketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS
 		}
 		return readv(s.fd, safemem.IovecsFromBlockSeq(dsts))
 	}))
-	return int64(n), err
+	return n, err
 }
 
 // Write implements fs.FileOperations.Write.
@@ -134,7 +134,7 @@ func (s *socketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO
 		}
 		return writev(s.fd, safemem.IovecsFromBlockSeq(srcs))
 	}))
-	return int64(n), err
+	return n, err
 }
 
 // Socket implements socket.Provider.Socket.
@@ -180,7 +180,7 @@ func (p *socketProvider) Socket(t *kernel.Task, stypeflags linux.SockType, proto
 }
 
 // Pair implements socket.Provider.Pair.
-func (p *socketProvider) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *fs.File, *syserr.Error) {
+func (p *socketProvider) Pair(*kernel.Task, linux.SockType, int) (*fs.File, *fs.File, *syserr.Error) {
 	// Not supported by AF_INET/AF_INET6.
 	return nil, nil, nil
 }
@@ -207,7 +207,7 @@ type socketOpsCommon struct {
 // Release implements fs.FileOperations.Release.
 func (s *socketOpsCommon) Release(context.Context) {
 	fdnotifier.RemoveFD(int32(s.fd))
-	unix.Close(s.fd)
+	_ = unix.Close(s.fd)
 }
 
 // Readiness implements waiter.Waitable.Readiness.
@@ -218,13 +218,13 @@ func (s *socketOpsCommon) Readiness(mask waiter.EventMask) waiter.EventMask {
 // EventRegister implements waiter.Waitable.EventRegister.
 func (s *socketOpsCommon) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
 	s.queue.EventRegister(e, mask)
-	fdnotifier.UpdateFD(int32(s.fd))
+	_ = fdnotifier.UpdateFD(int32(s.fd))
 }
 
 // EventUnregister implements waiter.Waitable.EventUnregister.
 func (s *socketOpsCommon) EventUnregister(e *waiter.Entry) {
 	s.queue.EventUnregister(e)
-	fdnotifier.UpdateFD(int32(s.fd))
+	_ = fdnotifier.UpdateFD(int32(s.fd))
 }
 
 // Connect implements socket.Socket.Connect.
@@ -316,7 +316,7 @@ func (s *socketOpsCommon) Accept(t *kernel.Task, peerRequested bool, flags int,
 	if kernel.VFS2Enabled {
 		f, err := newVFS2Socket(t, s.family, s.stype, s.protocol, fd, uint32(flags&unix.SOCK_NONBLOCK))
 		if err != nil {
-			unix.Close(fd)
+			_ = unix.Close(fd)
 			return 0, nil, 0, err
 		}
 		defer f.DecRef(t)
@@ -328,7 +328,7 @@ func (s *socketOpsCommon) Accept(t *kernel.Task, peerRequested bool, flags int,
 	} else {
 		f, err := newSocketFile(t, s.family, s.stype, s.protocol, fd, flags&unix.SOCK_NONBLOCK != 0)
 		if err != nil {
-			unix.Close(fd)
+			_ = unix.Close(fd)
 			return 0, nil, 0, err
 		}
 		defer f.DecRef(t)
@@ -343,7 +343,7 @@ func (s *socketOpsCommon) Accept(t *kernel.Task, peerRequested bool, flags int,
 }
 
 // Bind implements socket.Socket.Bind.
-func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
+func (s *socketOpsCommon) Bind(_ *kernel.Task, sockaddr []byte) *syserr.Error {
 	if len(sockaddr) > sizeofSockaddr {
 		sockaddr = sockaddr[:sizeofSockaddr]
 	}
@@ -356,12 +356,12 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
 }
 
 // Listen implements socket.Socket.Listen.
-func (s *socketOpsCommon) Listen(t *kernel.Task, backlog int) *syserr.Error {
+func (s *socketOpsCommon) Listen(_ *kernel.Task, backlog int) *syserr.Error {
 	return syserr.FromError(unix.Listen(s.fd, backlog))
 }
 
 // Shutdown implements socket.Socket.Shutdown.
-func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
+func (s *socketOpsCommon) Shutdown(_ *kernel.Task, how int) *syserr.Error {
 	switch how {
 	case unix.SHUT_RD, unix.SHUT_WR, unix.SHUT_RDWR:
 		return syserr.FromError(unix.Shutdown(s.fd, how))
@@ -371,7 +371,7 @@ func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
 }
 
 // GetSockOpt implements socket.Socket.GetSockOpt.
-func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
+func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, _ hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
 	if outLen < 0 {
 		return nil, syserr.ErrInvalidArgument
 	}
@@ -401,7 +401,7 @@ func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr
 		case linux.TCP_NODELAY:
 			optlen = sizeofInt32
 		case linux.TCP_INFO:
-			optlen = int(linux.SizeOfTCPInfo)
+			optlen = linux.SizeOfTCPInfo
 		}
 	}
 
@@ -579,7 +579,7 @@ func parseUnixControlMessages(unixControlMessages []unix.SocketControlMessage) s
 				controlMessages.IP.HasTimestamp = true
 				ts := linux.Timeval{}
 				ts.UnmarshalUnsafe(unixCmsg.Data[:linux.SizeOfTimeval])
-				controlMessages.IP.Timestamp = ts.ToNsecCapped()
+				controlMessages.IP.Timestamp = ts.ToTime()
 			}
 
 		case linux.SOL_IP:
diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go
index 0f6e576a9..b9c15daab 100644
--- a/pkg/sentry/socket/netfilter/targets.go
+++ b/pkg/sentry/socket/netfilter/targets.go
@@ -647,7 +647,7 @@ func (jt *JumpTarget) id() targetID {
 }
 
 // Action implements stack.Target.Action.
-func (jt *JumpTarget) Action(*stack.PacketBuffer, *stack.ConnTrack, stack.Hook, *stack.Route, stack.AddressableEndpoint) (stack.RuleVerdict, int) {
+func (jt *JumpTarget) Action(*stack.PacketBuffer, stack.Hook, *stack.Route, stack.AddressableEndpoint) (stack.RuleVerdict, int) {
 	return stack.RuleJump, jt.RuleNum
 }
 
diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD
index bf5ec4558..075f61cda 100644
--- a/pkg/sentry/socket/netstack/BUILD
+++ b/pkg/sentry/socket/netstack/BUILD
@@ -7,6 +7,7 @@ go_library(
     srcs = [
         "device.go",
         "netstack.go",
+        "netstack_state.go",
         "netstack_vfs2.go",
         "provider.go",
         "provider_vfs2.go",
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index dedc32dda..030c6c8e4 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -274,6 +274,7 @@ var Metrics = tcpip.Stats{
 		ChecksumErrors:                     mustCreateMetric("/netstack/tcp/checksum_errors", "Number of segments dropped due to bad checksums."),
 		FailedPortReservations:             mustCreateMetric("/netstack/tcp/failed_port_reservations", "Number of time TCP failed to reserve a port."),
 		SegmentsAckedWithDSACK:             mustCreateMetric("/netstack/tcp/segments_acked_with_dsack", "Number of segments for which DSACK was received."),
+		SpuriousRecovery:                   mustCreateMetric("/netstack/tcp/spurious_recovery", "Number of times the connection entered loss recovery spuriously."),
 	},
 	UDP: tcpip.UDPStats{
 		PacketsReceived:          mustCreateMetric("/netstack/udp/packets_received", "Number of UDP datagrams received via HandlePacket."),
@@ -378,9 +379,9 @@ type socketOpsCommon struct {
 	// timestampValid indicates whether timestamp for SIOCGSTAMP has been
 	// set. It is protected by readMu.
 	timestampValid bool
-	// timestampNS holds the timestamp to use with SIOCTSTAMP. It is only
+	// timestamp holds the timestamp to use with SIOCGSTAMP. It is only
 	// valid when timestampValid is true. It is protected by readMu.
-	timestampNS int64
+	timestamp time.Time `state:".(int64)"`
 
 	// TODO(b/153685824): Move this to SocketOptions.
 	// sockOptInq corresponds to TCP_INQ.
@@ -410,15 +411,6 @@ var sockAddrInetSize = (*linux.SockAddrInet)(nil).SizeBytes()
 var sockAddrInet6Size = (*linux.SockAddrInet6)(nil).SizeBytes()
 var sockAddrLinkSize = (*linux.SockAddrLink)(nil).SizeBytes()
 
-// bytesToIPAddress converts an IPv4 or IPv6 address from the user to the
-// netstack representation taking any addresses into account.
-func bytesToIPAddress(addr []byte) tcpip.Address {
-	if bytes.Equal(addr, make([]byte, 4)) || bytes.Equal(addr, make([]byte, 16)) {
-		return ""
-	}
-	return tcpip.Address(addr)
-}
-
 // minSockAddrLen returns the minimum length in bytes of a socket address for
 // the socket's family.
 func (s *socketOpsCommon) minSockAddrLen() int {
@@ -468,7 +460,7 @@ func (s *socketOpsCommon) Release(ctx context.Context) {
 		t := kernel.TaskFromContext(ctx)
 		start := t.Kernel().MonotonicClock().Now()
 		deadline := start.Add(v.Timeout)
-		t.BlockWithDeadline(ch, true, deadline)
+		_ = t.BlockWithDeadline(ch, true, deadline)
 	}
 }
 
@@ -488,7 +480,7 @@ func (s *SocketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS
 }
 
 // WriteTo implements fs.FileOperations.WriteTo.
-func (s *SocketOperations) WriteTo(ctx context.Context, _ *fs.File, dst io.Writer, count int64, dup bool) (int64, error) {
+func (s *SocketOperations) WriteTo(_ context.Context, _ *fs.File, dst io.Writer, count int64, dup bool) (int64, error) {
 	s.readMu.Lock()
 	defer s.readMu.Unlock()
 
@@ -543,7 +535,7 @@ func (l *limitedPayloader) Len() int {
 }
 
 // ReadFrom implements fs.FileOperations.ReadFrom.
-func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader, count int64) (int64, error) {
+func (s *SocketOperations) ReadFrom(_ context.Context, _ *fs.File, r io.Reader, count int64) (int64, error) {
 	f := limitedPayloader{
 		inner: io.LimitedReader{
 			R: r,
@@ -654,7 +646,7 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool
 
 // Bind implements the linux syscall bind(2) for sockets backed by
 // tcpip.Endpoint.
-func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
+func (s *socketOpsCommon) Bind(_ *kernel.Task, sockaddr []byte) *syserr.Error {
 	if len(sockaddr) < 2 {
 		return syserr.ErrInvalidArgument
 	}
@@ -714,7 +706,7 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
 
 // Listen implements the linux syscall listen(2) for sockets backed by
 // tcpip.Endpoint.
-func (s *socketOpsCommon) Listen(t *kernel.Task, backlog int) *syserr.Error {
+func (s *socketOpsCommon) Listen(_ *kernel.Task, backlog int) *syserr.Error {
 	return syserr.TranslateNetstackError(s.Endpoint.Listen(backlog))
 }
 
@@ -805,7 +797,7 @@ func ConvertShutdown(how int) (tcpip.ShutdownFlags, *syserr.Error) {
 
 // Shutdown implements the linux syscall shutdown(2) for sockets backed by
 // tcpip.Endpoint.
-func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
+func (s *socketOpsCommon) Shutdown(_ *kernel.Task, how int) *syserr.Error {
 	f, err := ConvertShutdown(how)
 	if err != nil {
 		return err
@@ -886,7 +878,7 @@ func boolToInt32(v bool) int32 {
 }
 
 // getSockOptSocket implements GetSockOpt when level is SOL_SOCKET.
-func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, name, outLen int) (marshal.Marshallable, *syserr.Error) {
+func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, _ linux.SockType, name, outLen int) (marshal.Marshallable, *syserr.Error) {
 	// TODO(b/124056281): Stop rejecting short optLen values in getsockopt.
 	switch name {
 	case linux.SO_ERROR:
@@ -1402,11 +1394,11 @@ func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
 			return nil, syserr.ErrProtocolNotAvailable
 		}
 
-		stack := inet.StackFromContext(t)
-		if stack == nil {
+		stk := inet.StackFromContext(t)
+		if stk == nil {
 			return nil, syserr.ErrNoDevice
 		}
-		info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, true)
+		info, err := netfilter.GetInfo(t, stk.(*Stack).Stack, outPtr, true)
 		if err != nil {
 			return nil, err
 		}
@@ -1422,11 +1414,11 @@ func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
 			return nil, syserr.ErrProtocolNotAvailable
 		}
 
-		stack := inet.StackFromContext(t)
-		if stack == nil {
+		stk := inet.StackFromContext(t)
+		if stk == nil {
 			return nil, syserr.ErrNoDevice
 		}
-		entries, err := netfilter.GetEntries6(t, stack.(*Stack).Stack, outPtr, outLen)
+		entries, err := netfilter.GetEntries6(t, stk.(*Stack).Stack, outPtr, outLen)
 		if err != nil {
 			return nil, err
 		}
@@ -1442,8 +1434,8 @@ func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
 			return nil, syserr.ErrProtocolNotAvailable
 		}
 
-		stack := inet.StackFromContext(t)
-		if stack == nil {
+		stk := inet.StackFromContext(t)
+		if stk == nil {
 			return nil, syserr.ErrNoDevice
 		}
 		ret, err := netfilter.TargetRevision(t, outPtr, header.IPv6ProtocolNumber)
@@ -1459,7 +1451,7 @@ func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
 }
 
 // getSockOptIP implements GetSockOpt when level is SOL_IP.
-func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr hostarch.Addr, outLen int, family int) (marshal.Marshallable, *syserr.Error) {
+func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr hostarch.Addr, outLen int, _ int) (marshal.Marshallable, *syserr.Error) {
 	if _, ok := ep.(tcpip.Endpoint); !ok {
 		log.Warningf("SOL_IP options not supported on endpoints other than tcpip.Endpoint: option = %d", name)
 		return nil, syserr.ErrUnknownProtocolOption
@@ -1599,11 +1591,11 @@ func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
 			return nil, syserr.ErrProtocolNotAvailable
 		}
 
-		stack := inet.StackFromContext(t)
-		if stack == nil {
+		stk := inet.StackFromContext(t)
+		if stk == nil {
 			return nil, syserr.ErrNoDevice
 		}
-		info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, false)
+		info, err := netfilter.GetInfo(t, stk.(*Stack).Stack, outPtr, false)
 		if err != nil {
 			return nil, err
 		}
@@ -1619,11 +1611,11 @@ func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
 			return nil, syserr.ErrProtocolNotAvailable
 		}
 
-		stack := inet.StackFromContext(t)
-		if stack == nil {
+		stk := inet.StackFromContext(t)
+		if stk == nil {
 			return nil, syserr.ErrNoDevice
 		}
-		entries, err := netfilter.GetEntries4(t, stack.(*Stack).Stack, outPtr, outLen)
+		entries, err := netfilter.GetEntries4(t, stk.(*Stack).Stack, outPtr, outLen)
 		if err != nil {
 			return nil, err
 		}
@@ -1639,8 +1631,8 @@ func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
 			return nil, syserr.ErrProtocolNotAvailable
 		}
 
-		stack := inet.StackFromContext(t)
-		if stack == nil {
+		stk := inet.StackFromContext(t)
+		if stk == nil {
 			return nil, syserr.ErrNoDevice
 		}
 		ret, err := netfilter.TargetRevision(t, outPtr, header.IPv4ProtocolNumber)
@@ -2186,12 +2178,12 @@ func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
 			return syserr.ErrProtocolNotAvailable
 		}
 
-		stack := inet.StackFromContext(t)
-		if stack == nil {
+		stk := inet.StackFromContext(t)
+		if stk == nil {
 			return syserr.ErrNoDevice
 		}
 		// Stack must be a netstack stack.
-		return netfilter.SetEntries(t, stack.(*Stack).Stack, optVal, true)
+		return netfilter.SetEntries(t, stk.(*Stack).Stack, optVal, true)
 
 	case linux.IP6T_SO_SET_ADD_COUNTERS:
 		log.Infof("IP6T_SO_SET_ADD_COUNTERS is not supported")
@@ -2429,12 +2421,12 @@ func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
 			return syserr.ErrProtocolNotAvailable
 		}
 
-		stack := inet.StackFromContext(t)
-		if stack == nil {
+		stk := inet.StackFromContext(t)
+		if stk == nil {
 			return syserr.ErrNoDevice
 		}
 		// Stack must be a netstack stack.
-		return netfilter.SetEntries(t, stack.(*Stack).Stack, optVal, false)
+		return netfilter.SetEntries(t, stk.(*Stack).Stack, optVal, false)
 
 	case linux.IPT_SO_SET_ADD_COUNTERS:
 		log.Infof("IPT_SO_SET_ADD_COUNTERS is not supported")
@@ -2601,7 +2593,7 @@ func emitUnimplementedEventIP(t *kernel.Task, name int) {
 
 // GetSockName implements the linux syscall getsockname(2) for sockets backed by
 // tcpip.Endpoint.
-func (s *socketOpsCommon) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
+func (s *socketOpsCommon) GetSockName(*kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
 	addr, err := s.Endpoint.GetLocalAddress()
 	if err != nil {
 		return nil, 0, syserr.TranslateNetstackError(err)
@@ -2613,7 +2605,7 @@ func (s *socketOpsCommon) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *
 
 // GetPeerName implements the linux syscall getpeername(2) for sockets backed by
 // tcpip.Endpoint.
-func (s *socketOpsCommon) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
+func (s *socketOpsCommon) GetPeerName(*kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
 	addr, err := s.Endpoint.GetRemoteAddress()
 	if err != nil {
 		return nil, 0, syserr.TranslateNetstackError(err)
@@ -2774,7 +2766,7 @@ func (s *socketOpsCommon) updateTimestamp(cm tcpip.ControlMessages) {
 	// Save the SIOCGSTAMP timestamp only if SO_TIMESTAMP is disabled.
 	if !s.sockOptTimestamp {
 		s.timestampValid = true
-		s.timestampNS = cm.Timestamp
+		s.timestamp = cm.Timestamp
 	}
 }
 
@@ -2833,7 +2825,7 @@ func (s *socketOpsCommon) recvErr(t *kernel.Task, dst usermem.IOSequence) (int,
 
 // RecvMsg implements the linux syscall recvmsg(2) for sockets backed by
 // tcpip.Endpoint.
-func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, msgFlags int, senderAddr linux.SockAddr, senderAddrLen uint32, controlMessages socket.ControlMessages, err *syserr.Error) {
+func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, _ uint64) (n int, msgFlags int, senderAddr linux.SockAddr, senderAddrLen uint32, controlMessages socket.ControlMessages, err *syserr.Error) {
 	if flags&linux.MSG_ERRQUEUE != 0 {
 		return s.recvErr(t, dst)
 	}
@@ -2998,7 +2990,7 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy
 			return 0, linuxerr.ENOENT
 		}
 
-		tv := linux.NsecToTimeval(s.timestampNS)
+		tv := linux.NsecToTimeval(s.timestamp.UnixNano())
 		_, err := tv.CopyOut(t, args[2].Pointer())
 		return 0, err
 
@@ -3105,7 +3097,7 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc
 }
 
 // interfaceIoctl implements interface requests.
-func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFReq) *syserr.Error {
+func interfaceIoctl(ctx context.Context, _ usermem.IO, arg int, ifr *linux.IFReq) *syserr.Error {
 	var (
 		iface inet.Interface
 		index int32
@@ -3113,8 +3105,8 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
 	)
 
 	// Find the relevant device.
-	stack := inet.StackFromContext(ctx)
-	if stack == nil {
+	stk := inet.StackFromContext(ctx)
+	if stk == nil {
 		return syserr.ErrNoDevice
 	}
 
@@ -3124,7 +3116,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
 		// Gets the name of the interface given the interface index
 		// stored in ifr_ifindex.
 		index = int32(hostarch.ByteOrder.Uint32(ifr.Data[:4]))
-		if iface, ok := stack.Interfaces()[index]; ok {
+		if iface, ok := stk.Interfaces()[index]; ok {
 			ifr.SetName(iface.Name)
 			return nil
 		}
@@ -3132,7 +3124,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
 	}
 
 	// Find the relevant device.
-	for index, iface = range stack.Interfaces() {
+	for index, iface = range stk.Interfaces() {
 		if iface.Name == ifr.Name() {
 			found = true
 			break
@@ -3165,7 +3157,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
 		}
 
 	case linux.SIOCGIFFLAGS:
-		f, err := interfaceStatusFlags(stack, iface.Name)
+		f, err := interfaceStatusFlags(stk, iface.Name)
 		if err != nil {
 			return err
 		}
@@ -3175,7 +3167,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
 
 	case linux.SIOCGIFADDR:
 		// Copy the IPv4 address out.
-		for _, addr := range stack.InterfaceAddrs()[index] {
+		for _, addr := range stk.InterfaceAddrs()[index] {
 			// This ioctl is only compatible with AF_INET addresses.
 			if addr.Family != linux.AF_INET {
 				continue
@@ -3211,7 +3203,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
 
 	case linux.SIOCGIFNETMASK:
 		// Gets the network mask of a device.
-		for _, addr := range stack.InterfaceAddrs()[index] {
+		for _, addr := range stk.InterfaceAddrs()[index] {
 			// This ioctl is only compatible with AF_INET addresses.
 			if addr.Family != linux.AF_INET {
 				continue
@@ -3243,24 +3235,24 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
 }
 
 // ifconfIoctl populates a struct ifconf for the SIOCGIFCONF ioctl.
-func ifconfIoctl(ctx context.Context, t *kernel.Task, io usermem.IO, ifc *linux.IFConf) error {
+func ifconfIoctl(ctx context.Context, t *kernel.Task, _ usermem.IO, ifc *linux.IFConf) error {
 	// If Ptr is NULL, return the necessary buffer size via Len.
 	// Otherwise, write up to Len bytes starting at Ptr containing ifreq
 	// structs.
-	stack := inet.StackFromContext(ctx)
-	if stack == nil {
+	stk := inet.StackFromContext(ctx)
+	if stk == nil {
 		return syserr.ErrNoDevice.ToError()
 	}
 
 	if ifc.Ptr == 0 {
-		ifc.Len = int32(len(stack.Interfaces())) * int32(linux.SizeOfIFReq)
+		ifc.Len = int32(len(stk.Interfaces())) * int32(linux.SizeOfIFReq)
 		return nil
 	}
 
 	max := ifc.Len
 	ifc.Len = 0
-	for key, ifaceAddrs := range stack.InterfaceAddrs() {
-		iface := stack.Interfaces()[key]
+	for key, ifaceAddrs := range stk.InterfaceAddrs() {
+		iface := stk.Interfaces()[key]
 		for _, ifaceAddr := range ifaceAddrs {
 			// Don't write past the end of the buffer.
 			if ifc.Len+int32(linux.SizeOfIFReq) > max {
diff --git a/pkg/sentry/socket/netstack/netstack_state.go b/pkg/sentry/socket/netstack/netstack_state.go
new file mode 100644
index 000000000..591e00d42
--- /dev/null
+++ b/pkg/sentry/socket/netstack/netstack_state.go
@@ -0,0 +1,31 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netstack
+
+import (
+	"time"
+)
+
+func (s *socketOpsCommon) saveTimestamp() int64 {
+	s.readMu.Lock() // s.timestamp is read while readMu is held.
+	defer s.readMu.Unlock()
+	return s.timestamp.UnixNano() // nanoseconds since the Unix epoch
+}
+
+func (s *socketOpsCommon) loadTimestamp(nsec int64) {
+	s.readMu.Lock() // s.timestamp is written while readMu is held.
+	defer s.readMu.Unlock()
+	s.timestamp = time.Unix(0, nsec) // nsec is nanoseconds since the Unix epoch
+}
diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go
index 2f0eb4a6c..d4b80a39d 100644
--- a/pkg/sentry/socket/socket.go
+++ b/pkg/sentry/socket/socket.go
@@ -21,6 +21,7 @@ import (
 	"bytes"
 	"fmt"
 	"sync/atomic"
+	"time"
 
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
@@ -51,8 +52,8 @@ type ControlMessages struct {
 func packetInfoToLinux(packetInfo tcpip.IPPacketInfo) linux.ControlMessageIPPacketInfo {
 	var p linux.ControlMessageIPPacketInfo
 	p.NIC = int32(packetInfo.NIC)
-	copy(p.LocalAddr[:], []byte(packetInfo.LocalAddr))
-	copy(p.DestinationAddr[:], []byte(packetInfo.DestinationAddr))
+	copy(p.LocalAddr[:], packetInfo.LocalAddr)
+	copy(p.DestinationAddr[:], packetInfo.DestinationAddr)
 	return p
 }
 
@@ -60,7 +61,7 @@ func packetInfoToLinux(packetInfo tcpip.IPPacketInfo) linux.ControlMessageIPPack
 // format.
 func ipv6PacketInfoToLinux(packetInfo tcpip.IPv6PacketInfo) linux.ControlMessageIPv6PacketInfo {
 	var p linux.ControlMessageIPv6PacketInfo
-	if n := copy(p.Addr[:], []byte(packetInfo.Addr)); n != len(p.Addr) {
+	if n := copy(p.Addr[:], packetInfo.Addr); n != len(p.Addr) {
 		panic(fmt.Sprintf("got copy(%x, %x) = %d, want = %d", p.Addr, packetInfo.Addr, n, len(p.Addr)))
 	}
 	p.NIC = uint32(packetInfo.NIC)
@@ -156,9 +157,9 @@ type IPControlMessages struct {
 	// HasTimestamp indicates whether Timestamp is valid/set.
 	HasTimestamp bool
 
-	// Timestamp is the time (in ns) that the last packet used to create
-	// the read data was received.
-	Timestamp int64
+	// Timestamp is the time that the last packet used to create the read data
+	// was received.
+	Timestamp time.Time `state:".(int64)"`
 
 	// HasInq indicates whether Inq is valid/set.
 	HasInq bool
diff --git a/pkg/sentry/socket/socket_state.go b/pkg/sentry/socket/socket_state.go
new file mode 100644
index 000000000..32e12b238
--- /dev/null
+++ b/pkg/sentry/socket/socket_state.go
@@ -0,0 +1,27 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package socket
+
+import (
+	"time"
+)
+
+// saveTimestamp returns i.Timestamp as nanoseconds since the Unix epoch. It is
+// the counterpart of loadTimestamp.
+func (i *IPControlMessages) saveTimestamp() int64 { return i.Timestamp.UnixNano() }
+
+// loadTimestamp sets i.Timestamp from nsec, a count of nanoseconds since the
+// Unix epoch. It is the counterpart of saveTimestamp.
+func (i *IPControlMessages) loadTimestamp(nsec int64) { i.Timestamp = time.Unix(0, nsec) }
diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go
index 757ff2a40..4d3f4d556 100644
--- a/pkg/sentry/strace/strace.go
+++ b/pkg/sentry/strace/strace.go
@@ -610,9 +610,9 @@ func (i *SyscallInfo) printExit(t *kernel.Task, elapsed time.Duration, output []
 	if err == nil {
 		// Fill in the output after successful execution.
 		i.post(t, args, retval, output, LogMaximumSize)
-		rval = fmt.Sprintf("%#x (%v)", retval, elapsed)
+		rval = fmt.Sprintf("%d (%#x) (%v)", retval, retval, elapsed)
 	} else {
-		rval = fmt.Sprintf("%#x errno=%d (%s) (%v)", retval, errno, err, elapsed)
+		rval = fmt.Sprintf("%d (%#x) errno=%d (%s) (%v)", retval, retval, errno, err, elapsed)
 	}
 
 	switch len(output) {
diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go
index 04bc4d10c..fefd0fc9c 100644
--- a/pkg/sentry/vfs/epoll.go
+++ b/pkg/sentry/vfs/epoll.go
@@ -135,12 +135,16 @@ func (ep *EpollInstance) Readiness(mask waiter.EventMask) waiter.EventMask {
 		return 0
 	}
 	ep.mu.Lock()
-	for epi := ep.ready.Front(); epi != nil; epi = epi.Next() {
+	var next *epollInterest
+	for epi := ep.ready.Front(); epi != nil; epi = next {
+		next = epi.Next()
 		wmask := waiter.EventMaskFromLinux(epi.mask)
 		if epi.key.file.Readiness(wmask)&wmask != 0 {
 			ep.mu.Unlock()
 			return waiter.ReadableEvents
 		}
+		ep.ready.Remove(epi)
+		epi.ready = false
 	}
 	ep.mu.Unlock()
 	return 0
diff --git a/pkg/shim/service.go b/pkg/shim/service.go
index 24e3b7a82..0980d964e 100644
--- a/pkg/shim/service.go
+++ b/pkg/shim/service.go
@@ -77,6 +77,8 @@ const (
 	// shimAddressPath is the relative path to a file that contains the address
 	// to the shim UDS. See service.shimAddress.
 	shimAddressPath = "address"
+
+	cgroupParentAnnotation = "dev.gvisor.spec.cgroup-parent"
 )
 
 // New returns a new shim service that can be used via GRPC.
@@ -952,7 +954,7 @@ func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.C
 	if err != nil {
 		return nil, fmt.Errorf("update volume annotations: %w", err)
 	}
-	updated = updateCgroup(spec) || updated
+	updated = setPodCgroup(spec) || updated
 
 	if updated {
 		if err := utils.WriteSpec(r.Bundle, spec); err != nil {
@@ -980,12 +982,13 @@ func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.C
 	return p, nil
 }
 
-// updateCgroup updates cgroup path for the sandbox to make the sandbox join the
-// pod cgroup and not the pause container cgroup. Returns true if the spec was
-// modified. Ex.:
-//   /kubepods/burstable/pod123/abc => kubepods/burstable/pod123
+// setPodCgroup searches for the pod cgroup path inside the container's cgroup
+// path. If found, it's set as an annotation in the spec. This is done so that
+// the sandbox joins the pod cgroup. Otherwise, the sandbox would join the pause
+// container cgroup. Returns true if the spec was modified. Ex.:
+//   /kubepods/burstable/pod123/container123 => kubepods/burstable/pod123
 //
-func updateCgroup(spec *specs.Spec) bool {
+func setPodCgroup(spec *specs.Spec) bool {
 	if !utils.IsSandbox(spec) {
 		return false
 	}
@@ -1009,7 +1012,10 @@ func updateCgroup(spec *specs.Spec) bool {
 			if spec.Linux.CgroupsPath == path {
 				return false
 			}
-			spec.Linux.CgroupsPath = path
+			if spec.Annotations == nil {
+				spec.Annotations = make(map[string]string)
+			}
+			spec.Annotations[cgroupParentAnnotation] = path
 			return true
 		}
 	}
diff --git a/pkg/shim/service_test.go b/pkg/shim/service_test.go
index 2d9f07e02..4b4410a58 100644
--- a/pkg/shim/service_test.go
+++ b/pkg/shim/service_test.go
@@ -40,12 +40,12 @@ func TestCgroupPath(t *testing.T) {
 		{
 			name: "no-container",
 			path: "foo/pod123",
-			want: "foo/pod123",
+			want: "",
 		},
 		{
 			name: "no-container-absolute",
 			path: "/foo/pod123",
-			want: "/foo/pod123",
+			want: "",
 		},
 		{
 			name: "double-pod",
@@ -70,7 +70,7 @@ func TestCgroupPath(t *testing.T) {
 		{
 			name: "no-pod",
 			path: "/foo/nopod123/container",
-			want: "/foo/nopod123/container",
+			want: "",
 		},
 	} {
 		t.Run(tc.name, func(t *testing.T) {
@@ -79,12 +79,12 @@ func TestCgroupPath(t *testing.T) {
 					CgroupsPath: tc.path,
 				},
 			}
-			updated := updateCgroup(&spec)
-			if spec.Linux.CgroupsPath != tc.want {
-				t.Errorf("updateCgroup(%q), want: %q, got: %q", tc.path, tc.want, spec.Linux.CgroupsPath)
+			updated := setPodCgroup(&spec)
+			if got := spec.Annotations[cgroupParentAnnotation]; got != tc.want {
+				t.Errorf("setPodCgroup(%q), want: %q, got: %q", tc.path, tc.want, got)
 			}
-			if shouldUpdate := tc.path != tc.want; shouldUpdate != updated {
-				t.Errorf("updateCgroup(%q)=%v, want: %v", tc.path, updated, shouldUpdate)
+			if shouldUpdate := len(tc.want) > 0; shouldUpdate != updated {
+				t.Errorf("setPodCgroup(%q)=%v, want: %v", tc.path, updated, shouldUpdate)
 			}
 		})
 	}
@@ -113,8 +113,8 @@ func TestCgroupNoUpdate(t *testing.T) {
 		},
 	} {
 		t.Run(tc.name, func(t *testing.T) {
-			if updated := updateCgroup(tc.spec); updated {
-				t.Errorf("updateCgroup(%+v), got: %v, want: false", tc.spec.Linux, updated)
+			if updated := setPodCgroup(tc.spec); updated {
+				t.Errorf("setPodCgroup(%+v), got: %v, want: false", tc.spec.Linux, updated)
 			}
 		})
 	}
diff --git a/pkg/sentry/sighandling/BUILD b/pkg/sighandling/BUILD
index 1790d57c9..72f10f982 100644
--- a/pkg/sentry/sighandling/BUILD
+++ b/pkg/sighandling/BUILD
@@ -8,7 +8,7 @@ go_library(
         "sighandling.go",
         "sighandling_unsafe.go",
     ],
-    visibility = ["//pkg/sentry:internal"],
+    visibility = ["//:sandbox"],
     deps = [
         "//pkg/abi/linux",
         "@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/sighandling/sighandling.go b/pkg/sighandling/sighandling.go
index bdaf8af29..bdaf8af29 100644
--- a/pkg/sentry/sighandling/sighandling.go
+++ b/pkg/sighandling/sighandling.go
diff --git a/pkg/sentry/sighandling/sighandling_unsafe.go b/pkg/sighandling/sighandling_unsafe.go
index 3fe5c6770..7deeda042 100644
--- a/pkg/sentry/sighandling/sighandling_unsafe.go
+++ b/pkg/sighandling/sighandling_unsafe.go
@@ -15,6 +15,7 @@
 package sighandling
 
 import (
+	"fmt"
 	"unsafe"
 
 	"golang.org/x/sys/unix"
@@ -37,3 +38,36 @@ func IgnoreChildStop() error {
 
 	return nil
 }
+
+// ReplaceSignalHandler replaces the existing signal handler for the provided
+// signal with the function pointer at `handler`. This bypasses the Go runtime
+// signal handlers, and should only be used for low-level signal handlers where
+// use of signal.Notify is not appropriate.
+// The previously set handler is stored in *previous. An error is returned if
+// either rt_sigaction call fails or if no previous handler was set.
+func ReplaceSignalHandler(sig unix.Signal, handler uintptr, previous *uintptr) error {
+	var sa linux.SigAction
+	const maskLen = 8
+
+	// Get the existing signal handler information. The current handler is saved
+	// below so that it can be used as a fallback once it has been replaced.
+	if _, _, e := unix.RawSyscall6(unix.SYS_RT_SIGACTION, uintptr(sig), 0, uintptr(unsafe.Pointer(&sa)), maskLen, 0, 0); e != 0 {
+		return e
+	}
+
+	// Fail if there isn't a previous handler. The signal is printed with %d:
+	// unix.Signal has a String method, so %x would hex-encode the signal's name.
+	if sa.Handler == 0 {
+		return fmt.Errorf("previous handler for signal %d isn't set", sig)
+	}
+
+	*previous = uintptr(sa.Handler)
+
+	// Install our own handler.
+	sa.Handler = uint64(handler)
+	if _, _, e := unix.RawSyscall6(unix.SYS_RT_SIGACTION, uintptr(sig), uintptr(unsafe.Pointer(&sa)), 0, maskLen, 0, 0); e != 0 {
+		return e
+	}
+
+	return nil
+}
diff --git a/pkg/sync/BUILD b/pkg/sync/BUILD
index 73791b456..517f16329 100644
--- a/pkg/sync/BUILD
+++ b/pkg/sync/BUILD
@@ -26,6 +26,7 @@ go_library(
         "rwmutex_unsafe.go",
         "seqcount.go",
         "sync.go",
+        "wait.go",
     ],
     marshal = False,
     stateify = False,
diff --git a/pkg/sync/wait.go b/pkg/sync/wait.go
new file mode 100644
index 000000000..f8e7742a5
--- /dev/null
+++ b/pkg/sync/wait.go
@@ -0,0 +1,58 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sync
+
+// WaitGroupErr is similar to WaitGroup but allows goroutines to report errors.
+// Only the first error is retained and reported back.
+//
+// Example usage:
+//	wg := WaitGroupErr{}
+//	wg.Add(1)
+//	go func() {
+//		defer wg.Done()
+//		if err := ...; err != nil {
+//			wg.ReportError(err)
+//			return
+//		}
+//	}()
+//	return wg.Error()
+//
+type WaitGroupErr struct {
+	WaitGroup
+
+	// mu protects firstErr.
+	mu Mutex
+
+	// firstErr holds the first error reported. It is nil if no error occurred.
+	firstErr error
+}
+
+// ReportError records err unless an error was already recorded; no Done() call.
+func (w *WaitGroupErr) ReportError(err error) {
+	w.mu.Lock()
+	if w.firstErr == nil {
+		w.firstErr = err
+	}
+	w.mu.Unlock()
+}
+
+// Error waits for the counter to reach 0, then returns the first error, if any.
+func (w *WaitGroupErr) Error() error {
+	w.Wait()
+	w.mu.Lock()
+	first := w.firstErr
+	w.mu.Unlock()
+	return first
+}
diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD
index dbe4506cc..b98de54c5 100644
--- a/pkg/tcpip/BUILD
+++ b/pkg/tcpip/BUILD
@@ -25,6 +25,7 @@ go_library(
         "stdclock.go",
         "stdclock_state.go",
         "tcpip.go",
+        "tcpip_state.go",
         "timer.go",
     ],
     visibility = ["//visibility:public"],
diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
index 87a0b9a62..e53789d92 100644
--- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go
+++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
@@ -152,10 +152,22 @@ type PollEvent struct {
 // no data is available, it will block in a poll() syscall until the file
 // descriptor becomes readable.
 func BlockingRead(fd int, b []byte) (int, tcpip.Error) {
+	n, err := BlockingReadUntranslated(fd, b)
+	if err != 0 {
+		return n, TranslateErrno(err)
+	}
+	return n, nil
+}
+
+// BlockingReadUntranslated reads from a file descriptor that is set up as
+// non-blocking. If no data is available, it will block in a poll() syscall
+// until the file descriptor becomes readable. It returns the raw unix.Errno
+// value returned by the underlying syscalls.
+func BlockingReadUntranslated(fd int, b []byte) (int, unix.Errno) {
 	for {
 		n, _, e := unix.RawSyscall(unix.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)))
 		if e == 0 {
-			return int(n), nil
+			return int(n), 0
 		}
 
 		event := PollEvent{
@@ -165,7 +177,7 @@ func BlockingRead(fd int, b []byte) (int, tcpip.Error) {
 
 		_, e = BlockingPoll(&event, 1, nil)
 		if e != 0 && e != unix.EINTR {
-			return 0, TranslateErrno(e)
+			return 0, e
 		}
 	}
 }
diff --git a/pkg/tcpip/link/sharedmem/BUILD b/pkg/tcpip/link/sharedmem/BUILD
index 4215ee852..f8076d83c 100644
--- a/pkg/tcpip/link/sharedmem/BUILD
+++ b/pkg/tcpip/link/sharedmem/BUILD
@@ -5,19 +5,26 @@ package(licenses = ["notice"])
 go_library(
     name = "sharedmem",
     srcs = [
+        "queuepair.go",
         "rx.go",
+        "server_rx.go",
+        "server_tx.go",
         "sharedmem.go",
+        "sharedmem_server.go",
         "sharedmem_unsafe.go",
         "tx.go",
     ],
     visibility = ["//visibility:public"],
     deps = [
+        "//pkg/cleanup",
+        "//pkg/eventfd",
         "//pkg/log",
         "//pkg/sync",
         "//pkg/tcpip",
         "//pkg/tcpip/buffer",
         "//pkg/tcpip/header",
         "//pkg/tcpip/link/rawfile",
+        "//pkg/tcpip/link/sharedmem/pipe",
         "//pkg/tcpip/link/sharedmem/queue",
         "//pkg/tcpip/stack",
         "@org_golang_x_sys//unix:go_default_library",
@@ -26,9 +33,7 @@ go_library(
 
 go_test(
     name = "sharedmem_test",
-    srcs = [
-        "sharedmem_test.go",
-    ],
+    srcs = ["sharedmem_test.go"],
     library = ":sharedmem",
     deps = [
         "//pkg/sync",
@@ -41,3 +46,22 @@ go_test(
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
+
+go_test(
+    name = "sharedmem_server_test",
+    size = "small",
+    srcs = ["sharedmem_server_test.go"],
+    deps = [
+        ":sharedmem",
+        "//pkg/tcpip",
+        "//pkg/tcpip/adapters/gonet",
+        "//pkg/tcpip/header",
+        "//pkg/tcpip/link/sniffer",
+        "//pkg/tcpip/network/ipv4",
+        "//pkg/tcpip/network/ipv6",
+        "//pkg/tcpip/stack",
+        "//pkg/tcpip/transport/tcp",
+        "//pkg/tcpip/transport/udp",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
diff --git a/pkg/tcpip/link/sharedmem/queue/rx.go b/pkg/tcpip/link/sharedmem/queue/rx.go
index 696e6c9e5..a78826ebc 100644
--- a/pkg/tcpip/link/sharedmem/queue/rx.go
+++ b/pkg/tcpip/link/sharedmem/queue/rx.go
@@ -119,7 +119,6 @@ func (r *Rx) PostBuffers(buffers []RxBuffer) bool {
 	}
 
 	r.tx.Flush()
-
 	return true
 }
 
@@ -131,7 +130,6 @@ func (r *Rx) PostBuffers(buffers []RxBuffer) bool {
 func (r *Rx) Dequeue(bufs []RxBuffer) ([]RxBuffer, uint32) {
 	for {
 		outBufs := bufs
-
 		// Pull the next descriptor from the rx pipe.
 		b := r.rx.Pull()
 		if b == nil {
diff --git a/pkg/tcpip/link/sharedmem/queuepair.go b/pkg/tcpip/link/sharedmem/queuepair.go
new file mode 100644
index 000000000..b12647fdd
--- /dev/null
+++ b/pkg/tcpip/link/sharedmem/queuepair.go
@@ -0,0 +1,199 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build linux
+// +build linux
+
+package sharedmem
+
+import (
+	"fmt"
+	"io/ioutil"
+
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/eventfd"
+)
+
+const (
+	// defaultQueueDataSize is the size of the shared memory data region that
+	// holds the scatter/gather buffers.
+	defaultQueueDataSize = 1 << 20 // 1MiB
+
+	// defaultQueuePipeSize is the size of the pipe that holds the packet descriptors.
+	//
+	// Assuming each packet's data is approximately 1280 bytes (IPv6 Minimum MTU)
+	// then we can hold approximately 1024*1024/1280 ~ 819 packets in the data
+	// area, which means the pipe needs to be big enough to hold 819
+	// descriptors.
+	//
+	// Each descriptor is approximately 8 (slot descriptor in pipe) +
+	// 16 (packet descriptor) + 12 (for buffer descriptor) bytes, assuming each
+	// packet is stored in exactly 1 buffer descriptor (see queue/tx.go and pipe/tx.go).
+	//
+	// This means we need approximately 36*819 ~ 29 KiB to store all packet
+	// descriptors. We could go with a 32 KiB pipe but to give it some slack in
+	// how the upper layer may make use of the scatter gather buffers we double
+	// this to hold enough descriptors.
+	defaultQueuePipeSize = 64 << 10 // 64KiB
+
+	// defaultSharedDataSize is the size of the sharedData region used to
+	// enable/disable notifications.
+	defaultSharedDataSize = 4 << 10 // 4KiB
+)
+
+// A QueuePair represents a pair of TX/RX queues.
+type QueuePair struct {
+	// txCfg is the QueueConfig to be used for the transmit queue.
+	txCfg QueueConfig
+
+	// rxCfg is the QueueConfig to be used for the receive queue.
+	rxCfg QueueConfig
+}
+
+// NewQueuePair creates a shared memory QueuePair whose tx and rx queues are
+// both created with the default data, pipe and shared data sizes.
+//
+// If the rx queue cannot be created, the descriptors of the already created
+// tx queue are closed before the error is returned.
+func NewQueuePair() (*QueuePair, error) {
+	// The tx and rx queues are sized identically.
+	sizes := queueSizes{
+		dataSize:       defaultQueueDataSize,
+		txPipeSize:     defaultQueuePipeSize,
+		rxPipeSize:     defaultQueuePipeSize,
+		sharedDataSize: defaultSharedDataSize,
+	}
+
+	txCfg, err := createQueueFDs(sizes)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create tx queue: %w", err)
+	}
+
+	rxCfg, err := createQueueFDs(sizes)
+	if err != nil {
+		closeFDs(txCfg)
+		return nil, fmt.Errorf("failed to create rx queue: %w", err)
+	}
+
+	return &QueuePair{
+		txCfg: txCfg,
+		rxCfg: rxCfg,
+	}, nil
+}
+
+// Close closes the file descriptors of both the tx and the rx queue.
+func (q *QueuePair) Close() {
+	closeFDs(q.txCfg)
+	closeFDs(q.rxCfg)
+}
+
+// TXQueueConfig returns the QueueConfig for the transmit queue.
+func (q *QueuePair) TXQueueConfig() QueueConfig {
+	return q.txCfg
+}
+
+// RXQueueConfig returns the QueueConfig for the receive queue.
+func (q *QueuePair) RXQueueConfig() QueueConfig {
+	return q.rxCfg
+}
+
+type queueSizes struct { // file sizes handed to createFile by createQueueFDs
+	dataSize       int64 // size of the file returned as DataFD
+	txPipeSize     int64 // size of the file returned as TxPipeFD
+	rxPipeSize     int64 // size of the file returned as RxPipeFD
+	sharedDataSize int64 // size of the file returned as SharedDataFD
+}
+
+// createQueueFDs creates the eventfd and the four backing files (data, tx
+// pipe, rx pipe and shared data) for a single queue, sized according to s.
+//
+// On success the caller owns every descriptor in the returned QueueConfig. On
+// failure, the descriptors created so far are closed and a zero QueueConfig is
+// returned together with the error.
+func createQueueFDs(s queueSizes) (QueueConfig, error) {
+	eventFD, err := eventfd.Create()
+	if err != nil {
+		return QueueConfig{}, fmt.Errorf("eventfd failed: %w", err)
+	}
+
+	// File descriptors that have not been created yet are kept at -1 rather
+	// than at the zero value, so that the cleanup below can never close fd 0
+	// on behalf of a file that was never opened.
+	cfg := QueueConfig{
+		EventFD:      eventFD,
+		DataFD:       -1,
+		TxPipeFD:     -1,
+		RxPipeFD:     -1,
+		SharedDataFD: -1,
+	}
+	success := false
+	defer func() {
+		if !success {
+			closeFDs(cfg)
+		}
+	}()
+
+	if cfg.DataFD, err = createFile(s.dataSize, false); err != nil {
+		return QueueConfig{}, fmt.Errorf("failed to create dataFD: %w", err)
+	}
+	if cfg.TxPipeFD, err = createFile(s.txPipeSize, true); err != nil {
+		return QueueConfig{}, fmt.Errorf("failed to create txPipeFD: %w", err)
+	}
+	if cfg.RxPipeFD, err = createFile(s.rxPipeSize, true); err != nil {
+		return QueueConfig{}, fmt.Errorf("failed to create rxPipeFD: %w", err)
+	}
+	if cfg.SharedDataFD, err = createFile(s.sharedDataSize, false); err != nil {
+		return QueueConfig{}, fmt.Errorf("failed to create sharedDataFD: %w", err)
+	}
+
+	success = true
+	return cfg, nil
+}
+
+// createFile creates a temporary file in /dev/shm/, unlinks it, optionally
+// writes the initial queue flag, and returns a dup of its fd resized to size.
+func createFile(size int64, initQueue bool) (fd int, err error) {
+	const tmpDir = "/dev/shm/"
+	f, err := ioutil.TempFile(tmpDir, "sharedmem_test")
+	if err != nil {
+		return -1, fmt.Errorf("TempFile failed: %v", err)
+	}
+	defer f.Close()
+	unix.Unlink(f.Name())
+
+	if initQueue {
+		// Write the "slot-free" flag in the initial queue.
+		if _, err := f.WriteAt([]byte{0, 0, 0, 0, 0, 0, 0, 0x80}, 0); err != nil {
+			return -1, fmt.Errorf("WriteAt failed: %v", err)
+		}
+	}
+
+	fd, err = unix.Dup(int(f.Fd()))
+	if err != nil {
+		return -1, fmt.Errorf("unix.Dup(%d) failed: %v", f.Fd(), err)
+	}
+	if err := unix.Ftruncate(fd, size); err != nil {
+		unix.Close(fd)
+		return -1, fmt.Errorf("ftruncate(%d, %d) failed: %v", fd, size, err)
+	}
+	return fd, nil
+}
+
+// closeFDs closes every descriptor held by c. Close results are discarded.
+func closeFDs(c QueueConfig) {
+	c.EventFD.Close()
+	for _, fd := range []int{c.DataFD, c.TxPipeFD, c.RxPipeFD, c.SharedDataFD} {
+		unix.Close(fd)
+	}
+}
diff --git a/pkg/tcpip/link/sharedmem/rx.go b/pkg/tcpip/link/sharedmem/rx.go
index e882a128c..87747dcc7 100644
--- a/pkg/tcpip/link/sharedmem/rx.go
+++ b/pkg/tcpip/link/sharedmem/rx.go
@@ -21,7 +21,7 @@ import (
 	"sync/atomic"
 
 	"golang.org/x/sys/unix"
-	"gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+	"gvisor.dev/gvisor/pkg/eventfd"
 	"gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue"
 )
 
@@ -30,7 +30,7 @@ type rx struct {
 	data       []byte
 	sharedData []byte
 	q          queue.Rx
-	eventFD    int
+	eventFD    eventfd.Eventfd
 }
 
 // init initializes all state needed by the rx queue based on the information
@@ -68,7 +68,7 @@ func (r *rx) init(mtu uint32, c *QueueConfig) error {
 
 	// Duplicate the eventFD so that caller can close it but we can still
 	// use it.
-	efd, err := unix.Dup(c.EventFD)
+	efd, err := c.EventFD.Dup()
 	if err != nil {
 		unix.Munmap(txPipe)
 		unix.Munmap(rxPipe)
@@ -77,16 +77,6 @@ func (r *rx) init(mtu uint32, c *QueueConfig) error {
 		return err
 	}
 
-	// Set the eventfd as non-blocking.
-	if err := unix.SetNonblock(efd, true); err != nil {
-		unix.Munmap(txPipe)
-		unix.Munmap(rxPipe)
-		unix.Munmap(data)
-		unix.Munmap(sharedData)
-		unix.Close(efd)
-		return err
-	}
-
 	// Initialize state based on buffers.
 	r.q.Init(txPipe, rxPipe, sharedDataPointer(sharedData))
 	r.data = data
@@ -105,7 +95,13 @@ func (r *rx) cleanup() {
 
 	unix.Munmap(r.data)
 	unix.Munmap(r.sharedData)
-	unix.Close(r.eventFD)
+	r.eventFD.Close()
+}
+
+// notify signals the rx queue's eventFD (r.eventFD) to wake up the peer that
+// is waiting on it.
+func (r *rx) notify() {
+	r.eventFD.Notify()
 }
 
 // postAndReceive posts the provided buffers (if any), and then tries to read
@@ -122,8 +118,7 @@ func (r *rx) postAndReceive(b []queue.RxBuffer, stopRequested *uint32) ([]queue.
 	if len(b) != 0 && !r.q.PostBuffers(b) {
 		r.q.EnableNotification()
 		for !r.q.PostBuffers(b) {
-			var tmp [8]byte
-			rawfile.BlockingRead(r.eventFD, tmp[:])
+			r.eventFD.Wait()
 			if atomic.LoadUint32(stopRequested) != 0 {
 				r.q.DisableNotification()
 				return nil, 0
@@ -147,8 +142,7 @@ func (r *rx) postAndReceive(b []queue.RxBuffer, stopRequested *uint32) ([]queue.
 		}
 
 		// Wait for notification.
-		var tmp [8]byte
-		rawfile.BlockingRead(r.eventFD, tmp[:])
+		r.eventFD.Wait()
 		if atomic.LoadUint32(stopRequested) != 0 {
 			r.q.DisableNotification()
 			return nil, 0
diff --git a/pkg/tcpip/link/sharedmem/server_rx.go b/pkg/tcpip/link/sharedmem/server_rx.go
new file mode 100644
index 000000000..6ea21ffd1
--- /dev/null
+++ b/pkg/tcpip/link/sharedmem/server_rx.go
@@ -0,0 +1,142 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build linux
+// +build linux
+
+package sharedmem
+
+import (
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/cleanup"
+	"gvisor.dev/gvisor/pkg/eventfd"
+	"gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe"
+	"gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue"
+)
+
+type serverRx struct {
+	// packetPipe represents the receive end of the pipe that carries the packet
+	// descriptors sent by the client.
+	packetPipe pipe.Rx
+
+	// completionPipe represents the transmit end of the pipe that will carry
+	// completion notifications from the server to the client.
+	completionPipe pipe.Tx
+
+	// data represents the buffer area where the packet payload is held.
+	data []byte
+
+	// eventFD is used to notify the peer when transmission is completed.
+	eventFD eventfd.Eventfd
+
+	// sharedData is the memory region used to enable/disable notifications.
+	sharedData []byte
+}
+
+// init initializes all state needed by the serverRx queue based on the
+// information provided.
+//
+// The caller always retains ownership of all file descriptors passed in. The
+// queue implementation will duplicate any that it may need in the future.
+func (s *serverRx) init(c *QueueConfig) error {
+	// Map in all buffers.
+	packetPipeMem, err := getBuffer(c.TxPipeFD)
+	if err != nil {
+		return err
+	}
+	cu := cleanup.Make(func() { unix.Munmap(packetPipeMem) })
+	defer cu.Clean()
+
+	completionPipeMem, err := getBuffer(c.RxPipeFD)
+	if err != nil {
+		return err
+	}
+	cu.Add(func() { unix.Munmap(completionPipeMem) })
+
+	data, err := getBuffer(c.DataFD)
+	if err != nil {
+		return err
+	}
+	cu.Add(func() { unix.Munmap(data) })
+
+	sharedData, err := getBuffer(c.SharedDataFD)
+	if err != nil {
+		return err
+	}
+	cu.Add(func() { unix.Munmap(sharedData) })
+
+	// Duplicate the eventFD so that caller can close it but we can still
+	// use it.
+	efd, err := c.EventFD.Dup()
+	if err != nil {
+		return err
+	}
+	cu.Add(func() { efd.Close() })
+
+	s.packetPipe.Init(packetPipeMem)
+	s.completionPipe.Init(completionPipeMem)
+	s.data = data
+	s.eventFD = efd
+	s.sharedData = sharedData
+
+	cu.Release()
+	return nil
+}
+
+func (s *serverRx) cleanup() {
+	unix.Munmap(s.packetPipe.Bytes())
+	unix.Munmap(s.completionPipe.Bytes())
+	unix.Munmap(s.data)
+	unix.Munmap(s.sharedData)
+	s.eventFD.Close()
+}
+
+// completionNotificationSize is size in bytes of a completion notification sent
+// on the completion queue after a transmitted packet has been handled.
+const completionNotificationSize = 8
+
+// receive receives a single packet from the packetPipe.
+func (s *serverRx) receive() []byte {
+	desc := s.packetPipe.Pull()
+	if desc == nil {
+		return nil
+	}
+
+	pktInfo := queue.DecodeTxPacketHeader(desc)
+	contents := make([]byte, 0, pktInfo.Size)
+	toCopy := pktInfo.Size
+	for i := 0; i < pktInfo.BufferCount; i++ {
+		txBuf := queue.DecodeTxBufferHeader(desc, i)
+		if txBuf.Size <= toCopy {
+			contents = append(contents, s.data[txBuf.Offset:][:txBuf.Size]...)
+			toCopy -= txBuf.Size
+			continue
+		}
+		contents = append(contents, s.data[txBuf.Offset:][:toCopy]...)
+		break
+	}
+
+	// Flush to let peer know that slots queued for transmission have been handled
+	// and it's free to reuse the slots.
+	s.packetPipe.Flush()
+	// Encode packet completion.
+	b := s.completionPipe.Push(completionNotificationSize)
+	queue.EncodeTxCompletion(b, pktInfo.ID)
+	s.completionPipe.Flush()
+	return contents
+}
+
+func (s *serverRx) waitForPackets() {
+	s.eventFD.Wait()
+}
diff --git a/pkg/tcpip/link/sharedmem/server_tx.go b/pkg/tcpip/link/sharedmem/server_tx.go
new file mode 100644
index 000000000..13a82903f
--- /dev/null
+++ b/pkg/tcpip/link/sharedmem/server_tx.go
@@ -0,0 +1,175 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build linux
+// +build linux
+
+package sharedmem
+
+import (
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/cleanup"
+	"gvisor.dev/gvisor/pkg/eventfd"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe"
+	"gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue"
+)
+
+// serverTx represents the server end of the sharedmem queue and is used to send
+// packets to the peer in the buffers posted by the peer in the fillPipe.
+type serverTx struct {
+	// fillPipe represents the receive end of the pipe that carries the RxBuffers
+	// posted by the peer.
+	fillPipe pipe.Rx
+
+	// completionPipe represents the transmit end of the pipe that carries the
+	// descriptors for filled RxBuffers.
+	completionPipe pipe.Tx
+
+	// data represents the buffer area where the packet payload is held.
+	data []byte
+
+	// eventFD is used to notify the peer when fill requests are fulfilled.
+	eventFD eventfd.Eventfd
+
+	// sharedData is the memory region used to enable/disable notifications.
+	sharedData []byte
+}
+
+// init initializes all state needed by the serverTx queue based on the
+// information provided.
+//
+// The caller always retains ownership of all file descriptors passed in. The
+// queue implementation will duplicate any that it may need in the future.
+func (s *serverTx) init(c *QueueConfig) error {
+	// Map in all buffers.
+	fillPipeMem, err := getBuffer(c.TxPipeFD)
+	if err != nil {
+		return err
+	}
+	cu := cleanup.Make(func() { unix.Munmap(fillPipeMem) })
+	defer cu.Clean()
+
+	completionPipeMem, err := getBuffer(c.RxPipeFD)
+	if err != nil {
+		return err
+	}
+	cu.Add(func() { unix.Munmap(completionPipeMem) })
+
+	data, err := getBuffer(c.DataFD)
+	if err != nil {
+		return err
+	}
+	cu.Add(func() { unix.Munmap(data) })
+
+	sharedData, err := getBuffer(c.SharedDataFD)
+	if err != nil {
+		return err
+	}
+	cu.Add(func() { unix.Munmap(sharedData) })
+
+	// Duplicate the eventFD so that caller can close it but we can still
+	// use it.
+	efd, err := c.EventFD.Dup()
+	if err != nil {
+		return err
+	}
+	cu.Add(func() { efd.Close() })
+
+	cu.Release()
+
+	s.fillPipe.Init(fillPipeMem)
+	s.completionPipe.Init(completionPipeMem)
+	s.data = data
+	s.eventFD = efd
+	s.sharedData = sharedData
+
+	return nil
+}
+
+func (s *serverTx) cleanup() {
+	unix.Munmap(s.fillPipe.Bytes())
+	unix.Munmap(s.completionPipe.Bytes())
+	unix.Munmap(s.data)
+	unix.Munmap(s.sharedData)
+	s.eventFD.Close()
+}
+
+// fillPacket copies the data in the provided views into buffers pulled from the
+// fillPipe and returns a slice of RxBuffers that contain the copied data as
+// well as the total number of bytes copied.
+//
+// To avoid allocations the filledBuffers are appended to the buffers slice
+// which will be grown as required.
+func (s *serverTx) fillPacket(views []buffer.View, buffers []queue.RxBuffer) (filledBuffers []queue.RxBuffer, totalCopied uint32) {
+	filledBuffers = buffers[:0]
+	// fillBuffer copies as much of the views as possible into the provided buffer,
+	// sets its Size to the bytes copied and returns any left over views (if any).
+	fillBuffer := func(buffer *queue.RxBuffer, views []buffer.View) (left []buffer.View) {
+		if len(views) == 0 {
+			return nil
+		}
+		copied := uint64(0)
+		for copied < uint64(buffer.Size) && len(views) > 0 {
+			n := copy(s.data[buffer.Offset+copied:][:uint64(buffer.Size)-copied], views[0])
+			// Account for the bytes before checking for a partially consumed
+			// view, otherwise the last chunk that filled the buffer is lost.
+			copied += uint64(n)
+			views[0].TrimFront(n)
+			if !views[0].IsEmpty() {
+				break
+			}
+			views = views[1:]
+		}
+		buffer.Size = uint32(copied)
+		return views
+	}
+
+	for len(views) > 0 {
+		var b []byte
+		// Spin till we get a free buffer reposted by the peer.
+		for {
+			if b = s.fillPipe.Pull(); b != nil {
+				break
+			}
+		}
+		rxBuffer := queue.DecodeRxBufferHeader(b)
+		// Copy the packet into the posted buffer.
+		views = fillBuffer(&rxBuffer, views)
+		totalCopied += rxBuffer.Size
+		filledBuffers = append(filledBuffers, rxBuffer)
+	}
+
+	return filledBuffers, totalCopied
+}
+
+func (s *serverTx) transmit(views []buffer.View) bool {
+	buffers := make([]queue.RxBuffer, 8)
+	buffers, totalCopied := s.fillPacket(views, buffers)
+	b := s.completionPipe.Push(queue.RxCompletionSize(len(buffers)))
+	if b == nil {
+		return false
+	}
+	queue.EncodeRxCompletion(b, totalCopied, 0 /* reserved */)
+	for i := 0; i < len(buffers); i++ {
+		queue.EncodeRxCompletionBuffer(b, i, buffers[i])
+	}
+	s.completionPipe.Flush()
+	s.fillPipe.Flush()
+	return true
+}
+
+func (s *serverTx) notify() {
+	s.eventFD.Notify()
+}
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index 66efe6472..bcb37a465 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -24,14 +24,16 @@
 package sharedmem
 
 import (
+	"fmt"
 	"sync/atomic"
 
-	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/eventfd"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
 	"gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
@@ -47,7 +49,7 @@ type QueueConfig struct {
 
 	// EventFD is a file descriptor for the event that is signaled when
 	// data is becomes available in this queue.
-	EventFD int
+	EventFD eventfd.Eventfd
 
 	// TxPipeFD is a file descriptor for the tx pipe associated with the
 	// queue.
@@ -63,16 +65,97 @@ type QueueConfig struct {
 	SharedDataFD int
 }
 
+// FDs returns the FD's in the QueueConfig as a slice of ints. This must
+// be used in conjunction with QueueConfigFromFDs to ensure the order
+// of FDs matches when reconstructing the config when serialized or sent
+// as part of control messages.
+func (q *QueueConfig) FDs() []int {
+	return []int{q.DataFD, q.EventFD.FD(), q.TxPipeFD, q.RxPipeFD, q.SharedDataFD}
+}
+
+// QueueConfigFromFDs constructs a QueueConfig out of a slice of ints where each
+// entry represents a file descriptor. The slice must hold exactly 5 FDs in the
+// order produced by QueueConfig.FDs() (data, event, tx pipe, rx pipe, shared
+// data); any other length is rejected with an error. Use QueueConfig.FDs()
+// when serializing the config or sending it as part of a control message.
+func QueueConfigFromFDs(fds []int) (QueueConfig, error) {
+	if len(fds) != 5 {
+		return QueueConfig{}, fmt.Errorf("unexpected number of fds: len(fds): %d, want: 5", len(fds))
+	}
+	return QueueConfig{
+		DataFD:       fds[0],
+		EventFD:      eventfd.Wrap(fds[1]),
+		TxPipeFD:     fds[2],
+		RxPipeFD:     fds[3],
+		SharedDataFD: fds[4],
+	}, nil
+}
+
+// Options specify the details about the sharedmem endpoint to be created.
+type Options struct {
+	// MTU is the mtu to use for this endpoint.
+	MTU uint32
+
+	// BufferSize is the size of each scatter/gather buffer that will hold packet
+	// data.
+	//
+	// NOTE: This directly determines number of packets that can be held in
+	// the ring buffer at any time. This does not have to be sized to the MTU as
+	// the shared memory queue design allows usage of more than one buffer to be
+	// used to make up a given packet.
+	BufferSize uint32
+
+	// LinkAddress is the link address for this endpoint (required).
+	LinkAddress tcpip.LinkAddress
+
+	// TX is the transmit queue configuration for this shared memory endpoint.
+	TX QueueConfig
+
+	// RX is the receive queue configuration for this shared memory endpoint.
+	RX QueueConfig
+
+	// PeerFD is the fd for the connected peer which can be used to detect
+	// peer disconnects.
+	PeerFD int
+
+	// OnClosed is a function that is called when the endpoint is being closed
+	// (probably due to peer going away)
+	OnClosed func(err tcpip.Error)
+
+	// TXChecksumOffload if true, indicates that this endpoint's capability
+	// set should include CapabilityTXChecksumOffload.
+	TXChecksumOffload bool
+
+	// RXChecksumOffload if true, indicates that this endpoint's capability
+	// set should include CapabilityRXChecksumOffload.
+	RXChecksumOffload bool
+}
+
 type endpoint struct {
 	// mtu (maximum transmission unit) is the maximum size of a packet.
+	// mtu is immutable.
 	mtu uint32
 
 	// bufferSize is the size of each individual buffer.
+	// bufferSize is immutable.
 	bufferSize uint32
 
 	// addr is the local address of this endpoint.
+	// addr is immutable.
 	addr tcpip.LinkAddress
 
+	// peerFD is an fd to the peer that can be used to detect when the
+	// peer is gone.
+	// peerFD is immutable.
+	peerFD int
+
+	// caps holds the endpoint capabilities.
+	caps stack.LinkEndpointCapabilities
+
+	// hdrSize is the size of the link layer header if any.
+	// hdrSize is immutable.
+	hdrSize uint32
+
 	// rx is the receive queue.
 	rx rx
 
@@ -83,34 +166,55 @@ type endpoint struct {
 	// Wait group used to indicate that all workers have stopped.
 	completed sync.WaitGroup
 
+	// onClosed is a function to be called when the FD's peer (if any) closes
+	// its end of the communication pipe.
+	onClosed func(tcpip.Error)
+
 	// mu protects the following fields.
 	mu sync.Mutex
 
 	// tx is the transmit queue.
+	// +checklocks:mu
 	tx tx
 
 	// workerStarted specifies whether the worker goroutine was started.
+	// +checklocks:mu
 	workerStarted bool
 }
 
 // New creates a new shared-memory-based endpoint. Buffers will be broken up
 // into buffers of "bufferSize" bytes.
-func New(mtu, bufferSize uint32, addr tcpip.LinkAddress, tx, rx QueueConfig) (stack.LinkEndpoint, error) {
+func New(opts Options) (stack.LinkEndpoint, error) {
 	e := &endpoint{
-		mtu:        mtu,
-		bufferSize: bufferSize,
-		addr:       addr,
+		mtu:        opts.MTU,
+		bufferSize: opts.BufferSize,
+		addr:       opts.LinkAddress,
+		peerFD:     opts.PeerFD,
+		onClosed:   opts.OnClosed,
 	}
 
-	if err := e.tx.init(bufferSize, &tx); err != nil {
+	if err := e.tx.init(opts.BufferSize, &opts.TX); err != nil {
 		return nil, err
 	}
 
-	if err := e.rx.init(bufferSize, &rx); err != nil {
+	if err := e.rx.init(opts.BufferSize, &opts.RX); err != nil {
 		e.tx.cleanup()
 		return nil, err
 	}
 
+	e.caps = stack.LinkEndpointCapabilities(0)
+	if opts.RXChecksumOffload {
+		e.caps |= stack.CapabilityRXChecksumOffload
+	}
+
+	if opts.TXChecksumOffload {
+		e.caps |= stack.CapabilityTXChecksumOffload
+	}
+
+	if opts.LinkAddress != "" {
+		e.hdrSize = header.EthernetMinimumSize
+		e.caps |= stack.CapabilityResolutionRequired
+	}
 	return e, nil
 }
 
@@ -119,13 +223,13 @@ func (e *endpoint) Close() {
 	// Tell dispatch goroutine to stop, then write to the eventfd so that
 	// it wakes up in case it's sleeping.
 	atomic.StoreUint32(&e.stopRequested, 1)
-	unix.Write(e.rx.eventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})
+	e.rx.eventFD.Notify()
 
 	// Cleanup the queues inline if the worker hasn't started yet; we also
 	// know it won't start from now on because stopRequested is set to 1.
 	e.mu.Lock()
+	defer e.mu.Unlock()
 	workerPresent := e.workerStarted
-	e.mu.Unlock()
 
 	if !workerPresent {
 		e.tx.cleanup()
@@ -146,6 +250,22 @@ func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) {
 	if !e.workerStarted && atomic.LoadUint32(&e.stopRequested) == 0 {
 		e.workerStarted = true
 		e.completed.Add(1)
+
+		// Spin up a goroutine to monitor for peer shutdown.
+		if e.peerFD >= 0 {
+			e.completed.Add(1)
+			go func() {
+				defer e.completed.Done()
+				b := make([]byte, 1)
+				// When sharedmem endpoint is in use the peerFD is never used for any data
+				// transfer and this Read should only return if the peer is shutting down.
+				_, err := rawfile.BlockingRead(e.peerFD, b)
+				if e.onClosed != nil {
+					e.onClosed(err)
+				}
+			}()
+		}
+
 		// Link endpoints are not savable. When transportation endpoints
 		// are saved, they stop sending outgoing packets and all
 		// incoming packets are rejected.
@@ -164,18 +284,18 @@ func (e *endpoint) IsAttached() bool {
 // MTU implements stack.LinkEndpoint.MTU. It returns the value initialized
 // during construction.
 func (e *endpoint) MTU() uint32 {
-	return e.mtu - header.EthernetMinimumSize
+	return e.mtu - e.hdrSize
 }
 
 // Capabilities implements stack.LinkEndpoint.Capabilities.
-func (*endpoint) Capabilities() stack.LinkEndpointCapabilities {
-	return 0
+func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities {
+	return e.caps
 }
 
 // MaxHeaderLength implements stack.LinkEndpoint.MaxHeaderLength. It returns the
 // ethernet frame header size.
-func (*endpoint) MaxHeaderLength() uint16 {
-	return header.EthernetMinimumSize
+func (e *endpoint) MaxHeaderLength() uint16 {
+	return uint16(e.hdrSize)
 }
 
 // LinkAddress implements stack.LinkEndpoint.LinkAddress. It returns the local
@@ -205,17 +325,15 @@ func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.Net
 // WriteRawPacket implements stack.LinkEndpoint.
 func (*endpoint) WriteRawPacket(*stack.PacketBuffer) tcpip.Error { return &tcpip.ErrNotSupported{} }
 
-// WritePacket writes outbound packets to the file descriptor. If it is not
-// currently writable, the packet is dropped.
-func (e *endpoint) WritePacket(r stack.RouteInfo, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error {
-	e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt)
+// +checklocks:e.mu
+func (e *endpoint) writePacketLocked(r stack.RouteInfo, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error {
+	if e.addr != "" {
+		e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt)
+	}
 
 	views := pkt.Views()
 	// Transmit the packet.
-	e.mu.Lock()
 	ok := e.tx.transmit(views...)
-	e.mu.Unlock()
-
 	if !ok {
 		return &tcpip.ErrWouldBlock{}
 	}
@@ -223,9 +341,37 @@ func (e *endpoint) WritePacket(r stack.RouteInfo, protocol tcpip.NetworkProtocol
 	return nil
 }
 
+// WritePacket transmits an outbound packet on the tx queue and then notifies
+// the peer. If the packet can't be transmitted, ErrWouldBlock is returned.
+func (e *endpoint) WritePacket(r stack.RouteInfo, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	if err := e.writePacketLocked(r, protocol, pkt); err != nil {
+		return err
+	}
+	e.tx.notify()
+	return nil
+}
+
 // WritePackets implements stack.LinkEndpoint.WritePackets.
-func (*endpoint) WritePackets(stack.RouteInfo, stack.PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error) {
-	panic("not implemented")
+func (e *endpoint) WritePackets(r stack.RouteInfo, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) {
+	n := 0
+	var err tcpip.Error
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
+		if err = e.writePacketLocked(r, pkt.NetworkProtocolNumber, pkt); err != nil {
+			break
+		}
+		n++
+	}
+	// WritePackets never returns an error if it successfully transmitted at least
+	// one packet.
+	if err != nil && n == 0 {
+		return 0, err
+	}
+	e.tx.notify()
+	return n, nil
 }
 
 // dispatchLoop reads packets from the rx queue in a loop and dispatches them
@@ -268,16 +414,42 @@ func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) {
 			Data: buffer.View(b).ToVectorisedView(),
 		})
 
-		hdr, ok := pkt.LinkHeader().Consume(header.EthernetMinimumSize)
-		if !ok {
-			continue
+		var src, dst tcpip.LinkAddress
+		var proto tcpip.NetworkProtocolNumber
+		if e.addr != "" {
+			hdr, ok := pkt.LinkHeader().Consume(header.EthernetMinimumSize)
+			if !ok {
+				continue
+			}
+			eth := header.Ethernet(hdr)
+			src = eth.SourceAddress()
+			dst = eth.DestinationAddress()
+			proto = eth.Type()
+		} else {
+			// We don't get any indication of what the packet is, so try to guess
+			// if it's an IPv4 or IPv6 packet.
+			// IP version information is at the first octet, so pulling up 1 byte.
+			h, ok := pkt.Data().PullUp(1)
+			if !ok {
+				continue
+			}
+			switch header.IPVersion(h) {
+			case header.IPv4Version:
+				proto = header.IPv4ProtocolNumber
+			case header.IPv6Version:
+				proto = header.IPv6ProtocolNumber
+			default:
+				continue
+			}
 		}
-		eth := header.Ethernet(hdr)
 
 		// Send packet up the stack.
-		d.DeliverNetworkPacket(eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), pkt)
+		d.DeliverNetworkPacket(src, dst, proto, pkt)
 	}
 
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
 	// Clean state.
 	e.tx.cleanup()
 	e.rx.cleanup()
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_server.go b/pkg/tcpip/link/sharedmem/sharedmem_server.go
new file mode 100644
index 000000000..ccc84989d
--- /dev/null
+++ b/pkg/tcpip/link/sharedmem/sharedmem_server.go
@@ -0,0 +1,333 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build linux
+// +build linux
+
+package sharedmem
+
+import (
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+)
+
+type serverEndpoint struct {
+	// mtu (maximum transmission unit) is the maximum size of a packet.
+	// mtu is immutable.
+	mtu uint32
+
+	// bufferSize is the size of each individual buffer.
+	// bufferSize is immutable.
+	bufferSize uint32
+
+	// addr is the local address of this endpoint.
+	// addr is immutable
+	addr tcpip.LinkAddress
+
+	// rx is the receive queue.
+	rx serverRx
+
+	// stopRequested is to be accessed atomically only, and determines if the
+	// worker goroutines should stop.
+	stopRequested uint32
+
+	// Wait group used to indicate that all workers have stopped.
+	completed sync.WaitGroup
+
+	// peerFD is an fd to the peer that can be used to detect when the peer is
+	// gone.
+	// peerFD is immutable.
+	peerFD int
+
+	// caps holds the endpoint capabilities.
+	caps stack.LinkEndpointCapabilities
+
+	// hdrSize is the size of the link layer header if any.
+	// hdrSize is immutable.
+	hdrSize uint32
+
+	// onClosed is a function to be called when the FD's peer (if any) closes its
+	// end of the communication pipe.
+	onClosed func(tcpip.Error)
+
+	// mu protects the following fields.
+	mu sync.Mutex
+
+	// tx is the transmit queue.
+	// +checklocks:mu
+	tx serverTx
+
+	// workerStarted specifies whether the worker goroutine was started.
+	// +checklocks:mu
+	workerStarted bool
+}
+
+// NewServerEndpoint creates a new shared-memory-based endpoint. Buffers will be
+// broken up into buffers of "bufferSize" bytes.
+func NewServerEndpoint(opts Options) (stack.LinkEndpoint, error) {
+	e := &serverEndpoint{
+		mtu:        opts.MTU,
+		bufferSize: opts.BufferSize,
+		addr:       opts.LinkAddress,
+		peerFD:     opts.PeerFD,
+		onClosed:   opts.OnClosed,
+	}
+
+	if err := e.tx.init(&opts.RX); err != nil {
+		return nil, err
+	}
+
+	if err := e.rx.init(&opts.TX); err != nil {
+		e.tx.cleanup()
+		return nil, err
+	}
+
+	e.caps = stack.LinkEndpointCapabilities(0)
+	if opts.RXChecksumOffload {
+		e.caps |= stack.CapabilityRXChecksumOffload
+	}
+
+	if opts.TXChecksumOffload {
+		e.caps |= stack.CapabilityTXChecksumOffload
+	}
+
+	if opts.LinkAddress != "" {
+		e.hdrSize = header.EthernetMinimumSize
+		e.caps |= stack.CapabilityResolutionRequired
+	}
+
+	return e, nil
+}
+
+// Close frees all resources associated with the endpoint.
+func (e *serverEndpoint) Close() {
+	// Tell dispatch goroutine to stop, then write to the eventfd so that it wakes
+	// up in case it's sleeping.
+	atomic.StoreUint32(&e.stopRequested, 1)
+	e.rx.eventFD.Notify()
+
+	// Cleanup the queues inline if the worker hasn't started yet; we also know it
+	// won't start from now on because stopRequested is set to 1.
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	workerPresent := e.workerStarted
+
+	if !workerPresent {
+		e.tx.cleanup()
+		e.rx.cleanup()
+	}
+}
+
+// Wait implements stack.LinkEndpoint.Wait. It waits until all workers have
+// stopped after a Close() call.
+func (e *serverEndpoint) Wait() {
+	e.completed.Wait()
+}
+
+// Attach implements stack.LinkEndpoint.Attach. It launches the goroutine that
+// reads packets from the rx queue.
+func (e *serverEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
+	e.mu.Lock()
+	if !e.workerStarted && atomic.LoadUint32(&e.stopRequested) == 0 {
+		e.workerStarted = true
+		e.completed.Add(1)
+		if e.peerFD >= 0 {
+			e.completed.Add(1)
+			// Spin up a goroutine to monitor for peer shutdown.
+			go func() {
+				b := make([]byte, 1)
+				// When sharedmem endpoint is in use the peerFD is never used for any
+				// data transfer and this Read should only return if the peer is
+				// shutting down.
+				_, err := rawfile.BlockingRead(e.peerFD, b)
+				if e.onClosed != nil {
+					e.onClosed(err)
+				}
+				e.completed.Done()
+			}()
+		}
+		// Link endpoints are not savable. When transportation endpoints are saved,
+		// they stop sending outgoing packets and all incoming packets are rejected.
+		go e.dispatchLoop(dispatcher) // S/R-SAFE: see above.
+	}
+	e.mu.Unlock()
+}
+
+// IsAttached implements stack.LinkEndpoint.IsAttached.
+func (e *serverEndpoint) IsAttached() bool {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	return e.workerStarted
+}
+
+// MTU implements stack.LinkEndpoint.MTU. It returns the value initialized
+// during construction.
+func (e *serverEndpoint) MTU() uint32 {
+	return e.mtu - e.hdrSize
+}
+
+// Capabilities implements stack.LinkEndpoint.Capabilities.
+func (e *serverEndpoint) Capabilities() stack.LinkEndpointCapabilities {
+	return e.caps
+}
+
+// MaxHeaderLength implements stack.LinkEndpoint.MaxHeaderLength. It returns
+// hdrSize: the ethernet header size, or zero when no link address is set.
+func (e *serverEndpoint) MaxHeaderLength() uint16 {
+	return uint16(e.hdrSize)
+}
+
+// LinkAddress implements stack.LinkEndpoint.LinkAddress. It returns the local
+// link address.
+func (e *serverEndpoint) LinkAddress() tcpip.LinkAddress {
+	return e.addr
+}
+
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *serverEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	// Add ethernet header if needed.
+	eth := header.Ethernet(pkt.LinkHeader().Push(header.EthernetMinimumSize))
+	ethHdr := &header.EthernetFields{
+		DstAddr: remote,
+		Type:    protocol,
+	}
+
+	// Preserve the src address if it's set in the route.
+	if local != "" {
+		ethHdr.SrcAddr = local
+	} else {
+		ethHdr.SrcAddr = e.addr
+	}
+	eth.Encode(ethHdr)
+}
+
+// WriteRawPacket implements stack.LinkEndpoint.
+func (*serverEndpoint) WriteRawPacket(*stack.PacketBuffer) tcpip.Error {
+	return &tcpip.ErrNotSupported{}
+}
+
+// writePacketLocked adds the ethernet header (if addressed) and transmits pkt.
+//
+// +checklocks:e.mu
+func (e *serverEndpoint) writePacketLocked(r stack.RouteInfo, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error {
+	if e.addr != "" {
+		e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt)
+	}
+	if !e.tx.transmit(pkt.Views()) {
+		return &tcpip.ErrWouldBlock{}
+	}
+	return nil
+}
+
+// WritePacket writes an outbound packet to the shared memory tx queue and
+// notifies the peer. It returns ErrWouldBlock if the packet can't be queued.
+func (e *serverEndpoint) WritePacket(r stack.RouteInfo, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error {
+	// Transmit the packet.
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	if err := e.writePacketLocked(r, protocol, pkt); err != nil {
+		return err
+	}
+	e.tx.notify()
+	return nil
+}
+
+// WritePackets implements stack.LinkEndpoint.WritePackets.
+func (e *serverEndpoint) WritePackets(r stack.RouteInfo, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) {
+	n := 0
+	var err tcpip.Error
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
+		if err = e.writePacketLocked(r, pkt.NetworkProtocolNumber, pkt); err != nil {
+			break
+		}
+		n++
+	}
+	// WritePackets never returns an error if it successfully transmitted at least
+	// one packet.
+	if err != nil && n == 0 {
+		return 0, err
+	}
+	e.tx.notify()
+	return n, nil
+}
+
+// dispatchLoop reads packets from the rx queue in a loop and dispatches them
+// to the network stack.
+func (e *serverEndpoint) dispatchLoop(d stack.NetworkDispatcher) {
+	for atomic.LoadUint32(&e.stopRequested) == 0 {
+		b := e.rx.receive()
+		if b == nil {
+			e.rx.waitForPackets()
+			continue
+		}
+		pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+			Data: buffer.View(b).ToVectorisedView(),
+		})
+		var src, dst tcpip.LinkAddress
+		var proto tcpip.NetworkProtocolNumber
+		if e.addr != "" {
+			hdr, ok := pkt.LinkHeader().Consume(header.EthernetMinimumSize)
+			if !ok {
+				continue
+			}
+			eth := header.Ethernet(hdr)
+			src = eth.SourceAddress()
+			dst = eth.DestinationAddress()
+			proto = eth.Type()
+		} else {
+			// We don't get any indication of what the packet is, so try to guess
+			// if it's an IPv4 or IPv6 packet.
+			// IP version information is at the first octet, so pulling up 1 byte.
+			h, ok := pkt.Data().PullUp(1)
+			if !ok {
+				continue
+			}
+			switch header.IPVersion(h) {
+			case header.IPv4Version:
+				proto = header.IPv4ProtocolNumber
+			case header.IPv6Version:
+				proto = header.IPv6ProtocolNumber
+			default:
+				continue
+			}
+		}
+		// Send packet up the stack.
+		d.DeliverNetworkPacket(src, dst, proto, pkt)
+	}
+
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	// Clean state.
+	e.tx.cleanup()
+	e.rx.cleanup()
+
+	e.completed.Done()
+}
+
+// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType
+func (e *serverEndpoint) ARPHardwareType() header.ARPHardwareType {
+	if e.hdrSize > 0 {
+		return header.ARPHardwareEther
+	}
+	return header.ARPHardwareNone
+}
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_server_test.go b/pkg/tcpip/link/sharedmem/sharedmem_server_test.go
new file mode 100644
index 000000000..1bc58614e
--- /dev/null
+++ b/pkg/tcpip/link/sharedmem/sharedmem_server_test.go
@@ -0,0 +1,220 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build linux
+// +build linux
+
+package sharedmem_server_test
+
+import (
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"syscall"
+	"testing"
+
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/adapters/gonet"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/link/sharedmem"
+	"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+	"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+)
+
+const (
+	localLinkAddr     = "\xde\xad\xbe\xef\x56\x78"
+	remoteLinkAddr    = "\xde\xad\xbe\xef\x12\x34"
+	localIPv4Address  = tcpip.Address("\x0a\x00\x00\x01")
+	remoteIPv4Address = tcpip.Address("\x0a\x00\x00\x02")
+	serverPort        = 10001
+
+	defaultMTU        = 1500
+	defaultBufferSize = 1500
+)
+
+type stackOptions struct {
+	ep   stack.LinkEndpoint
+	addr tcpip.Address
+}
+
+func newStackWithOptions(stackOpts stackOptions) (*stack.Stack, error) {
+	st := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocolFactory{
+			ipv4.NewProtocolWithOptions(ipv4.Options{
+				AllowExternalLoopbackTraffic: true,
+			}),
+			ipv6.NewProtocolWithOptions(ipv6.Options{
+				AllowExternalLoopbackTraffic: true,
+			}),
+		},
+		TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol, udp.NewProtocol},
+	})
+	nicID := tcpip.NICID(1)
+	sniffEP := sniffer.New(stackOpts.ep)
+	opts := stack.NICOptions{Name: "eth0"}
+	if err := st.CreateNICWithOptions(nicID, sniffEP, opts); err != nil {
+		return nil, fmt.Errorf("method CreateNICWithOptions(%d, _, %v) failed: %s", nicID, opts, err)
+	}
+
+	// Add the protocol address: IPv4 by default, IPv6 when addr is 16 bytes long.
+	protocolNum := ipv4.ProtocolNumber
+	routeTable := []tcpip.Route{{Destination: header.IPv4EmptySubnet, NIC: nicID}}
+	if len(stackOpts.addr) == 16 {
+		routeTable = []tcpip.Route{{Destination: header.IPv6EmptySubnet, NIC: nicID}}
+		protocolNum = ipv6.ProtocolNumber
+	}
+	protocolAddr := tcpip.ProtocolAddress{
+		Protocol:          protocolNum,
+		AddressWithPrefix: stackOpts.addr.WithPrefix(),
+	}
+	if err := st.AddProtocolAddress(nicID, protocolAddr, stack.AddressProperties{}); err != nil {
+		return nil, fmt.Errorf("AddProtocolAddress(%d, %v, {}): %s", nicID, protocolAddr, err)
+	}
+
+	// Set up the route table with the single empty-subnet route chosen above.
+	st.SetRouteTable(routeTable)
+
+	return st, nil
+}
+
+func newClientStack(t *testing.T, qPair *sharedmem.QueuePair, peerFD int) (*stack.Stack, error) {
+	ep, err := sharedmem.New(sharedmem.Options{
+		MTU:         defaultMTU,
+		BufferSize:  defaultBufferSize,
+		LinkAddress: localLinkAddr,
+		TX:          qPair.TXQueueConfig(),
+		RX:          qPair.RXQueueConfig(),
+		PeerFD:      peerFD,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to create sharedmem endpoint: %s", err)
+	}
+	st, err := newStackWithOptions(stackOptions{ep: ep, addr: localIPv4Address})
+	if err != nil {
+		return nil, fmt.Errorf("failed to create client stack: %s", err)
+	}
+	return st, nil
+}
+
+func newServerStack(t *testing.T, qPair *sharedmem.QueuePair, peerFD int) (*stack.Stack, error) {
+	ep, err := sharedmem.NewServerEndpoint(sharedmem.Options{
+		MTU:         defaultMTU,
+		BufferSize:  defaultBufferSize,
+		LinkAddress: remoteLinkAddr,
+		TX:          qPair.TXQueueConfig(),
+		RX:          qPair.RXQueueConfig(),
+		PeerFD:      peerFD,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to create sharedmem endpoint: %s", err)
+	}
+	st, err := newStackWithOptions(stackOptions{ep: ep, addr: remoteIPv4Address})
+	if err != nil {
+		return nil, fmt.Errorf("failed to create server stack: %s", err)
+	}
+	return st, nil
+}
+
+type testContext struct {
+	clientStk *stack.Stack
+	serverStk *stack.Stack
+	peerFDs   [2]int
+}
+
+// newTestContext builds client and server stacks over one queue pair, closing what it opened on failure.
+func newTestContext(t *testing.T) *testContext {
+	peerFDs, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_SEQPACKET|syscall.SOCK_NONBLOCK, 0)
+	if err != nil {
+		t.Fatalf("failed to create peerFDs: %s", err)
+	}
+	closePeerFDs := func() {
+		unix.Close(peerFDs[0])
+		unix.Close(peerFDs[1])
+	}
+	q, err := sharedmem.NewQueuePair()
+	if err != nil {
+		closePeerFDs()
+		t.Fatalf("failed to create sharedmem queue: %s", err)
+	}
+	clientStack, err := newClientStack(t, q, peerFDs[0])
+	if err != nil {
+		q.Close()
+		closePeerFDs()
+		t.Fatalf("failed to create client stack: %s", err)
+	}
+	serverStack, err := newServerStack(t, q, peerFDs[1])
+	if err != nil {
+		q.Close()
+		closePeerFDs()
+		clientStack.Close()
+		t.Fatalf("failed to create server stack: %s", err)
+	}
+	return &testContext{clientStk: clientStack, serverStk: serverStack, peerFDs: peerFDs}
+}
+
+func (ctx *testContext) cleanup() {
+	unix.Close(ctx.peerFDs[0])
+	unix.Close(ctx.peerFDs[1])
+	ctx.clientStk.Close()
+	ctx.serverStk.Close()
+}
+
+// TestServerRoundTrip does an HTTP GET from the client stack to a server on the server stack.
+func TestServerRoundTrip(t *testing.T) {
+	ctx := newTestContext(t)
+	defer ctx.cleanup()
+	listenAddr := tcpip.FullAddress{Addr: remoteIPv4Address, Port: serverPort}
+	l, err := gonet.ListenTCP(ctx.serverStk, listenAddr, ipv4.ProtocolNumber)
+	if err != nil {
+		t.Fatalf("failed to start TCP Listener: %s", err)
+	}
+	defer l.Close()
+	var responseString = "response"
+	go func() {
+		http.Serve(l, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			w.Write([]byte(responseString))
+		}))
+	}()
+
+	httpClient := &http.Client{
+		Transport: &http.Transport{
+			Dial: func(network, addr string) (net.Conn, error) {
+				return gonet.DialTCP(ctx.clientStk, listenAddr, ipv4.ProtocolNumber)
+			},
+		},
+	}
+	// Square brackets are only valid around IPv6 literals; this host is IPv4.
+	serverURL := fmt.Sprintf("http://%s:%d/", net.IP(remoteIPv4Address), serverPort)
+	response, err := httpClient.Get(serverURL)
+	if err != nil {
+		t.Fatalf("httpClient.Get(\"/\") failed: %s", err)
+	}
+	defer response.Body.Close()
+	if got, want := response.StatusCode, http.StatusOK; got != want {
+		t.Fatalf("unexpected status code got: %d, want: %d", got, want)
+	}
+	body, err := io.ReadAll(response.Body)
+	if err != nil {
+		t.Fatalf("io.ReadAll(response.Body) failed: %s", err)
+	}
+	if got, want := string(body), responseString; got != want {
+		t.Fatalf("unexpected response got: %s, want: %s", got, want)
+	}
+}
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index d6d953085..66ffc33b8 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -19,9 +19,7 @@ package sharedmem
 
 import (
 	"bytes"
-	"io/ioutil"
 	"math/rand"
-	"os"
 	"strings"
 	"testing"
 	"time"
@@ -104,24 +102,36 @@ func newTestContext(t *testing.T, mtu, bufferSize uint32, addr tcpip.LinkAddress
 		t:        t,
 		packetCh: make(chan struct{}, 1000000),
 	}
-	c.txCfg = createQueueFDs(t, queueSizes{
+	c.txCfg, err = createQueueFDs(queueSizes{
 		dataSize:       queueDataSize,
 		txPipeSize:     queuePipeSize,
 		rxPipeSize:     queuePipeSize,
 		sharedDataSize: 4096,
 	})
-
-	c.rxCfg = createQueueFDs(t, queueSizes{
+	if err != nil {
+		t.Fatalf("createQueueFDs for tx failed: %s", err)
+	}
+	c.rxCfg, err = createQueueFDs(queueSizes{
 		dataSize:       queueDataSize,
 		txPipeSize:     queuePipeSize,
 		rxPipeSize:     queuePipeSize,
 		sharedDataSize: 4096,
 	})
+	if err != nil {
+		t.Fatalf("createQueueFDs for rx failed: %s", err)
+	}
 
 	initQueue(t, &c.txq, &c.txCfg)
 	initQueue(t, &c.rxq, &c.rxCfg)
 
-	ep, err := New(mtu, bufferSize, addr, c.txCfg, c.rxCfg)
+	ep, err := New(Options{
+		MTU:         mtu,
+		BufferSize:  bufferSize,
+		LinkAddress: addr,
+		TX:          c.txCfg,
+		RX:          c.rxCfg,
+		PeerFD:      -1,
+	})
 	if err != nil {
 		t.Fatalf("New failed: %v", err)
 	}
@@ -150,8 +160,8 @@ func (c *testContext) DeliverOutboundPacket(remoteLinkAddr, localLinkAddr tcpip.
 
 func (c *testContext) cleanup() {
 	c.ep.Close()
-	closeFDs(&c.txCfg)
-	closeFDs(&c.rxCfg)
+	closeFDs(c.txCfg)
+	closeFDs(c.rxCfg)
 	c.txq.cleanup()
 	c.rxq.cleanup()
 }
@@ -191,69 +201,6 @@ func shuffle(b []int) {
 	}
 }
 
-func createFile(t *testing.T, size int64, initQueue bool) int {
-	tmpDir, ok := os.LookupEnv("TEST_TMPDIR")
-	if !ok {
-		tmpDir = os.Getenv("TMPDIR")
-	}
-	f, err := ioutil.TempFile(tmpDir, "sharedmem_test")
-	if err != nil {
-		t.Fatalf("TempFile failed: %v", err)
-	}
-	defer f.Close()
-	unix.Unlink(f.Name())
-
-	if initQueue {
-		// Write the "slot-free" flag in the initial queue.
-		_, err := f.WriteAt([]byte{0, 0, 0, 0, 0, 0, 0, 0x80}, 0)
-		if err != nil {
-			t.Fatalf("WriteAt failed: %v", err)
-		}
-	}
-
-	fd, err := unix.Dup(int(f.Fd()))
-	if err != nil {
-		t.Fatalf("Dup failed: %v", err)
-	}
-
-	if err := unix.Ftruncate(fd, size); err != nil {
-		unix.Close(fd)
-		t.Fatalf("Ftruncate failed: %v", err)
-	}
-
-	return fd
-}
-
-func closeFDs(c *QueueConfig) {
-	unix.Close(c.DataFD)
-	unix.Close(c.EventFD)
-	unix.Close(c.TxPipeFD)
-	unix.Close(c.RxPipeFD)
-	unix.Close(c.SharedDataFD)
-}
-
-type queueSizes struct {
-	dataSize       int64
-	txPipeSize     int64
-	rxPipeSize     int64
-	sharedDataSize int64
-}
-
-func createQueueFDs(t *testing.T, s queueSizes) QueueConfig {
-	fd, _, err := unix.RawSyscall(unix.SYS_EVENTFD2, 0, 0, 0)
-	if err != 0 {
-		t.Fatalf("eventfd failed: %v", error(err))
-	}
-
-	return QueueConfig{
-		EventFD:      int(fd),
-		DataFD:       createFile(t, s.dataSize, false),
-		TxPipeFD:     createFile(t, s.txPipeSize, true),
-		RxPipeFD:     createFile(t, s.rxPipeSize, true),
-		SharedDataFD: createFile(t, s.sharedDataSize, false),
-	}
-}
-
 // TestSimpleSend sends 1000 packets with random header and payload sizes,
 // then checks that the right payload is received on the shared memory queues.
 func TestSimpleSend(t *testing.T) {
@@ -672,7 +619,7 @@ func TestSimpleReceive(t *testing.T) {
 		// Push completion.
 		c.pushRxCompletion(uint32(len(contents)), bufs)
 		c.rxq.rx.Flush()
-		unix.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})
+		c.rxCfg.EventFD.Notify()
 
 		// Wait for packet to be received, then check it.
 		c.waitForPackets(1, time.After(5*time.Second), "Timeout waiting for packet")
@@ -718,7 +665,7 @@ func TestRxBuffersReposted(t *testing.T) {
 		// Complete the buffer.
 		c.pushRxCompletion(buffers[i].Size, buffers[i:][:1])
 		c.rxq.rx.Flush()
-		unix.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})
+		c.rxCfg.EventFD.Notify()
 
 		// Wait for it to be reposted.
 		bi := queue.DecodeRxBufferHeader(pollPull(t, &c.rxq.tx, timeout, "Timeout waiting for buffer to be reposted"))
@@ -734,7 +681,7 @@ func TestRxBuffersReposted(t *testing.T) {
 		// Complete with two buffers.
 		c.pushRxCompletion(2*bufferSize, buffers[2*i:][:2])
 		c.rxq.rx.Flush()
-		unix.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})
+		c.rxCfg.EventFD.Notify()
 
 		// Wait for them to be reposted.
 		for j := 0; j < 2; j++ {
@@ -759,7 +706,7 @@ func TestReceivePostingIsFull(t *testing.T) {
 	first := queue.DecodeRxBufferHeader(pollPull(t, &c.rxq.tx, time.After(time.Second), "Timeout waiting for first buffer to be posted"))
 	c.pushRxCompletion(first.Size, []queue.RxBuffer{first})
 	c.rxq.rx.Flush()
-	unix.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})
+	c.rxCfg.EventFD.Notify()
 
 	// Check that packet is received.
 	c.waitForPackets(1, time.After(time.Second), "Timeout waiting for completed packet")
@@ -768,7 +715,7 @@ func TestReceivePostingIsFull(t *testing.T) {
 	second := queue.DecodeRxBufferHeader(pollPull(t, &c.rxq.tx, time.After(time.Second), "Timeout waiting for second buffer to be posted"))
 	c.pushRxCompletion(second.Size, []queue.RxBuffer{second})
 	c.rxq.rx.Flush()
-	unix.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})
+	c.rxCfg.EventFD.Notify()
 
 	// Check that no packet is received yet, as the worker is blocked trying
 	// to repost.
@@ -781,7 +728,7 @@ func TestReceivePostingIsFull(t *testing.T) {
 	// Flush tx queue, which will allow the first buffer to be reposted,
 	// and the second completion to be pulled.
 	c.rxq.tx.Flush()
-	unix.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})
+	c.rxCfg.EventFD.Notify()
 
 	// Check that second packet completes.
 	c.waitForPackets(1, time.After(time.Second), "Timeout waiting for second completed packet")
@@ -803,7 +750,7 @@ func TestCloseWhileWaitingToPost(t *testing.T) {
 	bi := queue.DecodeRxBufferHeader(pollPull(t, &c.rxq.tx, time.After(time.Second), "Timeout waiting for initial buffer to be posted"))
 	c.pushRxCompletion(bi.Size, []queue.RxBuffer{bi})
 	c.rxq.rx.Flush()
-	unix.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})
+	c.rxCfg.EventFD.Notify()
 
 	// Wait for packet to be indicated.
 	c.waitForPackets(1, time.After(time.Second), "Timeout waiting for completed packet")
diff --git a/pkg/tcpip/link/sharedmem/tx.go b/pkg/tcpip/link/sharedmem/tx.go
index e3210051f..35e5bff12 100644
--- a/pkg/tcpip/link/sharedmem/tx.go
+++ b/pkg/tcpip/link/sharedmem/tx.go
@@ -18,6 +18,7 @@ import (
 	"math"
 
 	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/eventfd"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue"
 )
@@ -28,10 +29,12 @@ const (
 
 // tx holds all state associated with a tx queue.
 type tx struct {
-	data []byte
-	q    queue.Tx
-	ids  idManager
-	bufs bufferManager
+	data         []byte
+	q            queue.Tx
+	ids          idManager
+	bufs         bufferManager
+	eventFD      eventfd.Eventfd
+	sharedDataFD int
 }
 
 // init initializes all state needed by the tx queue based on the information
@@ -64,7 +67,8 @@ func (t *tx) init(mtu uint32, c *QueueConfig) error {
 	t.ids.init()
 	t.bufs.init(0, len(data), int(mtu))
 	t.data = data
-
+	t.eventFD = c.EventFD
+	t.sharedDataFD = c.SharedDataFD
 	return nil
 }
 
@@ -142,6 +146,12 @@ func (t *tx) transmit(bufs ...buffer.View) bool {
 	return true
 }
 
+// notify calls Notify on t.eventFD to indicate to the peer that there is data
+// to be read.
+func (t *tx) notify() {
+	t.eventFD.Notify()
+}
+
 // getBuffer returns a memory region mapped to the full contents of the given
 // file descriptor.
 func getBuffer(fd int) ([]byte, error) {
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index d51c36f19..1c3b0887f 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -167,14 +167,17 @@ func (e *endpoint) handleControl(errInfo stack.TransportError, pkt *stack.Packet
 	p := hdr.TransportProtocol()
 	dstAddr := hdr.DestinationAddress()
 	// Skip the ip header, then deliver the error.
-	pkt.Data().DeleteFront(hlen)
+	if _, ok := pkt.Data().Consume(hlen); !ok {
+		panic(fmt.Sprintf("could not consume the IP header of %d bytes", hlen))
+	}
 	e.dispatcher.DeliverTransportError(srcAddr, dstAddr, ProtocolNumber, p, errInfo, pkt)
 }
 
 func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) {
 	received := e.stats.icmp.packetsReceived
 	// ICMP packets don't have their TransportHeader fields set. See
-	// icmp/protocol.go:protocol.Parse for a full explanation.
+	// icmp/protocol.go:protocol.Parse for a full explanation. Not all ICMP types
+	// require consuming the header, so we only call PullUp.
 	v, ok := pkt.Data().PullUp(header.ICMPv4MinimumSize)
 	if !ok {
 		received.invalid.Increment()
@@ -242,7 +245,8 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) {
 
 		// DeliverTransportPacket will take ownership of pkt so don't use it beyond
 		// this point. Make a deep copy of the data before pkt gets sent as we will
-		// be modifying fields.
+		// be modifying fields. Both the ICMP header (with its type modified to
+		// EchoReply) and payload are reused in the reply packet.
 		//
 		// TODO(gvisor.dev/issue/4399): The copy may not be needed if there are no
 		// waiting endpoints. Consider moving responsibility for doing the copy to
@@ -331,6 +335,8 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) {
 	case header.ICMPv4EchoReply:
 		received.echoReply.Increment()
 
+		// ICMP sockets expect the ICMP header to be present, so we don't consume
+		// the ICMP header.
 		e.dispatcher.DeliverTransportPacket(header.ICMPv4ProtocolNumber, pkt)
 
 	case header.ICMPv4DstUnreachable:
@@ -338,7 +344,9 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) {
 
 		mtu := h.MTU()
 		code := h.Code()
-		pkt.Data().DeleteFront(header.ICMPv4MinimumSize)
+		if _, ok := pkt.Data().Consume(header.ICMPv4MinimumSize); !ok {
+			panic("could not consume ICMPv4MinimumSize bytes")
+		}
 		switch code {
 		case header.ICMPv4HostUnreachable:
 			e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt)
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index dda473e48..9b71738ae 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -466,7 +466,7 @@ func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer, headerIn
 	// Postrouting NAT can only change the source address, and does not alter the
 	// route or outgoing interface of the packet.
 	outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
-	if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, outNicName); !ok {
+	if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok {
 		// iptables is telling us to drop the packet.
 		e.stats.ip.IPTablesPostroutingDropped.Increment()
 		return nil
@@ -576,7 +576,7 @@ func (e *endpoint) WritePackets(r *stack.Route, pkts stack.PacketBufferList, par
 	// We ignore the list of NAT-ed packets here because Postrouting NAT can only
 	// change the source address, and does not alter the route or outgoing
 	// interface of the packet.
-	postroutingDropped, _ := e.protocol.stack.IPTables().CheckPostroutingPackets(pkts, r, outNicName)
+	postroutingDropped, _ := e.protocol.stack.IPTables().CheckPostroutingPackets(pkts, r, e, outNicName)
 	stats.IPTablesPostroutingDropped.IncrementBy(uint64(len(postroutingDropped)))
 	for pkt := range postroutingDropped {
 		pkts.Remove(pkt)
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index 6c6107264..ff23d48e7 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -187,7 +187,9 @@ func (e *endpoint) handleControl(transErr stack.TransportError, pkt *stack.Packe
 
 	// Skip the IP header, then handle the fragmentation header if there
 	// is one.
-	pkt.Data().DeleteFront(header.IPv6MinimumSize)
+	if _, ok := pkt.Data().Consume(header.IPv6MinimumSize); !ok {
+		panic("could not consume IPv6MinimumSize bytes")
+	}
 	if p == header.IPv6FragmentHeader {
 		f, ok := pkt.Data().PullUp(header.IPv6FragmentHeaderSize)
 		if !ok {
@@ -203,7 +205,9 @@ func (e *endpoint) handleControl(transErr stack.TransportError, pkt *stack.Packe
 
 		// Skip fragmentation header and find out the actual protocol
 		// number.
-		pkt.Data().DeleteFront(header.IPv6FragmentHeaderSize)
+		if _, ok := pkt.Data().Consume(header.IPv6FragmentHeaderSize); !ok {
+			panic("could not consume IPv6FragmentHeaderSize bytes")
+		}
 	}
 
 	e.dispatcher.DeliverTransportError(srcAddr, dstAddr, ProtocolNumber, p, transErr, pkt)
@@ -325,7 +329,7 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool, r
 	switch icmpType := h.Type(); icmpType {
 	case header.ICMPv6PacketTooBig:
 		received.packetTooBig.Increment()
-		hdr, ok := pkt.Data().PullUp(header.ICMPv6PacketTooBigMinimumSize)
+		hdr, ok := pkt.Data().Consume(header.ICMPv6PacketTooBigMinimumSize)
 		if !ok {
 			received.invalid.Increment()
 			return
@@ -334,18 +338,16 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool, r
 		if err != nil {
 			networkMTU = 0
 		}
-		pkt.Data().DeleteFront(header.ICMPv6PacketTooBigMinimumSize)
 		e.handleControl(&icmpv6PacketTooBigSockError{mtu: networkMTU}, pkt)
 
 	case header.ICMPv6DstUnreachable:
 		received.dstUnreachable.Increment()
-		hdr, ok := pkt.Data().PullUp(header.ICMPv6DstUnreachableMinimumSize)
+		hdr, ok := pkt.Data().Consume(header.ICMPv6DstUnreachableMinimumSize)
 		if !ok {
 			received.invalid.Increment()
 			return
 		}
 		code := header.ICMPv6(hdr).Code()
-		pkt.Data().DeleteFront(header.ICMPv6DstUnreachableMinimumSize)
 		switch code {
 		case header.ICMPv6NetworkUnreachable:
 			e.handleControl(&icmpv6DestinationNetworkUnreachableSockError{}, pkt)
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index e2d2cf907..600e805f8 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -788,7 +788,7 @@ func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer, protocol
 	// Postrouting NAT can only change the source address, and does not alter the
 	// route or outgoing interface of the packet.
 	outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
-	if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, outNicName); !ok {
+	if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok {
 		// iptables is telling us to drop the packet.
 		e.stats.ip.IPTablesPostroutingDropped.Increment()
 		return nil
@@ -897,7 +897,7 @@ func (e *endpoint) WritePackets(r *stack.Route, pkts stack.PacketBufferList, par
 	// We ignore the list of NAT-ed packets here because Postrouting NAT can only
 	// change the source address, and does not alter the route or outgoing
 	// interface of the packet.
-	postroutingDropped, _ := e.protocol.stack.IPTables().CheckPostroutingPackets(pkts, r, outNicName)
+	postroutingDropped, _ := e.protocol.stack.IPTables().CheckPostroutingPackets(pkts, r, e, outNicName)
 	stats.IPTablesPostroutingDropped.IncrementBy(uint64(len(postroutingDropped)))
 	for pkt := range postroutingDropped {
 		pkts.Remove(pkt)
@@ -1537,19 +1537,22 @@ func (e *endpoint) processExtensionHeaders(h header.IPv6, pkt *stack.PacketBuffe
 			// If the last header in the payload isn't a known IPv6 extension header,
 			// handle it as if it is transport layer data.
 
-			// Calculate the number of octets parsed from data. We want to remove all
-			// the data except the unparsed portion located at the end, which its size
-			// is extHdr.Buf.Size().
+			// Calculate the number of octets parsed from data. We want to consume all
+			// the data except the unparsed portion located at the end, whose size is
+			// extHdr.Buf.Size().
 			trim := pkt.Data().Size() - extHdr.Buf.Size()
 
 			// For unfragmented packets, extHdr still contains the transport header.
-			// Get rid of it.
+			// Consume that too.
 			//
 			// For reassembled fragments, pkt.TransportHeader is unset, so this is a
 			// no-op and pkt.Data begins with the transport header.
 			trim += pkt.TransportHeader().View().Size()
 
-			pkt.Data().DeleteFront(trim)
+			if _, ok := pkt.Data().Consume(trim); !ok {
+				stats.MalformedPacketsReceived.Increment()
+				return fmt.Errorf("could not consume %d bytes", trim)
+			}
 
 			stats.PacketsDelivered.Increment()
 			if p := tcpip.TransportProtocolNumber(extHdr.Identifier); p == header.ICMPv6ProtocolNumber {
diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go
index 4fb7e9adb..48f290187 100644
--- a/pkg/tcpip/stack/conntrack.go
+++ b/pkg/tcpip/stack/conntrack.go
@@ -45,17 +45,6 @@ const (
 	dirReply
 )
 
-// Manipulation type for the connection.
-// TODO(gvisor.dev/issue/5696): Define this as a bit set and support SNAT and
-// DNAT at the same time.
-type manipType int
-
-const (
-	manipNone manipType = iota
-	manipSource
-	manipDestination
-)
-
 // tuple holds a connection's identifying and manipulating data in one
 // direction. It is immutable.
 //
@@ -64,13 +53,21 @@ type tuple struct {
 	// tupleEntry is used to build an intrusive list of tuples.
 	tupleEntry
 
-	tupleID
-
 	// conn is the connection tracking entry this tuple belongs to.
 	conn *conn
 
 	// direction is the direction of the tuple.
 	direction direction
+
+	mu sync.RWMutex `state:"nosave"`
+	// +checklocks:mu
+	tupleID tupleID
+}
+
+func (t *tuple) id() tupleID {
+	t.mu.RLock()
+	defer t.mu.RUnlock()
+	return t.tupleID
 }
 
 // tupleID uniquely identifies a connection in one direction. It currently
@@ -103,50 +100,47 @@ func (ti tupleID) reply() tupleID {
 //
 // +stateify savable
 type conn struct {
+	ct *ConnTrack
+
 	// original is the tuple in original direction. It is immutable.
 	original tuple
 
-	// reply is the tuple in reply direction. It is immutable.
+	// reply is the tuple in reply direction.
 	reply tuple
 
-	// manip indicates if the packet should be manipulated. It is immutable.
-	// TODO(gvisor.dev/issue/5696): Support updating manipulation type.
-	manip manipType
-
-	// tcbHook indicates if the packet is inbound or outbound to
-	// update the state of tcb. It is immutable.
-	tcbHook Hook
-
-	// mu protects all mutable state.
-	mu sync.Mutex `state:"nosave"`
+	mu sync.RWMutex `state:"nosave"`
+	// Indicates that the connection has been finalized and may handle replies.
+	//
+	// +checklocks:mu
+	finalized bool
+	// sourceManip indicates the packet's source is manipulated.
+	//
+	// +checklocks:mu
+	sourceManip bool
+	// destinationManip indicates the packet's destination is manipulated.
+	//
+	// +checklocks:mu
+	destinationManip bool
 	// tcb is TCB control block. It is used to keep track of states
-	// of tcp connection and is protected by mu.
+	// of tcp connection.
+	//
+	// +checklocks:mu
 	tcb tcpconntrack.TCB
 	// lastUsed is the last time the connection saw a relevant packet, and
-	// is updated by each packet on the connection. It is protected by mu.
+	// is updated by each packet on the connection.
 	//
 	// TODO(gvisor.dev/issue/5939): do not use the ambient clock.
+	//
+	// +checklocks:mu
 	lastUsed time.Time `state:".(unixTime)"`
 }
 
-// newConn creates new connection.
-func newConn(orig, reply tupleID, manip manipType, hook Hook) *conn {
-	conn := conn{
-		manip:    manip,
-		tcbHook:  hook,
-		lastUsed: time.Now(),
-	}
-	conn.original = tuple{conn: &conn, tupleID: orig}
-	conn.reply = tuple{conn: &conn, tupleID: reply, direction: dirReply}
-	return &conn
-}
-
 // timedOut returns whether the connection timed out based on its state.
 func (cn *conn) timedOut(now time.Time) bool {
 	const establishedTimeout = 5 * 24 * time.Hour
 	const defaultTimeout = 120 * time.Second
-	cn.mu.Lock()
-	defer cn.mu.Unlock()
+	cn.mu.RLock()
+	defer cn.mu.RUnlock()
 	if cn.tcb.State() == tcpconntrack.ResultAlive {
 		// Use the same default as Linux, which doesn't delete
 		// established connections for 5(!) days.
@@ -159,8 +153,9 @@ func (cn *conn) timedOut(now time.Time) bool {
 
 // update the connection tracking state.
 //
-// Precondition: cn.mu must be held.
-func (cn *conn) updateLocked(pkt *PacketBuffer, hook Hook) {
+// TODO(https://gvisor.dev/issue/6590): annotate r/w locking requirements.
+// +checklocks:cn.mu
+func (cn *conn) updateLocked(pkt *PacketBuffer, dir direction) {
 	if pkt.TransportProtocolNumber != header.TCPProtocolNumber {
 		return
 	}
@@ -172,10 +167,16 @@ func (cn *conn) updateLocked(pkt *PacketBuffer, hook Hook) {
 	// established or not, so the client/server distinction isn't important.
 	if cn.tcb.IsEmpty() {
 		cn.tcb.Init(tcpHeader)
-	} else if hook == cn.tcbHook {
+		return
+	}
+
+	switch dir {
+	case dirOriginal:
 		cn.tcb.UpdateStateOutbound(tcpHeader)
-	} else {
+	case dirReply:
 		cn.tcb.UpdateStateInbound(tcpHeader)
+	default:
+		panic(fmt.Sprintf("unhandled dir = %d", dir))
 	}
 }
 
@@ -200,18 +201,18 @@ type ConnTrack struct {
 	// It is immutable.
 	seed uint32
 
+	mu sync.RWMutex `state:"nosave"`
 	// mu protects the buckets slice, but not buckets' contents. Only take
 	// the write lock if you are modifying the slice or saving for S/R.
-	mu sync.RWMutex `state:"nosave"`
-
-	// buckets is protected by mu.
+	//
+	// +checklocks:mu
 	buckets []bucket
 }
 
 // +stateify savable
 type bucket struct {
-	// mu protects tuples.
-	mu     sync.Mutex `state:"nosave"`
+	mu sync.RWMutex `state:"nosave"`
+	// +checklocks:mu
 	tuples tupleList
 }
 
@@ -230,241 +231,212 @@ func getTransportHeader(pkt *PacketBuffer) (header.ChecksummableTransport, bool)
 	return nil, false
 }
 
-// packetToTupleID converts packet to a tuple ID. It fails when pkt lacks a valid
-// TCP header.
-//
-// Preconditions: pkt.NetworkHeader() is valid.
-func packetToTupleID(pkt *PacketBuffer) (tupleID, tcpip.Error) {
+func (ct *ConnTrack) init() {
+	ct.mu.Lock()
+	defer ct.mu.Unlock()
+	ct.buckets = make([]bucket, numBuckets)
+}
+
+func (ct *ConnTrack) getConnOrMaybeInsertNoop(pkt *PacketBuffer) *tuple {
 	netHeader := pkt.Network()
 	transportHeader, ok := getTransportHeader(pkt)
 	if !ok {
-		return tupleID{}, &tcpip.ErrUnknownProtocol{}
+		return nil
 	}
 
-	return tupleID{
+	tid := tupleID{
 		srcAddr:    netHeader.SourceAddress(),
 		srcPort:    transportHeader.SourcePort(),
 		dstAddr:    netHeader.DestinationAddress(),
 		dstPort:    transportHeader.DestinationPort(),
 		transProto: pkt.TransportProtocolNumber,
 		netProto:   pkt.NetworkProtocolNumber,
-	}, nil
-}
-
-func (ct *ConnTrack) init() {
-	ct.mu.Lock()
-	defer ct.mu.Unlock()
-	ct.buckets = make([]bucket, numBuckets)
-}
-
-// connFor gets the conn for pkt if it exists, or returns nil
-// if it does not. It returns an error when pkt does not contain a valid TCP
-// header.
-// TODO(gvisor.dev/issue/6168): Support UDP.
-func (ct *ConnTrack) connFor(pkt *PacketBuffer) (*conn, direction) {
-	tid, err := packetToTupleID(pkt)
-	if err != nil {
-		return nil, dirOriginal
 	}
-	return ct.connForTID(tid)
-}
 
-func (ct *ConnTrack) connForTID(tid tupleID) (*conn, direction) {
-	bucket := ct.bucket(tid)
-	now := time.Now()
+	bktID := ct.bucket(tid)
 
 	ct.mu.RLock()
-	defer ct.mu.RUnlock()
-	ct.buckets[bucket].mu.Lock()
-	defer ct.buckets[bucket].mu.Unlock()
-
-	// Iterate over the tuples in a bucket, cleaning up any unused
-	// connections we find.
-	for other := ct.buckets[bucket].tuples.Front(); other != nil; other = other.Next() {
-		// Clean up any timed-out connections we happen to find.
-		if ct.reapTupleLocked(other, bucket, now) {
-			// The tuple expired.
-			continue
-		}
-		if tid == other.tupleID {
-			return other.conn, other.direction
-		}
+	bkt := &ct.buckets[bktID]
+	ct.mu.RUnlock()
+
+	now := time.Now()
+	if t := bkt.connForTID(tid, now); t != nil {
+		return t
 	}
 
-	return nil, dirOriginal
-}
+	bkt.mu.Lock()
+	defer bkt.mu.Unlock()
 
-func (ct *ConnTrack) insertRedirectConn(pkt *PacketBuffer, hook Hook, port uint16, address tcpip.Address) *conn {
-	tid, err := packetToTupleID(pkt)
-	if err != nil {
-		return nil
+	// Make sure a connection wasn't added between when we last checked the
+	// bucket and acquired the bucket's write lock.
+	if t := bkt.connForTIDRLocked(tid, now); t != nil {
+		return t
 	}
-	if hook != Prerouting && hook != Output {
-		return nil
+
+	// This is the first packet we're seeing for the connection. Create an entry
+	// for this new connection.
+	conn := &conn{
+		ct:       ct,
+		original: tuple{tupleID: tid, direction: dirOriginal},
+		reply:    tuple{tupleID: tid.reply(), direction: dirReply},
+		lastUsed: now,
 	}
+	conn.original.conn = conn
+	conn.reply.conn = conn
 
-	replyTID := tid.reply()
-	replyTID.srcAddr = address
-	replyTID.srcPort = port
+	// For now, we only map an entry for the packet's original tuple as NAT may be
+	// performed on this connection. Until the packet goes through all the hooks
+	// and its final address/port is known, we cannot know what the response
+	// packet's addresses/ports will look like.
+	//
+	// This is okay because the destination cannot send its response until it
+	// receives the packet; the packet will only be received once all the hooks
+	// have been performed.
+	//
+	// See (*conn).finalize.
+	bkt.tuples.PushFront(&conn.original)
+	return &conn.original
+}
 
-	conn, _ := ct.connForTID(tid)
-	if conn != nil {
-		// The connection is already tracked.
-		// TODO(gvisor.dev/issue/5696): Support updating an existing connection.
-		return nil
-	}
-	conn = newConn(tid, replyTID, manipDestination, hook)
-	ct.insertConn(conn)
-	return conn
+func (ct *ConnTrack) connForTID(tid tupleID) *tuple {
+	bktID := ct.bucket(tid)
+
+	ct.mu.RLock()
+	bkt := &ct.buckets[bktID]
+	ct.mu.RUnlock()
+
+	return bkt.connForTID(tid, time.Now())
 }
 
-func (ct *ConnTrack) insertSNATConn(pkt *PacketBuffer, hook Hook, port uint16, address tcpip.Address) *conn {
-	tid, err := packetToTupleID(pkt)
-	if err != nil {
-		return nil
-	}
-	if hook != Input && hook != Postrouting {
-		return nil
+func (bkt *bucket) connForTID(tid tupleID, now time.Time) *tuple {
+	bkt.mu.RLock()
+	defer bkt.mu.RUnlock()
+	return bkt.connForTIDRLocked(tid, now)
+}
+
+// +checklocks:bkt.mu
+func (bkt *bucket) connForTIDRLocked(tid tupleID, now time.Time) *tuple {
+	for other := bkt.tuples.Front(); other != nil; other = other.Next() {
+		if tid == other.id() && !other.conn.timedOut(now) {
+			return other
+		}
 	}
+	return nil
+}
 
-	replyTID := tid.reply()
-	replyTID.dstAddr = address
-	replyTID.dstPort = port
+func (ct *ConnTrack) finalize(cn *conn) {
+	tid := cn.reply.id()
+	id := ct.bucket(tid)
 
-	conn, _ := ct.connForTID(tid)
-	if conn != nil {
-		// The connection is already tracked.
-		// TODO(gvisor.dev/issue/5696): Support updating an existing connection.
-		return nil
+	ct.mu.RLock()
+	bkt := &ct.buckets[id]
+	ct.mu.RUnlock()
+
+	bkt.mu.Lock()
+	defer bkt.mu.Unlock()
+
+	if t := bkt.connForTIDRLocked(tid, time.Now()); t != nil {
+		// Another connection for the reply already exists. We can't do much about
+		// this so we leave the connection cn represents in a state where it can
+		// send packets but its responses will be mapped to some other connection.
+		// This may be okay if the connection only expects to send packets without
+		// any responses.
+		return
 	}
-	conn = newConn(tid, replyTID, manipSource, hook)
-	ct.insertConn(conn)
-	return conn
+
+	bkt.tuples.PushFront(&cn.reply)
 }
 
-// insertConn inserts conn into the appropriate table bucket.
-func (ct *ConnTrack) insertConn(conn *conn) {
-	// Lock the buckets in the correct order.
-	tupleBucket := ct.bucket(conn.original.tupleID)
-	replyBucket := ct.bucket(conn.reply.tupleID)
-	ct.mu.RLock()
-	defer ct.mu.RUnlock()
-	if tupleBucket < replyBucket {
-		ct.buckets[tupleBucket].mu.Lock()
-		ct.buckets[replyBucket].mu.Lock()
-	} else if tupleBucket > replyBucket {
-		ct.buckets[replyBucket].mu.Lock()
-		ct.buckets[tupleBucket].mu.Lock()
-	} else {
-		// Both tuples are in the same bucket.
-		ct.buckets[tupleBucket].mu.Lock()
-	}
-
-	// Now that we hold the locks, ensure the tuple hasn't been inserted by
-	// another thread.
-	// TODO(gvisor.dev/issue/5773): Should check conn.reply.tupleID, too?
-	alreadyInserted := false
-	for other := ct.buckets[tupleBucket].tuples.Front(); other != nil; other = other.Next() {
-		if other.tupleID == conn.original.tupleID {
-			alreadyInserted = true
-			break
+func (cn *conn) finalize() {
+	{
+		cn.mu.RLock()
+		finalized := cn.finalized
+		cn.mu.RUnlock()
+		if finalized {
+			return
 		}
 	}
 
-	if !alreadyInserted {
-		// Add the tuple to the map.
-		ct.buckets[tupleBucket].tuples.PushFront(&conn.original)
-		ct.buckets[replyBucket].tuples.PushFront(&conn.reply)
+	cn.mu.Lock()
+	finalized := cn.finalized
+	cn.finalized = true
+	cn.mu.Unlock()
+	if finalized {
+		return
 	}
 
-	// Unlocking can happen in any order.
-	ct.buckets[tupleBucket].mu.Unlock()
-	if tupleBucket != replyBucket {
-		ct.buckets[replyBucket].mu.Unlock() // +checklocksforce
-	}
+	cn.ct.finalize(cn)
 }
 
-// handlePacket will manipulate the port and address of the packet if the
-// connection exists. Returns whether, after the packet traverses the tables,
-// it should create a new entry in the table.
-func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, r *Route) bool {
-	if pkt.NatDone {
-		return false
-	}
+// performNAT sets up the connection for the specified NAT.
+//
+// Generally, only the first packet of a connection reaches this method;
+// other packets will be manipulated without needing to modify the connection.
+func (cn *conn) performNAT(pkt *PacketBuffer, hook Hook, r *Route, port uint16, address tcpip.Address, dnat bool) {
+	cn.performNATIfNoop(port, address, dnat)
+	cn.handlePacket(pkt, hook, r)
+}
 
-	switch hook {
-	case Prerouting, Input, Output, Postrouting:
-	default:
-		return false
-	}
+func (cn *conn) performNATIfNoop(port uint16, address tcpip.Address, dnat bool) {
+	cn.mu.Lock()
+	defer cn.mu.Unlock()
 
-	transportHeader, ok := getTransportHeader(pkt)
-	if !ok {
-		return false
+	if cn.finalized {
+		return
 	}
 
-	conn, dir := ct.connFor(pkt)
-	// Connection not found for the packet.
-	if conn == nil {
-		// If this is the last hook in the data path for this packet (Input if
-		// incoming, Postrouting if outgoing), indicate that a connection should be
-		// inserted by the end of this hook.
-		return hook == Input || hook == Postrouting
+	if dnat {
+		if cn.destinationManip {
+			return
+		}
+		cn.destinationManip = true
+	} else {
+		if cn.sourceManip {
+			return
+		}
+		cn.sourceManip = true
 	}
 
-	netHeader := pkt.Network()
-
-	// TODO(gvisor.dev/issue/5748): TCP checksums on inbound packets should be
-	// validated if checksum offloading is off. It may require IP defrag if the
-	// packets are fragmented.
-
-	var newAddr tcpip.Address
-	var newPort uint16
+	cn.reply.mu.Lock()
+	defer cn.reply.mu.Unlock()
 
-	updateSRCFields := false
+	if dnat {
+		cn.reply.tupleID.srcAddr = address
+		cn.reply.tupleID.srcPort = port
+	} else {
+		cn.reply.tupleID.dstAddr = address
+		cn.reply.tupleID.dstPort = port
+	}
+}
 
-	switch hook {
-	case Prerouting, Output:
-		if conn.manip == manipDestination && dir == dirOriginal {
-			newPort = conn.reply.srcPort
-			newAddr = conn.reply.srcAddr
-			pkt.NatDone = true
-		} else if conn.manip == manipSource && dir == dirReply {
-			newPort = conn.original.srcPort
-			newAddr = conn.original.srcAddr
-			pkt.NatDone = true
-		}
-	case Input, Postrouting:
-		if conn.manip == manipSource && dir == dirOriginal {
-			newPort = conn.reply.dstPort
-			newAddr = conn.reply.dstAddr
-			updateSRCFields = true
-			pkt.NatDone = true
-		} else if conn.manip == manipDestination && dir == dirReply {
-			newPort = conn.original.dstPort
-			newAddr = conn.original.dstAddr
-			updateSRCFields = true
-			pkt.NatDone = true
-		}
-	default:
-		panic(fmt.Sprintf("unrecognized hook = %s", hook))
+func (cn *conn) handlePacket(pkt *PacketBuffer, hook Hook, r *Route) {
+	if pkt.NatDone {
+		return
 	}
 
-	if !pkt.NatDone {
-		return false
+	transportHeader, ok := getTransportHeader(pkt)
+	if !ok {
+		return
 	}
 
 	fullChecksum := false
 	updatePseudoHeader := false
+	dnat := false
 	switch hook {
 	case Prerouting:
 		// Packet came from outside the stack so it must have a checksum set
 		// already.
 		fullChecksum = true
 		updatePseudoHeader = true
+
+		dnat = true
 	case Input:
-	case Output, Postrouting:
-		// Calculate the TCP checksum and set it.
+	case Forward:
+		panic("should not handle packet in the forwarding hook")
+	case Output:
+		dnat = true
+		fallthrough
+	case Postrouting:
 		if pkt.TransportProtocolNumber == header.TCPProtocolNumber && pkt.GSOOptions.Type != GSONone && pkt.GSOOptions.NeedsCsum {
 			updatePseudoHeader = true
 		} else if r.RequiresTXTransportChecksum() {
@@ -472,62 +444,73 @@ func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, r *Route) bool {
 			updatePseudoHeader = true
 		}
 	default:
-		panic(fmt.Sprintf("unrecognized hook = %s", hook))
+		panic(fmt.Sprintf("unrecognized hook = %d", hook))
 	}
 
-	rewritePacket(
-		netHeader,
-		transportHeader,
-		updateSRCFields,
-		fullChecksum,
-		updatePseudoHeader,
-		newPort,
-		newAddr,
-	)
+	// TODO(gvisor.dev/issue/5748): TCP checksums on inbound packets should be
+	// validated if checksum offloading is off. It may require IP defrag if the
+	// packets are fragmented.
 
-	// Update the state of tcb.
-	conn.mu.Lock()
-	defer conn.mu.Unlock()
+	dir := pkt.tuple.direction
+	tid, performManip := func() (tupleID, bool) {
+		cn.mu.Lock()
+		defer cn.mu.Unlock()
+
+		var tuple *tuple
+		switch dir {
+		case dirOriginal:
+			if dnat {
+				if !cn.destinationManip {
+					return tupleID{}, false
+				}
+			} else if !cn.sourceManip {
+				return tupleID{}, false
+			}
 
-	// Mark the connection as having been used recently so it isn't reaped.
-	conn.lastUsed = time.Now()
-	// Update connection state.
-	conn.updateLocked(pkt, hook)
+			tuple = &cn.reply
+		case dirReply:
+			if dnat {
+				if !cn.sourceManip {
+					return tupleID{}, false
+				}
+			} else if !cn.destinationManip {
+				return tupleID{}, false
+			}
 
-	return false
-}
+			tuple = &cn.original
+		default:
+			panic(fmt.Sprintf("unhandled dir = %d", dir))
+		}
 
-// maybeInsertNoop tries to insert a no-op connection entry to keep connections
-// from getting clobbered when replies arrive. It only inserts if there isn't
-// already a connection for pkt.
-//
-// This should be called after traversing iptables rules only, to ensure that
-// pkt.NatDone is set correctly.
-func (ct *ConnTrack) maybeInsertNoop(pkt *PacketBuffer, hook Hook) {
-	// If there were a rule applying to this packet, it would be marked
-	// with NatDone.
-	if pkt.NatDone {
-		return
-	}
+		// Mark the connection as having been used recently so it isn't reaped.
+		cn.lastUsed = time.Now()
+		// Update connection state.
+		cn.updateLocked(pkt, dir)
 
-	switch pkt.TransportProtocolNumber {
-	case header.TCPProtocolNumber, header.UDPProtocolNumber:
-	default:
-		// TODO(https://gvisor.dev/issue/5915): Track ICMP and other trackable
-		// connections.
+		return tuple.id(), true
+	}()
+	if !performManip {
 		return
 	}
 
-	// This is the first packet we're seeing for the TCP connection. Insert
-	// the noop entry (an identity mapping) so that the response doesn't
-	// get NATed, breaking the connection.
-	tid, err := packetToTupleID(pkt)
-	if err != nil {
-		return
+	newPort := tid.dstPort
+	newAddr := tid.dstAddr
+	if dnat {
+		newPort = tid.srcPort
+		newAddr = tid.srcAddr
 	}
-	conn := newConn(tid, tid.reply(), manipNone, hook)
-	conn.updateLocked(pkt, hook)
-	ct.insertConn(conn)
+
+	rewritePacket(
+		pkt.Network(),
+		transportHeader,
+		!dnat,
+		fullChecksum,
+		updatePseudoHeader,
+		newPort,
+		newAddr,
+	)
+
+	pkt.NatDone = true
 }
 
 // bucket gets the conntrack bucket for a tupleID.
@@ -579,14 +562,15 @@ func (ct *ConnTrack) reapUnused(start int, prevInterval time.Duration) (int, tim
 	defer ct.mu.RUnlock()
 	for i := 0; i < len(ct.buckets)/fractionPerReaping; i++ {
 		idx = (i + start) % len(ct.buckets)
-		ct.buckets[idx].mu.Lock()
-		for tuple := ct.buckets[idx].tuples.Front(); tuple != nil; tuple = tuple.Next() {
+		bkt := &ct.buckets[idx]
+		bkt.mu.Lock()
+		for tuple := bkt.tuples.Front(); tuple != nil; tuple = tuple.Next() {
 			checked++
-			if ct.reapTupleLocked(tuple, idx, now) {
+			if ct.reapTupleLocked(tuple, idx, bkt, now) {
 				expired++
 			}
 		}
-		ct.buckets[idx].mu.Unlock()
+		bkt.mu.Unlock()
 	}
 	// We already checked buckets[idx].
 	idx++
@@ -611,41 +595,45 @@ func (ct *ConnTrack) reapUnused(start int, prevInterval time.Duration) (int, tim
 // reapTupleLocked tries to remove tuple and its reply from the table. It
 // returns whether the tuple's connection has timed out.
 //
-// Preconditions:
-// * ct.mu is locked for reading.
-// * bucket is locked.
-func (ct *ConnTrack) reapTupleLocked(tuple *tuple, bucket int, now time.Time) bool {
+// Precondition: ct.mu is read locked and bkt.mu is write locked.
+// TODO(https://gvisor.dev/issue/6590): annotate r/w locking requirements.
+// +checklocks:ct.mu
+// +checklocks:bkt.mu
+func (ct *ConnTrack) reapTupleLocked(tuple *tuple, bktID int, bkt *bucket, now time.Time) bool {
 	if !tuple.conn.timedOut(now) {
 		return false
 	}
 
 	// To maintain lock order, we can only reap these tuples if the reply
 	// appears later in the table.
-	replyBucket := ct.bucket(tuple.reply())
-	if bucket > replyBucket {
+	replyBktID := ct.bucket(tuple.id().reply())
+	if bktID > replyBktID {
 		return true
 	}
 
 	// Don't re-lock if both tuples are in the same bucket.
-	differentBuckets := bucket != replyBucket
-	if differentBuckets {
-		ct.buckets[replyBucket].mu.Lock()
+	if bktID != replyBktID {
+		replyBkt := &ct.buckets[replyBktID]
+		replyBkt.mu.Lock()
+		removeConnFromBucket(replyBkt, tuple)
+		replyBkt.mu.Unlock()
+	} else {
+		removeConnFromBucket(bkt, tuple)
 	}
 
 	// We have the buckets locked and can remove both tuples.
+	bkt.tuples.Remove(tuple)
+	return true
+}
+
+// TODO(https://gvisor.dev/issue/6590): annotate r/w locking requirements.
+// +checklocks:b.mu
+func removeConnFromBucket(b *bucket, tuple *tuple) {
 	if tuple.direction == dirOriginal {
-		ct.buckets[replyBucket].tuples.Remove(&tuple.conn.reply)
+		b.tuples.Remove(&tuple.conn.reply)
 	} else {
-		ct.buckets[replyBucket].tuples.Remove(&tuple.conn.original)
-	}
-	ct.buckets[bucket].tuples.Remove(tuple)
-
-	// Don't re-unlock if both tuples are in the same bucket.
-	if differentBuckets {
-		ct.buckets[replyBucket].mu.Unlock() // +checklocksforce
+		b.tuples.Remove(&tuple.conn.original)
 	}
-
-	return true
 }
 
 func (ct *ConnTrack) originalDst(epID TransportEndpointID, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber) (tcpip.Address, uint16, tcpip.Error) {
@@ -659,14 +647,19 @@ func (ct *ConnTrack) originalDst(epID TransportEndpointID, netProto tcpip.Networ
 		transProto: transProto,
 		netProto:   netProto,
 	}
-	conn, _ := ct.connForTID(tid)
-	if conn == nil {
+	t := ct.connForTID(tid)
+	if t == nil {
 		// Not a tracked connection.
 		return "", 0, &tcpip.ErrNotConnected{}
-	} else if conn.manip != manipDestination {
+	}
+
+	t.conn.mu.RLock()
+	defer t.conn.mu.RUnlock()
+	if !t.conn.destinationManip {
 		// Unmanipulated destination.
 		return "", 0, &tcpip.ErrInvalidOptionValue{}
 	}
 
-	return conn.original.dstAddr, conn.original.dstPort, nil
+	id := t.conn.original.id()
+	return id.dstAddr, id.dstPort, nil
 }
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 74c9075b4..5808be685 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -271,7 +271,18 @@ const (
 //
 // Precondition: The packet's network and transport header must be set.
 func (it *IPTables) CheckPrerouting(pkt *PacketBuffer, addressEP AddressableEndpoint, inNicName string) bool {
-	return it.check(Prerouting, pkt, nil /* route */, addressEP, inNicName, "" /* outNicName */)
+	const hook = Prerouting
+
+	if it.shouldSkip(pkt.NetworkProtocolNumber) {
+		return true
+	}
+
+	if t := it.connections.getConnOrMaybeInsertNoop(pkt); t != nil {
+		pkt.tuple = t
+		t.conn.handlePacket(pkt, hook, nil /* route */)
+	}
+
+	return it.check(hook, pkt, nil /* route */, addressEP, inNicName, "" /* outNicName */)
 }
 
 // CheckInput performs the input hook on the packet.
@@ -281,7 +292,22 @@ func (it *IPTables) CheckPrerouting(pkt *PacketBuffer, addressEP AddressableEndp
 //
 // Precondition: The packet's network and transport header must be set.
 func (it *IPTables) CheckInput(pkt *PacketBuffer, inNicName string) bool {
-	return it.check(Input, pkt, nil /* route */, nil /* addressEP */, inNicName, "" /* outNicName */)
+	const hook = Input
+
+	if it.shouldSkip(pkt.NetworkProtocolNumber) {
+		return true
+	}
+
+	if t := pkt.tuple; t != nil {
+		t.conn.handlePacket(pkt, hook, nil /* route */)
+	}
+
+	ret := it.check(hook, pkt, nil /* route */, nil /* addressEP */, inNicName, "" /* outNicName */)
+	if t := pkt.tuple; t != nil {
+		t.conn.finalize()
+	}
+	pkt.tuple = nil
+	return ret
 }
 
 // CheckForward performs the forward hook on the packet.
@@ -291,6 +317,9 @@ func (it *IPTables) CheckInput(pkt *PacketBuffer, inNicName string) bool {
 //
 // Precondition: The packet's network and transport header must be set.
 func (it *IPTables) CheckForward(pkt *PacketBuffer, inNicName, outNicName string) bool {
+	if it.shouldSkip(pkt.NetworkProtocolNumber) {
+		return true
+	}
 	return it.check(Forward, pkt, nil /* route */, nil /* addressEP */, inNicName, outNicName)
 }
 
@@ -301,7 +330,18 @@ func (it *IPTables) CheckForward(pkt *PacketBuffer, inNicName, outNicName string
 //
 // Precondition: The packet's network and transport header must be set.
 func (it *IPTables) CheckOutput(pkt *PacketBuffer, r *Route, outNicName string) bool {
-	return it.check(Output, pkt, r, nil /* addressEP */, "" /* inNicName */, outNicName)
+	const hook = Output
+
+	if it.shouldSkip(pkt.NetworkProtocolNumber) {
+		return true
+	}
+
+	if t := it.connections.getConnOrMaybeInsertNoop(pkt); t != nil {
+		pkt.tuple = t
+		t.conn.handlePacket(pkt, hook, r)
+	}
+
+	return it.check(hook, pkt, r, nil /* addressEP */, "" /* inNicName */, outNicName)
 }
 
 // CheckPostrouting performs the postrouting hook on the packet.
@@ -310,8 +350,38 @@ func (it *IPTables) CheckOutput(pkt *PacketBuffer, r *Route, outNicName string)
 // must be dropped if false is returned.
 //
 // Precondition: The packet's network and transport header must be set.
-func (it *IPTables) CheckPostrouting(pkt *PacketBuffer, r *Route, outNicName string) bool {
-	return it.check(Postrouting, pkt, r, nil /* addressEP */, "" /* inNicName */, outNicName)
+func (it *IPTables) CheckPostrouting(pkt *PacketBuffer, r *Route, addressEP AddressableEndpoint, outNicName string) bool {
+	const hook = Postrouting
+
+	if it.shouldSkip(pkt.NetworkProtocolNumber) {
+		return true
+	}
+
+	if t := pkt.tuple; t != nil {
+		t.conn.handlePacket(pkt, hook, r)
+	}
+
+	ret := it.check(hook, pkt, r, addressEP, "" /* inNicName */, outNicName)
+	if t := pkt.tuple; t != nil {
+		t.conn.finalize()
+	}
+	pkt.tuple = nil
+	return ret
+}
+
+func (it *IPTables) shouldSkip(netProto tcpip.NetworkProtocolNumber) bool {
+	switch netProto {
+	case header.IPv4ProtocolNumber, header.IPv6ProtocolNumber:
+	default:
+		// IPTables only supports IPv4/IPv6.
+		return true
+	}
+
+	it.mu.RLock()
+	defer it.mu.RUnlock()
+	// Many users never configure iptables. Spare them the cost of rule
+	// traversal if rules have never been set.
+	return !it.modified
 }
 
 // check runs pkt through the rules for hook. It returns true when the packet
@@ -320,20 +390,8 @@ func (it *IPTables) CheckPostrouting(pkt *PacketBuffer, r *Route, outNicName str
 //
 // Precondition: The packet's network and transport header must be set.
 func (it *IPTables) check(hook Hook, pkt *PacketBuffer, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool {
-	if pkt.NetworkProtocolNumber != header.IPv4ProtocolNumber && pkt.NetworkProtocolNumber != header.IPv6ProtocolNumber {
-		return true
-	}
-	// Many users never configure iptables. Spare them the cost of rule
-	// traversal if rules have never been set.
 	it.mu.RLock()
 	defer it.mu.RUnlock()
-	if !it.modified {
-		return true
-	}
-
-	// Packets are manipulated only if connection and matching
-	// NAT rule exists.
-	shouldTrack := it.connections.handlePacket(pkt, hook, r)
 
 	// Go through each table containing the hook.
 	priorities := it.priorities[hook]
@@ -361,7 +419,7 @@ func (it *IPTables) check(hook Hook, pkt *PacketBuffer, r *Route, addressEP Addr
 			// Any Return from a built-in chain means we have to
 			// call the underflow.
 			underflow := table.Rules[table.Underflows[hook]]
-			switch v, _ := underflow.Target.Action(pkt, &it.connections, hook, r, addressEP); v {
+			switch v, _ := underflow.Target.Action(pkt, hook, r, addressEP); v {
 			case RuleAccept:
 				continue
 			case RuleDrop:
@@ -377,21 +435,6 @@ func (it *IPTables) check(hook Hook, pkt *PacketBuffer, r *Route, addressEP Addr
 		}
 	}
 
-	// If this connection should be tracked, try to add an entry for it. If
-	// traversing the nat table didn't end in adding an entry,
-	// maybeInsertNoop will add a no-op entry for the connection. This is
-	// needeed when establishing connections so that the SYN/ACK reply to an
-	// outgoing SYN is delivered to the correct endpoint rather than being
-	// redirected by a prerouting rule.
-	//
-	// From the iptables documentation: "If there is no rule, a `null'
-	// binding is created: this usually does not map the packet, but exists
-	// to ensure we don't map another stream over an existing one."
-	if shouldTrack {
-		it.connections.maybeInsertNoop(pkt, hook)
-	}
-
-	// Every table returned Accept.
 	return true
 }
 
@@ -431,7 +474,9 @@ func (it *IPTables) startReaper(interval time.Duration) {
 //
 // Precondition:  The packets' network and transport header must be set.
 func (it *IPTables) CheckOutputPackets(pkts PacketBufferList, r *Route, outNicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) {
-	return it.checkPackets(Output, pkts, r, outNicName)
+	return checkPackets(pkts, func(pkt *PacketBuffer) bool {
+		return it.CheckOutput(pkt, r, outNicName)
+	})
 }
 
 // CheckPostroutingPackets performs the postrouting hook on the packets.
@@ -439,21 +484,16 @@ func (it *IPTables) CheckOutputPackets(pkts PacketBufferList, r *Route, outNicNa
 // Returns a map of packets that must be dropped.
 //
 // Precondition:  The packets' network and transport header must be set.
-func (it *IPTables) CheckPostroutingPackets(pkts PacketBufferList, r *Route, outNicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) {
-	return it.checkPackets(Postrouting, pkts, r, outNicName)
+func (it *IPTables) CheckPostroutingPackets(pkts PacketBufferList, r *Route, addressEP AddressableEndpoint, outNicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) {
+	return checkPackets(pkts, func(pkt *PacketBuffer) bool {
+		return it.CheckPostrouting(pkt, r, addressEP, outNicName)
+	})
 }
 
-// checkPackets runs pkts through the rules for hook and returns a map of
-// packets that should not go forward.
-//
-// NOTE: unlike the Check API the returned map contains packets that should be
-// dropped.
-//
-// Precondition:  The packets' network and transport header must be set.
-func (it *IPTables) checkPackets(hook Hook, pkts PacketBufferList, r *Route, outNicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) {
+func checkPackets(pkts PacketBufferList, f func(*PacketBuffer) bool) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) {
 	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
 		if !pkt.NatDone {
-			if ok := it.check(hook, pkt, r, nil /* addressEP */, "" /* inNicName */, outNicName); !ok {
+			if ok := f(pkt); !ok {
 				if drop == nil {
 					drop = make(map[*PacketBuffer]struct{})
 				}
@@ -543,7 +583,7 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx
 	}
 
 	// All the matchers matched, so run the target.
-	return rule.Target.Action(pkt, &it.connections, hook, r, addressEP)
+	return rule.Target.Action(pkt, hook, r, addressEP)
 }
 
 // OriginalDst returns the original destination of redirected connections. It
diff --git a/pkg/tcpip/stack/iptables_state.go b/pkg/tcpip/stack/iptables_state.go
index 529e02a07..3d3c39c20 100644
--- a/pkg/tcpip/stack/iptables_state.go
+++ b/pkg/tcpip/stack/iptables_state.go
@@ -26,11 +26,15 @@ type unixTime struct {
 
 // saveLastUsed is invoked by stateify.
 func (cn *conn) saveLastUsed() unixTime {
+	cn.mu.Lock()
+	defer cn.mu.Unlock()
 	return unixTime{cn.lastUsed.Unix(), cn.lastUsed.UnixNano()}
 }
 
 // loadLastUsed is invoked by stateify.
 func (cn *conn) loadLastUsed(unix unixTime) {
+	cn.mu.Lock()
+	defer cn.mu.Unlock()
 	cn.lastUsed = time.Unix(unix.second, unix.nano)
 }
 
diff --git a/pkg/tcpip/stack/iptables_targets.go b/pkg/tcpip/stack/iptables_targets.go
index e8806ebdb..85490e2d4 100644
--- a/pkg/tcpip/stack/iptables_targets.go
+++ b/pkg/tcpip/stack/iptables_targets.go
@@ -29,7 +29,7 @@ type AcceptTarget struct {
 }
 
 // Action implements Target.Action.
-func (*AcceptTarget) Action(*PacketBuffer, *ConnTrack, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) {
+func (*AcceptTarget) Action(*PacketBuffer, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) {
 	return RuleAccept, 0
 }
 
@@ -40,7 +40,7 @@ type DropTarget struct {
 }
 
 // Action implements Target.Action.
-func (*DropTarget) Action(*PacketBuffer, *ConnTrack, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) {
+func (*DropTarget) Action(*PacketBuffer, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) {
 	return RuleDrop, 0
 }
 
@@ -52,7 +52,7 @@ type ErrorTarget struct {
 }
 
 // Action implements Target.Action.
-func (*ErrorTarget) Action(*PacketBuffer, *ConnTrack, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) {
+func (*ErrorTarget) Action(*PacketBuffer, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) {
 	log.Debugf("ErrorTarget triggered.")
 	return RuleDrop, 0
 }
@@ -67,7 +67,7 @@ type UserChainTarget struct {
 }
 
 // Action implements Target.Action.
-func (*UserChainTarget) Action(*PacketBuffer, *ConnTrack, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) {
+func (*UserChainTarget) Action(*PacketBuffer, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) {
 	panic("UserChainTarget should never be called.")
 }
 
@@ -79,10 +79,49 @@ type ReturnTarget struct {
 }
 
 // Action implements Target.Action.
-func (*ReturnTarget) Action(*PacketBuffer, *ConnTrack, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) {
+func (*ReturnTarget) Action(*PacketBuffer, Hook, *Route, AddressableEndpoint) (RuleVerdict, int) {
 	return RuleReturn, 0
 }
 
+// DNATTarget modifies the destination port/IP of packets.
+type DNATTarget struct {
+	// The new destination address for packets.
+	//
+	// Immutable.
+	Addr tcpip.Address
+
+	// The new destination port for packets.
+	//
+	// Immutable.
+	Port uint16
+
+	// NetworkProtocol is the network protocol the target is used with.
+	//
+	// Immutable.
+	NetworkProtocol tcpip.NetworkProtocolNumber
+}
+
+// Action implements Target.Action.
+func (rt *DNATTarget) Action(pkt *PacketBuffer, hook Hook, r *Route, addressEP AddressableEndpoint) (RuleVerdict, int) {
+	// Sanity check.
+	if rt.NetworkProtocol != pkt.NetworkProtocolNumber {
+		panic(fmt.Sprintf(
+			"DNATTarget.Action with NetworkProtocol %d called on packet with NetworkProtocolNumber %d",
+			rt.NetworkProtocol, pkt.NetworkProtocolNumber))
+	}
+
+	switch hook {
+	case Prerouting, Output:
+	case Input, Forward, Postrouting:
+		panic(fmt.Sprintf("%s not supported for DNAT", hook))
+	default:
+		panic(fmt.Sprintf("%s unrecognized", hook))
+	}
+
+	return natAction(pkt, hook, r, rt.Port, rt.Addr, true /* dnat */)
+
+}
+
 // RedirectTarget redirects the packet to this machine by modifying the
 // destination port/IP. Outgoing packets are redirected to the loopback device,
 // and incoming packets are redirected to the incoming interface (rather than
@@ -97,7 +136,7 @@ type RedirectTarget struct {
 }
 
 // Action implements Target.Action.
-func (rt *RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, r *Route, addressEP AddressableEndpoint) (RuleVerdict, int) {
+func (rt *RedirectTarget) Action(pkt *PacketBuffer, hook Hook, r *Route, addressEP AddressableEndpoint) (RuleVerdict, int) {
 	// Sanity check.
 	if rt.NetworkProtocol != pkt.NetworkProtocolNumber {
 		panic(fmt.Sprintf(
@@ -105,16 +144,6 @@ func (rt *RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, r
 			rt.NetworkProtocol, pkt.NetworkProtocolNumber))
 	}
 
-	// Packet is already manipulated.
-	if pkt.NatDone {
-		return RuleAccept, 0
-	}
-
-	// Drop the packet if network and transport header are not set.
-	if pkt.NetworkHeader().View().IsEmpty() || pkt.TransportHeader().View().IsEmpty() {
-		return RuleDrop, 0
-	}
-
 	// Change the address to loopback (127.0.0.1 or ::1) in Output and to
 	// the primary address of the incoming interface in Prerouting.
 	var address tcpip.Address
@@ -132,43 +161,7 @@ func (rt *RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, r
 		panic("redirect target is supported only on output and prerouting hooks")
 	}
 
-	switch protocol := pkt.TransportProtocolNumber; protocol {
-	case header.UDPProtocolNumber:
-		udpHeader := header.UDP(pkt.TransportHeader().View())
-
-		if hook == Output {
-			// Only calculate the checksum if offloading isn't supported.
-			requiresChecksum := r.RequiresTXTransportChecksum()
-			rewritePacket(
-				pkt.Network(),
-				udpHeader,
-				false, /* updateSRCFields */
-				requiresChecksum,
-				requiresChecksum,
-				rt.Port,
-				address,
-			)
-		} else {
-			udpHeader.SetDestinationPort(rt.Port)
-		}
-
-		pkt.NatDone = true
-	case header.TCPProtocolNumber:
-		if ct == nil {
-			return RuleAccept, 0
-		}
-
-		// Set up conection for matching NAT rule. Only the first
-		// packet of the connection comes here. Other packets will be
-		// manipulated in connection tracking.
-		if conn := ct.insertRedirectConn(pkt, hook, rt.Port, address); conn != nil {
-			ct.handlePacket(pkt, hook, r)
-		}
-	default:
-		return RuleDrop, 0
-	}
-
-	return RuleAccept, 0
+	return natAction(pkt, hook, r, rt.Port, address, true /* dnat */)
 }
 
 // SNATTarget modifies the source port/IP in the outgoing packets.
@@ -181,15 +174,7 @@ type SNATTarget struct {
 	NetworkProtocol tcpip.NetworkProtocolNumber
 }
 
-// Action implements Target.Action.
-func (st *SNATTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, r *Route, _ AddressableEndpoint) (RuleVerdict, int) {
-	// Sanity check.
-	if st.NetworkProtocol != pkt.NetworkProtocolNumber {
-		panic(fmt.Sprintf(
-			"SNATTarget.Action with NetworkProtocol %d called on packet with NetworkProtocolNumber %d",
-			st.NetworkProtocol, pkt.NetworkProtocolNumber))
-	}
-
+func natAction(pkt *PacketBuffer, hook Hook, r *Route, port uint16, address tcpip.Address, dnat bool) (RuleVerdict, int) {
 	// Packet is already manipulated.
 	if pkt.NatDone {
 		return RuleAccept, 0
@@ -200,6 +185,37 @@ func (st *SNATTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, r *Rou
 		return RuleDrop, 0
 	}
 
+	t := pkt.tuple
+	if t == nil {
+		return RuleDrop, 0
+	}
+
+	// TODO(https://gvisor.dev/issue/5773): If the port is in use, pick a
+	// different port.
+	if port == 0 {
+		switch protocol := pkt.TransportProtocolNumber; protocol {
+		case header.UDPProtocolNumber:
+			port = header.UDP(pkt.TransportHeader().View()).SourcePort()
+		case header.TCPProtocolNumber:
+			port = header.TCP(pkt.TransportHeader().View()).SourcePort()
+		default:
+			panic(fmt.Sprintf("unsupported transport protocol = %d", pkt.TransportProtocolNumber))
+		}
+	}
+
+	t.conn.performNAT(pkt, hook, r, port, address, dnat)
+	return RuleAccept, 0
+}
+
+// Action implements Target.Action.
+func (st *SNATTarget) Action(pkt *PacketBuffer, hook Hook, r *Route, _ AddressableEndpoint) (RuleVerdict, int) {
+	// Sanity check.
+	if st.NetworkProtocol != pkt.NetworkProtocolNumber {
+		panic(fmt.Sprintf(
+			"SNATTarget.Action with NetworkProtocol %d called on packet with NetworkProtocolNumber %d",
+			st.NetworkProtocol, pkt.NetworkProtocolNumber))
+	}
+
 	switch hook {
 	case Postrouting, Input:
 	case Prerouting, Output, Forward:
@@ -208,31 +224,43 @@ func (st *SNATTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, r *Rou
 		panic(fmt.Sprintf("%s unrecognized", hook))
 	}
 
-	port := st.Port
+	return natAction(pkt, hook, r, st.Port, st.Addr, false /* dnat */)
+}
 
-	if port == 0 {
-		switch protocol := pkt.TransportProtocolNumber; protocol {
-		case header.UDPProtocolNumber:
-			if port == 0 {
-				port = header.UDP(pkt.TransportHeader().View()).SourcePort()
-			}
-		case header.TCPProtocolNumber:
-			if port == 0 {
-				port = header.TCP(pkt.TransportHeader().View()).SourcePort()
-			}
-		}
+// MasqueradeTarget modifies the source port/IP in the outgoing packets.
+type MasqueradeTarget struct {
+	// NetworkProtocol is the network protocol the target is used with. It
+	// is immutable.
+	NetworkProtocol tcpip.NetworkProtocolNumber
+}
+
+// Action implements Target.Action.
+func (mt *MasqueradeTarget) Action(pkt *PacketBuffer, hook Hook, r *Route, addressEP AddressableEndpoint) (RuleVerdict, int) {
+	// Sanity check.
+	if mt.NetworkProtocol != pkt.NetworkProtocolNumber {
+		panic(fmt.Sprintf(
+			"MasqueradeTarget.Action with NetworkProtocol %d called on packet with NetworkProtocolNumber %d",
+			mt.NetworkProtocol, pkt.NetworkProtocolNumber))
 	}
 
-	// Set up conection for matching NAT rule. Only the first packet of the
-	// connection comes here. Other packets will be manipulated in connection
-	// tracking.
-	//
-	// Does nothing if the protocol does not support connection tracking.
-	if conn := ct.insertSNATConn(pkt, hook, port, st.Addr); conn != nil {
-		ct.handlePacket(pkt, hook, r)
+	switch hook {
+	case Postrouting:
+	case Prerouting, Input, Forward, Output:
+		panic(fmt.Sprintf("masquerade target is supported only on postrouting hook; hook = %d", hook))
+	default:
+		panic(fmt.Sprintf("%s unrecognized", hook))
 	}
 
-	return RuleAccept, 0
+	// addressEP is expected to be set for the postrouting hook.
+	ep := addressEP.AcquireOutgoingPrimaryAddress(pkt.Network().DestinationAddress(), false /* allowExpired */)
+	if ep == nil {
+		// No address exists that we can use as a source address.
+		return RuleDrop, 0
+	}
+
+	address := ep.AddressWithPrefix().Address
+	ep.DecRef()
+	return natAction(pkt, hook, r, 0 /* port */, address, false /* dnat */)
 }
 
 func rewritePacket(n header.Network, t header.ChecksummableTransport, updateSRCFields, fullChecksum, updatePseudoHeader bool, newPort uint16, newAddr tcpip.Address) {
diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go
index 976194124..b22024667 100644
--- a/pkg/tcpip/stack/iptables_types.go
+++ b/pkg/tcpip/stack/iptables_types.go
@@ -81,17 +81,6 @@ const (
 //
 // +stateify savable
 type IPTables struct {
-	// mu protects v4Tables, v6Tables, and modified.
-	mu sync.RWMutex
-	// v4Tables and v6tables map tableIDs to tables. They hold builtin
-	// tables only, not user tables. mu must be locked for accessing.
-	v4Tables [NumTables]Table
-	v6Tables [NumTables]Table
-	// modified is whether tables have been modified at least once. It is
-	// used to elide the iptables performance overhead for workloads that
-	// don't utilize iptables.
-	modified bool
-
 	// priorities maps each hook to a list of table names. The order of the
 	// list is the order in which each table should be visited for that
 	// hook. It is immutable.
@@ -101,6 +90,21 @@ type IPTables struct {
 
 	// reaperDone can be signaled to stop the reaper goroutine.
 	reaperDone chan struct{}
+
+	mu sync.RWMutex
+	// v4Tables and v6tables map tableIDs to tables. They hold builtin
+	// tables only, not user tables.
+	//
+	// +checklocks:mu
+	v4Tables [NumTables]Table
+	// +checklocks:mu
+	v6Tables [NumTables]Table
+	// modified is whether tables have been modified at least once. It is
+	// used to elide the iptables performance overhead for workloads that
+	// don't utilize iptables.
+	//
+	// +checklocks:mu
+	modified bool
 }
 
 // VisitTargets traverses all the targets of all tables and replaces each with
@@ -352,5 +356,5 @@ type Target interface {
 	// Action takes an action on the packet and returns a verdict on how
 	// traversal should (or should not) continue. If the return value is
 	// Jump, it also returns the index of the rule to jump to.
-	Action(*PacketBuffer, *ConnTrack, Hook, *Route, AddressableEndpoint) (RuleVerdict, int)
+	Action(*PacketBuffer, Hook, *Route, AddressableEndpoint) (RuleVerdict, int)
 }
diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go
index bf248ef20..888a8bd9d 100644
--- a/pkg/tcpip/stack/packet_buffer.go
+++ b/pkg/tcpip/stack/packet_buffer.go
@@ -143,6 +143,8 @@ type PacketBuffer struct {
 
 	// NetworkPacketInfo holds an incoming packet's network-layer information.
 	NetworkPacketInfo NetworkPacketInfo
+
+	tuple *tuple
 }
 
 // NewPacketBuffer creates a new PacketBuffer with opts.
@@ -302,6 +304,7 @@ func (pk *PacketBuffer) Clone() *PacketBuffer {
 		NICID:                        pk.NICID,
 		RXTransportChecksumValidated: pk.RXTransportChecksumValidated,
 		NetworkPacketInfo:            pk.NetworkPacketInfo,
+		tuple:                        pk.tuple,
 	}
 }
 
@@ -329,13 +332,8 @@ func (pk *PacketBuffer) CloneToInbound() *PacketBuffer {
 		buf: pk.buf.Clone(),
 		// Treat unfilled header portion as reserved.
 		reserved: pk.AvailableHeaderBytes(),
+		tuple:    pk.tuple,
 	}
-	// TODO(gvisor.dev/issue/5696): reimplement conntrack so that no need to
-	// maintain this flag in the packet. Currently conntrack needs this flag to
-	// tell if a noop connection should be inserted at Input hook. Once conntrack
-	// redefines the manipulation field as mutable, we won't need the special noop
-	// connection.
-	newPk.NatDone = pk.NatDone
 	return newPk
 }
 
@@ -367,12 +365,7 @@ func (pk *PacketBuffer) DeepCopyForForwarding(reservedHeaderBytes int) *PacketBu
 		newPk.TransportProtocolNumber = pk.TransportProtocolNumber
 	}
 
-	// TODO(gvisor.dev/issue/5696): reimplement conntrack so that no need to
-	// maintain this flag in the packet. Currently conntrack needs this flag to
-	// tell if a noop connection should be inserted at Input hook. Once conntrack
-	// redefines the manipulation field as mutable, we won't need the special noop
-	// connection.
-	newPk.NatDone = pk.NatDone
+	newPk.tuple = pk.tuple
 
 	return newPk
 }
@@ -425,13 +418,14 @@ func (d PacketData) PullUp(size int) (tcpipbuffer.View, bool) {
 	return d.pk.buf.PullUp(d.pk.dataOffset(), size)
 }
 
-// DeleteFront removes count from the beginning of d. It panics if count >
-// d.Size(). All backing storage references after the front of the d are
-// invalidated.
-func (d PacketData) DeleteFront(count int) {
-	if !d.pk.buf.Remove(d.pk.dataOffset(), count) {
-		panic("count > d.Size()")
+// Consume is the same as PullUp except that it additionally consumes the
+// returned bytes. Subsequent PullUp or Consume will not return these bytes.
+func (d PacketData) Consume(size int) (tcpipbuffer.View, bool) {
+	v, ok := d.PullUp(size)
+	if ok {
+		d.pk.consumed += size
 	}
+	return v, ok
 }
 
 // CapLength reduces d to at most length bytes.
diff --git a/pkg/tcpip/stack/packet_buffer_test.go b/pkg/tcpip/stack/packet_buffer_test.go
index 87b023445..c376ed1a1 100644
--- a/pkg/tcpip/stack/packet_buffer_test.go
+++ b/pkg/tcpip/stack/packet_buffer_test.go
@@ -123,32 +123,6 @@ func TestPacketHeaderPush(t *testing.T) {
 	}
 }
 
-func TestPacketBufferClone(t *testing.T) {
-	data := concatViews(makeView(20), makeView(30), makeView(40))
-	pk := NewPacketBuffer(PacketBufferOptions{
-		// Make a copy of data to make sure our truth data won't be taint by
-		// PacketBuffer.
-		Data: buffer.NewViewFromBytes(data).ToVectorisedView(),
-	})
-
-	bytesToDelete := 30
-	originalSize := data.Size()
-
-	clonedPks := []*PacketBuffer{
-		pk.Clone(),
-		pk.CloneToInbound(),
-	}
-	pk.Data().DeleteFront(bytesToDelete)
-	if got, want := pk.Data().Size(), originalSize-bytesToDelete; got != want {
-		t.Errorf("original packet was not changed: size expected = %d, got = %d", want, got)
-	}
-	for _, clonedPk := range clonedPks {
-		if got := clonedPk.Data().Size(); got != originalSize {
-			t.Errorf("cloned packet should not be modified: expected size = %d, got = %d", originalSize, got)
-		}
-	}
-}
-
 func TestPacketHeaderConsume(t *testing.T) {
 	for _, test := range []struct {
 		name      string
@@ -461,11 +435,17 @@ func TestPacketBufferData(t *testing.T) {
 				}
 			})
 
-			// DeleteFront
+			// Consume.
 			for _, n := range []int{1, len(tc.data)} {
-				t.Run(fmt.Sprintf("DeleteFront%d", n), func(t *testing.T) {
+				t.Run(fmt.Sprintf("Consume%d", n), func(t *testing.T) {
 					pkt := tc.makePkt(t)
-					pkt.Data().DeleteFront(n)
+					v, ok := pkt.Data().Consume(n)
+					if !ok {
+						t.Fatalf("Consume failed")
+					}
+					if want := []byte(tc.data)[:n]; !bytes.Equal(v, want) {
+						t.Fatalf("pkt.Data().Consume(n) = 0x%x, want 0x%x", v, want)
+					}
 
 					checkData(t, pkt, []byte(tc.data)[n:])
 				})
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index cd4137794..c23e91702 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -139,18 +139,15 @@ func (f *fakeNetworkEndpoint) HandlePacket(pkt *stack.PacketBuffer) {
 
 	// Handle control packets.
 	if netHdr[protocolNumberOffset] == uint8(fakeControlProtocol) {
-		hdr, ok := pkt.Data().PullUp(fakeNetHeaderLen)
+		hdr, ok := pkt.Data().Consume(fakeNetHeaderLen)
 		if !ok {
 			return
 		}
-		// DeleteFront invalidates slices. Make a copy before trimming.
-		nb := append([]byte(nil), hdr...)
-		pkt.Data().DeleteFront(fakeNetHeaderLen)
 		f.dispatcher.DeliverTransportError(
-			tcpip.Address(nb[srcAddrOffset:srcAddrOffset+1]),
-			tcpip.Address(nb[dstAddrOffset:dstAddrOffset+1]),
+			tcpip.Address(hdr[srcAddrOffset:srcAddrOffset+1]),
+			tcpip.Address(hdr[dstAddrOffset:dstAddrOffset+1]),
 			fakeNetNumber,
-			tcpip.TransportProtocolNumber(nb[protocolNumberOffset]),
+			tcpip.TransportProtocolNumber(hdr[protocolNumberOffset]),
 			// Nothing checks the error.
 			nil, /* transport error */
 			pkt,
diff --git a/pkg/tcpip/stack/tcp.go b/pkg/tcpip/stack/tcp.go
index dc7289441..a941091b0 100644
--- a/pkg/tcpip/stack/tcp.go
+++ b/pkg/tcpip/stack/tcp.go
@@ -289,6 +289,12 @@ type TCPSenderState struct {
 
 	// RACKState holds the state related to RACK loss detection algorithm.
 	RACKState TCPRACKState
+
+	// RetransmitTS records the timestamp used to detect spurious recovery.
+	RetransmitTS uint32
+
+	// SpuriousRecovery indicates if the sender entered recovery spuriously.
+	SpuriousRecovery bool
 }
 
 // TCPSACKInfo holds TCP SACK related information for a given TCP endpoint.
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index d45a2c05c..460a6afaf 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -423,9 +423,9 @@ type ControlMessages struct {
 	// HasTimestamp indicates whether Timestamp is valid/set.
 	HasTimestamp bool
 
-	// Timestamp is the time (in ns) that the last packet used to create
-	// the read data was received.
-	Timestamp int64
+	// Timestamp is the time that the last packet used to create the read data
+	// was received.
+	Timestamp time.Time `state:".(int64)"`
 
 	// HasInq indicates whether Inq is valid/set.
 	HasInq bool
@@ -471,10 +471,10 @@ type ControlMessages struct {
 
 // PacketOwner is used to get UID and GID of the packet.
 type PacketOwner interface {
-	// UID returns KUID of the packet.
+	// KUID returns KUID of the packet.
 	KUID() uint32
 
-	// GID returns KGID of the packet.
+	// KGID returns KGID of the packet.
 	KGID() uint32
 }
 
@@ -1245,11 +1245,11 @@ type Route struct {
 // String implements the fmt.Stringer interface.
 func (r Route) String() string {
 	var out strings.Builder
-	fmt.Fprintf(&out, "%s", r.Destination)
+	_, _ = fmt.Fprintf(&out, "%s", r.Destination)
 	if len(r.Gateway) > 0 {
-		fmt.Fprintf(&out, " via %s", r.Gateway)
+		_, _ = fmt.Fprintf(&out, " via %s", r.Gateway)
 	}
-	fmt.Fprintf(&out, " nic %d", r.NIC)
+	_, _ = fmt.Fprintf(&out, " nic %d", r.NIC)
 	return out.String()
 }
 
@@ -1286,7 +1286,7 @@ func (s *StatCounter) Decrement() {
 }
 
 // Value returns the current value of the counter.
-func (s *StatCounter) Value(name ...string) uint64 {
+func (s *StatCounter) Value(...string) uint64 {
 	return s.count.Load()
 }
 
@@ -1865,6 +1865,10 @@ type TCPStats struct {
 	// SegmentsAckedWithDSACK is the number of segments acknowledged with
 	// DSACK.
 	SegmentsAckedWithDSACK *StatCounter
+
+	// SpuriousRecovery is the number of times the connection entered loss
+	// recovery spuriously.
+	SpuriousRecovery *StatCounter
 }
 
 // UDPStats collects UDP-specific stats.
diff --git a/pkg/tcpip/tcpip_state.go b/pkg/tcpip/tcpip_state.go
new file mode 100644
index 000000000..1953e24a1
--- /dev/null
+++ b/pkg/tcpip/tcpip_state.go
@@ -0,0 +1,27 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcpip
+
+import (
+	"time"
+)
+
+func (c *ControlMessages) saveTimestamp() int64 {
+	return c.Timestamp.UnixNano()
+}
+
+func (c *ControlMessages) loadTimestamp(nsec int64) {
+	c.Timestamp = time.Unix(0, nsec)
+}
diff --git a/pkg/tcpip/tests/integration/iptables_test.go b/pkg/tcpip/tests/integration/iptables_test.go
index bdf4a64b9..7f872c271 100644
--- a/pkg/tcpip/tests/integration/iptables_test.go
+++ b/pkg/tcpip/tests/integration/iptables_test.go
@@ -1162,19 +1162,19 @@ func TestInputHookWithLocalForwarding(t *testing.T) {
 	}
 }
 
-func TestSNAT(t *testing.T) {
-	const listenPort = 8080
+func TestNAT(t *testing.T) {
+	const listenPort uint16 = 8080
 
 	type endpointAndAddresses struct {
-		serverEP         tcpip.Endpoint
-		serverAddr       tcpip.Address
-		serverReadableCH chan struct{}
-
-		clientEP         tcpip.Endpoint
-		clientAddr       tcpip.Address
-		clientReadableCH chan struct{}
-
-		nattedClientAddr tcpip.Address
+		serverEP          tcpip.Endpoint
+		serverAddr        tcpip.FullAddress
+		serverReadableCH  chan struct{}
+		serverConnectAddr tcpip.Address
+
+		clientEP          tcpip.Endpoint
+		clientAddr        tcpip.Address
+		clientReadableCH  chan struct{}
+		clientConnectAddr tcpip.FullAddress
 	}
 
 	newEP := func(t *testing.T, s *stack.Stack, transProto tcpip.TransportProtocolNumber, netProto tcpip.NetworkProtocolNumber) (tcpip.Endpoint, chan struct{}) {
@@ -1195,71 +1195,247 @@ func TestSNAT(t *testing.T) {
 		return ep, ch
 	}
 
+	setupNAT := func(t *testing.T, s *stack.Stack, netProto tcpip.NetworkProtocolNumber, hook stack.Hook, filter stack.IPHeaderFilter, target stack.Target) {
+		t.Helper()
+
+		ipv6 := netProto == ipv6.ProtocolNumber
+		ipt := s.IPTables()
+		table := ipt.GetTable(stack.NATID, ipv6)
+		ruleIdx := table.BuiltinChains[hook]
+		table.Rules[ruleIdx].Filter = filter
+		table.Rules[ruleIdx].Target = target
+		// Make sure the packet is not dropped by the next rule.
+		table.Rules[ruleIdx+1].Target = &stack.AcceptTarget{}
+		if err := ipt.ReplaceTable(stack.NATID, table, ipv6); err != nil {
+			t.Fatalf("ipt.ReplaceTable(%d, _, %t): %s", stack.NATID, ipv6, err)
+		}
+	}
+
+	setupDNAT := func(t *testing.T, s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, target stack.Target) {
+		t.Helper()
+
+		setupNAT(
+			t,
+			s,
+			netProto,
+			stack.Prerouting,
+			stack.IPHeaderFilter{
+				Protocol:       transProto,
+				CheckProtocol:  true,
+				InputInterface: utils.RouterNIC2Name,
+			},
+			target)
+	}
+
+	setupSNAT := func(t *testing.T, s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, target stack.Target) {
+		t.Helper()
+
+		setupNAT(
+			t,
+			s,
+			netProto,
+			stack.Postrouting,
+			stack.IPHeaderFilter{
+				Protocol:        transProto,
+				CheckProtocol:   true,
+				OutputInterface: utils.RouterNIC1Name,
+			},
+			target)
+	}
+
+	type natType struct {
+		name     string
+		setupNAT func(_ *testing.T, _ *stack.Stack, _ tcpip.NetworkProtocolNumber, _ tcpip.TransportProtocolNumber, snatAddr, dnatAddr tcpip.Address)
+	}
+
+	snatTypes := []natType{
+		{
+			name: "SNAT",
+			setupNAT: func(t *testing.T, s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, snatAddr, _ tcpip.Address) {
+				t.Helper()
+
+				setupSNAT(t, s, netProto, transProto, &stack.SNATTarget{NetworkProtocol: netProto, Addr: snatAddr})
+			},
+		},
+		{
+			name: "Masquerade",
+			setupNAT: func(t *testing.T, s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, _, _ tcpip.Address) {
+				t.Helper()
+
+				setupSNAT(t, s, netProto, transProto, &stack.MasqueradeTarget{NetworkProtocol: netProto})
+			},
+		},
+	}
+	dnatTypes := []natType{
+		{
+			name: "Redirect",
+			setupNAT: func(t *testing.T, s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, _, _ tcpip.Address) {
+				t.Helper()
+
+				setupDNAT(t, s, netProto, transProto, &stack.RedirectTarget{NetworkProtocol: netProto, Port: listenPort})
+			},
+		},
+		{
+			name: "DNAT",
+			setupNAT: func(t *testing.T, s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, _, dnatAddr tcpip.Address) {
+				t.Helper()
+
+				setupDNAT(t, s, netProto, transProto, &stack.DNATTarget{NetworkProtocol: netProto, Addr: dnatAddr, Port: listenPort})
+			},
+		},
+	}
+
 	tests := []struct {
-		name       string
+		name     string
+		netProto tcpip.NetworkProtocolNumber
+		// Sets up the stacks in such a way that:
+		//
+		// - Host2 is the client for all tests.
+		// - Host1 is the server when performing SNAT
+		//   + NAT will transform client-originating packets' source addresses to
+		//     the router's NIC1's address before reaching Host1.
+		// - Router is the server when performing DNAT (client will still attempt to
+		//   send packets to Host1).
+		//   + NAT will transform client-originating packets' destination addresses
+		//     to the router's NIC2's address.
 		epAndAddrs func(t *testing.T, host1Stack, routerStack, host2Stack *stack.Stack, proto tcpip.TransportProtocolNumber) endpointAndAddresses
+		natTypes   []natType
 	}{
 		{
-			name: "IPv4 host1 server with host2 client",
+			name:     "IPv4 SNAT",
+			netProto: ipv4.ProtocolNumber,
 			epAndAddrs: func(t *testing.T, host1Stack, routerStack, host2Stack *stack.Stack, proto tcpip.TransportProtocolNumber) endpointAndAddresses {
 				t.Helper()
 
-				ipt := routerStack.IPTables()
-				filter := ipt.GetTable(stack.NATID, false /* ipv6 */)
-				ruleIdx := filter.BuiltinChains[stack.Postrouting]
-				filter.Rules[ruleIdx].Filter = stack.IPHeaderFilter{OutputInterface: utils.RouterNIC1Name}
-				filter.Rules[ruleIdx].Target = &stack.SNATTarget{NetworkProtocol: ipv4.ProtocolNumber, Addr: utils.RouterNIC1IPv4Addr.AddressWithPrefix.Address}
-				// Make sure the packet is not dropped by the next rule.
-				filter.Rules[ruleIdx+1].Target = &stack.AcceptTarget{}
-				if err := ipt.ReplaceTable(stack.NATID, filter, false /* ipv6 */); err != nil {
-					t.Fatalf("ipt.ReplaceTable(%d, _, %t): %s", stack.NATID, false, err)
+				listenerStack := host1Stack
+				serverAddr := tcpip.FullAddress{
+					Addr: utils.Host1IPv4Addr.AddressWithPrefix.Address,
+					Port: listenPort,
 				}
-
-				ep1, ep1WECH := newEP(t, host1Stack, proto, ipv4.ProtocolNumber)
+				serverConnectAddr := utils.RouterNIC1IPv4Addr.AddressWithPrefix.Address
+				clientConnectPort := serverAddr.Port
+				ep1, ep1WECH := newEP(t, listenerStack, proto, ipv4.ProtocolNumber)
 				ep2, ep2WECH := newEP(t, host2Stack, proto, ipv4.ProtocolNumber)
 				return endpointAndAddresses{
-					serverEP:         ep1,
-					serverAddr:       utils.Host1IPv4Addr.AddressWithPrefix.Address,
-					serverReadableCH: ep1WECH,
+					serverEP:          ep1,
+					serverAddr:        serverAddr,
+					serverReadableCH:  ep1WECH,
+					serverConnectAddr: serverConnectAddr,
 
 					clientEP:         ep2,
 					clientAddr:       utils.Host2IPv4Addr.AddressWithPrefix.Address,
 					clientReadableCH: ep2WECH,
-
-					nattedClientAddr: utils.RouterNIC1IPv4Addr.AddressWithPrefix.Address,
+					clientConnectAddr: tcpip.FullAddress{
+						Addr: utils.Host1IPv4Addr.AddressWithPrefix.Address,
+						Port: clientConnectPort,
+					},
 				}
 			},
+			natTypes: snatTypes,
 		},
 		{
-			name: "IPv6 host1 server with host2 client",
+			name:     "IPv4 DNAT",
+			netProto: ipv4.ProtocolNumber,
 			epAndAddrs: func(t *testing.T, host1Stack, routerStack, host2Stack *stack.Stack, proto tcpip.TransportProtocolNumber) endpointAndAddresses {
 				t.Helper()
 
-				ipt := routerStack.IPTables()
-				filter := ipt.GetTable(stack.NATID, true /* ipv6 */)
-				ruleIdx := filter.BuiltinChains[stack.Postrouting]
-				filter.Rules[ruleIdx].Filter = stack.IPHeaderFilter{OutputInterface: utils.RouterNIC1Name}
-				filter.Rules[ruleIdx].Target = &stack.SNATTarget{NetworkProtocol: ipv6.ProtocolNumber, Addr: utils.RouterNIC1IPv6Addr.AddressWithPrefix.Address}
-				// Make sure the packet is not dropped by the next rule.
-				filter.Rules[ruleIdx+1].Target = &stack.AcceptTarget{}
-				if err := ipt.ReplaceTable(stack.NATID, filter, true /* ipv6 */); err != nil {
-					t.Fatalf("ipt.ReplaceTable(%d, _, %t): %s", stack.NATID, true, err)
+				// If we are performing DNAT, then the packet will be redirected
+				// to the router.
+				listenerStack := routerStack
+				serverAddr := tcpip.FullAddress{
+					Addr: utils.RouterNIC2IPv4Addr.AddressWithPrefix.Address,
+					Port: listenPort,
 				}
+				serverConnectAddr := utils.Host2IPv4Addr.AddressWithPrefix.Address
+				// DNAT will update the destination port to what the server is
+				// bound to.
+				clientConnectPort := serverAddr.Port + 1
+				ep1, ep1WECH := newEP(t, listenerStack, proto, ipv4.ProtocolNumber)
+				ep2, ep2WECH := newEP(t, host2Stack, proto, ipv4.ProtocolNumber)
+				return endpointAndAddresses{
+					serverEP:          ep1,
+					serverAddr:        serverAddr,
+					serverReadableCH:  ep1WECH,
+					serverConnectAddr: serverConnectAddr,
 
-				ep1, ep1WECH := newEP(t, host1Stack, proto, ipv6.ProtocolNumber)
+					clientEP:         ep2,
+					clientAddr:       utils.Host2IPv4Addr.AddressWithPrefix.Address,
+					clientReadableCH: ep2WECH,
+					clientConnectAddr: tcpip.FullAddress{
+						Addr: utils.Host1IPv4Addr.AddressWithPrefix.Address,
+						Port: clientConnectPort,
+					},
+				}
+			},
+			natTypes: dnatTypes,
+		},
+		{
+			name:     "IPv6 SNAT",
+			netProto: ipv6.ProtocolNumber,
+			epAndAddrs: func(t *testing.T, host1Stack, routerStack, host2Stack *stack.Stack, proto tcpip.TransportProtocolNumber) endpointAndAddresses {
+				t.Helper()
+
+				listenerStack := host1Stack
+				serverAddr := tcpip.FullAddress{
+					Addr: utils.Host1IPv6Addr.AddressWithPrefix.Address,
+					Port: listenPort,
+				}
+				serverConnectAddr := utils.RouterNIC1IPv6Addr.AddressWithPrefix.Address
+				clientConnectPort := serverAddr.Port
+				ep1, ep1WECH := newEP(t, listenerStack, proto, ipv6.ProtocolNumber)
 				ep2, ep2WECH := newEP(t, host2Stack, proto, ipv6.ProtocolNumber)
 				return endpointAndAddresses{
-					serverEP:         ep1,
-					serverAddr:       utils.Host1IPv6Addr.AddressWithPrefix.Address,
-					serverReadableCH: ep1WECH,
+					serverEP:          ep1,
+					serverAddr:        serverAddr,
+					serverReadableCH:  ep1WECH,
+					serverConnectAddr: serverConnectAddr,
 
 					clientEP:         ep2,
 					clientAddr:       utils.Host2IPv6Addr.AddressWithPrefix.Address,
 					clientReadableCH: ep2WECH,
+					clientConnectAddr: tcpip.FullAddress{
+						Addr: utils.Host1IPv6Addr.AddressWithPrefix.Address,
+						Port: clientConnectPort,
+					},
+				}
+			},
+			natTypes: snatTypes,
+		},
+		{
+			name:     "IPv6 DNAT",
+			netProto: ipv6.ProtocolNumber,
+			epAndAddrs: func(t *testing.T, host1Stack, routerStack, host2Stack *stack.Stack, proto tcpip.TransportProtocolNumber) endpointAndAddresses {
+				t.Helper()
+
+				// If we are performing DNAT, then the packet will be redirected
+				// to the router.
+				listenerStack := routerStack
+				serverAddr := tcpip.FullAddress{
+					Addr: utils.RouterNIC2IPv6Addr.AddressWithPrefix.Address,
+					Port: listenPort,
+				}
+				serverConnectAddr := utils.Host2IPv6Addr.AddressWithPrefix.Address
+				// DNAT will update the destination port to what the server is
+				// bound to.
+				clientConnectPort := serverAddr.Port + 1
+				ep1, ep1WECH := newEP(t, listenerStack, proto, ipv6.ProtocolNumber)
+				ep2, ep2WECH := newEP(t, host2Stack, proto, ipv6.ProtocolNumber)
+				return endpointAndAddresses{
+					serverEP:          ep1,
+					serverAddr:        serverAddr,
+					serverReadableCH:  ep1WECH,
+					serverConnectAddr: serverConnectAddr,
 
-					nattedClientAddr: utils.RouterNIC1IPv6Addr.AddressWithPrefix.Address,
+					clientEP:         ep2,
+					clientAddr:       utils.Host2IPv6Addr.AddressWithPrefix.Address,
+					clientReadableCH: ep2WECH,
+					clientConnectAddr: tcpip.FullAddress{
+						Addr: utils.Host1IPv6Addr.AddressWithPrefix.Address,
+						Port: clientConnectPort,
+					},
 				}
 			},
+			natTypes: dnatTypes,
 		},
 	}
 
@@ -1328,116 +1504,121 @@ func TestSNAT(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			for _, subTest := range subTests {
 				t.Run(subTest.name, func(t *testing.T) {
-					stackOpts := stack.Options{
-						NetworkProtocols:   []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol, ipv6.NewProtocol},
-						TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol},
-					}
+					for _, natType := range test.natTypes {
+						t.Run(natType.name, func(t *testing.T) {
+							stackOpts := stack.Options{
+								NetworkProtocols:   []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol, ipv6.NewProtocol},
+								TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol},
+							}
 
-					host1Stack := stack.New(stackOpts)
-					routerStack := stack.New(stackOpts)
-					host2Stack := stack.New(stackOpts)
-					utils.SetupRoutedStacks(t, host1Stack, routerStack, host2Stack)
+							host1Stack := stack.New(stackOpts)
+							routerStack := stack.New(stackOpts)
+							host2Stack := stack.New(stackOpts)
+							utils.SetupRoutedStacks(t, host1Stack, routerStack, host2Stack)
 
-					epsAndAddrs := test.epAndAddrs(t, host1Stack, routerStack, host2Stack, subTest.proto)
-					serverAddr := tcpip.FullAddress{Addr: epsAndAddrs.serverAddr, Port: listenPort}
-					if err := epsAndAddrs.serverEP.Bind(serverAddr); err != nil {
-						t.Fatalf("epsAndAddrs.serverEP.Bind(%#v): %s", serverAddr, err)
-					}
-					clientAddr := tcpip.FullAddress{Addr: epsAndAddrs.clientAddr}
-					if err := epsAndAddrs.clientEP.Bind(clientAddr); err != nil {
-						t.Fatalf("epsAndAddrs.clientEP.Bind(%#v): %s", clientAddr, err)
-					}
+							epsAndAddrs := test.epAndAddrs(t, host1Stack, routerStack, host2Stack, subTest.proto)
+							natType.setupNAT(t, routerStack, test.netProto, subTest.proto, epsAndAddrs.serverConnectAddr, epsAndAddrs.serverAddr.Addr)
 
-					if subTest.setupServer != nil {
-						subTest.setupServer(t, epsAndAddrs.serverEP)
-					}
-					{
-						err := epsAndAddrs.clientEP.Connect(serverAddr)
-						if diff := cmp.Diff(subTest.expectedConnectErr, err); diff != "" {
-							t.Fatalf("unexpected error from epsAndAddrs.clientEP.Connect(%#v), (-want, +got):\n%s", serverAddr, diff)
-						}
-					}
-					nattedClientAddr := tcpip.FullAddress{Addr: epsAndAddrs.nattedClientAddr}
-					if addr, err := epsAndAddrs.clientEP.GetLocalAddress(); err != nil {
-						t.Fatalf("epsAndAddrs.clientEP.GetLocalAddress(): %s", err)
-					} else {
-						nattedClientAddr.Port = addr.Port
-					}
+							if err := epsAndAddrs.serverEP.Bind(epsAndAddrs.serverAddr); err != nil {
+								t.Fatalf("epsAndAddrs.serverEP.Bind(%#v): %s", epsAndAddrs.serverAddr, err)
+							}
+							clientAddr := tcpip.FullAddress{Addr: epsAndAddrs.clientAddr}
+							if err := epsAndAddrs.clientEP.Bind(clientAddr); err != nil {
+								t.Fatalf("epsAndAddrs.clientEP.Bind(%#v): %s", clientAddr, err)
+							}
 
-					serverEP := epsAndAddrs.serverEP
-					serverCH := epsAndAddrs.serverReadableCH
-					if ep, ch := subTest.setupServerConn(t, serverEP, serverCH, nattedClientAddr); ep != nil {
-						defer ep.Close()
-						serverEP = ep
-						serverCH = ch
-					}
+							if subTest.setupServer != nil {
+								subTest.setupServer(t, epsAndAddrs.serverEP)
+							}
+							{
+								err := epsAndAddrs.clientEP.Connect(epsAndAddrs.clientConnectAddr)
+								if diff := cmp.Diff(subTest.expectedConnectErr, err); diff != "" {
+									t.Fatalf("unexpected error from epsAndAddrs.clientEP.Connect(%#v), (-want, +got):\n%s", epsAndAddrs.clientConnectAddr, diff)
+								}
+							}
+							serverConnectAddr := tcpip.FullAddress{Addr: epsAndAddrs.serverConnectAddr}
+							if addr, err := epsAndAddrs.clientEP.GetLocalAddress(); err != nil {
+								t.Fatalf("epsAndAddrs.clientEP.GetLocalAddress(): %s", err)
+							} else {
+								serverConnectAddr.Port = addr.Port
+							}
 
-					write := func(ep tcpip.Endpoint, data []byte) {
-						t.Helper()
-
-						var r bytes.Reader
-						r.Reset(data)
-						var wOpts tcpip.WriteOptions
-						n, err := ep.Write(&r, wOpts)
-						if err != nil {
-							t.Fatalf("ep.Write(_, %#v): %s", wOpts, err)
-						}
-						if want := int64(len(data)); n != want {
-							t.Fatalf("got ep.Write(_, %#v) = (%d, _), want = (%d, _)", wOpts, n, want)
-						}
-					}
+							serverEP := epsAndAddrs.serverEP
+							serverCH := epsAndAddrs.serverReadableCH
+							if ep, ch := subTest.setupServerConn(t, serverEP, serverCH, serverConnectAddr); ep != nil {
+								defer ep.Close()
+								serverEP = ep
+								serverCH = ch
+							}
 
-					read := func(ch chan struct{}, ep tcpip.Endpoint, data []byte, expectedFrom tcpip.FullAddress) {
-						t.Helper()
-
-						var buf bytes.Buffer
-						var res tcpip.ReadResult
-						for {
-							var err tcpip.Error
-							opts := tcpip.ReadOptions{NeedRemoteAddr: subTest.needRemoteAddr}
-							res, err = ep.Read(&buf, opts)
-							if _, ok := err.(*tcpip.ErrWouldBlock); ok {
-								<-ch
-								continue
+							write := func(ep tcpip.Endpoint, data []byte) {
+								t.Helper()
+
+								var r bytes.Reader
+								r.Reset(data)
+								var wOpts tcpip.WriteOptions
+								n, err := ep.Write(&r, wOpts)
+								if err != nil {
+									t.Fatalf("ep.Write(_, %#v): %s", wOpts, err)
+								}
+								if want := int64(len(data)); n != want {
+									t.Fatalf("got ep.Write(_, %#v) = (%d, _), want = (%d, _)", wOpts, n, want)
+								}
 							}
-							if err != nil {
-								t.Fatalf("ep.Read(_, %d, %#v): %s", len(data), opts, err)
+
+							read := func(ch chan struct{}, ep tcpip.Endpoint, data []byte, expectedFrom tcpip.FullAddress) {
+								t.Helper()
+
+								var buf bytes.Buffer
+								var res tcpip.ReadResult
+								for {
+									var err tcpip.Error
+									opts := tcpip.ReadOptions{NeedRemoteAddr: subTest.needRemoteAddr}
+									res, err = ep.Read(&buf, opts)
+									if _, ok := err.(*tcpip.ErrWouldBlock); ok {
+										<-ch
+										continue
+									}
+									if err != nil {
+										t.Fatalf("ep.Read(_, %d, %#v): %s", len(data), opts, err)
+									}
+									break
+								}
+
+								readResult := tcpip.ReadResult{
+									Count: len(data),
+									Total: len(data),
+								}
+								if subTest.needRemoteAddr {
+									readResult.RemoteAddr = expectedFrom
+								}
+								if diff := cmp.Diff(readResult, res, checker.IgnoreCmpPath(
+									"ControlMessages",
+									"RemoteAddr.NIC",
+								)); diff != "" {
+									t.Errorf("ep.Read: unexpected result (-want +got):\n%s", diff)
+								}
+								if diff := cmp.Diff(buf.Bytes(), data); diff != "" {
+									t.Errorf("received data mismatch (-want +got):\n%s", diff)
+								}
+
+								if t.Failed() {
+									t.FailNow()
+								}
 							}
-							break
-						}
-
-						readResult := tcpip.ReadResult{
-							Count: len(data),
-							Total: len(data),
-						}
-						if subTest.needRemoteAddr {
-							readResult.RemoteAddr = expectedFrom
-						}
-						if diff := cmp.Diff(readResult, res, checker.IgnoreCmpPath(
-							"ControlMessages",
-							"RemoteAddr.NIC",
-						)); diff != "" {
-							t.Errorf("ep.Read: unexpected result (-want +got):\n%s", diff)
-						}
-						if diff := cmp.Diff(buf.Bytes(), data); diff != "" {
-							t.Errorf("received data mismatch (-want +got):\n%s", diff)
-						}
-
-						if t.Failed() {
-							t.FailNow()
-						}
-					}
 
-					{
-						data := []byte{1, 2, 3, 4}
-						write(epsAndAddrs.clientEP, data)
-						read(serverCH, serverEP, data, nattedClientAddr)
-					}
+							{
+								data := []byte{1, 2, 3, 4}
+								write(epsAndAddrs.clientEP, data)
+								read(serverCH, serverEP, data, serverConnectAddr)
+							}
 
-					{
-						data := []byte{5, 6, 7, 8, 9, 10, 11, 12}
-						write(serverEP, data)
-						read(epsAndAddrs.clientReadableCH, epsAndAddrs.clientEP, data, serverAddr)
+							{
+								data := []byte{5, 6, 7, 8, 9, 10, 11, 12}
+								write(serverEP, data)
+								read(epsAndAddrs.clientReadableCH, epsAndAddrs.clientEP, data, epsAndAddrs.clientConnectAddr)
+							}
+						})
 					}
 				})
 			}
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index bb0db9f70..31579a896 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -180,7 +180,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult
 		Total: p.data.Size(),
 		ControlMessages: tcpip.ControlMessages{
 			HasTimestamp: true,
-			Timestamp:    p.receivedAt.UnixNano(),
+			Timestamp:    p.receivedAt,
 		},
 	}
 	if opts.NeedRemoteAddr {
diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go
index 689427d53..80eef39e9 100644
--- a/pkg/tcpip/transport/packet/endpoint.go
+++ b/pkg/tcpip/transport/packet/endpoint.go
@@ -182,7 +182,7 @@ func (ep *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResul
 		Total: packet.data.Size(),
 		ControlMessages: tcpip.ControlMessages{
 			HasTimestamp: true,
-			Timestamp:    packet.receivedAt.UnixNano(),
+			Timestamp:    packet.receivedAt,
 		},
 	}
 	if opts.NeedRemoteAddr {
@@ -409,7 +409,7 @@ func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) {
 }
 
 // HandlePacket implements stack.PacketEndpoint.HandlePacket.
-func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+func (ep *endpoint) HandlePacket(nicID tcpip.NICID, _ tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
 	ep.rcvMu.Lock()
 
 	// Drop the packet if our buffer is currently full.
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index bfef75da7..ce76774af 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -49,6 +49,7 @@ type rawPacket struct {
 	receivedAt time.Time             `state:".(int64)"`
 	// senderAddr is the network address of the sender.
 	senderAddr tcpip.FullAddress
+	packetInfo tcpip.IPPacketInfo
 }
 
 // endpoint is the raw socket implementation of tcpip.Endpoint. It is legal to
@@ -202,12 +203,29 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult
 		Total: pkt.data.Size(),
 		ControlMessages: tcpip.ControlMessages{
 			HasTimestamp: true,
-			Timestamp:    pkt.receivedAt.UnixNano(),
+			Timestamp:    pkt.receivedAt,
 		},
 	}
 	if opts.NeedRemoteAddr {
 		res.RemoteAddr = pkt.senderAddr
 	}
+	switch netProto := e.net.NetProto(); netProto {
+	case header.IPv4ProtocolNumber:
+		if e.ops.GetReceivePacketInfo() {
+			res.ControlMessages.HasIPPacketInfo = true
+			res.ControlMessages.PacketInfo = pkt.packetInfo
+		}
+	case header.IPv6ProtocolNumber:
+		if e.ops.GetIPv6ReceivePacketInfo() {
+			res.ControlMessages.HasIPv6PacketInfo = true
+			res.ControlMessages.IPv6PacketInfo = tcpip.IPv6PacketInfo{
+				NIC:  pkt.packetInfo.NIC,
+				Addr: pkt.packetInfo.DestinationAddr,
+			}
+		}
+	default:
+		panic(fmt.Sprintf("unrecognized network protocol = %d", netProto))
+	}
 
 	n, err := pkt.data.ReadTo(dst, opts.Peek)
 	if n == 0 && err != nil {
@@ -435,7 +453,9 @@ func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
 			return false
 		}
 
-		srcAddr := pkt.Network().SourceAddress()
+		net := pkt.Network()
+		dstAddr := net.DestinationAddress()
+		srcAddr := net.SourceAddress()
 		info := e.net.Info()
 
 		switch state := e.net.State(); state {
@@ -457,7 +477,7 @@ func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
 			}
 
 			// If bound to an address, only accept data for that address.
-			if info.BindAddr != "" && info.BindAddr != pkt.Network().DestinationAddress() {
+			if info.BindAddr != "" && info.BindAddr != dstAddr {
 				return false
 			}
 		default:
@@ -472,6 +492,14 @@ func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
 				NIC:  pkt.NICID,
 				Addr: srcAddr,
 			},
+			packetInfo: tcpip.IPPacketInfo{
+				// TODO(gvisor.dev/issue/3556): dstAddr may be a multicast or broadcast
+				// address. LocalAddr should hold a unicast address that can be
+				// used to respond to the incoming packet.
+				LocalAddr:       dstAddr,
+				DestinationAddr: dstAddr,
+				NIC:             pkt.NICID,
+			},
 		}
 
 		// Raw IPv4 endpoints return the IP header, but IPv6 endpoints do not.
@@ -483,10 +511,10 @@ func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
 		// overlapping slices.
 		var combinedVV buffer.VectorisedView
 		if info.NetProto == header.IPv4ProtocolNumber {
-			network, transport := pkt.NetworkHeader().View(), pkt.TransportHeader().View()
-			headers := make(buffer.View, 0, len(network)+len(transport))
-			headers = append(headers, network...)
-			headers = append(headers, transport...)
+			networkHeader, transportHeader := pkt.NetworkHeader().View(), pkt.TransportHeader().View()
+			headers := make(buffer.View, 0, len(networkHeader)+len(transportHeader))
+			headers = append(headers, networkHeader...)
+			headers = append(headers, transportHeader...)
 			combinedVV = headers.ToVectorisedView()
 		} else {
 			combinedVV = append(buffer.View(nil), pkt.TransportHeader().View()...).ToVectorisedView()
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go
index 7115d0a12..caf14b0dc 100644
--- a/pkg/tcpip/transport/tcp/accept.go
+++ b/pkg/tcpip/transport/tcp/accept.go
@@ -15,12 +15,12 @@
 package tcp
 
 import (
+	"container/list"
 	"crypto/sha1"
 	"encoding/binary"
 	"fmt"
 	"hash"
 	"io"
-	"sync/atomic"
 	"time"
 
 	"gvisor.dev/gvisor/pkg/sleep"
@@ -100,18 +100,6 @@ type listenContext struct {
 	// netProto indicates the network protocol(IPv4/v6) for the listening
 	// endpoint.
 	netProto tcpip.NetworkProtocolNumber
-
-	// pendingMu protects pendingEndpoints. This should only be accessed
-	// by the listening endpoint's worker goroutine.
-	//
-	// Lock Ordering: listenEP.workerMu -> pendingMu
-	pendingMu sync.Mutex
-	// pending is used to wait for all pendingEndpoints to finish when
-	// a socket is closed.
-	pending sync.WaitGroup
-	// pendingEndpoints is a map of all endpoints for which a handshake is
-	// in progress.
-	pendingEndpoints map[stack.TransportEndpointID]*endpoint
 }
 
 // timeStamp returns an 8-bit timestamp with a granularity of 64 seconds.
@@ -122,14 +110,13 @@ func timeStamp(clock tcpip.Clock) uint32 {
 // newListenContext creates a new listen context.
 func newListenContext(stk *stack.Stack, protocol *protocol, listenEP *endpoint, rcvWnd seqnum.Size, v6Only bool, netProto tcpip.NetworkProtocolNumber) *listenContext {
 	l := &listenContext{
-		stack:            stk,
-		protocol:         protocol,
-		rcvWnd:           rcvWnd,
-		hasher:           sha1.New(),
-		v6Only:           v6Only,
-		netProto:         netProto,
-		listenEP:         listenEP,
-		pendingEndpoints: make(map[stack.TransportEndpointID]*endpoint),
+		stack:    stk,
+		protocol: protocol,
+		rcvWnd:   rcvWnd,
+		hasher:   sha1.New(),
+		v6Only:   v6Only,
+		netProto: netProto,
+		listenEP: listenEP,
 	}
 
 	for i := range l.nonce {
@@ -265,7 +252,6 @@ func (l *listenContext) startHandshake(s *segment, opts header.TCPSynOptions, qu
 
 			return nil, &tcpip.ErrConnectionAborted{}
 		}
-		l.addPendingEndpoint(ep)
 
 		// Propagate any inheritable options from the listening endpoint
 		// to the newly created endpoint.
@@ -275,8 +261,6 @@ func (l *listenContext) startHandshake(s *segment, opts header.TCPSynOptions, qu
 			ep.mu.Unlock()
 			ep.Close()
 
-			l.removePendingEndpoint(ep)
-
 			return nil, &tcpip.ErrConnectionAborted{}
 		}
 
@@ -295,10 +279,6 @@ func (l *listenContext) startHandshake(s *segment, opts header.TCPSynOptions, qu
 		ep.mu.Unlock()
 		ep.Close()
 
-		if l.listenEP != nil {
-			l.removePendingEndpoint(ep)
-		}
-
 		ep.drainClosingSegmentQueue()
 
 		return nil, err
@@ -336,38 +316,12 @@ func (l *listenContext) performHandshake(s *segment, opts header.TCPSynOptions,
 	return ep, nil
 }
 
-func (l *listenContext) addPendingEndpoint(n *endpoint) {
-	l.pendingMu.Lock()
-	l.pendingEndpoints[n.TransportEndpointInfo.ID] = n
-	l.pending.Add(1)
-	l.pendingMu.Unlock()
-}
-
-func (l *listenContext) removePendingEndpoint(n *endpoint) {
-	l.pendingMu.Lock()
-	delete(l.pendingEndpoints, n.TransportEndpointInfo.ID)
-	l.pending.Done()
-	l.pendingMu.Unlock()
-}
-
-func (l *listenContext) closeAllPendingEndpoints() {
-	l.pendingMu.Lock()
-	for _, n := range l.pendingEndpoints {
-		n.notifyProtocolGoroutine(notifyClose)
-	}
-	l.pendingMu.Unlock()
-	l.pending.Wait()
-}
-
 // +checklocks:h.ep.mu
 func (l *listenContext) cleanupFailedHandshake(h *handshake) {
 	e := h.ep
 	e.mu.Unlock()
 	e.Close()
 	e.notifyAborted()
-	if l.listenEP != nil {
-		l.removePendingEndpoint(e)
-	}
 	e.drainClosingSegmentQueue()
 	e.h = nil
 }
@@ -378,9 +332,6 @@ func (l *listenContext) cleanupFailedHandshake(h *handshake) {
 // +checklocks:h.ep.mu
 func (l *listenContext) cleanupCompletedHandshake(h *handshake) {
 	e := h.ep
-	if l.listenEP != nil {
-		l.removePendingEndpoint(e)
-	}
 	e.isConnectNotified = true
 
 	// Update the receive window scaling. We can't do it before the
@@ -444,101 +395,30 @@ func (e *endpoint) notifyAborted() {
 	e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents)
 }
 
-// handleSynSegment is called in its own goroutine once the listening endpoint
-// receives a SYN segment. It is responsible for completing the handshake and
-// queueing the new endpoint for acceptance.
-//
-// A limited number of these goroutines are allowed before TCP starts using SYN
-// cookies to accept connections.
-//
-// +checklocks:e.mu
-func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts header.TCPSynOptions) tcpip.Error {
-	defer s.decRef()
-
-	h, err := ctx.startHandshake(s, opts, &waiter.Queue{}, e.owner)
-	if err != nil {
-		e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
-		e.stats.FailedConnectionAttempts.Increment()
-		atomic.AddInt32(&e.synRcvdCount, -1)
-		return err
-	}
-
-	go func() {
-		// Note that startHandshake returns a locked endpoint. The
-		// force call here just makes it so.
-		if err := h.complete(); err != nil { // +checklocksforce
-			e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
-			e.stats.FailedConnectionAttempts.Increment()
-			ctx.cleanupFailedHandshake(h)
-			atomic.AddInt32(&e.synRcvdCount, -1)
-			return
-		}
-		ctx.cleanupCompletedHandshake(h)
-		h.ep.startAcceptedLoop()
-		e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()
-
-		// Deliver the endpoint to the accept queue.
-		e.mu.Lock()
-		e.pendingAccepted.Add(1)
-		e.mu.Unlock()
-		defer e.pendingAccepted.Done()
-
-		// Drop the lock before notifying to avoid deadlock in user-specified
-		// callbacks.
-		delivered := func() bool {
-			e.acceptMu.Lock()
-			defer e.acceptMu.Unlock()
-			for {
-				if e.accepted == (accepted{}) {
-					// If the listener has transitioned out of the listen state (accepted
-					// is the zero value), the new endpoint is reset instead.
-					return false
-				}
-				if e.accepted.acceptQueueIsFullLocked() {
-					e.acceptCond.Wait()
-					continue
-				}
-
-				e.accepted.endpoints.PushBack(h.ep)
-				atomic.AddInt32(&e.synRcvdCount, -1)
-				return true
-			}
-		}()
-
-		if delivered {
-			e.waiterQueue.Notify(waiter.ReadableEvents)
-		} else {
-			h.ep.notifyProtocolGoroutine(notifyReset)
-		}
-	}()
-
-	return nil
-}
-
-func (e *endpoint) synRcvdBacklogFull() bool {
-	e.acceptMu.Lock()
-	acceptedCap := e.accepted.cap
-	e.acceptMu.Unlock()
-	// The capacity of the accepted queue would always be one greater than the
-	// listen backlog. But, the SYNRCVD connections count is always checked
-	// against the listen backlog value for Linux parity reason.
-	// https://github.com/torvalds/linux/blob/7acac4b3196/include/net/inet_connection_sock.h#L280
-	//
-	// We maintain an equality check here as the synRcvdCount is incremented
-	// and compared only from a single listener context and the capacity of
-	// the accepted queue can only increase by a new listen call.
-	return int(atomic.LoadInt32(&e.synRcvdCount)) == acceptedCap-1
-}
-
 func (e *endpoint) acceptQueueIsFull() bool {
 	e.acceptMu.Lock()
-	full := e.accepted.acceptQueueIsFullLocked()
+	full := e.acceptQueue.isFull()
 	e.acceptMu.Unlock()
 	return full
 }
 
-func (a *accepted) acceptQueueIsFullLocked() bool {
-	return a.endpoints.Len() == a.cap
+// +stateify savable
+type acceptQueue struct {
+	// NB: this could be an endpointList, but ilist only permits endpoints to
+	// belong to one list at a time, and endpoints are already stored in the
+	// dispatcher's list.
+	endpoints list.List `state:".([]*endpoint)"`
+
+	// pendingEndpoints is a set of all endpoints for which a handshake is
+	// in progress.
+	pendingEndpoints map[*endpoint]struct{}
+
+	// capacity is the maximum number of endpoints that can be in endpoints.
+	capacity int
+}
+
+func (a *acceptQueue) isFull() bool {
+	return a.endpoints.Len() == a.capacity
 }
 
 // handleListenSegment is called when a listening endpoint receives a segment
@@ -571,20 +451,96 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
 			return nil
 		}
 
-		alwaysUseSynCookies := func() bool {
+		opts := parseSynSegmentOptions(s)
+
+		useSynCookies, err := func() (bool, tcpip.Error) {
 			var alwaysUseSynCookies tcpip.TCPAlwaysUseSynCookies
 			if err := e.stack.TransportProtocolOption(header.TCPProtocolNumber, &alwaysUseSynCookies); err != nil {
 				panic(fmt.Sprintf("TransportProtocolOption(%d, %T) = %s", header.TCPProtocolNumber, alwaysUseSynCookies, err))
 			}
-			return bool(alwaysUseSynCookies)
-		}()
+			if alwaysUseSynCookies {
+				return true, nil
+			}
+			e.acceptMu.Lock()
+			defer e.acceptMu.Unlock()
 
-		opts := parseSynSegmentOptions(s)
-		if !alwaysUseSynCookies && !e.synRcvdBacklogFull() {
-			s.incRef()
-			atomic.AddInt32(&e.synRcvdCount, 1)
-			return e.handleSynSegment(ctx, s, opts)
+			// The capacity of the accept queue is always one greater than the
+			// listen backlog, but the count of SYN-RCVD connections is checked
+			// against the listen backlog value itself for parity with Linux.
+			// https://github.com/torvalds/linux/blob/7acac4b3196/include/net/inet_connection_sock.h#L280
+			if len(e.acceptQueue.pendingEndpoints) == e.acceptQueue.capacity-1 {
+				return true, nil
+			}
+
+			h, err := ctx.startHandshake(s, opts, &waiter.Queue{}, e.owner)
+			if err != nil {
+				e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
+				e.stats.FailedConnectionAttempts.Increment()
+				return false, err
+			}
+
+			e.acceptQueue.pendingEndpoints[h.ep] = struct{}{}
+			e.pendingAccepted.Add(1)
+
+			go func() {
+				defer func() {
+					e.pendingAccepted.Done()
+
+					e.acceptMu.Lock()
+					defer e.acceptMu.Unlock()
+					delete(e.acceptQueue.pendingEndpoints, h.ep)
+				}()
+
+				// Note that startHandshake returns a locked endpoint; the force
+				// annotation on the call below tells the lock checker to assume so.
+				if err := h.complete(); err != nil { // +checklocksforce
+					e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
+					e.stats.FailedConnectionAttempts.Increment()
+					ctx.cleanupFailedHandshake(h)
+					return
+				}
+				ctx.cleanupCompletedHandshake(h)
+				h.ep.startAcceptedLoop()
+				e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()
+
+				// Deliver the endpoint to the accept queue.
+				//
+				// Drop the lock before notifying to avoid deadlock in user-specified
+				// callbacks.
+				delivered := func() bool {
+					e.acceptMu.Lock()
+					defer e.acceptMu.Unlock()
+					for {
+						// The listener is transitioning out of the Listen state; bail.
+						if e.acceptQueue.capacity == 0 {
+							return false
+						}
+						if e.acceptQueue.isFull() {
+							e.acceptCond.Wait()
+							continue
+						}
+
+						e.acceptQueue.endpoints.PushBack(h.ep)
+						return true
+					}
+				}()
+
+				if delivered {
+					e.waiterQueue.Notify(waiter.ReadableEvents)
+				} else {
+					h.ep.notifyProtocolGoroutine(notifyReset)
+				}
+			}()
+
+			return false, nil
+		}()
+		if err != nil {
+			return err
 		}
+		if !useSynCookies {
+			return nil
+		}
+
 		route, err := e.stack.FindRoute(s.nicID, s.dstAddr, s.srcAddr, s.netProto, false /* multicastLoop */)
 		if err != nil {
 			return err
@@ -627,23 +583,6 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
 		return nil
 
 	case s.flags.Contains(header.TCPFlagAck):
-		// Keep hold of acceptMu until the new endpoint is in the accept queue (or
-		// if there is an error), to guarantee that we will keep our spot in the
-		// queue even if another handshake from the syn queue completes.
-		e.acceptMu.Lock()
-		if e.accepted.acceptQueueIsFullLocked() {
-			// Silently drop the ack as the application can't accept
-			// the connection at this point. The ack will be
-			// retransmitted by the sender anyway and we can
-			// complete the connection at the time of retransmit if
-			// the backlog has space.
-			e.acceptMu.Unlock()
-			e.stack.Stats().TCP.ListenOverflowAckDrop.Increment()
-			e.stats.ReceiveErrors.ListenOverflowAckDrop.Increment()
-			e.stack.Stats().DroppedPackets.Increment()
-			return nil
-		}
-
 		iss := s.ackNumber - 1
 		irs := s.sequenceNumber - 1
 
@@ -659,7 +598,6 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
 		// Validate the cookie.
 		data, ok := ctx.isCookieValid(s.id, iss, irs)
 		if !ok || int(data) >= len(mssTable) {
-			e.acceptMu.Unlock()
 			e.stack.Stats().TCP.ListenOverflowInvalidSynCookieRcvd.Increment()
 			e.stack.Stats().DroppedPackets.Increment()
 
@@ -680,6 +618,24 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
 			// ACK was received from the sender.
 			return replyWithReset(e.stack, s, e.sendTOS, e.ttl)
 		}
+
+		// Keep hold of acceptMu until the new endpoint is in the accept queue (or
+		// until an error occurs), to guarantee that we keep our spot in the queue
+		// even if another handshake from the syn queue completes.
+		e.acceptMu.Lock()
+		if e.acceptQueue.isFull() {
+			// Silently drop the ack as the application can't accept
+			// the connection at this point. The ack will be
+			// retransmitted by the sender anyway and we can
+			// complete the connection at the time of retransmit if
+			// the backlog has space.
+			e.acceptMu.Unlock()
+			e.stack.Stats().TCP.ListenOverflowAckDrop.Increment()
+			e.stats.ReceiveErrors.ListenOverflowAckDrop.Increment()
+			e.stack.Stats().DroppedPackets.Increment()
+			return nil
+		}
+
 		e.stack.Stats().TCP.ListenOverflowSynCookieRcvd.Increment()
 		// Create newly accepted endpoint and deliver it.
 		rcvdSynOptions := header.TCPSynOptions{
@@ -769,7 +725,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
 		e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()
 
 		// Deliver the endpoint to the accept queue.
-		e.accepted.endpoints.PushBack(n)
+		e.acceptQueue.endpoints.PushBack(n)
 		e.acceptMu.Unlock()
 
 		e.waiterQueue.Notify(waiter.ReadableEvents)
@@ -789,14 +745,8 @@ func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) {
 	ctx := newListenContext(e.stack, e.protocol, e, rcvWnd, v6Only, e.NetProto)
 
 	defer func() {
-		// Mark endpoint as closed. This will prevent goroutines running
-		// handleSynSegment() from attempting to queue new connections
-		// to the endpoint.
 		e.setEndpointState(StateClose)
 
-		// Close any endpoints in SYN-RCVD state.
-		ctx.closeAllPendingEndpoints()
-
 		// Do cleanup if needed.
 		e.completeWorkerLocked()
 
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 407ab2664..066ffe051 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -15,7 +15,6 @@
 package tcp
 
 import (
-	"container/list"
 	"encoding/binary"
 	"fmt"
 	"io"
@@ -205,6 +204,8 @@ type SACKInfo struct {
 }
 
 // ReceiveErrors collect segment receive errors within transport layer.
+//
+// +stateify savable
 type ReceiveErrors struct {
 	tcpip.ReceiveErrors
 
@@ -234,6 +235,8 @@ type ReceiveErrors struct {
 }
 
 // SendErrors collect segment send errors within the transport layer.
+//
+// +stateify savable
 type SendErrors struct {
 	tcpip.SendErrors
 
@@ -257,6 +260,8 @@ type SendErrors struct {
 }
 
 // Stats holds statistics about the endpoint.
+//
+// +stateify savable
 type Stats struct {
 	// SegmentsReceived is the number of TCP segments received that
 	// the transport layer successfully parsed.
@@ -311,18 +316,6 @@ type rcvQueueInfo struct {
 	rcvQueue segmentList `state:"wait"`
 }
 
-// +stateify savable
-type accepted struct {
-	// NB: this could be an endpointList, but ilist only permits endpoints to
-	// belong to one list at a time, and endpoints are already stored in the
-	// dispatcher's list.
-	endpoints list.List `state:".([]*endpoint)"`
-
-	// cap is the maximum number of endpoints that can be in the accepted endpoint
-	// list.
-	cap int
-}
-
 // endpoint represents a TCP endpoint. This struct serves as the interface
 // between users of the endpoint and the protocol implementation; it is legal to
 // have concurrent goroutines make calls into the endpoint, they are properly
@@ -338,7 +331,7 @@ type accepted struct {
 // The following three mutexes can be acquired independent of e.mu but if
 // acquired with e.mu then e.mu must be acquired first.
 //
-// e.acceptMu -> Protects e.accepted.
+// e.acceptMu -> Protects e.acceptQueue.
 // e.rcvQueueMu -> Protects e.rcvQueue and associated fields.
 // e.sndQueueMu -> Protects the e.sndQueue and associated fields.
 // e.lastErrorMu -> Protects the lastError field.
@@ -502,10 +495,6 @@ type endpoint struct {
 	// and dropped when it is.
 	segmentQueue segmentQueue `state:"wait"`
 
-	// synRcvdCount is the number of connections for this endpoint that are
-	// in SYN-RCVD state; this is only accessed atomically.
-	synRcvdCount int32
-
 	// userMSS if non-zero is the MSS value explicitly set by the user
 	// for this endpoint using the TCP_MAXSEG setsockopt.
 	userMSS uint16
@@ -579,7 +568,7 @@ type endpoint struct {
 	// send newly accepted connections to the endpoint so that they can be
 	// read by Accept() calls.
 	// +checklocks:acceptMu
-	accepted accepted
+	acceptQueue acceptQueue
 
 	// The following are only used from the protocol goroutine, and
 	// therefore don't need locks to protect them.
@@ -612,8 +601,7 @@ type endpoint struct {
 
 	gso stack.GSO
 
-	// TODO(b/142022063): Add ability to save and restore per endpoint stats.
-	stats Stats `state:"nosave"`
+	stats Stats
 
 	// tcpLingerTimeout is the maximum amount of a time a socket
 	// a socket stays in TIME_WAIT state before being marked
@@ -825,10 +813,9 @@ func newEndpoint(s *stack.Stack, protocol *protocol, netProto tcpip.NetworkProto
 		waiterQueue: waiterQueue,
 		state:       uint32(StateInitial),
 		keepalive: keepalive{
-			// Linux defaults.
-			idle:     2 * time.Hour,
-			interval: 75 * time.Second,
-			count:    9,
+			idle:     DefaultKeepaliveIdle,
+			interval: DefaultKeepaliveInterval,
+			count:    DefaultKeepaliveCount,
 		},
 		uniqueID:      s.UniqueID(),
 		txHash:        s.Rand().Uint32(),
@@ -910,7 +897,7 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 		// Check if there's anything in the accepted queue.
 		if (mask & waiter.ReadableEvents) != 0 {
 			e.acceptMu.Lock()
-			if e.accepted.endpoints.Len() != 0 {
+			if e.acceptQueue.endpoints.Len() != 0 {
 				result |= waiter.ReadableEvents
 			}
 			e.acceptMu.Unlock()
@@ -1093,20 +1080,20 @@ func (e *endpoint) closeNoShutdownLocked() {
 // handshake but not yet been delivered to the application.
 func (e *endpoint) closePendingAcceptableConnectionsLocked() {
 	e.acceptMu.Lock()
-	acceptedCopy := e.accepted
-	e.accepted = accepted{}
-	e.acceptMu.Unlock()
-
-	if acceptedCopy == (accepted{}) {
-		return
+	// Close any endpoints in SYN-RCVD state.
+	for n := range e.acceptQueue.pendingEndpoints {
+		n.notifyProtocolGoroutine(notifyClose)
 	}
-
-	e.acceptCond.Broadcast()
-
+	e.acceptQueue.pendingEndpoints = nil
 	// Reset all connections that are waiting to be accepted.
-	for n := acceptedCopy.endpoints.Front(); n != nil; n = n.Next() {
+	for n := e.acceptQueue.endpoints.Front(); n != nil; n = n.Next() {
 		n.Value.(*endpoint).notifyProtocolGoroutine(notifyReset)
 	}
+	e.acceptQueue.endpoints.Init()
+	e.acceptMu.Unlock()
+
+	e.acceptCond.Broadcast()
+
 	// Wait for reset of all endpoints that are still waiting to be delivered to
 	// the now closed accepted.
 	e.pendingAccepted.Wait()
@@ -2498,22 +2485,23 @@ func (e *endpoint) listen(backlog int) tcpip.Error {
 	if e.EndpointState() == StateListen && !e.closed {
 		e.acceptMu.Lock()
 		defer e.acceptMu.Unlock()
-		if e.accepted == (accepted{}) {
-			// listen is called after shutdown.
-			e.accepted.cap = backlog
-			e.shutdownFlags = 0
-			e.rcvQueueInfo.rcvQueueMu.Lock()
-			e.rcvQueueInfo.RcvClosed = false
-			e.rcvQueueInfo.rcvQueueMu.Unlock()
-		} else {
-			// Adjust the size of the backlog iff we can fit
-			// existing pending connections into the new one.
-			if e.accepted.endpoints.Len() > backlog {
-				return &tcpip.ErrInvalidEndpointState{}
-			}
-			e.accepted.cap = backlog
+
+		// Adjust the size of the backlog iff we can fit
+		// existing pending connections into the new one.
+		if e.acceptQueue.endpoints.Len() > backlog {
+			return &tcpip.ErrInvalidEndpointState{}
+		}
+		e.acceptQueue.capacity = backlog
+
+		if e.acceptQueue.pendingEndpoints == nil {
+			e.acceptQueue.pendingEndpoints = make(map[*endpoint]struct{})
 		}
 
+		e.shutdownFlags = 0
+		e.rcvQueueInfo.rcvQueueMu.Lock()
+		e.rcvQueueInfo.RcvClosed = false
+		e.rcvQueueInfo.rcvQueueMu.Unlock()
+
 		// Notify any blocked goroutines that they can attempt to
 		// deliver endpoints again.
 		e.acceptCond.Broadcast()
@@ -2548,8 +2536,11 @@ func (e *endpoint) listen(backlog int) tcpip.Error {
 	// may be pre-populated with some previously accepted (but not Accepted)
 	// endpoints.
 	e.acceptMu.Lock()
-	if e.accepted == (accepted{}) {
-		e.accepted.cap = backlog
+	if e.acceptQueue.pendingEndpoints == nil {
+		e.acceptQueue.pendingEndpoints = make(map[*endpoint]struct{})
+	}
+	if e.acceptQueue.capacity == 0 {
+		e.acceptQueue.capacity = backlog
 	}
 	e.acceptMu.Unlock()
 
@@ -2589,8 +2580,8 @@ func (e *endpoint) Accept(peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter.
 	// Get the new accepted endpoint.
 	var n *endpoint
 	e.acceptMu.Lock()
-	if element := e.accepted.endpoints.Front(); element != nil {
-		n = e.accepted.endpoints.Remove(element).(*endpoint)
+	if element := e.acceptQueue.endpoints.Front(); element != nil {
+		n = e.acceptQueue.endpoints.Remove(element).(*endpoint)
 	}
 	e.acceptMu.Unlock()
 	if n == nil {
@@ -3007,6 +2998,8 @@ func (e *endpoint) completeStateLocked() stack.TCPEndpointState {
 	}
 
 	s.Sender.RACKState = e.snd.rc.TCPRACKState
+	s.Sender.RetransmitTS = e.snd.retransmitTS
+	s.Sender.SpuriousRecovery = e.snd.spuriousRecovery
 	return s
 }
 
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index 381f4474d..94072a115 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -100,7 +100,7 @@ func (e *endpoint) beforeSave() {
 }
 
 // saveEndpoints is invoked by stateify.
-func (a *accepted) saveEndpoints() []*endpoint {
+func (a *acceptQueue) saveEndpoints() []*endpoint {
 	acceptedEndpoints := make([]*endpoint, a.endpoints.Len())
 	for i, e := 0, a.endpoints.Front(); e != nil; i, e = i+1, e.Next() {
 		acceptedEndpoints[i] = e.Value.(*endpoint)
@@ -109,7 +109,7 @@ func (a *accepted) saveEndpoints() []*endpoint {
 }
 
 // loadEndpoints is invoked by stateify.
-func (a *accepted) loadEndpoints(acceptedEndpoints []*endpoint) {
+func (a *acceptQueue) loadEndpoints(acceptedEndpoints []*endpoint) {
 	for _, ep := range acceptedEndpoints {
 		a.endpoints.PushBack(ep)
 	}
@@ -252,7 +252,7 @@ func (e *endpoint) Resume(s *stack.Stack) {
 			connectedLoading.Wait()
 			bind()
 			e.acceptMu.Lock()
-			backlog := e.accepted.cap
+			backlog := e.acceptQueue.capacity
 			e.acceptMu.Unlock()
 			if err := e.Listen(backlog); err != nil {
 				panic("endpoint listening failed: " + err.String())
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index e4410ad93..f122ea009 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -66,6 +66,18 @@ const (
 	// DefaultSynRetries is the default value for the number of SYN retransmits
 	// before a connect is aborted.
 	DefaultSynRetries = 6
+
+	// DefaultKeepaliveIdle is the idle time for a connection before keep-alive
+	// probes are sent.
+	DefaultKeepaliveIdle = 2 * time.Hour
+
+	// DefaultKeepaliveInterval is the time between two successive keep-alive
+	// probes.
+	DefaultKeepaliveInterval = 75 * time.Second
+
+	// DefaultKeepaliveCount is the number of keep-alive probes that are sent
+	// before declaring the connection dead.
+	DefaultKeepaliveCount = 9
 )
 
 const (
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 2fabf1594..4377f07a0 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -144,6 +144,15 @@ type sender struct {
 	// probeTimer and probeWaker are used to schedule PTO for RACK TLP algorithm.
 	probeTimer timer       `state:"nosave"`
 	probeWaker sleep.Waker `state:"nosave"`
+
+	// spuriousRecovery indicates whether the sender entered recovery
+	// spuriously as described in RFC3522 Section 3.2.
+	spuriousRecovery bool
+
+	// retransmitTS is the timestamp at which the sender sends retransmitted
+	// segment after entering an RTO for the first time as described in
+	// RFC3522 Section 3.2.
+	retransmitTS uint32
 }
 
 // rtt is a synchronization wrapper used to appease stateify. See the comment
@@ -425,6 +434,13 @@ func (s *sender) retransmitTimerExpired() bool {
 		return true
 	}
 
+	// Initialize the variables used to detect spurious recovery after
+	// entering RTO.
+	//
+	// See: https://www.rfc-editor.org/rfc/rfc3522.html#section-3.2 Step 1.
+	s.spuriousRecovery = false
+	s.retransmitTS = 0
+
 	// TODO(b/147297758): Band-aid fix, retransmitTimer can fire in some edge cases
 	// when writeList is empty. Remove this once we have a proper fix for this
 	// issue.
@@ -495,6 +511,10 @@ func (s *sender) retransmitTimerExpired() bool {
 		s.leaveRecovery()
 	}
 
+	// Record retransmitTS if the sender is not in recovery as per:
+	// https://datatracker.ietf.org/doc/html/rfc3522#section-3.2 Step 2
+	s.recordRetransmitTS()
+
 	s.state = tcpip.RTORecovery
 	s.cc.HandleRTOExpired()
 
@@ -958,6 +978,13 @@ func (s *sender) sendData() {
 }
 
 func (s *sender) enterRecovery() {
+	// Initialize the variables used to detect spurious recovery after
+	// entering recovery.
+	//
+	// See: https://www.rfc-editor.org/rfc/rfc3522.html#section-3.2 Step 1.
+	s.spuriousRecovery = false
+	s.retransmitTS = 0
+
 	s.FastRecovery.Active = true
 	// Save state to reflect we're now in fast recovery.
 	//
@@ -972,6 +999,11 @@ func (s *sender) enterRecovery() {
 	s.FastRecovery.MaxCwnd = s.SndCwnd + s.Outstanding
 	s.FastRecovery.HighRxt = s.SndUna
 	s.FastRecovery.RescueRxt = s.SndUna
+
+	// Record retransmitTS if the sender is not in recovery as per:
+	// https://datatracker.ietf.org/doc/html/rfc3522#section-3.2 Step 2
+	s.recordRetransmitTS()
+
 	if s.ep.SACKPermitted {
 		s.state = tcpip.SACKRecovery
 		s.ep.stack.Stats().TCP.SACKRecovery.Increment()
@@ -1147,13 +1179,15 @@ func (s *sender) isDupAck(seg *segment) bool {
 // Iterate the writeList and update RACK for each segment which is newly acked
 // either cumulatively or selectively. Loop through the segments which are
 // sacked, and update the RACK related variables and check for reordering.
+// Returns true when the DSACK block has been detected in the received ACK.
 //
 // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2
 // steps 2 and 3.
-func (s *sender) walkSACK(rcvdSeg *segment) {
+func (s *sender) walkSACK(rcvdSeg *segment) bool {
 	s.rc.setDSACKSeen(false)
 
 	// Look for DSACK block.
+	hasDSACK := false
 	idx := 0
 	n := len(rcvdSeg.parsedOptions.SACKBlocks)
 	if checkDSACK(rcvdSeg) {
@@ -1167,10 +1201,11 @@ func (s *sender) walkSACK(rcvdSeg *segment) {
 		s.rc.setDSACKSeen(true)
 		idx = 1
 		n--
+		hasDSACK = true
 	}
 
 	if n == 0 {
-		return
+		return hasDSACK
 	}
 
 	// Sort the SACK blocks. The first block is the most recent unacked
@@ -1193,6 +1228,7 @@ func (s *sender) walkSACK(rcvdSeg *segment) {
 			seg = seg.Next()
 		}
 	}
+	return hasDSACK
 }
 
 // checkDSACK checks if a DSACK is reported.
@@ -1239,6 +1275,85 @@ func checkDSACK(rcvdSeg *segment) bool {
 	return false
 }
 
+// recordRetransmitTS sets s.retransmitTS to the value returned by
+// s.ep.tsValNow(), unless the sender is already in a recovery state.
+func (s *sender) recordRetransmitTS() {
+	// See: https://datatracker.ietf.org/doc/html/rfc3522#section-3.2
+	// The Eifel detection algorithm is used, only upon initiation of loss
+	// recovery, i.e., when either the timeout-based retransmit or the fast
+	// retransmit is sent. The Eifel detection algorithm MUST NOT be
+	// reinitiated after loss recovery has already started. In particular,
+	// it must not be reinitiated upon subsequent timeouts for the same
+	// segment, and not upon retransmitting segments other than the oldest
+	// outstanding segment, e.g., during selective loss recovery.
+	if s.inRecovery() {
+		return
+	}
+
+	// See: https://datatracker.ietf.org/doc/html/rfc3522#section-3.2 Step 2
+	// Set a "RetransmitTS" variable to the value of the Timestamp Value
+	// field of the Timestamps option included in the retransmit sent when
+	// loss recovery is initiated. A TCP sender must ensure that
+	// RetransmitTS does not get overwritten as loss recovery progresses,
+	// e.g., in case of a second timeout and subsequent second retransmit of
+	// the same octet.
+	s.retransmitTS = s.ep.tsValNow()
+}
+
+// detectSpuriousRecovery sets s.spuriousRecovery and increments the TCP
+// SpuriousRecovery stat when the RFC 3522 Section 3.2 checks below pass for
+// an ACK with the given DSACK indication and Timestamp Echo Reply value.
+func (s *sender) detectSpuriousRecovery(hasDSACK bool, tsEchoReply uint32) {
+	// Return if the sender has already detected spurious recovery.
+	if s.spuriousRecovery {
+		return
+	}
+
+	// See: https://datatracker.ietf.org/doc/html/rfc3522#section-3.2 Step 4
+	// If the value of the Timestamp Echo Reply field of the acceptable ACK's
+	// Timestamps option is smaller than the value of RetransmitTS, then
+	// proceed to next step, else return.
+	if tsEchoReply >= s.retransmitTS {
+		return
+	}
+
+	// See: https://datatracker.ietf.org/doc/html/rfc3522#section-3.2 Step 5
+	// If the acceptable ACK carries a DSACK option [RFC2883], then return.
+	if hasDSACK {
+		return
+	}
+
+	// See: https://datatracker.ietf.org/doc/html/rfc3522#section-3.2 Step 5
+	// If during the lifetime of the TCP connection the TCP sender has
+	// previously received an ACK with a DSACK option, or the acceptable ACK
+	// does not acknowledge all outstanding data, then proceed to next step,
+	// else return.
+	// NOTE(review): this counter is read from the stack-wide stats, not per connection.
+	numDSACK := s.ep.stack.Stats().TCP.SegmentsAckedWithDSACK.Value()
+	if numDSACK == 0 && s.SndUna == s.SndNxt {
+		return
+	}
+
+	// See: https://datatracker.ietf.org/doc/html/rfc3522#section-3.2 Step 6
+	// If the loss recovery has been initiated with a timeout-based
+	// retransmit, then set
+	//    SpuriousRecovery <- SPUR_TO (equal 1),
+	// else set
+	//    SpuriousRecovery <- dupacks+1
+	// Set the spurious recovery variable to true as we do not differentiate
+	// between fast, SACK or RTO recovery.
+	s.spuriousRecovery = true
+	s.ep.stack.Stats().TCP.SpuriousRecovery.Increment()
+}
+
+// inRecovery reports whether s.state is RTORecovery, FastRecovery or SACKRecovery.
+func (s *sender) inRecovery() bool {
+	if s.state == tcpip.RTORecovery || s.state == tcpip.FastRecovery || s.state == tcpip.SACKRecovery {
+		return true
+	}
+	return false
+}
+
 // handleRcvdSegment is called when a segment is received; it is responsible for
 // updating the send-related state.
 func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
@@ -1254,6 +1369,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 	}
 
 	// Insert SACKBlock information into our scoreboard.
+	hasDSACK := false
 	if s.ep.SACKPermitted {
 		for _, sb := range rcvdSeg.parsedOptions.SACKBlocks {
 			// Only insert the SACK block if the following holds
@@ -1288,7 +1404,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 		//   RACK.fack, then the corresponding packet has been
 		//   reordered and RACK.reord is set to TRUE.
 		if s.ep.tcpRecovery&tcpip.TCPRACKLossDetection != 0 {
-			s.walkSACK(rcvdSeg)
+			hasDSACK = s.walkSACK(rcvdSeg)
 		}
 		s.SetPipe()
 	}
@@ -1418,6 +1534,11 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 		// Clear SACK information for all acked data.
 		s.ep.scoreboard.Delete(s.SndUna)
 
+		// Detect if the sender entered recovery spuriously.
+		if s.inRecovery() {
+			s.detectSpuriousRecovery(hasDSACK, rcvdSeg.parsedOptions.TSEcr)
+		}
+
 		// If we are not in fast recovery then update the congestion
 		// window based on the number of acknowledged packets.
 		if !s.FastRecovery.Active {
diff --git a/pkg/tcpip/transport/tcp/tcp_rack_test.go b/pkg/tcpip/transport/tcp/tcp_rack_test.go
index c35db7c95..0d36d0dd0 100644
--- a/pkg/tcpip/transport/tcp/tcp_rack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_rack_test.go
@@ -1059,16 +1059,17 @@ func TestRACKWithWindowFull(t *testing.T) {
 	for i := 0; i < numPkts; i++ {
 		c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
 		bytesRead += maxPayload
-		if i == 0 {
-			// Send ACK for the first packet to establish RTT.
-			c.SendAck(seq, maxPayload)
-		}
 	}
 
-	// SACK for #10 packet.
-	start := c.IRS.Add(seqnum.Size(1 + (numPkts-1)*maxPayload))
+	// Expect retransmission of last packet due to TLP.
+	c.ReceiveAndCheckPacketWithOptions(data, (numPkts-1)*maxPayload, maxPayload, tsOptionSize)
+
+	// SACK for first and last packet.
+	start := c.IRS.Add(seqnum.Size(maxPayload))
 	end := start.Add(seqnum.Size(maxPayload))
-	c.SendAckWithSACK(seq, 2*maxPayload, []header.SACKBlock{{start, end}})
+	dsackStart := c.IRS.Add(seqnum.Size(1 + (numPkts-1)*maxPayload))
+	dsackEnd := dsackStart.Add(seqnum.Size(maxPayload))
+	c.SendAckWithSACK(seq, 2*maxPayload, []header.SACKBlock{{dsackStart, dsackEnd}, {start, end}})
 
 	var info tcpip.TCPInfoOption
 	if err := c.EP.GetSockOpt(&info); err != nil {
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index 6255355bb..896249d2d 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -23,6 +23,7 @@ import (
 	"time"
 
 	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/checker"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/seqnum"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
@@ -702,3 +703,257 @@ func TestRecoveryEntry(t *testing.T) {
 		t.Error(err)
 	}
 }
+
+func verifySpuriousRecoveryMetric(t *testing.T, c *context.Context, numSpuriousRecovery uint64) {
+	t.Helper()
+
+	metricPollFn := func() error {
+		tcpStats := c.Stack().Stats().TCP
+		stats := []struct {
+			stat *tcpip.StatCounter
+			name string
+			want uint64
+		}{
+			{tcpStats.SpuriousRecovery, "stats.TCP.SpuriousRecovery", numSpuriousRecovery},
+		}
+		for _, s := range stats {
+			if got, want := s.stat.Value(), s.want; got != want {
+				return fmt.Errorf("got %s.Value() = %d, want = %d", s.name, got, want)
+			}
+		}
+		return nil
+	}
+
+	if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
+		t.Error(err)
+	}
+}
+
+// checkReceivedPacket checks b's IPv4/TCP destination port, sequence number,
+// ACK number and flags, and that tcpHdr's payload equals data at bytesRead.
+func checkReceivedPacket(t *testing.T, c *context.Context, tcpHdr header.TCP, bytesRead uint32, b, data []byte) {
+	t.Helper()
+	checker.IPv4(t, b, checker.TCP(
+		checker.DstPort(context.TestPort),
+		checker.TCPSeqNum(uint32(c.IRS)+1+bytesRead),
+		checker.TCPAckNum(context.TestInitialSequenceNumber+1),
+		checker.TCPFlagsMatch(header.TCPFlagAck, ^header.TCPFlagPsh),
+	))
+	payload := tcpHdr.Payload()
+	if want := data[bytesRead : bytesRead+uint32(len(payload))]; !bytes.Equal(want, payload) {
+		t.Fatalf("got data = %v, want = %v", payload, want)
+	}
+}
+
+func buildTSOptionFromHeader(tcpHdr header.TCP) []byte {
+	parsedOpts := tcpHdr.ParsedOptions()
+	tsOpt := [12]byte{header.TCPOptionNOP, header.TCPOptionNOP}
+	header.EncodeTSOption(parsedOpts.TSEcr+1, parsedOpts.TSVal, tsOpt[2:])
+	return tsOpt[:]
+}
+
+func TestDetectSpuriousRecoveryWithRTO(t *testing.T) {
+	c := context.New(t, uint32(mtu))
+	defer c.Cleanup()
+
+	probeDone := make(chan struct{})
+	c.Stack().AddTCPProbe(func(s stack.TCPEndpointState) {
+		// The probe does not run on the test goroutine (the test waits on
+		// probeDone below), so report with Errorf: Fatalf is not allowed here.
+		if s.Sender.RetransmitTS == 0 {
+			t.Errorf("RetransmitTS did not get updated, got: 0 want > 0")
+		}
+		if !s.Sender.SpuriousRecovery {
+			t.Errorf("Spurious recovery was not detected")
+		}
+		close(probeDone)
+	})
+
+	setStackSACKPermitted(t, c, true)
+	createConnectedWithSACKAndTS(c)
+	numPackets := 5
+	data := make([]byte, numPackets*maxPayload)
+	for i := range data {
+		data[i] = byte(i)
+	}
+	// Write the data.
+	var r bytes.Reader
+	r.Reset(data)
+	if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil {
+		t.Fatalf("Write failed: %s", err)
+	}
+
+	var options []byte
+	var bytesRead uint32
+	for i := 0; i < numPackets; i++ {
+		b := c.GetPacket()
+		tcpHdr := header.TCP(header.IPv4(b).Payload())
+		checkReceivedPacket(t, c, tcpHdr, bytesRead, b, data)
+
+		// Get options only for the first packet. They are echoed in the ACK
+		// so that it appears to acknowledge the original packet.
+		if i == 0 && c.TimeStampEnabled {
+			options = buildTSOptionFromHeader(tcpHdr)
+		}
+		bytesRead += uint32(len(tcpHdr.Payload()))
+	}
+
+	seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	// Expect #5 segment with TLP.
+	c.ReceiveAndCheckPacketWithOptions(data, 4*maxPayload, maxPayload, tsOptionSize)
+
+	// Expect #1 segment because of RTO.
+	c.ReceiveAndCheckPacketWithOptions(data, 0, maxPayload, tsOptionSize)
+
+	info := tcpip.TCPInfoOption{}
+	if err := c.EP.GetSockOpt(&info); err != nil {
+		t.Fatalf("c.EP.GetSockOpt(&%T) = %s", info, err)
+	}
+
+	if info.CcState != tcpip.RTORecovery {
+		t.Fatalf("Loss recovery did not happen, got: %v want: %v", info.CcState, tcpip.RTORecovery)
+	}
+
+	// Acknowledge the data.
+	rcvWnd := seqnum.Size(30000)
+	c.SendPacket(nil, &context.Headers{
+		SrcPort: context.TestPort,
+		DstPort: c.Port,
+		Flags:   header.TCPFlagAck,
+		SeqNum:  seq,
+		AckNum:  c.IRS.Add(1 + seqnum.Size(maxPayload)),
+		RcvWnd:  rcvWnd,
+		TCPOpts: options,
+	})
+
+	// Wait for the probe to finish processing the ACK before the test completes.
+	<-probeDone
+
+	verifySpuriousRecoveryMetric(t, c, 1 /* numSpuriousRecovery */)
+}
+
+func TestSACKDetectSpuriousRecoveryWithDupACK(t *testing.T) {
+	c := context.New(t, uint32(mtu))
+	defer c.Cleanup()
+
+	numAck := 0
+	probeDone := make(chan struct{})
+	c.Stack().AddTCPProbe(func(s stack.TCPEndpointState) {
+		if numAck < 3 {
+			numAck++
+			return
+		}
+
+		// The probe does not run on the test goroutine (the test waits on
+		// probeDone below), so report with Errorf: Fatalf is not allowed here.
+		if s.Sender.RetransmitTS == 0 {
+			t.Errorf("RetransmitTS did not get updated, got: 0 want > 0")
+		}
+		if !s.Sender.SpuriousRecovery {
+			t.Errorf("Spurious recovery was not detected")
+		}
+		close(probeDone)
+	})
+
+	setStackSACKPermitted(t, c, true)
+	createConnectedWithSACKAndTS(c)
+	numPackets := 5
+	data := make([]byte, numPackets*maxPayload)
+	for i := range data {
+		data[i] = byte(i)
+	}
+	// Write the data.
+	var r bytes.Reader
+	r.Reset(data)
+	if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil {
+		t.Fatalf("Write failed: %s", err)
+	}
+
+	var options []byte
+	var bytesRead uint32
+	for i := 0; i < numPackets; i++ {
+		b := c.GetPacket()
+		tcpHdr := header.TCP(header.IPv4(b).Payload())
+		checkReceivedPacket(t, c, tcpHdr, bytesRead, b, data)
+
+		// Get options only for the first packet. They are echoed in the ACK
+		// so that it appears to acknowledge the original packet.
+		if i == 0 && c.TimeStampEnabled {
+			options = buildTSOptionFromHeader(tcpHdr)
+		}
+		bytesRead += uint32(len(tcpHdr.Payload()))
+	}
+
+	// Receive the retransmitted packet after TLP.
+	c.ReceiveAndCheckPacketWithOptions(data, 4*maxPayload, maxPayload, tsOptionSize)
+
+	seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	// Send ACK for #3 and #4 segments to avoid entering TLP.
+	start := c.IRS.Add(3*maxPayload + 1)
+	end := start.Add(2 * maxPayload)
+	c.SendAckWithSACK(seq, 0, []header.SACKBlock{{start, end}})
+
+	c.SendAck(seq, 0 /* bytesReceived */)
+	c.SendAck(seq, 0 /* bytesReceived */)
+
+	// Receive the retransmitted packet after three duplicate ACKs.
+	c.ReceiveAndCheckPacketWithOptions(data, 0, maxPayload, tsOptionSize)
+
+	info := tcpip.TCPInfoOption{}
+	if err := c.EP.GetSockOpt(&info); err != nil {
+		t.Fatalf("c.EP.GetSockOpt(&%T) = %s", info, err)
+	}
+
+	if info.CcState != tcpip.SACKRecovery {
+		t.Fatalf("Loss recovery did not happen, got: %v want: %v", info.CcState, tcpip.SACKRecovery)
+	}
+
+	// Acknowledge the data.
+	rcvWnd := seqnum.Size(30000)
+	c.SendPacket(nil, &context.Headers{
+		SrcPort: context.TestPort,
+		DstPort: c.Port,
+		Flags:   header.TCPFlagAck,
+		SeqNum:  seq,
+		AckNum:  c.IRS.Add(1 + seqnum.Size(maxPayload)),
+		RcvWnd:  rcvWnd,
+		TCPOpts: options,
+	})
+
+	// Wait for the probe to finish processing the ACK before the test completes.
+	<-probeDone
+
+	verifySpuriousRecoveryMetric(t, c, 1 /* numSpuriousRecovery */)
+}
+
+func TestNoSpuriousRecoveryWithDSACK(t *testing.T) {
+	c := context.New(t, uint32(mtu))
+	defer c.Cleanup()
+	setStackSACKPermitted(t, c, true)
+	createConnectedWithSACKAndTS(c)
+	numPackets := 5
+	data := sendAndReceiveWithSACK(t, c, numPackets, true /* enableRACK */)
+
+	// Receive the retransmitted packet after TLP.
+	c.ReceiveAndCheckPacketWithOptions(data, 4*maxPayload, maxPayload, tsOptionSize)
+
+	// Send ACK for #3 and #4 segments to avoid entering TLP.
+	start := c.IRS.Add(3*maxPayload + 1)
+	end := start.Add(2 * maxPayload)
+	seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	c.SendAckWithSACK(seq, 0, []header.SACKBlock{{start, end}})
+
+	c.SendAck(seq, 0 /* bytesReceived */)
+	c.SendAck(seq, 0 /* bytesReceived */)
+
+	// Receive the retransmitted packet after three duplicate ACKs.
+	c.ReceiveAndCheckPacketWithOptions(data, 0, maxPayload, tsOptionSize)
+
+	// Acknowledge the data with DSACK for #1 segment.
+	start = c.IRS.Add(maxPayload + 1)
+	end = start.Add(2 * maxPayload)
+	seq = seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	c.SendAckWithSACK(seq, 6*maxPayload, []header.SACKBlock{{start, end}})
+
+	verifySpuriousRecoveryMetric(t, c, 0 /* numSpuriousRecovery */)
+}
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 049957b81..39b1e08c0 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -233,7 +233,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult
 	// Control Messages
 	cm := tcpip.ControlMessages{
 		HasTimestamp: true,
-		Timestamp:    p.receivedAt.UnixNano(),
+		Timestamp:    p.receivedAt,
 	}
 
 	switch p.netProto {
diff --git a/pkg/unet/BUILD b/pkg/unet/BUILD
index 234125c38..8902be2d3 100644
--- a/pkg/unet/BUILD
+++ b/pkg/unet/BUILD
@@ -10,6 +10,7 @@ go_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
+        "//pkg/eventfd",
         "//pkg/sync",
         "@org_golang_x_sys//unix:go_default_library",
     ],
diff --git a/pkg/unet/unet.go b/pkg/unet/unet.go
index 40fa72925..0dc0c37bd 100644
--- a/pkg/unet/unet.go
+++ b/pkg/unet/unet.go
@@ -23,6 +23,7 @@ import (
 	"sync/atomic"
 
 	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/eventfd"
 	"gvisor.dev/gvisor/pkg/sync"
 )
 
@@ -55,15 +56,6 @@ func socket(packet bool) (int, error) {
 	return fd, nil
 }
 
-// eventFD returns a new event FD with initial value 0.
-func eventFD() (int, error) {
-	f, _, e := unix.Syscall(unix.SYS_EVENTFD2, 0, 0, 0)
-	if e != 0 {
-		return -1, e
-	}
-	return int(f), nil
-}
-
 // Socket is a connected unix domain socket.
 type Socket struct {
 	// gate protects use of fd.
@@ -78,7 +70,7 @@ type Socket struct {
 	// efd is an event FD that is signaled when the socket is closing.
 	//
 	// efd is immutable and remains valid until Close/Release.
-	efd int
+	efd eventfd.Eventfd
 
 	// race is an atomic variable used to avoid triggering the race
 	// detector. See comment in SocketPair below.
@@ -95,7 +87,7 @@ func NewSocket(fd int) (*Socket, error) {
 		return nil, err
 	}
 
-	efd, err := eventFD()
+	efd, err := eventfd.Create()
 	if err != nil {
 		return nil, err
 	}
@@ -110,16 +102,14 @@ func NewSocket(fd int) (*Socket, error) {
 // closing the event FD.
 func (s *Socket) finish() error {
 	// Signal any blocked or future polls.
-	//
-	// N.B. eventfd writes must be 8 bytes.
-	if _, err := unix.Write(s.efd, []byte{1, 0, 0, 0, 0, 0, 0, 0}); err != nil {
+	if err := s.efd.Notify(); err != nil {
 		return err
 	}
 
 	// Close the gate, blocking until all FD users leave.
 	s.gate.Close()
 
-	return unix.Close(s.efd)
+	return s.efd.Close()
 }
 
 // Close closes the socket.
diff --git a/pkg/unet/unet_unsafe.go b/pkg/unet/unet_unsafe.go
index f0bf93ddd..ea281fec3 100644
--- a/pkg/unet/unet_unsafe.go
+++ b/pkg/unet/unet_unsafe.go
@@ -43,7 +43,7 @@ func (s *Socket) wait(write bool) error {
 			},
 			{
 				// The eventfd, signaled when we are closing.
-				Fd:     int32(s.efd),
+				Fd:     int32(s.efd.FD()),
 				Events: unix.POLLIN,
 			},
 		}
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index ff7a5a44b..36806b740 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -80,7 +80,6 @@ go_library(
         "//pkg/sentry/loader",
         "//pkg/sentry/pgalloc",
         "//pkg/sentry/platform",
-        "//pkg/sentry/sighandling",
         "//pkg/sentry/socket/hostinet",
         "//pkg/sentry/socket/netfilter",
         "//pkg/sentry/socket/netlink",
@@ -96,6 +95,7 @@ go_library(
         "//pkg/sentry/usage",
         "//pkg/sentry/vfs",
         "//pkg/sentry/watchdog",
+        "//pkg/sighandling",
         "//pkg/sync",
         "//pkg/tcpip",
         "//pkg/tcpip/link/ethernet",
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 703f34827..db363435b 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -304,6 +304,22 @@ var allowedSyscalls = seccomp.SyscallRules{
 			seccomp.EqualTo(unix.SPLICE_F_NONBLOCK), /* flags */
 		},
 	},
+	unix.SYS_TIMER_CREATE: []seccomp.Rule{
+		{
+			seccomp.EqualTo(unix.CLOCK_THREAD_CPUTIME_ID), /* which */
+			seccomp.MatchAny{},                            /* sevp */
+			seccomp.MatchAny{},                            /* timerid */
+		},
+	},
+	unix.SYS_TIMER_DELETE: []seccomp.Rule{},
+	unix.SYS_TIMER_SETTIME: []seccomp.Rule{
+		{
+			seccomp.MatchAny{}, /* timerid */
+			seccomp.EqualTo(0), /* flags */
+			seccomp.MatchAny{}, /* new_value */
+			seccomp.EqualTo(0), /* old_value */
+		},
+	},
 	unix.SYS_TGKILL: []seccomp.Rule{
 		{
 			seccomp.EqualTo(uint64(os.Getpid())),
@@ -630,6 +646,11 @@ func hostInetFilters() seccomp.SyscallRules {
 
 func controlServerFilters(fd int) seccomp.SyscallRules {
 	return seccomp.SyscallRules{
+		unix.SYS_ACCEPT4: []seccomp.Rule{
+			{
+				seccomp.EqualTo(fd),
+			},
+		},
 		unix.SYS_ACCEPT: []seccomp.Rule{
 			{
 				seccomp.EqualTo(fd),
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index b46d84e5a..2f2d4df5e 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -49,13 +49,13 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/loader"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
-	"gvisor.dev/gvisor/pkg/sentry/sighandling"
 	"gvisor.dev/gvisor/pkg/sentry/socket/netfilter"
 	"gvisor.dev/gvisor/pkg/sentry/syscalls/linux/vfs2"
 	"gvisor.dev/gvisor/pkg/sentry/time"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sentry/watchdog"
+	"gvisor.dev/gvisor/pkg/sighandling"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/link/ethernet"
@@ -241,10 +241,8 @@ func New(args Args) (*Loader, error) {
 	// Is this a VFSv2 kernel?
 	if args.Conf.VFS2 {
 		kernel.VFS2Enabled = true
-		if args.Conf.FUSE {
-			kernel.FUSEEnabled = true
-		}
-
+		kernel.FUSEEnabled = args.Conf.FUSE
+		kernel.LISAFSEnabled = args.Conf.Lisafs
 		vfs2.Override()
 	}
 
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 2f1332566..ac1e5ac37 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -173,7 +173,7 @@ func (c *containerMounter) mountAll(conf *config.Config, procArgs *kernel.Create
 	rootProcArgs.Credentials = rootCreds
 	rootProcArgs.Umask = 0022
 	rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals
-	rootCtx := procArgs.NewContext(c.k)
+	rootCtx := rootProcArgs.NewContext(c.k)
 
 	mns, err := c.createMountNamespaceVFS2(rootCtx, conf, rootCreds)
 	if err != nil {
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD
index f7e892584..d3aec1fff 100644
--- a/runsc/cgroup/BUILD
+++ b/runsc/cgroup/BUILD
@@ -9,6 +9,7 @@ go_library(
     deps = [
         "//pkg/cleanup",
         "//pkg/log",
+        "//pkg/sync",
         "@com_github_cenkalti_backoff//:go_default_library",
         "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
         "@org_golang_x_sys//unix:go_default_library",
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 5dbf14376..7a0f0694f 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -19,7 +19,6 @@ package cgroup
 import (
 	"bufio"
 	"context"
-	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -34,6 +33,7 @@ import (
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/cleanup"
 	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sync"
 )
 
 const (
@@ -104,17 +104,21 @@ func setOptionalValueUint16(path, name string, val *uint16) error {
 
 func setValue(path, name, data string) error {
 	fullpath := filepath.Join(path, name)
+	log.Debugf("Setting %q to %q", fullpath, data)
+	return writeFile(fullpath, []byte(data), 0700)
+}
 
-	// Retry writes on EINTR; see:
-	//    https://github.com/golang/go/issues/38033
-	for {
-		err := ioutil.WriteFile(fullpath, []byte(data), 0700)
-		if err == nil {
-			return nil
-		} else if !errors.Is(err, unix.EINTR) {
-			return err
-		}
+// writeFile is similar to ioutil.WriteFile() but doesn't create the file if it
+// doesn't exist.
+func writeFile(path string, data []byte, perm os.FileMode) error {
+	f, err := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, perm)
+	if err != nil {
+		return err
 	}
+	defer f.Close()
+
+	_, err = f.Write(data)
+	return err
 }
 
 func getValue(path, name string) (string, error) {
@@ -155,15 +159,8 @@ func fillFromAncestor(path string) (string, error) {
 		return "", err
 	}
 
-	// Retry writes on EINTR; see:
-	//    https://github.com/golang/go/issues/38033
-	for {
-		err := ioutil.WriteFile(path, []byte(val), 0700)
-		if err == nil {
-			break
-		} else if !errors.Is(err, unix.EINTR) {
-			return "", err
-		}
+	if err := writeFile(path, []byte(val), 0700); err != nil {
+		return "", err
 	}
 	return val, nil
 }
@@ -309,7 +306,13 @@ func NewFromSpec(spec *specs.Spec) (*Cgroup, error) {
 	if spec.Linux == nil || spec.Linux.CgroupsPath == "" {
 		return nil, nil
 	}
-	return new("self", spec.Linux.CgroupsPath)
+	return NewFromPath(spec.Linux.CgroupsPath)
+}
+
+// NewFromPath creates a new Cgroup instance from the specified relative path.
+// Cgroup paths are loaded based on the current process.
+func NewFromPath(cgroupsPath string) (*Cgroup, error) {
+	return new("self", cgroupsPath)
 }
 
 // NewFromPid loads cgroup for the given process.
@@ -365,21 +368,20 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error {
 	}
 	for _, key := range missing {
 		ctrlr := controllers[key]
-		path := c.MakePath(key)
-		log.Debugf("Creating cgroup %q: %q", key, path)
-		if err := os.MkdirAll(path, 0755); err != nil {
-			if ctrlr.optional() && errors.Is(err, unix.EROFS) {
-				if err := ctrlr.skip(res); err != nil {
-					return err
-				}
-				log.Infof("Skipping cgroup %q", key)
-				continue
+
+		if skip, err := c.createController(key); skip && ctrlr.optional() {
+			if err := ctrlr.skip(res); err != nil {
+				return err
 			}
+			log.Infof("Skipping cgroup %q, err: %v", key, err)
+			continue
+		} else if err != nil {
 			return err
 		}
 
 		// Only set controllers that were created by me.
 		c.Own[key] = true
+		path := c.MakePath(key)
 		if err := ctrlr.set(res, path); err != nil {
 			return err
 		}
@@ -388,10 +390,29 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error {
 	return nil
 }
 
+// createController creates the controller directory, checking that the
+// controller is enabled in the system. It returns a boolean indicating whether
+// the controller should be skipped (e.g. controller is disabled). In case it
+// should be skipped, it also returns the error it got.
+func (c *Cgroup) createController(name string) (bool, error) {
+	ctrlrPath := filepath.Join(cgroupRoot, name)
+	if _, err := os.Stat(ctrlrPath); err != nil {
+		return os.IsNotExist(err), err
+	}
+
+	path := c.MakePath(name)
+	log.Debugf("Creating cgroup %q: %q", name, path)
+	if err := os.MkdirAll(path, 0755); err != nil {
+		return false, err
+	}
+	return false, nil
+}
+
 // Uninstall removes the settings done in Install(). If cgroup path already
 // existed when Install() was called, Uninstall is a noop.
 func (c *Cgroup) Uninstall() error {
 	log.Debugf("Deleting cgroup %q", c.Name)
+	wait := sync.WaitGroupErr{}
 	for key := range controllers {
 		if !c.Own[key] {
 			// cgroup is managed by caller, don't touch it.
@@ -400,9 +421,8 @@ func (c *Cgroup) Uninstall() error {
 		path := c.MakePath(key)
 		log.Debugf("Removing cgroup controller for key=%q path=%q", key, path)
 
-		// If we try to remove the cgroup too soon after killing the
-		// sandbox we might get EBUSY, so we retry for a few seconds
-		// until it succeeds.
+		// If we try to remove the cgroup too soon after killing the sandbox we
+		// might get EBUSY, so we retry for a few seconds until it succeeds.
 		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 		defer cancel()
 		b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
@@ -413,11 +433,18 @@ func (c *Cgroup) Uninstall() error {
 			}
 			return err
 		}
-		if err := backoff.Retry(fn, b); err != nil {
-			return fmt.Errorf("removing cgroup path %q: %w", path, err)
-		}
+		// Run deletions in parallel to remove all directories even if there are
+		// failures/timeouts in other directories.
+		wait.Add(1)
+		go func() {
+			defer wait.Done()
+			if err := backoff.Retry(fn, b); err != nil {
+				wait.ReportError(fmt.Errorf("removing cgroup path %q: %w", path, err))
+				return
+			}
+		}()
 	}
-	return nil
+	return wait.Error()
 }
 
 // Join adds the current process to the all controllers. Returns function that
diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go
index 1431b4e8f..0b6a5431b 100644
--- a/runsc/cgroup/cgroup_test.go
+++ b/runsc/cgroup/cgroup_test.go
@@ -129,6 +129,18 @@ func boolPtr(v bool) *bool {
 	return &v
 }
 
+func createDir(dir string, contents map[string]string) error {
+	for name := range contents {
+		path := filepath.Join(dir, name)
+		f, err := os.Create(path)
+		if err != nil {
+			return err
+		}
+		f.Close()
+	}
+	return nil
+}
+
 func checkDir(t *testing.T, dir string, contents map[string]string) {
 	all, err := ioutil.ReadDir(dir)
 	if err != nil {
@@ -254,6 +266,9 @@ func TestBlockIO(t *testing.T) {
 				t.Fatalf("error creating temporary directory: %v", err)
 			}
 			defer os.RemoveAll(dir)
+			if err := createDir(dir, tc.wants); err != nil {
+				t.Fatalf("createDir(): %v", err)
+			}
 
 			spec := &specs.LinuxResources{
 				BlockIO: tc.spec,
@@ -304,6 +319,9 @@ func TestCPU(t *testing.T) {
 				t.Fatalf("error creating temporary directory: %v", err)
 			}
 			defer os.RemoveAll(dir)
+			if err := createDir(dir, tc.wants); err != nil {
+				t.Fatalf("createDir(): %v", err)
+			}
 
 			spec := &specs.LinuxResources{
 				CPU: tc.spec,
@@ -343,6 +361,9 @@ func TestCPUSet(t *testing.T) {
 				t.Fatalf("error creating temporary directory: %v", err)
 			}
 			defer os.RemoveAll(dir)
+			if err := createDir(dir, tc.wants); err != nil {
+				t.Fatalf("createDir(): %v", err)
+			}
 
 			spec := &specs.LinuxResources{
 				CPU: tc.spec,
@@ -481,6 +502,9 @@ func TestHugeTlb(t *testing.T) {
 				t.Fatalf("error creating temporary directory: %v", err)
 			}
 			defer os.RemoveAll(dir)
+			if err := createDir(dir, tc.wants); err != nil {
+				t.Fatalf("createDir(): %v", err)
+			}
 
 			spec := &specs.LinuxResources{
 				HugepageLimits: tc.spec,
@@ -542,6 +566,9 @@ func TestMemory(t *testing.T) {
 				t.Fatalf("error creating temporary directory: %v", err)
 			}
 			defer os.RemoveAll(dir)
+			if err := createDir(dir, tc.wants); err != nil {
+				t.Fatalf("createDir(): %v", err)
+			}
 
 			spec := &specs.LinuxResources{
 				Memory: tc.spec,
@@ -584,6 +611,9 @@ func TestNetworkClass(t *testing.T) {
 				t.Fatalf("error creating temporary directory: %v", err)
 			}
 			defer os.RemoveAll(dir)
+			if err := createDir(dir, tc.wants); err != nil {
+				t.Fatalf("createDir(): %v", err)
+			}
 
 			spec := &specs.LinuxResources{
 				Network: tc.spec,
@@ -631,6 +661,9 @@ func TestNetworkPriority(t *testing.T) {
 				t.Fatalf("error creating temporary directory: %v", err)
 			}
 			defer os.RemoveAll(dir)
+			if err := createDir(dir, tc.wants); err != nil {
+				t.Fatalf("createDir(): %v", err)
+			}
 
 			spec := &specs.LinuxResources{
 				Network: tc.spec,
@@ -671,6 +704,9 @@ func TestPids(t *testing.T) {
 				t.Fatalf("error creating temporary directory: %v", err)
 			}
 			defer os.RemoveAll(dir)
+			if err := createDir(dir, tc.wants); err != nil {
+				t.Fatalf("createDir(): %v", err)
+			}
 
 			spec := &specs.LinuxResources{
 				Pids: tc.spec,
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 5314549d6..4e744e604 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -19,7 +19,7 @@ go_library(
         "//pkg/cleanup",
         "//pkg/log",
         "//pkg/sentry/control",
-        "//pkg/sentry/sighandling",
+        "//pkg/sighandling",
         "//pkg/sync",
         "//runsc/boot",
         "//runsc/cgroup",
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 9c0004753..6a59df411 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -35,7 +35,7 @@ import (
 	"gvisor.dev/gvisor/pkg/cleanup"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/control"
-	"gvisor.dev/gvisor/pkg/sentry/sighandling"
+	"gvisor.dev/gvisor/pkg/sighandling"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/cgroup"
 	"gvisor.dev/gvisor/runsc/config"
@@ -44,6 +44,8 @@ import (
 	"gvisor.dev/gvisor/runsc/specutils"
 )
 
+const cgroupParentAnnotation = "dev.gvisor.spec.cgroup-parent"
+
 // validateID validates the container id.
 func validateID(id string) error {
 	// See libcontainer/factory_linux.go.
@@ -113,6 +115,16 @@ type Container struct {
 	// container is created and reset when the sandbox is destroyed.
 	Sandbox *sandbox.Sandbox `json:"sandbox"`
 
+	// CompatCgroup has the cgroup configuration for the container. For the single
+	// container case, container cgroup is set in `c.Sandbox` only. CompatCgroup
+	// is only set for multi-container, where the `c.Sandbox` cgroup represents
+	// the entire pod.
+	//
+	// Note that CompatCgroup is created only for compatibility with tools
+	// that expect container cgroups to exist. Setting limits here makes no change
+	// to the container in question.
+	CompatCgroup *cgroup.Cgroup `json:"compatCgroup"`
+
 	// Saver handles load from/save to the state file safely from multiple
 	// processes.
 	Saver StateFile `json:"saver"`
@@ -233,27 +245,12 @@ func New(conf *config.Config, args Args) (*Container, error) {
 		}
 		// Create and join cgroup before processes are created to ensure they are
 		// part of the cgroup from the start (and all their children processes).
-		cg, err := cgroup.NewFromSpec(args.Spec)
+		parentCgroup, subCgroup, err := c.setupCgroupForRoot(conf, args.Spec)
 		if err != nil {
 			return nil, err
 		}
-		if cg != nil {
-			// TODO(gvisor.dev/issue/3481): Remove when cgroups v2 is supported.
-			if !conf.Rootless && cgroup.IsOnlyV2() {
-				return nil, fmt.Errorf("cgroups V2 is not yet supported. Enable cgroups V1 and retry")
-			}
-			// If there is cgroup config, install it before creating sandbox process.
-			if err := cg.Install(args.Spec.Linux.Resources); err != nil {
-				switch {
-				case errors.Is(err, unix.EACCES) && conf.Rootless:
-					log.Warningf("Skipping cgroup configuration in rootless mode: %v", err)
-					cg = nil
-				default:
-					return nil, fmt.Errorf("configuring cgroup: %v", err)
-				}
-			}
-		}
-		if err := runInCgroup(cg, func() error {
+		c.CompatCgroup = subCgroup
+		if err := runInCgroup(parentCgroup, func() error {
 			ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir, args.Attached)
 			if err != nil {
 				return err
@@ -269,7 +266,7 @@ func New(conf *config.Config, args Args) (*Container, error) {
 				UserLog:       args.UserLog,
 				IOFiles:       ioFiles,
 				MountsFile:    specFile,
-				Cgroup:        cg,
+				Cgroup:        parentCgroup,
 				Attached:      args.Attached,
 			}
 			sand, err := sandbox.New(conf, sandArgs)
@@ -296,6 +293,12 @@ func New(conf *config.Config, args Args) (*Container, error) {
 		}
 		c.Sandbox = sb.Sandbox
 
+		subCgroup, err := c.setupCgroupForSubcontainer(conf, args.Spec)
+		if err != nil {
+			return nil, err
+		}
+		c.CompatCgroup = subCgroup
+
 		// If the console control socket file is provided, then create a new
 		// pty master/slave pair and send the TTY to the sandbox process.
 		var tty *os.File
@@ -781,16 +784,16 @@ func (c *Container) saveLocked() error {
 // root containers), and waits for the container or sandbox and the gofer
 // to stop. If any of them doesn't stop before timeout, an error is returned.
 func (c *Container) stop() error {
-	var cgroup *cgroup.Cgroup
+	var parentCgroup *cgroup.Cgroup
 
 	if c.Sandbox != nil {
 		log.Debugf("Destroying container, cid: %s", c.ID)
 		if err := c.Sandbox.DestroyContainer(c.ID); err != nil {
 			return fmt.Errorf("destroying container %q: %v", c.ID, err)
 		}
-		// Only uninstall cgroup for sandbox stop.
+		// Only uninstall parentCgroup for sandbox stop.
 		if c.Sandbox.IsRootContainer(c.ID) {
-			cgroup = c.Sandbox.Cgroup
+			parentCgroup = c.Sandbox.Cgroup
 		}
 		// Only set sandbox to nil after it has been told to destroy the container.
 		c.Sandbox = nil
@@ -809,9 +812,16 @@ func (c *Container) stop() error {
 		return err
 	}
 
-	// Gofer is running in cgroups, so Cgroup.Uninstall has to be called after it.
-	if cgroup != nil {
-		if err := cgroup.Uninstall(); err != nil {
+	// Delete container cgroup if any.
+	if c.CompatCgroup != nil {
+		if err := c.CompatCgroup.Uninstall(); err != nil {
+			return err
+		}
+	}
+	// Gofer is running inside parentCgroup, so Cgroup.Uninstall has to be called
+	// after the gofer has stopped.
+	if parentCgroup != nil {
+		if err := parentCgroup.Uninstall(); err != nil {
 			return err
 		}
 	}
@@ -1208,3 +1218,77 @@ func (c *Container) populateStats(event *boot.EventOut) {
 	event.Event.Data.CPU.Usage.Total = uint64(total)
 	return
 }
+
+// setupCgroupForRoot configures and returns cgroup for the sandbox and the
+// root container. If `cgroupParentAnnotation` is set, use that path as the
+// sandbox cgroup and use Spec.Linux.CgroupsPath as the root container cgroup.
+func (c *Container) setupCgroupForRoot(conf *config.Config, spec *specs.Spec) (*cgroup.Cgroup, *cgroup.Cgroup, error) {
+	var parentCgroup *cgroup.Cgroup
+	if parentPath, ok := spec.Annotations[cgroupParentAnnotation]; ok {
+		var err error
+		parentCgroup, err = cgroup.NewFromPath(parentPath)
+		if err != nil {
+			return nil, nil, err
+		}
+	} else {
+		var err error
+		parentCgroup, err = cgroup.NewFromSpec(spec)
+		if parentCgroup == nil || err != nil {
+			return nil, nil, err
+		}
+	}
+
+	var err error
+	parentCgroup, err = cgroupInstall(conf, parentCgroup, spec.Linux.Resources)
+	if parentCgroup == nil || err != nil {
+		return nil, nil, err
+	}
+
+	subCgroup, err := c.setupCgroupForSubcontainer(conf, spec)
+	if err != nil {
+		_ = parentCgroup.Uninstall()
+		return nil, nil, err
+	}
+	return parentCgroup, subCgroup, nil
+}
+
+// setupCgroupForSubcontainer sets up empty cgroups for subcontainers. Since
+// subcontainers run exclusively inside the sandbox, subcontainer cgroups on the
+// host have no effect on them. However, some tools (e.g. cAdvisor) use cgroup
+// paths to discover new containers and report stats for them.
+func (c *Container) setupCgroupForSubcontainer(conf *config.Config, spec *specs.Spec) (*cgroup.Cgroup, error) {
+	if isRoot(spec) {
+		if _, ok := spec.Annotations[cgroupParentAnnotation]; !ok {
+			return nil, nil
+		}
+	}
+
+	cg, err := cgroup.NewFromSpec(spec)
+	if cg == nil || err != nil {
+		return nil, err
+	}
+	// Use empty resources, just want the directory structure created.
+	return cgroupInstall(conf, cg, &specs.LinuxResources{})
+}
+
+// cgroupInstall creates the cgroup directory structure and sets the respective
+// resources. On success, it returns the cgroup instance and a nil error. In
+// rootless mode, cgroup operations may fail with EACCES; in that case the
+// error is suppressed and a nil cgroup instance is returned to indicate that
+// no cgroup was configured.
+func cgroupInstall(conf *config.Config, cg *cgroup.Cgroup, res *specs.LinuxResources) (*cgroup.Cgroup, error) {
+	// TODO(gvisor.dev/issue/3481): Remove when cgroups v2 is supported.
+	if !conf.Rootless && cgroup.IsOnlyV2() {
+		return nil, fmt.Errorf("cgroups V2 is not yet supported. Enable cgroups V1 and retry")
+	}
+	if err := cg.Install(res); err != nil {
+		switch {
+		case errors.Is(err, unix.EACCES) && conf.Rootless:
+			log.Warningf("Skipping cgroup configuration in rootless mode: %v", err)
+			return nil, nil
+		default:
+			return nil, fmt.Errorf("configuring cgroup: %v", err)
+		}
+	}
+	return cg, nil
+}
diff --git a/test/packetimpact/runner/dut.go b/test/packetimpact/runner/dut.go
index 02678a76a..f27e52f93 100644
--- a/test/packetimpact/runner/dut.go
+++ b/test/packetimpact/runner/dut.go
@@ -331,6 +331,22 @@ func TestWithDUT(ctx context.Context, t *testing.T, mkDevice func(*dockerutil.Co
 				t.Logf("sniffer logs:\n%s", snifferOut)
 			}
 		})
+	}
+
+	// Arm the cleanup hook before we do anything else. Otherwise failures below
+	// can cause the test to hang in the cleanup hook above.
+	t.Cleanup(func() {
+		// Wait 1 second before killing tcpdump to give it time to flush
+		// any packets. On linux tests killing it immediately can
+		// sometimes result in partial pcaps.
+		time.Sleep(1 * time.Second)
+		if logs, err := testbenchContainer.Exec(ctx, dockerutil.ExecOpts{}, "killall", baseSnifferArgs[0]); err != nil {
+			t.Errorf("failed to kill all sniffers: %s, logs: %s", err, logs)
+		}
+	})
+
+	for _, info := range dutInfos {
+		n := info.Net
 		// When the Linux kernel receives a SYN-ACK for a SYN it didn't send, it
 		// will respond with an RST. In most packetimpact tests, the SYN is sent
 		// by the raw socket, the kernel knows nothing about the connection, this
@@ -344,16 +360,6 @@ func TestWithDUT(ctx context.Context, t *testing.T, mkDevice func(*dockerutil.Co
 		}
 	}
 
-	t.Cleanup(func() {
-		// Wait 1 second before killing tcpdump to give it time to flush
-		// any packets. On linux tests killing it immediately can
-		// sometimes result in partial pcaps.
-		time.Sleep(1 * time.Second)
-		if logs, err := testbenchContainer.Exec(ctx, dockerutil.ExecOpts{}, "killall", baseSnifferArgs[0]); err != nil {
-			t.Errorf("failed to kill all sniffers: %s, logs: %s", err, logs)
-		}
-	})
-
 	// FIXME(b/156449515): Some piece of the system has a race. The old
 	// bash script version had a sleep, so we have one too. The race should
 	// be fixed and this sleep removed.
diff --git a/test/packetimpact/tests/tcp_listen_backlog_test.go b/test/packetimpact/tests/tcp_listen_backlog_test.go
index fea7d5b6f..e124002f6 100644
--- a/test/packetimpact/tests/tcp_listen_backlog_test.go
+++ b/test/packetimpact/tests/tcp_listen_backlog_test.go
@@ -15,7 +15,9 @@
 package tcp_listen_backlog_test
 
 import (
+	"bytes"
 	"flag"
+	"sync"
 	"testing"
 	"time"
 
@@ -35,60 +37,272 @@ func init() {
 func TestTCPListenBacklog(t *testing.T) {
 	dut := testbench.NewDUT(t)
 
-	// Listening endpoint accepts one more connection than the listen backlog.
-	listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 0 /*backlog*/)
+	// This is the number of pending connections before SYN cookies are used.
+	const backlog = 10
 
-	var establishedConn testbench.TCPIPv4
-	var incompleteConn testbench.TCPIPv4
+	listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, backlog)
+	defer dut.Close(t, listenFd)
 
-	// Test if the DUT listener replies to more SYNs than listen backlog+1
-	for i, conn := range []*testbench.TCPIPv4{&establishedConn, &incompleteConn} {
-		*conn = dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-		// Expect dut connection to have transitioned to SYN-RCVD state.
+	// Fill the SYN queue with connections in SYN-RCVD. We will use these to test
+	// that ACKs received while the accept queue is full are ignored.
+	var synQueueConns [backlog]testbench.TCPIPv4
+	defer func() {
+		for i := range synQueueConns {
+			synQueueConns[i].Close(t)
+		}
+	}()
+	{
+		var wg sync.WaitGroup
+		for i := range synQueueConns {
+			conn := &synQueueConns[i]
+			*conn = dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{})
+
+			wg.Add(1)
+			go func(i int) {
+				defer wg.Done()
+
+				conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)})
+				if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err != nil {
+					t.Errorf("%d: expected TCP frame: %s", i, err)
+				} else if got, want := *got.Flags, header.TCPFlagSyn|header.TCPFlagAck; got != want {
+					t.Errorf("%d: got %s, want %s", i, got, want)
+				}
+			}(i)
+		}
+		wg.Wait()
+		if t.Failed() {
+			t.FailNow()
+		}
+	}
+
+	const payloadLen = 1
+	payload := testbench.Payload{Bytes: testbench.GenerateRandomPayload(t, payloadLen)}
+
+	// Fill the accept queue with connections established using SYN cookies.
+	var synCookieConns [backlog + 1]testbench.TCPIPv4
+	defer func() {
+		for i := range synCookieConns {
+			synCookieConns[i].Close(t)
+		}
+	}()
+	{
+		var wg sync.WaitGroup
+		for i := range synCookieConns {
+			conn := &synCookieConns[i]
+			*conn = dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{})
+
+			wg.Add(1)
+			go func(i int) {
+				defer wg.Done()
+
+				conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)})
+				if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err != nil {
+					t.Errorf("%d: expected TCP frame: %s", i, err)
+				} else if got, want := *got.Flags, header.TCPFlagSyn|header.TCPFlagAck; got != want {
+					t.Errorf("%d: got %s, want %s", i, got, want)
+				}
+				// Send a payload so we can observe the dut ACK.
+				conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)}, &payload)
+				if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err != nil {
+					t.Errorf("%d: expected TCP frame: %s", i, err)
+				} else if got, want := *got.Flags, header.TCPFlagAck; got != want {
+					t.Errorf("%d: got %s, want %s", i, got, want)
+				}
+			}(i)
+		}
+		wg.Wait()
+		if t.Failed() {
+			t.FailNow()
+		}
+	}
+
+	// Send ACKs to complete the handshakes. These are expected to be dropped
+	// because the accept queue is full.
+	{
+		var wg sync.WaitGroup
+		for i := range synQueueConns {
+			conn := &synQueueConns[i]
+			wg.Add(1)
+			go func(i int) {
+				defer wg.Done()
+
+				conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)})
+				// Wait for the SYN-ACK to be retransmitted to confirm the ACK was
+				// dropped.
+				seqNum := uint32(*conn.RemoteSeqNum(t) - 1)
+				if got, err := conn.Expect(t, testbench.TCP{SeqNum: &seqNum}, time.Second); err != nil {
+					t.Errorf("%d: expected TCP frame: %s", i, err)
+				} else if got, want := *got.Flags, header.TCPFlagSyn|header.TCPFlagAck; got != want {
+					t.Errorf("%d: got %s, want %s", i, got, want)
+				}
+			}(i)
+		}
+
+		wg.Wait()
+		if t.Failed() {
+			t.FailNow()
+		}
+	}
+
+	// While the accept queue is still full, send an unexpected ACK from a new
+	// socket. The listener should reply with an RST.
+	func() {
+		conn := dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{})
+		defer conn.Close(t)
+		conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)})
+		if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err != nil {
+			t.Errorf("expected TCP frame: %s", err)
+		} else if got, want := *got.Flags, header.TCPFlagRst; got != want {
+			t.Errorf("got %s, want %s", got, want)
+		}
+	}()
+
+	func() {
+		// Now initiate a new connection when the accept queue is full.
+		conn := dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{})
+		defer conn.Close(t)
+		// Expect dut connection to drop the SYN.
 		conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)})
-		if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
-			t.Fatalf("expected SYN-ACK for %d connection, %s", i, err)
+		if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err == nil {
+			t.Fatalf("expected no TCP frame, got %s", got)
+		}
+	}()
+
+	// Drain the accept queue.
+	{
+		var wg sync.WaitGroup
+		for i := range synCookieConns {
+			conn := &synCookieConns[i]
+
+			wg.Add(1)
+			go func(i int) {
+				defer wg.Done()
+
+				fd, _ := dut.Accept(t, listenFd)
+				b := dut.Recv(t, fd, payloadLen+1, 0)
+				dut.Close(t, fd)
+				if !bytes.Equal(b, payload.Bytes) {
+					t.Errorf("connection %d: got dut.Recv = %x, want = %x", i, b, payload.Bytes)
+				}
+
+				if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err != nil {
+					t.Errorf("%d: expected TCP frame: %s", i, err)
+				} else if got, want := *got.Flags, header.TCPFlagFin|header.TCPFlagAck; got != want {
+					t.Errorf("%d: got %s, want %s", i, got, want)
+				}
+
+				// Prevent retransmission.
+				conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)})
+			}(i)
+		}
+		wg.Wait()
+		if t.Failed() {
+			t.FailNow()
 		}
 	}
-	defer establishedConn.Close(t)
-	defer incompleteConn.Close(t)
-
-	// Send the ACK to complete handshake.
-	establishedConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)})
-
-	// Poll for the established connection ready for accept.
-	dut.PollOne(t, listenFd, unix.POLLIN, time.Second)
-
-	// Send the ACK to complete handshake, expect this to be dropped by the
-	// listener as the accept queue would be full because of the previous
-	// handshake.
-	incompleteConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)})
-	// Let the test wait for sometime so that the ACK is indeed dropped by
-	// the listener. Without such a wait, the DUT accept can race with
-	// ACK handling (dropping) causing the test to be flaky.
-	time.Sleep(100 * time.Millisecond)
-
-	// Drain the accept queue to enable poll for subsequent connections on the
-	// listener.
-	fd, _ := dut.Accept(t, listenFd)
-	dut.Close(t, fd)
-
-	// The ACK for the incomplete connection should be ignored by the
-	// listening endpoint and the poll on listener should now time out.
-	if pfds := dut.Poll(t, []unix.PollFd{{Fd: listenFd, Events: unix.POLLIN}}, time.Second); len(pfds) != 0 {
-		t.Fatalf("got dut.Poll(...) = %#v", pfds)
+
+	// Complete the partial connections to move them from the SYN queue to the
+	// accept queue. We will use these to test that connections in the accept
+	// queue are closed on listener shutdown.
+	{
+		var wg sync.WaitGroup
+		for i := range synQueueConns {
+			conn := &synQueueConns[i]
+			wg.Add(1)
+			go func(i int) {
+				defer wg.Done()
+
+				tcp := testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)}
+
+				// Exercise connections with and without pending data.
+				if i%2 == 0 {
+					// Send ACK with no payload; wait for absence of SYN-ACK retransmit.
+					conn.Send(t, tcp)
+					if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err == nil {
+						t.Errorf("%d: expected no TCP frame, got %s", i, got)
+					}
+				} else {
+					// Send ACK with payload; wait for ACK.
+					conn.Send(t, tcp, &payload)
+					if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err != nil {
+						t.Errorf("%d: expected TCP frame: %s", i, err)
+					} else if got, want := *got.Flags, header.TCPFlagAck; got != want {
+						t.Errorf("%d: got %s, want %s", i, got, want)
+					}
+				}
+			}(i)
+		}
+
+		wg.Wait()
+		if t.Failed() {
+			t.FailNow()
+		}
 	}
 
-	// Re-send the ACK to complete handshake and re-fill the accept-queue.
-	incompleteConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)})
-	dut.PollOne(t, listenFd, unix.POLLIN, time.Second)
-
-	// Now initiate a new connection when the accept queue is full.
-	connectingConn := dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer connectingConn.Close(t)
-	// Expect dut connection to drop the SYN and let the client stay in SYN_SENT state.
-	connectingConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)})
-	if got, err := connectingConn.ExpectData(t, &testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err == nil {
-		t.Fatalf("expected no SYN-ACK, but got %s", got)
+	// The accept queue now has N-1 connections in it. The next incoming SYN will
+	// enter the SYN queue, and the one following will use SYN cookies. We test
+	// both.
+	var connectingConns [2]testbench.TCPIPv4
+	defer func() {
+		for i := range connectingConns {
+			connectingConns[i].Close(t)
+		}
+	}()
+	{
+		var wg sync.WaitGroup
+		for i := range connectingConns {
+			conn := &connectingConns[i]
+			*conn = dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{})
+
+			wg.Add(1)
+			go func(i int) {
+				defer wg.Done()
+
+				conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)})
+				if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err != nil {
+					t.Errorf("%d: expected TCP frame: %s", i, err)
+				} else if got, want := *got.Flags, header.TCPFlagSyn|header.TCPFlagAck; got != want {
+					t.Errorf("%d: got %s, want %s", i, got, want)
+				}
+			}(i)
+		}
+		wg.Wait()
+		if t.Failed() {
+			t.FailNow()
+		}
+	}
+
+	dut.Shutdown(t, listenFd, unix.SHUT_RD)
+
+	var wg sync.WaitGroup
+
+	// Shutdown causes Connections in the accept queue to be closed.
+	for i := range synQueueConns {
+		conn := &synQueueConns[i]
+		wg.Add(1)
+		go func(i int) {
+			defer wg.Done()
+
+			if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err != nil {
+				t.Errorf("%d: expected TCP frame: %s", i, err)
+			} else if got, want := *got.Flags, header.TCPFlagRst|header.TCPFlagAck; got != want {
+				t.Errorf("%d: got %s, want %s", i, got, want)
+			}
+		}(i)
+	}
+
+	for i := range connectingConns {
+		conn := &connectingConns[i]
+
+		wg.Add(1)
+		go func(i int) {
+			defer wg.Done()
+
+			if got, err := conn.Expect(t, testbench.TCP{}, time.Second); err == nil {
+				t.Errorf("%d: expected no TCP frame, got %s", i, got)
+			}
+		}(i)
 	}
+
+	wg.Wait()
 }
diff --git a/test/runtimes/runner/lib/BUILD b/test/runtimes/runner/lib/BUILD
index d308f41b0..3491c535b 100644
--- a/test/runtimes/runner/lib/BUILD
+++ b/test/runtimes/runner/lib/BUILD
@@ -5,7 +5,11 @@ package(licenses = ["notice"])
 go_library(
     name = "lib",
     testonly = 1,
-    srcs = ["lib.go"],
+    srcs = [
+        "go_test_dependency_go118.go",
+        "go_test_dependency_not_go118.go",
+        "lib.go",
+    ],
     visibility = ["//test/runtimes/runner:__pkg__"],
     deps = [
         "//pkg/log",
diff --git a/test/runtimes/runner/lib/go_test_dependency_go118.go b/test/runtimes/runner/lib/go_test_dependency_go118.go
new file mode 100644
index 000000000..d430e81c7
--- /dev/null
+++ b/test/runtimes/runner/lib/go_test_dependency_go118.go
@@ -0,0 +1,27 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build go1.18 && go1.1
+// +build go1.18,go1.1
+
+package lib
+
+import (
+	"testing"
+)
+
+// mainStart wraps testing.MainStart for Go releases >= 1.18.
+func mainStart(tests []testing.InternalTest) *testing.M {
+	return testing.MainStart(testDeps{}, tests, nil, nil, nil)
+}
diff --git a/test/runtimes/runner/lib/go_test_dependency_not_go118.go b/test/runtimes/runner/lib/go_test_dependency_not_go118.go
new file mode 100644
index 000000000..8b0b34c72
--- /dev/null
+++ b/test/runtimes/runner/lib/go_test_dependency_not_go118.go
@@ -0,0 +1,25 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build !go1.18 && go1.1
+// +build !go1.18,go1.1
+
+package lib
+
+import "testing"
+
+// mainStart wraps testing.MainStart for Go release < 1.18.
+func mainStart(tests []testing.InternalTest) *testing.M {
+	return testing.MainStart(testDeps{}, tests, nil, nil)
+}
diff --git a/test/runtimes/runner/lib/lib.go b/test/runtimes/runner/lib/lib.go
index d6b652897..d704f8895 100644
--- a/test/runtimes/runner/lib/lib.go
+++ b/test/runtimes/runner/lib/lib.go
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"reflect"
 	"sort"
 	"strings"
 	"testing"
@@ -63,8 +64,7 @@ func RunTests(lang, image, excludeFile string, batchSize int, timeout time.Durat
 		fmt.Fprintf(os.Stderr, "%s\n", err.Error())
 		return 1
 	}
-
-	m := testing.MainStart(testDeps{}, tests, nil, nil)
+	m := mainStart(tests)
 	return m.Run()
 }
 
@@ -197,3 +197,21 @@ func (f testDeps) ImportPath() string                          { return "" }
 func (f testDeps) StartTestLog(io.Writer)                      {}
 func (f testDeps) StopTestLog() error                          { return nil }
 func (f testDeps) SetPanicOnExit0(bool)                        {}
+func (f testDeps) CoordinateFuzzing(time.Duration, int64, time.Duration, int64, int, []corpusEntry, []reflect.Type, string, string) error {
+	return nil
+}
+func (f testDeps) RunFuzzWorker(func(corpusEntry) error) error              { return nil }
+func (f testDeps) ReadCorpus(string, []reflect.Type) ([]corpusEntry, error) { return nil, nil }
+func (f testDeps) CheckCorpus([]interface{}, []reflect.Type) error          { return nil }
+func (f testDeps) ResetCoverage()                                           {}
+func (f testDeps) SnapshotCoverage()                                        {}
+
+// Copied from testing/fuzz.go.
+type corpusEntry = struct {
+	Parent     string
+	Name       string
+	Data       []byte
+	Values     []interface{}
+	Generation int
+	IsSeed     bool
+}
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 5b882875f..b96005d7d 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -590,6 +590,7 @@ cc_binary(
         "//test/util:posix_error",
         "//test/util:test_main",
         "//test/util:test_util",
+        "//test/util:thread_util",
     ],
 )
 
@@ -1978,6 +1979,7 @@ cc_binary(
     defines = select_system(),
     linkstatic = 1,
     deps = [
+        ":ip_socket_test_util",
         ":unix_domain_socket_test_util",
         "//test/util:capability_util",
         "//test/util:file_descriptor",
@@ -2660,16 +2662,11 @@ cc_library(
 cc_library(
     name = "socket_ip_udp_unbound_external_networking",
     testonly = 1,
-    srcs = [
-        "socket_ip_udp_unbound_external_networking.cc",
-    ],
     hdrs = [
         "socket_ip_udp_unbound_external_networking.h",
     ],
     deps = [
         ":ip_socket_test_util",
-        "//test/util:socket_util",
-        "//test/util:test_util",
     ],
     alwayslink = 1,
 )
@@ -2685,6 +2682,9 @@ cc_library(
     ],
     deps = [
         ":socket_ip_udp_unbound_external_networking",
+        "//test/util:socket_util",
+        "//test/util:test_util",
+        "@com_google_absl//absl/cleanup",
         gtest,
     ],
     alwayslink = 1,
diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc
index 3ef8b0327..c2dc8174c 100644
--- a/test/syscalls/linux/epoll.cc
+++ b/test/syscalls/linux/epoll.cc
@@ -30,6 +30,7 @@
 #include "test/util/file_descriptor.h"
 #include "test/util/posix_error.h"
 #include "test/util/test_util.h"
+#include "test/util/thread_util.h"
 
 namespace gvisor {
 namespace testing {
@@ -496,6 +497,41 @@ TEST(EpollTest, PipeReaderHupAfterWriterClosed) {
   EXPECT_EQ(result[0].data.u64, kMagicConstant);
 }
 
+TEST(EpollTest, DoubleLayerEpoll) {
+  int pipefds[2];
+  ASSERT_THAT(pipe2(pipefds, O_NONBLOCK), SyscallSucceeds());
+  FileDescriptor rfd(pipefds[0]);
+  FileDescriptor wfd(pipefds[1]);
+
+  auto epfd1 = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  ASSERT_NO_ERRNO(
+      RegisterEpollFD(epfd1.get(), rfd.get(), EPOLLIN | EPOLLHUP, rfd.get()));
+
+  auto epfd2 = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  ASSERT_NO_ERRNO(RegisterEpollFD(epfd2.get(), epfd1.get(), EPOLLIN | EPOLLHUP,
+                                  epfd1.get()));
+
+  // Write to wfd and then check if epoll events were generated correctly.
+  // Run this loop twice to check that the event in epfd1 is cleared each time.
+  constexpr char data[] = "data";
+  for (int i = 0; i < 2; ++i) {
+    ScopedThread thread1([&wfd, &data]() {
+      sleep(1);
+      ASSERT_EQ(WriteFd(wfd.get(), data, sizeof(data)), sizeof(data));
+    });
+
+    struct epoll_event ret_events[2];
+    ASSERT_THAT(RetryEINTR(epoll_wait)(epfd2.get(), ret_events, 2, 5000),
+                SyscallSucceedsWithValue(1));
+    ASSERT_EQ(ret_events[0].data.fd, epfd1.get());
+    ASSERT_THAT(RetryEINTR(epoll_wait)(epfd1.get(), ret_events, 2, 5000),
+                SyscallSucceedsWithValue(1));
+    ASSERT_EQ(ret_events[0].data.fd, rfd.get());
+    char readBuf[sizeof(data)];
+    ASSERT_EQ(ReadFd(rfd.get(), readBuf, sizeof(data)), sizeof(data));
+  }
+}
+
 }  // namespace
 
 }  // namespace testing
diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc
index a1216d23f..d18e616d0 100644
--- a/test/syscalls/linux/ip_socket_test_util.cc
+++ b/test/syscalls/linux/ip_socket_test_util.cc
@@ -16,6 +16,7 @@
 
 #include <net/if.h>
 #include <netinet/in.h>
+#include <netpacket/packet.h>
 #include <sys/socket.h>
 
 #include <cstring>
@@ -196,75 +197,53 @@ SocketKind IPv6TCPUnboundSocket(int type) {
       UnboundSocketCreator(AF_INET6, type | SOCK_STREAM, IPPROTO_TCP)};
 }
 
-PosixError IfAddrHelper::Load() {
-  Release();
-#ifndef ANDROID
-  RETURN_ERROR_IF_SYSCALL_FAIL(getifaddrs(&ifaddr_));
-#else
-  // Android does not support getifaddrs in r22.
-  return PosixError(ENOSYS, "getifaddrs");
-#endif
-  return NoError();
-}
-
-void IfAddrHelper::Release() {
-  if (ifaddr_) {
-#ifndef ANDROID
-    // Android does not support freeifaddrs in r22.
-    freeifaddrs(ifaddr_);
-#endif
-    ifaddr_ = nullptr;
-  }
-}
-
-std::vector<std::string> IfAddrHelper::InterfaceList(int family) const {
-  std::vector<std::string> names;
-  for (auto ifa = ifaddr_; ifa != NULL; ifa = ifa->ifa_next) {
-    if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != family) {
-      continue;
-    }
-    names.emplace(names.end(), ifa->ifa_name);
-  }
-  return names;
-}
-
-const sockaddr* IfAddrHelper::GetAddr(int family, std::string name) const {
-  for (auto ifa = ifaddr_; ifa != NULL; ifa = ifa->ifa_next) {
-    if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != family) {
-      continue;
-    }
-    if (name == ifa->ifa_name) {
-      return ifa->ifa_addr;
-    }
-  }
-  return nullptr;
-}
-
-PosixErrorOr<int> IfAddrHelper::GetIndex(std::string name) const {
-  return InterfaceIndex(name);
-}
-
 std::string GetAddr4Str(const in_addr* a) {
   char str[INET_ADDRSTRLEN];
-  inet_ntop(AF_INET, a, str, sizeof(str));
-  return std::string(str);
+  return inet_ntop(AF_INET, a, str, sizeof(str));
 }
 
 std::string GetAddr6Str(const in6_addr* a) {
   char str[INET6_ADDRSTRLEN];
-  inet_ntop(AF_INET6, a, str, sizeof(str));
-  return std::string(str);
+  return inet_ntop(AF_INET6, a, str, sizeof(str));
 }
 
 std::string GetAddrStr(const sockaddr* a) {
-  if (a->sa_family == AF_INET) {
-    auto src = &(reinterpret_cast<const sockaddr_in*>(a)->sin_addr);
-    return GetAddr4Str(src);
-  } else if (a->sa_family == AF_INET6) {
-    auto src = &(reinterpret_cast<const sockaddr_in6*>(a)->sin6_addr);
-    return GetAddr6Str(src);
+  switch (a->sa_family) {
+    case AF_INET: {
+      return GetAddr4Str(&(reinterpret_cast<const sockaddr_in*>(a)->sin_addr));
+    }
+    case AF_INET6: {
+      return GetAddr6Str(
+          &(reinterpret_cast<const sockaddr_in6*>(a)->sin6_addr));
+    }
+    case AF_PACKET: {
+      const sockaddr_ll& ll = *reinterpret_cast<const sockaddr_ll*>(a);
+      std::ostringstream ss;
+      ss << std::hex;
+      ss << std::showbase;
+      ss << '{';
+      ss << " protocol=" << ntohs(ll.sll_protocol);
+      ss << " ifindex=" << ll.sll_ifindex;
+      ss << " hatype=" << ll.sll_hatype;
+      ss << " pkttype=" << static_cast<unsigned short>(ll.sll_pkttype);
+      if (ll.sll_halen != 0) {
+        ss << " addr=";
+        for (unsigned char i = 0; i < ll.sll_halen; ++i) {
+          if (i != 0) {
+            ss << ':';
+          }
+          ss << static_cast<unsigned short>(ll.sll_addr[i]);
+        }
+      }
+      ss << " }";
+      return ss.str();
+    }
+    default: {
+      std::ostringstream ss;
+      ss << "invalid(sa_family=" << a->sa_family << ")";
+      return ss.str();
+    }
   }
-  return std::string("<invalid>");
 }
 
 }  // namespace testing
diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h
index 556838356..957006e25 100644
--- a/test/syscalls/linux/ip_socket_test_util.h
+++ b/test/syscalls/linux/ip_socket_test_util.h
@@ -115,25 +115,6 @@ SocketKind IPv4TCPUnboundSocket(int type);
 // created with AF_INET6, SOCK_STREAM, IPPROTO_TCP and the given type.
 SocketKind IPv6TCPUnboundSocket(int type);
 
-// IfAddrHelper is a helper class that determines the local interfaces present
-// and provides functions to obtain their names, index numbers, and IP address.
-class IfAddrHelper {
- public:
-  IfAddrHelper() : ifaddr_(nullptr) {}
-  ~IfAddrHelper() { Release(); }
-
-  PosixError Load();
-  void Release();
-
-  std::vector<std::string> InterfaceList(int family) const;
-
-  const sockaddr* GetAddr(int family, std::string name) const;
-  PosixErrorOr<int> GetIndex(std::string name) const;
-
- private:
-  struct ifaddrs* ifaddr_;
-};
-
 // GetAddr4Str returns the given IPv4 network address structure as a string.
 std::string GetAddr4Str(const in_addr* a);
 
diff --git a/test/syscalls/linux/proc_isolated.cc b/test/syscalls/linux/proc_isolated.cc
index a38689667..38d079d2b 100644
--- a/test/syscalls/linux/proc_isolated.cc
+++ b/test/syscalls/linux/proc_isolated.cc
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <linux/msg.h>
 #include <linux/sem.h>
 #include <linux/shm.h>
 
@@ -73,6 +74,27 @@ TEST(ProcDefaults, PresenceOfSem) {
   ASSERT_EQ(semmni, SEMMNI);
 }
 
+TEST(ProcDefaults, PresenceOfMsgMniMaxMnb) {
+  uint64_t msgmni = 0;
+  uint64_t msgmax = 0;
+  uint64_t msgmnb = 0;
+
+  std::string proc_file;
+  proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/msgmni"));
+  ASSERT_FALSE(proc_file.empty());
+  ASSERT_TRUE(absl::SimpleAtoi(proc_file, &msgmni));
+  proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/msgmax"));
+  ASSERT_FALSE(proc_file.empty());
+  ASSERT_TRUE(absl::SimpleAtoi(proc_file, &msgmax));
+  proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/msgmnb"));
+  ASSERT_FALSE(proc_file.empty());
+  ASSERT_TRUE(absl::SimpleAtoi(proc_file, &msgmnb));
+
+  ASSERT_EQ(msgmni, MSGMNI);
+  ASSERT_EQ(msgmax, MSGMAX);
+  ASSERT_EQ(msgmnb, MSGMNB);
+}
+
 }  // namespace
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc
index ef1db47ee..ef176cbee 100644
--- a/test/syscalls/linux/raw_socket.cc
+++ b/test/syscalls/linux/raw_socket.cc
@@ -25,6 +25,7 @@
 #include <algorithm>
 
 #include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
 #include "test/syscalls/linux/unix_domain_socket_test_util.h"
 #include "test/util/capability_util.h"
 #include "test/util/file_descriptor.h"
@@ -39,6 +40,9 @@ namespace testing {
 
 namespace {
 
+using ::testing::IsNull;
+using ::testing::NotNull;
+
 // Fixture for tests parameterized by protocol.
 class RawSocketTest : public ::testing::TestWithParam<std::tuple<int, int>> {
  protected:
@@ -1057,6 +1061,131 @@ TEST(RawSocketTest, BindReceive) {
   ASSERT_NO_FATAL_FAILURE(TestRawSocketMaybeBindReceive(true /* do_bind */));
 }
 
+TEST(RawSocketTest, ReceiveIPPacketInfo) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
+
+  FileDescriptor raw =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
+
+  const sockaddr_in addr_ = {
+      .sin_family = AF_INET,
+      .sin_addr = {.s_addr = htonl(INADDR_LOOPBACK)},
+  };
+  ASSERT_THAT(
+      bind(raw.get(), reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)),
+      SyscallSucceeds());
+
+  // Register to receive IP packet info.
+  constexpr int one = 1;
+  ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IP, IP_PKTINFO, &one, sizeof(one)),
+              SyscallSucceeds());
+
+  constexpr char send_buf[] = "malformed UDP";
+  ASSERT_THAT(sendto(raw.get(), send_buf, sizeof(send_buf), 0 /* flags */,
+                     reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  struct {
+    iphdr ip;
+    char data[sizeof(send_buf)];
+
+    // Extra space in the receive buffer should be unused.
+    char unused_space;
+  } ABSL_ATTRIBUTE_PACKED recv_buf;
+  iovec recv_iov = {
+      .iov_base = &recv_buf,
+      .iov_len = sizeof(recv_buf),
+  };
+  in_pktinfo received_pktinfo;
+  char recv_cmsg_buf[CMSG_SPACE(sizeof(received_pktinfo))];
+  msghdr recv_msg = {
+      .msg_iov = &recv_iov,
+      .msg_iovlen = 1,
+      .msg_control = recv_cmsg_buf,
+      .msg_controllen = CMSG_LEN(sizeof(received_pktinfo)),
+  };
+  ASSERT_THAT(RetryEINTR(recvmsg)(raw.get(), &recv_msg, 0),
+              SyscallSucceedsWithValue(sizeof(iphdr) + sizeof(send_buf)));
+  EXPECT_EQ(memcmp(send_buf, &recv_buf.data, sizeof(send_buf)), 0);
+  EXPECT_EQ(recv_buf.ip.version, static_cast<unsigned int>(IPVERSION));
+  // IHL holds the number of header bytes in 4 byte units.
+  EXPECT_EQ(recv_buf.ip.ihl, sizeof(iphdr) / 4);
+  EXPECT_EQ(ntohs(recv_buf.ip.tot_len), sizeof(iphdr) + sizeof(send_buf));
+  EXPECT_EQ(recv_buf.ip.protocol, IPPROTO_UDP);
+  EXPECT_EQ(ntohl(recv_buf.ip.saddr), INADDR_LOOPBACK);
+  EXPECT_EQ(ntohl(recv_buf.ip.daddr), INADDR_LOOPBACK);
+
+  cmsghdr* cmsg = CMSG_FIRSTHDR(&recv_msg);
+  ASSERT_THAT(cmsg, NotNull());
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(received_pktinfo)));
+  EXPECT_EQ(cmsg->cmsg_level, IPPROTO_IP);
+  EXPECT_EQ(cmsg->cmsg_type, IP_PKTINFO);
+  memcpy(&received_pktinfo, CMSG_DATA(cmsg), sizeof(received_pktinfo));
+  EXPECT_EQ(received_pktinfo.ipi_ifindex,
+            ASSERT_NO_ERRNO_AND_VALUE(GetLoopbackIndex()));
+  EXPECT_EQ(ntohl(received_pktinfo.ipi_spec_dst.s_addr), INADDR_LOOPBACK);
+  EXPECT_EQ(ntohl(received_pktinfo.ipi_addr.s_addr), INADDR_LOOPBACK);
+
+  EXPECT_THAT(CMSG_NXTHDR(&recv_msg, cmsg), IsNull());
+}
+
+TEST(RawSocketTest, ReceiveIPv6PacketInfo) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
+
+  FileDescriptor raw =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP));
+
+  const sockaddr_in6 addr_ = {
+      .sin6_family = AF_INET6,
+      .sin6_addr = in6addr_loopback,
+  };
+  ASSERT_THAT(
+      bind(raw.get(), reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)),
+      SyscallSucceeds());
+
+  // Register to receive IPv6 packet info.
+  constexpr int one = 1;
+  ASSERT_THAT(
+      setsockopt(raw.get(), IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)),
+      SyscallSucceeds());
+
+  constexpr char send_buf[] = "malformed UDP";
+  ASSERT_THAT(sendto(raw.get(), send_buf, sizeof(send_buf), 0 /* flags */,
+                     reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  char recv_buf[sizeof(send_buf) + 1];
+  iovec recv_iov = {
+      .iov_base = recv_buf,
+      .iov_len = sizeof(recv_buf),
+  };
+  in6_pktinfo received_pktinfo;
+  char recv_cmsg_buf[CMSG_SPACE(sizeof(received_pktinfo))];
+  msghdr recv_msg = {
+      .msg_iov = &recv_iov,
+      .msg_iovlen = 1,
+      .msg_control = recv_cmsg_buf,
+      .msg_controllen = CMSG_LEN(sizeof(received_pktinfo)),
+  };
+  ASSERT_THAT(RetryEINTR(recvmsg)(raw.get(), &recv_msg, 0),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+  EXPECT_EQ(memcmp(send_buf, recv_buf, sizeof(send_buf)), 0);
+
+  cmsghdr* cmsg = CMSG_FIRSTHDR(&recv_msg);
+  ASSERT_THAT(cmsg, NotNull());
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(received_pktinfo)));
+  EXPECT_EQ(cmsg->cmsg_level, IPPROTO_IPV6);
+  EXPECT_EQ(cmsg->cmsg_type, IPV6_PKTINFO);
+  memcpy(&received_pktinfo, CMSG_DATA(cmsg), sizeof(received_pktinfo));
+  EXPECT_EQ(received_pktinfo.ipi6_ifindex,
+            ASSERT_NO_ERRNO_AND_VALUE(GetLoopbackIndex()));
+  ASSERT_EQ(memcmp(&received_pktinfo.ipi6_addr, &in6addr_loopback,
+                   sizeof(in6addr_loopback)),
+            0);
+
+  EXPECT_THAT(CMSG_NXTHDR(&recv_msg, cmsg), IsNull());
+}
+
 }  // namespace
 
 }  // namespace testing
diff --git a/test/syscalls/linux/socket_ip_udp_unbound_external_networking.cc b/test/syscalls/linux/socket_ip_udp_unbound_external_networking.cc
deleted file mode 100644
index af2459a2f..000000000
--- a/test/syscalls/linux/socket_ip_udp_unbound_external_networking.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "test/syscalls/linux/socket_ip_udp_unbound_external_networking.h"
-
-#include "test/util/socket_util.h"
-#include "test/util/test_util.h"
-
-namespace gvisor {
-namespace testing {
-
-void IPUDPUnboundExternalNetworkingSocketTest::SetUp() {
-  // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its
-  // IPv4 address on eth0.
-  found_net_interfaces_ = false;
-
-  // Get interface list.
-  ASSERT_NO_ERRNO(if_helper_.Load());
-  std::vector<std::string> if_names = if_helper_.InterfaceList(AF_INET);
-  if (if_names.size() != 2) {
-    return;
-  }
-
-  // Figure out which interface is where.
-  std::string lo = if_names[0];
-  std::string eth = if_names[1];
-  if (lo != "lo") std::swap(lo, eth);
-  if (lo != "lo") return;
-
-  lo_if_idx_ = ASSERT_NO_ERRNO_AND_VALUE(if_helper_.GetIndex(lo));
-  auto lo_if_addr = if_helper_.GetAddr(AF_INET, lo);
-  if (lo_if_addr == nullptr) {
-    return;
-  }
-  lo_if_addr_ = *reinterpret_cast<const sockaddr_in*>(lo_if_addr);
-
-  eth_if_idx_ = ASSERT_NO_ERRNO_AND_VALUE(if_helper_.GetIndex(eth));
-  auto eth_if_addr = if_helper_.GetAddr(AF_INET, eth);
-  if (eth_if_addr == nullptr) {
-    return;
-  }
-  eth_if_addr_ = *reinterpret_cast<const sockaddr_in*>(eth_if_addr);
-
-  found_net_interfaces_ = true;
-}
-
-}  // namespace testing
-}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_unbound_external_networking.h b/test/syscalls/linux/socket_ip_udp_unbound_external_networking.h
index 2e8aab129..92c20eba9 100644
--- a/test/syscalls/linux/socket_ip_udp_unbound_external_networking.h
+++ b/test/syscalls/linux/socket_ip_udp_unbound_external_networking.h
@@ -16,29 +16,13 @@
 #define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_UNBOUND_EXTERNAL_NETWORKING_H_
 
 #include "test/syscalls/linux/ip_socket_test_util.h"
-#include "test/util/socket_util.h"
 
 namespace gvisor {
 namespace testing {
 
 // Test fixture for tests that apply to unbound IP UDP sockets in a sandbox
 // with external networking support.
-class IPUDPUnboundExternalNetworkingSocketTest : public SimpleSocketTest {
- protected:
-  void SetUp() override;
-
-  IfAddrHelper if_helper_;
-
-  // found_net_interfaces_ is set to false if SetUp() could not obtain
-  // all interface infos that we need.
-  bool found_net_interfaces_;
-
-  // Interface infos.
-  int lo_if_idx_;
-  int eth_if_idx_;
-  sockaddr_in lo_if_addr_;
-  sockaddr_in eth_if_addr_;
-};
+class IPUDPUnboundExternalNetworkingSocketTest : public SimpleSocketTest {};
 
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc
index c6e775b2a..6c67ec51e 100644
--- a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc
@@ -14,9 +14,62 @@
 
 #include "test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h"
 
+#include <net/if.h>
+
+#include "absl/cleanup/cleanup.h"
+#include "test/util/socket_util.h"
+#include "test/util/test_util.h"
+
 namespace gvisor {
 namespace testing {
 
+void IPv4UDPUnboundExternalNetworkingSocketTest::SetUp() {
+#ifdef ANDROID
+  GTEST_SKIP() << "Android does not support getifaddrs in r22";
+#endif
+  // Record the first IPv4 address found on "lo" and on any other interface.
+  ifaddrs* ifaddr;
+  ASSERT_THAT(getifaddrs(&ifaddr), SyscallSucceeds());
+  auto cleanup = absl::MakeCleanup([ifaddr] { freeifaddrs(ifaddr); });
+
+  for (const ifaddrs* ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) {
+    ASSERT_NE(ifa->ifa_name, nullptr);
+    // getifaddrs(3) allows ifa_addr to be null (interface without an
+    // address), so skip such entries instead of failing the fixture.
+    if (ifa->ifa_addr == nullptr || ifa->ifa_addr->sa_family != AF_INET) {
+      continue;
+    }
+
+    std::optional<std::pair<int, sockaddr_in>>& if_pair = *[this, ifa]() {
+      if (strcmp(ifa->ifa_name, "lo") == 0) {
+        return &lo_if_;
+      }
+      return &eth_if_;
+    }();
+
+    const int if_index =
+        ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex(ifa->ifa_name));
+
+    std::cout << " name=" << ifa->ifa_name
+              << " addr=" << GetAddrStr(ifa->ifa_addr) << " index=" << if_index
+              << " has_value=" << if_pair.has_value() << std::endl;
+
+    if (if_pair.has_value()) {
+      continue;
+    }
+
+    if_pair = std::make_pair(
+        if_index, *reinterpret_cast<const sockaddr_in*>(ifa->ifa_addr));
+  }
+
+  if (!(eth_if_.has_value() && lo_if_.has_value())) {
+    // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its
+    // IPv4 address on eth0.
+    GTEST_SKIP() << " eth_if_.has_value()=" << eth_if_.has_value()
+                 << " lo_if_.has_value()=" << lo_if_.has_value();
+  }
+}
+
 TestAddress V4EmptyAddress() {
   TestAddress t("V4Empty");
   t.addr.ss_family = AF_INET;
@@ -28,7 +81,6 @@ TestAddress V4EmptyAddress() {
 // the destination port number.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        UDPBroadcastReceivedOnExpectedPort) {
-  SKIP_IF(!found_net_interfaces_);
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto rcvr1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto rcvr2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
@@ -101,8 +153,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 // not a unicast address.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        UDPBroadcastReceivedOnExpectedAddresses) {
-  SKIP_IF(!found_net_interfaces_);
-
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto rcvr1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto rcvr2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
@@ -149,7 +199,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
   // Bind the non-receiving socket to the unicast ethernet address.
   auto norecv_addr = rcv1_addr;
   reinterpret_cast<sockaddr_in*>(&norecv_addr.addr)->sin_addr =
-      eth_if_addr_.sin_addr;
+      eth_if_addr().sin_addr;
   ASSERT_THAT(
       bind(norcv->get(), AsSockAddr(&norecv_addr.addr), norecv_addr.addr_len),
       SyscallSucceedsWithValue(0));
@@ -184,7 +234,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 //                     (UDPBroadcastSendRecvOnSocketBoundToAny).
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        UDPBroadcastSendRecvOnSocketBoundToBroadcast) {
-  SKIP_IF(!found_net_interfaces_);
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
   // Enable SO_BROADCAST.
@@ -224,7 +273,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 //                     (UDPBroadcastSendRecvOnSocketBoundToBroadcast).
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        UDPBroadcastSendRecvOnSocketBoundToAny) {
-  SKIP_IF(!found_net_interfaces_);
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
   // Enable SO_BROADCAST.
@@ -261,7 +309,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 // Verifies that a UDP broadcast fails to send on a socket with SO_BROADCAST
 // disabled.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendBroadcast) {
-  SKIP_IF(!found_net_interfaces_);
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
   // Broadcast a test message without having enabled SO_BROADCAST on the sending
@@ -306,12 +353,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendUnicastOnUnbound) {
 // set interface or group membership.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        TestSendMulticastSelfNoGroup) {
-  // FIXME(b/125485338): A group membership is not required for external
-  // multicast on gVisor.
-  SKIP_IF(IsRunningOnGvisor());
-
-  SKIP_IF(!found_net_interfaces_);
-
   auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
   auto bind_addr = V4Any();
@@ -345,7 +386,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 // Check that multicast packets will be delivered to the sending socket without
 // setting an interface.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastSelf) {
-  SKIP_IF(!found_net_interfaces_);
   auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
   auto bind_addr = V4Any();
@@ -388,7 +428,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastSelf) {
 // set interface and IP_MULTICAST_LOOP disabled.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        TestSendMulticastSelfLoopOff) {
-  SKIP_IF(!found_net_interfaces_);
   auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
   auto bind_addr = V4Any();
@@ -434,12 +473,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 // Check that multicast packets won't be delivered to another socket with no
 // set interface or group membership.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastNoGroup) {
-  // FIXME(b/125485338): A group membership is not required for external
-  // multicast on gVisor.
-  SKIP_IF(IsRunningOnGvisor());
-
-  SKIP_IF(!found_net_interfaces_);
-
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
@@ -476,7 +509,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastNoGroup) {
 // Check that multicast packets will be delivered to another socket without
 // setting an interface.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticast) {
-  SKIP_IF(!found_net_interfaces_);
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
@@ -522,7 +554,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticast) {
 // set interface and IP_MULTICAST_LOOP disabled on the sending socket.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        TestSendMulticastSenderNoLoop) {
-  SKIP_IF(!found_net_interfaces_);
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
@@ -572,8 +603,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 // setting an interface and IP_MULTICAST_LOOP disabled on the receiving socket.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        TestSendMulticastReceiverNoLoop) {
-  SKIP_IF(!found_net_interfaces_);
-
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
@@ -624,7 +653,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 // and both will receive data on it when bound to the ANY address.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        TestSendMulticastToTwoBoundToAny) {
-  SKIP_IF(!found_net_interfaces_);
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   std::unique_ptr<FileDescriptor> receivers[2] = {
       ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
@@ -689,7 +717,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 // and both will receive data on it when bound to the multicast address.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        TestSendMulticastToTwoBoundToMulticastAddress) {
-  SKIP_IF(!found_net_interfaces_);
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   std::unique_ptr<FileDescriptor> receivers[2] = {
       ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
@@ -757,7 +784,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 // multicast address, both will receive data.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        TestSendMulticastToTwoBoundToAnyAndMulticastAddress) {
-  SKIP_IF(!found_net_interfaces_);
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   std::unique_ptr<FileDescriptor> receivers[2] = {
       ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
@@ -829,8 +855,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 // is not a multicast address.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        IpMulticastLoopbackFromAddr) {
-  SKIP_IF(!found_net_interfaces_);
-
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
@@ -893,8 +917,6 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
 // interface, a multicast packet sent out uses the latter as its source address.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        IpMulticastLoopbackIfNicAndAddr) {
-  SKIP_IF(!found_net_interfaces_);
-
   // Create receiver, bind to ANY and join the multicast group.
   auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto receiver_addr = V4Any();
@@ -906,11 +928,15 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
                           &receiver_addr_len),
               SyscallSucceeds());
   EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
-  int receiver_port =
+  const in_port_t receiver_port =
       reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
-  ip_mreqn group = {};
-  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
-  group.imr_ifindex = lo_if_idx_;
+  const ip_mreqn group = {
+      .imr_multiaddr =
+          {
+              .s_addr = inet_addr(kMulticastAddress),
+          },
+      .imr_ifindex = lo_if_idx(),
+  };
   ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
                          sizeof(group)),
               SyscallSucceeds());
@@ -918,9 +944,10 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
   // Set outgoing multicast interface config, with NIC and addr pointing to
   // different interfaces.
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
-  ip_mreqn iface = {};
-  iface.imr_ifindex = lo_if_idx_;
-  iface.imr_address = eth_if_addr_.sin_addr;
+  const ip_mreqn iface = {
+      .imr_address = eth_if_addr().sin_addr,
+      .imr_ifindex = lo_if_idx(),
+  };
   ASSERT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
                          sizeof(iface)),
               SyscallSucceeds());
@@ -928,67 +955,104 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
   // Send a multicast packet.
   auto sendto_addr = V4Multicast();
   reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port = receiver_port;
-  char send_buf[4] = {};
+  char send_buf[4];
   ASSERT_THAT(
       RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
                          AsSockAddr(&sendto_addr.addr), sendto_addr.addr_len),
       SyscallSucceedsWithValue(sizeof(send_buf)));
 
   // Receive a multicast packet.
-  char recv_buf[sizeof(send_buf)] = {};
+  char recv_buf[sizeof(send_buf) + 1];
   auto src_addr = V4EmptyAddress();
   ASSERT_THAT(
       RetryEINTR(recvfrom)(receiver->get(), recv_buf, sizeof(recv_buf), 0,
                            AsSockAddr(&src_addr.addr), &src_addr.addr_len),
-      SyscallSucceedsWithValue(sizeof(recv_buf)));
-  ASSERT_EQ(sizeof(struct sockaddr_in), src_addr.addr_len);
-  sockaddr_in* src_addr_in = reinterpret_cast<sockaddr_in*>(&src_addr.addr);
-
-  // FIXME (b/137781162): When sending a multicast packet use the proper logic
-  // to determine the packet's src-IP.
-  SKIP_IF(IsRunningOnGvisor());
+      SyscallSucceedsWithValue(sizeof(send_buf)));
+  ASSERT_EQ(src_addr.addr_len, sizeof(struct sockaddr_in));
 
   // Verify the received source address.
-  EXPECT_EQ(eth_if_addr_.sin_addr.s_addr, src_addr_in->sin_addr.s_addr);
+  //
+  // TODO(https://gvisor.dev/issue/6686): gVisor follows the strong host model,
+  // so it will not send from loopback with the ethernet device's address; the
+  // packet carries the loopback address instead.
+  if (IsRunningOnGvisor()) {
+    EXPECT_EQ(GetAddrStr(AsSockAddr(&src_addr.addr)),
+              GetAddr4Str(&lo_if_addr().sin_addr));
+  } else {
+    EXPECT_EQ(GetAddrStr(AsSockAddr(&src_addr.addr)),
+              GetAddr4Str(&eth_if_addr().sin_addr));
+  }
 }
 
 // Check that when we are bound to one interface we can set IP_MULTICAST_IF to
 // another interface.
 TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
        IpMulticastLoopbackBindToOneIfSetMcastIfToAnother) {
-  SKIP_IF(!found_net_interfaces_);
-
-  // FIXME (b/137790511): When bound to one interface it is not possible to set
-  // IP_MULTICAST_IF to a different interface.
-  SKIP_IF(IsRunningOnGvisor());
-
-  // Create sender and bind to eth interface.
-  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
-  ASSERT_THAT(
-      bind(sender->get(), AsSockAddr(&eth_if_addr_), sizeof(eth_if_addr_)),
-      SyscallSucceeds());
-
   // Run through all possible combinations of index and address for
   // IP_MULTICAST_IF that selects the loopback interface.
-  struct {
-    int imr_ifindex;
-    struct in_addr imr_address;
-  } test_data[] = {
-      {lo_if_idx_, {}},
-      {0, lo_if_addr_.sin_addr},
-      {lo_if_idx_, lo_if_addr_.sin_addr},
-      {lo_if_idx_, eth_if_addr_.sin_addr},
+  ip_mreqn ifaces[] = {
+      {
+          .imr_address = {},
+          .imr_ifindex = lo_if_idx(),
+      },
+      {
+          .imr_address = lo_if_addr().sin_addr,
+          .imr_ifindex = 0,
+      },
+      {
+          .imr_address = lo_if_addr().sin_addr,
+          .imr_ifindex = lo_if_idx(),
+      },
+      {
+          .imr_address = eth_if_addr().sin_addr,
+          .imr_ifindex = lo_if_idx(),
+      },
   };
-  for (auto t : test_data) {
-    ip_mreqn iface = {};
-    iface.imr_ifindex = t.imr_ifindex;
-    iface.imr_address = t.imr_address;
-    EXPECT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
-                           sizeof(iface)),
-                SyscallSucceeds())
-        << "imr_index=" << iface.imr_ifindex
-        << " imr_address=" << GetAddr4Str(&iface.imr_address);
+
+  {
+    auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+    ASSERT_THAT(
+        bind(sender->get(), AsSockAddr(&eth_if_addr()), sizeof(eth_if_addr())),
+        SyscallSucceeds());
+
+    for (const ip_mreqn& iface : ifaces) {
+      EXPECT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                             sizeof(iface)),
+                  SyscallSucceeds())
+          << " imr_index=" << iface.imr_ifindex
+          << " imr_address=" << GetAddr4Str(&iface.imr_address);
+    }
+  }
+
+  {
+    char eth_if_name[IF_NAMESIZE];
+    memset(eth_if_name, 0xAA, sizeof(eth_if_name));
+    ASSERT_NE(if_indextoname(eth_if_idx(), eth_if_name), nullptr)
+        << strerror(errno);
+    auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+    ASSERT_THAT(setsockopt(sender->get(), SOL_SOCKET, SO_BINDTODEVICE,
+                           eth_if_name, sizeof(eth_if_name)),
+                SyscallSucceeds());
+
+    for (const ip_mreqn& iface : ifaces) {
+      // FIXME(b/137790511): Disallow mismatching IP_MULTICAST_IF and
+      // SO_BINDTODEVICE.
+      if (IsRunningOnGvisor()) {
+        EXPECT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_IF,
+                               &iface, sizeof(iface)),
+                    SyscallSucceeds())
+            << " imr_index=" << iface.imr_ifindex
+            << " imr_address=" << GetAddr4Str(&iface.imr_address);
+      } else {
+        EXPECT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_IF,
+                               &iface, sizeof(iface)),
+                    SyscallFailsWithErrno(EINVAL))
+            << " imr_index=" << iface.imr_ifindex
+            << " imr_address=" << GetAddr4Str(&iface.imr_address);
+      }
+    }
   }
 }
+
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h
index 20922ac1f..ac917b32c 100644
--- a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h
@@ -22,8 +22,22 @@ namespace testing {
 
 // Test fixture for tests that apply to unbound IPv4 UDP sockets in a sandbox
 // with external networking support.
-using IPv4UDPUnboundExternalNetworkingSocketTest =
-    IPUDPUnboundExternalNetworkingSocketTest;
+class IPv4UDPUnboundExternalNetworkingSocketTest
+    : public IPUDPUnboundExternalNetworkingSocketTest {
+ protected:
+  void SetUp() override;
+
+  int lo_if_idx() const { return std::get<0>(lo_if_.value()); }
+  int eth_if_idx() const { return std::get<0>(eth_if_.value()); }
+
+  const sockaddr_in& lo_if_addr() const { return std::get<1>(lo_if_.value()); }
+  const sockaddr_in& eth_if_addr() const {
+    return std::get<1>(eth_if_.value());
+  }
+
+ private:
+  std::optional<std::pair<int, sockaddr_in>> lo_if_, eth_if_;
+};
 
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv6_udp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv6_udp_unbound_external_networking.cc
index d72b6b53f..c6e563cd3 100644
--- a/test/syscalls/linux/socket_ipv6_udp_unbound_external_networking.cc
+++ b/test/syscalls/linux/socket_ipv6_udp_unbound_external_networking.cc
@@ -18,8 +18,6 @@ namespace gvisor {
 namespace testing {
 
 TEST_P(IPv6UDPUnboundExternalNetworkingSocketTest, TestJoinLeaveMulticast) {
-  SKIP_IF(!found_net_interfaces_);
-
   auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
   auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
 
diff --git a/test/util/socket_util.h b/test/util/socket_util.h
index 0e2be63cc..588f041b7 100644
--- a/test/util/socket_util.h
+++ b/test/util/socket_util.h
@@ -554,15 +554,27 @@ uint16_t ICMPChecksum(struct icmphdr icmphdr, const char* payload,
 inline sockaddr* AsSockAddr(sockaddr_storage* s) {
   return reinterpret_cast<sockaddr*>(s);
 }
+inline const sockaddr* AsSockAddr(const sockaddr_storage* s) {
+  return reinterpret_cast<const sockaddr*>(s);
+}
 inline sockaddr* AsSockAddr(sockaddr_in* s) {
   return reinterpret_cast<sockaddr*>(s);
 }
+inline const sockaddr* AsSockAddr(const sockaddr_in* s) {
+  return reinterpret_cast<const sockaddr*>(s);
+}
 inline sockaddr* AsSockAddr(sockaddr_in6* s) {
   return reinterpret_cast<sockaddr*>(s);
 }
+inline const sockaddr* AsSockAddr(const sockaddr_in6* s) {
+  return reinterpret_cast<const sockaddr*>(s);
+}
 inline sockaddr* AsSockAddr(sockaddr_un* s) {
   return reinterpret_cast<sockaddr*>(s);
 }
+inline const sockaddr* AsSockAddr(const sockaddr_un* s) {
+  return reinterpret_cast<const sockaddr*>(s);
+}
 
 PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr);
 
diff --git a/tools/bazel.mk b/tools/bazel.mk
index 1444423e4..68b804ec4 100644
--- a/tools/bazel.mk
+++ b/tools/bazel.mk
@@ -186,8 +186,8 @@ build_paths = \
   (set -euo pipefail; \
   $(call wrapper,$(BAZEL) build $(BASE_OPTIONS) $(BAZEL_OPTIONS) $(1)) && \
   $(call wrapper,$(BAZEL) cquery $(BASE_OPTIONS) $(BAZEL_OPTIONS) $(1) --output=starlark --starlark:file=tools/show_paths.bzl) \
-  | xargs -r -n 1 -I {} bash -c 'test -e "{}" || exit 0; readlink -f "{}"' \
-  | xargs -r -n 1 -I {} bash -c 'set -euo pipefail; $(2)')
+  | xargs -r -I {} bash -c 'test -e "{}" || exit 0; readlink -f "{}"' \
+  | xargs -r -I {} bash -c 'set -euo pipefail; $(2)')
 
 clean = $(call header,CLEAN) && $(call wrapper,$(BAZEL) clean)
 build = $(call header,BUILD $(1)) && $(call build_paths,$(1),echo {})
diff --git a/tools/nogo/nogo.go b/tools/nogo/nogo.go
index d95d7652f..2f88f84db 100644
--- a/tools/nogo/nogo.go
+++ b/tools/nogo/nogo.go
@@ -293,6 +293,19 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi
 		break
 	}
 
+	// usesTypeParams is the set of Go standard library packages that use
+	// Go 1.18 type parameters.
+	//
+	// At the time of writing, the analysis tooling does not support type
+	// parameters and fails on these packages, so they are skipped entirely.
+	//
+	// TODO(b/201686256): remove once tooling can handle type parameters.
+	usesTypeParams := map[string]struct{}{
+		"constraints": struct{}{}, // golang.org/issue/45458
+		"maps":        struct{}{}, // golang.org/issue/47649
+		"slices":      struct{}{}, // golang.org/issue/45955
+	}
+
 	// Aggregate all files by directory.
 	packages := make(map[string]*PackageConfig)
 	for _, file := range config.Srcs {
@@ -306,10 +319,17 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi
 			continue // Not a file.
 		}
 		pkg := d[len(rootSrcPrefix):]
+
 		// Skip cmd packages and obvious test files: see above.
 		if strings.HasPrefix(pkg, "cmd/") || strings.HasSuffix(file, "_test.go") {
 			continue
 		}
+
+		if _, ok := usesTypeParams[pkg]; ok {
+			log.Printf("WARNING: Skipping package %q: type param analysis not yet supported", pkg)
+			continue
+		}
+
 		c, ok := packages[pkg]
 		if !ok {
 			c = &PackageConfig{