summaryrefslogtreecommitdiffhomepage
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--test/e2e/exec_test.go22
-rw-r--r--test/iptables/filter_input.go85
-rw-r--r--test/iptables/filter_output.go56
-rw-r--r--test/iptables/iptables_test.go86
-rw-r--r--test/iptables/iptables_util.go27
-rw-r--r--test/iptables/nat.go201
-rw-r--r--test/packetdrill/Dockerfile8
-rw-r--r--test/packetdrill/fin_wait2_timeout.pkt2
-rw-r--r--test/packetimpact/README.md531
-rw-r--r--test/packetimpact/dut/BUILD18
-rw-r--r--test/packetimpact/dut/posix_server.cc229
-rw-r--r--test/packetimpact/proto/BUILD12
-rw-r--r--test/packetimpact/proto/posix_server.proto150
-rw-r--r--test/packetimpact/testbench/BUILD31
-rw-r--r--test/packetimpact/testbench/connections.go245
-rw-r--r--test/packetimpact/testbench/dut.go363
-rw-r--r--test/packetimpact/testbench/dut_client.go28
-rw-r--r--test/packetimpact/testbench/layers.go507
-rw-r--r--test/packetimpact/testbench/rawsockets.go151
-rw-r--r--test/packetimpact/tests/BUILD21
-rw-r--r--test/packetimpact/tests/Dockerfile5
-rw-r--r--test/packetimpact/tests/defs.bzl106
-rw-r--r--test/packetimpact/tests/fin_wait2_timeout_test.go68
-rwxr-xr-xtest/packetimpact/tests/test_runner.sh246
-rw-r--r--test/perf/linux/futex_benchmark.cc144
-rw-r--r--test/root/runsc_test.go5
-rw-r--r--test/runner/runner.go27
-rw-r--r--test/syscalls/linux/proc_net.cc53
-rw-r--r--test/syscalls/linux/seccomp.cc32
-rw-r--r--test/syscalls/linux/stat.cc69
-rw-r--r--test/syscalls/linux/sticky.cc16
-rw-r--r--test/syscalls/linux/tuntap.cc105
32 files changed, 3464 insertions, 185 deletions
diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go
index 4074d2285..594c8e752 100644
--- a/test/e2e/exec_test.go
+++ b/test/e2e/exec_test.go
@@ -240,17 +240,7 @@ func TestExecEnvHasHome(t *testing.T) {
}
d := dockerutil.MakeDocker("exec-env-home-test")
- // We will check that HOME is set for root user, and also for a new
- // non-root user we will create.
- newUID := 1234
- newHome := "/foo/bar"
-
- // Create a new user with a home directory, and then sleep.
- script := fmt.Sprintf(`
- mkdir -p -m 777 %s && \
- adduser foo -D -u %d -h %s && \
- sleep 1000`, newHome, newUID, newHome)
- if err := d.Run("alpine", "/bin/sh", "-c", script); err != nil {
+ if err := d.Run("alpine", "sleep", "1000"); err != nil {
t.Fatalf("docker run failed: %v", err)
}
defer d.CleanUp()
@@ -264,7 +254,15 @@ func TestExecEnvHasHome(t *testing.T) {
t.Errorf("wanted exec output to contain %q, got %q", want, got)
}
- // Execute the same as uid 123 and expect newHome.
+ // Create a new user with a home directory.
+ newUID := 1234
+ newHome := "/foo/bar"
+ cmd := fmt.Sprintf("mkdir -p -m 777 %q && adduser foo -D -u %d -h %q", newHome, newUID, newHome)
+ if _, err := d.Exec("/bin/sh", "-c", cmd); err != nil {
+ t.Fatalf("docker exec failed: %v", err)
+ }
+
+ // Execute the same as the new user and expect newHome.
got, err = d.ExecAsUser(strconv.Itoa(newUID), "/bin/sh", "-c", "echo $HOME")
if err != nil {
t.Fatalf("docker exec failed: %v", err)
diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go
index 141d20fbb..4ccd4cce7 100644
--- a/test/iptables/filter_input.go
+++ b/test/iptables/filter_input.go
@@ -47,6 +47,8 @@ func init() {
RegisterTestCase(FilterInputJumpReturnDrop{})
RegisterTestCase(FilterInputJumpBuiltin{})
RegisterTestCase(FilterInputJumpTwice{})
+ RegisterTestCase(FilterInputDestination{})
+ RegisterTestCase(FilterInputInvertDestination{})
}
// FilterInputDropUDP tests that we can drop UDP traffic.
@@ -192,8 +194,14 @@ func (FilterInputDropTCPDestPort) ContainerAction(ip net.IP) error {
// LocalAction implements TestCase.LocalAction.
func (FilterInputDropTCPDestPort) LocalAction(ip net.IP) error {
- if err := connectTCP(ip, dropPort, sendloopDuration); err == nil {
- return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", dropPort)
+ // After the container sets its DROP rule, we shouldn't be able to connect.
+ // However, we may succeed in connecting if this runs before the container
+ // sets the rule. To avoid this race, we retry connecting until
+ // sendloopDuration has elapsed, ignoring whether the connect succeeds. The
+ // test works becuase the container will error if a connection is
+ // established after the rule is set.
+ for start := time.Now(); time.Since(start) < sendloopDuration; {
+ connectTCP(ip, dropPort, sendloopDuration-time.Since(start))
}
return nil
@@ -224,8 +232,14 @@ func (FilterInputDropTCPSrcPort) ContainerAction(ip net.IP) error {
// LocalAction implements TestCase.LocalAction.
func (FilterInputDropTCPSrcPort) LocalAction(ip net.IP) error {
- if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil {
- return fmt.Errorf("connection should not be accepted, but was")
+ // After the container sets its DROP rule, we shouldn't be able to connect.
+ // However, we may succeed in connecting if this runs before the container
+ // sets the rule. To avoid this race, we retry connecting until
+ // sendloopDuration has elapsed, ignoring whether the connect succeeds. The
+ // test works becuase the container will error if a connection is
+ // established after the rule is set.
+ for start := time.Now(); time.Since(start) < sendloopDuration; {
+ connectTCP(ip, acceptPort, sendloopDuration-time.Since(start))
}
return nil
@@ -596,3 +610,66 @@ func (FilterInputJumpTwice) ContainerAction(ip net.IP) error {
func (FilterInputJumpTwice) LocalAction(ip net.IP) error {
return sendUDPLoop(ip, acceptPort, sendloopDuration)
}
+
+// FilterInputDestination verifies that we can filter packets via `-d
+// <ipaddr>`.
+type FilterInputDestination struct{}
+
+// Name implements TestCase.Name.
+func (FilterInputDestination) Name() string {
+ return "FilterInputDestination"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (FilterInputDestination) ContainerAction(ip net.IP) error {
+ addrs, err := localAddrs()
+ if err != nil {
+ return err
+ }
+
+ // Make INPUT's default action DROP, then ACCEPT all packets bound for
+ // this machine.
+ rules := [][]string{{"-P", "INPUT", "DROP"}}
+ for _, addr := range addrs {
+ rules = append(rules, []string{"-A", "INPUT", "-d", addr, "-j", "ACCEPT"})
+ }
+ if err := filterTableRules(rules); err != nil {
+ return err
+ }
+
+ return listenUDP(acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (FilterInputDestination) LocalAction(ip net.IP) error {
+ return sendUDPLoop(ip, acceptPort, sendloopDuration)
+}
+
+// FilterInputInvertDestination verifies that we can filter packets via `! -d
+// <ipaddr>`.
+type FilterInputInvertDestination struct{}
+
+// Name implements TestCase.Name.
+func (FilterInputInvertDestination) Name() string {
+ return "FilterInputInvertDestination"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (FilterInputInvertDestination) ContainerAction(ip net.IP) error {
+ // Make INPUT's default action DROP, then ACCEPT all packets not bound
+ // for 127.0.0.1.
+ rules := [][]string{
+ {"-P", "INPUT", "DROP"},
+ {"-A", "INPUT", "!", "-d", localIP, "-j", "ACCEPT"},
+ }
+ if err := filterTableRules(rules); err != nil {
+ return err
+ }
+
+ return listenUDP(acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (FilterInputInvertDestination) LocalAction(ip net.IP) error {
+ return sendUDPLoop(ip, acceptPort, sendloopDuration)
+}
diff --git a/test/iptables/filter_output.go b/test/iptables/filter_output.go
index 1314a5a92..4582d514c 100644
--- a/test/iptables/filter_output.go
+++ b/test/iptables/filter_output.go
@@ -22,6 +22,8 @@ import (
func init() {
RegisterTestCase(FilterOutputDropTCPDestPort{})
RegisterTestCase(FilterOutputDropTCPSrcPort{})
+ RegisterTestCase(FilterOutputDestination{})
+ RegisterTestCase(FilterOutputInvertDestination{})
}
// FilterOutputDropTCPDestPort tests that connections are not accepted on
@@ -87,3 +89,57 @@ func (FilterOutputDropTCPSrcPort) LocalAction(ip net.IP) error {
return nil
}
+
+// FilterOutputDestination tests that we can selectively allow packets to
+// certain destinations.
+type FilterOutputDestination struct{}
+
+// Name implements TestCase.Name.
+func (FilterOutputDestination) Name() string {
+ return "FilterOutputDestination"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (FilterOutputDestination) ContainerAction(ip net.IP) error {
+ rules := [][]string{
+ {"-A", "OUTPUT", "-d", ip.String(), "-j", "ACCEPT"},
+ {"-P", "OUTPUT", "DROP"},
+ }
+ if err := filterTableRules(rules); err != nil {
+ return err
+ }
+
+ return sendUDPLoop(ip, acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (FilterOutputDestination) LocalAction(ip net.IP) error {
+ return listenUDP(acceptPort, sendloopDuration)
+}
+
+// FilterOutputInvertDestination tests that we can selectively allow packets
+// not headed for a particular destination.
+type FilterOutputInvertDestination struct{}
+
+// Name implements TestCase.Name.
+func (FilterOutputInvertDestination) Name() string {
+ return "FilterOutputInvertDestination"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (FilterOutputInvertDestination) ContainerAction(ip net.IP) error {
+ rules := [][]string{
+ {"-A", "OUTPUT", "!", "-d", localIP, "-j", "ACCEPT"},
+ {"-P", "OUTPUT", "DROP"},
+ }
+ if err := filterTableRules(rules); err != nil {
+ return err
+ }
+
+ return sendUDPLoop(ip, acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (FilterOutputInvertDestination) LocalAction(ip net.IP) error {
+ return listenUDP(acceptPort, sendloopDuration)
+}
diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go
index 56ba78107..7f1f70606 100644
--- a/test/iptables/iptables_test.go
+++ b/test/iptables/iptables_test.go
@@ -191,6 +191,7 @@ func TestFilterInputDropOnlyUDP(t *testing.T) {
}
func TestNATRedirectUDPPort(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
if err := singleTest(NATRedirectUDPPort{}); err != nil {
t.Fatal(err)
@@ -198,6 +199,7 @@ func TestNATRedirectUDPPort(t *testing.T) {
}
func TestNATRedirectTCPPort(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
if err := singleTest(NATRedirectTCPPort{}); err != nil {
t.Fatal(err)
@@ -205,6 +207,7 @@ func TestNATRedirectTCPPort(t *testing.T) {
}
func TestNATDropUDP(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
if err := singleTest(NATDropUDP{}); err != nil {
t.Fatal(err)
@@ -212,6 +215,7 @@ func TestNATDropUDP(t *testing.T) {
}
func TestNATAcceptAll(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
if err := singleTest(NATAcceptAll{}); err != nil {
t.Fatal(err)
@@ -255,6 +259,7 @@ func TestFilterInputReturnUnderflow(t *testing.T) {
}
func TestFilterOutputDropTCPDestPort(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).")
if err := singleTest(FilterOutputDropTCPDestPort{}); err != nil {
t.Fatal(err)
@@ -262,6 +267,7 @@ func TestFilterOutputDropTCPDestPort(t *testing.T) {
}
func TestFilterOutputDropTCPSrcPort(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).")
if err := singleTest(FilterOutputDropTCPSrcPort{}); err != nil {
t.Fatal(err)
@@ -303,3 +309,83 @@ func TestJumpTwice(t *testing.T) {
t.Fatal(err)
}
}
+
+func TestInputDestination(t *testing.T) {
+ if err := singleTest(FilterInputDestination{}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestInputInvertDestination(t *testing.T) {
+ if err := singleTest(FilterInputInvertDestination{}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestOutputDestination(t *testing.T) {
+ if err := singleTest(FilterOutputDestination{}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestOutputInvertDestination(t *testing.T) {
+ if err := singleTest(FilterOutputInvertDestination{}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestNATOutRedirectIP(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
+ t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
+ if err := singleTest(NATOutRedirectIP{}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestNATOutDontRedirectIP(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
+ t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
+ if err := singleTest(NATOutDontRedirectIP{}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestNATOutRedirectInvert(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
+ t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
+ if err := singleTest(NATOutRedirectInvert{}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestNATPreRedirectIP(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
+ t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
+ if err := singleTest(NATPreRedirectIP{}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestNATPreDontRedirectIP(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
+ t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
+ if err := singleTest(NATPreDontRedirectIP{}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestNATPreRedirectInvert(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
+ t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
+ if err := singleTest(NATPreRedirectInvert{}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestNATRedirectRequiresProtocol(t *testing.T) {
+ // TODO(gvisor.dev/issue/170): Enable when supported.
+ t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).")
+ if err := singleTest(NATRedirectRequiresProtocol{}); err != nil {
+ t.Fatal(err)
+ }
+}
diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go
index 1f8dac4f1..134391e8d 100644
--- a/test/iptables/iptables_util.go
+++ b/test/iptables/iptables_util.go
@@ -24,6 +24,7 @@ import (
)
const iptablesBinary = "iptables"
+const localIP = "127.0.0.1"
// filterTable calls `iptables -t filter` with the given args.
func filterTable(args ...string) error {
@@ -46,8 +47,17 @@ func tableCmd(table string, args []string) error {
// filterTableRules is like filterTable, but runs multiple iptables commands.
func filterTableRules(argsList [][]string) error {
+ return tableRules("filter", argsList)
+}
+
+// natTableRules is like natTable, but runs multiple iptables commands.
+func natTableRules(argsList [][]string) error {
+ return tableRules("nat", argsList)
+}
+
+func tableRules(table string, argsList [][]string) error {
for _, args := range argsList {
- if err := filterTable(args...); err != nil {
+ if err := tableCmd(table, args); err != nil {
return err
}
}
@@ -134,7 +144,7 @@ func connectTCP(ip net.IP, port int, timeout time.Duration) error {
// The container may not be listening when we first connect, so retry
// upon error.
callback := func() error {
- conn, err := net.DialTCP("tcp4", nil, &contAddr)
+ conn, err := net.DialTimeout("tcp", contAddr.String(), timeout)
if conn != nil {
conn.Close()
}
@@ -146,3 +156,16 @@ func connectTCP(ip net.IP, port int, timeout time.Duration) error {
return nil
}
+
+// localAddrs returns a list of local network interface addresses.
+func localAddrs() ([]string, error) {
+ addrs, err := net.InterfaceAddrs()
+ if err != nil {
+ return nil, err
+ }
+ addrStrs := make([]string, 0, len(addrs))
+ for _, addr := range addrs {
+ addrStrs = append(addrStrs, addr.String())
+ }
+ return addrStrs, nil
+}
diff --git a/test/iptables/nat.go b/test/iptables/nat.go
index 6ca6b46ca..40096901c 100644
--- a/test/iptables/nat.go
+++ b/test/iptables/nat.go
@@ -15,8 +15,10 @@
package iptables
import (
+ "errors"
"fmt"
"net"
+ "time"
)
const (
@@ -28,6 +30,13 @@ func init() {
RegisterTestCase(NATRedirectTCPPort{})
RegisterTestCase(NATDropUDP{})
RegisterTestCase(NATAcceptAll{})
+ RegisterTestCase(NATPreRedirectIP{})
+ RegisterTestCase(NATPreDontRedirectIP{})
+ RegisterTestCase(NATPreRedirectInvert{})
+ RegisterTestCase(NATOutRedirectIP{})
+ RegisterTestCase(NATOutDontRedirectIP{})
+ RegisterTestCase(NATOutRedirectInvert{})
+ RegisterTestCase(NATRedirectRequiresProtocol{})
}
// NATRedirectUDPPort tests that packets are redirected to different port.
@@ -79,7 +88,8 @@ func (NATRedirectTCPPort) LocalAction(ip net.IP) error {
return connectTCP(ip, dropPort, sendloopDuration)
}
-// NATDropUDP tests that packets are not received in ports other than redirect port.
+// NATDropUDP tests that packets are not received in ports other than redirect
+// port.
type NATDropUDP struct{}
// Name implements TestCase.Name.
@@ -130,3 +140,192 @@ func (NATAcceptAll) ContainerAction(ip net.IP) error {
func (NATAcceptAll) LocalAction(ip net.IP) error {
return sendUDPLoop(ip, acceptPort, sendloopDuration)
}
+
+// NATOutRedirectIP uses iptables to select packets based on destination IP and
+// redirects them.
+type NATOutRedirectIP struct{}
+
+// Name implements TestCase.Name.
+func (NATOutRedirectIP) Name() string {
+ return "NATOutRedirectIP"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATOutRedirectIP) ContainerAction(ip net.IP) error {
+ // Redirect OUTPUT packets to a listening localhost port.
+ dest := net.IP([]byte{200, 0, 0, 2})
+ return loopbackTest(dest, "-A", "OUTPUT", "-d", dest.String(), "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort))
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATOutRedirectIP) LocalAction(ip net.IP) error {
+ // No-op.
+ return nil
+}
+
+// NATOutDontRedirectIP tests that iptables matching with "-d" does not match
+// packets it shouldn't.
+type NATOutDontRedirectIP struct{}
+
+// Name implements TestCase.Name.
+func (NATOutDontRedirectIP) Name() string {
+ return "NATOutDontRedirectIP"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATOutDontRedirectIP) ContainerAction(ip net.IP) error {
+ if err := natTable("-A", "OUTPUT", "-d", localIP, "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", dropPort)); err != nil {
+ return err
+ }
+ return sendUDPLoop(ip, acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATOutDontRedirectIP) LocalAction(ip net.IP) error {
+ return listenUDP(acceptPort, sendloopDuration)
+}
+
+// NATOutRedirectInvert tests that iptables can match with "! -d".
+type NATOutRedirectInvert struct{}
+
+// Name implements TestCase.Name.
+func (NATOutRedirectInvert) Name() string {
+ return "NATOutRedirectInvert"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATOutRedirectInvert) ContainerAction(ip net.IP) error {
+ // Redirect OUTPUT packets to a listening localhost port.
+ dest := []byte{200, 0, 0, 3}
+ destStr := "200.0.0.2"
+ return loopbackTest(dest, "-A", "OUTPUT", "!", "-d", destStr, "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort))
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATOutRedirectInvert) LocalAction(ip net.IP) error {
+ // No-op.
+ return nil
+}
+
+// NATPreRedirectIP tests that we can use iptables to select packets based on
+// destination IP and redirect them.
+type NATPreRedirectIP struct{}
+
+// Name implements TestCase.Name.
+func (NATPreRedirectIP) Name() string {
+ return "NATPreRedirectIP"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATPreRedirectIP) ContainerAction(ip net.IP) error {
+ addrs, err := localAddrs()
+ if err != nil {
+ return err
+ }
+
+ var rules [][]string
+ for _, addr := range addrs {
+ rules = append(rules, []string{"-A", "PREROUTING", "-p", "udp", "-d", addr, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)})
+ }
+ if err := natTableRules(rules); err != nil {
+ return err
+ }
+ return listenUDP(acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATPreRedirectIP) LocalAction(ip net.IP) error {
+ return sendUDPLoop(ip, dropPort, sendloopDuration)
+}
+
+// NATPreDontRedirectIP tests that iptables matching with "-d" does not match
+// packets it shouldn't.
+type NATPreDontRedirectIP struct{}
+
+// Name implements TestCase.Name.
+func (NATPreDontRedirectIP) Name() string {
+ return "NATPreDontRedirectIP"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATPreDontRedirectIP) ContainerAction(ip net.IP) error {
+ if err := natTable("-A", "PREROUTING", "-p", "udp", "-d", localIP, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil {
+ return err
+ }
+ return listenUDP(acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATPreDontRedirectIP) LocalAction(ip net.IP) error {
+ return sendUDPLoop(ip, acceptPort, sendloopDuration)
+}
+
+// NATPreRedirectInvert tests that iptables can match with "! -d".
+type NATPreRedirectInvert struct{}
+
+// Name implements TestCase.Name.
+func (NATPreRedirectInvert) Name() string {
+ return "NATPreRedirectInvert"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATPreRedirectInvert) ContainerAction(ip net.IP) error {
+ if err := natTable("-A", "PREROUTING", "-p", "udp", "!", "-d", localIP, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil {
+ return err
+ }
+ return listenUDP(acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATPreRedirectInvert) LocalAction(ip net.IP) error {
+ return sendUDPLoop(ip, dropPort, sendloopDuration)
+}
+
+// NATRedirectRequiresProtocol tests that use of the --to-ports flag requires a
+// protocol to be specified with -p.
+type NATRedirectRequiresProtocol struct{}
+
+// Name implements TestCase.Name.
+func (NATRedirectRequiresProtocol) Name() string {
+ return "NATRedirectRequiresProtocol"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATRedirectRequiresProtocol) ContainerAction(ip net.IP) error {
+ if err := natTable("-A", "PREROUTING", "-d", localIP, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err == nil {
+ return errors.New("expected an error using REDIRECT --to-ports without a protocol")
+ }
+ return nil
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATRedirectRequiresProtocol) LocalAction(ip net.IP) error {
+ // No-op.
+ return nil
+}
+
+// loopbackTests runs an iptables rule and ensures that packets sent to
+// dest:dropPort are received by localhost:acceptPort.
+func loopbackTest(dest net.IP, args ...string) error {
+ if err := natTable(args...); err != nil {
+ return err
+ }
+ sendCh := make(chan error)
+ listenCh := make(chan error)
+ go func() {
+ sendCh <- sendUDPLoop(dest, dropPort, sendloopDuration)
+ }()
+ go func() {
+ listenCh <- listenUDP(acceptPort, sendloopDuration)
+ }()
+ select {
+ case err := <-listenCh:
+ if err != nil {
+ return err
+ }
+ case <-time.After(sendloopDuration):
+ return errors.New("timed out")
+ }
+ // sendCh will always take the full sendloop time.
+ return <-sendCh
+}
diff --git a/test/packetdrill/Dockerfile b/test/packetdrill/Dockerfile
index bd4451355..4b75e9527 100644
--- a/test/packetdrill/Dockerfile
+++ b/test/packetdrill/Dockerfile
@@ -1,9 +1,9 @@
FROM ubuntu:bionic
-RUN apt-get update
-RUN apt-get install -y net-tools git iptables iputils-ping netcat tcpdump jq tar
+RUN apt-get update && apt-get install -y net-tools git iptables iputils-ping \
+ netcat tcpdump jq tar bison flex make
RUN hash -r
RUN git clone --branch packetdrill-v2.0 \
https://github.com/google/packetdrill.git
-RUN cd packetdrill/gtests/net/packetdrill && ./configure && \
- apt-get install -y bison flex make && make
+RUN cd packetdrill/gtests/net/packetdrill && ./configure && make
+CMD /bin/bash
diff --git a/test/packetdrill/fin_wait2_timeout.pkt b/test/packetdrill/fin_wait2_timeout.pkt
index 613f0bec9..93ab08575 100644
--- a/test/packetdrill/fin_wait2_timeout.pkt
+++ b/test/packetdrill/fin_wait2_timeout.pkt
@@ -19,5 +19,5 @@
+0 > F. 1:1(0) ack 1 <...>
+0 < . 1:1(0) ack 2 win 257
-+1.1 < . 1:1(0) ack 2 win 257
++2 < . 1:1(0) ack 2 win 257
+0 > R 2:2(0) win 0
diff --git a/test/packetimpact/README.md b/test/packetimpact/README.md
new file mode 100644
index 000000000..ece4dedc6
--- /dev/null
+++ b/test/packetimpact/README.md
@@ -0,0 +1,531 @@
+# Packetimpact
+
+## What is packetimpact?
+
+Packetimpact is a tool for platform-independent network testing. It is heavily
+inspired by [packetdrill](https://github.com/google/packetdrill). It creates two
+docker containers connected by a network. One is for the test bench, which
+operates the test. The other is for the device-under-test (DUT), which is the
+software being tested. The test bench communicates over the network with the DUT
+to check correctness of the network.
+
+### Goals
+
+Packetimpact aims to provide:
+
+* A **multi-platform** solution that can test both Linux and gVisor.
+* **Conciseness** on par with packetdrill scripts.
+* **Control-flow** like for loops, conditionals, and variables.
+* **Flexibilty** to specify every byte in a packet or use multiple sockets.
+
+## When to use packetimpact?
+
+There are a few ways to write networking tests for gVisor currently:
+
+* [Go unit tests](https://github.com/google/gvisor/tree/master/pkg/tcpip)
+* [syscall tests](https://github.com/google/gvisor/tree/master/test/syscalls/linux)
+* [packetdrill tests](https://github.com/google/gvisor/tree/master/test/packetdrill)
+* packetimpact tests
+
+The right choice depends on the needs of the test.
+
+Feature | Go unit test | syscall test | packetdrill | packetimpact
+------------- | ------------ | ------------ | ----------- | ------------
+Multiplatform | no | **YES** | **YES** | **YES**
+Concise | no | somewhat | somewhat | **VERY**
+Control-flow | **YES** | **YES** | no | **YES**
+Flexible | **VERY** | no | somewhat | **VERY**
+
+### Go unit tests
+
+If the test depends on the internals of gVisor and doesn't need to run on Linux
+or other platforms for comparison purposes, a Go unit test can be appropriate.
+They can observe internals of gVisor networking. The downside is that they are
+**not concise** and **not multiplatform**. If you require insight on gVisor
+internals, this is the right choice.
+
+### Syscall tests
+
+Syscall tests are **multiplatform** but cannot examine the internals of gVisor
+networking. They are **concise**. They can use **control-flow** structures like
+conditionals, for loops, and variables. However, they are limited to only what
+the POSIX interface provides so they are **not flexible**. For example, you
+would have difficulty writing a syscall test that intentionally sends a bad IP
+checksum. Or if you did write that test with raw sockets, it would be very
+**verbose** to write a test that intentionally send wrong checksums, wrong
+protocols, wrong sequence numbers, etc.
+
+### Packetdrill tests
+
+Packetdrill tests are **multiplatform** and can run against both Linux and
+gVisor. They are **concise** and use a special packetdrill scripting language.
+They are **more flexible** than a syscall test in that they can send packets
+that a syscall test would have difficulty sending, like a packet with a
+calcuated ACK number. But they are also somewhat limimted in flexibiilty in that
+they can't do tests with multiple sockets. They have **no control-flow** ability
+like variables or conditionals. For example, it isn't possible to send a packet
+that depends on the window size of a previous packet because the packetdrill
+language can't express that. Nor could you branch based on whether or not the
+other side supports window scaling, for example.
+
+### Packetimpact tests
+
+Packetimpact tests are similar to Packetdrill tests except that they are written
+in Go instead of the packetdrill scripting language. That gives them all the
+**control-flow** abilities of Go (loops, functions, variables, etc). They are
+**multiplatform** in the same way as packetdrill tests but even more
+**flexible** because Go is more expressive than the scripting language of
+packetdrill. However, Go is **not as concise** as the packetdrill language. Many
+design decisions below are made to mitigate that.
+
+## How it works
+
+```
+ +--------------+ +--------------+
+ | | TEST NET | |
+ | | <===========> | Device |
+ | Test | | Under |
+ | Bench | | Test |
+ | | <===========> | (DUT) |
+ | | CONTROL NET | |
+ +--------------+ +--------------+
+```
+
+Two docker containers are created by a script, one for the test bench and the
+other for the device under test (DUT). The script connects the two containers
+with a control network and test network. It also does some other tasks like
+waiting until the DUT is ready before starting the test and disabling Linux
+networking that would interfere with the test bench.
+
+### DUT
+
+The DUT container runs a program called the "posix_server". The posix_server is
+written in c++ for maximum portability. It is compiled on the host. The script
+that starts the containers copies it into the DUT's container and runs it. It's
+job is to receive directions from the test bench on what actions to take. For
+this, the posix_server does three steps in a loop:
+
+1. Listen for a request from the test bench.
+2. Execute a command.
+3. Send the response back to the test bench.
+
+The requests and responses are
+[protobufs](https://developers.google.com/protocol-buffers) and the
+communication is done with [gRPC](https://grpc.io/). The commands run are
+[POSIX socket commands](https://en.wikipedia.org/wiki/Berkeley_sockets#Socket_API_functions),
+with the inputs and outputs converted into protobuf requests and responses. All
+communication is on the control network, so that the test network is unaffected
+by extra packets.
+
+For example, this is the request and response pair to call
+[`socket()`](http://man7.org/linux/man-pages/man2/socket.2.html):
+
+```protocol-buffer
+message SocketRequest {
+ int32 domain = 1;
+ int32 type = 2;
+ int32 protocol = 3;
+}
+
+message SocketResponse {
+ int32 fd = 1;
+ int32 errno_ = 2;
+}
+```
+
+##### Alternatives considered
+
+* We could have use JSON for communication instead. It would have been a
+ lighter-touch than protobuf but protobuf handles all the data type and has
+ strict typing to prevent a class of errors. The test bench could be written
+ in other languages, too.
+* Instead of mimicking the POSIX interfaces, arguments could have had a more
+ natural form, like the `bind()` getting a string IP address instead of bytes
+ in a `sockaddr_t`. However, conforming to the existing structures keeps more
+ of the complexity in Go and keeps the posix_server simpler and thus more
+ likely to compile everywhere.
+
+### Test Bench
+
+The test bench does most of the work in a test. It is a Go program that compiles
+on the host and is copied by the script into test bench's container. It is a
+regular [go unit test](https://golang.org/pkg/testing/) that imports the test
+bench framework. The test bench framwork is based on three basic utilities:
+
+* Commanding the DUT to run POSIX commands and return responses.
+* Sending raw packets to the DUT on the test network.
+* Listening for raw packets from the DUT on the test network.
+
+#### DUT commands
+
+To keep the interface to the DUT consistent and easy-to-use, each POSIX command
+supported by the posix_server is wrapped in functions with signatures similar to
+the ones in the [Go unix package](https://godoc.org/golang.org/x/sys/unix). This
+way all the details of endianess and (un)marshalling of go structs such as
+[unix.Timeval](https://godoc.org/golang.org/x/sys/unix#Timeval) is handled in
+one place. This also makes it straight-forward to convert tests that use `unix.`
+or `syscall.` calls to `dut.` calls.
+
+For example, creating a connection to the DUT and commanding it to make a socket
+looks like this:
+
+```go
+dut := testbench.NewDut(t)
+fd, err := dut.SocketWithErrno(unix.AF_INET, unix.SOCK_STREAM, unix.IPPROTO_IP)
+if fd < 0 {
+ t.Fatalf(...)
+}
+```
+
+Because the usual case is to fail the test when the DUT fails to create a
+socket, there is a concise version of each of the `...WithErrno` functions that
+does that:
+
+```go
+dut := testbench.NewDut(t)
+fd := dut.Socket(unix.AF_INET, unix.SOCK_STREAM, unix.IPPROTO_IP)
+```
+
+The DUT and other structs in the code store a `*testing.T` so that they can
+provide versions of functions that call `t.Fatalf(...)`. This helps keep tests
+concise.
+
+##### Alternatives considered
+
+* Instead of mimicking the `unix.` go interface, we could have invented a more
+ natural one, like using `float64` instead of `Timeval`. However, using the
+ same function signatures that `unix.` has makes it easier to convert code to
+ `dut.`. Also, using an existing interface ensures that we don't invent an
+ interface that isn't extensible. For example, if we invented a function for
+ `bind()` that didn't support IPv6 and later we had to add a second `bind6()`
+ function.
+
+#### Sending/Receiving Raw Packets
+
+The framework wraps POSIX sockets for sending and receiving raw frames. Both
+send and receive are synchronous commands.
+[SO_RCVTIMEO](http://man7.org/linux/man-pages/man7/socket.7.html) is used to set
+a timeout on the receive commands. For ease of use, these are wrapped in an
+`Injector` and a `Sniffer`. They have functions:
+
+```go
+func (s *Sniffer) Recv(timeout time.Duration) []byte {...}
+func (i *Injector) Send(b []byte) {...}
+```
+
+##### Alternatives considered
+
+* [gopacket](https://github.com/google/gopacket) pcap has raw socket support
+ but requires cgo. cgo is not guaranteed to be portable from the host to the
+ container and in practice, the container doesn't recognize binaries built on
+ the host if they use cgo.
+* Both gVisor and gopacket have the ability to read and write pcap files
+ without cgo but that is insufficient here.
+* The sniffer and injector can't share a socket because they need to be bound
+ differently.
+* Sniffing could have been done asynchronously with channels, obviating the
+ need for `SO_RCVTIMEO`. But that would introduce asynchronous complication.
+ `SO_RCVTIMEO` is well supported on the test bench.
+
+#### `Layer` struct
+
+A large part of packetimpact tests is creating packets to send and comparing
+received packets against expectations. To keep tests concise, it is useful to be
+able to specify just the important parts of packets that need to be set. For
+example, sending a packet with default values except for TCP Flags. And for
+packets received, it's useful to be able to compare just the necessary parts of
+received packets and ignore the rest.
+
+To aid in both of those, Go structs with optional fields are created for each
+encapsulation type, such as IPv4, TCP, and Ethernet. This is inspired by
+[scapy](https://scapy.readthedocs.io/en/latest/). For example, here is the
+struct for Ethernet:
+
+```go
+type Ether struct {
+ LayerBase
+ SrcAddr *tcpip.LinkAddress
+ DstAddr *tcpip.LinkAddress
+ Type *tcpip.NetworkProtocolNumber
+}
+```
+
+Each struct has the same fields as those in the
+[gVisor headers](https://github.com/google/gvisor/tree/master/pkg/tcpip/header)
+but with a pointer for each field that may be `nil`.
+
+##### Alternatives considered
+
+* Just use []byte like gVisor headers do. The drawback is that it makes the
+ tests more verbose.
+ * For example, there would be no way to call `Send(myBytes)` concisely and
+ indicate if the checksum should be calculated automatically versus
+ overridden. The only way would be to add lines to the test to calculate
+ it before each Send, which is wordy. Or make multiple versions of Send:
+ one that checksums IP, one that doesn't, one that checksums TCP, one
+ that does both, etc. That would be many combinations.
+ * Filtering inputs would become verbose. Either:
+ * large conditionals that need to be repeated many places:
+ `h[FlagOffset] == SYN && h[LengthOffset:LengthOffset+2] == ...` or
+ * Many functions, one per field, like: `filterByFlag(myBytes, SYN)`,
+ `filterByLength(myBytes, 20)`, `filterByNextProto(myBytes, 0x8000)`,
+ etc.
+ * Using pointers allows us to combine `Layer`s with a one-line call to
+ `mergo.Merge(...)`. So the default `Layers` can be overridden by a
+ `Layers` with just the TCP conection's src/dst which can be overridden
+ by one with just a test specific TCP window size. Each override is
+ specified as just one call to `mergo.Merge`.
+ * It's a proven way to separate the details of a packet from the byte
+ format as shown by scapy's success.
+* Use packetgo. It's more general than parsing packets with gVisor. However:
+ * packetgo doesn't have optional fields so many of the above problems
+ still apply.
+ * It would be yet another dependency.
+ * It's not as well known to engineers that are already writing gVisor
+ code.
+ * It might be a good candidate for replacing the parsing of packets into
+ `Layer`s if all that parsing turns out to be more work than parsing by
+ packetgo and converting *that* to `Layer`. packetgo has easier to use
+ getters for the layers. This could be done later in a way that doesn't
+ break tests.
+
+#### `Layer` methods
+
+The `Layer` structs provide a way to partially specify an encapsulation. They
+also need methods for using those partially specified encapsulation, for example
+to marshal them to bytes or compare them. For those, each encapsulation
+implements the `Layer` interface:
+
+```go
+// Layer is the interface that all encapsulations must implement.
+//
+// A Layer is an encapsulation in a packet, such as TCP, IPv4, IPv6, etc. A
+// Layer contains all the fields of the encapsulation. Each field is a pointer
+// and may be nil.
+type Layer interface {
+ // toBytes converts the Layer into bytes. In places where the Layer's field
+ // isn't nil, the value that is pointed to is used. When the field is nil, a
+ // reasonable default for the Layer is used. For example, "64" for IPv4 TTL
+ // and a calculated checksum for TCP or IP. Some layers require information
+ // from the previous or next layers in order to compute a default, such as
+ // TCP's checksum or Ethernet's type, so each Layer has a doubly-linked list
+ // to the layer's neighbors.
+ toBytes() ([]byte, error)
+
+ // match checks if the current Layer matches the provided Layer. If either
+ // Layer has a nil in a given field, that field is considered matching.
+ // Otherwise, the values pointed to by the fields must match.
+ match(Layer) bool
+
+ // length in bytes of the current encapsulation
+ length() int
+
+ // next gets a pointer to the encapsulated Layer.
+ next() Layer
+
+ // prev gets a pointer to the Layer encapsulating this one.
+ prev() Layer
+
+ // setNext sets the pointer to the encapsulated Layer.
+ setNext(Layer)
+
+ // setPrev sets the pointer to the Layer encapsulating this one.
+ setPrev(Layer)
+}
+```
+
+For each `Layer` there is also a parsing function. For example, this one is for
+Ethernet:
+
+```
+func ParseEther(b []byte) (Layers, error)
+```
+
+The parsing function converts bytes received on the wire into a `Layer`
+(actually `Layers`, see below) which has no `nil`s in it. By using
+`match(Layer)` to compare against another `Layer` that *does* have `nil`s in it,
+the received bytes can be partially compared. The `nil`s behave as
+"don't-cares".
+
+##### Alternatives considered
+
+* Matching against `[]byte` instead of converting to `Layer` first.
+ * The downside is that it precludes the use of a `cmp.Equal` one-liner to
+ do comparisons.
+ * It creates confusion in the code to deal with both representations at
+ different times. For example, is the checksum calculated on `[]byte` or
+ `Layer` when sending? What about when checking received packets?
+
+#### `Layers`
+
+```
+type Layers []Layer
+
+func (ls *Layers) match(other Layers) bool {...}
+func (ls *Layers) toBytes() ([]byte, error) {...}
+```
+
+`Layers` is an array of `Layer`. It represents a stack of encapsulations, such
+as `Layers{Ether{},IPv4{},TCP{},Payload{}}`. It also has `toBytes()` and
+`match(Layers)`, like `Layer`. The parse functions above actually return
+`Layers` and not `Layer` because they know about the headers below and
+sequentially call each parser on the remaining, encapsulated bytes.
+
+All this leads to the ability to write concise packet processing. For example:
+
+```go
+etherType := 0x8000
+flags = uint8(header.TCPFlagSyn|header.TCPFlagAck)
+toMatch := Layers{Ether{Type: &etherType}, IPv4{}, TCP{Flags: &flags}}
+for {
+ recvBytes := sniffer.Recv(time.Second)
+ if recvBytes == nil {
+ println("Got no packet for 1 second")
+ }
+ gotPacket, err := ParseEther(recvBytes)
+ if err == nil && toMatch.match(gotPacket) {
+ println("Got a TCP/IPv4/Eth packet with SYNACK")
+ }
+}
+```
+
+##### Alternatives considered
+
+* Don't use previous and next pointers.
+ * Each layer may need to be able to interrogate the layers aroung it, like
+ for computing the next protocol number or total length. So *some*
+ mechanism is needed for a `Layer` to see neighboring layers.
+ * We could pass the entire array `Layers` to the `toBytes()` function.
+ Passing an array to a method that includes in the array the function
+ receiver itself seems wrong.
+
+#### Connections
+
+Using `Layers` above, we can create connection structures to maintain state
+about connections. For example, here is the `TCPIPv4` struct:
+
+```
+type TCPIPv4 struct {
+ outgoing Layers
+ incoming Layers
+ localSeqNum uint32
+ remoteSeqNum uint32
+ sniffer Sniffer
+ injector Injector
+ t *testing.T
+}
+```
+
+`TCPIPv4` contains an `outgoing Layers` which holds the defaults for the
+connection, such as the source and destination MACs, IPs, and ports. When
+`outgoing.toBytes()` is called a valid packet for this TCPIPv4 flow is built.
+
+It also contains `incoming Layers` which holds filter for incoming packets that
+belong to this flow. `incoming.match(Layers)` is used on received bytes to check
+if they are part of the flow.
+
+The `sniffer` and `injector` are for receiving and sending raw packet bytes. The
+`localSeqNum` and `remoteSeqNum` are updated by `Send` and `Recv` so that
+outgoing packets will have, by default, the correct sequence number and ack
+number.
+
+TCPIPv4 provides some functions:
+
+```
+func (conn *TCPIPv4) Send(tcp TCP) {...}
+func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP {...}
+```
+
+`Send(tcp TCP)` uses [mergo](https://github.com/imdario/mergo) to merge the
+provided `TCP` (a `Layer`) into `outgoing`. This way the user can specify
+concisely just which fields of `outgoing` to modify. The packet is sent using
+the `injector`.
+
+`Recv(timeout time.Duration)` reads packets from the sniffer until either the
+timeout has elapsed or a packet that matches `incoming` arrives.
+
+Using those, we can perform a TCP 3-way handshake without too much code:
+
+```go
+func (conn *TCPIPv4) Handshake() {
+ syn := uint8(header.TCPFlagSyn)
+ synack := uint8(header.TCPFlagSyn)
+ ack := uint8(header.TCPFlagAck)
+ conn.Send(TCP{Flags: &syn}) // Send a packet with all defaults but set TCP-SYN.
+
+ // Wait for the SYN-ACK response.
+ for {
+ newTCP := conn.Recv(time.Second) // This already filters by MAC, IP, and ports.
+ if TCP{Flags: &synack}.match(newTCP) {
+ break // Only if it's a SYN-ACK proceed.
+ }
+ }
+
+ conn.Send(TCP{Flags: &ack}) // Send an ACK. The seq and ack numbers are set correctly.
+}
+```
+
+The handshake code is part of the testbench utilities so tests can share this
+common sequence, making tests even more concise.
+
+##### Alternatives considered
+
+* Instead of storing `outgoing` and `incoming`, store values.
+ * There would be many more things to store instead, like `localMac`,
+ `remoteMac`, `localIP`, `remoteIP`, `localPort`, and `remotePort`.
+ * Construction of a packet would be many lines to copy each of these
+ values into a `[]byte`. And there would be slight variations needed for
+ each encapsulation stack, like TCPIPv6 and ARP.
+ * Filtering incoming packets would be a long sequence:
+ * Compare the MACs, then
+ * Parse the next header, then
+ * Compare the IPs, then
+ * Parse the next header, then
+ * Compare the TCP ports. Instead it's all just one call to
+ `cmp.Equal(...)`, for all sequences.
+ * A TCPIPv6 connection could share most of the code. Only the type of the
+ IP addresses are different. The types of `outgoing` and `incoming` would
+ be remain `Layers`.
+ * An ARP connection could share all the Ethernet parts. The IP `Layer`
+ could be factored out of `outgoing`. After that, the IPv4 and IPv6
+ connections could implement one interface and a single TCP struct could
+ have either network protocol through composition.
+
+## Putting it all together
+
+Here's what te start of a packetimpact unit test looks like. This test creates a
+TCP connection with the DUT. There are added comments for explanation in this
+document but a real test might not include them in order to stay even more
+concise.
+
+```go
+func TestMyTcpTest(t *testing.T) {
+ // Prepare a DUT for communication.
+ dut := testbench.NewDUT(t)
+
+ // This does:
+ // dut.Socket()
+ // dut.Bind()
+ // dut.Getsockname() to learn the new port number
+ // dut.Listen()
+ listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+ defer dut.Close(listenFD) // Tell the DUT to close the socket at the end of the test.
+
+ // Monitor a new TCP connection with sniffer, injector, sequence number tracking,
+ // and reasonable outgoing and incoming packet field default IPs, MACs, and port numbers.
+ conn := testbench.NewTCPIPv4(t, dut, remotePort)
+
+ // Perform a 3-way handshake: send SYN, expect SYNACK, send ACK.
+ conn.Handshake()
+
+ // Tell the DUT to accept the new connection.
+ acceptFD := dut.Accept(acceptFd)
+}
+```
+
+## Other notes
+
+* The time between receiving a SYN-ACK and replying with an ACK in `Handshake`
+ is about 3ms. This is much slower than the native unix response, which is
+ about 0.3ms. Packetdrill gets closer to 0.3ms. For tests where timing is
+ crucial, packetdrill is faster and more precise.
diff --git a/test/packetimpact/dut/BUILD b/test/packetimpact/dut/BUILD
new file mode 100644
index 000000000..3ce63c2c6
--- /dev/null
+++ b/test/packetimpact/dut/BUILD
@@ -0,0 +1,18 @@
+load("//tools:defs.bzl", "cc_binary", "grpcpp")
+
+package(
+ default_visibility = ["//test/packetimpact:__subpackages__"],
+ licenses = ["notice"],
+)
+
+cc_binary(
+ name = "posix_server",
+ srcs = ["posix_server.cc"],
+ linkstatic = 1,
+ static = True, # This is needed for running in a docker container.
+ deps = [
+ grpcpp,
+ "//test/packetimpact/proto:posix_server_cc_grpc_proto",
+ "//test/packetimpact/proto:posix_server_cc_proto",
+ ],
+)
diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc
new file mode 100644
index 000000000..2f10dda40
--- /dev/null
+++ b/test/packetimpact/dut/posix_server.cc
@@ -0,0 +1,229 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at //
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <unordered_map>
+
+#include "arpa/inet.h"
+#include "include/grpcpp/security/server_credentials.h"
+#include "include/grpcpp/server_builder.h"
+#include "test/packetimpact/proto/posix_server.grpc.pb.h"
+#include "test/packetimpact/proto/posix_server.pb.h"
+
+// Converts a sockaddr_storage to a Sockaddr message.
+::grpc::Status sockaddr_to_proto(const sockaddr_storage &addr,
+ socklen_t addrlen,
+ posix_server::Sockaddr *sockaddr_proto) {
+ switch (addr.ss_family) {
+ case AF_INET: {
+ auto addr_in = reinterpret_cast<const sockaddr_in *>(&addr);
+ auto response_in = sockaddr_proto->mutable_in();
+ response_in->set_family(addr_in->sin_family);
+ response_in->set_port(ntohs(addr_in->sin_port));
+ response_in->mutable_addr()->assign(
+ reinterpret_cast<const char *>(&addr_in->sin_addr.s_addr), 4);
+ return ::grpc::Status::OK;
+ }
+ case AF_INET6: {
+ auto addr_in6 = reinterpret_cast<const sockaddr_in6 *>(&addr);
+ auto response_in6 = sockaddr_proto->mutable_in6();
+ response_in6->set_family(addr_in6->sin6_family);
+ response_in6->set_port(ntohs(addr_in6->sin6_port));
+ response_in6->set_flowinfo(ntohl(addr_in6->sin6_flowinfo));
+ response_in6->mutable_addr()->assign(
+ reinterpret_cast<const char *>(&addr_in6->sin6_addr.s6_addr), 16);
+ response_in6->set_scope_id(ntohl(addr_in6->sin6_scope_id));
+ return ::grpc::Status::OK;
+ }
+ }
+ return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "Unknown Sockaddr");
+}
+
+class PosixImpl final : public posix_server::Posix::Service {
+ ::grpc::Status Socket(grpc_impl::ServerContext *context,
+ const ::posix_server::SocketRequest *request,
+ ::posix_server::SocketResponse *response) override {
+ response->set_fd(
+ socket(request->domain(), request->type(), request->protocol()));
+ response->set_errno_(errno);
+ return ::grpc::Status::OK;
+ }
+
+ ::grpc::Status Bind(grpc_impl::ServerContext *context,
+ const ::posix_server::BindRequest *request,
+ ::posix_server::BindResponse *response) override {
+ if (!request->has_addr()) {
+ return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT,
+ "Missing address");
+ }
+ sockaddr_storage addr;
+
+ switch (request->addr().sockaddr_case()) {
+ case posix_server::Sockaddr::SockaddrCase::kIn: {
+ auto request_in = request->addr().in();
+ if (request_in.addr().size() != 4) {
+ return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT,
+ "IPv4 address must be 4 bytes");
+ }
+ auto addr_in = reinterpret_cast<sockaddr_in *>(&addr);
+ addr_in->sin_family = request_in.family();
+ addr_in->sin_port = htons(request_in.port());
+ request_in.addr().copy(
+ reinterpret_cast<char *>(&addr_in->sin_addr.s_addr), 4);
+ break;
+ }
+ case posix_server::Sockaddr::SockaddrCase::kIn6: {
+ auto request_in6 = request->addr().in6();
+ if (request_in6.addr().size() != 16) {
+ return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT,
+ "IPv6 address must be 16 bytes");
+ }
+ auto addr_in6 = reinterpret_cast<sockaddr_in6 *>(&addr);
+ addr_in6->sin6_family = request_in6.family();
+ addr_in6->sin6_port = htons(request_in6.port());
+ addr_in6->sin6_flowinfo = htonl(request_in6.flowinfo());
+ request_in6.addr().copy(
+ reinterpret_cast<char *>(&addr_in6->sin6_addr.s6_addr), 16);
+ addr_in6->sin6_scope_id = htonl(request_in6.scope_id());
+ break;
+ }
+ case posix_server::Sockaddr::SockaddrCase::SOCKADDR_NOT_SET:
+ default:
+ return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT,
+ "Unknown Sockaddr");
+ }
+ response->set_ret(bind(request->sockfd(),
+ reinterpret_cast<sockaddr *>(&addr), sizeof(addr)));
+ response->set_errno_(errno);
+ return ::grpc::Status::OK;
+ }
+
+ ::grpc::Status GetSockName(
+ grpc_impl::ServerContext *context,
+ const ::posix_server::GetSockNameRequest *request,
+ ::posix_server::GetSockNameResponse *response) override {
+ sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ response->set_ret(getsockname(
+ request->sockfd(), reinterpret_cast<sockaddr *>(&addr), &addrlen));
+ response->set_errno_(errno);
+ return sockaddr_to_proto(addr, addrlen, response->mutable_addr());
+ }
+
+ ::grpc::Status Listen(grpc_impl::ServerContext *context,
+ const ::posix_server::ListenRequest *request,
+ ::posix_server::ListenResponse *response) override {
+ response->set_ret(listen(request->sockfd(), request->backlog()));
+ response->set_errno_(errno);
+ return ::grpc::Status::OK;
+ }
+
+ ::grpc::Status Accept(grpc_impl::ServerContext *context,
+ const ::posix_server::AcceptRequest *request,
+ ::posix_server::AcceptResponse *response) override {
+ sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ response->set_fd(accept(request->sockfd(),
+ reinterpret_cast<sockaddr *>(&addr), &addrlen));
+ response->set_errno_(errno);
+ return sockaddr_to_proto(addr, addrlen, response->mutable_addr());
+ }
+
+ ::grpc::Status SetSockOpt(
+ grpc_impl::ServerContext *context,
+ const ::posix_server::SetSockOptRequest *request,
+ ::posix_server::SetSockOptResponse *response) override {
+ response->set_ret(setsockopt(request->sockfd(), request->level(),
+ request->optname(), request->optval().c_str(),
+ request->optval().size()));
+ response->set_errno_(errno);
+ return ::grpc::Status::OK;
+ }
+
+ ::grpc::Status SetSockOptTimeval(
+ ::grpc::ServerContext *context,
+ const ::posix_server::SetSockOptTimevalRequest *request,
+ ::posix_server::SetSockOptTimevalResponse *response) override {
+ timeval tv = {.tv_sec = static_cast<__time_t>(request->timeval().seconds()),
+ .tv_usec = static_cast<__suseconds_t>(
+ request->timeval().microseconds())};
+ response->set_ret(setsockopt(request->sockfd(), request->level(),
+ request->optname(), &tv, sizeof(tv)));
+ response->set_errno_(errno);
+ return ::grpc::Status::OK;
+ }
+
+ ::grpc::Status Close(grpc_impl::ServerContext *context,
+ const ::posix_server::CloseRequest *request,
+ ::posix_server::CloseResponse *response) override {
+ response->set_ret(close(request->fd()));
+ response->set_errno_(errno);
+ return ::grpc::Status::OK;
+ }
+};
+
+// Parse command line options. Returns a pointer to the first argument beyond
+// the options.
+void parse_command_line_options(int argc, char *argv[], std::string *ip,
+ int *port) {
+ static struct option options[] = {{"ip", required_argument, NULL, 1},
+ {"port", required_argument, NULL, 2},
+ {0, 0, 0, 0}};
+
+ // Parse the arguments.
+ int c;
+ while ((c = getopt_long(argc, argv, "", options, NULL)) > 0) {
+ if (c == 1) {
+ *ip = optarg;
+ } else if (c == 2) {
+ *port = std::stoi(std::string(optarg));
+ }
+ }
+}
+
+void run_server(const std::string &ip, int port) {
+ PosixImpl posix_service;
+ grpc::ServerBuilder builder;
+ std::string server_address = ip + ":" + std::to_string(port);
+ // Set the authentication mechanism.
+ std::shared_ptr<grpc::ServerCredentials> creds =
+ grpc::InsecureServerCredentials();
+ builder.AddListeningPort(server_address, creds);
+ builder.RegisterService(&posix_service);
+
+ std::unique_ptr<grpc::Server> server(builder.BuildAndStart());
+ std::cerr << "Server listening on " << server_address << std::endl;
+ server->Wait();
+ std::cerr << "posix_server is finished." << std::endl;
+}
+
+int main(int argc, char *argv[]) {
+ std::cerr << "posix_server is starting." << std::endl;
+ std::string ip;
+ int port;
+ parse_command_line_options(argc, argv, &ip, &port);
+
+ std::cerr << "Got IP " << ip << " and port " << port << "." << std::endl;
+ run_server(ip, port);
+}
diff --git a/test/packetimpact/proto/BUILD b/test/packetimpact/proto/BUILD
new file mode 100644
index 000000000..4a4370f42
--- /dev/null
+++ b/test/packetimpact/proto/BUILD
@@ -0,0 +1,12 @@
+load("//tools:defs.bzl", "proto_library")
+
+package(
+ default_visibility = ["//test/packetimpact:__subpackages__"],
+ licenses = ["notice"],
+)
+
+proto_library(
+ name = "posix_server",
+ srcs = ["posix_server.proto"],
+ has_services = 1,
+)
diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto
new file mode 100644
index 000000000..026876fc2
--- /dev/null
+++ b/test/packetimpact/proto/posix_server.proto
@@ -0,0 +1,150 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package posix_server;
+
+message SocketRequest {
+ int32 domain = 1;
+ int32 type = 2;
+ int32 protocol = 3;
+}
+
+message SocketResponse {
+ int32 fd = 1;
+ int32 errno_ = 2;
+}
+
+message SockaddrIn {
+ int32 family = 1;
+ uint32 port = 2;
+ bytes addr = 3;
+}
+
+message SockaddrIn6 {
+ uint32 family = 1;
+ uint32 port = 2;
+ uint32 flowinfo = 3;
+ bytes addr = 4;
+ uint32 scope_id = 5;
+}
+
+message Sockaddr {
+ oneof sockaddr {
+ SockaddrIn in = 1;
+ SockaddrIn6 in6 = 2;
+ }
+}
+
+message BindRequest {
+ int32 sockfd = 1;
+ Sockaddr addr = 2;
+}
+
+message BindResponse {
+ int32 ret = 1;
+ int32 errno_ = 2;
+}
+
+message GetSockNameRequest {
+ int32 sockfd = 1;
+}
+
+message GetSockNameResponse {
+ int32 ret = 1;
+ int32 errno_ = 2;
+ Sockaddr addr = 3;
+}
+
+message ListenRequest {
+ int32 sockfd = 1;
+ int32 backlog = 2;
+}
+
+message ListenResponse {
+ int32 ret = 1;
+ int32 errno_ = 2;
+}
+
+message AcceptRequest {
+ int32 sockfd = 1;
+}
+
+message AcceptResponse {
+ int32 fd = 1;
+ int32 errno_ = 2;
+ Sockaddr addr = 3;
+}
+
+message SetSockOptRequest {
+ int32 sockfd = 1;
+ int32 level = 2;
+ int32 optname = 3;
+ bytes optval = 4;
+}
+
+message SetSockOptResponse {
+ int32 ret = 1;
+ int32 errno_ = 2;
+}
+
+message Timeval {
+ int64 seconds = 1;
+ int64 microseconds = 2;
+}
+
+message SetSockOptTimevalRequest {
+ int32 sockfd = 1;
+ int32 level = 2;
+ int32 optname = 3;
+ Timeval timeval = 4;
+}
+
+message SetSockOptTimevalResponse {
+ int32 ret = 1;
+ int32 errno_ = 2;
+}
+
+message CloseRequest {
+ int32 fd = 1;
+}
+
+message CloseResponse {
+ int32 ret = 1;
+ int32 errno_ = 2;
+}
+
+service Posix {
+ // Call socket() on the DUT.
+ rpc Socket(SocketRequest) returns (SocketResponse);
+ // Call bind() on the DUT.
+ rpc Bind(BindRequest) returns (BindResponse);
+ // Call getsockname() on the DUT.
+ rpc GetSockName(GetSockNameRequest) returns (GetSockNameResponse);
+ // Call listen() on the DUT.
+ rpc Listen(ListenRequest) returns (ListenResponse);
+ // Call accept() on the DUT.
+ rpc Accept(AcceptRequest) returns (AcceptResponse);
+ // Call setsockopt() on the DUT. You should prefer one of the other
+ // SetSockOpt* functions with a more structured optval or else you may get the
+ // encoding wrong, such as making a bad assumption about the server's word
+ // sizes or endianness.
+ rpc SetSockOpt(SetSockOptRequest) returns (SetSockOptResponse);
+ // Call setsockopt() on the DUT with a Timeval optval.
+ rpc SetSockOptTimeval(SetSockOptTimevalRequest)
+ returns (SetSockOptTimevalResponse);
+ // Call close() on the DUT.
+ rpc Close(CloseRequest) returns (CloseResponse);
+}
diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD
new file mode 100644
index 000000000..a34c81fcc
--- /dev/null
+++ b/test/packetimpact/testbench/BUILD
@@ -0,0 +1,31 @@
+load("//tools:defs.bzl", "go_library")
+
+package(
+ default_visibility = ["//test/packetimpact:__subpackages__"],
+ licenses = ["notice"],
+)
+
+go_library(
+ name = "testbench",
+ srcs = [
+ "connections.go",
+ "dut.go",
+ "dut_client.go",
+ "layers.go",
+ "rawsockets.go",
+ ],
+ deps = [
+ "//pkg/tcpip",
+ "//pkg/tcpip/header",
+ "//pkg/tcpip/seqnum",
+ "//pkg/usermem",
+ "//test/packetimpact/proto:posix_server_go_proto",
+ "@com_github_google_go-cmp//cmp:go_default_library",
+ "@com_github_google_go-cmp//cmp/cmpopts:go_default_library",
+ "@com_github_imdario_mergo//:go_default_library",
+ "@com_github_mohae_deepcopy//:go_default_library",
+ "@org_golang_google_grpc//:go_default_library",
+ "@org_golang_google_grpc//keepalive:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go
new file mode 100644
index 000000000..b7aa63934
--- /dev/null
+++ b/test/packetimpact/testbench/connections.go
@@ -0,0 +1,245 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package testbench has utilities to send and receive packets and also command
+// the DUT to run POSIX functions.
+package testbench
+
+import (
+ "flag"
+ "fmt"
+ "math/rand"
+ "net"
+ "testing"
+ "time"
+
+ "github.com/mohae/deepcopy"
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+)
+
+var localIPv4 = flag.String("local_ipv4", "", "local IPv4 address for test packets")
+var remoteIPv4 = flag.String("remote_ipv4", "", "remote IPv4 address for test packets")
+var localMAC = flag.String("local_mac", "", "local mac address for test packets")
+var remoteMAC = flag.String("remote_mac", "", "remote mac address for test packets")
+
+// TCPIPv4 maintains state about a TCP/IPv4 connection.
+type TCPIPv4 struct {
+ outgoing Layers
+ incoming Layers
+ LocalSeqNum seqnum.Value
+ RemoteSeqNum seqnum.Value
+ SynAck *TCP
+ sniffer Sniffer
+ injector Injector
+ portPickerFD int
+ t *testing.T
+}
+
+// pickPort makes a new socket and returns the socket FD and port. The caller
+// must close the FD when done with the port if there is no error.
+func pickPort() (int, uint16, error) {
+ fd, err := unix.Socket(unix.AF_INET, unix.SOCK_STREAM, 0)
+ if err != nil {
+ return -1, 0, err
+ }
+ var sa unix.SockaddrInet4
+ copy(sa.Addr[0:4], net.ParseIP(*localIPv4).To4())
+ if err := unix.Bind(fd, &sa); err != nil {
+ unix.Close(fd)
+ return -1, 0, err
+ }
+ newSockAddr, err := unix.Getsockname(fd)
+ if err != nil {
+ unix.Close(fd)
+ return -1, 0, err
+ }
+ newSockAddrInet4, ok := newSockAddr.(*unix.SockaddrInet4)
+ if !ok {
+ unix.Close(fd)
+ return -1, 0, fmt.Errorf("can't cast Getsockname result to SockaddrInet4")
+ }
+ return fd, uint16(newSockAddrInet4.Port), nil
+}
+
+// tcpLayerIndex is the position of the TCP layer in the TCPIPv4 connection. It
+// is the third, after Ethernet and IPv4.
+const tcpLayerIndex int = 2
+
+// NewTCPIPv4 creates a new TCPIPv4 connection with reasonable defaults.
+func NewTCPIPv4(t *testing.T, dut DUT, outgoingTCP, incomingTCP TCP) TCPIPv4 {
+ lMAC, err := tcpip.ParseMACAddress(*localMAC)
+ if err != nil {
+ t.Fatalf("can't parse localMAC %q: %s", *localMAC, err)
+ }
+
+ rMAC, err := tcpip.ParseMACAddress(*remoteMAC)
+ if err != nil {
+ t.Fatalf("can't parse remoteMAC %q: %s", *remoteMAC, err)
+ }
+
+ portPickerFD, localPort, err := pickPort()
+ if err != nil {
+ t.Fatalf("can't pick a port: %s", err)
+ }
+ lIP := tcpip.Address(net.ParseIP(*localIPv4).To4())
+ rIP := tcpip.Address(net.ParseIP(*remoteIPv4).To4())
+
+ sniffer, err := NewSniffer(t)
+ if err != nil {
+ t.Fatalf("can't make new sniffer: %s", err)
+ }
+
+ injector, err := NewInjector(t)
+ if err != nil {
+ t.Fatalf("can't make new injector: %s", err)
+ }
+
+ newOutgoingTCP := &TCP{
+ DataOffset: Uint8(header.TCPMinimumSize),
+ WindowSize: Uint16(32768),
+ SrcPort: &localPort,
+ }
+ if err := newOutgoingTCP.merge(outgoingTCP); err != nil {
+ t.Fatalf("can't merge %v into %v: %s", outgoingTCP, newOutgoingTCP, err)
+ }
+ newIncomingTCP := &TCP{
+ DstPort: &localPort,
+ }
+ if err := newIncomingTCP.merge(incomingTCP); err != nil {
+ t.Fatalf("can't merge %v into %v: %s", incomingTCP, newIncomingTCP, err)
+ }
+ return TCPIPv4{
+ outgoing: Layers{
+ &Ether{SrcAddr: &lMAC, DstAddr: &rMAC},
+ &IPv4{SrcAddr: &lIP, DstAddr: &rIP},
+ newOutgoingTCP},
+ incoming: Layers{
+ &Ether{SrcAddr: &rMAC, DstAddr: &lMAC},
+ &IPv4{SrcAddr: &rIP, DstAddr: &lIP},
+ newIncomingTCP},
+ sniffer: sniffer,
+ injector: injector,
+ portPickerFD: portPickerFD,
+ t: t,
+ LocalSeqNum: seqnum.Value(rand.Uint32()),
+ }
+}
+
+// Close the injector and sniffer associated with this connection.
+func (conn *TCPIPv4) Close() {
+ conn.sniffer.Close()
+ conn.injector.Close()
+ if err := unix.Close(conn.portPickerFD); err != nil {
+ conn.t.Fatalf("can't close portPickerFD: %s", err)
+ }
+ conn.portPickerFD = -1
+}
+
+// Send a packet with reasonable defaults and override some fields by tcp.
+func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) {
+ if tcp.SeqNum == nil {
+ tcp.SeqNum = Uint32(uint32(conn.LocalSeqNum))
+ }
+ if tcp.AckNum == nil {
+ tcp.AckNum = Uint32(uint32(conn.RemoteSeqNum))
+ }
+ layersToSend := deepcopy.Copy(conn.outgoing).(Layers)
+ if err := layersToSend[tcpLayerIndex].(*TCP).merge(tcp); err != nil {
+ conn.t.Fatalf("can't merge %v into %v: %s", tcp, layersToSend[tcpLayerIndex], err)
+ }
+ layersToSend = append(layersToSend, additionalLayers...)
+ outBytes, err := layersToSend.toBytes()
+ if err != nil {
+ conn.t.Fatalf("can't build outgoing TCP packet: %s", err)
+ }
+ conn.injector.Send(outBytes)
+
+ // Compute the next TCP sequence number.
+ for i := tcpLayerIndex + 1; i < len(layersToSend); i++ {
+ conn.LocalSeqNum.UpdateForward(seqnum.Size(layersToSend[i].length()))
+ }
+ if tcp.Flags != nil && *tcp.Flags&(header.TCPFlagSyn|header.TCPFlagFin) != 0 {
+ conn.LocalSeqNum.UpdateForward(1)
+ }
+}
+
+// Recv gets a packet from the sniffer within the timeout provided. If no packet
+// arrives before the timeout, it returns nil.
+func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP {
+ deadline := time.Now().Add(timeout)
+ for {
+ timeout = deadline.Sub(time.Now())
+ if timeout <= 0 {
+ break
+ }
+ b := conn.sniffer.Recv(timeout)
+ if b == nil {
+ break
+ }
+ layers, err := ParseEther(b)
+ if err != nil {
+ continue // Ignore packets that can't be parsed.
+ }
+ if !conn.incoming.match(layers) {
+ continue // Ignore packets that don't match the expected incoming.
+ }
+ tcpHeader := (layers[tcpLayerIndex]).(*TCP)
+ conn.RemoteSeqNum = seqnum.Value(*tcpHeader.SeqNum)
+ if *tcpHeader.Flags&(header.TCPFlagSyn|header.TCPFlagFin) != 0 {
+ conn.RemoteSeqNum.UpdateForward(1)
+ }
+ for i := tcpLayerIndex + 1; i < len(layers); i++ {
+ conn.RemoteSeqNum.UpdateForward(seqnum.Size(layers[i].length()))
+ }
+ return tcpHeader
+ }
+ return nil
+}
+
+// Expect a packet that matches the provided tcp within the timeout specified.
+// If it doesn't arrive in time, the test fails.
+func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) *TCP {
+ deadline := time.Now().Add(timeout)
+ for {
+ timeout = deadline.Sub(time.Now())
+ if timeout <= 0 {
+ return nil
+ }
+ gotTCP := conn.Recv(timeout)
+ if gotTCP == nil {
+ return nil
+ }
+ if tcp.match(gotTCP) {
+ return gotTCP
+ }
+ }
+}
+
+// Handshake performs a TCP 3-way handshake.
+func (conn *TCPIPv4) Handshake() {
+ // Send the SYN.
+ conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn)})
+
+ // Wait for the SYN-ACK.
+ conn.SynAck = conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
+ if conn.SynAck == nil {
+ conn.t.Fatalf("didn't get synack during handshake")
+ }
+
+ // Send an ACK.
+ conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)})
+}
diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go
new file mode 100644
index 000000000..8ea1706d3
--- /dev/null
+++ b/test/packetimpact/testbench/dut.go
@@ -0,0 +1,363 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package testbench
+
+import (
+ "context"
+ "flag"
+ "net"
+ "strconv"
+ "syscall"
+ "testing"
+ "time"
+
+ pb "gvisor.dev/gvisor/test/packetimpact/proto/posix_server_go_proto"
+
+ "golang.org/x/sys/unix"
+ "google.golang.org/grpc"
+ "google.golang.org/grpc/keepalive"
+)
+
+var (
+ posixServerIP = flag.String("posix_server_ip", "", "ip address to listen to for UDP commands")
+ posixServerPort = flag.Int("posix_server_port", 40000, "port to listen to for UDP commands")
+ rpcTimeout = flag.Duration("rpc_timeout", 100*time.Millisecond, "gRPC timeout")
+ rpcKeepalive = flag.Duration("rpc_keepalive", 10*time.Second, "gRPC keepalive")
+)
+
+// DUT communicates with the DUT to force it to make POSIX calls.
+type DUT struct {
+ t *testing.T
+ conn *grpc.ClientConn
+ posixServer PosixClient
+}
+
+// NewDUT creates a new connection with the DUT over gRPC.
+func NewDUT(t *testing.T) DUT {
+ flag.Parse()
+ posixServerAddress := *posixServerIP + ":" + strconv.Itoa(*posixServerPort)
+ conn, err := grpc.Dial(posixServerAddress, grpc.WithInsecure(), grpc.WithKeepaliveParams(keepalive.ClientParameters{Timeout: *rpcKeepalive}))
+ if err != nil {
+ t.Fatalf("failed to grpc.Dial(%s): %s", posixServerAddress, err)
+ }
+ posixServer := NewPosixClient(conn)
+ return DUT{
+ t: t,
+ conn: conn,
+ posixServer: posixServer,
+ }
+}
+
+// TearDown closes the underlying connection.
+func (dut *DUT) TearDown() {
+ dut.conn.Close()
+}
+
+// SocketWithErrno calls socket on the DUT and returns the fd and errno.
+func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) {
+ dut.t.Helper()
+ req := pb.SocketRequest{
+ Domain: domain,
+ Type: typ,
+ Protocol: proto,
+ }
+ ctx := context.Background()
+ resp, err := dut.posixServer.Socket(ctx, &req)
+ if err != nil {
+ dut.t.Fatalf("failed to call Socket: %s", err)
+ }
+ return resp.GetFd(), syscall.Errno(resp.GetErrno_())
+}
+
+// Socket calls socket on the DUT and returns the file descriptor. If socket
+// fails on the DUT, the test ends.
+func (dut *DUT) Socket(domain, typ, proto int32) int32 {
+ dut.t.Helper()
+ fd, err := dut.SocketWithErrno(domain, typ, proto)
+ if fd < 0 {
+ dut.t.Fatalf("failed to create socket: %s", err)
+ }
+ return fd
+}
+
+func (dut *DUT) sockaddrToProto(sa unix.Sockaddr) *pb.Sockaddr {
+ dut.t.Helper()
+ switch s := sa.(type) {
+ case *unix.SockaddrInet4:
+ return &pb.Sockaddr{
+ Sockaddr: &pb.Sockaddr_In{
+ In: &pb.SockaddrIn{
+ Family: unix.AF_INET,
+ Port: uint32(s.Port),
+ Addr: s.Addr[:],
+ },
+ },
+ }
+ case *unix.SockaddrInet6:
+ return &pb.Sockaddr{
+ Sockaddr: &pb.Sockaddr_In6{
+ In6: &pb.SockaddrIn6{
+ Family: unix.AF_INET6,
+ Port: uint32(s.Port),
+ Flowinfo: 0,
+ ScopeId: s.ZoneId,
+ Addr: s.Addr[:],
+ },
+ },
+ }
+ }
+ dut.t.Fatalf("can't parse Sockaddr: %+v", sa)
+ return nil
+}
+
+func (dut *DUT) protoToSockaddr(sa *pb.Sockaddr) unix.Sockaddr {
+ dut.t.Helper()
+ switch s := sa.Sockaddr.(type) {
+ case *pb.Sockaddr_In:
+ ret := unix.SockaddrInet4{
+ Port: int(s.In.GetPort()),
+ }
+ copy(ret.Addr[:], s.In.GetAddr())
+ return &ret
+ case *pb.Sockaddr_In6:
+ ret := unix.SockaddrInet6{
+ Port: int(s.In6.GetPort()),
+ ZoneId: s.In6.GetScopeId(),
+ }
+ copy(ret.Addr[:], s.In6.GetAddr())
+ }
+ dut.t.Fatalf("can't parse Sockaddr: %+v", sa)
+ return nil
+}
+
+// BindWithErrno calls bind on the DUT.
+func (dut *DUT) BindWithErrno(fd int32, sa unix.Sockaddr) (int32, error) {
+ dut.t.Helper()
+ req := pb.BindRequest{
+ Sockfd: fd,
+ Addr: dut.sockaddrToProto(sa),
+ }
+ ctx := context.Background()
+ resp, err := dut.posixServer.Bind(ctx, &req)
+ if err != nil {
+ dut.t.Fatalf("failed to call Bind: %s", err)
+ }
+ return resp.GetRet(), syscall.Errno(resp.GetErrno_())
+}
+
+// Bind calls bind on the DUT and causes a fatal test failure if it doesn't
+// succeed.
+func (dut *DUT) Bind(fd int32, sa unix.Sockaddr) {
+ dut.t.Helper()
+ ret, err := dut.BindWithErrno(fd, sa)
+ if ret != 0 {
+ dut.t.Fatalf("failed to bind socket: %s", err)
+ }
+}
+
+// GetSockNameWithErrno calls getsockname on the DUT.
+func (dut *DUT) GetSockNameWithErrno(sockfd int32) (int32, unix.Sockaddr, error) {
+ dut.t.Helper()
+ req := pb.GetSockNameRequest{
+ Sockfd: sockfd,
+ }
+ ctx := context.Background()
+ resp, err := dut.posixServer.GetSockName(ctx, &req)
+ if err != nil {
+ dut.t.Fatalf("failed to call Bind: %s", err)
+ }
+ return resp.GetRet(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_())
+}
+
+// GetSockName calls getsockname on the DUT and causes a fatal test failure if
+// it doens't succeed.
+func (dut *DUT) GetSockName(sockfd int32) unix.Sockaddr {
+ dut.t.Helper()
+ ret, sa, err := dut.GetSockNameWithErrno(sockfd)
+ if ret != 0 {
+ dut.t.Fatalf("failed to getsockname: %s", err)
+ }
+ return sa
+}
+
+// ListenWithErrno calls listen on the DUT.
+func (dut *DUT) ListenWithErrno(sockfd, backlog int32) (int32, error) {
+ dut.t.Helper()
+ req := pb.ListenRequest{
+ Sockfd: sockfd,
+ Backlog: backlog,
+ }
+ ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout)
+ defer cancel()
+ resp, err := dut.posixServer.Listen(ctx, &req)
+ if err != nil {
+ dut.t.Fatalf("failed to call Listen: %s", err)
+ }
+ return resp.GetRet(), syscall.Errno(resp.GetErrno_())
+}
+
+// Listen calls listen on the DUT and causes a fatal test failure if it doesn't
+// succeed.
+func (dut *DUT) Listen(sockfd, backlog int32) {
+ dut.t.Helper()
+ ret, err := dut.ListenWithErrno(sockfd, backlog)
+ if ret != 0 {
+ dut.t.Fatalf("failed to listen: %s", err)
+ }
+}
+
+// AcceptWithErrno calls accept on the DUT.
+func (dut *DUT) AcceptWithErrno(sockfd int32) (int32, unix.Sockaddr, error) {
+ dut.t.Helper()
+ req := pb.AcceptRequest{
+ Sockfd: sockfd,
+ }
+ ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout)
+ defer cancel()
+ resp, err := dut.posixServer.Accept(ctx, &req)
+ if err != nil {
+ dut.t.Fatalf("failed to call Accept: %s", err)
+ }
+ return resp.GetFd(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_())
+}
+
+// Accept calls accept on the DUT and causes a fatal test failure if it doesn't
+// succeed.
+func (dut *DUT) Accept(sockfd int32) (int32, unix.Sockaddr) {
+ dut.t.Helper()
+ fd, sa, err := dut.AcceptWithErrno(sockfd)
+ if fd < 0 {
+ dut.t.Fatalf("failed to accept: %s", err)
+ }
+ return fd, sa
+}
+
+// SetSockOptWithErrno calls setsockopt on the DUT.
+func (dut *DUT) SetSockOptWithErrno(sockfd, level, optname int32, optval []byte) (int32, error) {
+ dut.t.Helper()
+ req := pb.SetSockOptRequest{
+ Sockfd: sockfd,
+ Level: level,
+ Optname: optname,
+ Optval: optval,
+ }
+ ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout)
+ defer cancel()
+ resp, err := dut.posixServer.SetSockOpt(ctx, &req)
+ if err != nil {
+ dut.t.Fatalf("failed to call SetSockOpt: %s", err)
+ }
+ return resp.GetRet(), syscall.Errno(resp.GetErrno_())
+}
+
+// SetSockOpt calls setsockopt on the DUT and causes a fatal test failure if it
+// doesn't succeed.
+func (dut *DUT) SetSockOpt(sockfd, level, optname int32, optval []byte) {
+ dut.t.Helper()
+ ret, err := dut.SetSockOptWithErrno(sockfd, level, optname, optval)
+ if ret != 0 {
+ dut.t.Fatalf("failed to SetSockOpt: %s", err)
+ }
+}
+
+// SetSockOptTimevalWithErrno calls setsockopt with the timeval converted to
+// bytes.
+func (dut *DUT) SetSockOptTimevalWithErrno(sockfd, level, optname int32, tv *unix.Timeval) (int32, error) {
+ dut.t.Helper()
+ timeval := pb.Timeval{
+ Seconds: int64(tv.Sec),
+ Microseconds: int64(tv.Usec),
+ }
+ req := pb.SetSockOptTimevalRequest{
+ Sockfd: sockfd,
+ Level: level,
+ Optname: optname,
+ Timeval: &timeval,
+ }
+ ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout)
+ defer cancel()
+ resp, err := dut.posixServer.SetSockOptTimeval(ctx, &req)
+ if err != nil {
+ dut.t.Fatalf("failed to call SetSockOptTimeval: %s", err)
+ }
+ return resp.GetRet(), syscall.Errno(resp.GetErrno_())
+}
+
+// SetSockOptTimeval calls setsockopt on the DUT and causes a fatal test failure
+// if it doesn't succeed.
+func (dut *DUT) SetSockOptTimeval(sockfd, level, optname int32, tv *unix.Timeval) {
+ dut.t.Helper()
+ ret, err := dut.SetSockOptTimevalWithErrno(sockfd, level, optname, tv)
+ if ret != 0 {
+ dut.t.Fatalf("failed to SetSockOptTimeval: %s", err)
+ }
+}
+
+// CloseWithErrno calls close on the DUT.
+func (dut *DUT) CloseWithErrno(fd int32) (int32, error) {
+ dut.t.Helper()
+ req := pb.CloseRequest{
+ Fd: fd,
+ }
+ ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout)
+ defer cancel()
+ resp, err := dut.posixServer.Close(ctx, &req)
+ if err != nil {
+ dut.t.Fatalf("failed to call Close: %s", err)
+ }
+ return resp.GetRet(), syscall.Errno(resp.GetErrno_())
+}
+
+// Close calls close on the DUT and causes a fatal test failure if it doesn't
+// succeed.
+func (dut *DUT) Close(fd int32) {
+ dut.t.Helper()
+ ret, err := dut.CloseWithErrno(fd)
+ if ret != 0 {
+ dut.t.Fatalf("failed to close: %s", err)
+ }
+}
+
+// CreateListener makes a new TCP connection. If it fails, the test ends.
+func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) {
+ dut.t.Helper()
+ addr := net.ParseIP(*remoteIPv4)
+ var fd int32
+ if addr.To4() != nil {
+ fd = dut.Socket(unix.AF_INET, typ, proto)
+ sa := unix.SockaddrInet4{}
+ copy(sa.Addr[:], addr.To4())
+ dut.Bind(fd, &sa)
+ } else if addr.To16() != nil {
+ fd = dut.Socket(unix.AF_INET6, typ, proto)
+ sa := unix.SockaddrInet6{}
+ copy(sa.Addr[:], addr.To16())
+ dut.Bind(fd, &sa)
+ } else {
+ dut.t.Fatal("unknown ip addr type for remoteIP")
+ }
+ sa := dut.GetSockName(fd)
+ var port int
+ switch s := sa.(type) {
+ case *unix.SockaddrInet4:
+ port = s.Port
+ case *unix.SockaddrInet6:
+ port = s.Port
+ default:
+ dut.t.Fatalf("unknown sockaddr type from getsockname: %t", sa)
+ }
+ dut.Listen(fd, backlog)
+ return fd, uint16(port)
+}
diff --git a/test/packetimpact/testbench/dut_client.go b/test/packetimpact/testbench/dut_client.go
new file mode 100644
index 000000000..b130a33a2
--- /dev/null
+++ b/test/packetimpact/testbench/dut_client.go
@@ -0,0 +1,28 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package testbench
+
+import (
+ "google.golang.org/grpc"
+ pb "gvisor.dev/gvisor/test/packetimpact/proto/posix_server_go_proto"
+)
+
+// PosixClient is a gRPC client for the Posix service.
+type PosixClient pb.PosixClient
+
+// NewPosixClient makes a new gRPC client for the Posix service.
+func NewPosixClient(c grpc.ClientConnInterface) PosixClient {
+ return pb.NewPosixClient(c)
+}
diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go
new file mode 100644
index 000000000..35fa4dcb6
--- /dev/null
+++ b/test/packetimpact/testbench/layers.go
@@ -0,0 +1,507 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package testbench
+
+import (
+ "fmt"
+ "reflect"
+
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
+ "github.com/imdario/mergo"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+)
+
+// Layer is the interface that all encapsulations must implement.
+//
+// A Layer is an encapsulation in a packet, such as TCP, IPv4, IPv6, etc. A
+// Layer contains all the fields of the encapsulation. Each field is a pointer
+// and may be nil.
+type Layer interface {
+ // toBytes converts the Layer into bytes. In places where the Layer's field
+ // isn't nil, the value that is pointed to is used. When the field is nil, a
+ // reasonable default for the Layer is used. For example, "64" for IPv4 TTL
+ // and a calculated checksum for TCP or IP. Some layers require information
+ // from the previous or next layers in order to compute a default, such as
+ // TCP's checksum or Ethernet's type, so each Layer has a doubly-linked list
+ // to the layer's neighbors.
+ toBytes() ([]byte, error)
+
+ // match checks if the current Layer matches the provided Layer. If either
+ // Layer has a nil in a given field, that field is considered matching.
+ // Otherwise, the values pointed to by the fields must match.
+ match(Layer) bool
+
+ // length in bytes of the current encapsulation
+ length() int
+
+ // next gets a pointer to the encapsulated Layer.
+ next() Layer
+
+ // prev gets a pointer to the Layer encapsulating this one.
+ prev() Layer
+
+ // setNext sets the pointer to the encapsulated Layer.
+ setNext(Layer)
+
+ // setPrev sets the pointer to the Layer encapsulating this one.
+ setPrev(Layer)
+}
+
+// LayerBase is the common elements of all layers.
+type LayerBase struct {
+ nextLayer Layer
+ prevLayer Layer
+}
+
+func (lb *LayerBase) next() Layer {
+ return lb.nextLayer
+}
+
+func (lb *LayerBase) prev() Layer {
+ return lb.prevLayer
+}
+
+func (lb *LayerBase) setNext(l Layer) {
+ lb.nextLayer = l
+}
+
+func (lb *LayerBase) setPrev(l Layer) {
+ lb.prevLayer = l
+}
+
+func equalLayer(x, y Layer) bool {
+ opt := cmp.FilterValues(func(x, y interface{}) bool {
+ if reflect.ValueOf(x).Kind() == reflect.Ptr && reflect.ValueOf(x).IsNil() {
+ return true
+ }
+ if reflect.ValueOf(y).Kind() == reflect.Ptr && reflect.ValueOf(y).IsNil() {
+ return true
+ }
+ return false
+
+ }, cmp.Ignore())
+ return cmp.Equal(x, y, opt, cmpopts.IgnoreUnexported(LayerBase{}))
+}
+
+// Ether can construct and match the ethernet encapsulation.
+type Ether struct {
+ LayerBase
+ SrcAddr *tcpip.LinkAddress
+ DstAddr *tcpip.LinkAddress
+ Type *tcpip.NetworkProtocolNumber
+}
+
+func (l *Ether) toBytes() ([]byte, error) {
+ b := make([]byte, header.EthernetMinimumSize)
+ h := header.Ethernet(b)
+ fields := &header.EthernetFields{}
+ if l.SrcAddr != nil {
+ fields.SrcAddr = *l.SrcAddr
+ }
+ if l.DstAddr != nil {
+ fields.DstAddr = *l.DstAddr
+ }
+ if l.Type != nil {
+ fields.Type = *l.Type
+ } else {
+ switch n := l.next().(type) {
+ case *IPv4:
+ fields.Type = header.IPv4ProtocolNumber
+ default:
+ // TODO(b/150301488): Support more protocols, like IPv6.
+ return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %d", n)
+ }
+ }
+ h.Encode(fields)
+ return h, nil
+}
+
+// LinkAddress is a helper routine that allocates a new tcpip.LinkAddress value
+// to store v and returns a pointer to it.
+func LinkAddress(v tcpip.LinkAddress) *tcpip.LinkAddress {
+ return &v
+}
+
+// NetworkProtocolNumber is a helper routine that allocates a new
+// tcpip.NetworkProtocolNumber value to store v and returns a pointer to it.
+func NetworkProtocolNumber(v tcpip.NetworkProtocolNumber) *tcpip.NetworkProtocolNumber {
+ return &v
+}
+
+// ParseEther parses the bytes assuming that they start with an ethernet header
+// and continues parsing further encapsulations.
+func ParseEther(b []byte) (Layers, error) {
+ h := header.Ethernet(b)
+ ether := Ether{
+ SrcAddr: LinkAddress(h.SourceAddress()),
+ DstAddr: LinkAddress(h.DestinationAddress()),
+ Type: NetworkProtocolNumber(h.Type()),
+ }
+ layers := Layers{&ether}
+ switch h.Type() {
+ case header.IPv4ProtocolNumber:
+ moreLayers, err := ParseIPv4(b[ether.length():])
+ if err != nil {
+ return nil, err
+ }
+ return append(layers, moreLayers...), nil
+ default:
+ // TODO(b/150301488): Support more protocols, like IPv6.
+ return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %v", b)
+ }
+}
+
+func (l *Ether) match(other Layer) bool {
+ return equalLayer(l, other)
+}
+
+func (l *Ether) length() int {
+ return header.EthernetMinimumSize
+}
+
+// IPv4 can construct and match the ethernet excapulation.
+type IPv4 struct {
+ LayerBase
+ IHL *uint8
+ TOS *uint8
+ TotalLength *uint16
+ ID *uint16
+ Flags *uint8
+ FragmentOffset *uint16
+ TTL *uint8
+ Protocol *uint8
+ Checksum *uint16
+ SrcAddr *tcpip.Address
+ DstAddr *tcpip.Address
+}
+
+func (l *IPv4) toBytes() ([]byte, error) {
+ b := make([]byte, header.IPv4MinimumSize)
+ h := header.IPv4(b)
+ fields := &header.IPv4Fields{
+ IHL: 20,
+ TOS: 0,
+ TotalLength: 0,
+ ID: 0,
+ Flags: 0,
+ FragmentOffset: 0,
+ TTL: 64,
+ Protocol: 0,
+ Checksum: 0,
+ SrcAddr: tcpip.Address(""),
+ DstAddr: tcpip.Address(""),
+ }
+ if l.TOS != nil {
+ fields.TOS = *l.TOS
+ }
+ if l.TotalLength != nil {
+ fields.TotalLength = *l.TotalLength
+ } else {
+ fields.TotalLength = uint16(l.length())
+ current := l.next()
+ for current != nil {
+ fields.TotalLength += uint16(current.length())
+ current = current.next()
+ }
+ }
+ if l.ID != nil {
+ fields.ID = *l.ID
+ }
+ if l.Flags != nil {
+ fields.Flags = *l.Flags
+ }
+ if l.FragmentOffset != nil {
+ fields.FragmentOffset = *l.FragmentOffset
+ }
+ if l.TTL != nil {
+ fields.TTL = *l.TTL
+ }
+ if l.Protocol != nil {
+ fields.Protocol = *l.Protocol
+ } else {
+ switch n := l.next().(type) {
+ case *TCP:
+ fields.Protocol = uint8(header.TCPProtocolNumber)
+ default:
+ // TODO(b/150301488): Support more protocols, like UDP.
+ return nil, fmt.Errorf("can't deduce the ip header's next protocol: %+v", n)
+ }
+ }
+ if l.SrcAddr != nil {
+ fields.SrcAddr = *l.SrcAddr
+ }
+ if l.DstAddr != nil {
+ fields.DstAddr = *l.DstAddr
+ }
+ if l.Checksum != nil {
+ fields.Checksum = *l.Checksum
+ }
+ h.Encode(fields)
+ if l.Checksum == nil {
+ h.SetChecksum(^h.CalculateChecksum())
+ }
+ return h, nil
+}
+
+// Uint16 is a helper routine that allocates a new
+// uint16 value to store v and returns a pointer to it.
+func Uint16(v uint16) *uint16 {
+ return &v
+}
+
+// Uint8 is a helper routine that allocates a new
+// uint8 value to store v and returns a pointer to it.
+func Uint8(v uint8) *uint8 {
+ return &v
+}
+
+// Address is a helper routine that allocates a new tcpip.Address value to store
+// v and returns a pointer to it.
+func Address(v tcpip.Address) *tcpip.Address {
+ return &v
+}
+
+// ParseIPv4 parses the bytes assuming that they start with an ipv4 header and
+// continues parsing further encapsulations.
+func ParseIPv4(b []byte) (Layers, error) {
+ h := header.IPv4(b)
+ tos, _ := h.TOS()
+ ipv4 := IPv4{
+ IHL: Uint8(h.HeaderLength()),
+ TOS: &tos,
+ TotalLength: Uint16(h.TotalLength()),
+ ID: Uint16(h.ID()),
+ Flags: Uint8(h.Flags()),
+ FragmentOffset: Uint16(h.FragmentOffset()),
+ TTL: Uint8(h.TTL()),
+ Protocol: Uint8(h.Protocol()),
+ Checksum: Uint16(h.Checksum()),
+ SrcAddr: Address(h.SourceAddress()),
+ DstAddr: Address(h.DestinationAddress()),
+ }
+ layers := Layers{&ipv4}
+ switch h.Protocol() {
+ case uint8(header.TCPProtocolNumber):
+ moreLayers, err := ParseTCP(b[ipv4.length():])
+ if err != nil {
+ return nil, err
+ }
+ return append(layers, moreLayers...), nil
+ }
+ return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %d", h.Protocol())
+}
+
+func (l *IPv4) match(other Layer) bool {
+ return equalLayer(l, other)
+}
+
+func (l *IPv4) length() int {
+ if l.IHL == nil {
+ return header.IPv4MinimumSize
+ }
+ return int(*l.IHL)
+}
+
+// TCP can construct and match the TCP excapulation.
+type TCP struct {
+ LayerBase
+ SrcPort *uint16
+ DstPort *uint16
+ SeqNum *uint32
+ AckNum *uint32
+ DataOffset *uint8
+ Flags *uint8
+ WindowSize *uint16
+ Checksum *uint16
+ UrgentPointer *uint16
+}
+
+func (l *TCP) toBytes() ([]byte, error) {
+ b := make([]byte, header.TCPMinimumSize)
+ h := header.TCP(b)
+ if l.SrcPort != nil {
+ h.SetSourcePort(*l.SrcPort)
+ }
+ if l.DstPort != nil {
+ h.SetDestinationPort(*l.DstPort)
+ }
+ if l.SeqNum != nil {
+ h.SetSequenceNumber(*l.SeqNum)
+ }
+ if l.AckNum != nil {
+ h.SetAckNumber(*l.AckNum)
+ }
+ if l.DataOffset != nil {
+ h.SetDataOffset(*l.DataOffset)
+ }
+ if l.Flags != nil {
+ h.SetFlags(*l.Flags)
+ }
+ if l.WindowSize != nil {
+ h.SetWindowSize(*l.WindowSize)
+ }
+ if l.UrgentPointer != nil {
+ h.SetUrgentPoiner(*l.UrgentPointer)
+ }
+ if l.Checksum != nil {
+ h.SetChecksum(*l.Checksum)
+ return h, nil
+ }
+ if err := setChecksum(&h, l); err != nil {
+ return nil, err
+ }
+ return h, nil
+}
+
+// setChecksum calculates the checksum of the TCP header and sets it in h.
+func setChecksum(h *header.TCP, tcp *TCP) error {
+ h.SetChecksum(0)
+ tcpLength := uint16(tcp.length())
+ current := tcp.next()
+ for current != nil {
+ tcpLength += uint16(current.length())
+ current = current.next()
+ }
+
+ var xsum uint16
+ switch s := tcp.prev().(type) {
+ case *IPv4:
+ xsum = header.PseudoHeaderChecksum(header.TCPProtocolNumber, *s.SrcAddr, *s.DstAddr, tcpLength)
+ default:
+ // TODO(b/150301488): Support more protocols, like IPv6.
+ return fmt.Errorf("can't get src and dst addr from previous layer")
+ }
+ current = tcp.next()
+ for current != nil {
+ payload, err := current.toBytes()
+ if err != nil {
+ return fmt.Errorf("can't get bytes for next header: %s", payload)
+ }
+ xsum = header.Checksum(payload, xsum)
+ current = current.next()
+ }
+ h.SetChecksum(^h.CalculateChecksum(xsum))
+ return nil
+}
+
+// Uint32 is a helper routine that allocates a new
+// uint32 value to store v and returns a pointer to it.
+func Uint32(v uint32) *uint32 {
+ return &v
+}
+
+// ParseTCP parses the bytes assuming that they start with a tcp header and
+// continues parsing further encapsulations.
+func ParseTCP(b []byte) (Layers, error) {
+ h := header.TCP(b)
+ tcp := TCP{
+ SrcPort: Uint16(h.SourcePort()),
+ DstPort: Uint16(h.DestinationPort()),
+ SeqNum: Uint32(h.SequenceNumber()),
+ AckNum: Uint32(h.AckNumber()),
+ DataOffset: Uint8(h.DataOffset()),
+ Flags: Uint8(h.Flags()),
+ WindowSize: Uint16(h.WindowSize()),
+ Checksum: Uint16(h.Checksum()),
+ UrgentPointer: Uint16(h.UrgentPointer()),
+ }
+ layers := Layers{&tcp}
+ moreLayers, err := ParsePayload(b[tcp.length():])
+ if err != nil {
+ return nil, err
+ }
+ return append(layers, moreLayers...), nil
+}
+
+func (l *TCP) match(other Layer) bool {
+ return equalLayer(l, other)
+}
+
+func (l *TCP) length() int {
+ if l.DataOffset == nil {
+ return header.TCPMinimumSize
+ }
+ return int(*l.DataOffset)
+}
+
+// merge overrides the values in l with the values from other but only in fields
+// where the value is not nil.
+func (l *TCP) merge(other TCP) error {
+ return mergo.Merge(l, other, mergo.WithOverride)
+}
+
+// Payload has bytes beyond OSI layer 4.
+type Payload struct {
+ LayerBase
+ Bytes []byte
+}
+
+// ParsePayload parses the bytes assuming that they start with a payload and
+// continue to the end. There can be no further encapsulations.
+func ParsePayload(b []byte) (Layers, error) {
+ payload := Payload{
+ Bytes: b,
+ }
+ return Layers{&payload}, nil
+}
+
+func (l *Payload) toBytes() ([]byte, error) {
+ return l.Bytes, nil
+}
+
+func (l *Payload) match(other Layer) bool {
+ return equalLayer(l, other)
+}
+
+func (l *Payload) length() int {
+ return len(l.Bytes)
+}
+
+// Layers is an array of Layer and supports similar functions to Layer.
+type Layers []Layer
+
+func (ls *Layers) toBytes() ([]byte, error) {
+ for i, l := range *ls {
+ if i > 0 {
+ l.setPrev((*ls)[i-1])
+ }
+ if i+1 < len(*ls) {
+ l.setNext((*ls)[i+1])
+ }
+ }
+ outBytes := []byte{}
+ for _, l := range *ls {
+ layerBytes, err := l.toBytes()
+ if err != nil {
+ return nil, err
+ }
+ outBytes = append(outBytes, layerBytes...)
+ }
+ return outBytes, nil
+}
+
+func (ls *Layers) match(other Layers) bool {
+ if len(*ls) > len(other) {
+ return false
+ }
+ for i := 0; i < len(*ls); i++ {
+ if !equalLayer((*ls)[i], other[i]) {
+ return false
+ }
+ }
+ return true
+}
diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go
new file mode 100644
index 000000000..0c7d0f979
--- /dev/null
+++ b/test/packetimpact/testbench/rawsockets.go
@@ -0,0 +1,151 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package testbench
+
+import (
+ "encoding/binary"
+ "flag"
+ "math"
+ "net"
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+var device = flag.String("device", "", "local device for test packets")
+
+// Sniffer can sniff raw packets on the wire.
+type Sniffer struct {
+ t *testing.T
+ fd int
+}
+
+func htons(x uint16) uint16 {
+ buf := [2]byte{}
+ binary.BigEndian.PutUint16(buf[:], x)
+ return usermem.ByteOrder.Uint16(buf[:])
+}
+
+// NewSniffer creates a Sniffer connected to *device.
+func NewSniffer(t *testing.T) (Sniffer, error) {
+ flag.Parse()
+ snifferFd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL)))
+ if err != nil {
+ return Sniffer{}, err
+ }
+ return Sniffer{
+ t: t,
+ fd: snifferFd,
+ }, nil
+}
+
+// maxReadSize should be large enough for the maximum frame size in bytes. If a
+// packet too large for the buffer arrives, the test will get a fatal error.
+const maxReadSize int = 65536
+
+// Recv tries to read one frame until the timeout is up.
+func (s *Sniffer) Recv(timeout time.Duration) []byte {
+ deadline := time.Now().Add(timeout)
+ for {
+ timeout = deadline.Sub(time.Now())
+ if timeout <= 0 {
+ return nil
+ }
+ whole, frac := math.Modf(timeout.Seconds())
+ tv := unix.Timeval{
+ Sec: int64(whole),
+ Usec: int64(frac * float64(time.Microsecond/time.Second)),
+ }
+
+ if err := unix.SetsockoptTimeval(s.fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv); err != nil {
+ s.t.Fatalf("can't setsockopt SO_RCVTIMEO: %s", err)
+ }
+
+ buf := make([]byte, maxReadSize)
+ nread, _, err := unix.Recvfrom(s.fd, buf, unix.MSG_TRUNC)
+ if err == unix.EINTR || err == unix.EAGAIN {
+ // There was a timeout.
+ continue
+ }
+ if err != nil {
+ s.t.Fatalf("can't read: %s", err)
+ }
+ if nread > maxReadSize {
+ s.t.Fatalf("received a truncated frame of %d bytes", nread)
+ }
+ return buf[:nread]
+ }
+}
+
+// Close the socket that Sniffer is using.
+func (s *Sniffer) Close() {
+ if err := unix.Close(s.fd); err != nil {
+ s.t.Fatalf("can't close sniffer socket: %s", err)
+ }
+ s.fd = -1
+}
+
+// Injector can inject raw frames.
+type Injector struct {
+ t *testing.T
+ fd int
+}
+
+// NewInjector creates a new injector on *device.
+func NewInjector(t *testing.T) (Injector, error) {
+ flag.Parse()
+ ifInfo, err := net.InterfaceByName(*device)
+ if err != nil {
+ return Injector{}, err
+ }
+
+ var haddr [8]byte
+ copy(haddr[:], ifInfo.HardwareAddr)
+ sa := unix.SockaddrLinklayer{
+ Protocol: unix.ETH_P_IP,
+ Ifindex: ifInfo.Index,
+ Halen: uint8(len(ifInfo.HardwareAddr)),
+ Addr: haddr,
+ }
+
+ injectFd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL)))
+ if err != nil {
+ return Injector{}, err
+ }
+ if err := unix.Bind(injectFd, &sa); err != nil {
+ return Injector{}, err
+ }
+ return Injector{
+ t: t,
+ fd: injectFd,
+ }, nil
+}
+
+// Send a raw frame.
+func (i *Injector) Send(b []byte) {
+ if _, err := unix.Write(i.fd, b); err != nil {
+ i.t.Fatalf("can't write: %s", err)
+ }
+}
+
+// Close the underlying socket.
+func (i *Injector) Close() {
+ if err := unix.Close(i.fd); err != nil {
+ i.t.Fatalf("can't close sniffer socket: %s", err)
+ }
+ i.fd = -1
+}
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
new file mode 100644
index 000000000..1dff2a4d5
--- /dev/null
+++ b/test/packetimpact/tests/BUILD
@@ -0,0 +1,21 @@
+load("defs.bzl", "packetimpact_go_test")
+
+package(
+ default_visibility = ["//test/packetimpact:__subpackages__"],
+ licenses = ["notice"],
+)
+
+packetimpact_go_test(
+ name = "fin_wait2_timeout",
+ srcs = ["fin_wait2_timeout_test.go"],
+ deps = [
+ "//pkg/tcpip/header",
+ "//test/packetimpact/testbench",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+sh_binary(
+ name = "test_runner",
+ srcs = ["test_runner.sh"],
+)
diff --git a/test/packetimpact/tests/Dockerfile b/test/packetimpact/tests/Dockerfile
new file mode 100644
index 000000000..507030cc7
--- /dev/null
+++ b/test/packetimpact/tests/Dockerfile
@@ -0,0 +1,5 @@
+FROM ubuntu:bionic
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y iptables netcat tcpdump iproute2 tshark
+RUN hash -r
+CMD /bin/bash
diff --git a/test/packetimpact/tests/defs.bzl b/test/packetimpact/tests/defs.bzl
new file mode 100644
index 000000000..1b4213d9b
--- /dev/null
+++ b/test/packetimpact/tests/defs.bzl
@@ -0,0 +1,106 @@
+"""Defines rules for packetimpact test targets."""
+
+load("//tools:defs.bzl", "go_test")
+
+def _packetimpact_test_impl(ctx):
+ test_runner = ctx.executable._test_runner
+ bench = ctx.actions.declare_file("%s-bench" % ctx.label.name)
+ bench_content = "\n".join([
+ "#!/bin/bash",
+ # This test will run part in a distinct user namespace. This can cause
+ # permission problems, because all runfiles may not be owned by the
+ # current user, and no other users will be mapped in that namespace.
+ # Make sure that everything is readable here.
+ "find . -type f -exec chmod a+rx {} \\;",
+ "find . -type d -exec chmod a+rx {} \\;",
+ "%s %s --posix_server_binary %s --testbench_binary %s $@\n" % (
+ test_runner.short_path,
+ " ".join(ctx.attr.flags),
+ ctx.files._posix_server_binary[0].short_path,
+ ctx.files.testbench_binary[0].short_path,
+ ),
+ ])
+ ctx.actions.write(bench, bench_content, is_executable = True)
+
+ transitive_files = depset()
+ if hasattr(ctx.attr._test_runner, "data_runfiles"):
+ transitive_files = depset(ctx.attr._test_runner.data_runfiles.files)
+ runfiles = ctx.runfiles(
+ files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary,
+ transitive_files = transitive_files,
+ collect_default = True,
+ collect_data = True,
+ )
+ return [DefaultInfo(executable = bench, runfiles = runfiles)]
+
+_packetimpact_test = rule(
+ attrs = {
+ "_test_runner": attr.label(
+ executable = True,
+ cfg = "target",
+ default = ":test_runner",
+ ),
+ "_posix_server_binary": attr.label(
+ cfg = "target",
+ default = "//test/packetimpact/dut:posix_server",
+ ),
+ "testbench_binary": attr.label(
+ cfg = "target",
+ mandatory = True,
+ ),
+ "flags": attr.string_list(
+ mandatory = False,
+ default = [],
+ ),
+ },
+ test = True,
+ implementation = _packetimpact_test_impl,
+)
+
+PACKETIMPACT_TAGS = ["local", "manual"]
+
+def packetimpact_linux_test(name, testbench_binary, **kwargs):
+ """Add a packetimpact test on linux.
+
+ Args:
+ name: name of the test
+ testbench_binary: the testbench binary
+ **kwargs: all the other args, forwarded to _packetimpact_test
+ """
+ _packetimpact_test(
+ name = name + "_linux_test",
+ testbench_binary = testbench_binary,
+ flags = ["--dut_platform", "linux"],
+ tags = PACKETIMPACT_TAGS + ["packetimpact"],
+ **kwargs
+ )
+
+def packetimpact_netstack_test(name, testbench_binary, **kwargs):
+ """Add a packetimpact test on netstack.
+
+ Args:
+ name: name of the test
+ testbench_binary: the testbench binary
+ **kwargs: all the other args, forwarded to _packetimpact_test
+ """
+ _packetimpact_test(
+ name = name + "_netstack_test",
+ testbench_binary = testbench_binary,
+ # This is the default runtime unless
+ # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value.
+ flags = ["--dut_platform", "netstack", "--runtime=runsc-d"],
+ tags = PACKETIMPACT_TAGS + ["packetimpact"],
+ **kwargs
+ )
+
+def packetimpact_go_test(name, size = "small", pure = True, **kwargs):
+ testbench_binary = name + "_test"
+ go_test(
+ name = testbench_binary,
+ size = size,
+ pure = pure,
+ tags = PACKETIMPACT_TAGS,
+ **kwargs
+ )
+ packetimpact_linux_test(name = name, testbench_binary = testbench_binary)
+ packetimpact_netstack_test(name = name, testbench_binary = testbench_binary)
diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go
new file mode 100644
index 000000000..5f54e67ed
--- /dev/null
+++ b/test/packetimpact/tests/fin_wait2_timeout_test.go
@@ -0,0 +1,68 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fin_wait2_timeout_test
+
+import (
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func TestFinWait2Timeout(t *testing.T) {
+ for _, tt := range []struct {
+ description string
+ linger2 bool
+ }{
+ {"WithLinger2", true},
+ {"WithoutLinger2", false},
+ } {
+ t.Run(tt.description, func(t *testing.T) {
+ dut := tb.NewDUT(t)
+ defer dut.TearDown()
+ listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+ defer dut.Close(listenFd)
+ conn := tb.NewTCPIPv4(t, dut, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
+ defer conn.Close()
+ conn.Handshake()
+
+ acceptFd, _ := dut.Accept(listenFd)
+ if tt.linger2 {
+ tv := unix.Timeval{Sec: 1, Usec: 0}
+ dut.SetSockOptTimeval(acceptFd, unix.SOL_TCP, unix.TCP_LINGER2, &tv)
+ }
+ dut.Close(acceptFd)
+
+ if gotOne := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); gotOne == nil {
+ t.Fatal("expected a FIN-ACK within 1 second but got none")
+ }
+ conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+
+ time.Sleep(5 * time.Second)
+ conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+ if tt.linger2 {
+ if gotOne := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, time.Second); gotOne == nil {
+ t.Fatal("expected a RST packet within a second but got none")
+ }
+ } else {
+ if gotOne := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, 10*time.Second); gotOne != nil {
+ t.Fatal("expected no RST packets within ten seconds but got one")
+ }
+ }
+ })
+ }
+}
diff --git a/test/packetimpact/tests/test_runner.sh b/test/packetimpact/tests/test_runner.sh
new file mode 100755
index 000000000..5281cb53d
--- /dev/null
+++ b/test/packetimpact/tests/test_runner.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+
+# Copyright 2020 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run a packetimpact test. Two docker containers are made, one for the
+# Device-Under-Test (DUT) and one for the test bench. Each is attached with
+# two networks, one for control packets that aid the test and one for test
+# packets which are sent as part of the test and observed for correctness.
+
+set -euxo pipefail
+
+function failure() {
+ local lineno=$1
+ local msg=$2
+ local filename="$0"
+ echo "FAIL: $filename:$lineno: $msg"
+}
+trap 'failure ${LINENO} "$BASH_COMMAND"' ERR
+
+declare -r LONGOPTS="dut_platform:,posix_server_binary:,testbench_binary:,runtime:,tshark"
+
+# Don't use declare below so that the error from getopt will end the script.
+PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@")
+
+eval set -- "$PARSED"
+
+while true; do
+ case "$1" in
+ --dut_platform)
+ # Either "linux" or "netstack".
+ declare -r DUT_PLATFORM="$2"
+ shift 2
+ ;;
+ --posix_server_binary)
+ declare -r POSIX_SERVER_BINARY="$2"
+ shift 2
+ ;;
+ --testbench_binary)
+ declare -r TESTBENCH_BINARY="$2"
+ shift 2
+ ;;
+ --runtime)
+ # Not readonly because there might be multiple --runtime arguments and we
+ # want to use just the last one. Only used if --dut_platform is
+ # "netstack".
+ declare RUNTIME="$2"
+ shift 2
+ ;;
+ --tshark)
+ declare -r TSHARK="1"
+ shift 1
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ echo "Programming error"
+ exit 3
+ esac
+done
+
+# All the other arguments are scripts.
+declare -r scripts="$@"
+
+# Check that the required flags are defined in a way that is safe for "set -u".
+if [[ "${DUT_PLATFORM-}" == "netstack" ]]; then
+ if [[ -z "${RUNTIME-}" ]]; then
+ echo "FAIL: Missing --runtime argument: ${RUNTIME-}"
+ exit 2
+ fi
+ declare -r RUNTIME_ARG="--runtime ${RUNTIME}"
+elif [[ "${DUT_PLATFORM-}" == "linux" ]]; then
+ declare -r RUNTIME_ARG=""
+else
+ echo "FAIL: Bad or missing --dut_platform argument: ${DUT_PLATFORM-}"
+ exit 2
+fi
+if [[ ! -f "${POSIX_SERVER_BINARY-}" ]]; then
+ echo "FAIL: Bad or missing --posix_server_binary: ${POSIX_SERVER-}"
+ exit 2
+fi
+if [[ ! -f "${TESTBENCH_BINARY-}" ]]; then
+ echo "FAIL: Bad or missing --testbench_binary: ${TESTBENCH_BINARY-}"
+ exit 2
+fi
+
+# Variables specific to the control network and interface start with CTRL_.
+# Variables specific to the test network and interface start with TEST_.
+# Variables specific to the DUT start with DUT_.
+# Variables specific to the test bench start with TESTBENCH_.
+# Use random numbers so that test networks don't collide.
+declare -r CTRL_NET="ctrl_net-${RANDOM}${RANDOM}"
+declare -r TEST_NET="test_net-${RANDOM}${RANDOM}"
+# On both DUT and test bench, testing packets are on the eth2 interface.
+declare -r TEST_DEVICE="eth2"
+# Number of bits in the *_NET_PREFIX variables.
+declare -r NET_MASK="24"
+function new_net_prefix() {
+ # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24.
+ echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)"
+}
+# Last bits of the DUT's IP address.
+declare -r DUT_NET_SUFFIX=".10"
+# Control port.
+declare -r CTRL_PORT="40000"
+# Last bits of the test bench's IP address.
+declare -r TESTBENCH_NET_SUFFIX=".20"
+declare -r TIMEOUT="60"
+declare -r IMAGE_TAG="gcr.io/gvisor-presubmit/packetimpact"
+# Make sure that docker is installed.
+docker --version
+
+function finish {
+ local cleanup_success=1
+ for net in "${CTRL_NET}" "${TEST_NET}"; do
+ # Kill all processes attached to ${net}.
+ for docker_command in "kill" "rm"; do
+ (docker network inspect "${net}" \
+ --format '{{range $key, $value := .Containers}}{{$key}} {{end}}' \
+ | xargs -r docker "${docker_command}") || \
+ cleanup_success=0
+ done
+ # Remove the network.
+ docker network rm "${net}" || \
+ cleanup_success=0
+ done
+
+ if ((!$cleanup_success)); then
+ echo "FAIL: Cleanup command failed"
+ exit 4
+ fi
+}
+trap finish EXIT
+
+# Subnet for control packets between test bench and DUT.
+declare CTRL_NET_PREFIX=$(new_net_prefix)
+while ! docker network create \
+ "--subnet=${CTRL_NET_PREFIX}.0/${NET_MASK}" "${CTRL_NET}"; do
+ sleep 0.1
+ declare CTRL_NET_PREFIX=$(new_net_prefix)
+done
+
+# Subnet for the packets that are part of the test.
+declare TEST_NET_PREFIX=$(new_net_prefix)
+while ! docker network create \
+ "--subnet=${TEST_NET_PREFIX}.0/${NET_MASK}" "${TEST_NET}"; do
+ sleep 0.1
+ declare TEST_NET_PREFIX=$(new_net_prefix)
+done
+
+docker pull "${IMAGE_TAG}"
+
+# Create the DUT container and connect to network.
+DUT=$(docker create ${RUNTIME_ARG} --privileged --rm \
+ --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG})
+docker network connect "${CTRL_NET}" \
+ --ip "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \
+ || (docker kill ${DUT}; docker rm ${DUT}; false)
+docker network connect "${TEST_NET}" \
+ --ip "${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \
+ || (docker kill ${DUT}; docker rm ${DUT}; false)
+docker start "${DUT}"
+
+# Create the test bench container and connect to network.
+TESTBENCH=$(docker create --privileged --rm \
+ --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG})
+docker network connect "${CTRL_NET}" \
+ --ip "${CTRL_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \
+ || (docker kill ${TESTBENCH}; docker rm ${TESTBENCH}; false)
+docker network connect "${TEST_NET}" \
+ --ip "${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \
+ || (docker kill ${TESTBENCH}; docker rm ${TESTBENCH}; false)
+docker start "${TESTBENCH}"
+
+# Start the posix_server in the DUT.
+declare -r DOCKER_POSIX_SERVER_BINARY="/$(basename ${POSIX_SERVER_BINARY})"
+docker cp -L ${POSIX_SERVER_BINARY} "${DUT}:${DOCKER_POSIX_SERVER_BINARY}"
+
+docker exec -t "${DUT}" \
+ /bin/bash -c "${DOCKER_POSIX_SERVER_BINARY} \
+ --ip ${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \
+ --port ${CTRL_PORT}" &
+
+# Because the Linux kernel receives the SYN-ACK but didn't send the SYN it will
+# issue a RST. To prevent this IPtables can be used to filter those out.
+docker exec "${TESTBENCH}" \
+ iptables -A INPUT -i ${TEST_DEVICE} -j DROP
+
+# Wait for the DUT server to come up. Attempt to connect to it from the test
+# bench every 100 milliseconds until success.
+while ! docker exec "${TESTBENCH}" \
+ nc -zv "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${CTRL_PORT}"; do
+ sleep 0.1
+done
+
+declare -r REMOTE_MAC=$(docker exec -t "${DUT}" ip link show \
+ "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6)
+declare -r LOCAL_MAC=$(docker exec -t "${TESTBENCH}" ip link show \
+ "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6)
+
+declare -r DOCKER_TESTBENCH_BINARY="/$(basename ${TESTBENCH_BINARY})"
+docker cp -L "${TESTBENCH_BINARY}" "${TESTBENCH}:${DOCKER_TESTBENCH_BINARY}"
+
+if [[ -z "${TSHARK-}" ]]; then
+ # Run tcpdump in the test bench unbuffered, without dns resolution, just on
+ # the interface with the test packets.
+ docker exec -t "${TESTBENCH}" \
+ tcpdump -S -vvv -U -n -i "${TEST_DEVICE}" net "${TEST_NET_PREFIX}/24" &
+else
+ # Run tshark in the test bench unbuffered, without dns resolution, just on the
+ # interface with the test packets.
+ docker exec -t "${TESTBENCH}" \
+ tshark -V -l -n -i "${TEST_DEVICE}" \
+ host "${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" &
+fi
+
+# tcpdump and tshark take time to startup
+sleep 3
+
+# Start a packetimpact test on the test bench. The packetimpact test sends and
+# receives packets and also sends POSIX socket commands to the posix_server to
+# be executed on the DUT.
+docker exec -t "${TESTBENCH}" \
+ /bin/bash -c "${DOCKER_TESTBENCH_BINARY} \
+ --posix_server_ip=${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \
+ --posix_server_port=${CTRL_PORT} \
+ --remote_ipv4=${TEST_NET_PREFIX}${DUT_NET_SUFFIX} \
+ --local_ipv4=${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX} \
+ --remote_mac=${REMOTE_MAC} \
+ --local_mac=${LOCAL_MAC} \
+ --device=${TEST_DEVICE}"
+
+echo PASS: No errors.
diff --git a/test/perf/linux/futex_benchmark.cc b/test/perf/linux/futex_benchmark.cc
index b349d50bf..241f39896 100644
--- a/test/perf/linux/futex_benchmark.cc
+++ b/test/perf/linux/futex_benchmark.cc
@@ -33,24 +33,24 @@ namespace testing {
namespace {
inline int FutexWait(std::atomic<int32_t>* v, int32_t val) {
- return syscall(SYS_futex, v, FUTEX_BITSET_MATCH_ANY, nullptr);
+ return syscall(SYS_futex, v, FUTEX_WAIT_PRIVATE, val, nullptr);
}
-inline int FutexWaitRelativeTimeout(std::atomic<int32_t>* v, int32_t val,
- const struct timespec* reltime) {
- return syscall(SYS_futex, v, FUTEX_WAIT_PRIVATE, reltime);
+inline int FutexWaitMonotonicTimeout(std::atomic<int32_t>* v, int32_t val,
+ const struct timespec* timeout) {
+ return syscall(SYS_futex, v, FUTEX_WAIT_PRIVATE, val, timeout);
}
-inline int FutexWaitAbsoluteTimeout(std::atomic<int32_t>* v, int32_t val,
- const struct timespec* abstime) {
- return syscall(SYS_futex, v, FUTEX_BITSET_MATCH_ANY, abstime);
+inline int FutexWaitMonotonicDeadline(std::atomic<int32_t>* v, int32_t val,
+ const struct timespec* deadline) {
+ return syscall(SYS_futex, v, FUTEX_WAIT_BITSET_PRIVATE, val, deadline,
+ nullptr, FUTEX_BITSET_MATCH_ANY);
}
-inline int FutexWaitBitsetAbsoluteTimeout(std::atomic<int32_t>* v, int32_t val,
- int32_t bits,
- const struct timespec* abstime) {
+inline int FutexWaitRealtimeDeadline(std::atomic<int32_t>* v, int32_t val,
+ const struct timespec* deadline) {
return syscall(SYS_futex, v, FUTEX_WAIT_BITSET_PRIVATE | FUTEX_CLOCK_REALTIME,
- val, abstime, nullptr, bits);
+ val, deadline, nullptr, FUTEX_BITSET_MATCH_ANY);
}
inline int FutexWake(std::atomic<int32_t>* v, int32_t count) {
@@ -62,11 +62,11 @@ void BM_FutexWakeNop(benchmark::State& state) {
std::atomic<int32_t> v(0);
for (auto _ : state) {
- EXPECT_EQ(0, FutexWake(&v, 1));
+ TEST_PCHECK(FutexWake(&v, 1) == 0);
}
}
-BENCHMARK(BM_FutexWakeNop);
+BENCHMARK(BM_FutexWakeNop)->MinTime(5);
// This just uses FUTEX_WAIT on an address whose value has changed, i.e., the
// syscall won't wait.
@@ -74,43 +74,63 @@ void BM_FutexWaitNop(benchmark::State& state) {
std::atomic<int32_t> v(0);
for (auto _ : state) {
- EXPECT_EQ(-EAGAIN, FutexWait(&v, 1));
+ TEST_PCHECK(FutexWait(&v, 1) == -1 && errno == EAGAIN);
}
}
-BENCHMARK(BM_FutexWaitNop);
+BENCHMARK(BM_FutexWaitNop)->MinTime(5);
// This uses FUTEX_WAIT with a timeout on an address whose value never
// changes, such that it always times out. Timeout overhead can be estimated by
// timer overruns for short timeouts.
-void BM_FutexWaitTimeout(benchmark::State& state) {
+void BM_FutexWaitMonotonicTimeout(benchmark::State& state) {
const int timeout_ns = state.range(0);
std::atomic<int32_t> v(0);
auto ts = absl::ToTimespec(absl::Nanoseconds(timeout_ns));
for (auto _ : state) {
- EXPECT_EQ(-ETIMEDOUT, FutexWaitRelativeTimeout(&v, 0, &ts));
+ TEST_PCHECK(FutexWaitMonotonicTimeout(&v, 0, &ts) == -1 &&
+ errno == ETIMEDOUT);
}
}
-BENCHMARK(BM_FutexWaitTimeout)
+BENCHMARK(BM_FutexWaitMonotonicTimeout)
+ ->MinTime(5)
+ ->UseRealTime()
->Arg(1)
->Arg(10)
->Arg(100)
->Arg(1000)
->Arg(10000);
-// This calls FUTEX_WAIT_BITSET with CLOCK_REALTIME.
-void BM_FutexWaitBitset(benchmark::State& state) {
+// This uses FUTEX_WAIT_BITSET with a deadline that is in the past. This allows
+// estimation of the overhead of setting up a timer for a deadline (as opposed
+// to a timeout as specified for FUTEX_WAIT).
+void BM_FutexWaitMonotonicDeadline(benchmark::State& state) {
std::atomic<int32_t> v(0);
- int timeout_ns = state.range(0);
- auto ts = absl::ToTimespec(absl::Nanoseconds(timeout_ns));
+ struct timespec ts = {};
+
for (auto _ : state) {
- EXPECT_EQ(-ETIMEDOUT, FutexWaitBitsetAbsoluteTimeout(&v, 0, 1, &ts));
+ TEST_PCHECK(FutexWaitMonotonicDeadline(&v, 0, &ts) == -1 &&
+ errno == ETIMEDOUT);
}
}
-BENCHMARK(BM_FutexWaitBitset)->Range(0, 100000);
+BENCHMARK(BM_FutexWaitMonotonicDeadline)->MinTime(5);
+
+// This is equivalent to BM_FutexWaitMonotonicDeadline, but uses CLOCK_REALTIME
+// instead of CLOCK_MONOTONIC for the deadline.
+void BM_FutexWaitRealtimeDeadline(benchmark::State& state) {
+ std::atomic<int32_t> v(0);
+ struct timespec ts = {};
+
+ for (auto _ : state) {
+ TEST_PCHECK(FutexWaitRealtimeDeadline(&v, 0, &ts) == -1 &&
+ errno == ETIMEDOUT);
+ }
+}
+
+BENCHMARK(BM_FutexWaitRealtimeDeadline)->MinTime(5);
int64_t GetCurrentMonotonicTimeNanos() {
struct timespec ts;
@@ -130,11 +150,10 @@ void SpinNanos(int64_t delay_ns) {
// Each iteration of FutexRoundtripDelayed involves a thread sending a futex
// wakeup to another thread, which spins for delay_us and then sends a futex
-// wakeup back. The time per iteration is 2* (delay_us + kBeforeWakeDelayNs +
+// wakeup back. The time per iteration is 2 * (delay_us + kBeforeWakeDelayNs +
// futex/scheduling overhead).
void BM_FutexRoundtripDelayed(benchmark::State& state) {
const int delay_us = state.range(0);
-
const int64_t delay_ns = delay_us * 1000;
// Spin for an extra kBeforeWakeDelayNs before invoking FUTEX_WAKE to reduce
// the probability that the wakeup comes before the wait, preventing the wait
@@ -165,83 +184,14 @@ void BM_FutexRoundtripDelayed(benchmark::State& state) {
}
BENCHMARK(BM_FutexRoundtripDelayed)
+ ->MinTime(5)
+ ->UseRealTime()
->Arg(0)
->Arg(10)
->Arg(20)
->Arg(50)
->Arg(100);
-// FutexLock is a simple, dumb futex based lock implementation.
-// It will try to acquire the lock by atomically incrementing the
-// lock word. If it did not increment the lock from 0 to 1, someone
-// else has the lock, so it will FUTEX_WAIT until it is woken in
-// the unlock path.
-class FutexLock {
- public:
- FutexLock() : lock_word_(0) {}
-
- void lock(struct timespec* deadline) {
- int32_t val;
- while ((val = lock_word_.fetch_add(1, std::memory_order_acquire) + 1) !=
- 1) {
- // If we didn't get the lock by incrementing from 0 to 1,
- // do a FUTEX_WAIT with the desired current value set to
- // val. If val is no longer what the atomic increment returned,
- // someone might have set it to 0 so we can try to acquire
- // again.
- int ret = FutexWaitAbsoluteTimeout(&lock_word_, val, deadline);
- if (ret == 0 || ret == -EWOULDBLOCK || ret == -EINTR) {
- continue;
- } else {
- FAIL() << "unexpected FUTEX_WAIT return: " << ret;
- }
- }
- }
-
- void unlock() {
- // Store 0 into the lock word and wake one waiter. We intentionally
- // ignore the return value of the FUTEX_WAKE here, since there may be
- // no waiters to wake anyway.
- lock_word_.store(0, std::memory_order_release);
- (void)FutexWake(&lock_word_, 1);
- }
-
- private:
- std::atomic<int32_t> lock_word_;
-};
-
-FutexLock* test_lock; // Used below.
-
-void FutexContend(benchmark::State& state, int thread_index,
- struct timespec* deadline) {
- int counter = 0;
- if (thread_index == 0) {
- test_lock = new FutexLock();
- }
- for (auto _ : state) {
- test_lock->lock(deadline);
- counter++;
- test_lock->unlock();
- }
- if (thread_index == 0) {
- delete test_lock;
- }
- state.SetItemsProcessed(state.iterations());
-}
-
-void BM_FutexContend(benchmark::State& state) {
- FutexContend(state, state.thread_index, nullptr);
-}
-
-BENCHMARK(BM_FutexContend)->ThreadRange(1, 1024)->UseRealTime();
-
-void BM_FutexDeadlineContend(benchmark::State& state) {
- auto deadline = absl::ToTimespec(absl::Now() + absl::Minutes(10));
- FutexContend(state, state.thread_index, &deadline);
-}
-
-BENCHMARK(BM_FutexDeadlineContend)->ThreadRange(1, 1024)->UseRealTime();
-
} // namespace
} // namespace testing
diff --git a/test/root/runsc_test.go b/test/root/runsc_test.go
index 28bb60a12..90373e2db 100644
--- a/test/root/runsc_test.go
+++ b/test/root/runsc_test.go
@@ -18,6 +18,7 @@ import (
"bytes"
"fmt"
"io/ioutil"
+ "os"
"os/exec"
"path/filepath"
"strconv"
@@ -117,6 +118,10 @@ func sandboxPid(pid int) (int, error) {
cmdline, err := ioutil.ReadFile(filepath.Join("/proc", line, "cmdline"))
if err != nil {
+ if os.IsNotExist(err) {
+ // Raced with process exit.
+ continue
+ }
return 0, err
}
args := strings.SplitN(string(cmdline), "\x00", 2)
diff --git a/test/runner/runner.go b/test/runner/runner.go
index a78ef38e0..0d3742f71 100644
--- a/test/runner/runner.go
+++ b/test/runner/runner.go
@@ -300,6 +300,7 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) {
// Test spec comes with pre-defined mounts that we don't want. Reset it.
spec.Mounts = nil
+ testTmpDir := "/tmp"
if *useTmpfs {
// Forces '/tmp' to be mounted as tmpfs, otherwise test that rely on
// features only available in gVisor's internal tmpfs may fail.
@@ -325,11 +326,19 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) {
t.Fatalf("could not chmod temp dir: %v", err)
}
- spec.Mounts = append(spec.Mounts, specs.Mount{
- Type: "bind",
- Destination: "/tmp",
- Source: tmpDir,
- })
+ // "/tmp" is not replaced with a tmpfs mount inside the sandbox
+ // when it's not empty. This ensures that testTmpDir uses gofer
+ // in exclusive mode.
+ testTmpDir = tmpDir
+ if *fileAccess == "shared" {
+ // All external mounts except the root mount are shared.
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Type: "bind",
+ Destination: "/tmp",
+ Source: tmpDir,
+ })
+ testTmpDir = "/tmp"
+ }
}
// Set environment variables that indicate we are
@@ -349,12 +358,8 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) {
// Set TEST_TMPDIR to /tmp, as some of the syscall tests require it to
// be backed by tmpfs.
- for i, kv := range env {
- if strings.HasPrefix(kv, "TEST_TMPDIR=") {
- env[i] = "TEST_TMPDIR=/tmp"
- break
- }
- }
+ env = filterEnv(env, []string{"TEST_TMPDIR"})
+ env = append(env, fmt.Sprintf("TEST_TMPDIR=%s", testTmpDir))
spec.Process.Env = env
diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc
index 05c952b99..4e23d1e78 100644
--- a/test/syscalls/linux/proc_net.cc
+++ b/test/syscalls/linux/proc_net.cc
@@ -92,6 +92,59 @@ TEST(ProcSysNetIpv4Sack, CanReadAndWrite) {
EXPECT_EQ(buf, to_write);
}
+// DeviceEntry is an entry in /proc/net/dev
+struct DeviceEntry {
+ std::string name;
+ uint64_t stats[16];
+};
+
+PosixErrorOr<std::vector<DeviceEntry>> GetDeviceMetricsFromProc(
+ const std::string dev) {
+ std::vector<std::string> lines = absl::StrSplit(dev, '\n');
+ std::vector<DeviceEntry> entries;
+
+ // /proc/net/dev prints 2 lines of headers followed by a line of metrics for
+ // each network interface.
+ for (unsigned i = 2; i < lines.size(); i++) {
+ // Ignore empty lines.
+ if (lines[i].empty()) {
+ continue;
+ }
+
+ std::vector<std::string> values =
+ absl::StrSplit(lines[i], ' ', absl::SkipWhitespace());
+
+ // Interface name + 16 values.
+ if (values.size() != 17) {
+ return PosixError(EINVAL, "invalid line: " + lines[i]);
+ }
+
+ DeviceEntry entry;
+ entry.name = values[0];
+ // Skip the interface name and read only the values.
+ for (unsigned j = 1; j < 17; j++) {
+ uint64_t num;
+ if (!absl::SimpleAtoi(values[j], &num)) {
+ return PosixError(EINVAL, "invalid value: " + values[j]);
+ }
+ entry.stats[j - 1] = num;
+ }
+
+ entries.push_back(entry);
+ }
+
+ return entries;
+}
+
+// TEST(ProcNetDev, Format) tests that /proc/net/dev is parsable and
+// contains at least one entry.
+TEST(ProcNetDev, Format) {
+ auto dev = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/dev"));
+ auto entries = ASSERT_NO_ERRNO_AND_VALUE(GetDeviceMetricsFromProc(dev));
+
+ EXPECT_GT(entries.size(), 0);
+}
+
PosixErrorOr<uint64_t> GetSNMPMetricFromProc(const std::string snmp,
const std::string& type,
const std::string& item) {
diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc
index 06cc6a64e..ce88d90dd 100644
--- a/test/syscalls/linux/seccomp.cc
+++ b/test/syscalls/linux/seccomp.cc
@@ -70,27 +70,27 @@ void ApplySeccompFilter(uint32_t sysno, uint32_t filtered_result,
MaybeSave();
struct sock_filter filter[] = {
- // A = seccomp_data.arch
- BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 4),
+ // A = seccomp_data.arch
+ BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 4),
#if defined(__x86_64__)
- // if (A != AUDIT_ARCH_X86_64) goto kill
- BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4),
+ // if (A != AUDIT_ARCH_X86_64) goto kill
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4),
#elif defined(__aarch64__)
- // if (A != AUDIT_ARCH_AARCH64) goto kill
- BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_AARCH64, 0, 4),
+ // if (A != AUDIT_ARCH_AARCH64) goto kill
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_AARCH64, 0, 4),
#else
#error "Unknown architecture"
#endif
- // A = seccomp_data.nr
- BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0),
- // if (A != sysno) goto allow
- BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, sysno, 0, 1),
- // return filtered_result
- BPF_STMT(BPF_RET | BPF_K, filtered_result),
- // allow: return SECCOMP_RET_ALLOW
- BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
- // kill: return SECCOMP_RET_KILL
- BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
+ // A = seccomp_data.nr
+ BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0),
+ // if (A != sysno) goto allow
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, sysno, 0, 1),
+ // return filtered_result
+ BPF_STMT(BPF_RET | BPF_K, filtered_result),
+ // allow: return SECCOMP_RET_ALLOW
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+ // kill: return SECCOMP_RET_KILL
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
};
struct sock_fprog prog;
prog.len = ABSL_ARRAYSIZE(filter);
diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc
index c951ac3b3..2503960f3 100644
--- a/test/syscalls/linux/stat.cc
+++ b/test/syscalls/linux/stat.cc
@@ -34,6 +34,13 @@
#include "test/util/temp_path.h"
#include "test/util/test_util.h"
+#ifndef AT_STATX_FORCE_SYNC
+#define AT_STATX_FORCE_SYNC 0x2000
+#endif
+#ifndef AT_STATX_DONT_SYNC
+#define AT_STATX_DONT_SYNC 0x4000
+#endif
+
namespace gvisor {
namespace testing {
@@ -607,7 +614,7 @@ int statx(int dirfd, const char* pathname, int flags, unsigned int mask,
}
TEST_F(StatTest, StatxAbsPath) {
- SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
errno == ENOSYS);
struct kernel_statx stx;
@@ -617,7 +624,7 @@ TEST_F(StatTest, StatxAbsPath) {
}
TEST_F(StatTest, StatxRelPathDirFD) {
- SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
errno == ENOSYS);
struct kernel_statx stx;
@@ -631,7 +638,7 @@ TEST_F(StatTest, StatxRelPathDirFD) {
}
TEST_F(StatTest, StatxRelPathCwd) {
- SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
errno == ENOSYS);
ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
@@ -643,7 +650,7 @@ TEST_F(StatTest, StatxRelPathCwd) {
}
TEST_F(StatTest, StatxEmptyPath) {
- SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
errno == ENOSYS);
const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
@@ -653,6 +660,60 @@ TEST_F(StatTest, StatxEmptyPath) {
EXPECT_TRUE(S_ISREG(stx.stx_mode));
}
+TEST_F(StatTest, StatxDoesNotRejectExtraneousMaskBits) {
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+ errno == ENOSYS);
+
+ struct kernel_statx stx;
+ // Set all mask bits except for STATX__RESERVED.
+ uint mask = 0xffffffff & ~0x80000000;
+ EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, mask, &stx),
+ SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxRejectsReservedMaskBit) {
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+ errno == ENOSYS);
+
+ struct kernel_statx stx;
+ // Set STATX__RESERVED in the mask.
+ EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, 0x80000000, &stx),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(StatTest, StatxSymlink) {
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+ errno == ENOSYS);
+
+ std::string parent_dir = "/tmp";
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo(parent_dir, test_file_name_));
+ std::string p = link.path();
+
+ struct kernel_statx stx;
+ EXPECT_THAT(statx(AT_FDCWD, p.c_str(), AT_SYMLINK_NOFOLLOW, STATX_ALL, &stx),
+ SyscallSucceeds());
+ EXPECT_TRUE(S_ISLNK(stx.stx_mode));
+ EXPECT_THAT(statx(AT_FDCWD, p.c_str(), 0, STATX_ALL, &stx),
+ SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxInvalidFlags) {
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+ errno == ENOSYS);
+
+ struct kernel_statx stx;
+ EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(), 12345, 0, &stx),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Sync flags are mutually exclusive.
+ EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(),
+ AT_STATX_FORCE_SYNC | AT_STATX_DONT_SYNC, 0, &stx),
+ SyscallFailsWithErrno(EINVAL));
+}
+
} // namespace
} // namespace testing
diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc
index 7e73325bf..92eec0449 100644
--- a/test/syscalls/linux/sticky.cc
+++ b/test/syscalls/linux/sticky.cc
@@ -42,8 +42,9 @@ TEST(StickyTest, StickyBitPermDenied) {
auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
- std::string path = JoinPath(dir.path(), "NewDir");
- ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds());
+ const FileDescriptor dirfd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY));
+ ASSERT_THAT(mkdirat(dirfd.get(), "NewDir", 0755), SyscallSucceeds());
// Drop privileges and change IDs only in child thread, or else this parent
// thread won't be able to open some log files after the test ends.
@@ -61,7 +62,8 @@ TEST(StickyTest, StickyBitPermDenied) {
syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1),
SyscallSucceeds());
- EXPECT_THAT(rmdir(path.c_str()), SyscallFailsWithErrno(EPERM));
+ EXPECT_THAT(unlinkat(dirfd.get(), "NewDir", AT_REMOVEDIR),
+ SyscallFailsWithErrno(EPERM));
});
}
@@ -96,8 +98,9 @@ TEST(StickyTest, StickyBitCapFOWNER) {
auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
- std::string path = JoinPath(dir.path(), "NewDir");
- ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds());
+ const FileDescriptor dirfd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY));
+ ASSERT_THAT(mkdirat(dirfd.get(), "NewDir", 0755), SyscallSucceeds());
// Drop privileges and change IDs only in child thread, or else this parent
// thread won't be able to open some log files after the test ends.
@@ -114,7 +117,8 @@ TEST(StickyTest, StickyBitCapFOWNER) {
SyscallSucceeds());
EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true));
- EXPECT_THAT(rmdir(path.c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlinkat(dirfd.get(), "NewDir", AT_REMOVEDIR),
+ SyscallSucceeds());
});
}
} // namespace
diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc
index f734511d6..53ad2dda3 100644
--- a/test/syscalls/linux/tuntap.cc
+++ b/test/syscalls/linux/tuntap.cc
@@ -256,50 +256,59 @@ TEST_F(TuntapTest, WriteToDownDevice) {
EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EIO));
}
-// This test sets up a TAP device and pings kernel by sending ICMP echo request.
-//
-// It works as the following:
-// * Open /dev/net/tun, and create kTapName interface.
-// * Use rtnetlink to do initial setup of the interface:
-// * Assign IP address 10.0.0.1/24 to kernel.
-// * MAC address: kMacA
-// * Bring up the interface.
-// * Send an ICMP echo reqest (ping) packet from 10.0.0.2 (kMacB) to kernel.
-// * Loop to receive packets from TAP device/fd:
-// * If packet is an ICMP echo reply, it stops and passes the test.
-// * If packet is an ARP request, it responds with canned reply and resends
-// the
-// ICMP request packet.
-TEST_F(TuntapTest, PingKernel) {
- SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
-
+PosixErrorOr<FileDescriptor> OpenAndAttachTap(
+ const std::string& dev_name, const std::string& dev_ipv4_addr) {
// Interface creation.
- FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
+ ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, Open(kDevNetTun, O_RDWR));
struct ifreq ifr_set = {};
ifr_set.ifr_flags = IFF_TAP;
- strncpy(ifr_set.ifr_name, kTapName, IFNAMSIZ);
- EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set),
- SyscallSucceedsWithValue(0));
+ strncpy(ifr_set.ifr_name, dev_name.c_str(), IFNAMSIZ);
+ if (ioctl(fd.get(), TUNSETIFF, &ifr_set) < 0) {
+ return PosixError(errno);
+ }
- absl::optional<Link> link =
- ASSERT_NO_ERRNO_AND_VALUE(GetLinkByName(kTapName));
- ASSERT_TRUE(link.has_value());
+ ASSIGN_OR_RETURN_ERRNO(absl::optional<Link> link, GetLinkByName(dev_name));
+ if (!link.has_value()) {
+ return PosixError(ENOENT, "no link");
+ }
// Interface setup.
struct in_addr addr;
- inet_pton(AF_INET, "10.0.0.1", &addr);
+ inet_pton(AF_INET, dev_ipv4_addr.c_str(), &addr);
EXPECT_NO_ERRNO(LinkAddLocalAddr(link->index, AF_INET, /*prefixlen=*/24,
&addr, sizeof(addr)));
if (!IsRunningOnGvisor()) {
// FIXME: gVisor doesn't support setting MAC address on interfaces yet.
- EXPECT_NO_ERRNO(LinkSetMacAddr(link->index, kMacA, sizeof(kMacA)));
+ RETURN_IF_ERRNO(LinkSetMacAddr(link->index, kMacA, sizeof(kMacA)));
// FIXME: gVisor always creates enabled/up'd interfaces.
- EXPECT_NO_ERRNO(LinkChangeFlags(link->index, IFF_UP, IFF_UP));
+ RETURN_IF_ERRNO(LinkChangeFlags(link->index, IFF_UP, IFF_UP));
}
+ return fd;
+}
+
+// This test sets up a TAP device and pings kernel by sending ICMP echo request.
+//
+// It works as the following:
+// * Open /dev/net/tun, and create kTapName interface.
+// * Use rtnetlink to do initial setup of the interface:
+// * Assign IP address 10.0.0.1/24 to kernel.
+// * MAC address: kMacA
+// * Bring up the interface.
+// * Send an ICMP echo reqest (ping) packet from 10.0.0.2 (kMacB) to kernel.
+// * Loop to receive packets from TAP device/fd:
+// * If packet is an ICMP echo reply, it stops and passes the test.
+// * If packet is an ARP request, it responds with canned reply and resends
+// the
+// ICMP request packet.
+TEST_F(TuntapTest, PingKernel) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1"));
ping_pkt ping_req = CreatePingPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1");
std::string arp_rep = CreateArpPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1");
@@ -349,5 +358,47 @@ TEST_F(TuntapTest, PingKernel) {
}
}
+TEST_F(TuntapTest, SendUdpTriggersArpResolution) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1"));
+
+ // Send a UDP packet to remote.
+ int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
+ ASSERT_THAT(sock, SyscallSucceeds());
+
+ struct sockaddr_in remote = {};
+ remote.sin_family = AF_INET;
+ remote.sin_port = htons(42);
+ inet_pton(AF_INET, "10.0.0.2", &remote.sin_addr);
+ int ret = sendto(sock, "hello", 5, 0, reinterpret_cast<sockaddr*>(&remote),
+ sizeof(remote));
+ ASSERT_THAT(ret, ::testing::AnyOf(SyscallSucceeds(),
+ SyscallFailsWithErrno(EHOSTDOWN)));
+
+ struct inpkt {
+ union {
+ pihdr pi;
+ arp_pkt arp;
+ };
+ };
+ while (1) {
+ inpkt r = {};
+ int n = read(fd.get(), &r, sizeof(r));
+ EXPECT_THAT(n, SyscallSucceeds());
+
+ if (n < sizeof(pihdr)) {
+ std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol
+ << " len: " << n << std::endl;
+ continue;
+ }
+
+ if (n >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) {
+ break;
+ }
+ }
+}
+
} // namespace testing
} // namespace gvisor