From 386a1a1564e57c36726ea5a45d6e4f739847a658 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Thu, 6 Feb 2020 17:06:14 -0800 Subject: Fix TestPauseResume in container test failed with connection refused. Sometimes we get this error under TSAN: """ error getting process data from container: connecting to control server at PID XXXX: connection refused """ The theory is that the top "sleep 20" was too short for TSAN, and the container already exited, so we get connected refused. This commit changes the test to let container signaling it's running by touching a file repeatedly forever during the test. PiperOrigin-RevId: 293710957 --- runsc/container/container_test.go | 201 ++++++++++++++++---------------------- 1 file changed, 85 insertions(+), 116 deletions(-) (limited to 'runsc/container') diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index b54d8f712..04a7dc237 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -163,7 +163,7 @@ func createWriteableOutputFile(path string) (*os.File, error) { return outputFile, nil } -func waitForFile(f *os.File) error { +func waitForFileNotEmpty(f *os.File) error { op := func() error { fi, err := f.Stat() if err != nil { @@ -178,6 +178,17 @@ func waitForFile(f *os.File) error { return testutil.Poll(op, 30*time.Second) } +func waitForFileExist(path string) error { + op := func() error { + if _, err := os.Stat(path); os.IsNotExist(err) { + return err + } + return nil + } + + return testutil.Poll(op, 30*time.Second) +} + // readOutputNum reads a file at given filepath and returns the int at the // requested position. func readOutputNum(file string, position int) (int, error) { @@ -187,7 +198,7 @@ func readOutputNum(file string, position int) (int, error) { } // Ensure that there is content in output file. - if err := waitForFile(f); err != nil { + if err := waitForFileNotEmpty(f); err != nil { return 0, fmt.Errorf("error waiting for output file: %v", err) } @@ -801,7 +812,7 @@ func TestCheckpointRestore(t *testing.T) { defer file.Close() // Wait until application has ran. - if err := waitForFile(outputFile); err != nil { + if err := waitForFileNotEmpty(outputFile); err != nil { t.Fatalf("Failed to wait for output file: %v", err) } @@ -843,7 +854,7 @@ func TestCheckpointRestore(t *testing.T) { } // Wait until application has ran. - if err := waitForFile(outputFile2); err != nil { + if err := waitForFileNotEmpty(outputFile2); err != nil { t.Fatalf("Failed to wait for output file: %v", err) } @@ -887,7 +898,7 @@ func TestCheckpointRestore(t *testing.T) { } // Wait until application has ran. - if err := waitForFile(outputFile3); err != nil { + if err := waitForFileNotEmpty(outputFile3); err != nil { t.Fatalf("Failed to wait for output file: %v", err) } @@ -981,7 +992,7 @@ func TestUnixDomainSockets(t *testing.T) { defer os.RemoveAll(imagePath) // Wait until application has ran. - if err := waitForFile(outputFile); err != nil { + if err := waitForFileNotEmpty(outputFile); err != nil { t.Fatalf("Failed to wait for output file: %v", err) } @@ -1023,7 +1034,7 @@ func TestUnixDomainSockets(t *testing.T) { } // Wait until application has ran. - if err := waitForFile(outputFile2); err != nil { + if err := waitForFileNotEmpty(outputFile2); err != nil { t.Fatalf("Failed to wait for output file: %v", err) } @@ -1042,126 +1053,84 @@ func TestUnixDomainSockets(t *testing.T) { } // TestPauseResume tests that we can successfully pause and resume a container. -// It checks starts running sleep and executes another sleep. It pauses and checks -// that both processes are still running: sleep will be paused and still exist. -// It will then unpause and confirm that both processes are running. Then it will -// wait until one sleep completes and check to make sure the other is running. +// The container will keep touching a file to indicate it's running. The test +// pauses the container, removes the file, and checks that it doesn't get +// recreated. Then it resumes the container, verify that the file gets created +// again. func TestPauseResume(t *testing.T) { for _, conf := range configs(noOverlay...) { - t.Logf("Running test with conf: %+v", conf) - const uid = 343 - spec := testutil.NewSpecWithArgs("sleep", "20") - - lock, err := ioutil.TempFile(testutil.TmpDir(), "lock") - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer lock.Close() + t.Run(fmt.Sprintf("conf: %+v", conf), func(t *testing.T) { + t.Logf("Running test with conf: %+v", conf) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) - - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } - - // expectedPL lists the expected process state of the container. - expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, - }, - { - UID: uid, - PID: 2, - PPID: 0, - C: 0, - Cmd: "bash", - Threads: []kernel.ThreadID{2}, - }, - } - - script := fmt.Sprintf("while [[ -f %q ]]; do sleep 0.1; done", lock.Name()) - execArgs := &control.ExecArgs{ - Filename: "/bin/bash", - Argv: []string{"bash", "-c", script}, - WorkingDirectory: "/", - KUID: uid, - } + tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "lock") + if err != nil { + t.Fatalf("error creating temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) - // First, start running exec. - _, err = cont.Execute(execArgs) - if err != nil { - t.Fatalf("error executing: %v", err) - } + running := path.Join(tmpDir, "running") + script := fmt.Sprintf("while [[ true ]]; do touch %q; sleep 0.1; done", running) + spec := testutil.NewSpecWithArgs("/bin/bash", "-c", script) - // Verify that "sleep 5" is running. - if err := waitForProcessList(cont, expectedPL); err != nil { - t.Fatal(err) - } + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) - // Pause the running container. - if err := cont.Pause(); err != nil { - t.Errorf("error pausing container: %v", err) - } - if got, want := cont.Status, Paused; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + // Create and start the container. + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - if err := os.Remove(lock.Name()); err != nil { - t.Fatalf("os.Remove(lock) failed: %v", err) - } - // Script loops and sleeps for 100ms. Give a bit a time for it to exit in - // case pause didn't work. - time.Sleep(200 * time.Millisecond) + // Wait until container starts running, observed by the existence of running + // file. + if err := waitForFileExist(running); err != nil { + t.Errorf("error waiting for container to start: %v", err) + } - // Verify that the two processes still exist. - if err := getAndCheckProcLists(cont, expectedPL); err != nil { - t.Fatal(err) - } + // Pause the running container. + if err := cont.Pause(); err != nil { + t.Errorf("error pausing container: %v", err) + } + if got, want := cont.Status, Paused; got != want { + t.Errorf("container status got %v, want %v", got, want) + } - // Resume the running container. - if err := cont.Resume(); err != nil { - t.Errorf("error pausing container: %v", err) - } - if got, want := cont.Status, Running; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + if err := os.Remove(running); err != nil { + t.Fatalf("os.Remove(%q) failed: %v", running, err) + } + // Script touches the file every 100ms. Give a bit a time for it to run to + // catch the case that pause didn't work. + time.Sleep(200 * time.Millisecond) + if _, err := os.Stat(running); !os.IsNotExist(err) { + t.Fatalf("container did not pause: file exist check: %v", err) + } - expectedPL2 := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, - }, - } + // Resume the running container. + if err := cont.Resume(); err != nil { + t.Errorf("error pausing container: %v", err) + } + if got, want := cont.Status, Running; got != want { + t.Errorf("container status got %v, want %v", got, want) + } - // Verify that deleting the file triggered the process to exit. - if err := waitForProcessList(cont, expectedPL2); err != nil { - t.Fatal(err) - } + // Verify that the file is once again created by container. + if err := waitForFileExist(running); err != nil { + t.Fatalf("error resuming container: file exist check: %v", err) + } + }) } } -- cgit v1.2.3