diff options
Diffstat (limited to 'pkg/test')
-rw-r--r-- | pkg/test/criutil/criutil.go | 16 | ||||
-rw-r--r-- | pkg/test/dockerutil/container.go | 112 | ||||
-rw-r--r-- | pkg/test/dockerutil/dockerutil.go | 10 | ||||
-rw-r--r-- | pkg/test/dockerutil/exec.go | 5 | ||||
-rw-r--r-- | pkg/test/dockerutil/profile.go | 161 | ||||
-rw-r--r-- | pkg/test/dockerutil/profile_test.go | 59 | ||||
-rw-r--r-- | pkg/test/testutil/BUILD | 2 | ||||
-rw-r--r-- | pkg/test/testutil/sh.go | 515 | ||||
-rw-r--r-- | pkg/test/testutil/testutil.go | 58 |
9 files changed, 740 insertions, 198 deletions
diff --git a/pkg/test/criutil/criutil.go b/pkg/test/criutil/criutil.go index 70945f234..3b41a2824 100644 --- a/pkg/test/criutil/criutil.go +++ b/pkg/test/criutil/criutil.go @@ -36,7 +36,6 @@ import ( type Crictl struct { logger testutil.Logger endpoint string - runpArgs []string cleanup []func() } @@ -54,26 +53,31 @@ func ResolvePath(executable string) string { } } + // Favor /usr/local/bin, if it exists. + localBin := fmt.Sprintf("/usr/local/bin/%s", executable) + if _, err := os.Stat(localBin); err == nil { + return localBin + } + // Try to find via the path. - guess, err := exec.LookPath(executable) + guess, _ := exec.LookPath(executable) if err == nil { return guess } - // Return a default path. - return fmt.Sprintf("/usr/local/bin/%s", executable) + // Return a bare path; this generates a suitable error. + return executable } // NewCrictl returns a Crictl configured with a timeout and an endpoint over // which it will talk to containerd. -func NewCrictl(logger testutil.Logger, endpoint string, runpArgs []string) *Crictl { +func NewCrictl(logger testutil.Logger, endpoint string) *Crictl { // Attempt to find the executable, but don't bother propagating the // error at this point. The first command executed will return with a // binary not found error. return &Crictl{ logger: logger, endpoint: endpoint, - runpArgs: runpArgs, } } diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go index 64d17f661..7bacb70d3 100644 --- a/pkg/test/dockerutil/container.go +++ b/pkg/test/dockerutil/container.go @@ -17,6 +17,7 @@ package dockerutil import ( "bytes" "context" + "errors" "fmt" "io/ioutil" "net" @@ -54,11 +55,8 @@ type Container struct { copyErr error cleanups []func() - // Profiles are profiles added to this container. They contain methods - // that are run after Creation, Start, and Cleanup of this Container, along - // a handle to restart the profile. Generally, tests/benchmarks using - // profiles need to run as root. - profiles []Profile + // profile is the profiling hook associated with this container. + profile *profile } // RunOpts are options for running a container. @@ -104,22 +102,7 @@ type RunOpts struct { Links []string } -// MakeContainer sets up the struct for a Docker container. -// -// Names of containers will be unique. -// Containers will check flags for profiling requests. -func MakeContainer(ctx context.Context, logger testutil.Logger) *Container { - c := MakeNativeContainer(ctx, logger) - c.runtime = *runtime - if p := MakePprofFromFlags(c); p != nil { - c.AddProfile(p) - } - return c -} - -// MakeNativeContainer sets up the struct for a DockerContainer using runc. Native -// containers aren't profiled. -func MakeNativeContainer(ctx context.Context, logger testutil.Logger) *Container { +func makeContainer(ctx context.Context, logger testutil.Logger, runtime string) *Container { // Slashes are not allowed in container names. name := testutil.RandomID(logger.Name()) name = strings.ReplaceAll(name, "/", "-") @@ -131,29 +114,32 @@ func MakeNativeContainer(ctx context.Context, logger testutil.Logger) *Container return &Container{ logger: logger, Name: name, - runtime: "", + runtime: runtime, client: client, } } -// AddProfile adds a profile to this container. -func (c *Container) AddProfile(p Profile) { - c.profiles = append(c.profiles, p) +// MakeContainer constructs a suitable Container object. +// +// The runtime used is determined by the runtime flag. +// +// Containers will check flags for profiling requests. +func MakeContainer(ctx context.Context, logger testutil.Logger) *Container { + return makeContainer(ctx, logger, *runtime) } -// RestartProfiles calls Restart on all profiles for this container. -func (c *Container) RestartProfiles() error { - for _, profile := range c.profiles { - if err := profile.Restart(c); err != nil { - return err - } - } - return nil +// MakeNativeContainer constructs a suitable Container object. +// +// The runtime used will be the system default. +// +// Native containers aren't profiled. +func MakeNativeContainer(ctx context.Context, logger testutil.Logger) *Container { + return makeContainer(ctx, logger, "" /*runtime*/) } // Spawn is analogous to 'docker run -d'. func (c *Container) Spawn(ctx context.Context, r RunOpts, args ...string) error { - if err := c.create(ctx, c.config(r, args), c.hostConfig(r), nil); err != nil { + if err := c.create(ctx, r.Image, c.config(r, args), c.hostConfig(r), nil); err != nil { return err } return c.Start(ctx) @@ -166,7 +152,7 @@ func (c *Container) SpawnProcess(ctx context.Context, r RunOpts, args ...string) config.Tty = true config.OpenStdin = true - if err := c.CreateFrom(ctx, config, hostconf, netconf); err != nil { + if err := c.CreateFrom(ctx, r.Image, config, hostconf, netconf); err != nil { return Process{}, err } @@ -193,7 +179,7 @@ func (c *Container) SpawnProcess(ctx context.Context, r RunOpts, args ...string) // Run is analogous to 'docker run'. func (c *Container) Run(ctx context.Context, r RunOpts, args ...string) (string, error) { - if err := c.create(ctx, c.config(r, args), c.hostConfig(r), nil); err != nil { + if err := c.create(ctx, r.Image, c.config(r, args), c.hostConfig(r), nil); err != nil { return "", err } @@ -220,26 +206,26 @@ func (c *Container) MakeLink(target string) string { } // CreateFrom creates a container from the given configs. -func (c *Container) CreateFrom(ctx context.Context, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error { - return c.create(ctx, conf, hostconf, netconf) +func (c *Container) CreateFrom(ctx context.Context, profileImage string, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error { + return c.create(ctx, profileImage, conf, hostconf, netconf) } // Create is analogous to 'docker create'. func (c *Container) Create(ctx context.Context, r RunOpts, args ...string) error { - return c.create(ctx, c.config(r, args), c.hostConfig(r), nil) + return c.create(ctx, r.Image, c.config(r, args), c.hostConfig(r), nil) } -func (c *Container) create(ctx context.Context, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error { +func (c *Container) create(ctx context.Context, profileImage string, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error { + if c.runtime != "" { + // Use the image name as provided here; which normally represents the + // unmodified "basic/alpine" image name. This should be easy to grok. + c.profileInit(profileImage) + } cont, err := c.client.ContainerCreate(ctx, conf, hostconf, nil, c.Name) if err != nil { return err } c.id = cont.ID - for _, profile := range c.profiles { - if err := profile.OnCreate(c); err != nil { - return fmt.Errorf("OnCreate method failed with: %v", err) - } - } return nil } @@ -285,11 +271,13 @@ func (c *Container) Start(ctx context.Context) error { if err := c.client.ContainerStart(ctx, c.id, types.ContainerStartOptions{}); err != nil { return fmt.Errorf("ContainerStart failed: %v", err) } - for _, profile := range c.profiles { - if err := profile.OnStart(c); err != nil { - return fmt.Errorf("OnStart method failed: %v", err) + + if c.profile != nil { + if err := c.profile.Start(c); err != nil { + c.logger.Logf("profile.Start failed: %v", err) } } + return nil } @@ -351,6 +339,9 @@ func (c *Container) SandboxPid(ctx context.Context) (int, error) { return resp.ContainerJSONBase.State.Pid, nil } +// ErrNoIP indicates that no IP address is available. +var ErrNoIP = errors.New("no IP available") + // FindIP returns the IP address of the container. func (c *Container) FindIP(ctx context.Context, ipv6 bool) (net.IP, error) { resp, err := c.client.ContainerInspect(ctx, c.id) @@ -365,7 +356,7 @@ func (c *Container) FindIP(ctx context.Context, ipv6 bool) (net.IP, error) { ip = net.ParseIP(resp.NetworkSettings.DefaultNetworkSettings.IPAddress) } if ip == nil { - return net.IP{}, fmt.Errorf("invalid IP: %q", ip) + return net.IP{}, ErrNoIP } return ip, nil } @@ -438,6 +429,7 @@ func (c *Container) Status(ctx context.Context) (types.ContainerState, error) { // Wait waits for the container to exit. func (c *Container) Wait(ctx context.Context) error { + defer c.stopProfiling() statusChan, errChan := c.client.ContainerWait(ctx, c.id, container.WaitConditionNotRunning) select { case err := <-errChan: @@ -495,8 +487,20 @@ func (c *Container) WaitForOutputSubmatch(ctx context.Context, pattern string, t } } +// stopProfiling stops profiling. +func (c *Container) stopProfiling() { + if c.profile != nil { + if err := c.profile.Stop(c); err != nil { + // This most likely means that the runtime for the container + // was too short to connect and actually get a profile. + c.logger.Logf("warning: profile.Stop failed: %v", err) + } + } +} + // Kill kills the container. func (c *Container) Kill(ctx context.Context) error { + defer c.stopProfiling() return c.client.ContainerKill(ctx, c.id, "") } @@ -513,14 +517,6 @@ func (c *Container) Remove(ctx context.Context) error { // CleanUp kills and deletes the container (best effort). func (c *Container) CleanUp(ctx context.Context) { - // Execute profile cleanups before the container goes down. - for _, profile := range c.profiles { - profile.OnCleanUp(c) - } - - // Forget profiles. - c.profiles = nil - // Execute all cleanups. We execute cleanups here to close any // open connections to the container before closing. Open connections // can cause Kill and Remove to hang. @@ -534,10 +530,12 @@ func (c *Container) CleanUp(ctx context.Context) { // Just log; can't do anything here. c.logger.Logf("error killing container %q: %v", c.Name, err) } + // Remove the image. if err := c.Remove(ctx); err != nil { c.logger.Logf("error removing container %q: %v", c.Name, err) } + // Forget all mounts. c.mounts = nil } diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index 7027df1a5..a40005799 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -49,15 +49,11 @@ var ( // pprofBaseDir allows the user to change the directory to which profiles are // written. By default, profiles will appear under: // /tmp/profile/RUNTIME/CONTAINER_NAME/*.pprof. - pprofBaseDir = flag.String("pprof-dir", "/tmp/profile", "base directory in: BASEDIR/RUNTIME/CONTINER_NAME/FILENAME (e.g. /tmp/profile/runtime/mycontainer/cpu.pprof)") - - // duration is the max duration `runsc debug` will run and capture profiles. - // If the container's clean up method is called prior to duration, the - // profiling process will be killed. - duration = flag.Duration("pprof-duration", 10*time.Second, "duration to run the profile in seconds") + pprofBaseDir = flag.String("pprof-dir", "/tmp/profile", "base directory in: BASEDIR/RUNTIME/CONTINER_NAME/FILENAME (e.g. /tmp/profile/runtime/mycontainer/cpu.pprof)") + pprofDuration = flag.Duration("pprof-duration", time.Hour, "profiling duration (automatically stopped at container exit)") // The below flags enable each type of profile. Multiple profiles can be - // enabled for each run. + // enabled for each run. The profile will be collected from the start. pprofBlock = flag.Bool("pprof-block", false, "enables block profiling with runsc debug") pprofCPU = flag.Bool("pprof-cpu", false, "enables CPU profiling with runsc debug") pprofHeap = flag.Bool("pprof-heap", false, "enables heap profiling with runsc debug") diff --git a/pkg/test/dockerutil/exec.go b/pkg/test/dockerutil/exec.go index 4c739c9e9..bf968acec 100644 --- a/pkg/test/dockerutil/exec.go +++ b/pkg/test/dockerutil/exec.go @@ -77,11 +77,6 @@ func (c *Container) doExec(ctx context.Context, r ExecOpts, args []string) (Proc return Process{}, fmt.Errorf("exec attach failed with err: %v", err) } - if err := c.client.ContainerExecStart(ctx, resp.ID, types.ExecStartCheck{}); err != nil { - hijack.Close() - return Process{}, fmt.Errorf("exec start failed with err: %v", err) - } - return Process{ container: c, execid: resp.ID, diff --git a/pkg/test/dockerutil/profile.go b/pkg/test/dockerutil/profile.go index 55f9496cd..5cad3e959 100644 --- a/pkg/test/dockerutil/profile.go +++ b/pkg/test/dockerutil/profile.go @@ -17,72 +17,64 @@ package dockerutil import ( "context" "fmt" - "io" "os" "os/exec" "path/filepath" + "syscall" "time" ) -// Profile represents profile-like operations on a container, -// such as running perf or pprof. It is meant to be added to containers -// such that the container type calls the Profile during its lifecycle. -type Profile interface { - // OnCreate is called just after the container is created when the container - // has a valid ID (e.g. c.ID()). - OnCreate(c *Container) error - - // OnStart is called just after the container is started when the container - // has a valid Pid (e.g. c.SandboxPid()). - OnStart(c *Container) error - - // Restart restarts the Profile on request. - Restart(c *Container) error - - // OnCleanUp is called during the container's cleanup method. - // Cleanups should just log errors if they have them. - OnCleanUp(c *Container) error -} - -// Pprof is for running profiles with 'runsc debug'. Pprof workloads -// should be run as root and ONLY against runsc sandboxes. The runtime -// should have --profile set as an option in /etc/docker/daemon.json in -// order for profiling to work with Pprof. -type Pprof struct { - BasePath string // path to put profiles - BlockProfile bool - CPUProfile bool - HeapProfile bool - MutexProfile bool - Duration time.Duration // duration to run profiler e.g. '10s' or '1m'. - shouldRun bool - cmd *exec.Cmd - stdout io.ReadCloser - stderr io.ReadCloser +// profile represents profile-like operations on a container. +// +// It is meant to be added to containers such that the container type calls +// the profile during its lifecycle. Standard implementations are below. + +// profile is for running profiles with 'runsc debug'. +type profile struct { + BasePath string + Types []string + Duration time.Duration + cmd *exec.Cmd } -// MakePprofFromFlags makes a Pprof profile from flags. -func MakePprofFromFlags(c *Container) *Pprof { - if !(*pprofBlock || *pprofCPU || *pprofHeap || *pprofMutex) { - return nil +// profileInit initializes a profile object, if required. +// +// N.B. The profiling filename initialized here will use the *image* +// name, and not the unique container name. This is intentional. Most +// of the time, profiling will be used for benchmarks. Benchmarks will +// be run iteratively until a sufficiently large N is reached. It is +// useful in this context to overwrite previous runs, and generate a +// single profile result for the final test. +func (c *Container) profileInit(image string) { + if !*pprofBlock && !*pprofCPU && !*pprofMutex && !*pprofHeap { + return // Nothing to do. + } + c.profile = &profile{ + BasePath: filepath.Join(*pprofBaseDir, c.runtime, c.logger.Name(), image), + Duration: *pprofDuration, + } + if *pprofCPU { + c.profile.Types = append(c.profile.Types, "cpu") } - return &Pprof{ - BasePath: filepath.Join(*pprofBaseDir, c.runtime, c.Name), - BlockProfile: *pprofBlock, - CPUProfile: *pprofCPU, - HeapProfile: *pprofHeap, - MutexProfile: *pprofMutex, - Duration: *duration, + if *pprofHeap { + c.profile.Types = append(c.profile.Types, "heap") + } + if *pprofMutex { + c.profile.Types = append(c.profile.Types, "mutex") + } + if *pprofBlock { + c.profile.Types = append(c.profile.Types, "block") } } -// OnCreate implements Profile.OnCreate. -func (p *Pprof) OnCreate(c *Container) error { - return os.MkdirAll(p.BasePath, 0755) -} +// createProcess creates the collection process. +func (p *profile) createProcess(c *Container) error { + // Ensure our directory exists. + if err := os.MkdirAll(p.BasePath, 0755); err != nil { + return err + } -// OnStart implements Profile.OnStart. -func (p *Pprof) OnStart(c *Container) error { + // Find the runtime to invoke. path, err := RuntimePath() if err != nil { return fmt.Errorf("failed to get runtime path: %v", err) @@ -90,58 +82,63 @@ func (p *Pprof) OnStart(c *Container) error { // The root directory of this container's runtime. root := fmt.Sprintf("--root=/var/run/docker/runtime-%s/moby", c.runtime) - // Format is `runsc --root=rootdir debug --profile-*=file --duration=* containerID`. + + // Format is `runsc --root=rootdir debug --profile-*=file --duration=24h containerID`. args := []string{root, "debug"} - args = append(args, p.makeProfileArgs(c)...) + for _, profileArg := range p.Types { + outputPath := filepath.Join(p.BasePath, fmt.Sprintf("%s.pprof", profileArg)) + args = append(args, fmt.Sprintf("--profile-%s=%s", profileArg, outputPath)) + } + args = append(args, fmt.Sprintf("--duration=%s", p.Duration)) // Or until container exits. + args = append(args, fmt.Sprintf("--delay=%s", p.Duration)) // Ditto. args = append(args, c.ID()) // Best effort wait until container is running. for now := time.Now(); time.Since(now) < 5*time.Second; { if status, err := c.Status(context.Background()); err != nil { return fmt.Errorf("failed to get status with: %v", err) - } else if status.Running { break } - time.Sleep(500 * time.Millisecond) + time.Sleep(100 * time.Millisecond) } p.cmd = exec.Command(path, args...) + p.cmd.Stderr = os.Stderr // Pass through errors. if err := p.cmd.Start(); err != nil { - return fmt.Errorf("process failed: %v", err) + return fmt.Errorf("start process failed: %v", err) } + return nil } -// Restart implements Profile.Restart. -func (p *Pprof) Restart(c *Container) error { - p.OnCleanUp(c) - return p.OnStart(c) +// killProcess kills the process, if running. +func (p *profile) killProcess() error { + if p.cmd != nil && p.cmd.Process != nil { + return p.cmd.Process.Signal(syscall.SIGTERM) + } + return nil } -// OnCleanUp implements Profile.OnCleanup -func (p *Pprof) OnCleanUp(c *Container) error { +// waitProcess waits for the process, if running. +func (p *profile) waitProcess() error { defer func() { p.cmd = nil }() - if p.cmd != nil && p.cmd.Process != nil && p.cmd.ProcessState != nil && !p.cmd.ProcessState.Exited() { - return p.cmd.Process.Kill() + if p.cmd != nil { + return p.cmd.Wait() } return nil } -// makeProfileArgs turns Pprof fields into runsc debug flags. -func (p *Pprof) makeProfileArgs(c *Container) []string { - var ret []string - if p.BlockProfile { - ret = append(ret, fmt.Sprintf("--profile-block=%s", filepath.Join(p.BasePath, "block.pprof"))) - } - if p.CPUProfile { - ret = append(ret, fmt.Sprintf("--profile-cpu=%s", filepath.Join(p.BasePath, "cpu.pprof"))) - } - if p.HeapProfile { - ret = append(ret, fmt.Sprintf("--profile-heap=%s", filepath.Join(p.BasePath, "heap.pprof"))) - } - if p.MutexProfile { - ret = append(ret, fmt.Sprintf("--profile-mutex=%s", filepath.Join(p.BasePath, "mutex.pprof"))) +// Start is called when profiling is started. +func (p *profile) Start(c *Container) error { + return p.createProcess(c) +} + +// Stop is called when profiling is started. +func (p *profile) Stop(c *Container) error { + killErr := p.killProcess() + waitErr := p.waitProcess() + if waitErr != nil && killErr != nil { + return killErr } - ret = append(ret, fmt.Sprintf("--duration=%s", p.Duration)) - return ret + return waitErr // Ignore okay wait, err kill. } diff --git a/pkg/test/dockerutil/profile_test.go b/pkg/test/dockerutil/profile_test.go index 8c4ffe483..4fe9ce15c 100644 --- a/pkg/test/dockerutil/profile_test.go +++ b/pkg/test/dockerutil/profile_test.go @@ -17,6 +17,7 @@ package dockerutil import ( "context" "fmt" + "io/ioutil" "os" "path/filepath" "testing" @@ -25,52 +26,60 @@ import ( type testCase struct { name string - pprof Pprof + profile profile expectedFiles []string } -func TestPprof(t *testing.T) { +func TestProfile(t *testing.T) { // Basepath and expected file names for each type of profile. - basePath := "/tmp/test/profile" + tmpDir, err := ioutil.TempDir("", "") + if err != nil { + t.Fatalf("unable to create temporary directory: %v", err) + } + defer os.RemoveAll(tmpDir) + + // All expected names. + basePath := tmpDir block := "block.pprof" cpu := "cpu.pprof" - goprofle := "go.pprof" heap := "heap.pprof" mutex := "mutex.pprof" testCases := []testCase{ { - name: "Cpu", - pprof: Pprof{ - BasePath: basePath, - CPUProfile: true, - Duration: 2 * time.Second, + name: "One", + profile: profile{ + BasePath: basePath, + Types: []string{"cpu"}, + Duration: 2 * time.Second, }, expectedFiles: []string{cpu}, }, { name: "All", - pprof: Pprof{ - BasePath: basePath, - BlockProfile: true, - CPUProfile: true, - HeapProfile: true, - MutexProfile: true, - Duration: 2 * time.Second, + profile: profile{ + BasePath: basePath, + Types: []string{"block", "cpu", "heap", "mutex"}, + Duration: 2 * time.Second, }, - expectedFiles: []string{block, cpu, goprofle, heap, mutex}, + expectedFiles: []string{block, cpu, heap, mutex}, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { ctx := context.Background() c := MakeContainer(ctx, t) + // Set basepath to include the container name so there are no conflicts. - tc.pprof.BasePath = filepath.Join(tc.pprof.BasePath, c.Name) - c.AddProfile(&tc.pprof) + localProfile := tc.profile // Copy it. + localProfile.BasePath = filepath.Join(localProfile.BasePath, tc.name) + + // Set directly on the container, to avoid flags. + c.profile = &localProfile func() { defer c.CleanUp(ctx) + // Start a container. if err := c.Spawn(ctx, RunOpts{ Image: "basic/alpine", @@ -83,24 +92,24 @@ func TestPprof(t *testing.T) { } // End early if the expected files exist and have data. - for start := time.Now(); time.Since(start) < tc.pprof.Duration; time.Sleep(500 * time.Millisecond) { - if err := checkFiles(tc); err == nil { + for start := time.Now(); time.Since(start) < localProfile.Duration; time.Sleep(100 * time.Millisecond) { + if err := checkFiles(localProfile.BasePath, tc.expectedFiles); err == nil { break } } }() // Check all expected files exist and have data. - if err := checkFiles(tc); err != nil { + if err := checkFiles(localProfile.BasePath, tc.expectedFiles); err != nil { t.Fatalf(err.Error()) } }) } } -func checkFiles(tc testCase) error { - for _, file := range tc.expectedFiles { - stat, err := os.Stat(filepath.Join(tc.pprof.BasePath, file)) +func checkFiles(basePath string, expectedFiles []string) error { + for _, file := range expectedFiles { + stat, err := os.Stat(filepath.Join(basePath, file)) if err != nil { return fmt.Errorf("stat failed with: %v", err) } else if stat.Size() < 1 { diff --git a/pkg/test/testutil/BUILD b/pkg/test/testutil/BUILD index c4b131896..00600a2ad 100644 --- a/pkg/test/testutil/BUILD +++ b/pkg/test/testutil/BUILD @@ -6,6 +6,7 @@ go_library( name = "testutil", testonly = 1, srcs = [ + "sh.go", "testutil.go", "testutil_runfiles.go", ], @@ -15,6 +16,7 @@ go_library( "//runsc/config", "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", + "@com_github_kr_pty//:go_default_library", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", ], ) diff --git a/pkg/test/testutil/sh.go b/pkg/test/testutil/sh.go new file mode 100644 index 000000000..1c77562be --- /dev/null +++ b/pkg/test/testutil/sh.go @@ -0,0 +1,515 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testutil + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + "os/exec" + "strings" + "syscall" + "time" + + "github.com/kr/pty" +) + +// Prompt is used as shell prompt. +// It is meant to be unique enough to not be seen in command outputs. +const Prompt = "PROMPT> " + +// Simplistic shell string escape. +func shellEscape(s string) string { + // specialChars is used to determine whether s needs quoting at all. + const specialChars = "\\'\"`${[|&;<>()*?! \t\n" + // If s needs quoting, escapedChars is the set of characters that are + // escaped with a backslash. + const escapedChars = "\\\"$`" + if len(s) == 0 { + return "''" + } + if !strings.ContainsAny(s, specialChars) { + return s + } + var b bytes.Buffer + b.WriteString("\"") + for _, c := range s { + if strings.ContainsAny(string(c), escapedChars) { + b.WriteString("\\") + } + b.WriteRune(c) + } + b.WriteString("\"") + return b.String() +} + +type byteOrError struct { + b byte + err error +} + +// Shell manages a /bin/sh invocation with convenience functions to handle I/O. +// The shell is run in its own interactive TTY and should present its prompt. +type Shell struct { + // cmd is a reference to the underlying sh process. + cmd *exec.Cmd + // cmdFinished is closed when cmd exits. + cmdFinished chan struct{} + + // echo is whether the shell will echo input back to us. + // This helps setting expectations of getting feedback of written bytes. + echo bool + // Control characters we expect to see in the shell. + controlCharIntr string + controlCharEOF string + + // ptyMaster and ptyReplica are the TTY pair associated with the shell. + ptyMaster *os.File + ptyReplica *os.File + // readCh is a channel where everything read from ptyMaster is written. + readCh chan byteOrError + + // logger is used for logging. It may be nil. + logger Logger +} + +// cleanup kills the shell process and closes the TTY. +// Users of this library get a reference to this function with NewShell. +func (s *Shell) cleanup() { + s.logf("cleanup", "Shell cleanup started.") + if s.cmd.ProcessState == nil { + if err := s.cmd.Process.Kill(); err != nil { + s.logf("cleanup", "cannot kill shell process: %v", err) + } + // We don't log the error returned by Wait because the monitorExit + // goroutine will already do so. + s.cmd.Wait() + } + s.ptyReplica.Close() + s.ptyMaster.Close() + // Wait for monitorExit goroutine to write exit status to the debug log. + <-s.cmdFinished + // Empty out everything in the readCh, but don't wait too long for it. + var extraBytes bytes.Buffer + unreadTimeout := time.After(100 * time.Millisecond) +unreadLoop: + for { + select { + case r, ok := <-s.readCh: + if !ok { + break unreadLoop + } else if r.err == nil { + extraBytes.WriteByte(r.b) + } + case <-unreadTimeout: + break unreadLoop + } + } + if extraBytes.Len() > 0 { + s.logIO("unread", extraBytes.Bytes(), nil) + } + s.logf("cleanup", "Shell cleanup complete.") +} + +// logIO logs byte I/O to both standard logging and the test log, if provided. +func (s *Shell) logIO(prefix string, b []byte, err error) { + var sb strings.Builder + if len(b) > 0 { + sb.WriteString(fmt.Sprintf("%q", b)) + } else { + sb.WriteString("(nothing)") + } + if err != nil { + sb.WriteString(fmt.Sprintf(" [error: %v]", err)) + } + s.logf(prefix, "%s", sb.String()) +} + +// logf logs something to both standard logging and the test log, if provided. +func (s *Shell) logf(prefix, format string, values ...interface{}) { + if s.logger != nil { + s.logger.Logf("[%s] %s", prefix, fmt.Sprintf(format, values...)) + } +} + +// monitorExit waits for the shell process to exit and logs the exit result. +func (s *Shell) monitorExit() { + if err := s.cmd.Wait(); err != nil { + s.logf("cmd", "shell process terminated: %v", err) + } else { + s.logf("cmd", "shell process terminated successfully") + } + close(s.cmdFinished) +} + +// reader continuously reads the shell output and populates readCh. +func (s *Shell) reader(ctx context.Context) { + b := make([]byte, 4096) + defer close(s.readCh) + for { + select { + case <-s.cmdFinished: + // Shell process terminated; stop trying to read. + return + case <-ctx.Done(): + // Shell process will also have terminated in this case; + // stop trying to read. + // We don't print an error here because doing so would print this in the + // normal case where the context passed to NewShell is canceled at the + // end of a successful test. + return + default: + // Shell still running, try reading. + } + if got, err := s.ptyMaster.Read(b); err != nil { + s.readCh <- byteOrError{err: err} + if err == io.EOF { + return + } + } else { + for i := 0; i < got; i++ { + s.readCh <- byteOrError{b: b[i]} + } + } + } +} + +// readByte reads a single byte, respecting the context. +func (s *Shell) readByte(ctx context.Context) (byte, error) { + select { + case <-ctx.Done(): + return 0, ctx.Err() + case r := <-s.readCh: + return r.b, r.err + } +} + +// readLoop reads as many bytes as possible until the context expires, b is +// full, or a short time passes. It returns how many bytes it has successfully +// read. +func (s *Shell) readLoop(ctx context.Context, b []byte) (int, error) { + soonCtx, soonCancel := context.WithTimeout(ctx, 5*time.Second) + defer soonCancel() + var i int + for i = 0; i < len(b) && soonCtx.Err() == nil; i++ { + next, err := s.readByte(soonCtx) + if err != nil { + if i > 0 { + s.logIO("read", b[:i-1], err) + } else { + s.logIO("read", nil, err) + } + return i, err + } + b[i] = next + } + s.logIO("read", b[:i], soonCtx.Err()) + return i, soonCtx.Err() +} + +// readLine reads a single line. Strips out all \r characters for convenience. +// Upon error, it will still return what it has read so far. +// It will also exit quickly if the line content it has read so far (without a +// line break) matches `prompt`. +func (s *Shell) readLine(ctx context.Context, prompt string) ([]byte, error) { + soonCtx, soonCancel := context.WithTimeout(ctx, 5*time.Second) + defer soonCancel() + var lineData bytes.Buffer + var b byte + var err error + for soonCtx.Err() == nil && b != '\n' { + b, err = s.readByte(soonCtx) + if err != nil { + data := lineData.Bytes() + s.logIO("read", data, err) + return data, err + } + if b != '\r' { + lineData.WriteByte(b) + } + if bytes.Equal(lineData.Bytes(), []byte(prompt)) { + // Assume that there will not be any further output if we get the prompt. + // This avoids waiting for the read deadline just to read the prompt. + break + } + } + data := lineData.Bytes() + s.logIO("read", data, soonCtx.Err()) + return data, soonCtx.Err() +} + +// Expect verifies that the next `len(want)` bytes we read match `want`. +func (s *Shell) Expect(ctx context.Context, want []byte) error { + errPrefix := fmt.Sprintf("want(%q)", want) + b := make([]byte, len(want)) + got, err := s.readLoop(ctx, b) + if err != nil { + if ctx.Err() != nil { + return fmt.Errorf("%s: context done (%w), got: %q", errPrefix, err, b[:got]) + } + return fmt.Errorf("%s: %w", errPrefix, err) + } + if got < len(want) { + return fmt.Errorf("%s: short read (read %d bytes, expected %d): %q", errPrefix, got, len(want), b[:got]) + } + if !bytes.Equal(b, want) { + return fmt.Errorf("got %q want %q", b, want) + } + return nil +} + +// ExpectString verifies that the next `len(want)` bytes we read match `want`. +func (s *Shell) ExpectString(ctx context.Context, want string) error { + return s.Expect(ctx, []byte(want)) +} + +// ExpectPrompt verifies that the next few bytes we read are the shell prompt. +func (s *Shell) ExpectPrompt(ctx context.Context) error { + return s.ExpectString(ctx, Prompt) +} + +// ExpectEmptyLine verifies that the next few bytes we read are an empty line, +// as defined by any number of carriage or line break characters. +func (s *Shell) ExpectEmptyLine(ctx context.Context) error { + line, err := s.readLine(ctx, Prompt) + if err != nil { + return fmt.Errorf("cannot read line: %w", err) + } + if strings.Trim(string(line), "\r\n") != "" { + return fmt.Errorf("line was not empty: %q", line) + } + return nil +} + +// ExpectLine verifies that the next `len(want)` bytes we read match `want`, +// followed by carriage returns or newline characters. +func (s *Shell) ExpectLine(ctx context.Context, want string) error { + if err := s.ExpectString(ctx, want); err != nil { + return err + } + if err := s.ExpectEmptyLine(ctx); err != nil { + return fmt.Errorf("ExpectLine(%q): no line break: %w", want, err) + } + return nil +} + +// Write writes `b` to the shell and verifies that all of them get written. +func (s *Shell) Write(b []byte) error { + written, err := s.ptyMaster.Write(b) + s.logIO("write", b[:written], err) + if err != nil { + return fmt.Errorf("write(%q): %w", b, err) + } + if written != len(b) { + return fmt.Errorf("write(%q): wrote %d of %d bytes (%q)", b, written, len(b), b[:written]) + } + return nil +} + +// WriteLine writes `line` (to which \n will be appended) to the shell. +// If the shell is in `echo` mode, it will also check that we got these bytes +// back to read. +func (s *Shell) WriteLine(ctx context.Context, line string) error { + if err := s.Write([]byte(line + "\n")); err != nil { + return err + } + if s.echo { + // We expect to see everything we've typed. + if err := s.ExpectLine(ctx, line); err != nil { + return fmt.Errorf("echo: %w", err) + } + } + return nil +} + +// StartCommand is a convenience wrapper for WriteLine that mimics entering a +// command line and pressing Enter. It does some basic shell argument escaping. +func (s *Shell) StartCommand(ctx context.Context, cmd ...string) error { + escaped := make([]string, len(cmd)) + for i, arg := range cmd { + escaped[i] = shellEscape(arg) + } + return s.WriteLine(ctx, strings.Join(escaped, " ")) +} + +// GetCommandOutput gets all following bytes until the prompt is encountered. +// This is useful for matching the output of a command. +// All \r are removed for ease of matching. +func (s *Shell) GetCommandOutput(ctx context.Context) ([]byte, error) { + return s.ReadUntil(ctx, Prompt) +} + +// ReadUntil gets all following bytes until a certain line is encountered. +// This final line is not returned as part of the output, but everything before +// it (including the \n) is included. +// This is useful for matching the output of a command. +// All \r are removed for ease of matching. +func (s *Shell) ReadUntil(ctx context.Context, finalLine string) ([]byte, error) { + var output bytes.Buffer + for ctx.Err() == nil { + line, err := s.readLine(ctx, finalLine) + if err != nil { + return nil, err + } + if bytes.Equal(line, []byte(finalLine)) { + break + } + // readLine ensures that `line` either matches `finalLine` or contains \n. + // Thus we can be confident that `line` has a \n here. + output.Write(line) + } + return output.Bytes(), ctx.Err() +} + +// RunCommand is a convenience wrapper for StartCommand + GetCommandOutput. +func (s *Shell) RunCommand(ctx context.Context, cmd ...string) ([]byte, error) { + if err := s.StartCommand(ctx, cmd...); err != nil { + return nil, err + } + return s.GetCommandOutput(ctx) +} + +// RefreshSTTY interprets output from `stty -a` to check whether we are in echo +// mode and other settings. +// It will assume that any line matching `expectPrompt` means the end of +// the `stty -a` output. +// Why do this rather than using `tcgets`? Because this function can be used in +// conjunction with sub-shell processes that can allocate their own TTYs. +func (s *Shell) RefreshSTTY(ctx context.Context, expectPrompt string) error { + // Temporarily assume we will not get any output. + // If echo is actually on, we'll get the "stty -a" line as if it was command + // output. This is OK because we parse the output generously. + s.echo = false + if err := s.WriteLine(ctx, "stty -a"); err != nil { + return fmt.Errorf("could not run `stty -a`: %w", err) + } + sttyOutput, err := s.ReadUntil(ctx, expectPrompt) + if err != nil { + return fmt.Errorf("cannot get `stty -a` output: %w", err) + } + + // Set default control characters in case we can't see them in the output. + s.controlCharIntr = "^C" + s.controlCharEOF = "^D" + // stty output has two general notations: + // `a = b;` (for control characters), and `option` vs `-option` (for boolean + // options). We parse both kinds here. + // For `a = b;`, `controlChar` contains `a`, and `previousToken` is used to + // set `controlChar` to `previousToken` when we see an "=" token. + var previousToken, controlChar string + for _, token := range strings.Fields(string(sttyOutput)) { + if controlChar != "" { + value := strings.TrimSuffix(token, ";") + switch controlChar { + case "intr": + s.controlCharIntr = value + case "eof": + s.controlCharEOF = value + } + controlChar = "" + } else { + switch token { + case "=": + controlChar = previousToken + case "-echo": + s.echo = false + case "echo": + s.echo = true + } + } + previousToken = token + } + s.logf("stty", "refreshed settings: echo=%v, intr=%q, eof=%q", s.echo, s.controlCharIntr, s.controlCharEOF) + return nil +} + +// sendControlCode sends `code` to the shell and expects to see `repr`. +// If `expectLinebreak` is true, it also expects to see a linebreak. +func (s *Shell) sendControlCode(ctx context.Context, code byte, repr string, expectLinebreak bool) error { + if err := s.Write([]byte{code}); err != nil { + return fmt.Errorf("cannot send %q: %w", code, err) + } + if err := s.ExpectString(ctx, repr); err != nil { + return fmt.Errorf("did not see %s: %w", repr, err) + } + if expectLinebreak { + if err := s.ExpectEmptyLine(ctx); err != nil { + return fmt.Errorf("linebreak after %s: %v", repr, err) + } + } + return nil +} + +// SendInterrupt sends the \x03 (Ctrl+C) control character to the shell. +func (s *Shell) SendInterrupt(ctx context.Context, expectLinebreak bool) error { + return s.sendControlCode(ctx, 0x03, s.controlCharIntr, expectLinebreak) +} + +// SendEOF sends the \x04 (Ctrl+D) control character to the shell. +func (s *Shell) SendEOF(ctx context.Context, expectLinebreak bool) error { + return s.sendControlCode(ctx, 0x04, s.controlCharEOF, expectLinebreak) +} + +// NewShell returns a new managed sh process along with a cleanup function. +// The caller is expected to call this function once it no longer needs the +// shell. +// The optional passed-in logger will be used for logging. +func NewShell(ctx context.Context, logger Logger) (*Shell, func(), error) { + ptyMaster, ptyReplica, err := pty.Open() + if err != nil { + return nil, nil, fmt.Errorf("cannot create PTY: %w", err) + } + cmd := exec.CommandContext(ctx, "/bin/sh", "--noprofile", "--norc", "-i") + cmd.Stdin = ptyReplica + cmd.Stdout = ptyReplica + cmd.Stderr = ptyReplica + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setsid: true, + Setctty: true, + Ctty: 0, + } + cmd.Env = append(cmd.Env, fmt.Sprintf("PS1=%s", Prompt)) + if err := cmd.Start(); err != nil { + return nil, nil, fmt.Errorf("cannot start shell: %w", err) + } + s := &Shell{ + cmd: cmd, + cmdFinished: make(chan struct{}), + ptyMaster: ptyMaster, + ptyReplica: ptyReplica, + readCh: make(chan byteOrError, 1<<20), + logger: logger, + } + s.logf("creation", "Shell spawned.") + go s.monitorExit() + go s.reader(ctx) + setupCtx, setupCancel := context.WithTimeout(ctx, 5*time.Second) + defer setupCancel() + // We expect to see the prompt immediately on startup, + // since the shell is started in interactive mode. + if err := s.ExpectPrompt(setupCtx); err != nil { + s.cleanup() + return nil, nil, fmt.Errorf("did not get initial prompt: %w", err) + } + s.logf("creation", "Initial prompt observed.") + // Get initial TTY settings. + if err := s.RefreshSTTY(setupCtx, Prompt); err != nil { + s.cleanup() + return nil, nil, fmt.Errorf("cannot get initial STTY settings: %w", err) + } + return s, s.cleanup, nil +} diff --git a/pkg/test/testutil/testutil.go b/pkg/test/testutil/testutil.go index 49ab87c58..fdd416b5e 100644 --- a/pkg/test/testutil/testutil.go +++ b/pkg/test/testutil/testutil.go @@ -36,7 +36,6 @@ import ( "path/filepath" "strconv" "strings" - "sync/atomic" "syscall" "testing" "time" @@ -49,7 +48,10 @@ import ( ) var ( - checkpoint = flag.Bool("checkpoint", true, "control checkpoint/restore support") + checkpoint = flag.Bool("checkpoint", true, "control checkpoint/restore support") + partition = flag.Int("partition", 1, "partition number, this is 1-indexed") + totalPartitions = flag.Int("total_partitions", 1, "total number of partitions") + isRunningWithHostNet = flag.Bool("hostnet", false, "whether test is running with hostnet") ) // IsCheckpointSupported returns the relevant command line flag. @@ -57,6 +59,11 @@ func IsCheckpointSupported() bool { return *checkpoint } +// IsRunningWithHostNet returns the relevant command line flag. +func IsRunningWithHostNet() bool { + return *isRunningWithHostNet +} + // ImageByName mangles the image name used locally. This depends on the image // build infrastructure in images/ and tools/vm. func ImageByName(name string) string { @@ -249,14 +256,25 @@ func writeSpec(dir string, spec *specs.Spec) error { // idRandomSrc is a pseudo random generator used to in RandomID. var idRandomSrc = rand.New(rand.NewSource(time.Now().UnixNano())) +// idRandomSrcMtx is the mutex protecting idRandomSrc.Read from being used +// concurrently in differnt goroutines. +var idRandomSrcMtx sync.Mutex + // RandomID returns 20 random bytes following the given prefix. func RandomID(prefix string) string { // Read 20 random bytes. b := make([]byte, 20) + // Rand.Read is not safe for concurrent use. Packetimpact tests can be run in + // parallel now, so we have to protect the Read with a mutex. Otherwise we'll + // run into name conflicts. + // https://golang.org/pkg/math/rand/#Rand.Read + idRandomSrcMtx.Lock() // "[Read] always returns len(p) and a nil error." --godoc if _, err := idRandomSrc.Read(b); err != nil { + idRandomSrcMtx.Unlock() panic("rand.Read failed: " + err.Error()) } + idRandomSrcMtx.Unlock() if prefix != "" { prefix = prefix + "-" } @@ -417,33 +435,35 @@ func StartReaper() func() { // WaitUntilRead reads from the given reader until the wanted string is found // or until timeout. -func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error { +func WaitUntilRead(r io.Reader, want string, timeout time.Duration) error { sc := bufio.NewScanner(r) - if split != nil { - sc.Split(split) - } // done must be accessed atomically. A value greater than 0 indicates // that the read loop can exit. - var done uint32 - doneCh := make(chan struct{}) + doneCh := make(chan bool) + defer close(doneCh) go func() { for sc.Scan() { t := sc.Text() if strings.Contains(t, want) { - atomic.StoreUint32(&done, 1) - close(doneCh) - break + doneCh <- true + return } - if atomic.LoadUint32(&done) > 0 { - break + select { + case <-doneCh: + return + default: } } + doneCh <- false }() + select { case <-time.After(timeout): - atomic.StoreUint32(&done, 1) return fmt.Errorf("timeout waiting to read %q", want) - case <-doneCh: + case res := <-doneCh: + if !res { + return fmt.Errorf("reader closed while waiting to read %q", want) + } return nil } } @@ -509,7 +529,8 @@ func TouchShardStatusFile() error { } // TestIndicesForShard returns indices for this test shard based on the -// TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. +// TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars, as well as +// the passed partition flags. // // If either of the env vars are not present, then the function will return all // tests. If there are more shards than there are tests, then the returned list @@ -534,6 +555,11 @@ func TestIndicesForShard(numTests int) ([]int, error) { } } + // Combine with the partitions. + partitionSize := shardTotal + shardTotal = (*totalPartitions) * shardTotal + shardIndex = partitionSize*(*partition-1) + shardIndex + // Calculate! var indices []int numBlocks := int(math.Ceil(float64(numTests) / float64(shardTotal))) |