From ffa9a715aaf4ebc4c8d8bef948649af358e4f4e4 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 29 Dec 2020 18:26:46 -0800 Subject: Simplify profiling and benchmarks. - Tweak the benchmarks to work with b.N where appropriate. In many cases, b.N was simply being ignored. This creates an implicit dependency in the user passing a reasonable benchtime (less than or equal to the actual runtime of the test, or using the X syntax) otherwise the test runs forever. - In cases where the above is impossible, explicitly set benchtime from the test wrapper, to prevent the above behavior (tensorflow). - Drop the *Reverse variants, which are simply hey benchmarks. We should just add a hey benchmark. The platforms benchmarks already include a native platform, and thus these benchmarks are incredibly confusing. (In other words, BenchmarkNginxReverse has nothing to do with an nginx benchmark for runsc.) - Remove the redunant Harness object, which contains no state, in order to slightly simplify the code. - Make Block and Heap profiling actually work, but setting appropriate runtime parameters (and plumbing them through the config). - Split the profiling into two phases: start and stop, since some will need to be started early, and others will need to happen at the end. PiperOrigin-RevId: 349495377 --- pkg/test/dockerutil/container.go | 88 +++++++++----------- pkg/test/dockerutil/dockerutil.go | 10 +-- pkg/test/dockerutil/profile.go | 157 +++++++++++++++++------------------- pkg/test/dockerutil/profile_test.go | 59 ++++++++------ 4 files changed, 151 insertions(+), 163 deletions(-) (limited to 'pkg/test/dockerutil') diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go index 2bf0a22ff..7b5fcef9c 100644 --- a/pkg/test/dockerutil/container.go +++ b/pkg/test/dockerutil/container.go @@ -55,11 +55,8 @@ type Container struct { copyErr error cleanups []func() - // Profiles are profiles added to this container. They contain methods - // that are run after Creation, Start, and Cleanup of this Container, along - // a handle to restart the profile. Generally, tests/benchmarks using - // profiles need to run as root. - profiles []Profile + // profile is the profiling hook associated with this container. + profile *profile } // RunOpts are options for running a container. @@ -105,22 +102,7 @@ type RunOpts struct { Links []string } -// MakeContainer sets up the struct for a Docker container. -// -// Names of containers will be unique. -// Containers will check flags for profiling requests. -func MakeContainer(ctx context.Context, logger testutil.Logger) *Container { - c := MakeNativeContainer(ctx, logger) - c.runtime = *runtime - if p := MakePprofFromFlags(c); p != nil { - c.AddProfile(p) - } - return c -} - -// MakeNativeContainer sets up the struct for a DockerContainer using runc. Native -// containers aren't profiled. -func MakeNativeContainer(ctx context.Context, logger testutil.Logger) *Container { +func makeContainer(ctx context.Context, logger testutil.Logger, runtime string) *Container { // Slashes are not allowed in container names. name := testutil.RandomID(logger.Name()) name = strings.ReplaceAll(name, "/", "-") @@ -132,24 +114,29 @@ func MakeNativeContainer(ctx context.Context, logger testutil.Logger) *Container return &Container{ logger: logger, Name: name, - runtime: "", + runtime: runtime, client: client, } } -// AddProfile adds a profile to this container. -func (c *Container) AddProfile(p Profile) { - c.profiles = append(c.profiles, p) +// MakeContainer constructs a suitable Container object. +// +// The runtime used is determined by the runtime flag. +// +// Containers will check flags for profiling requests. +func MakeContainer(ctx context.Context, logger testutil.Logger) *Container { + c := makeContainer(ctx, logger, *runtime) + c.profileInit() + return c } -// RestartProfiles calls Restart on all profiles for this container. -func (c *Container) RestartProfiles() error { - for _, profile := range c.profiles { - if err := profile.Restart(c); err != nil { - return err - } - } - return nil +// MakeNativeContainer constructs a suitable Container object. +// +// The runtime used will be the system default. +// +// Native containers aren't profiled. +func MakeNativeContainer(ctx context.Context, logger testutil.Logger) *Container { + return makeContainer(ctx, logger, "" /*runtime*/) } // Spawn is analogous to 'docker run -d'. @@ -206,6 +193,8 @@ func (c *Container) Run(ctx context.Context, r RunOpts, args ...string) (string, return "", err } + c.stopProfiling() + return c.Logs(ctx) } @@ -236,11 +225,6 @@ func (c *Container) create(ctx context.Context, conf *container.Config, hostconf return err } c.id = cont.ID - for _, profile := range c.profiles { - if err := profile.OnCreate(c); err != nil { - return fmt.Errorf("OnCreate method failed with: %v", err) - } - } return nil } @@ -286,11 +270,13 @@ func (c *Container) Start(ctx context.Context) error { if err := c.client.ContainerStart(ctx, c.id, types.ContainerStartOptions{}); err != nil { return fmt.Errorf("ContainerStart failed: %v", err) } - for _, profile := range c.profiles { - if err := profile.OnStart(c); err != nil { - return fmt.Errorf("OnStart method failed: %v", err) + + if c.profile != nil { + if err := c.profile.Start(c); err != nil { + c.logger.Logf("profile.Start failed: %v", err) } } + return nil } @@ -499,8 +485,18 @@ func (c *Container) WaitForOutputSubmatch(ctx context.Context, pattern string, t } } +// stopProfiling stops profiling. +func (c *Container) stopProfiling() { + if c.profile != nil { + if err := c.profile.Stop(c); err != nil { + c.logger.Logf("profile.Stop failed: %v", err) + } + } +} + // Kill kills the container. func (c *Container) Kill(ctx context.Context) error { + c.stopProfiling() return c.client.ContainerKill(ctx, c.id, "") } @@ -517,14 +513,6 @@ func (c *Container) Remove(ctx context.Context) error { // CleanUp kills and deletes the container (best effort). func (c *Container) CleanUp(ctx context.Context) { - // Execute profile cleanups before the container goes down. - for _, profile := range c.profiles { - profile.OnCleanUp(c) - } - - // Forget profiles. - c.profiles = nil - // Execute all cleanups. We execute cleanups here to close any // open connections to the container before closing. Open connections // can cause Kill and Remove to hang. @@ -538,10 +526,12 @@ func (c *Container) CleanUp(ctx context.Context) { // Just log; can't do anything here. c.logger.Logf("error killing container %q: %v", c.Name, err) } + // Remove the image. if err := c.Remove(ctx); err != nil { c.logger.Logf("error removing container %q: %v", c.Name, err) } + // Forget all mounts. c.mounts = nil } diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index 7027df1a5..a40005799 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -49,15 +49,11 @@ var ( // pprofBaseDir allows the user to change the directory to which profiles are // written. By default, profiles will appear under: // /tmp/profile/RUNTIME/CONTAINER_NAME/*.pprof. - pprofBaseDir = flag.String("pprof-dir", "/tmp/profile", "base directory in: BASEDIR/RUNTIME/CONTINER_NAME/FILENAME (e.g. /tmp/profile/runtime/mycontainer/cpu.pprof)") - - // duration is the max duration `runsc debug` will run and capture profiles. - // If the container's clean up method is called prior to duration, the - // profiling process will be killed. - duration = flag.Duration("pprof-duration", 10*time.Second, "duration to run the profile in seconds") + pprofBaseDir = flag.String("pprof-dir", "/tmp/profile", "base directory in: BASEDIR/RUNTIME/CONTINER_NAME/FILENAME (e.g. /tmp/profile/runtime/mycontainer/cpu.pprof)") + pprofDuration = flag.Duration("pprof-duration", time.Hour, "profiling duration (automatically stopped at container exit)") // The below flags enable each type of profile. Multiple profiles can be - // enabled for each run. + // enabled for each run. The profile will be collected from the start. pprofBlock = flag.Bool("pprof-block", false, "enables block profiling with runsc debug") pprofCPU = flag.Bool("pprof-cpu", false, "enables CPU profiling with runsc debug") pprofHeap = flag.Bool("pprof-heap", false, "enables heap profiling with runsc debug") diff --git a/pkg/test/dockerutil/profile.go b/pkg/test/dockerutil/profile.go index 55f9496cd..f1103eb6e 100644 --- a/pkg/test/dockerutil/profile.go +++ b/pkg/test/dockerutil/profile.go @@ -17,72 +17,57 @@ package dockerutil import ( "context" "fmt" - "io" "os" "os/exec" "path/filepath" + "syscall" "time" ) -// Profile represents profile-like operations on a container, -// such as running perf or pprof. It is meant to be added to containers -// such that the container type calls the Profile during its lifecycle. -type Profile interface { - // OnCreate is called just after the container is created when the container - // has a valid ID (e.g. c.ID()). - OnCreate(c *Container) error - - // OnStart is called just after the container is started when the container - // has a valid Pid (e.g. c.SandboxPid()). - OnStart(c *Container) error - - // Restart restarts the Profile on request. - Restart(c *Container) error - - // OnCleanUp is called during the container's cleanup method. - // Cleanups should just log errors if they have them. - OnCleanUp(c *Container) error -} - -// Pprof is for running profiles with 'runsc debug'. Pprof workloads -// should be run as root and ONLY against runsc sandboxes. The runtime -// should have --profile set as an option in /etc/docker/daemon.json in -// order for profiling to work with Pprof. -type Pprof struct { - BasePath string // path to put profiles - BlockProfile bool - CPUProfile bool - HeapProfile bool - MutexProfile bool - Duration time.Duration // duration to run profiler e.g. '10s' or '1m'. - shouldRun bool - cmd *exec.Cmd - stdout io.ReadCloser - stderr io.ReadCloser +// profile represents profile-like operations on a container. +// +// It is meant to be added to containers such that the container type calls +// the profile during its lifecycle. Standard implementations are below. + +// profile is for running profiles with 'runsc debug'. +type profile struct { + BasePath string + Types []string + Duration time.Duration + cmd *exec.Cmd } -// MakePprofFromFlags makes a Pprof profile from flags. -func MakePprofFromFlags(c *Container) *Pprof { - if !(*pprofBlock || *pprofCPU || *pprofHeap || *pprofMutex) { - return nil +// profileInit initializes a profile object, if required. +func (c *Container) profileInit() { + if !*pprofBlock && !*pprofCPU && !*pprofMutex && !*pprofHeap { + return // Nothing to do. + } + c.profile = &profile{ + BasePath: filepath.Join(*pprofBaseDir, c.runtime, c.Name), + Duration: *pprofDuration, } - return &Pprof{ - BasePath: filepath.Join(*pprofBaseDir, c.runtime, c.Name), - BlockProfile: *pprofBlock, - CPUProfile: *pprofCPU, - HeapProfile: *pprofHeap, - MutexProfile: *pprofMutex, - Duration: *duration, + if *pprofCPU { + c.profile.Types = append(c.profile.Types, "cpu") + } + if *pprofHeap { + c.profile.Types = append(c.profile.Types, "heap") + } + if *pprofMutex { + c.profile.Types = append(c.profile.Types, "mutex") + } + if *pprofBlock { + c.profile.Types = append(c.profile.Types, "block") } } -// OnCreate implements Profile.OnCreate. -func (p *Pprof) OnCreate(c *Container) error { - return os.MkdirAll(p.BasePath, 0755) -} +// createProcess creates the collection process. +func (p *profile) createProcess(c *Container) error { + // Ensure our directory exists. + if err := os.MkdirAll(p.BasePath, 0755); err != nil { + return err + } -// OnStart implements Profile.OnStart. -func (p *Pprof) OnStart(c *Container) error { + // Find the runtime to invoke. path, err := RuntimePath() if err != nil { return fmt.Errorf("failed to get runtime path: %v", err) @@ -90,58 +75,66 @@ func (p *Pprof) OnStart(c *Container) error { // The root directory of this container's runtime. root := fmt.Sprintf("--root=/var/run/docker/runtime-%s/moby", c.runtime) - // Format is `runsc --root=rootdir debug --profile-*=file --duration=* containerID`. + + // Format is `runsc --root=rootdir debug --profile-*=file --duration=24h containerID`. args := []string{root, "debug"} - args = append(args, p.makeProfileArgs(c)...) + for _, profileArg := range p.Types { + outputPath := filepath.Join(p.BasePath, fmt.Sprintf("%s.pprof", profileArg)) + args = append(args, fmt.Sprintf("--profile-%s=%s", profileArg, outputPath)) + } + args = append(args, fmt.Sprintf("--duration=%s", p.Duration)) // Or until container exits. args = append(args, c.ID()) // Best effort wait until container is running. for now := time.Now(); time.Since(now) < 5*time.Second; { if status, err := c.Status(context.Background()); err != nil { return fmt.Errorf("failed to get status with: %v", err) - } else if status.Running { break } - time.Sleep(500 * time.Millisecond) + time.Sleep(100 * time.Millisecond) } p.cmd = exec.Command(path, args...) + p.cmd.Stderr = os.Stderr // Pass through errors. if err := p.cmd.Start(); err != nil { - return fmt.Errorf("process failed: %v", err) + return fmt.Errorf("start process failed: %v", err) } + return nil } -// Restart implements Profile.Restart. -func (p *Pprof) Restart(c *Container) error { - p.OnCleanUp(c) - return p.OnStart(c) +// killProcess kills the process, if running. +// +// Precondition: mu must be held. +func (p *profile) killProcess() error { + if p.cmd != nil && p.cmd.Process != nil { + return p.cmd.Process.Signal(syscall.SIGTERM) + } + return nil } -// OnCleanUp implements Profile.OnCleanup -func (p *Pprof) OnCleanUp(c *Container) error { +// waitProcess waits for the process, if running. +// +// Precondition: mu must be held. +func (p *profile) waitProcess() error { defer func() { p.cmd = nil }() - if p.cmd != nil && p.cmd.Process != nil && p.cmd.ProcessState != nil && !p.cmd.ProcessState.Exited() { - return p.cmd.Process.Kill() + if p.cmd != nil { + return p.cmd.Wait() } return nil } -// makeProfileArgs turns Pprof fields into runsc debug flags. -func (p *Pprof) makeProfileArgs(c *Container) []string { - var ret []string - if p.BlockProfile { - ret = append(ret, fmt.Sprintf("--profile-block=%s", filepath.Join(p.BasePath, "block.pprof"))) - } - if p.CPUProfile { - ret = append(ret, fmt.Sprintf("--profile-cpu=%s", filepath.Join(p.BasePath, "cpu.pprof"))) - } - if p.HeapProfile { - ret = append(ret, fmt.Sprintf("--profile-heap=%s", filepath.Join(p.BasePath, "heap.pprof"))) - } - if p.MutexProfile { - ret = append(ret, fmt.Sprintf("--profile-mutex=%s", filepath.Join(p.BasePath, "mutex.pprof"))) +// Start is called when profiling is started. +func (p *profile) Start(c *Container) error { + return p.createProcess(c) +} + +// Stop is called when profiling is started. +func (p *profile) Stop(c *Container) error { + killErr := p.killProcess() + waitErr := p.waitProcess() + if waitErr != nil && killErr != nil { + return killErr } - ret = append(ret, fmt.Sprintf("--duration=%s", p.Duration)) - return ret + return waitErr // Ignore okay wait, err kill. } diff --git a/pkg/test/dockerutil/profile_test.go b/pkg/test/dockerutil/profile_test.go index 8c4ffe483..4fe9ce15c 100644 --- a/pkg/test/dockerutil/profile_test.go +++ b/pkg/test/dockerutil/profile_test.go @@ -17,6 +17,7 @@ package dockerutil import ( "context" "fmt" + "io/ioutil" "os" "path/filepath" "testing" @@ -25,52 +26,60 @@ import ( type testCase struct { name string - pprof Pprof + profile profile expectedFiles []string } -func TestPprof(t *testing.T) { +func TestProfile(t *testing.T) { // Basepath and expected file names for each type of profile. - basePath := "/tmp/test/profile" + tmpDir, err := ioutil.TempDir("", "") + if err != nil { + t.Fatalf("unable to create temporary directory: %v", err) + } + defer os.RemoveAll(tmpDir) + + // All expected names. + basePath := tmpDir block := "block.pprof" cpu := "cpu.pprof" - goprofle := "go.pprof" heap := "heap.pprof" mutex := "mutex.pprof" testCases := []testCase{ { - name: "Cpu", - pprof: Pprof{ - BasePath: basePath, - CPUProfile: true, - Duration: 2 * time.Second, + name: "One", + profile: profile{ + BasePath: basePath, + Types: []string{"cpu"}, + Duration: 2 * time.Second, }, expectedFiles: []string{cpu}, }, { name: "All", - pprof: Pprof{ - BasePath: basePath, - BlockProfile: true, - CPUProfile: true, - HeapProfile: true, - MutexProfile: true, - Duration: 2 * time.Second, + profile: profile{ + BasePath: basePath, + Types: []string{"block", "cpu", "heap", "mutex"}, + Duration: 2 * time.Second, }, - expectedFiles: []string{block, cpu, goprofle, heap, mutex}, + expectedFiles: []string{block, cpu, heap, mutex}, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { ctx := context.Background() c := MakeContainer(ctx, t) + // Set basepath to include the container name so there are no conflicts. - tc.pprof.BasePath = filepath.Join(tc.pprof.BasePath, c.Name) - c.AddProfile(&tc.pprof) + localProfile := tc.profile // Copy it. + localProfile.BasePath = filepath.Join(localProfile.BasePath, tc.name) + + // Set directly on the container, to avoid flags. + c.profile = &localProfile func() { defer c.CleanUp(ctx) + // Start a container. if err := c.Spawn(ctx, RunOpts{ Image: "basic/alpine", @@ -83,24 +92,24 @@ func TestPprof(t *testing.T) { } // End early if the expected files exist and have data. - for start := time.Now(); time.Since(start) < tc.pprof.Duration; time.Sleep(500 * time.Millisecond) { - if err := checkFiles(tc); err == nil { + for start := time.Now(); time.Since(start) < localProfile.Duration; time.Sleep(100 * time.Millisecond) { + if err := checkFiles(localProfile.BasePath, tc.expectedFiles); err == nil { break } } }() // Check all expected files exist and have data. - if err := checkFiles(tc); err != nil { + if err := checkFiles(localProfile.BasePath, tc.expectedFiles); err != nil { t.Fatalf(err.Error()) } }) } } -func checkFiles(tc testCase) error { - for _, file := range tc.expectedFiles { - stat, err := os.Stat(filepath.Join(tc.pprof.BasePath, file)) +func checkFiles(basePath string, expectedFiles []string) error { + for _, file := range expectedFiles { + stat, err := os.Stat(filepath.Join(basePath, file)) if err != nil { return fmt.Errorf("stat failed with: %v", err) } else if stat.Size() < 1 { -- cgit v1.2.3