summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAdin Scannell <ascannell@google.com>2021-01-05 13:20:12 -0800
committergVisor bot <gvisor-bot@google.com>2021-01-05 13:21:54 -0800
commitb06e5bc5b0913d3740b435d8753a2569220e0a33 (patch)
tree63c4a862ed11526c76aa7cf8c54d9b640d679df8
parent93b38bddba90f54bfdc166322f6e83e5f012e4cb (diff)
Add benchmarks targets to BuildKite.
This includes minor fix-ups: * Handle SIGTERM in runsc debug, to exit gracefully. * Fix cmd.debug.go opening all profiles as RDONLY. * Fix the test name in fio_test.go, and encode the block size in the test. PiperOrigin-RevId: 350205718
-rw-r--r--.buildkite/hooks/post-command104
-rw-r--r--.buildkite/hooks/pre-command5
-rw-r--r--.buildkite/pipeline.yaml73
-rwxr-xr-x.buildkite/summarize.sh52
-rw-r--r--Makefile2
-rw-r--r--pkg/control/server/server.go5
-rw-r--r--pkg/sentry/control/pprof.go56
-rw-r--r--pkg/test/dockerutil/container.go32
-rw-r--r--pkg/test/dockerutil/profile.go16
-rw-r--r--pkg/urpc/urpc.go19
-rw-r--r--runsc/boot/controller.go16
-rw-r--r--runsc/cmd/debug.go83
-rw-r--r--runsc/sandbox/sandbox.go3
-rw-r--r--test/benchmarks/fs/fio_test.go18
-rw-r--r--test/benchmarks/tools/sysbench.go31
-rw-r--r--test/packetimpact/runner/dut.go2
-rw-r--r--test/root/cgroup_test.go5
17 files changed, 293 insertions, 229 deletions
diff --git a/.buildkite/hooks/post-command b/.buildkite/hooks/post-command
index b0396bec7..5cd974002 100644
--- a/.buildkite/hooks/post-command
+++ b/.buildkite/hooks/post-command
@@ -1,72 +1,54 @@
-# Upload test logs on failure, if there are any.
-if test "${BUILDKITE_COMMAND_EXIT_STATUS}" -ne "0"; then
- # Generate a metafile that ends with .output, and contains all the
- # test failures that have been uploaded. These will all be sorted and
- # aggregated by a failure stage in the build pipeline.
- declare output=$(mktemp "${BUILDKITE_JOB_ID}".XXXXXX.output)
- make -s testlogs 2>/dev/null | grep // | sort | uniq | (
- declare log_count=0
- while read target log; do
- if test -z "${target}"; then
- continue
- fi
+# Upload all relevant test failures.
+make -s testlogs 2>/dev/null | grep // | sort | uniq | (
+ declare log_count=0
+ while read target log; do
+ if test -z "${target}"; then
+ continue
+ fi
+
+ # N.B. If *all* tests fail due to some common cause, then we will
+ # end up spending way too much time uploading logs. Instead, we just
+ # upload the first 10 and stop. That is hopefully enough to debug.
+ #
+ # We include this test in the metadata, but note that we cannot
+ # upload the actual test logs. The user should rerun locally.
+ log_count=$((${log_count}+1))
+ if test "${log_count}" -ge 10; then
+ echo " * ${target} (no upload)" | \
+ buildkite-agent annotate --style error --context failures --append
+ else
+ buildkite-agent artifact upload "${log}"
+ echo " * [${target}](artifact://${log#/}) (${BUILDKITE_LABEL})" | \
+ buildkite-agent annotate --style error --context failures --append
+ fi
+ done
+)
- # N.B. If *all* tests fail due to some common cause, then we will
- # end up spending way too much time uploading logs. Instead, we just
- # upload the first 10 and stop. That is hopefully enough to debug.
- #
- # We include this test in the metadata, but note that we cannot
- # upload the actual test logs. The user should rerun locally.
- log_count=$((${log_count}+1))
- if test "${log_count}" -ge 10; then
- echo " * ${target} (no upload)" | tee -a "${output}"
- else
- buildkite-agent artifact upload "${log}"
- echo " * [${target}](artifact://${log#/})" | tee -a "${output}"
- fi
- done
- )
+# Upload all profiles, and include in an annotation.
+declare profile_output=$(mktemp --tmpdir)
+for file in $(find /tmp/profile -name \*.pprof -print 2>/dev/null | sort); do
+ # Generate a link to the profile file at the top.
+ profile_name="${file#/tmp/profile/}"
+ buildkite-agent artifact upload "${file}"
+ echo "<li><a href='artifact://${file#/}'>${profile_name}</a></li>" >> "${profile_output}"
+done
- # Upload if we had outputs.
- if test -s "${output}"; then
- buildkite-agent artifact upload "${output}"
- fi
- rm -rf "${output}"
+# Upload if we had outputs.
+if test -s "${profile_output}"; then
+ # Make the list a collapsible section in markdown.
+ sed -i "1s|^|<details><summary>${BUILDKITE_LABEL}</summary><ul>\n|" "${profile_output}"
+ echo "</ul></details>" >> "${profile_output}"
+ cat "${profile_output}" | buildkite-agent annotate --style info --context profiles --append
+fi
+rm -rf "${profile_output}"
+# Clean the bazel cache, if there's failure.
+if test "${BUILDKITE_COMMAND_EXIT_STATUS}" -ne "0"; then
# Attempt to clear the cache and shut down.
make clean || echo "make clean failed with code $?"
make bazel-shutdown || echo "make bazel-shutdown failed with code $?"
fi
-# Upload all profiles, and include in an annotation.
-if test -d /tmp/profile; then
- # Same as above.
- declare profile_output=$(mktemp "${BUILDKITE_JOB_ID}".XXXXXX.profile_output)
- for file in $(find /tmp/profile -name \*.pprof -print 2>/dev/null | sort); do
- # Generate a link to speedscope, with a URL-encoded link to the BuildKite
- # artifact location. Note that we use do a fixed URL encode below, since
- # the link can be uniquely determined. If the storage location changes,
- # this schema may break and these links may stop working. The artifacts
- # uploaded however, will still work just fine.
- profile_name="${file#/tmp/profile/}"
- public_url="https://storage.googleapis.com/gvisor-buildkite/${BUILDKITE_BUILD_ID}/${BUILDKITE_JOB_ID}/${file#/}"
- encoded_url=$(jq -rn --arg x "${public_url}" '$x|@uri')
- encoded_title=$(jq -rn --arg x "${profile_name}" '$x|@uri')
- profile_url="https://speedscope.app/#profileURL=${encoded_url}&title=${encoded_title}"
- buildkite-agent artifact upload "${file}"
- echo " * [${profile_name}](${profile_url}) ([pprof](artifact://${file#/}))" | tee -a "${profile_output}"
- done
-
- # Upload if we had outputs.
- if test -s "${profile_output}"; then
- buildkite-agent artifact upload "${profile_output}"
- fi
- rm -rf "${profile_output}"
-
- # Remove stale profiles, which may be owned by root.
- sudo rm -rf /tmp/profile
-fi
-
# Kill any running containers (clear state).
CONTAINERS="$(docker ps -q)"
if ! test -z "${CONTAINERS}"; then
diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command
index 4f41fe021..ba688f9ac 100644
--- a/.buildkite/hooks/pre-command
+++ b/.buildkite/hooks/pre-command
@@ -27,4 +27,7 @@ if test "${BUILDKITE_BRANCH}" = "master"; then
export BENCHMARKS_OFFICIAL=true
else
export BENCHMARKS_OFFICIAL=false
-fi \ No newline at end of file
+fi
+
+# Clear existing profiles.
+sudo rm -rf /tmp/profile
diff --git a/.buildkite/pipeline.yaml b/.buildkite/pipeline.yaml
index ba054319c..d03847800 100644
--- a/.buildkite/pipeline.yaml
+++ b/.buildkite/pipeline.yaml
@@ -7,6 +7,20 @@ _templates:
limit: 10
- exit_status: "*"
limit: 2
+ benchmarks: &benchmarks
+ timeout_in_minutes: 120
+ retry:
+ automatic: false
+ soft_fail: true
+ if: build.message =~ /benchmarks/ || build.branch == "master"
+ env:
+ # BENCHMARKS_OFFICIAL is set from hooks/pre-command, based
+ # on whether this is executing on the master branch.
+ BENCHMARKS_DATASET: buildkite
+ BENCHMARKS_PLATFORMS: "ptrace kvm"
+ BENCHMARKS_PROJECT: gvisor-benchmarks
+ BENCHMARKS_TABLE: benchmarks
+ BENCHMARKS_UPLOAD: true
steps:
# Run basic smoke tests before preceding to other tests.
@@ -133,17 +147,50 @@ steps:
parallelism: 10
if: build.message =~ /VFS1/ || build.branch == "master"
- # The final step here will aggregate data uploaded by all other steps into an
- # annotation that will appear at the top of the build, with useful information.
- #
- # See .buildkite/summarize.sh and .buildkite/hooks/post-command for more.
- - wait
+ # Run basic benchmarks smoke tests (no upload).
- <<: *common
- label: ":yawning_face: Wait"
- command: "true"
- key: "wait"
- - <<: *common
- label: ":thisisfine: Summarize"
- command: .buildkite/summarize.sh
- allow_dependency_failure: true
- depends_on: "wait"
+ label: ":fire: Benchmarks smoke test"
+ command: make benchmark-platforms
+ # Use the opposite of the benchmarks filter.
+ if: build.message !~ /benchmarks/ && build.branch != "master"
+
+ # Run all benchmarks.
+ - <<: *benchmarks
+ label: ":bazel: ABSL build benchmarks"
+ command: make benchmark-platforms BENCHMARKS_FILTER="ABSL/page_cache.clean" BENCHMARKS_SUITE=absl BENCHMARKS_TARGETS=test/benchmarks/fs:bazel_test
+ - <<: *benchmarks
+ label: ":metal: FFMPEG benchmarks"
+ command: make benchmark-platforms BENCHMARKS_SUITE=ffmpeg BENCHMARKS_TARGETS=test/benchmarks/media:ffmpeg_test
+ - <<: *benchmarks
+ label: ":floppy_disk: FIO benchmarks"
+ command: make benchmark-platforms BENCHMARKS_SUITE=fio BENCHMARKS_TARGETS=test/benchmarks/fs:fio_test
+ - <<: *benchmarks
+ label: ":globe_with_meridians: HTTPD benchmarks"
+ command: make benchmark-platforms BENCHMARKS_FILTER="Continuous" BENCHMARKS_SUITE=httpd BENCHMARKS_TARGETS=test/benchmarks/network:httpd_test
+ - <<: *benchmarks
+ label: ":piedpiper: iperf benchmarks"
+ command: make benchmark-platforms BENCHMARKS_SUITE=iperf BENCHMARKS_TARGETS=test/benchmarks/network:iperf_test
+ - <<: *benchmarks
+ label: ":nginx: nginx benchmarks"
+ command: make benchmark-platforms BENCHMARKS_FILTER="Continuous" BENCHMARKS_SUITE=nginx BENCHMARKS_TARGETS=test/benchmarks/network:nginx_test
+ - <<: *benchmarks
+ label: ":node: node benchmarks"
+ command: make benchmark-platforms BENCHMARKS_SUITE=node BENCHMARKS_TARGETS=test/benchmarks/network:node_test
+ - <<: *benchmarks
+ label: ":redis: Redis benchmarks"
+ command: make benchmark-platforms BENCHMARKS_SUITE=redis BENCHMARKS_TARGETS=test/benchmarks/database:redis_test
+ - <<: *benchmarks
+ label: ":ruby: Ruby benchmarks"
+ command: make benchmark-platforms BENCHMARKS_SUITE=ruby BENCHMARKS_TARGETS=test/benchmarks/network:ruby_test
+ - <<: *benchmarks
+ label: ":weight_lifter: Size benchmarks"
+ command: make benchmark-platforms BENCHMARKS_SUITE=size BENCHMARKS_TARGETS=test/benchmarks/base:size_test
+ - <<: *benchmarks
+ label: ":speedboat: Startup benchmarks"
+ command: make benchmark-platforms BENCHMARKS_SUITE=startup BENCHMARKS_TARGETS=test/benchmarks/base:startup_test
+ - <<: *benchmarks
+ label: ":computer: sysbench benchmarks"
+ command: make benchmark-platforms BENCHMARKS_SUITE=sysbench BENCHMARKS_TARGETS=test/benchmarks/base:sysbench_test
+ - <<: *benchmarks
+ label: ":tensorflow: TensorFlow benchmarks"
+ command: make benchmark-platforms BENCHMARKS_SUITE=tensorflow BENCHMARKS_TARGETS=test/benchmarks/ml:tensorflow_test
diff --git a/.buildkite/summarize.sh b/.buildkite/summarize.sh
deleted file mode 100755
index ddf8c9ad4..000000000
--- a/.buildkite/summarize.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-# Copyright 2020 The gVisor Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -xeou pipefail
-
-# This script collects metadata fragments produced by individual test shards in
-# .buildkite/hooks/postcommand, and aggregates these into a single annotation
-# that is posted to the build. In the future, this will include coverage.
-
-# Start the summary.
-declare summary
-declare status
-summary=$(mktemp --tmpdir summary.XXXXXX)
-status="info"
-
-# Download all outputs.
-declare outputs
-outputs=$(mktemp -d --tmpdir outputs.XXXXXX)
-if buildkite-agent artifact download '**/*.output' "${outputs}"; then
- status="error"
- echo "## Failures" >> "${summary}"
- find "${outputs}" -type f -print | xargs -r -n 1 cat | sort >> "${summary}"
-fi
-rm -rf "${outputs}"
-
-# Attempt to find profiles, if there are any.
-declare profiles
-profiles=$(mktemp -d --tmpdir profiles.XXXXXX)
-if buildkite-agent artifact download '**/*.profile_output' "${profiles}"; then
- echo "## Profiles" >> "${summary}"
- find "${profiles}" -type f -print | xargs -r -n 1 cat | sort >> "${summary}"
-fi
-rm -rf "${profiles}"
-
-# Upload the final annotation.
-if [[ -s "${summary}" ]]; then
- cat "${summary}" | buildkite-agent annotate --style "${status}"
-fi
-rm -rf "${summary}"
diff --git a/Makefile b/Makefile
index e90680fc9..e7d4434f3 100644
--- a/Makefile
+++ b/Makefile
@@ -330,7 +330,7 @@ BENCHMARKS_OFFICIAL ?= false
BENCHMARKS_PLATFORMS ?= ptrace
BENCHMARKS_TARGETS := //test/benchmarks/media:ffmpeg_test
BENCHMARKS_FILTER := .
-BENCHMARKS_OPTIONS := -test.benchtime=10s
+BENCHMARKS_OPTIONS := -test.benchtime=30s
BENCHMARKS_ARGS := -test.v -test.bench=$(BENCHMARKS_FILTER) -pprof-dir=/tmp/profile -pprof-cpu -pprof-heap -pprof-block -pprof-mutex $(BENCHMARKS_OPTIONS)
init-benchmark-table: ## Initializes a BigQuery table with the benchmark schema.
diff --git a/pkg/control/server/server.go b/pkg/control/server/server.go
index 41abe1f2d..629dae8f4 100644
--- a/pkg/control/server/server.go
+++ b/pkg/control/server/server.go
@@ -67,9 +67,10 @@ func (s *Server) Wait() {
// and the server should not be used afterwards.
func (s *Server) Stop() {
s.socket.Close()
- s.wg.Wait()
+ s.Wait()
- // This will cause existing clients to be terminated safely.
+ // This will cause existing clients to be terminated safely. If the
+ // registered handlers have a Stop callback, it will be called.
s.server.Stop()
}
diff --git a/pkg/sentry/control/pprof.go b/pkg/sentry/control/pprof.go
index b78e29416..2f3664c57 100644
--- a/pkg/sentry/control/pprof.go
+++ b/pkg/sentry/control/pprof.go
@@ -50,17 +50,17 @@ type Profile struct {
done chan struct{}
}
-// NewProfile returns a new Profile object, and a stop callback.
-//
-// The stop callback should be used at most once.
-func NewProfile(k *kernel.Kernel) (*Profile, func()) {
- p := &Profile{
+// NewProfile returns a new Profile object.
+func NewProfile(k *kernel.Kernel) *Profile {
+ return &Profile{
kernel: k,
done: make(chan struct{}),
}
- return p, func() {
- close(p.done)
- }
+}
+
+// Stop implements urpc.Stopper.Stop.
+func (p *Profile) Stop() {
+ close(p.done)
}
// CPUProfileOpts contains options specifically for CPU profiles.
@@ -70,9 +70,6 @@ type CPUProfileOpts struct {
// Duration is the duration of the profile.
Duration time.Duration `json:"duration"`
-
- // Hz is the rate, which may be zero.
- Hz int `json:"hz"`
}
// CPU is an RPC stub which collects a CPU profile.
@@ -81,19 +78,13 @@ func (p *Profile) CPU(o *CPUProfileOpts, _ *struct{}) error {
return nil // Allowed.
}
- output, err := fd.NewFromFile(o.FilePayload.Files[0])
- if err != nil {
- return err
- }
+ output := o.FilePayload.Files[0]
defer output.Close()
p.cpuMu.Lock()
defer p.cpuMu.Unlock()
// Returns an error if profiling is already started.
- if o.Hz != 0 {
- runtime.SetCPUProfileRate(o.Hz)
- }
if err := pprof.StartCPUProfile(output); err != nil {
return err
}
@@ -112,6 +103,11 @@ func (p *Profile) CPU(o *CPUProfileOpts, _ *struct{}) error {
type HeapProfileOpts struct {
// FilePayload is the destination for the profiling output.
urpc.FilePayload
+
+ // Delay is the sleep time, similar to Duration. This may
+ // not affect the data collected however, as the heap will
+ // continue only the memory associated with the last alloc.
+ Delay time.Duration `json:"delay"`
}
// Heap generates a heap profile.
@@ -123,7 +119,16 @@ func (p *Profile) Heap(o *HeapProfileOpts, _ *struct{}) error {
output := o.FilePayload.Files[0]
defer output.Close()
- runtime.GC() // Get up-to-date statistics.
+ // Wait for the given delay.
+ select {
+ case <-time.After(o.Delay):
+ case <-p.done:
+ }
+
+ // Get up-to-date statistics.
+ runtime.GC()
+
+ // Write the given profile.
return pprof.WriteHeapProfile(output)
}
@@ -170,8 +175,12 @@ func (p *Profile) Block(o *BlockProfileOpts, _ *struct{}) error {
defer p.blockMu.Unlock()
// Always set the rate. We then wait to collect a profile at this rate,
- // and disable when we're done.
- rate := 1
+ // and disable when we're done. Note that the default here is 10%, which
+ // will record a stacktrace 10% of the time when blocking occurs. Since
+ // these events should not be super frequent, we expect this to achieve
+ // a reasonable balance between collecting the data we need and imposing
+ // a high performance cost (e.g. skewing even the CPU profile).
+ rate := 10
if o.Rate != 0 {
rate = o.Rate
}
@@ -211,8 +220,9 @@ func (p *Profile) Mutex(o *MutexProfileOpts, _ *struct{}) error {
p.mutexMu.Lock()
defer p.mutexMu.Unlock()
- // Always set the fraction.
- fraction := 1
+ // Always set the fraction. Like the block rate above, we use
+ // a default rate of 10% for the same reasons.
+ fraction := 10
if o.Fraction != 0 {
fraction = o.Fraction
}
diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go
index 7b5fcef9c..7bacb70d3 100644
--- a/pkg/test/dockerutil/container.go
+++ b/pkg/test/dockerutil/container.go
@@ -125,9 +125,7 @@ func makeContainer(ctx context.Context, logger testutil.Logger, runtime string)
//
// Containers will check flags for profiling requests.
func MakeContainer(ctx context.Context, logger testutil.Logger) *Container {
- c := makeContainer(ctx, logger, *runtime)
- c.profileInit()
- return c
+ return makeContainer(ctx, logger, *runtime)
}
// MakeNativeContainer constructs a suitable Container object.
@@ -141,7 +139,7 @@ func MakeNativeContainer(ctx context.Context, logger testutil.Logger) *Container
// Spawn is analogous to 'docker run -d'.
func (c *Container) Spawn(ctx context.Context, r RunOpts, args ...string) error {
- if err := c.create(ctx, c.config(r, args), c.hostConfig(r), nil); err != nil {
+ if err := c.create(ctx, r.Image, c.config(r, args), c.hostConfig(r), nil); err != nil {
return err
}
return c.Start(ctx)
@@ -154,7 +152,7 @@ func (c *Container) SpawnProcess(ctx context.Context, r RunOpts, args ...string)
config.Tty = true
config.OpenStdin = true
- if err := c.CreateFrom(ctx, config, hostconf, netconf); err != nil {
+ if err := c.CreateFrom(ctx, r.Image, config, hostconf, netconf); err != nil {
return Process{}, err
}
@@ -181,7 +179,7 @@ func (c *Container) SpawnProcess(ctx context.Context, r RunOpts, args ...string)
// Run is analogous to 'docker run'.
func (c *Container) Run(ctx context.Context, r RunOpts, args ...string) (string, error) {
- if err := c.create(ctx, c.config(r, args), c.hostConfig(r), nil); err != nil {
+ if err := c.create(ctx, r.Image, c.config(r, args), c.hostConfig(r), nil); err != nil {
return "", err
}
@@ -193,8 +191,6 @@ func (c *Container) Run(ctx context.Context, r RunOpts, args ...string) (string,
return "", err
}
- c.stopProfiling()
-
return c.Logs(ctx)
}
@@ -210,16 +206,21 @@ func (c *Container) MakeLink(target string) string {
}
// CreateFrom creates a container from the given configs.
-func (c *Container) CreateFrom(ctx context.Context, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error {
- return c.create(ctx, conf, hostconf, netconf)
+func (c *Container) CreateFrom(ctx context.Context, profileImage string, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error {
+ return c.create(ctx, profileImage, conf, hostconf, netconf)
}
// Create is analogous to 'docker create'.
func (c *Container) Create(ctx context.Context, r RunOpts, args ...string) error {
- return c.create(ctx, c.config(r, args), c.hostConfig(r), nil)
+ return c.create(ctx, r.Image, c.config(r, args), c.hostConfig(r), nil)
}
-func (c *Container) create(ctx context.Context, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error {
+func (c *Container) create(ctx context.Context, profileImage string, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error {
+ if c.runtime != "" {
+ // Use the image name as provided here; which normally represents the
+ // unmodified "basic/alpine" image name. This should be easy to grok.
+ c.profileInit(profileImage)
+ }
cont, err := c.client.ContainerCreate(ctx, conf, hostconf, nil, c.Name)
if err != nil {
return err
@@ -428,6 +429,7 @@ func (c *Container) Status(ctx context.Context) (types.ContainerState, error) {
// Wait waits for the container to exit.
func (c *Container) Wait(ctx context.Context) error {
+ defer c.stopProfiling()
statusChan, errChan := c.client.ContainerWait(ctx, c.id, container.WaitConditionNotRunning)
select {
case err := <-errChan:
@@ -489,14 +491,16 @@ func (c *Container) WaitForOutputSubmatch(ctx context.Context, pattern string, t
func (c *Container) stopProfiling() {
if c.profile != nil {
if err := c.profile.Stop(c); err != nil {
- c.logger.Logf("profile.Stop failed: %v", err)
+ // This most likely means that the runtime for the container
+ // was too short to connect and actually get a profile.
+ c.logger.Logf("warning: profile.Stop failed: %v", err)
}
}
}
// Kill kills the container.
func (c *Container) Kill(ctx context.Context) error {
- c.stopProfiling()
+ defer c.stopProfiling()
return c.client.ContainerKill(ctx, c.id, "")
}
diff --git a/pkg/test/dockerutil/profile.go b/pkg/test/dockerutil/profile.go
index f1103eb6e..5cad3e959 100644
--- a/pkg/test/dockerutil/profile.go
+++ b/pkg/test/dockerutil/profile.go
@@ -38,12 +38,19 @@ type profile struct {
}
// profileInit initializes a profile object, if required.
-func (c *Container) profileInit() {
+//
+// N.B. The profiling filename initialized here will use the *image*
+// name, and not the unique container name. This is intentional. Most
+// of the time, profiling will be used for benchmarks. Benchmarks will
+// be run iteratively until a sufficiently large N is reached. It is
+// useful in this context to overwrite previous runs, and generate a
+// single profile result for the final test.
+func (c *Container) profileInit(image string) {
if !*pprofBlock && !*pprofCPU && !*pprofMutex && !*pprofHeap {
return // Nothing to do.
}
c.profile = &profile{
- BasePath: filepath.Join(*pprofBaseDir, c.runtime, c.Name),
+ BasePath: filepath.Join(*pprofBaseDir, c.runtime, c.logger.Name(), image),
Duration: *pprofDuration,
}
if *pprofCPU {
@@ -83,6 +90,7 @@ func (p *profile) createProcess(c *Container) error {
args = append(args, fmt.Sprintf("--profile-%s=%s", profileArg, outputPath))
}
args = append(args, fmt.Sprintf("--duration=%s", p.Duration)) // Or until container exits.
+ args = append(args, fmt.Sprintf("--delay=%s", p.Duration)) // Ditto.
args = append(args, c.ID())
// Best effort wait until container is running.
@@ -104,8 +112,6 @@ func (p *profile) createProcess(c *Container) error {
}
// killProcess kills the process, if running.
-//
-// Precondition: mu must be held.
func (p *profile) killProcess() error {
if p.cmd != nil && p.cmd.Process != nil {
return p.cmd.Process.Signal(syscall.SIGTERM)
@@ -114,8 +120,6 @@ func (p *profile) killProcess() error {
}
// waitProcess waits for the process, if running.
-//
-// Precondition: mu must be held.
func (p *profile) waitProcess() error {
defer func() { p.cmd = nil }()
if p.cmd != nil {
diff --git a/pkg/urpc/urpc.go b/pkg/urpc/urpc.go
index dfd23032c..0e9a829f6 100644
--- a/pkg/urpc/urpc.go
+++ b/pkg/urpc/urpc.go
@@ -170,6 +170,9 @@ type Server struct {
// methods is the set of server methods.
methods map[string]registeredMethod
+ // stoppers are all registered stoppers.
+ stoppers []Stopper
+
// clients is a map of clients.
clients map[*unet.Socket]clientState
@@ -195,6 +198,12 @@ func NewServerWithCallback(afterRPCCallback func()) *Server {
}
}
+// Stopper is an optional interface, that when implemented, allows an object
+// to have a callback executed when the server is shutting down.
+type Stopper interface {
+ Stop()
+}
+
// Register registers the given object as an RPC receiver.
//
// This functions is the same way as the built-in RPC package, but it does not
@@ -206,6 +215,7 @@ func (s *Server) Register(obj interface{}) {
defer s.mu.Unlock()
typ := reflect.TypeOf(obj)
+ stopper, hasStop := obj.(Stopper)
// If we got a pointer, deref it to the underlying object. We need this to
// obtain the name of the underlying type.
@@ -221,6 +231,10 @@ func (s *Server) Register(obj interface{}) {
// Can't be anonymous.
panic("type not named.")
}
+ if hasStop && method.Name == "Stop" {
+ s.stoppers = append(s.stoppers, stopper)
+ continue // Legal stop method.
+ }
prettyName := typDeref.Name() + "." + method.Name
if _, ok := s.methods[prettyName]; ok {
@@ -448,6 +462,11 @@ func (s *Server) Stop() {
// Wait for all outstanding requests.
defer s.wg.Wait()
+ // Call any Stop callbacks.
+ for _, stopper := range s.stoppers {
+ stopper.Stop()
+ }
+
// Close all known clients.
s.mu.Lock()
defer s.mu.Unlock()
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 9008e1282..cb5d8ea31 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -129,14 +129,6 @@ type controller struct {
// manager holds the containerManager methods.
manager *containerManager
-
- // pprof holds the profile instance if enabled. It may be nil.
- pprof *control.Profile
-
- // stopProfiling has the callback to stop profiling calls. As
- // this may be executed only once at most, it will be set to nil
- // after it is executed for the first time.
- stopProfiling func()
}
// newController creates a new controller. The caller must call
@@ -167,18 +159,14 @@ func newController(fd int, l *Loader) (*controller, error) {
ctrl.srv.Register(&control.Logging{})
if l.root.conf.ProfileEnable {
- ctrl.pprof, ctrl.stopProfiling = control.NewProfile(l.k)
- ctrl.srv.Register(ctrl.pprof)
+ ctrl.srv.Register(control.NewProfile(l.k))
}
return ctrl, nil
}
func (c *controller) stop() {
- if c.stopProfiling != nil {
- c.stopProfiling()
- c.stopProfiling = nil
- }
+ c.srv.Stop()
}
// containerManager manages sandbox containers.
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 843dce01d..b84142b0d 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -17,6 +17,7 @@ package cmd
import (
"context"
"os"
+ "os/signal"
"strconv"
"strings"
"sync"
@@ -44,6 +45,7 @@ type Debug struct {
strace string
logLevel string
logPackets string
+ delay time.Duration
duration time.Duration
ps bool
}
@@ -71,7 +73,8 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.")
f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.")
- f.DurationVar(&d.duration, "duration", time.Second, "amount of time to wait for CPU and trace profiles.")
+ f.DurationVar(&d.delay, "delay", time.Hour, "amount of time to delay for collecting heap and goroutine profiles.")
+ f.DurationVar(&d.duration, "duration", time.Hour, "amount of time to wait for CPU and trace profiles.")
f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all.`)
@@ -144,16 +147,6 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
log.Infof(" *** Stack dump ***\n%s", stacks)
}
- if d.profileHeap != "" {
- f, err := os.OpenFile(d.profileHeap, os.O_CREATE|os.O_TRUNC, 0644)
- if err != nil {
- return Errorf("error opening heap profile output: %v", err)
- }
- defer f.Close()
- if err := c.Sandbox.HeapProfile(f); err != nil {
- return Errorf("error collecting heap profile: %v", err)
- }
- }
if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 {
args := control.LoggingArgs{}
switch strings.ToLower(d.strace) {
@@ -224,13 +217,22 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Open profiling files.
var (
+ heapFile *os.File
cpuFile *os.File
traceFile *os.File
blockFile *os.File
mutexFile *os.File
)
+ if d.profileHeap != "" {
+ f, err := os.OpenFile(d.profileHeap, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
+ if err != nil {
+ return Errorf("error opening heap profile output: %v", err)
+ }
+ defer f.Close()
+ heapFile = f
+ }
if d.profileCPU != "" {
- f, err := os.OpenFile(d.profileCPU, os.O_CREATE|os.O_TRUNC, 0644)
+ f, err := os.OpenFile(d.profileCPU, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
if err != nil {
return Errorf("error opening cpu profile output: %v", err)
}
@@ -238,14 +240,14 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
cpuFile = f
}
if d.trace != "" {
- f, err := os.OpenFile(d.trace, os.O_CREATE|os.O_TRUNC, 0644)
+ f, err := os.OpenFile(d.trace, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
if err != nil {
return Errorf("error opening trace profile output: %v", err)
}
traceFile = f
}
if d.profileBlock != "" {
- f, err := os.OpenFile(d.profileBlock, os.O_CREATE|os.O_TRUNC, 0644)
+ f, err := os.OpenFile(d.profileBlock, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
if err != nil {
return Errorf("error opening blocking profile output: %v", err)
}
@@ -253,7 +255,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
blockFile = f
}
if d.profileMutex != "" {
- f, err := os.OpenFile(d.profileMutex, os.O_CREATE|os.O_TRUNC, 0644)
+ f, err := os.OpenFile(d.profileMutex, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
if err != nil {
return Errorf("error opening mutex profile output: %v", err)
}
@@ -264,11 +266,19 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Collect profiles.
var (
wg sync.WaitGroup
+ heapErr error
cpuErr error
traceErr error
blockErr error
mutexErr error
)
+ if heapFile != nil {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ heapErr = c.Sandbox.HeapProfile(heapFile, d.delay)
+ }()
+ }
if cpuFile != nil {
wg.Add(1)
go func() {
@@ -298,20 +308,61 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}()
}
- wg.Wait()
+ // Before sleeping, allow us to catch signals and try to exit
+ // gracefully before just exiting. If we can't wait for wg, then
+ // we will not be able to read the errors below safely.
+ readyChan := make(chan struct{})
+ go func() {
+ defer close(readyChan)
+ wg.Wait()
+ }()
+ signals := make(chan os.Signal, 1)
+ signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT)
+ select {
+ case <-readyChan:
+ break // Safe to proceed.
+ case <-signals:
+ log.Infof("caught signal, waiting at most one more second.")
+ select {
+ case <-signals:
+ log.Infof("caught second signal, exiting immediately.")
+ os.Exit(1) // Not finished.
+ case <-time.After(time.Second):
+ log.Infof("timeout, exiting.")
+ os.Exit(1) // Not finished.
+ case <-readyChan:
+ break // Safe to proceed.
+ }
+ }
+
+ // Collect all errors.
errorCount := 0
+ if heapErr != nil {
+ errorCount++
+ log.Infof("error collecting heap profile: %v", heapErr)
+ os.Remove(heapFile.Name())
+ }
if cpuErr != nil {
+ errorCount++
log.Infof("error collecting cpu profile: %v", cpuErr)
+ os.Remove(cpuFile.Name())
}
if traceErr != nil {
+ errorCount++
log.Infof("error collecting trace profile: %v", traceErr)
+ os.Remove(traceFile.Name())
}
if blockErr != nil {
+ errorCount++
log.Infof("error collecting block profile: %v", blockErr)
+ os.Remove(blockFile.Name())
}
if mutexErr != nil {
+ errorCount++
log.Infof("error collecting mutex profile: %v", mutexErr)
+ os.Remove(mutexFile.Name())
}
+
if errorCount > 0 {
return subcommands.ExitFailure
}
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index c1d13a58d..cfee9e63d 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -991,7 +991,7 @@ func (s *Sandbox) Stacks() (string, error) {
}
// HeapProfile writes a heap profile to the given file.
-func (s *Sandbox) HeapProfile(f *os.File) error {
+func (s *Sandbox) HeapProfile(f *os.File, delay time.Duration) error {
log.Debugf("Heap profile %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
@@ -1001,6 +1001,7 @@ func (s *Sandbox) HeapProfile(f *os.File) error {
opts := control.HeapProfileOpts{
FilePayload: urpc.FilePayload{Files: []*os.File{f}},
+ Delay: delay,
}
return conn.Call(boot.HeapProfile, &opts, nil)
}
diff --git a/test/benchmarks/fs/fio_test.go b/test/benchmarks/fs/fio_test.go
index 0c772b768..83b8376a5 100644
--- a/test/benchmarks/fs/fio_test.go
+++ b/test/benchmarks/fs/fio_test.go
@@ -33,37 +33,37 @@ import (
func BenchmarkFio(b *testing.B) {
testCases := []tools.Fio{
tools.Fio{
- Test: "write4K",
+ Test: "write",
Size: b.N,
BlockSize: 4,
IODepth: 4,
},
tools.Fio{
- Test: "write1M",
+ Test: "write",
Size: b.N,
BlockSize: 1024,
IODepth: 4,
},
tools.Fio{
- Test: "read4K",
+ Test: "read",
Size: b.N,
BlockSize: 4,
IODepth: 4,
},
tools.Fio{
- Test: "read1M",
+ Test: "read",
Size: b.N,
BlockSize: 1024,
IODepth: 4,
},
tools.Fio{
- Test: "randwrite4K",
+ Test: "randwrite",
Size: b.N,
BlockSize: 4,
IODepth: 4,
},
tools.Fio{
- Test: "randread4K",
+ Test: "randread",
Size: b.N,
BlockSize: 4,
IODepth: 4,
@@ -82,11 +82,15 @@ func BenchmarkFio(b *testing.B) {
Name: "operation",
Value: tc.Test,
}
+ blockSize := tools.Parameter{
+ Name: "blockSize",
+ Value: fmt.Sprintf("%dK", tc.BlockSize),
+ }
filesystem := tools.Parameter{
Name: "filesystem",
Value: string(fsType),
}
- name, err := tools.ParametersToName(operation, filesystem)
+ name, err := tools.ParametersToName(operation, blockSize, filesystem)
if err != nil {
b.Fatalf("Failed to parser paramters: %v", err)
}
diff --git a/test/benchmarks/tools/sysbench.go b/test/benchmarks/tools/sysbench.go
index 2b8e6c8aa..350f8ec98 100644
--- a/test/benchmarks/tools/sysbench.go
+++ b/test/benchmarks/tools/sysbench.go
@@ -38,13 +38,15 @@ type SysbenchBase struct {
}
// baseFlags returns top level flags.
-func (s *SysbenchBase) baseFlags(b *testing.B) []string {
+func (s *SysbenchBase) baseFlags(b *testing.B, useEvents bool) []string {
var ret []string
if s.Threads > 0 {
ret = append(ret, fmt.Sprintf("--threads=%d", s.Threads))
}
- ret = append(ret, "--time=0") // Ensure events is used.
- ret = append(ret, fmt.Sprintf("--events=%d", b.N))
+ ret = append(ret, "--time=0") // Ensure other mechanism is used.
+ if useEvents {
+ ret = append(ret, fmt.Sprintf("--events=%d", b.N))
+ }
return ret
}
@@ -56,7 +58,7 @@ type SysbenchCPU struct {
// MakeCmd makes commands for SysbenchCPU.
func (s *SysbenchCPU) MakeCmd(b *testing.B) []string {
cmd := []string{"sysbench"}
- cmd = append(cmd, s.baseFlags(b)...)
+ cmd = append(cmd, s.baseFlags(b, true /* useEvents */)...)
cmd = append(cmd, "cpu", "run")
return cmd
}
@@ -86,7 +88,6 @@ func (s *SysbenchCPU) parseEvents(data string) (float64, error) {
type SysbenchMemory struct {
SysbenchBase
BlockSize int // size of test memory block in megabytes [1].
- TotalSize int // size of data to transfer in gigabytes [100].
Scope string // memory access scope {global, local} [global].
HugeTLB bool // allocate memory from HugeTLB [off].
OperationType string // type of memory ops {read, write, none} [write].
@@ -103,13 +104,10 @@ func (s *SysbenchMemory) MakeCmd(b *testing.B) []string {
// flags makes flags for SysbenchMemory cmds.
func (s *SysbenchMemory) flags(b *testing.B) []string {
- cmd := s.baseFlags(b)
+ cmd := s.baseFlags(b, false /* useEvents */)
if s.BlockSize != 0 {
cmd = append(cmd, fmt.Sprintf("--memory-block-size=%dM", s.BlockSize))
}
- if s.TotalSize != 0 {
- cmd = append(cmd, fmt.Sprintf("--memory-total-size=%dG", s.TotalSize))
- }
if s.Scope != "" {
cmd = append(cmd, fmt.Sprintf("--memory-scope=%s", s.Scope))
}
@@ -122,6 +120,10 @@ func (s *SysbenchMemory) flags(b *testing.B) []string {
if s.AccessMode != "" {
cmd = append(cmd, fmt.Sprintf("--memory-access-mode=%s", s.AccessMode))
}
+ // Sysbench ignores events for memory tests, and uses the total
+ // size parameter to determine when the test is done. We scale
+ // with this instead.
+ cmd = append(cmd, fmt.Sprintf("--memory-total-size=%dG", b.N))
return cmd
}
@@ -150,7 +152,6 @@ func (s *SysbenchMemory) parseOperations(data string) (float64, error) {
type SysbenchMutex struct {
SysbenchBase
Num int // total size of mutex array [4096].
- Locks int // number of mutex locks per thread [50000].
Loops int // number of loops to do outside mutex lock [10000].
}
@@ -165,16 +166,18 @@ func (s *SysbenchMutex) MakeCmd(b *testing.B) []string {
// flags makes flags for SysbenchMutex commands.
func (s *SysbenchMutex) flags(b *testing.B) []string {
var cmd []string
- cmd = append(cmd, s.baseFlags(b)...)
+ cmd = append(cmd, s.baseFlags(b, false /* useEvents */)...)
if s.Num > 0 {
cmd = append(cmd, fmt.Sprintf("--mutex-num=%d", s.Num))
}
- if s.Locks > 0 {
- cmd = append(cmd, fmt.Sprintf("--mutex-locks=%d", s.Locks))
- }
if s.Loops > 0 {
cmd = append(cmd, fmt.Sprintf("--mutex-loops=%d", s.Loops))
}
+ // Sysbench does not respect --events for mutex tests. From [1]:
+ // "Here --time or --events are completely ignored. Sysbench always
+ // runs one event per thread."
+ // [1] https://tomfern.com/posts/sysbench-guide-1
+ cmd = append(cmd, fmt.Sprintf("--mutex-locks=%d", b.N))
return cmd
}
diff --git a/test/packetimpact/runner/dut.go b/test/packetimpact/runner/dut.go
index 3e26c73cb..3da265b78 100644
--- a/test/packetimpact/runner/dut.go
+++ b/test/packetimpact/runner/dut.go
@@ -551,7 +551,7 @@ func StartContainer(ctx context.Context, runOpts dockerutil.RunOpts, c *dockerut
hostconf.AutoRemove = true
hostconf.Sysctls = map[string]string{"net.ipv6.conf.all.disable_ipv6": "0"}
- if err := c.CreateFrom(ctx, conf, hostconf, nil); err != nil {
+ if err := c.CreateFrom(ctx, runOpts.Image, conf, hostconf, nil); err != nil {
return fmt.Errorf("unable to create container %s: %w", c.Name, err)
}
diff --git a/test/root/cgroup_test.go b/test/root/cgroup_test.go
index a26b83081..a74d6b1c1 100644
--- a/test/root/cgroup_test.go
+++ b/test/root/cgroup_test.go
@@ -249,12 +249,11 @@ func TestCgroup(t *testing.T) {
case "pids-limit":
val := attr.value
hostconf.Resources.PidsLimit = &val
-
}
}
// Create container.
- if err := d.CreateFrom(ctx, conf, hostconf, nil); err != nil {
+ if err := d.CreateFrom(ctx, "basic/alpine", conf, hostconf, nil); err != nil {
t.Fatalf("create failed with: %v", err)
}
@@ -323,7 +322,7 @@ func TestCgroupParent(t *testing.T) {
}, "sleep", "10000")
hostconf.Resources.CgroupParent = parent
- if err := d.CreateFrom(ctx, conf, hostconf, nil); err != nil {
+ if err := d.CreateFrom(ctx, "basic/alpine", conf, hostconf, nil); err != nil {
t.Fatalf("create failed with: %v", err)
}