Add BuildKite annotations for failures and profiles.

This change cleans up some minor Makefile issues, and adds support for BuildKite annotations on failure and on profiles being generated. These annotations will make failures very clear and link to the artifacts. This change is a stepping stone for aggregating coverage data from all individual test jobs, as this will also happen in .buildkite/summarize.sh. PiperOrigin-RevId: 349606598
author: Adin Scannell <ascannell@google.com> 2020-12-30 15:06:47 -0800
committer: gVisor bot <gvisor-bot@google.com> 2020-12-30 15:09:12 -0800
commit: 899b9ba46a69094975b4f25c24a3c467c0c21276 (patch)
tree: 2d3e41fa6ce9bdbfa83d25f4a91f43d18fec7091 /.buildkite
parent: 0fb5de1154411bd207dadae31c37054db9941061 (diff)
4 files changed, 151 insertions, 15 deletions
diff --git a/.buildkite/hooks/post-command b/.buildkite/hooks/post-command
index ce3111f3c..b0396bec7 100644
--- a/.buildkite/hooks/post-command
+++ b/.buildkite/hooks/post-command
@@ -1,24 +1,74 @@
 # Upload test logs on failure, if there are any.
-if [[ "${BUILDKITE_COMMAND_EXIT_STATUS}" -ne "0" ]]; then
-  declare log_count=0
-  for log in $(make testlogs 2>/dev/null | sort | uniq); do
-    buildkite-agent artifact upload "${log}"
-    log_count=$((${log_count}+1))
-    # N.B. If *all* tests fail due to some common cause, then we will
-    # end up spending way too much time uploading logs. Instead, we just
-    # upload the first 100 and stop. That is hopefully enough to debug.
-    if [[ "${log_count}" -ge 100 ]]; then
-      echo "Only uploaded first 100 failures; skipping the rest."
-      break
-    fi
-  done
+if test "${BUILDKITE_COMMAND_EXIT_STATUS}" -ne "0"; then
+  # Generate a metafile that ends with .output, and contains all the
+  # test failures that have been uploaded. These will all be sorted and
+  # aggregated by a failure stage in the build pipeline.
+  declare output=$(mktemp "${BUILDKITE_JOB_ID}".XXXXXX.output)
+  make -s testlogs 2>/dev/null | grep // | sort -u | (
+    declare log_count=0
+    # Lines are read as "<target> <log>"; -r keeps backslashes literal.
+    while read -r target log; do
+      test -n "${target}" || continue
+
+      # N.B. If *all* tests fail due to some common cause, then we will
+      # end up spending way too much time uploading logs. Instead, we just
+      # upload the first 10 and stop. That is hopefully enough to debug.
+      #
+      # We include every test in the metadata, but note when we did not
+      # upload the actual test logs. The user should rerun locally.
+      log_count=$((log_count + 1))
+      # Counter is bumped before this check, so -gt lets the tenth log upload.
+      if test "${log_count}" -gt 10; then
+        echo " * ${target} (no upload)" | tee -a "${output}"
+      else
+        buildkite-agent artifact upload "${log}"
+        echo " * [${target}](artifact://${log#/})" | tee -a "${output}"
+      fi
+    done
+  )
+
+  # Upload if we had outputs.
+  if test -s "${output}"; then
+    buildkite-agent artifact upload "${output}"
+  fi
+  rm -rf "${output}"
+
   # Attempt to clear the cache and shut down.
   make clean || echo "make clean failed with code $?"
   make bazel-shutdown || echo "make bazel-shutdown failed with code $?"
 fi
 
+# Upload all profiles, and include in an annotation.
+if test -d /tmp/profile; then
+  # Same as above.
+  declare profile_output=$(mktemp "${BUILDKITE_JOB_ID}".XXXXXX.profile_output)
+  # Collect the sorted paths one per line so they are never word-split or globbed.
+  mapfile -t profile_files < <(find /tmp/profile -name '*.pprof' -print 2>/dev/null | sort)
+  for file in "${profile_files[@]}"; do
+    # Generate a speedscope link embedding the URL-encoded public artifact URL.
+    # The URL schema is fixed: if the storage location changes the links may
+    # break, but the uploaded artifacts will still work just fine.
+    profile_name="${file#/tmp/profile/}"
+    public_url="https://storage.googleapis.com/gvisor-buildkite/${BUILDKITE_BUILD_ID}/${BUILDKITE_JOB_ID}/${file#/}"
+    encoded_url=$(jq -rn --arg x "${public_url}" '$x|@uri')
+    encoded_title=$(jq -rn --arg x "${profile_name}" '$x|@uri')
+    profile_url="https://speedscope.app/#profileURL=${encoded_url}&title=${encoded_title}"
+    buildkite-agent artifact upload "${file}"
+    echo " * [${profile_name}](${profile_url}) ([pprof](artifact://${file#/}))" | tee -a "${profile_output}"
+  done
+
+  # Upload if we had outputs.
+  if test -s "${profile_output}"; then
+    buildkite-agent artifact upload "${profile_output}"
+  fi
+  rm -rf "${profile_output}"
+
+  # Remove stale profiles, which may be owned by root.
+  sudo rm -rf /tmp/profile
+fi
+
 # Kill any running containers (clear state).
 CONTAINERS="$(docker ps -q)"
-if ! [[ -z "${CONTAINERS}" ]]; then
+if test -n "${CONTAINERS}"; then
   docker container kill ${CONTAINERS} 2>/dev/null || true
-fi
-\ No newline at end of file
+fi
diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command
index 7d277202b..4f41fe021 100644
--- a/.buildkite/hooks/pre-command
+++ b/.buildkite/hooks/pre-command
@@ -1,3 +1,15 @@
+# Install packages we need. Docker must be installed and configured,
+# as should Go itself. We just install some extra bits and pieces.
+# Retries forever until apt succeeds, pausing between attempts so that a
+# persistent failure does not turn into a tight loop against the mirrors.
+function install_pkgs() {
+  until sudo apt-get update && sudo apt-get install -y "$@"; do
+    sleep 5
+  done
+}
+install_pkgs graphviz jq curl binutils gnupg gnupg-agent linux-libc-dev \
+  apt-transport-https ca-certificates software-properties-common
+
 # Setup for parallelization with PARTITION and TOTAL_PARTITIONS.
 export PARTITION=${BUILDKITE_PARALLEL_JOB:-0}
 PARTITION=$((${PARTITION}+1)) # 1-indexed, but PARALLEL_JOB is 0-indexed.
@@ -9,3 +21,10 @@ if test "${EXPERIMENTAL}" != "true"; then
   make sudo TARGETS=//runsc:runsc ARGS="install --experimental=true"
   sudo systemctl restart docker
 fi
+
+# Helper for benchmarks: only builds of the master branch are marked official.
+if test "${BUILDKITE_BRANCH}" = "master"; then
+  export BENCHMARKS_OFFICIAL=true
+else
+  export BENCHMARKS_OFFICIAL=false
+fi
diff --git a/.buildkite/pipeline.yaml b/.buildkite/pipeline.yaml
index b1d59e091..ba054319c 100644
--- a/.buildkite/pipeline.yaml
+++ b/.buildkite/pipeline.yaml
@@ -132,3 +132,18 @@ steps:
     command: make python3.7.3-runtime-tests
     parallelism: 10
     if: build.message =~ /VFS1/ || build.branch == "master"
+
+  # The final step here will aggregate data uploaded by all other steps into an
+  # annotation that will appear at the top of the build, with useful information.
+  #
+  # See .buildkite/summarize.sh and .buildkite/hooks/post-command for more.
+  - wait
+  - <<: *common
+    label: ":yawning_face: Wait"
+    command: "true"  # No-op; this step only provides the "wait" key used below.
+    key: "wait"
+  - <<: *common
+    label: ":thisisfine: Summarize"
+    command: .buildkite/summarize.sh
+    allow_dependency_failure: true  # NOTE(review): presumably lets the summary run even if "wait" fails; confirm.
+    depends_on: "wait"
diff --git a/.buildkite/summarize.sh b/.buildkite/summarize.sh
new file mode 100755
index 000000000..ddf8c9ad4
--- /dev/null
+++ b/.buildkite/summarize.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Copyright 2020 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -xeou pipefail
+
+# This script collects metadata fragments produced by individual test shards in
+# .buildkite/hooks/postcommand, and aggregates these into a single annotation
+# that is posted to the build. In the future, this will include coverage.
+
+# Start the summary.
+declare summary
+declare status
+summary=$(mktemp --tmpdir summary.XXXXXX)
+status="info"
+
+# Download all outputs.
+declare outputs
+outputs=$(mktemp -d --tmpdir outputs.XXXXXX)
+if buildkite-agent artifact download '**/*.output' "${outputs}"; then
+  status="error"
+  echo "## Failures" >> "${summary}"
+  find "${outputs}" -type f -print | xargs -r -n 1 cat | sort >> "${summary}"
+fi
+rm -rf "${outputs}"
+
+# Attempt to find profiles, if there are any.
+declare profiles
+profiles=$(mktemp -d --tmpdir profiles.XXXXXX)
+if buildkite-agent artifact download '**/*.profile_output' "${profiles}"; then
+  echo "## Profiles" >> "${summary}"
+  find "${profiles}" -type f -print | xargs -r -n 1 cat | sort >> "${summary}"
+fi
+rm -rf "${profiles}"
+
+# Upload the final annotation.
+if [[ -s "${summary}" ]]; then
+  cat "${summary}" | buildkite-agent annotate --style "${status}"
+fi
+rm -rf "${summary}"
author	Adin Scannell <ascannell@google.com>	2020-12-30 15:06:47 -0800
committer	gVisor bot <gvisor-bot@google.com>	2020-12-30 15:09:12 -0800
commit	899b9ba46a69094975b4f25c24a3c467c0c21276 (patch)
tree	2d3e41fa6ce9bdbfa83d25f4a91f43d18fec7091 /.buildkite
parent	0fb5de1154411bd207dadae31c37054db9941061 (diff)