4 files changed, 595 insertions, 0 deletions
diff --git a/benchmarks/runner/BUILD b/benchmarks/runner/BUILD
new file mode 100644
index 000000000..e1b2ea550
--- /dev/null
+++ b/benchmarks/runner/BUILD
@@ -0,0 +1,63 @@
+load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement")
+
+package(licenses = ["notice"])
+
+py_library(
+    name = "runner",
+    srcs = ["__init__.py"],
+    data = [
+        "//benchmarks/workloads:files",
+    ],
+    visibility = ["//benchmarks:__pkg__"],
+    deps = [  # NOTE(review): __init__.py also imports gcloud_producer and harness; confirm both are covered.
+        ":commands",
+        "//benchmarks/harness:benchmark_driver",
+        "//benchmarks/harness/machine_producers:machine_producer",
+        "//benchmarks/harness/machine_producers:mock_producer",
+        "//benchmarks/harness/machine_producers:yaml_producer",
+        "//benchmarks/suites",
+        "//benchmarks/suites:absl",
+        "//benchmarks/suites:density",
+        "//benchmarks/suites:fio",
+        "//benchmarks/suites:helpers",
+        "//benchmarks/suites:http",
+        "//benchmarks/suites:media",
+        "//benchmarks/suites:ml",
+        "//benchmarks/suites:network",
+        "//benchmarks/suites:redis",
+        "//benchmarks/suites:startup",
+        "//benchmarks/suites:sysbench",
+        "//benchmarks/suites:syscall",
+        requirement("click", True),
+    ],
+)
+
+py_library(
+    name = "commands",
+    srcs = ["commands.py"],
+    deps = [  # NOTE(review): commands.py also imports benchmarks.harness; confirm that dep is provided.
+        requirement("click", True),
+    ],
+)
+
+py_test(
+    name = "runner_test",
+    srcs = ["runner_test.py"],
+    python_version = "PY3",
+    tags = [
+        "local",  # Bazel tag: run locally, outside the sandbox.
+        "manual",  # Bazel tag: skipped by wildcard patterns; run explicitly.
+    ],
+    deps = [
+        ":runner",
+        requirement("click", True),
+        requirement("attrs", False),
+        requirement("atomicwrites", False),
+        requirement("more-itertools", False),
+        requirement("pathlib2", False),
+        requirement("pluggy", False),
+        requirement("py", False),
+        requirement("pytest", True),
+        requirement("six", False),
+    ],
+)
diff --git a/benchmarks/runner/__init__.py b/benchmarks/runner/__init__.py
new file mode 100644
index 000000000..ba80d83d7
--- /dev/null
+++ b/benchmarks/runner/__init__.py
@@ -0,0 +1,338 @@
+# python3
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""High-level benchmark utility."""
+
+import copy
+import csv
+import json
+import logging
+import os
+import pkgutil
+import pydoc
+import re
+import subprocess
+import sys
+import types
+from typing import List
+from typing import Tuple
+
+import click
+
+from benchmarks import harness
+from benchmarks import suites
+from benchmarks.harness import benchmark_driver
+from benchmarks.harness.machine_producers import gcloud_producer
+from benchmarks.harness.machine_producers import machine_producer
+from benchmarks.harness.machine_producers import mock_producer
+from benchmarks.harness.machine_producers import yaml_producer
+from benchmarks.runner import commands
+
+
+@click.group()
+@click.option(
+    "--verbose/--no-verbose", default=False, help="Enable verbose logging.")
+@click.option("--debug/--no-debug", default=False, help="Enable debug logging.")
+def runner(verbose: bool = False, debug: bool = False):
+  """Run distributed benchmarks.
+
+  See the run and list commands for details.
+
+  Args:
+    verbose: Enable verbose logging.
+    debug: Enable debug logging (supersedes verbose).
+  """
+  if debug:
+    logging.basicConfig(level=logging.DEBUG)
+  elif verbose:
+    logging.basicConfig(level=logging.INFO)
+
+
+def find_benchmarks(
+    regex: str) -> List[Tuple[str, types.ModuleType, types.FunctionType]]:
+  """Finds all available benchmarks whose short name matches a pattern.
+
+  Args:
+    regex: A regular expression, matched (anchored at the start) against each
+      benchmark's short name, i.e. its module path under suites plus ".func".
+
+  Returns:
+    A (short_name, module, function) tuple for each match.
+  """
+  # Compile once and compute the prefix once; both are loop invariants.
+  pattern = re.compile(regex)
+  prefix_len = len(suites.__name__ + ".")
+  found = []
+  for _, name, _ in pkgutil.walk_packages(suites.__path__,
+                                          suites.__name__ + "."):
+    # Resolve the dotted module name to the module object itself.
+    mod = pydoc.locate(name)
+    members = (getattr(mod, attr) for attr in dir(mod))
+    for func in (m for m in members if suites.is_benchmark(m)):
+      # Use the short_name with the suites package prefix stripped.
+      short_name = mod.__name__[prefix_len:] + "." + func.__name__
+      # Keep only benchmarks whose short name matches the pattern.
+      if pattern.match(short_name):
+        found.append((short_name, mod, func))
+  return found
+
+
+@runner.command("list")
+@click.argument("method", nargs=-1)
+def list_all(method):
+  """Lists available benchmarks."""
+  # Several patterns are combined with "|" (regex alternation) so that a
+  # benchmark matching any one of them is listed; no pattern lists everything.
+  method = "(" + "|".join(method) + ")" if method else ".*"
+  # find_benchmarks yields (short_name, module, function) tuples.
+  for (short_name, _, func) in find_benchmarks(method):
+    print("Benchmark %s:" % short_name)
+    metrics = suites.benchmark_metrics(func)
+    if func.__doc__:
+      print("    " + func.__doc__.strip())
+    if metrics:
+      print("\n    Metrics:")
+    for metric in metrics:
+      print("\t{name}: {doc}".format(name=metric[0], doc=metric[1]))
+    print("\n")
+
+
+@runner.command("run-local", commands.LocalCommand)
+@click.pass_context
+def run_local(ctx: click.Context, limit: float, **kwargs) -> None:
+  """Runs benchmarks locally."""
+  run(ctx, machine_producer.LocalMachineProducer(limit=limit), **kwargs)
+
+
+@runner.command("run-mock", commands.RunCommand)
+@click.pass_context
+def run_mock(ctx: click.Context, **kwargs) -> None:
+  """Runs benchmarks on Mock machines. Used for testing."""
+  run(ctx, mock_producer.MockMachineProducer(), **kwargs)
+
+
+@runner.command("run-gcp", commands.GCPCommand)
+@click.pass_context
+def run_gcp(ctx, project: str, ssh_key_file: str, image: str,
+            image_project: str, machine_type: str, zone: str, ssh_user: str,
+            ssh_password: str, **kwargs):
+  """Runs all benchmarks on GCP instances."""
+
+  if not ssh_user:
+    ssh_user = harness.DEFAULT_USER
+
+  # Get the default project if none was given; an empty value counts as unset.
+  if not project:
+    sub = subprocess.run(["gcloud", "config", "get-value", "project"],
+                         stdout=subprocess.PIPE)
+    if sub.returncode or not sub.stdout.strip():
+      raise ValueError(
+          "Cannot get default project from gcloud. Is it configured?")
+    project = sub.stdout.decode("utf-8").strip()
+
+  if not image_project:
+    image_project = project
+
+  # Check that the ssh-key exists and is readable.
+  if not os.access(ssh_key_file, os.R_OK):
+    raise ValueError(
+        "ssh key given `{ssh_key}` does not exist or is not readable."
+        .format(ssh_key=ssh_key_file))
+
+  # Check that the image exists; argv is a list so no value is re-split.
+  sub = subprocess.run([
+      "gcloud", "compute", "images", "describe", image, "--project",
+      image_project, "--format=json"
+  ], stdout=subprocess.PIPE)
+  if sub.returncode or json.loads(sub.stdout).get("status") != "READY":
+    raise ValueError(
+        "given image was not found or is not ready: {image} {image_project}."
+        .format(image=image, image_project=image_project))
+
+  # Check and set zone to default; an empty value counts as unset.
+  if not zone:
+    sub = subprocess.run(
+        ["gcloud", "config", "get-value", "compute/zone"],
+        stdout=subprocess.PIPE)
+    if sub.returncode or not sub.stdout.strip():
+      raise ValueError(
+          "Default zone is not set in gcloud. Set one or pass a zone with the --zone flag."
+      )
+    zone = sub.stdout.decode("utf-8").strip()
+
+  producer = gcloud_producer.GCloudProducer(project, ssh_key_file, image,
+                                            image_project, machine_type, zone,
+                                            ssh_user, ssh_password)
+  run(ctx, producer, **kwargs)
+
+
+def run(ctx, producer: machine_producer.MachineProducer, method: str, runs: int,
+        runtime: List[str], metric: List[str], stat: str, **kwargs):
+  """Runs arbitrary benchmarks.
+
+  All unknown command line flags are passed through to the underlying benchmark
+  method. Flags may be specified multiple times, in which case it is considered
+  a "dimension" for the test, and a comma-separated table will be emitted
+  instead of a single result.
+
+  See the output of list to see available metrics for any given benchmark
+  method. The method parameter is a regular expression that will match against
+  available benchmarks. If multiple benchmarks match, then that is considered a
+  distinct "dimension" for the test.
+
+  All benchmarks are run in parallel where possible, but have exclusive
+  ownership over the individual machines.
+
+  Every benchmark method will be run the times indicated by --runs.
+
+  Args:
+    ctx: Click context.
+    producer: A Machine Producer from which to get Machines.
+    method: A regular expression for methods to be run.
+    runs: Number of runs.
+    runtime: A list of runtimes to test.
+    metric: A list of metrics to extract.
+    stat: The class of statistics to extract.
+    **kwargs: Dimensions to test.
+  """
+  # First, calculate additional arguments.
+  #
+  # This essentially calculates any arguments that appear multiple times, and
+  # moves those to the "dimensions" dictionary, which maps to lists. These
+  # dimensions are then iterated over to generate the relevant csv output.
+  dimensions = {}
+
+  if stat not in ["median", "all", "meanstd"]:
+    raise ValueError("Illegal value for --stat, see help.")
+
+  def squish(key: str, value: str):
+    """Collapse an argument into kwargs or dimensions."""
+    if key in dimensions:
+      # Extend an existing dimension.
+      dimensions[key].append(value)
+    elif key in kwargs:
+      # Create a new dimension.
+      dimensions[key] = [kwargs[key], value]
+      del kwargs[key]
+    else:
+      # A single value.
+      kwargs[key] = value
+
+  for item in ctx.args:
+    if "=" in method:
+      # This must be the method. The method is simply set to the first
+      # non-matching argument, which we're also parsing here.
+      item, method = method, item
+    if "=" not in item:
+      logging.error("illegal argument: %s", item)
+      sys.exit(1)
+    (key, value) = item.lstrip("-").split("=", 1)
+    squish(key, value)
+
+  # Convert runtime and metric to dimensions.
+  #
+  # They exist only in the arguments above for documentation purposes.
+  # Essentially here we are treating them like anything else. Note however,
+  # that an empty set here will result in a dimension. This is important for
+  # metrics, where an empty set actually means all metrics.
+  def fold(key: str, value, allow_flatten=False):
+    """Collapse a list value into kwargs or dimensions."""
+    if len(value) == 1 and allow_flatten:
+      kwargs[key] = value[0]
+    else:
+      dimensions[key] = value
+
+  fold("runtime", runtime, allow_flatten=True)
+  fold("metric", metric)
+
+  # Lookup the methods.
+  #
+  # We match the method parameter to a regular expression. This allows you to
+  # do things like `run-mock .*` for a broad test. Note that we track the
+  # short_names in the dimensions here, and look up again in the recursion.
+  methods = {
+      short_name: func for (short_name, _, func) in find_benchmarks(method)
+  }
+  if not methods:
+    # Must match at least one method.
+    logging.error("no matching benchmarks for %s: try list.", method)
+    sys.exit(1)
+  fold("method", list(methods.keys()), allow_flatten=True)
+
+  # Spin up the drivers.
+  #
+  # We ensure that metric is the last entry, because we have special behavior.
+  # They actually run the test once and the benchmark is a generator that
+  # produces all viable metrics.
+  dimension_keys = list(dimensions.keys())
+  if "metric" in dimension_keys:
+    dimension_keys.remove("metric")
+    dimension_keys.append("metric")
+  drivers = []
+
+  def _start(keywords, finished, left):
+    """Runs a test across dimensions recursively."""
+    # Resolve the method fully, it starts as a string.
+    if "method" in keywords and isinstance(keywords["method"], str):
+      keywords["method"] = methods[keywords["method"]]
+    # Is this a non-recursive case?
+    if not left:
+      driver = benchmark_driver.BenchmarkDriver(producer, runs=runs, **keywords)
+      driver.start()
+      drivers.append((finished, driver))
+    else:
+      # Recurse on the next dimension.
+      current, left = left[0], left[1:]
+      keywords = copy.deepcopy(keywords)
+      if current == "metric":
+        # We use a generator, popped below. Note that metric is
+        # guaranteed to be the last element here, and we will provide
+        # the value for 'done' below when generating the csv.
+        keywords[current] = dimensions[current]
+        _start(keywords, finished, left)
+      else:
+        # Generate manually.
+        for value in dimensions[current]:
+          keywords[current] = value
+          _start(keywords, finished + [value], left)
+
+  # Start all the drivers, recursively.
+  _start(kwargs, [], dimension_keys)
+
+  # Finish all tests, write results.
+  output = csv.writer(sys.stdout)
+  output.writerow(dimension_keys + ["result"])
+  for (done, driver) in drivers:
+    driver.join()
+    for (metric_name, result) in getattr(driver, stat)():
+      output.writerow([  # Collapse the method name.
+          x.__name__ if hasattr(x, "__name__") else x for x in done
+      ] + [metric_name] + result)
+
+
+@runner.command()
+@click.argument("env")
+@click.option(
+    "--cmd", default="uname -a", help="command to run on all found machines")
+@click.option(
+    "--workload", default="true", help="workload to run all found machines")
+def validate(env, cmd, workload):
+  """Validates an environment described by yaml file."""
+  for machine in yaml_producer.YamlMachineProducer(env).machines:
+    print("Machine %s:" % machine)
+    # First run the command on the machine itself.
+    cmd_output, _ = machine.run(cmd)
+    print("  Output of '%s': %s" % (cmd, cmd_output.strip()))
+    # Then pull the workload and run it through machine.container().
+    container_output = machine.container(machine.pull(workload)).run()
+    print("  Container %s: %s" % (workload, container_output.strip()))
diff --git a/benchmarks/runner/commands.py b/benchmarks/runner/commands.py
new file mode 100644
index 000000000..7ab12fac6
--- /dev/null
+++ b/benchmarks/runner/commands.py
@@ -0,0 +1,135 @@
+# python3
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module with the guts of `click` commands.
+
+Overrides of the click.core.Command. This is done so flags are inherited between
+similar commands (the run command). The classes below are meant to be used in
+click templates like so.
+
+@runner.command("run-mock", RunCommand)
+def run_mock(**kwargs):
+  # mock implementation
+
+"""
+import click
+
+from benchmarks import harness
+
+
+class RunCommand(click.core.Command):
+  """Base command carrying the parameters shared by the run-* commands.
+
+  Parameters added to the command:
+    method: regex of which suite to choose (e.g. sysbench would run
+      sysbench.cpu, sysbench.memory, and sysbench.mutex) See list command for
+      details.
+    metric: metric(s) to extract. See list command for details.
+    runtime: the runtime(s) on which to run.
+    runs: the number of runs to do of each method.
+    stat: how to compile results in the case of multiple run (e.g. median).
+  """
+
+  def __init__(self, *args, **kwargs):
+    super().__init__(*args, **kwargs)
+    # Let unknown flags and extra arguments through instead of rejecting them.
+    self.ignore_unknown_options = True
+    self.allow_extra_args = True
+    self.params.extend([
+        click.core.Argument(("method",)),
+        click.core.Option(("--runtime",),
+                          default=["runc"],
+                          help="The runtime to use.",
+                          multiple=True),
+        click.core.Option(("--runs",),
+                          default=1,
+                          help="The number of times to run each benchmark."),
+        click.core.Option(
+            ("--stat",),
+            default="median",
+            help="How to aggregate the data from all runs."
+            "\nmedian - returns the median of all runs (default)"
+            "\nall - returns all results comma separated"
+            "\nmeanstd - returns result as mean,std"),
+        click.core.Option(("--metric",),
+                          help="The metric to extract.",
+                          multiple=True),
+    ])
+
+
+class LocalCommand(RunCommand):
+  """LocalCommand inherits all flags from RunCommand.
+
+  Attributes:
+    limit: limits the number of machines on which to run benchmarks. This limits
+      for local how many benchmarks may run at a time. e.g. "startup" requires
+      one machine -- passing two machines would limit two startup jobs at a
+      time. Default is 1.
+  """
+
+  def __init__(self, *args, **kwargs):
+    super().__init__(*args, **kwargs)
+    self.params.append(
+        click.core.Option(
+            ("--limit",),
+            default=1,
+            help="Limit of number of benchmarks that can run at a given time."))
+
+
+class GCPCommand(RunCommand):
+  """GCPCommand inherits all flags from RunCommand and adds flags for run_gcp method.
+
+  Attributes:
+    project: GCP project
+    ssh_key_file: path to the ssh-key to use for the run
+    image: name of the image to build machines from
+    image_project: GCP project under which to find image
+    machine_type: GCP machine type to use for all machines
+    zone: a GCP zone (e.g. us-west1-b)
+    ssh_user, ssh_password: user and password to use for the ssh-key
+  """
+
+  def __init__(self, *args, **kwargs):
+    super().__init__(*args, **kwargs)
+
+    project = click.core.Option(
+        ("--project",),
+        help="Project to run on if not default value given by 'gcloud config get-value project'."
+    )
+    ssh_key_path = click.core.Option(
+        ("--ssh-key-file",),
+        help="Path to a valid ssh private key to use. See README on generating a valid ssh key. Set to ~/.ssh/benchmark-tools by default.",
+        default=harness.DEFAULT_USER_HOME + "/.ssh/benchmark-tools")
+    image = click.core.Option(("--image",),
+                              help="The image on which to build VMs.",
+                              default="bm-tools-testing")
+    image_project = click.core.Option(
+        ("--image_project",),
+        help="The project under which the image to be used is listed.",
+        default="")
+    machine_type = click.core.Option(("--machine_type",),
+                                     help="Type to make all machines.",
+                                     default="n1-standard-4")
+    zone = click.core.Option(("--zone",),
+                             help="The GCP zone to run on.",
+                             default="")
+    ssh_user = click.core.Option(("--ssh-user",),
+                                 help="User for the ssh key.",
+                                 default=harness.DEFAULT_USER)
+    ssh_password = click.core.Option(("--ssh-password",),
+                                     help="Password for the ssh key.",
+                                     default="")
+    self.params.extend([
+        project, ssh_key_path, image, image_project, machine_type, zone,
+        ssh_user, ssh_password
+    ])
diff --git a/benchmarks/runner/runner_test.py b/benchmarks/runner/runner_test.py
new file mode 100644
index 000000000..7818d631a
--- /dev/null
+++ b/benchmarks/runner/runner_test.py
@@ -0,0 +1,59 @@
+# python3
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Top-level tests."""
+
+import os
+import subprocess
+import sys
+
+from click import testing
+import pytest
+
+from benchmarks import runner
+
+
+def _get_locale():
+  """Picks en_US.UTF-8 if `locale -a` lists en_US.utf8, else C.UTF-8."""
+  available = subprocess.check_output(["locale", "-a"]).split()
+  # check_output returns bytes here, hence the bytes literal.
+  if b"en_US.utf8" in available:
+    return "en_US.UTF-8"
+  return "C.UTF-8"
+
+
+def _set_locale():
+  """Re-executes /proc/self/exe with LANG/LC_ALL set if LANG differs."""
+  wanted = _get_locale()
+  if os.getenv("LANG") != wanted:
+    os.environ.update(LANG=wanted, LC_ALL=wanted)
+    os.execv("/proc/self/exe", ["python"] + sys.argv)
+
+
+def test_list():
+  """The list command exits with status 0."""
+  result = testing.CliRunner().invoke(runner.runner, ["list"])
+  print(result.output)
+  assert result.exit_code == 0
+
+
+def test_run():
+  """run-mock with the pattern "." exits with status 0."""
+  result = testing.CliRunner().invoke(runner.runner, ["run-mock", "."])
+  print(result.output)
+  assert result.exit_code == 0
+
+
+if __name__ == "__main__":
+  _set_locale()  # May re-exec the process with LANG/LC_ALL set.
+  sys.exit(pytest.main([__file__]))