diff options
Diffstat (limited to 'benchmarks/runner')
-rw-r--r-- | benchmarks/runner/BUILD | 56 | ||||
-rw-r--r-- | benchmarks/runner/__init__.py | 308 | ||||
-rw-r--r-- | benchmarks/runner/commands.py | 135 | ||||
-rw-r--r-- | benchmarks/runner/runner_test.py | 59 |
4 files changed, 558 insertions, 0 deletions
diff --git a/benchmarks/runner/BUILD b/benchmarks/runner/BUILD new file mode 100644 index 000000000..471debfdf --- /dev/null +++ b/benchmarks/runner/BUILD @@ -0,0 +1,56 @@ +load("//tools:defs.bzl", "py_library", "py_requirement", "py_test") +load("//benchmarks:defs.bzl", "test_deps") + +package(licenses = ["notice"]) + +py_library( + name = "runner", + srcs = ["__init__.py"], + data = [ + "//benchmarks/workloads:files", + ], + visibility = ["//benchmarks:__pkg__"], + deps = [ + ":commands", + "//benchmarks/harness:benchmark_driver", + "//benchmarks/harness/machine_producers:machine_producer", + "//benchmarks/harness/machine_producers:mock_producer", + "//benchmarks/harness/machine_producers:yaml_producer", + "//benchmarks/suites", + "//benchmarks/suites:absl", + "//benchmarks/suites:density", + "//benchmarks/suites:fio", + "//benchmarks/suites:helpers", + "//benchmarks/suites:http", + "//benchmarks/suites:media", + "//benchmarks/suites:ml", + "//benchmarks/suites:network", + "//benchmarks/suites:redis", + "//benchmarks/suites:startup", + "//benchmarks/suites:sysbench", + "//benchmarks/suites:syscall", + py_requirement("click"), + ], +) + +py_library( + name = "commands", + srcs = ["commands.py"], + deps = [ + py_requirement("click"), + ], +) + +py_test( + name = "runner_test", + srcs = ["runner_test.py"], + python_version = "PY3", + tags = [ + "local", + "manual", + ], + deps = test_deps + [ + ":runner", + py_requirement("click"), + ], +) diff --git a/benchmarks/runner/__init__.py b/benchmarks/runner/__init__.py new file mode 100644 index 000000000..fc59cf505 --- /dev/null +++ b/benchmarks/runner/__init__.py @@ -0,0 +1,308 @@ +# python3 +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""High-level benchmark utility.""" + +import copy +import csv +import logging +import pkgutil +import pydoc +import re +import subprocess +import sys +import types +from typing import List +from typing import Tuple + +import click + +from benchmarks import harness +from benchmarks import suites +from benchmarks.harness import benchmark_driver +from benchmarks.harness.machine_producers import gcloud_producer +from benchmarks.harness.machine_producers import machine_producer +from benchmarks.harness.machine_producers import mock_producer +from benchmarks.harness.machine_producers import yaml_producer +from benchmarks.runner import commands + + +@click.group() +@click.option( + "--verbose/--no-verbose", default=False, help="Enable verbose logging.") +@click.option("--debug/--no-debug", default=False, help="Enable debug logging.") +def runner(verbose: bool = False, debug: bool = False): + """Run distributed benchmarks. + + See the run and list commands for details. + + Args: + verbose: Enable verbose logging. + debug: Enable debug logging (supercedes verbose). + """ + if debug: + logging.basicConfig(level=logging.DEBUG) + elif verbose: + logging.basicConfig(level=logging.INFO) + + +def find_benchmarks( + regex: str) -> List[Tuple[str, types.ModuleType, types.FunctionType]]: + """Finds all available benchmarks. + + Args: + regex: A regular expression to match. + + Returns: + A (short_name, module, function) tuple for each match. + """ + pkgs = pkgutil.walk_packages(suites.__path__, suites.__name__ + ".") + found = [] + for _, name, _ in pkgs: + mod = pydoc.locate(name) + funcs = [ + getattr(mod, x) + for x in dir(mod) + if suites.is_benchmark(getattr(mod, x)) + ] + for func in funcs: + # Use the short_name with the benchmarks. prefix stripped. + prefix_len = len(suites.__name__ + ".") + short_name = mod.__name__[prefix_len:] + "." + func.__name__ + # Add to the list if a pattern is provided. + if re.compile(regex).match(short_name): + found.append((short_name, mod, func)) + return found + + +@runner.command("list") +@click.argument("method", nargs=-1) +def list_all(method): + """Lists available benchmarks.""" + if not method: + method = ".*" + else: + method = "(" + ",".join(method) + ")" + for (short_name, _, func) in find_benchmarks(method): + print("Benchmark %s:" % short_name) + metrics = suites.benchmark_metrics(func) + if func.__doc__: + print(" " + func.__doc__.lstrip().rstrip()) + if metrics: + print("\n Metrics:") + for metric in metrics: + print("\t{name}: {doc}".format(name=metric[0], doc=metric[1])) + print("\n") + + +@runner.command("run-local", commands.LocalCommand) +@click.pass_context +def run_local(ctx, limit: float, **kwargs): + """Runs benchmarks locally.""" + run(ctx, machine_producer.LocalMachineProducer(limit=limit), **kwargs) + + +@runner.command("run-mock", commands.RunCommand) +@click.pass_context +def run_mock(ctx, **kwargs): + """Runs benchmarks on Mock machines. Used for testing.""" + run(ctx, mock_producer.MockMachineProducer(), **kwargs) + + +@runner.command("run-gcp", commands.GCPCommand) +@click.pass_context +def run_gcp(ctx, image_file: str, zone_file: str, internal: bool, + machine_type: str, installers: List[str], **kwargs): + """Runs all benchmarks on GCP instances.""" + + # Resolve all files. + image = subprocess.check_output([image_file]).rstrip() + zone = subprocess.check_output([zone_file]).rstrip() + key_file = harness.make_key() + + producer = gcloud_producer.GCloudProducer( + image, + zone, + machine_type, + installers, + ssh_key_file=key_file, + ssh_user=harness.DEFAULT_USER, + ssh_password="", + internal=internal) + + try: + run(ctx, producer, **kwargs) + finally: + harness.delete_key() + + +def run(ctx, producer: machine_producer.MachineProducer, method: str, runs: int, + runtime: List[str], metric: List[str], stat: str, **kwargs): + """Runs arbitrary benchmarks. + + All unknown command line flags are passed through to the underlying benchmark + method. Flags may be specified multiple times, in which case it is considered + a "dimension" for the test, and a comma-separated table will be emitted + instead of a single result. + + See the output of list to see available metrics for any given benchmark + method. The method parameter is a regular expression that will match against + available benchmarks. If multiple benchmarks match, then that is considered a + distinct "dimension" for the test. + + All benchmarks are run in parallel where possible, but have exclusive + ownership over the individual machines. + + Every benchmark method will be run the times indicated by --runs. + + Args: + ctx: Click context. + producer: A Machine Producer from which to get Machines. + method: A regular expression for methods to be run. + runs: Number of runs. + runtime: A list of runtimes to test. + metric: A list of metrics to extract. + stat: The class of statistics to extract. + **kwargs: Dimensions to test. + """ + # First, calculate additional arguments. + # + # This essentially calculates any arguments that appear multiple times, and + # moves those to the "dimensions" dictionary, which maps to lists. These + # dimensions are then iterated over to generate the relevant csv output. + dimensions = {} + + if stat not in ["median", "all", "meanstd"]: + raise ValueError("Illegal value for --result, see help.") + + def squish(key: str, value: str): + """Collapse an argument into kwargs or dimensions.""" + if key in dimensions: + # Extend an existing dimension. + dimensions[key].append(value) + elif key in kwargs: + # Create a new dimension. + dimensions[key] = [kwargs[key], value] + del kwargs[key] + else: + # A single value. + kwargs[key] = value + + for item in ctx.args: + if "=" in method: + # This must be the method. The method is simply set to the first + # non-matching argument, which we're also parsing here. + item, method = method, item + if "=" not in item: + logging.error("illegal argument: %s", item) + sys.exit(1) + (key, value) = item.lstrip("-").split("=", 1) + squish(key, value) + + # Convert runtime and metric to dimensions. + # + # They exist only in the arguments above for documentation purposes. + # Essentially here we are treating them like anything else. Note however, + # that an empty set here will result in a dimension. This is important for + # metrics, where an empty set actually means all metrics. + def fold(key: str, value, allow_flatten=False): + """Collapse a list value into kwargs or dimensions.""" + if len(value) == 1 and allow_flatten: + kwargs[key] = value[0] + else: + dimensions[key] = value + + fold("runtime", runtime, allow_flatten=True) + fold("metric", metric) + + # Lookup the methods. + # + # We match the method parameter to a regular expression. This allows you to + # do things like `run --mock .*` for a broad test. Note that we track the + # short_names in the dimensions here, and look up again in the recursion. + methods = { + short_name: func for (short_name, _, func) in find_benchmarks(method) + } + if not methods: + # Must match at least one method. + logging.error("no matching benchmarks for %s: try list.", method) + sys.exit(1) + fold("method", list(methods.keys()), allow_flatten=True) + + # Spin up the drivers. + # + # We ensure that metric is the last entry, because we have special behavior. + # They actually run the test once and the benchmark is a generator that + # produces all viable metrics. + dimension_keys = list(dimensions.keys()) + if "metric" in dimension_keys: + dimension_keys.remove("metric") + dimension_keys.append("metric") + drivers = [] + + def _start(keywords, finished, left): + """Runs a test across dimensions recursively.""" + # Resolve the method fully, it starts as a string. + if "method" in keywords and isinstance(keywords["method"], str): + keywords["method"] = methods[keywords["method"]] + # Is this a non-recursive case? + if not left: + driver = benchmark_driver.BenchmarkDriver(producer, runs=runs, **keywords) + driver.start() + drivers.append((finished, driver)) + else: + # Recurse on the next dimension. + current, left = left[0], left[1:] + keywords = copy.deepcopy(keywords) + if current == "metric": + # We use a generator, popped below. Note that metric is + # guaranteed to be the last element here, and we will provide + # the value for 'done' below when generating the csv. + keywords[current] = dimensions[current] + _start(keywords, finished, left) + else: + # Generate manually. + for value in dimensions[current]: + keywords[current] = value + _start(keywords, finished + [value], left) + + # Start all the drivers, recursively. + _start(kwargs, [], dimension_keys) + + # Finish all tests, write results. + output = csv.writer(sys.stdout) + output.writerow(dimension_keys + ["result"]) + for (done, driver) in drivers: + driver.join() + for (metric_name, result) in getattr(driver, stat)(): + output.writerow([ # Collapse the method name. + hasattr(x, "__name__") and x.__name__ or x for x in done + ] + [metric_name] + result) + + +@runner.command() +@click.argument("env") +@click.option( + "--cmd", default="uname -a", help="command to run on all found machines") +@click.option( + "--workload", default="true", help="workload to run all found machines") +def validate(env, cmd, workload): + """Validates an environment described by yaml file.""" + producer = yaml_producer.YamlMachineProducer(env) + for machine in producer.machines: + print("Machine %s:" % machine) + stdout, _ = machine.run(cmd) + print(" Output of '%s': %s" % (cmd, stdout.lstrip().rstrip())) + image = machine.pull(workload) + stdout = machine.container(image).run() + print(" Container %s: %s" % (workload, stdout.lstrip().rstrip())) diff --git a/benchmarks/runner/commands.py b/benchmarks/runner/commands.py new file mode 100644 index 000000000..9a391eb01 --- /dev/null +++ b/benchmarks/runner/commands.py @@ -0,0 +1,135 @@ +# python3 +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module with the guts of `click` commands. + +Overrides of the click.core.Command. This is done so flags are inherited between +similar commands (the run command). The classes below are meant to be used in +click templates like so. + +@runner.command("run-mock", RunCommand) +def run_mock(**kwargs): + # mock implementation + +""" +import os + +import click + + +class RunCommand(click.core.Command): + """Base Run Command with flags. + + Attributes: + method: regex of which suite to choose (e.g. sysbench would run + sysbench.cpu, sysbench.memory, and sysbench.mutex) See list command for + details. + metric: metric(s) to extract. See list command for details. + runtime: the runtime(s) on which to run. + runs: the number of runs to do of each method. + stat: how to compile results in the case of multiple run (e.g. median). + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + method = click.core.Argument(("method",)) + + metric = click.core.Option(("--metric",), + help="The metric to extract.", + multiple=True) + + runtime = click.core.Option(("--runtime",), + default=["runc"], + help="The runtime to use.", + multiple=True) + runs = click.core.Option(("--runs",), + default=1, + help="The number of times to run each benchmark.") + stat = click.core.Option( + ("--stat",), + default="median", + help="How to aggregate the data from all runs." + "\nmedian - returns the median of all runs (default)" + "\nall - returns all results comma separated" + "\nmeanstd - returns result as mean,std") + self.params.extend([method, runtime, runs, stat, metric]) + self.ignore_unknown_options = True + self.allow_extra_args = True + + +class LocalCommand(RunCommand): + """LocalCommand inherits all flags from RunCommand. + + Attributes: + limit: limits the number of machines on which to run benchmarks. This limits + for local how many benchmarks may run at a time. e.g. "startup" requires + one machine -- passing two machines would limit two startup jobs at a + time. Default is infinity. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.params.append( + click.core.Option( + ("--limit",), + default=1, + help="Limit of number of benchmarks that can run at a given time.")) + + +class GCPCommand(RunCommand): + """GCPCommand inherits all flags from RunCommand and adds flags for run_gcp method. + + Attributes: + image_file: name of the image to build machines from + zone_file: a GCP zone (e.g. us-west1-b) + installers: named installers for post-create + machine_type: type of machine to create (e.g. n1-standard-4) + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + image_file = click.core.Option( + ("--image_file",), + help="The binary that emits the GCP image.", + default=os.path.join( + os.path.dirname(__file__), "../../tools/vm/ubuntu1604"), + ) + zone_file = click.core.Option( + ("--zone_file",), + help="The binary that emits the GCP zone.", + default=os.path.join(os.path.dirname(__file__), "../../tools/vm/zone"), + ) + internal = click.core.Option( + ("--internal/--no-internal",), + help="""Use instance internal IPs. Used if bm-tools runner is running on + GCP instance with firewall rules blocking external IPs.""", + default=False, + ) + installers = click.core.Option( + ("--installers",), + help="The set of installers to use.", + multiple=True, + ) + machine_type = click.core.Option( + ("--machine_type",), + help="Type to make all machines.", + default="n1-standard-4", + ) + self.params.extend([ + image_file, + zone_file, + internal, + machine_type, + installers, + ]) diff --git a/benchmarks/runner/runner_test.py b/benchmarks/runner/runner_test.py new file mode 100644 index 000000000..7818d631a --- /dev/null +++ b/benchmarks/runner/runner_test.py @@ -0,0 +1,59 @@ +# python3 +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Top-level tests.""" + +import os +import subprocess +import sys + +from click import testing +import pytest + +from benchmarks import runner + + +def _get_locale(): + output = subprocess.check_output(["locale", "-a"]) + locales = output.split() + if b"en_US.utf8" in locales: + return "en_US.UTF-8" + else: + return "C.UTF-8" + + +def _set_locale(): + locale = _get_locale() + if os.getenv("LANG") != locale: + os.environ["LANG"] = locale + os.environ["LC_ALL"] = locale + os.execv("/proc/self/exe", ["python"] + sys.argv) + + +def test_list(): + cli_runner = testing.CliRunner() + result = cli_runner.invoke(runner.runner, ["list"]) + print(result.output) + assert result.exit_code == 0 + + +def test_run(): + cli_runner = testing.CliRunner() + result = cli_runner.invoke(runner.runner, ["run-mock", "."]) + print(result.output) + assert result.exit_code == 0 + + +if __name__ == "__main__": + _set_locale() + sys.exit(pytest.main([__file__])) |