Diffstat (limited to 'benchmarks/runner/__init__.py')
-rw-r--r-- | benchmarks/runner/__init__.py | 338 |
1 file changed, 0 insertions, 338 deletions
diff --git a/benchmarks/runner/__init__.py b/benchmarks/runner/__init__.py
deleted file mode 100644
index ba80d83d7..000000000
--- a/benchmarks/runner/__init__.py
+++ /dev/null
@@ -1,338 +0,0 @@
-# python3
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""High-level benchmark utility."""
-
-import copy
-import csv
-import json
-import logging
-import os
-import pkgutil
-import pydoc
-import re
-import subprocess
-import sys
-import types
-from typing import List
-from typing import Tuple
-
-import click
-
-from benchmarks import harness
-from benchmarks import suites
-from benchmarks.harness import benchmark_driver
-from benchmarks.harness.machine_producers import gcloud_producer
-from benchmarks.harness.machine_producers import machine_producer
-from benchmarks.harness.machine_producers import mock_producer
-from benchmarks.harness.machine_producers import yaml_producer
-from benchmarks.runner import commands
-
-
-@click.group()
-@click.option(
-    "--verbose/--no-verbose", default=False, help="Enable verbose logging.")
-@click.option("--debug/--no-debug", default=False, help="Enable debug logging.")
-def runner(verbose: bool = False, debug: bool = False):
-  """Run distributed benchmarks.
-
-  See the run and list commands for details.
-
-  Args:
-    verbose: Enable verbose logging.
-    debug: Enable debug logging (supersedes verbose).
-  """
-  if debug:
-    logging.basicConfig(level=logging.DEBUG)
-  elif verbose:
-    logging.basicConfig(level=logging.INFO)
-
-
-def find_benchmarks(
-    regex: str) -> List[Tuple[str, types.ModuleType, types.FunctionType]]:
-  """Finds all available benchmarks.
-
-  Args:
-    regex: A regular expression to match.
-
-  Returns:
-    A (short_name, module, function) tuple for each match.
-  """
-  pkgs = pkgutil.walk_packages(suites.__path__, suites.__name__ + ".")
-  found = []
-  for _, name, _ in pkgs:
-    mod = pydoc.locate(name)
-    funcs = [
-        getattr(mod, x)
-        for x in dir(mod)
-        if suites.is_benchmark(getattr(mod, x))
-    ]
-    for func in funcs:
-      # Use the short_name with the benchmarks. prefix stripped.
-      prefix_len = len(suites.__name__ + ".")
-      short_name = mod.__name__[prefix_len:] + "." + func.__name__
-      # Add to the list if the short name matches the given pattern.
-      if re.compile(regex).match(short_name):
-        found.append((short_name, mod, func))
-  return found
-
-
-@runner.command("list")
-@click.argument("method", nargs=-1)
-def list_all(method):
-  """Lists available benchmarks."""
-  if not method:
-    method = ".*"
-  else:
-    method = "(" + ",".join(method) + ")"
-  for (short_name, _, func) in find_benchmarks(method):
-    print("Benchmark %s:" % short_name)
-    metrics = suites.benchmark_metrics(func)
-    if func.__doc__:
-      print(" " + func.__doc__.lstrip().rstrip())
-    if metrics:
-      print("\n Metrics:")
-      for metric in metrics:
-        print("\t{name}: {doc}".format(name=metric[0], doc=metric[1]))
-    print("\n")
-
-
-@runner.command("run-local", commands.LocalCommand)
-@click.pass_context
-def run_local(ctx, limit: float, **kwargs):
-  """Runs benchmarks locally."""
-  run(ctx, machine_producer.LocalMachineProducer(limit=limit), **kwargs)
-
-
-@runner.command("run-mock", commands.RunCommand)
-@click.pass_context
-def run_mock(ctx, **kwargs):
-  """Runs benchmarks on mock machines. Used for testing."""
-  run(ctx, mock_producer.MockMachineProducer(), **kwargs)
-
-
-@runner.command("run-gcp", commands.GCPCommand)
-@click.pass_context
-def run_gcp(ctx, project: str, ssh_key_file: str, image: str,
-            image_project: str, machine_type: str, zone: str, ssh_user: str,
-            ssh_password: str, **kwargs):
-  """Runs all benchmarks on GCP instances."""
-
-  if not ssh_user:
-    ssh_user = harness.DEFAULT_USER
-
-  # Get the default project if one was not provided.
-  if not project:
-    sub = subprocess.run(
-        "gcloud config get-value project".split(" "), stdout=subprocess.PIPE)
-    if sub.returncode:
-      raise ValueError(
-          "Cannot get default project from gcloud. Is it configured?")
-    project = sub.stdout.decode("utf-8").strip("\n")
-
-  if not image_project:
-    image_project = project
-
-  # Check that the ssh key exists and is readable.
-  if not os.access(ssh_key_file, os.R_OK):
-    raise ValueError(
-        "ssh key given `{ssh_key}` does not exist or is not readable."
-        .format(ssh_key=ssh_key_file))
-
-  # Check that the image exists.
-  sub = subprocess.run(
-      "gcloud compute images describe {image} --project {image_project} --format=json"
-      .format(image=image, image_project=image_project).split(" "),
-      stdout=subprocess.PIPE)
-  if sub.returncode or "READY" not in json.loads(sub.stdout)["status"]:
-    raise ValueError(
-        "given image was not found or is not ready: {image} {image_project}."
-        .format(image=image, image_project=image_project))
-
-  # Check and set zone to default.
-  if not zone:
-    sub = subprocess.run(
-        "gcloud config get-value compute/zone".split(" "),
-        stdout=subprocess.PIPE)
-    if sub.returncode:
-      raise ValueError(
-          "Default zone is not set in gcloud. Set one or pass a zone with the --zone flag."
-      )
-    zone = sub.stdout.decode("utf-8").strip("\n")
-
-  producer = gcloud_producer.GCloudProducer(project, ssh_key_file, image,
-                                            image_project, machine_type, zone,
-                                            ssh_user, ssh_password)
-  run(ctx, producer, **kwargs)
-
-
-def run(ctx, producer: machine_producer.MachineProducer, method: str, runs: int,
-        runtime: List[str], metric: List[str], stat: str, **kwargs):
-  """Runs arbitrary benchmarks.
-
-  All unknown command line flags are passed through to the underlying benchmark
-  method. Flags may be specified multiple times, in which case that flag is
-  treated as a "dimension" for the test, and a comma-separated table will be
-  emitted instead of a single result.
-
-  See the output of list for the available metrics for any given benchmark
-  method. The method parameter is a regular expression that is matched against
-  the available benchmarks. If multiple benchmarks match, that is also
-  considered a distinct "dimension" for the test.
-
-  All benchmarks are run in parallel where possible, but have exclusive
-  ownership over the individual machines.
-
-  Every benchmark method will be run the number of times indicated by --runs.
-
-  Args:
-    ctx: Click context.
-    producer: A Machine Producer from which to get Machines.
-    method: A regular expression for methods to be run.
-    runs: Number of runs.
-    runtime: A list of runtimes to test.
-    metric: A list of metrics to extract.
-    stat: The class of statistics to extract.
-    **kwargs: Dimensions to test.
-  """
-  # First, calculate additional arguments.
-  #
-  # This essentially collects any arguments that appear multiple times and
-  # moves them to the "dimensions" dictionary, which maps to lists. These
-  # dimensions are then iterated over to generate the relevant csv output.
-  dimensions = {}
-
-  if stat not in ["median", "all", "meanstd"]:
-    raise ValueError("Illegal value for --result, see help.")
-
-  def squish(key: str, value: str):
-    """Collapses an argument into kwargs or dimensions."""
-    if key in dimensions:
-      # Extend an existing dimension.
-      dimensions[key].append(value)
-    elif key in kwargs:
-      # Create a new dimension.
-      dimensions[key] = [kwargs[key], value]
-      del kwargs[key]
-    else:
-      # A single value.
-      kwargs[key] = value
-
-  for item in ctx.args:
-    if "=" in method:
-      # This must be the method. The method is simply set to the first
-      # non-matching argument, which we're also parsing here.
-      item, method = method, item
-    if "=" not in item:
-      logging.error("illegal argument: %s", item)
-      sys.exit(1)
-    (key, value) = item.lstrip("-").split("=", 1)
-    squish(key, value)
-
-  # Convert runtime and metric to dimensions.
-  #
-  # They exist only in the arguments above for documentation purposes.
-  # Essentially here we are treating them like anything else. Note, however,
-  # that an empty set here will result in a dimension. This is important for
-  # metrics, where an empty set actually means all metrics.
-  def fold(key: str, value, allow_flatten=False):
-    """Collapses a list value into kwargs or dimensions."""
-    if len(value) == 1 and allow_flatten:
-      kwargs[key] = value[0]
-    else:
-      dimensions[key] = value
-
-  fold("runtime", runtime, allow_flatten=True)
-  fold("metric", metric)
-
-  # Look up the methods.
-  #
-  # We match the method parameter against a regular expression. This allows
-  # things like `run --mock .*` for a broad test. Note that we track the
-  # short_names in the dimensions here, and look them up again in the
-  # recursion.
-  methods = {
-      short_name: func for (short_name, _, func) in find_benchmarks(method)
-  }
-  if not methods:
-    # Must match at least one method.
-    logging.error("no matching benchmarks for %s: try list.", method)
-    sys.exit(1)
-  fold("method", list(methods.keys()), allow_flatten=True)
-
-  # Spin up the drivers.
-  #
-  # We ensure that metric is the last entry, because it has special behavior:
-  # the test is run only once, and the benchmark is a generator that produces
-  # all viable metrics.
-  dimension_keys = list(dimensions.keys())
-  if "metric" in dimension_keys:
-    dimension_keys.remove("metric")
-    dimension_keys.append("metric")
-  drivers = []
-
-  def _start(keywords, finished, left):
-    """Runs a test across dimensions recursively."""
-    # Resolve the method fully; it starts as a string.
-    if "method" in keywords and isinstance(keywords["method"], str):
-      keywords["method"] = methods[keywords["method"]]
-    # Is this a non-recursive case?
-    if not left:
-      driver = benchmark_driver.BenchmarkDriver(producer, runs=runs, **keywords)
-      driver.start()
-      drivers.append((finished, driver))
-    else:
-      # Recurse on the next dimension.
-      current, left = left[0], left[1:]
-      keywords = copy.deepcopy(keywords)
-      if current == "metric":
-        # We use a generator, popped below. Note that metric is
-        # guaranteed to be the last element here, and we will provide
-        # the value for 'done' below when generating the csv.
-        keywords[current] = dimensions[current]
-        _start(keywords, finished, left)
-      else:
-        # Generate manually.
-        for value in dimensions[current]:
-          keywords[current] = value
-          _start(keywords, finished + [value], left)
-
-  # Start all the drivers, recursively.
-  _start(kwargs, [], dimension_keys)
-
-  # Finish all tests, write results.
-  output = csv.writer(sys.stdout)
-  output.writerow(dimension_keys + ["result"])
-  for (done, driver) in drivers:
-    driver.join()
-    for (metric_name, result) in getattr(driver, stat)():
-      output.writerow([  # Collapse the method name.
-          hasattr(x, "__name__") and x.__name__ or x for x in done
-      ] + [metric_name] + result)
-
-
-@runner.command()
-@click.argument("env")
-@click.option(
-    "--cmd", default="uname -a", help="command to run on all found machines")
-@click.option(
-    "--workload", default="true", help="workload to run on all found machines")
-def validate(env, cmd, workload):
-  """Validates an environment described by a yaml file."""
-  producer = yaml_producer.YamlMachineProducer(env)
-  for machine in producer.machines:
-    print("Machine %s:" % machine)
-    stdout, _ = machine.run(cmd)
-    print(" Output of '%s': %s" % (cmd, stdout.lstrip().rstrip()))
-    image = machine.pull(workload)
-    stdout = machine.container(image).run()
-    print(" Container %s: %s" % (workload, stdout.lstrip().rstrip()))