From 94be30a18dc7c75dc70716ce1ede74a7fb1352fb Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Thu, 16 Jan 2020 13:00:58 -0800 Subject: Add run-gcp command. Add command to run benchmarks on GCP backed machines using the gcloud producer. Run with: `bazel run :benchmarks -- run-gcp [BENCHMARK_NAME]` Tested with the startup benchmark. PiperOrigin-RevId: 290126444 --- benchmarks/harness/__init__.py | 7 +++ benchmarks/harness/machine.py | 3 + .../harness/machine_producers/gcloud_producer.py | 70 ++++++++++++++-------- benchmarks/harness/ssh_connection.py | 9 +-- benchmarks/runner/__init__.py | 60 +++++++++++++++++++ benchmarks/runner/commands.py | 51 ++++++++++++++++ 6 files changed, 168 insertions(+), 32 deletions(-) diff --git a/benchmarks/harness/__init__.py b/benchmarks/harness/__init__.py index 7b96d1666..61fd25f73 100644 --- a/benchmarks/harness/__init__.py +++ b/benchmarks/harness/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. """Core benchmark utilities.""" +import getpass import os # LOCAL_WORKLOADS_PATH defines the path to use for local workloads. This is a @@ -23,3 +24,9 @@ LOCAL_WORKLOADS_PATH = os.path.join( # REMOTE_WORKLOADS_PATH defines the path to use for storing the workloads on the # remote host. This is a format string that accepts a single string parameter. REMOTE_WORKLOADS_PATH = "workloads/{}" + +# DEFAULT_USER is the default user running this script. +DEFAULT_USER = getpass.getuser() + +# DEFAULT_USER_HOME is the home directory of the user running the script. +DEFAULT_USER_HOME = os.environ["HOME"] if "HOME" in os.environ else "" diff --git a/benchmarks/harness/machine.py b/benchmarks/harness/machine.py index af037dbcc..2df4c9e31 100644 --- a/benchmarks/harness/machine.py +++ b/benchmarks/harness/machine.py @@ -214,6 +214,9 @@ class RemoteMachine(Machine): # Push to the remote machine and build. logging.info("Building %s@%s remotely...", workload, self._name) remote_path = self._ssh_connection.send_workload(workload) + # Workloads are all tarballs. + self.run("tar -xvf {remote_path}/tar.tar -C {remote_path}".format( + remote_path=remote_path)) self.run("docker build --tag={} {}".format(workload, remote_path)) return workload # Workload is the tag. diff --git a/benchmarks/harness/machine_producers/gcloud_producer.py b/benchmarks/harness/machine_producers/gcloud_producer.py index 4693dd8a2..e0b77d52b 100644 --- a/benchmarks/harness/machine_producers/gcloud_producer.py +++ b/benchmarks/harness/machine_producers/gcloud_producer.py @@ -29,7 +29,6 @@ collisions with user instances shouldn't happen. producer.release_machines(NUM_MACHINES) """ import datetime -import getpass import json import subprocess import threading @@ -40,8 +39,6 @@ from benchmarks.harness import machine from benchmarks.harness.machine_producers import gcloud_mock_recorder from benchmarks.harness.machine_producers import machine_producer -DEFAULT_USER = getpass.getuser() - class GCloudProducer(machine_producer.MachineProducer): """Implementation of MachineProducer backed by GCP. @@ -50,9 +47,10 @@ class GCloudProducer(machine_producer.MachineProducer): Attributes: project: The GCP project name under which to create the machines. - ssh_key_path: path to a valid ssh key. See README on vaild ssh keys. + ssh_key_file: path to a valid ssh private key. See README on vaild ssh keys. image: image name as a string. image_project: image project as a string. + machine_type: type of GCP to create. e.g. n1-standard-4 zone: string to a valid GCP zone. ssh_user: string of user name for ssh_key ssh_password: string of password for ssh key @@ -63,18 +61,22 @@ class GCloudProducer(machine_producer.MachineProducer): def __init__(self, project: str, - ssh_key_path: str, + ssh_key_file: str, image: str, image_project: str, + machine_type: str, zone: str, ssh_user: str, + ssh_password: str, mock: gcloud_mock_recorder.MockPrinter = None): self.project = project - self.ssh_key_path = ssh_key_path + self.ssh_key_file = ssh_key_file self.image = image self.image_project = image_project + self.machine_type = machine_type self.zone = zone - self.ssh_user = ssh_user if ssh_user else DEFAULT_USER + self.ssh_user = ssh_user + self.ssh_password = ssh_password self.mock = mock self.condition = threading.Condition() @@ -86,20 +88,19 @@ class GCloudProducer(machine_producer.MachineProducer): with self.condition: names = self._get_unique_names(num_machines) self._build_instances(names) - instances = self._start_command(names) - self._add_ssh_key_to_instances(names) - return self._machines_from_instances(instances) + instances = self._start_command(names) + self._add_ssh_key_to_instances(names) + return self._machines_from_instances(instances) def release_machines(self, machine_list: List[machine.Machine]): """Releases the requested number of machines, deleting the instances.""" if not machine_list: return - with self.condition: - cmd = "gcloud compute instances delete --quiet".split(" ") - names = [str(m) for m in machine_list] - cmd.extend(names) - cmd.append("--zone={zone}".format(zone=self.zone)) - self._run_command(cmd) + cmd = "gcloud compute instances delete --quiet".split(" ") + names = [str(m) for m in machine_list] + cmd.extend(names) + cmd.append("--zone={zone}".format(zone=self.zone)) + self._run_command(cmd, detach=True) def _machines_from_instances( self, instances: List[Dict[str, Any]]) -> List[machine.Machine]: @@ -111,9 +112,11 @@ class GCloudProducer(machine_producer.MachineProducer): "hostname": instance["networkInterfaces"][0]["accessConfigs"][0]["natIP"], "key_path": - self.ssh_key_path, + self.ssh_key_file, "username": - self.ssh_user + self.ssh_user, + "key_password": + self.ssh_password } machines.append(machine.RemoteMachine(name=name, **kwargs)) return machines @@ -148,12 +151,15 @@ class GCloudProducer(machine_producer.MachineProducer): "_build_instances cannot create instances without names.") cmd = "gcloud compute instances create".split(" ") cmd.extend(names) - cmd.extend("--preemptible --image={image} --zone={zone}".format( - image=self.image, zone=self.zone).split(" ")) + cmd.extend( + "--preemptible --image={image} --zone={zone} --machine-type={machine_type}" + .format( + image=self.image, zone=self.zone, + machine_type=self.machine_type).split(" ")) if self.image_project: cmd.append("--image-project={project}".format(project=self.image_project)) - res = self._run_command(cmd) - return json.loads(res.stdout) + res = self._run_command(cmd) + return json.loads(res.stdout) def _start_command(self, names): """Starts instances using gcloud command. @@ -184,7 +190,7 @@ class GCloudProducer(machine_producer.MachineProducer): Args: names: list of machine names to which to add the ssh-key - self.ssh_key_path. + self.ssh_key_file. Raises: subprocess.CalledProcessError: when underlying subprocess call returns an @@ -193,7 +199,7 @@ class GCloudProducer(machine_producer.MachineProducer): """ for name in names: cmd = "gcloud compute ssh {name}".format(name=name).split(" ") - cmd.append("--ssh-key-file={key}".format(key=self.ssh_key_path)) + cmd.append("--ssh-key-file={key}".format(key=self.ssh_key_file)) cmd.append("--zone={zone}".format(zone=self.zone)) cmd.append("--command=uname") timeout = datetime.timedelta(seconds=5 * 60) @@ -221,7 +227,9 @@ class GCloudProducer(machine_producer.MachineProducer): res = self._run_command(cmd) return json.loads(res.stdout) - def _run_command(self, cmd: List[str]) -> subprocess.CompletedProcess: + def _run_command(self, + cmd: List[str], + detach: bool = False) -> [None, subprocess.CompletedProcess]: """Runs command as a subprocess. Runs command as subprocess and returns the result. @@ -230,14 +238,24 @@ class GCloudProducer(machine_producer.MachineProducer): Args: cmd: command to be run as a list of strings. + detach: if True, run the child process and don't wait for it to return. Returns: - Completed process object to be parsed by caller. + Completed process object to be parsed by caller or None if detach=True. Raises: CalledProcessError: if subprocess.run returns an error. """ cmd = cmd + ["--format=json"] + if detach: + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if self.mock: + out, _ = p.communicate() + self.mock.record( + subprocess.CompletedProcess( + returncode=p.returncode, stdout=out, args=p.args)) + return + res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if self.mock: self.mock.record(res) diff --git a/benchmarks/harness/ssh_connection.py b/benchmarks/harness/ssh_connection.py index fcbfbcdb2..e0bf258f1 100644 --- a/benchmarks/harness/ssh_connection.py +++ b/benchmarks/harness/ssh_connection.py @@ -94,7 +94,7 @@ class SSHConnection: return stdout, stderr def send_workload(self, name: str) -> str: - """Sends a workload to the remote machine. + """Sends a workload tarball to the remote machine. Args: name: The workload name. @@ -103,9 +103,6 @@ class SSHConnection: The remote path. """ with self._client() as client: - for dirpath, _, filenames in os.walk( - harness.LOCAL_WORKLOADS_PATH.format(name)): - for filename in filenames: - send_one_file(client, os.path.join(dirpath, filename), - harness.REMOTE_WORKLOADS_PATH.format(name)) + send_one_file(client, harness.LOCAL_WORKLOADS_PATH.format(name), + harness.REMOTE_WORKLOADS_PATH.format(name)) return harness.REMOTE_WORKLOADS_PATH.format(name) diff --git a/benchmarks/runner/__init__.py b/benchmarks/runner/__init__.py index 6f56704d8..ba80d83d7 100644 --- a/benchmarks/runner/__init__.py +++ b/benchmarks/runner/__init__.py @@ -15,10 +15,13 @@ import copy import csv +import json import logging +import os import pkgutil import pydoc import re +import subprocess import sys import types from typing import List @@ -26,8 +29,10 @@ from typing import Tuple import click +from benchmarks import harness from benchmarks import suites from benchmarks.harness import benchmark_driver +from benchmarks.harness.machine_producers import gcloud_producer from benchmarks.harness.machine_producers import machine_producer from benchmarks.harness.machine_producers import mock_producer from benchmarks.harness.machine_producers import yaml_producer @@ -116,6 +121,61 @@ def run_mock(ctx, **kwargs): run(ctx, mock_producer.MockMachineProducer(), **kwargs) +@runner.command("run-gcp", commands.GCPCommand) +@click.pass_context +def run_gcp(ctx, project: str, ssh_key_file: str, image: str, + image_project: str, machine_type: str, zone: str, ssh_user: str, + ssh_password: str, **kwargs): + """Runs all benchmarks on GCP instances.""" + + if not ssh_user: + ssh_user = harness.DEFAULT_USER + + # Get the default project if one was not provided. + if not project: + sub = subprocess.run( + "gcloud config get-value project".split(" "), stdout=subprocess.PIPE) + if sub.returncode: + raise ValueError( + "Cannot get default project from gcloud. Is it configured>") + project = sub.stdout.decode("utf-8").strip("\n") + + if not image_project: + image_project = project + + # Check that the ssh-key exists and is readable. + if not os.access(ssh_key_file, os.R_OK): + raise ValueError( + "ssh key given `{ssh_key}` is does not exist or is not readable." + .format(ssh_key=ssh_key_file)) + + # Check that the image exists. + sub = subprocess.run( + "gcloud compute images describe {image} --project {image_project} --format=json" + .format(image=image, image_project=image_project).split(" "), + stdout=subprocess.PIPE) + if sub.returncode or "READY" not in json.loads(sub.stdout)["status"]: + raise ValueError( + "given image was not found or is not ready: {image} {image_project}." + .format(image=image, image_project=image_project)) + + # Check and set zone to default. + if not zone: + sub = subprocess.run( + "gcloud config get-value compute/zone".split(" "), + stdout=subprocess.PIPE) + if sub.returncode: + raise ValueError( + "Default zone is not set in gcloud. Set one or pass a zone with the --zone flag." + ) + zone = sub.stdout.decode("utf-8").strip("\n") + + producer = gcloud_producer.GCloudProducer(project, ssh_key_file, image, + image_project, machine_type, zone, + ssh_user, ssh_password) + run(ctx, producer, **kwargs) + + def run(ctx, producer: machine_producer.MachineProducer, method: str, runs: int, runtime: List[str], metric: List[str], stat: str, **kwargs): """Runs arbitrary benchmarks. diff --git a/benchmarks/runner/commands.py b/benchmarks/runner/commands.py index 4973843b9..7ab12fac6 100644 --- a/benchmarks/runner/commands.py +++ b/benchmarks/runner/commands.py @@ -24,6 +24,8 @@ def run_mock(**kwargs): """ import click +from benchmarks import harness + class RunCommand(click.core.Command): """Base Run Command with flags. @@ -82,3 +84,52 @@ class LocalCommand(RunCommand): ("--limit",), default=1, help="Limit of number of benchmarks that can run at a given time.")) + + +class GCPCommand(RunCommand): + """GCPCommand inherits all flags from RunCommand and adds flags for run_gcp method. + + Attributes: + project: GCP project + ssh_key_path: path to the ssh-key to use for the run + image: name of the image to build machines from + image_project: GCP project under which to find image + zone: a GCP zone (e.g. us-west1-b) + ssh_user: username to use for the ssh-key + ssh_password: password to use for the ssh-key + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + project = click.core.Option( + ("--project",), + help="Project to run on if not default value given by 'gcloud config get-value project'." + ) + ssh_key_path = click.core.Option( + ("--ssh-key-file",), + help="Path to a valid ssh private key to use. See README on generating a valid ssh key. Set to ~/.ssh/benchmark-tools by default.", + default=harness.DEFAULT_USER_HOME + "/.ssh/benchmark-tools") + image = click.core.Option(("--image",), + help="The image on which to build VMs.", + default="bm-tools-testing") + image_project = click.core.Option( + ("--image_project",), + help="The project under which the image to be used is listed.", + default="") + machine_type = click.core.Option(("--machine_type",), + help="Type to make all machines.", + default="n1-standard-4") + zone = click.core.Option(("--zone",), + help="The GCP zone to run on.", + default="") + ssh_user = click.core.Option(("--ssh-user",), + help="User for the ssh key.", + default=harness.DEFAULT_USER) + ssh_password = click.core.Option(("--ssh-password",), + help="Password for the ssh key.", + default="") + self.params.extend([ + project, ssh_key_path, image, image_project, machine_type, zone, + ssh_user, ssh_password + ]) -- cgit v1.2.3