summaryrefslogtreecommitdiffhomepage
path: root/benchmarks/harness
diff options
context:
space:
mode:
authorZach Koopmans <zkoopmans@google.com>2020-02-06 15:59:44 -0800
committergVisor bot <gvisor-bot@google.com>2020-02-06 16:02:48 -0800
commit16561e461e82f8d846ef1f3ada990270ef39ccc6 (patch)
treee05d7d8c9ce5a7d02191d5511567cfea060eaeb0 /benchmarks/harness
parent940d255971c38af9f91ceed1345fd973f8fdb41d (diff)
Add logic to run from baked images.
Change adds the following: - logic to run from "baked images". See [GVISOR_DIR]/tools/images - installers which install modified files from a workspace. This allows users to run benchmarks while modifying runsc. - removes the --preemptible tag from built GCE instances. Preemptible instances are much more likely to be preempted on startup, which manifests for the user as a failed benchmark. I don't currently have a way to detect if a VM has been preempted that will work for this change. https://cloud.google.com/compute/docs/instances/preemptible#preemption_process https://cloud.google.com/compute/docs/instances/preemptible#preemption_selection PiperOrigin-RevId: 293697949
Diffstat (limited to 'benchmarks/harness')
-rw-r--r--benchmarks/harness/BUILD21
-rw-r--r--benchmarks/harness/__init__.py36
-rw-r--r--benchmarks/harness/machine.py43
-rw-r--r--benchmarks/harness/machine_producers/BUILD1
-rw-r--r--benchmarks/harness/machine_producers/gcloud_producer.py114
-rw-r--r--benchmarks/harness/ssh_connection.py25
6 files changed, 157 insertions, 83 deletions
diff --git a/benchmarks/harness/BUILD b/benchmarks/harness/BUILD
index 52d4e42f8..4d03e3a06 100644
--- a/benchmarks/harness/BUILD
+++ b/benchmarks/harness/BUILD
@@ -1,3 +1,4 @@
+load("//tools:defs.bzl", "pkg_tar")
load("//tools:defs.bzl", "py_library", "py_requirement")
package(
@@ -5,9 +6,29 @@ package(
licenses = ["notice"],
)
+pkg_tar(
+ name = "installers",
+ srcs = [
+ "//tools/installers:head",
+ "//tools/installers:master",
+ "//tools/installers:runsc",
+ ],
+ mode = "0755",
+)
+
+filegroup(
+ name = "files",
+ srcs = [
+ ":installers",
+ ],
+)
+
py_library(
name = "harness",
srcs = ["__init__.py"],
+ data = [
+ ":files",
+ ],
)
py_library(
diff --git a/benchmarks/harness/__init__.py b/benchmarks/harness/__init__.py
index 61fd25f73..15aa2a69a 100644
--- a/benchmarks/harness/__init__.py
+++ b/benchmarks/harness/__init__.py
@@ -1,5 +1,5 @@
# python3
-# Copyright 2019 Google LLC
+# Copyright 2019 The gVisor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -15,18 +15,48 @@
import getpass
import os
+import subprocess
+import tempfile
# LOCAL_WORKLOADS_PATH defines the path to use for local workloads. This is a
# format string that accepts a single string parameter.
-LOCAL_WORKLOADS_PATH = os.path.join(
- os.path.dirname(__file__), "../workloads/{}/tar.tar")
+LOCAL_WORKLOADS_PATH = os.path.dirname(__file__) + "/../workloads/{}/tar.tar"
# REMOTE_WORKLOADS_PATH defines the path to use for storing the workloads on the
# remote host. This is a format string that accepts a single string parameter.
REMOTE_WORKLOADS_PATH = "workloads/{}"
+# INSTALLER_ROOT is the set of files that needs to be copied.
+INSTALLER_ARCHIVE = os.readlink(os.path.join(
+ os.path.dirname(__file__), "installers.tar"))
+
+# SSH_KEY_DIR holds SSH_PRIVATE_KEY for this run. bm-tools paramiko requires
+# keys generated with the '-t rsa -m PEM' options from ssh-keygen. This is
+# abstracted away from the user.
+SSH_KEY_DIR = tempfile.TemporaryDirectory()
+SSH_PRIVATE_KEY = "key"
+
# DEFAULT_USER is the default user running this script.
DEFAULT_USER = getpass.getuser()
# DEFAULT_USER_HOME is the home directory of the user running the script.
DEFAULT_USER_HOME = os.environ["HOME"] if "HOME" in os.environ else ""
+
+# Default directory to remotely installer "installer" targets.
+REMOTE_INSTALLERS_PATH = "installers"
+
+
+def make_key():
+ """Wraps a valid ssh key in a temporary directory."""
+ path = os.path.join(SSH_KEY_DIR.name, SSH_PRIVATE_KEY)
+ if not os.path.exists(path):
+ cmd = "ssh-keygen -t rsa -m PEM -b 4096 -f {key} -q -N".format(
+ key=path).split(" ")
+ cmd.append("")
+ subprocess.run(cmd, check=True)
+ return path
+
+
+def delete_key():
+ """Deletes temporary directory containing private key."""
+ SSH_KEY_DIR.cleanup()
diff --git a/benchmarks/harness/machine.py b/benchmarks/harness/machine.py
index 2df4c9e31..3d32d3dda 100644
--- a/benchmarks/harness/machine.py
+++ b/benchmarks/harness/machine.py
@@ -1,5 +1,5 @@
# python3
-# Copyright 2019 Google LLC
+# Copyright 2019 The gVisor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -29,10 +29,11 @@ to run contianers.
"""
import logging
+import os
import re
import subprocess
import time
-from typing import Tuple
+from typing import List, Tuple
import docker
@@ -201,6 +202,7 @@ class RemoteMachine(Machine):
self._tunnel = tunnel_dispatcher.Tunnel(name, **kwargs)
self._tunnel.connect()
self._docker_client = self._tunnel.get_docker_client()
+ self._has_installers = False
def run(self, cmd: str) -> Tuple[str, str]:
return self._ssh_connection.run(cmd)
@@ -210,14 +212,45 @@ class RemoteMachine(Machine):
stdout, stderr = self._ssh_connection.run("cat '{}'".format(path))
return stdout + stderr
+ def install(self,
+ installer: str,
+ results: List[bool] = None,
+ index: int = -1):
+ """Method unique to RemoteMachine to handle installation of installers.
+
+ Handles installers, which install things that may change between runs (e.g.
+ runsc). Usually called from gcloud_producer, which expects this method to
+ to store results.
+
+ Args:
+ installer: the installer target to run.
+ results: Passed by the caller of where to store success.
+ index: Index for this method to store the result in the passed results
+ list.
+ """
+ # This generates a tarball of the full installer root (which will generate
+ # be the full bazel root directory) and sends it over.
+ if not self._has_installers:
+ archive = self._ssh_connection.send_installers()
+ self.run("tar -xvf {archive} -C {dir}".format(
+ archive=archive, dir=harness.REMOTE_INSTALLERS_PATH))
+ self._has_installers = True
+
+ # Execute the remote installer.
+ self.run("sudo {dir}/{file}".format(
+ dir=harness.REMOTE_INSTALLERS_PATH, file=installer))
+ if results:
+ results[index] = True
+
def pull(self, workload: str) -> str:
# Push to the remote machine and build.
logging.info("Building %s@%s remotely...", workload, self._name)
remote_path = self._ssh_connection.send_workload(workload)
+ remote_dir = os.path.dirname(remote_path)
# Workloads are all tarballs.
- self.run("tar -xvf {remote_path}/tar.tar -C {remote_path}".format(
- remote_path=remote_path))
- self.run("docker build --tag={} {}".format(workload, remote_path))
+ self.run("tar -xvf {remote_path} -C {remote_dir}".format(
+ remote_path=remote_path, remote_dir=remote_dir))
+ self.run("docker build --tag={} {}".format(workload, remote_dir))
return workload # Workload is the tag.
def container(self, image: str, **kwargs) -> container.Container:
diff --git a/benchmarks/harness/machine_producers/BUILD b/benchmarks/harness/machine_producers/BUILD
index 48ea0ef39..3711a397f 100644
--- a/benchmarks/harness/machine_producers/BUILD
+++ b/benchmarks/harness/machine_producers/BUILD
@@ -76,5 +76,6 @@ py_test(
python_version = "PY3",
tags = [
"local",
+ "manual",
],
)
diff --git a/benchmarks/harness/machine_producers/gcloud_producer.py b/benchmarks/harness/machine_producers/gcloud_producer.py
index e0b77d52b..513d16e4f 100644
--- a/benchmarks/harness/machine_producers/gcloud_producer.py
+++ b/benchmarks/harness/machine_producers/gcloud_producer.py
@@ -1,5 +1,5 @@
# python3
-# Copyright 2019 Google LLC
+# Copyright 2019 The gVisor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -46,12 +46,11 @@ class GCloudProducer(machine_producer.MachineProducer):
Produces Machine objects backed by GCP instances.
Attributes:
- project: The GCP project name under which to create the machines.
- ssh_key_file: path to a valid ssh private key. See README on vaild ssh keys.
image: image name as a string.
- image_project: image project as a string.
- machine_type: type of GCP to create. e.g. n1-standard-4
zone: string to a valid GCP zone.
+ machine_type: type of GCP to create (e.g. n1-standard-4).
+ installers: list of installers post-boot.
+ ssh_key_file: path to a valid ssh private key. See README on vaild ssh keys.
ssh_user: string of user name for ssh_key
ssh_password: string of password for ssh key
mock: a mock printer which will print mock data if required. Mock data is
@@ -60,21 +59,19 @@ class GCloudProducer(machine_producer.MachineProducer):
"""
def __init__(self,
- project: str,
- ssh_key_file: str,
image: str,
- image_project: str,
- machine_type: str,
zone: str,
+ machine_type: str,
+ installers: List[str],
+ ssh_key_file: str,
ssh_user: str,
ssh_password: str,
mock: gcloud_mock_recorder.MockPrinter = None):
- self.project = project
- self.ssh_key_file = ssh_key_file
self.image = image
- self.image_project = image_project
- self.machine_type = machine_type
self.zone = zone
+ self.machine_type = machine_type
+ self.installers = installers
+ self.ssh_key_file = ssh_key_file
self.ssh_user = ssh_user
self.ssh_password = ssh_password
self.mock = mock
@@ -87,10 +84,34 @@ class GCloudProducer(machine_producer.MachineProducer):
"Cannot ask for {num} machines!".format(num=num_machines))
with self.condition:
names = self._get_unique_names(num_machines)
- self._build_instances(names)
- instances = self._start_command(names)
+ instances = self._build_instances(names)
self._add_ssh_key_to_instances(names)
- return self._machines_from_instances(instances)
+ machines = self._machines_from_instances(instances)
+
+ # Install all bits in lock-step.
+ #
+ # This will perform paralell installations for however many machines we
+ # have, but it's easy to track errors because if installing (a, b, c), we
+ # won't install "c" until "b" is installed on all machines.
+ for installer in self.installers:
+ threads = [None] * len(machines)
+ results = [False] * len(machines)
+ for i in range(len(machines)):
+ threads[i] = threading.Thread(
+ target=machines[i].install, args=(installer, results, i))
+ threads[i].start()
+ for thread in threads:
+ thread.join()
+ for result in results:
+ if not result:
+ raise NotImplementedError(
+ "Installers failed on at least one machine!")
+
+ # Add this user to each machine's docker group.
+ for m in machines:
+ m.run("sudo setfacl -m user:$USER:rw /var/run/docker.sock")
+
+ return machines
def release_machines(self, machine_list: List[machine.Machine]):
"""Releases the requested number of machines, deleting the instances."""
@@ -123,15 +144,7 @@ class GCloudProducer(machine_producer.MachineProducer):
def _get_unique_names(self, num_names) -> List[str]:
"""Returns num_names unique names based on data from the GCP project."""
- curr_machines = self._list_machines()
- curr_names = set([machine["name"] for machine in curr_machines])
- ret = []
- while len(ret) < num_names:
- new_name = "machine-" + str(uuid.uuid4())
- if new_name not in curr_names:
- ret.append(new_name)
- curr_names.update(new_name)
- return ret
+ return ["machine-" + str(uuid.uuid4()) for _ in range(0, num_names)]
def _build_instances(self, names: List[str]) -> List[Dict[str, Any]]:
"""Creates instances using gcloud command.
@@ -151,34 +164,9 @@ class GCloudProducer(machine_producer.MachineProducer):
"_build_instances cannot create instances without names.")
cmd = "gcloud compute instances create".split(" ")
cmd.extend(names)
- cmd.extend(
- "--preemptible --image={image} --zone={zone} --machine-type={machine_type}"
- .format(
- image=self.image, zone=self.zone,
- machine_type=self.machine_type).split(" "))
- if self.image_project:
- cmd.append("--image-project={project}".format(project=self.image_project))
- res = self._run_command(cmd)
- return json.loads(res.stdout)
-
- def _start_command(self, names):
- """Starts instances using gcloud command.
-
- Runs the command `gcloud compute instances start` on list of instances by
- name and returns json data on started instances on success.
-
- Args:
- names: list of names of instances to start.
-
- Returns:
- List of json data describing started machines.
- """
- if not names:
- raise ValueError("_start_command cannot start empty instance list.")
- cmd = "gcloud compute instances start".split(" ")
- cmd.extend(names)
- cmd.append("--zone={zone}".format(zone=self.zone))
- cmd.append("--project={project}".format(project=self.project))
+ cmd.append("--image=" + self.image)
+ cmd.append("--zone=" + self.zone)
+ cmd.append("--machine-type=" + self.machine_type)
res = self._run_command(cmd)
return json.loads(res.stdout)
@@ -186,7 +174,7 @@ class GCloudProducer(machine_producer.MachineProducer):
"""Adds ssh key to instances by calling gcloud ssh command.
Runs the command `gcloud compute ssh instance_name` on list of images by
- name. Tries to ssh into given instance
+ name. Tries to ssh into given instance.
Args:
names: list of machine names to which to add the ssh-key
@@ -202,30 +190,18 @@ class GCloudProducer(machine_producer.MachineProducer):
cmd.append("--ssh-key-file={key}".format(key=self.ssh_key_file))
cmd.append("--zone={zone}".format(zone=self.zone))
cmd.append("--command=uname")
+ cmd.append("--ssh-key-expire-after=60m")
timeout = datetime.timedelta(seconds=5 * 60)
start = datetime.datetime.now()
while datetime.datetime.now() <= timeout + start:
try:
self._run_command(cmd)
break
- except subprocess.CalledProcessError as e:
+ except subprocess.CalledProcessError:
if datetime.datetime.now() > timeout + start:
raise TimeoutError(
"Could not SSH into instance after 5 min: {name}".format(
name=name))
- # 255 is the returncode for ssh connection refused.
- elif e.returncode == 255:
-
- continue
- else:
- raise e
-
- def _list_machines(self) -> List[Dict[str, Any]]:
- """Runs `list` gcloud command and returns list of Machine data."""
- cmd = "gcloud compute instances list --project {project}".format(
- project=self.project).split(" ")
- res = self._run_command(cmd)
- return json.loads(res.stdout)
def _run_command(self,
cmd: List[str],
@@ -261,7 +237,7 @@ class GCloudProducer(machine_producer.MachineProducer):
self.mock.record(res)
if res.returncode != 0:
raise subprocess.CalledProcessError(
- cmd=res.args,
+ cmd=" ".join(res.args),
output=res.stdout,
stderr=res.stderr,
returncode=res.returncode)
diff --git a/benchmarks/harness/ssh_connection.py b/benchmarks/harness/ssh_connection.py
index e0bf258f1..a50e34293 100644
--- a/benchmarks/harness/ssh_connection.py
+++ b/benchmarks/harness/ssh_connection.py
@@ -1,5 +1,5 @@
# python3
-# Copyright 2019 Google LLC
+# Copyright 2019 The gVisor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@
# limitations under the License.
"""SSHConnection handles the details of SSH connections."""
+
import os
import warnings
@@ -24,18 +25,24 @@ from benchmarks import harness
warnings.filterwarnings(action="ignore", module=".*paramiko.*")
-def send_one_file(client: paramiko.SSHClient, path: str, remote_dir: str):
+def send_one_file(client: paramiko.SSHClient, path: str,
+ remote_dir: str) -> str:
"""Sends a single file via an SSH client.
Args:
client: The existing SSH client.
path: The local path.
remote_dir: The remote directory.
+
+ Returns:
+ :return: The remote path as a string.
"""
filename = path.split("/").pop()
- client.exec_command("mkdir -p " + remote_dir)
+ if remote_dir != ".":
+ client.exec_command("mkdir -p " + remote_dir)
with client.open_sftp() as ftp_client:
ftp_client.put(path, os.path.join(remote_dir, filename))
+ return os.path.join(remote_dir, filename)
class SSHConnection:
@@ -103,6 +110,12 @@ class SSHConnection:
The remote path.
"""
with self._client() as client:
- send_one_file(client, harness.LOCAL_WORKLOADS_PATH.format(name),
- harness.REMOTE_WORKLOADS_PATH.format(name))
- return harness.REMOTE_WORKLOADS_PATH.format(name)
+ return send_one_file(client, harness.LOCAL_WORKLOADS_PATH.format(name),
+ harness.REMOTE_WORKLOADS_PATH.format(name))
+
+ def send_installers(self) -> str:
+ with self._client() as client:
+ return send_one_file(
+ client,
+ path=harness.INSTALLER_ARCHIVE,
+ remote_dir=harness.REMOTE_INSTALLERS_PATH)