Add parser for open source benchmarks.

Add a parser binary for parsing files containing Benchmark output and sending data to BigQuery.

PiperOrigin-RevId: 339066396
author: Zach Koopmans <zkoopmans@google.com> 2020-10-26 10:27:25 -0700
committer: gVisor bot <gvisor-bot@google.com> 2020-10-26 10:29:20 -0700
commit: e2dce046037c30b585cc62db45d517f59d1a08fc (patch)
tree: 6344e2ed13193c0822d3136d495bef111f3f9380 /tools
parent: 73a18635385d6a90942370e15fe2cbeb2a5a4386 (diff)
6 files changed, 192 insertions, 45 deletions
diff --git a/tools/bigquery/BUILD b/tools/bigquery/BUILD
index 2b0062a63..1cea9e1c9 100644
--- a/tools/bigquery/BUILD
+++ b/tools/bigquery/BUILD
@@ -9,5 +9,8 @@ go_library(
     visibility = [
         "//:sandbox",
     ],
-    deps = ["@com_google_cloud_go_bigquery//:go_default_library"],
+    deps = [
+        "@com_google_cloud_go_bigquery//:go_default_library",
+        "@org_golang_google_api//option:go_default_library",
+    ],
 )
diff --git a/tools/bigquery/bigquery.go b/tools/bigquery/bigquery.go
index 5f1a882de..34b270cc0 100644
--- a/tools/bigquery/bigquery.go
+++ b/tools/bigquery/bigquery.go
@@ -25,22 +25,30 @@ import (
 	"time"
 
 	bq "cloud.google.com/go/bigquery"
+	"google.golang.org/api/option"
 )
 
-// Benchmark is the top level structure of recorded benchmark data. BigQuery
+// Suite is the top level structure for a benchmark run. BigQuery
 // will infer the schema from this.
+type Suite struct {
+	Name       string       `bq:"name"`
+	Conditions []*Condition `bq:"conditions"`
+	Benchmarks []*Benchmark `bq:"benchmarks"`
+	Official   bool         `bq:"official"`
+	Timestamp  time.Time    `bq:"timestamp"`
+}
+
+// Benchmark represents an individual benchmark in a suite.
 type Benchmark struct {
 	Name      string       `bq:"name"`
 	Condition []*Condition `bq:"condition"`
-	Timestamp time.Time    `bq:"timestamp"`
-	Official  bool         `bq:"official"`
 	Metric    []*Metric    `bq:"metric"`
-	Metadata  *Metadata    `bq:"metadata"`
 }
 
-// Condition represents qualifiers for the benchmark. For example:
+// Condition represents qualifiers for the benchmark or suite. For example:
 // Get_Pid/1/real_time would have Benchmark Name "Get_Pid" with "1"
-// and "real_time" parameters as conditions.
+// and "real_time" parameters as conditions. Suite conditions include
+// information such as the CL number and platform name.
 type Condition struct {
 	Name  string `bq:"name"`
 	Value string `bq:"value"`
@@ -53,19 +61,9 @@ type Metric struct {
 	Sample float64 `bq:"sample"`
 }
 
-// Metadata about this benchmark.
-type Metadata struct {
-	CL          string `bq:"changelist"`
-	IterationID string `bq:"iteration_id"`
-	PendingCL   string `bq:"pending_cl"`
-	Workflow    string `bq:"workflow"`
-	Platform    string `bq:"platform"`
-	Gofer       string `bq:"gofer"`
-}
-
 // InitBigQuery initializes a BigQuery dataset/table in the project. If the dataset/table already exists, it is not duplicated.
-func InitBigQuery(ctx context.Context, projectID, datasetID, tableID string) error {
-	client, err := bq.NewClient(ctx, projectID)
+func InitBigQuery(ctx context.Context, projectID, datasetID, tableID string, opts []option.ClientOption) error {
+	client, err := bq.NewClient(ctx, projectID, opts...)
 	if err != nil {
 		return fmt.Errorf("failed to initialize client on project %s: %v", projectID, err)
 	}
@@ -77,7 +75,7 @@ func InitBigQuery(ctx context.Context, projectID, datasetID, tableID string) err
 	}
 
 	table := dataset.Table(tableID)
-	schema, err := bq.InferSchema(Benchmark{})
+	schema, err := bq.InferSchema(Suite{})
 	if err != nil {
 		return fmt.Errorf("failed to infer schema: %v", err)
 	}
@@ -109,24 +107,32 @@ func (bm *Benchmark) AddMetric(metricName, unit string, sample float64) {
 // NewBenchmark initializes a new benchmark.
 func NewBenchmark(name string, iters int, official bool) *Benchmark {
 	return &Benchmark{
-		Name:      name,
-		Timestamp: time.Now().UTC(),
-		Official:  official,
-		Metric:    make([]*Metric, 0),
+		Name:   name,
+		Metric: make([]*Metric, 0),
+	}
+}
+
+// NewSuite initializes a new Suite.
+func NewSuite(name string) *Suite {
+	return &Suite{
+		Name:       name,
+		Timestamp:  time.Now().UTC(),
+		Benchmarks: make([]*Benchmark, 0),
+		Conditions: make([]*Condition, 0),
 	}
 }
 
 // SendBenchmarks sends the slice of benchmarks to the BigQuery dataset/table.
-func SendBenchmarks(ctx context.Context, benchmarks []*Benchmark, projectID, datasetID, tableID string) error {
-	client, err := bq.NewClient(ctx, projectID)
+func SendBenchmarks(ctx context.Context, suite *Suite, projectID, datasetID, tableID string, opts []option.ClientOption) error {
+	client, err := bq.NewClient(ctx, projectID, opts...)
 	if err != nil {
 		return fmt.Errorf("failed to initialize client on project: %s: %v", projectID, err)
 	}
 	defer client.Close()
 
 	uploader := client.Dataset(datasetID).Table(tableID).Uploader()
-	if err = uploader.Put(ctx, benchmarks); err != nil {
-		return fmt.Errorf("failed to upload benchmarks to proejct %s, table %s.%s: %v", projectID, datasetID, tableID, err)
+	if err = uploader.Put(ctx, suite); err != nil {
+		return fmt.Errorf("failed to upload benchmarks %s to project %s, table %s.%s: %v", suite.Name, projectID, datasetID, tableID, err)
 	}
 
 	return nil
diff --git a/tools/parsers/BUILD b/tools/parsers/BUILD
index 8038be606..dab954e25 100644
--- a/tools/parsers/BUILD
+++ b/tools/parsers/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_binary", "go_library", "go_test")
 
 package(licenses = ["notice"])
 
@@ -27,3 +27,15 @@ go_library(
         "//tools/bigquery",
     ],
 )
+
+go_binary(
+    name = "parser",
+    testonly = 1,
+    srcs = ["parser_main.go"],
+    nogo = False,
+    deps = [
+        ":parsers",
+        "//runsc/flag",
+        "//tools/bigquery",
+    ],
+)
diff --git a/tools/parsers/go_parser.go b/tools/parsers/go_parser.go
index 2cf74c883..df4875e6a 100644
--- a/tools/parsers/go_parser.go
+++ b/tools/parsers/go_parser.go
@@ -27,20 +27,21 @@ import (
 	"gvisor.dev/gvisor/tools/bigquery"
 )
 
-// parseOutput expects golang benchmark output returns a Benchmark struct formatted for BigQuery.
-func parseOutput(output string, metadata *bigquery.Metadata, official bool) ([]*bigquery.Benchmark, error) {
-	var benchmarks []*bigquery.Benchmark
+// ParseOutput expects golang benchmark output and returns a struct formatted
+// for BigQuery.
+func ParseOutput(output string, name string, official bool) (*bigquery.Suite, error) {
+	suite := bigquery.NewSuite(name)
 	lines := strings.Split(output, "\n")
 	for _, line := range lines {
-		bm, err := parseLine(line, metadata, official)
+		bm, err := parseLine(line, official)
 		if err != nil {
 			return nil, fmt.Errorf("failed to parse line '%s': %v", line, err)
 		}
 		if bm != nil {
-			benchmarks = append(benchmarks, bm)
+			suite.Benchmarks = append(suite.Benchmarks, bm)
 		}
 	}
-	return benchmarks, nil
+	return suite, nil
 }
 
 // parseLine handles parsing a benchmark line into a bigquery.Benchmark.
@@ -58,9 +59,8 @@ func parseOutput(output string, metadata *bigquery.Metadata, official bool) ([]*
 //		{Name: ns/op, Unit: ns/op, Sample: 1397875880}
 //		{Name: requests_per_second, Unit: QPS, Sample: 140 }
 //  }
-//  Metadata: metadata
 //}
-func parseLine(line string, metadata *bigquery.Metadata, official bool) (*bigquery.Benchmark, error) {
+func parseLine(line string, official bool) (*bigquery.Benchmark, error) {
 	fields := strings.Fields(line)
 
 	// Check if this line is a Benchmark line. Otherwise ignore the line.
@@ -79,7 +79,6 @@ func parseLine(line string, metadata *bigquery.Metadata, official bool) (*bigque
 	}
 
 	bm := bigquery.NewBenchmark(name, iters, official)
-	bm.Metadata = metadata
 	for _, p := range params {
 		bm.AddCondition(p.Name, p.Value)
 	}
diff --git a/tools/parsers/go_parser_test.go b/tools/parsers/go_parser_test.go
index 36996b7c8..0aa1152a2 100644
--- a/tools/parsers/go_parser_test.go
+++ b/tools/parsers/go_parser_test.go
@@ -94,13 +94,11 @@ func TestParseLine(t *testing.T) {
 
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			got, err := parseLine(tc.data, nil, false)
+			got, err := parseLine(tc.data, false)
 			if err != nil {
 				t.Fatalf("parseLine failed with: %v", err)
 			}
 
-			tc.want.Timestamp = got.Timestamp
-
 			if !cmp.Equal(tc.want, got, nil) {
 				for _, c := range got.Condition {
 					t.Logf("Cond: %+v", c)
@@ -150,14 +148,14 @@ BenchmarkRuby/server_threads.5-6 1	1416003331 ns/op	0.00950 average_latency.s 46
 
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			bms, err := parseOutput(tc.data, nil, false)
+			suite, err := ParseOutput(tc.data, "", false)
 			if err != nil {
 				t.Fatalf("parseOutput failed: %v", err)
-			} else if len(bms) != tc.numBenchmarks {
-				t.Fatalf("NumBenchmarks failed want: %d got: %d %+v", tc.numBenchmarks, len(bms), bms)
+			} else if len(suite.Benchmarks) != tc.numBenchmarks {
+				t.Fatalf("NumBenchmarks failed want: %d got: %d %+v", tc.numBenchmarks, len(suite.Benchmarks), suite.Benchmarks)
 			}
 
-			for _, bm := range bms {
+			for _, bm := range suite.Benchmarks {
 				if len(bm.Metric) != tc.numMetrics {
 					t.Fatalf("NumMetrics failed want: %d got: %d %+v", tc.numMetrics, len(bm.Metric), bm.Metric)
 				}
diff --git a/tools/parsers/parser_main.go b/tools/parsers/parser_main.go
new file mode 100644
index 000000000..6c6182464
--- /dev/null
+++ b/tools/parsers/parser_main.go
@@ -0,0 +1,129 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Binary parser parses Benchmark data from golang benchmarks,
+// puts it into a Schema for BigQuery, and sends it to BigQuery.
+// parser will also initialize a table with the Benchmarks BigQuery schema.
+package main
+
+import (
+	"context"
+	"fmt"
+	"io/ioutil"
+	"os"
+
+	"gvisor.dev/gvisor/runsc/flag"
+	bq "gvisor.dev/gvisor/tools/bigquery"
+	"gvisor.dev/gvisor/tools/parsers"
+)
+
+const (
+	initString       = "init"
+	initDescription  = "initializes a new table with benchmarks schema"
+	parseString      = "parse"
+	parseDescription = "parses given benchmarks file and sends it to BigQuery table."
+)
+
+var (
+	// The init command will create a new dataset/table in the given project and initialize
+	// the table with the schema in //tools/bigquery/bigquery.go. If the table/dataset exists
+	// or has been initialized, init has no effect and successfully returns.
+	initCmd     = flag.NewFlagSet(initString, flag.ContinueOnError)
+	initProject = initCmd.String("project", "", "GCP project to send benchmarks.")
+	initDataset = initCmd.String("dataset", "", "dataset to send benchmarks data.")
+	initTable   = initCmd.String("table", "", "table to send benchmarks data.")
+
+	// The parse command parses benchmark data in `file` and sends it to the
+	// requested table.
+	parseCmd     = flag.NewFlagSet(parseString, flag.ContinueOnError)
+	file         = parseCmd.String("file", "", "file to parse for benchmarks")
+	name         = parseCmd.String("suite_name", "", "name of the benchmark suite")
+	clNumber     = parseCmd.String("cl", "", "changelist number of this run")
+	gitCommit    = parseCmd.String("git_commit", "", "git commit sha for this run")
+	parseProject = parseCmd.String("project", "", "GCP project to send benchmarks.")
+	parseDataset = parseCmd.String("dataset", "", "dataset to send benchmarks data.")
+	parseTable   = parseCmd.String("table", "", "table to send benchmarks data.")
+	official     = parseCmd.Bool("official", false, "mark input data as official.")
+)
+
+// initBenchmarks initializes the dataset/table named by the init flags via bq.InitBigQuery (nil client options).
+func initBenchmarks(ctx context.Context) error {
+	return bq.InitBigQuery(ctx, *initProject, *initDataset, *initTable, nil)
+}
+
+// parseBenchmarks reads the file named by -file, parses it into a bq.Suite, appends the
+// change_list (-cl) and commit (-git_commit) conditions, and sends the suite to BigQuery.
+func parseBenchmarks(ctx context.Context) error {
+	data, err := ioutil.ReadFile(*file)
+	if err != nil {
+		return fmt.Errorf("failed to read file: %v", err)
+	}
+	suite, err := parsers.ParseOutput(string(data), *name, *official)
+	if err != nil {
+		return fmt.Errorf("failed parse data: %v", err)
+	}
+	extraConditions := []*bq.Condition{
+		{
+			Name:  "change_list",
+			Value: *clNumber,
+		},
+		{
+			Name:  "commit",
+			Value: *gitCommit,
+		},
+	}
+
+	suite.Conditions = append(suite.Conditions, extraConditions...)
+	return bq.SendBenchmarks(ctx, suite, *parseProject, *parseDataset, *parseTable, nil)
+}
+
+func main() {
+	ctx := context.Background()
+	switch {
+	// The "init" command: parse the init flag set, then initialize the dataset/table.
+	case len(os.Args) >= 2 && os.Args[1] == initString:
+		if err := initCmd.Parse(os.Args[2:]); err != nil {
+			fmt.Fprintf(os.Stderr, "failed parse flags: %v", err)
+			os.Exit(1)
+		}
+		if err := initBenchmarks(ctx); err != nil {
+			failure := "failed to initialize project: %s dataset: %s table: %s: %v"
+			fmt.Fprintf(os.Stderr, failure, *initProject, *initDataset, *initTable, err)
+			os.Exit(1)
+		}
+	// The "parse" command: parse the parse flag set, then parse and upload the benchmarks file.
+	case len(os.Args) >= 2 && os.Args[1] == parseString:
+		if err := parseCmd.Parse(os.Args[2:]); err != nil {
+			fmt.Fprintf(os.Stderr, "failed parse flags: %v", err)
+			os.Exit(1)
+		}
+		if err := parseBenchmarks(ctx); err != nil {
+			fmt.Fprintf(os.Stderr, "failed parse benchmarks: %v", err)
+			os.Exit(1)
+		}
+	default:
+		printUsage()
+	}
+}
+
+// printUsage writes the top level usage string, listing the init and parse commands, to stderr.
+func printUsage() {
+	usage := `Usage: parser <command> <flags> ...
+
+Available commands:
+  %s     %s
+  %s     %s
+`
+	fmt.Fprintf(os.Stderr, usage, initCmd.Name(), initDescription, parseCmd.Name(), parseDescription)
+}
author	Zach Koopmans <zkoopmans@google.com>	2020-10-26 10:27:25 -0700
committer	gVisor bot <gvisor-bot@google.com>	2020-10-26 10:29:20 -0700
commit	e2dce046037c30b585cc62db45d517f59d1a08fc (patch)
tree	6344e2ed13193c0822d3136d495bef111f3f9380 /tools
parent	73a18635385d6a90942370e15fe2cbeb2a5a4386 (diff)