From e2dce046037c30b585cc62db45d517f59d1a08fc Mon Sep 17 00:00:00 2001
From: Zach Koopmans <zkoopmans@google.com>
Date: Mon, 26 Oct 2020 10:27:25 -0700
Subject: Add parser for open source benchmarks.

Add a parser binary for parsing files containing
Benchmark output and sending data to BigQuery.

PiperOrigin-RevId: 339066396
---
 tools/parsers/BUILD             |  14 ++++-
 tools/parsers/go_parser.go      |  17 +++---
 tools/parsers/go_parser_test.go |  12 ++--
 tools/parsers/parser_main.go    | 129 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 155 insertions(+), 17 deletions(-)
 create mode 100644 tools/parsers/parser_main.go

(limited to 'tools/parsers')

diff --git a/tools/parsers/BUILD b/tools/parsers/BUILD
index 8038be606..dab954e25 100644
--- a/tools/parsers/BUILD
+++ b/tools/parsers/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_binary", "go_library", "go_test")
 
 package(licenses = ["notice"])
 
@@ -27,3 +27,15 @@ go_library(
         "//tools/bigquery",
     ],
 )
+
+go_binary(
+    name = "parser",
+    testonly = 1,
+    srcs = ["parser_main.go"],
+    nogo = False,
+    deps = [
+        ":parsers",
+        "//runsc/flag",
+        "//tools/bigquery",
+    ],
+)
diff --git a/tools/parsers/go_parser.go b/tools/parsers/go_parser.go
index 2cf74c883..df4875e6a 100644
--- a/tools/parsers/go_parser.go
+++ b/tools/parsers/go_parser.go
@@ -27,20 +27,21 @@ import (
 	"gvisor.dev/gvisor/tools/bigquery"
 )
 
-// parseOutput expects golang benchmark output returns a Benchmark struct formatted for BigQuery.
-func parseOutput(output string, metadata *bigquery.Metadata, official bool) ([]*bigquery.Benchmark, error) {
-	var benchmarks []*bigquery.Benchmark
+// ParseOutput parses golang benchmark output line by line and returns a
+// bigquery.Suite called name, or an error naming the first line that fails.
+func ParseOutput(output string, name string, official bool) (*bigquery.Suite, error) {
+	suite := bigquery.NewSuite(name)
 	lines := strings.Split(output, "\n")
 	for _, line := range lines {
-		bm, err := parseLine(line, metadata, official)
+		bm, err := parseLine(line, official)
 		if err != nil {
 			return nil, fmt.Errorf("failed to parse line '%s': %v", line, err)
 		}
 		if bm != nil {
-			benchmarks = append(benchmarks, bm)
+			suite.Benchmarks = append(suite.Benchmarks, bm)
 		}
 	}
-	return benchmarks, nil
+	return suite, nil
 }
 
 // parseLine handles parsing a benchmark line into a bigquery.Benchmark.
@@ -58,9 +59,8 @@ func parseOutput(output string, metadata *bigquery.Metadata, official bool) ([]*
 //		{Name: ns/op, Unit: ns/op, Sample: 1397875880}
 //		{Name: requests_per_second, Unit: QPS, Sample: 140 }
 //  }
-//  Metadata: metadata
 //}
-func parseLine(line string, metadata *bigquery.Metadata, official bool) (*bigquery.Benchmark, error) {
+func parseLine(line string, official bool) (*bigquery.Benchmark, error) {
 	fields := strings.Fields(line)
 
 	// Check if this line is a Benchmark line. Otherwise ignore the line.
@@ -79,7 +79,6 @@ func parseLine(line string, metadata *bigquery.Metadata, official bool) (*bigque
 	}
 
 	bm := bigquery.NewBenchmark(name, iters, official)
-	bm.Metadata = metadata
 	for _, p := range params {
 		bm.AddCondition(p.Name, p.Value)
 	}
diff --git a/tools/parsers/go_parser_test.go b/tools/parsers/go_parser_test.go
index 36996b7c8..0aa1152a2 100644
--- a/tools/parsers/go_parser_test.go
+++ b/tools/parsers/go_parser_test.go
@@ -94,13 +94,11 @@ func TestParseLine(t *testing.T) {
 
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			got, err := parseLine(tc.data, nil, false)
+			got, err := parseLine(tc.data, false)
 			if err != nil {
 				t.Fatalf("parseLine failed with: %v", err)
 			}
 
-			tc.want.Timestamp = got.Timestamp
-
 			if !cmp.Equal(tc.want, got, nil) {
 				for _, c := range got.Condition {
 					t.Logf("Cond: %+v", c)
@@ -150,14 +148,14 @@ BenchmarkRuby/server_threads.5-6 1	1416003331 ns/op	0.00950 average_latency.s 46
 
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			bms, err := parseOutput(tc.data, nil, false)
+			suite, err := ParseOutput(tc.data, "", false)
 			if err != nil {
 				t.Fatalf("parseOutput failed: %v", err)
-			} else if len(bms) != tc.numBenchmarks {
-				t.Fatalf("NumBenchmarks failed want: %d got: %d %+v", tc.numBenchmarks, len(bms), bms)
+			} else if len(suite.Benchmarks) != tc.numBenchmarks {
+				t.Fatalf("NumBenchmarks failed want: %d got: %d %+v", tc.numBenchmarks, len(suite.Benchmarks), suite.Benchmarks)
 			}
 
-			for _, bm := range bms {
+			for _, bm := range suite.Benchmarks {
 				if len(bm.Metric) != tc.numMetrics {
 					t.Fatalf("NumMetrics failed want: %d got: %d %+v", tc.numMetrics, len(bm.Metric), bm.Metric)
 				}
diff --git a/tools/parsers/parser_main.go b/tools/parsers/parser_main.go
new file mode 100644
index 000000000..6c6182464
--- /dev/null
+++ b/tools/parsers/parser_main.go
@@ -0,0 +1,129 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Binary parser parses Benchmark data from golang benchmarks,
+// puts it into a Schema for BigQuery, and sends it to BigQuery.
+// parser will also initialize a table with the Benchmarks BigQuery schema.
+package main
+
+import (
+	"context"
+	"fmt"
+	"io/ioutil"
+	"os"
+
+	"gvisor.dev/gvisor/runsc/flag"
+	bq "gvisor.dev/gvisor/tools/bigquery"
+	"gvisor.dev/gvisor/tools/parsers"
+)
+
+const (
+	initString       = "init"
+	initDescription  = "initializes a new table with benchmarks schema"
+	parseString      = "parse"
+	parseDescription = "parses given benchmarks file and sends it to BigQuery table."
+)
+
+var (
+	// initCmd and its flags define the init command, which creates a new dataset/table
+	// in the given project with the schema in //tools/bigquery/bigquery.go. If the
+	// table/dataset exists or has been initialized, init has no effect and returns.
+	initCmd     = flag.NewFlagSet(initString, flag.ContinueOnError)
+	initProject = initCmd.String("project", "", "GCP project to send benchmarks.")
+	initDataset = initCmd.String("dataset", "", "dataset to send benchmarks data.")
+	initTable   = initCmd.String("table", "", "table to send benchmarks data.")
+
+	// parseCmd and its flags define the parse command, which parses the benchmark
+	// data in `file`, tags it with `cl` and `git_commit`, and sends it to the table.
+	parseCmd     = flag.NewFlagSet(parseString, flag.ContinueOnError)
+	file         = parseCmd.String("file", "", "file to parse for benchmarks")
+	name         = parseCmd.String("suite_name", "", "name of the benchmark suite")
+	clNumber     = parseCmd.String("cl", "", "changelist number of this run")
+	gitCommit    = parseCmd.String("git_commit", "", "git commit sha for this run")
+	parseProject = parseCmd.String("project", "", "GCP project to send benchmarks.")
+	parseDataset = parseCmd.String("dataset", "", "dataset to send benchmarks data.")
+	parseTable   = parseCmd.String("table", "", "table to send benchmarks data.")
+	official     = parseCmd.Bool("official", false, "mark input data as official.")
+)
+
+// initBenchmarks initializes the dataset/table named by the init command's flags.
+func initBenchmarks(ctx context.Context) error {
+	return bq.InitBigQuery(ctx, *initProject, *initDataset, *initTable, nil)
+}
+
+// parseBenchmarks reads the benchmark output stored in *file, converts it
+// into a BigQuery suite, tags that suite with the changelist and git commit
+// given on the command line, and sends the result to the requested table.
+// Errors from reading, parsing, or sending are returned to the caller.
+func parseBenchmarks(ctx context.Context) error {
+	contents, err := ioutil.ReadFile(*file)
+	if err != nil {
+		return fmt.Errorf("failed to read file: %v", err)
+	}
+
+	// Turn the raw benchmark text into a suite named after *name.
+	parsed, err := parsers.ParseOutput(string(contents), *name, *official)
+	if err != nil {
+		return fmt.Errorf("failed parse data: %v", err)
+	}
+
+	// Record which change produced these results alongside the data itself.
+	parsed.Conditions = append(
+		parsed.Conditions,
+		&bq.Condition{Name: "change_list", Value: *clNumber},
+		&bq.Condition{Name: "commit", Value: *gitCommit},
+	)
+
+	return bq.SendBenchmarks(ctx, parsed, *parseProject, *parseDataset, *parseTable, nil)
+}
+
+// main dispatches on os.Args[1] to the "init" or "parse" command; any other
+// invocation prints the usage text. A failing command exits with status 1.
+func main() {
+	ctx := context.Background()
+	switch {
+	case len(os.Args) >= 2 && os.Args[1] == initString:
+		if err := initCmd.Parse(os.Args[2:]); err != nil {
+			fmt.Fprintf(os.Stderr, "failed parse flags: %v\n", err)
+			os.Exit(1)
+		}
+		if err := initBenchmarks(ctx); err != nil {
+			const failure = "failed to initialize project: %s dataset: %s table: %s: %v\n"
+			fmt.Fprintf(os.Stderr, failure, *initProject, *initDataset, *initTable, err)
+			os.Exit(1)
+		}
+	case len(os.Args) >= 2 && os.Args[1] == parseString:
+		if err := parseCmd.Parse(os.Args[2:]); err != nil {
+			fmt.Fprintf(os.Stderr, "failed parse flags: %v\n", err)
+			os.Exit(1)
+		}
+		if err := parseBenchmarks(ctx); err != nil {
+			fmt.Fprintf(os.Stderr, "failed parse benchmarks: %v\n", err)
+			os.Exit(1)
+		}
+	default:
+		printUsage()
+	}
+}
+
+// printUsage writes the top-level usage text, listing each command, to stderr.
+func printUsage() {
+	const format = `Usage: parser <command> <flags> ...
+
+Available commands:
+  %s     %s
+  %s     %s
+`
+	fmt.Fprintf(os.Stderr, format, initCmd.Name(), initDescription, parseCmd.Name(), parseDescription)
+}
-- 
cgit v1.2.3