summaryrefslogtreecommitdiffhomepage
path: root/tools
diff options
context:
space:
mode:
authorZach Koopmans <zkoopmans@google.com>2020-10-26 10:27:25 -0700
committergVisor bot <gvisor-bot@google.com>2020-10-26 10:29:20 -0700
commite2dce046037c30b585cc62db45d517f59d1a08fc (patch)
tree6344e2ed13193c0822d3136d495bef111f3f9380 /tools
parent73a18635385d6a90942370e15fe2cbeb2a5a4386 (diff)
Add parser for open source benchmarks.
Add a parser binary for parsing files containing Benchmark output and sending data to BigQuery. PiperOrigin-RevId: 339066396
Diffstat (limited to 'tools')
-rw-r--r--tools/bigquery/BUILD5
-rw-r--r--tools/bigquery/bigquery.go60
-rw-r--r--tools/parsers/BUILD14
-rw-r--r--tools/parsers/go_parser.go17
-rw-r--r--tools/parsers/go_parser_test.go12
-rw-r--r--tools/parsers/parser_main.go129
6 files changed, 192 insertions, 45 deletions
diff --git a/tools/bigquery/BUILD b/tools/bigquery/BUILD
index 2b0062a63..1cea9e1c9 100644
--- a/tools/bigquery/BUILD
+++ b/tools/bigquery/BUILD
@@ -9,5 +9,8 @@ go_library(
visibility = [
"//:sandbox",
],
- deps = ["@com_google_cloud_go_bigquery//:go_default_library"],
+ deps = [
+ "@com_google_cloud_go_bigquery//:go_default_library",
+ "@org_golang_google_api//option:go_default_library",
+ ],
)
diff --git a/tools/bigquery/bigquery.go b/tools/bigquery/bigquery.go
index 5f1a882de..34b270cc0 100644
--- a/tools/bigquery/bigquery.go
+++ b/tools/bigquery/bigquery.go
@@ -25,22 +25,30 @@ import (
"time"
bq "cloud.google.com/go/bigquery"
+ "google.golang.org/api/option"
)
-// Benchmark is the top level structure of recorded benchmark data. BigQuery
+// Suite is the top level structure for a benchmark run. BigQuery
// will infer the schema from this.
+type Suite struct {
+ Name string `bq:"name"`
+ Conditions []*Condition `bq:"conditions"`
+ Benchmarks []*Benchmark `bq:"benchmarks"`
+ Official bool `bq:"official"`
+ Timestamp time.Time `bq:"timestamp"`
+}
+
+// Benchmark represents an individual benchmark in a suite.
type Benchmark struct {
Name string `bq:"name"`
Condition []*Condition `bq:"condition"`
- Timestamp time.Time `bq:"timestamp"`
- Official bool `bq:"official"`
Metric []*Metric `bq:"metric"`
- Metadata *Metadata `bq:"metadata"`
}
-// Condition represents qualifiers for the benchmark. For example:
+// Condition represents qualifiers for the benchmark or suite. For example:
// Get_Pid/1/real_time would have Benchmark Name "Get_Pid" with "1"
-// and "real_time" parameters as conditions.
+// and "real_time" parameters as conditions. Suite conditions include
+// information such as the CL number and platform name.
type Condition struct {
Name string `bq:"name"`
Value string `bq:"value"`
@@ -53,19 +61,9 @@ type Metric struct {
Sample float64 `bq:"sample"`
}
-// Metadata about this benchmark.
-type Metadata struct {
- CL string `bq:"changelist"`
- IterationID string `bq:"iteration_id"`
- PendingCL string `bq:"pending_cl"`
- Workflow string `bq:"workflow"`
- Platform string `bq:"platform"`
- Gofer string `bq:"gofer"`
-}
-
// InitBigQuery initializes a BigQuery dataset/table in the project. If the dataset/table already exists, it is not duplicated.
-func InitBigQuery(ctx context.Context, projectID, datasetID, tableID string) error {
- client, err := bq.NewClient(ctx, projectID)
+func InitBigQuery(ctx context.Context, projectID, datasetID, tableID string, opts []option.ClientOption) error {
+ client, err := bq.NewClient(ctx, projectID, opts...)
if err != nil {
return fmt.Errorf("failed to initialize client on project %s: %v", projectID, err)
}
@@ -77,7 +75,7 @@ func InitBigQuery(ctx context.Context, projectID, datasetID, tableID string) err
}
table := dataset.Table(tableID)
- schema, err := bq.InferSchema(Benchmark{})
+ schema, err := bq.InferSchema(Suite{})
if err != nil {
return fmt.Errorf("failed to infer schema: %v", err)
}
@@ -109,24 +107,32 @@ func (bm *Benchmark) AddMetric(metricName, unit string, sample float64) {
// NewBenchmark initializes a new benchmark.
func NewBenchmark(name string, iters int, official bool) *Benchmark {
return &Benchmark{
- Name: name,
- Timestamp: time.Now().UTC(),
- Official: official,
- Metric: make([]*Metric, 0),
+ Name: name,
+ Metric: make([]*Metric, 0),
+ }
+}
+
+// NewSuite initializes a new Suite.
+func NewSuite(name string) *Suite {
+ return &Suite{
+ Name: name,
+ Timestamp: time.Now().UTC(),
+ Benchmarks: make([]*Benchmark, 0),
+ Conditions: make([]*Condition, 0),
}
}
// SendBenchmarks sends the slice of benchmarks to the BigQuery dataset/table.
-func SendBenchmarks(ctx context.Context, benchmarks []*Benchmark, projectID, datasetID, tableID string) error {
- client, err := bq.NewClient(ctx, projectID)
+func SendBenchmarks(ctx context.Context, suite *Suite, projectID, datasetID, tableID string, opts []option.ClientOption) error {
+ client, err := bq.NewClient(ctx, projectID, opts...)
if err != nil {
return fmt.Errorf("failed to initialize client on project: %s: %v", projectID, err)
}
defer client.Close()
uploader := client.Dataset(datasetID).Table(tableID).Uploader()
- if err = uploader.Put(ctx, benchmarks); err != nil {
- return fmt.Errorf("failed to upload benchmarks to proejct %s, table %s.%s: %v", projectID, datasetID, tableID, err)
+ if err = uploader.Put(ctx, suite); err != nil {
+ return fmt.Errorf("failed to upload benchmarks %s to project %s, table %s.%s: %v", suite.Name, projectID, datasetID, tableID, err)
}
return nil
diff --git a/tools/parsers/BUILD b/tools/parsers/BUILD
index 8038be606..dab954e25 100644
--- a/tools/parsers/BUILD
+++ b/tools/parsers/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_binary", "go_library", "go_test")
package(licenses = ["notice"])
@@ -27,3 +27,15 @@ go_library(
"//tools/bigquery",
],
)
+
+go_binary(
+ name = "parser",
+ testonly = 1,
+ srcs = ["parser_main.go"],
+ nogo = False,
+ deps = [
+ ":parsers",
+ "//runsc/flag",
+ "//tools/bigquery",
+ ],
+)
diff --git a/tools/parsers/go_parser.go b/tools/parsers/go_parser.go
index 2cf74c883..df4875e6a 100644
--- a/tools/parsers/go_parser.go
+++ b/tools/parsers/go_parser.go
@@ -27,20 +27,21 @@ import (
"gvisor.dev/gvisor/tools/bigquery"
)
-// parseOutput expects golang benchmark output returns a Benchmark struct formatted for BigQuery.
-func parseOutput(output string, metadata *bigquery.Metadata, official bool) ([]*bigquery.Benchmark, error) {
- var benchmarks []*bigquery.Benchmark
+// ParseOutput expects golang benchmark output and returns a struct formatted
+// for BigQuery.
+func ParseOutput(output string, name string, official bool) (*bigquery.Suite, error) {
+ suite := bigquery.NewSuite(name)
lines := strings.Split(output, "\n")
for _, line := range lines {
- bm, err := parseLine(line, metadata, official)
+ bm, err := parseLine(line, official)
if err != nil {
return nil, fmt.Errorf("failed to parse line '%s': %v", line, err)
}
if bm != nil {
- benchmarks = append(benchmarks, bm)
+ suite.Benchmarks = append(suite.Benchmarks, bm)
}
}
- return benchmarks, nil
+ return suite, nil
}
// parseLine handles parsing a benchmark line into a bigquery.Benchmark.
@@ -58,9 +59,8 @@ func parseOutput(output string, metadata *bigquery.Metadata, official bool) ([]*
// {Name: ns/op, Unit: ns/op, Sample: 1397875880}
// {Name: requests_per_second, Unit: QPS, Sample: 140 }
// }
-// Metadata: metadata
//}
-func parseLine(line string, metadata *bigquery.Metadata, official bool) (*bigquery.Benchmark, error) {
+func parseLine(line string, official bool) (*bigquery.Benchmark, error) {
fields := strings.Fields(line)
// Check if this line is a Benchmark line. Otherwise ignore the line.
@@ -79,7 +79,6 @@ func parseLine(line string, metadata *bigquery.Metadata, official bool) (*bigque
}
bm := bigquery.NewBenchmark(name, iters, official)
- bm.Metadata = metadata
for _, p := range params {
bm.AddCondition(p.Name, p.Value)
}
diff --git a/tools/parsers/go_parser_test.go b/tools/parsers/go_parser_test.go
index 36996b7c8..0aa1152a2 100644
--- a/tools/parsers/go_parser_test.go
+++ b/tools/parsers/go_parser_test.go
@@ -94,13 +94,11 @@ func TestParseLine(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- got, err := parseLine(tc.data, nil, false)
+ got, err := parseLine(tc.data, false)
if err != nil {
t.Fatalf("parseLine failed with: %v", err)
}
- tc.want.Timestamp = got.Timestamp
-
if !cmp.Equal(tc.want, got, nil) {
for _, c := range got.Condition {
t.Logf("Cond: %+v", c)
@@ -150,14 +148,14 @@ BenchmarkRuby/server_threads.5-6 1 1416003331 ns/op 0.00950 average_latency.s 46
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- bms, err := parseOutput(tc.data, nil, false)
+ suite, err := ParseOutput(tc.data, "", false)
if err != nil {
t.Fatalf("parseOutput failed: %v", err)
- } else if len(bms) != tc.numBenchmarks {
- t.Fatalf("NumBenchmarks failed want: %d got: %d %+v", tc.numBenchmarks, len(bms), bms)
+ } else if len(suite.Benchmarks) != tc.numBenchmarks {
+ t.Fatalf("NumBenchmarks failed want: %d got: %d %+v", tc.numBenchmarks, len(suite.Benchmarks), suite.Benchmarks)
}
- for _, bm := range bms {
+ for _, bm := range suite.Benchmarks {
if len(bm.Metric) != tc.numMetrics {
t.Fatalf("NumMetrics failed want: %d got: %d %+v", tc.numMetrics, len(bm.Metric), bm.Metric)
}
diff --git a/tools/parsers/parser_main.go b/tools/parsers/parser_main.go
new file mode 100644
index 000000000..6c6182464
--- /dev/null
+++ b/tools/parsers/parser_main.go
@@ -0,0 +1,129 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Binary parser parses Benchmark data from golang benchmarks,
+// puts it into a Schema for BigQuery, and sends it to BigQuery.
+// parser will also initialize a table with the Benchmarks BigQuery schema.
+package main
+
+import (
+ "context"
+ "fmt"
+ "io/ioutil"
+ "os"
+
+ "gvisor.dev/gvisor/runsc/flag"
+ bq "gvisor.dev/gvisor/tools/bigquery"
+ "gvisor.dev/gvisor/tools/parsers"
+)
+
+const (
+ initString = "init"
+ initDescription = "initializes a new table with benchmarks schema"
+ parseString = "parse"
+ parseDescription = "parses given benchmarks file and sends it to BigQuery table."
+)
+
+var (
+ // The init command will create a new dataset/table in the given project and initialize
+ // the table with the schema in //tools/bigquery/bigquery.go. If the table/dataset exists
+ // or has been initialized, init has no effect and successfully returns.
+ initCmd = flag.NewFlagSet(initString, flag.ContinueOnError)
+ initProject = initCmd.String("project", "", "GCP project to send benchmarks.")
+ initDataset = initCmd.String("dataset", "", "dataset to send benchmarks data.")
+ initTable = initCmd.String("table", "", "table to send benchmarks data.")
+
+ // The parse command parses benchmark data in `file` and sends it to the
+ // requested table.
+ parseCmd = flag.NewFlagSet(parseString, flag.ContinueOnError)
+ file = parseCmd.String("file", "", "file to parse for benchmarks")
+ name = parseCmd.String("suite_name", "", "name of the benchmark suite")
+ clNumber = parseCmd.String("cl", "", "changelist number of this run")
+ gitCommit = parseCmd.String("git_commit", "", "git commit sha for this run")
+ parseProject = parseCmd.String("project", "", "GCP project to send benchmarks.")
+ parseDataset = parseCmd.String("dataset", "", "dataset to send benchmarks data.")
+ parseTable = parseCmd.String("table", "", "table to send benchmarks data.")
+ official = parseCmd.Bool("official", false, "mark input data as official.")
+)
+
+// initBenchmarks initializes a dataset/table in a BigQuery project.
+func initBenchmarks(ctx context.Context) error {
+ return bq.InitBigQuery(ctx, *initProject, *initDataset, *initTable, nil)
+}
+
+// parseBenchmarks parses the given file into the BigQuery schema,
+// adds some custom data for the commit, and sends the data to BigQuery.
+func parseBenchmarks(ctx context.Context) error {
+ data, err := ioutil.ReadFile(*file)
+ if err != nil {
+ return fmt.Errorf("failed to read file: %v", err)
+ }
+ suite, err := parsers.ParseOutput(string(data), *name, *official)
+ if err != nil {
+ return fmt.Errorf("failed parse data: %v", err)
+ }
+ extraConditions := []*bq.Condition{
+ {
+ Name: "change_list",
+ Value: *clNumber,
+ },
+ {
+ Name: "commit",
+ Value: *gitCommit,
+ },
+ }
+
+ suite.Conditions = append(suite.Conditions, extraConditions...)
+ return bq.SendBenchmarks(ctx, suite, *parseProject, *parseDataset, *parseTable, nil)
+}
+
+func main() {
+ ctx := context.Background()
+ switch {
+ // the "init" command
+ case len(os.Args) >= 2 && os.Args[1] == initString:
+ if err := initCmd.Parse(os.Args[2:]); err != nil {
+ fmt.Fprintf(os.Stderr, "failed parse flags: %v", err)
+ os.Exit(1)
+ }
+ if err := initBenchmarks(ctx); err != nil {
+ failure := "failed to initialize project: %s dataset: %s table: %s: %v"
+ fmt.Fprintf(os.Stderr, failure, *parseProject, *parseDataset, *parseTable, err)
+ os.Exit(1)
+ }
+ // the "parse" command.
+ case len(os.Args) >= 2 && os.Args[1] == parseString:
+ if err := parseCmd.Parse(os.Args[2:]); err != nil {
+ fmt.Fprintf(os.Stderr, "failed parse flags: %v", err)
+ os.Exit(1)
+ }
+ if err := parseBenchmarks(ctx); err != nil {
+ fmt.Fprintf(os.Stderr, "failed parse benchmarks: %v", err)
+ os.Exit(1)
+ }
+ default:
+ printUsage()
+ }
+}
+
+// printUsage prints the top level usage string.
+func printUsage() {
+ usage := `Usage: parser <command> <flags> ...
+
+Available commands:
+ %s %s
+ %s %s
+`
+ fmt.Fprintf(os.Stderr, usage, initCmd.Name(), initDescription, parseCmd.Name(), parseDescription)
+}