From e2dce046037c30b585cc62db45d517f59d1a08fc Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Mon, 26 Oct 2020 10:27:25 -0700 Subject: Add parser for open source benchmarks. Add a parser binary for parsing files containing Benchmark output and sending data to BigQuery. PiperOrigin-RevId: 339066396 --- tools/parsers/BUILD | 14 ++++- tools/parsers/go_parser.go | 17 +++--- tools/parsers/go_parser_test.go | 12 ++-- tools/parsers/parser_main.go | 129 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 155 insertions(+), 17 deletions(-) create mode 100644 tools/parsers/parser_main.go (limited to 'tools/parsers') diff --git a/tools/parsers/BUILD b/tools/parsers/BUILD index 8038be606..dab954e25 100644 --- a/tools/parsers/BUILD +++ b/tools/parsers/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_binary", "go_library", "go_test") package(licenses = ["notice"]) @@ -27,3 +27,15 @@ go_library( "//tools/bigquery", ], ) + +go_binary( + name = "parser", + testonly = 1, + srcs = ["parser_main.go"], + nogo = False, + deps = [ + ":parsers", + "//runsc/flag", + "//tools/bigquery", + ], +) diff --git a/tools/parsers/go_parser.go b/tools/parsers/go_parser.go index 2cf74c883..df4875e6a 100644 --- a/tools/parsers/go_parser.go +++ b/tools/parsers/go_parser.go @@ -27,20 +27,21 @@ import ( "gvisor.dev/gvisor/tools/bigquery" ) -// parseOutput expects golang benchmark output returns a Benchmark struct formatted for BigQuery. -func parseOutput(output string, metadata *bigquery.Metadata, official bool) ([]*bigquery.Benchmark, error) { - var benchmarks []*bigquery.Benchmark +// ParseOutput expects golang benchmark output and returns a struct formatted +// for BigQuery. +func ParseOutput(output string, name string, official bool) (*bigquery.Suite, error) { + suite := bigquery.NewSuite(name) lines := strings.Split(output, "\n") for _, line := range lines { - bm, err := parseLine(line, metadata, official) + bm, err := parseLine(line, official) if err != nil { return nil, fmt.Errorf("failed to parse line '%s': %v", line, err) } if bm != nil { - benchmarks = append(benchmarks, bm) + suite.Benchmarks = append(suite.Benchmarks, bm) } } - return benchmarks, nil + return suite, nil } // parseLine handles parsing a benchmark line into a bigquery.Benchmark. @@ -58,9 +59,8 @@ func parseOutput(output string, metadata *bigquery.Metadata, official bool) ([]* // {Name: ns/op, Unit: ns/op, Sample: 1397875880} // {Name: requests_per_second, Unit: QPS, Sample: 140 } // } -// Metadata: metadata //} -func parseLine(line string, metadata *bigquery.Metadata, official bool) (*bigquery.Benchmark, error) { +func parseLine(line string, official bool) (*bigquery.Benchmark, error) { fields := strings.Fields(line) // Check if this line is a Benchmark line. Otherwise ignore the line. @@ -79,7 +79,6 @@ func parseLine(line string, metadata *bigquery.Metadata, official bool) (*bigque } bm := bigquery.NewBenchmark(name, iters, official) - bm.Metadata = metadata for _, p := range params { bm.AddCondition(p.Name, p.Value) } diff --git a/tools/parsers/go_parser_test.go b/tools/parsers/go_parser_test.go index 36996b7c8..0aa1152a2 100644 --- a/tools/parsers/go_parser_test.go +++ b/tools/parsers/go_parser_test.go @@ -94,13 +94,11 @@ func TestParseLine(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - got, err := parseLine(tc.data, nil, false) + got, err := parseLine(tc.data, false) if err != nil { t.Fatalf("parseLine failed with: %v", err) } - tc.want.Timestamp = got.Timestamp - if !cmp.Equal(tc.want, got, nil) { for _, c := range got.Condition { t.Logf("Cond: %+v", c) @@ -150,14 +148,14 @@ BenchmarkRuby/server_threads.5-6 1 1416003331 ns/op 0.00950 average_latency.s 46 for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - bms, err := parseOutput(tc.data, nil, false) + suite, err := ParseOutput(tc.data, "", false) if err != nil { t.Fatalf("parseOutput failed: %v", err) - } else if len(bms) != tc.numBenchmarks { - t.Fatalf("NumBenchmarks failed want: %d got: %d %+v", tc.numBenchmarks, len(bms), bms) + } else if len(suite.Benchmarks) != tc.numBenchmarks { + t.Fatalf("NumBenchmarks failed want: %d got: %d %+v", tc.numBenchmarks, len(suite.Benchmarks), suite.Benchmarks) } - for _, bm := range bms { + for _, bm := range suite.Benchmarks { if len(bm.Metric) != tc.numMetrics { t.Fatalf("NumMetrics failed want: %d got: %d %+v", tc.numMetrics, len(bm.Metric), bm.Metric) } diff --git a/tools/parsers/parser_main.go b/tools/parsers/parser_main.go new file mode 100644 index 000000000..6c6182464 --- /dev/null +++ b/tools/parsers/parser_main.go @@ -0,0 +1,129 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary parser parses Benchmark data from golang benchmarks, +// puts it into a Schema for BigQuery, and sends it to BigQuery. +// parser will also initialize a table with the Benchmarks BigQuery schema. +package main + +import ( + "context" + "fmt" + "io/ioutil" + "os" + + "gvisor.dev/gvisor/runsc/flag" + bq "gvisor.dev/gvisor/tools/bigquery" + "gvisor.dev/gvisor/tools/parsers" +) + +const ( + initString = "init" + initDescription = "initializes a new table with benchmarks schema" + parseString = "parse" + parseDescription = "parses given benchmarks file and sends it to BigQuery table." +) + +var ( + // The init command will create a new dataset/table in the given project and initialize + // the table with the schema in //tools/bigquery/bigquery.go. If the table/dataset exists + // or has been initialized, init has no effect and successfully returns. + initCmd = flag.NewFlagSet(initString, flag.ContinueOnError) + initProject = initCmd.String("project", "", "GCP project to send benchmarks.") + initDataset = initCmd.String("dataset", "", "dataset to send benchmarks data.") + initTable = initCmd.String("table", "", "table to send benchmarks data.") + + // The parse command parses benchmark data in `file` and sends it to the + // requested table. + parseCmd = flag.NewFlagSet(parseString, flag.ContinueOnError) + file = parseCmd.String("file", "", "file to parse for benchmarks") + name = parseCmd.String("suite_name", "", "name of the benchmark suite") + clNumber = parseCmd.String("cl", "", "changelist number of this run") + gitCommit = parseCmd.String("git_commit", "", "git commit sha for this run") + parseProject = parseCmd.String("project", "", "GCP project to send benchmarks.") + parseDataset = parseCmd.String("dataset", "", "dataset to send benchmarks data.") + parseTable = parseCmd.String("table", "", "table to send benchmarks data.") + official = parseCmd.Bool("official", false, "mark input data as official.") +) + +// initBenchmarks initializes a dataset/table in a BigQuery project. +func initBenchmarks(ctx context.Context) error { + return bq.InitBigQuery(ctx, *initProject, *initDataset, *initTable, nil) +} + +// parseBenchmarks parses the given file into the BigQuery schema, +// adds some custom data for the commit, and sends the data to BigQuery. +func parseBenchmarks(ctx context.Context) error { + data, err := ioutil.ReadFile(*file) + if err != nil { + return fmt.Errorf("failed to read file: %v", err) + } + suite, err := parsers.ParseOutput(string(data), *name, *official) + if err != nil { + return fmt.Errorf("failed parse data: %v", err) + } + extraConditions := []*bq.Condition{ + { + Name: "change_list", + Value: *clNumber, + }, + { + Name: "commit", + Value: *gitCommit, + }, + } + + suite.Conditions = append(suite.Conditions, extraConditions...) + return bq.SendBenchmarks(ctx, suite, *parseProject, *parseDataset, *parseTable, nil) +} + +func main() { + ctx := context.Background() + switch { + // the "init" command + case len(os.Args) >= 2 && os.Args[1] == initString: + if err := initCmd.Parse(os.Args[2:]); err != nil { + fmt.Fprintf(os.Stderr, "failed parse flags: %v", err) + os.Exit(1) + } + if err := initBenchmarks(ctx); err != nil { + failure := "failed to initialize project: %s dataset: %s table: %s: %v" + fmt.Fprintf(os.Stderr, failure, *parseProject, *parseDataset, *parseTable, err) + os.Exit(1) + } + // the "parse" command. + case len(os.Args) >= 2 && os.Args[1] == parseString: + if err := parseCmd.Parse(os.Args[2:]); err != nil { + fmt.Fprintf(os.Stderr, "failed parse flags: %v", err) + os.Exit(1) + } + if err := parseBenchmarks(ctx); err != nil { + fmt.Fprintf(os.Stderr, "failed parse benchmarks: %v", err) + os.Exit(1) + } + default: + printUsage() + } +} + +// printUsage prints the top level usage string. +func printUsage() { + usage := `Usage: parser ... + +Available commands: + %s %s + %s %s +` + fmt.Fprintf(os.Stderr, usage, initCmd.Name(), initDescription, parseCmd.Name(), parseDescription) +} -- cgit v1.2.3