diff options
author | Zach Koopmans <zkoopmans@google.com> | 2020-10-26 10:27:25 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2020-10-26 10:29:20 -0700 |
commit | e2dce046037c30b585cc62db45d517f59d1a08fc (patch) | |
tree | 6344e2ed13193c0822d3136d495bef111f3f9380 | |
parent | 73a18635385d6a90942370e15fe2cbeb2a5a4386 (diff) |
Add parser for open source benchmarks.
Add a parser binary for parsing files containing
Benchmark output and sending data to BigQuery.
PiperOrigin-RevId: 339066396
-rw-r--r-- | tools/bigquery/BUILD | 5 | ||||
-rw-r--r-- | tools/bigquery/bigquery.go | 60 | ||||
-rw-r--r-- | tools/parsers/BUILD | 14 | ||||
-rw-r--r-- | tools/parsers/go_parser.go | 17 | ||||
-rw-r--r-- | tools/parsers/go_parser_test.go | 12 | ||||
-rw-r--r-- | tools/parsers/parser_main.go | 129 |
6 files changed, 192 insertions, 45 deletions
diff --git a/tools/bigquery/BUILD b/tools/bigquery/BUILD index 2b0062a63..1cea9e1c9 100644 --- a/tools/bigquery/BUILD +++ b/tools/bigquery/BUILD @@ -9,5 +9,8 @@ go_library( visibility = [ "//:sandbox", ], - deps = ["@com_google_cloud_go_bigquery//:go_default_library"], + deps = [ + "@com_google_cloud_go_bigquery//:go_default_library", + "@org_golang_google_api//option:go_default_library", + ], ) diff --git a/tools/bigquery/bigquery.go b/tools/bigquery/bigquery.go index 5f1a882de..34b270cc0 100644 --- a/tools/bigquery/bigquery.go +++ b/tools/bigquery/bigquery.go @@ -25,22 +25,30 @@ import ( "time" bq "cloud.google.com/go/bigquery" + "google.golang.org/api/option" ) -// Benchmark is the top level structure of recorded benchmark data. BigQuery +// Suite is the top level structure for a benchmark run. BigQuery // will infer the schema from this. +type Suite struct { + Name string `bq:"name"` + Conditions []*Condition `bq:"conditions"` + Benchmarks []*Benchmark `bq:"benchmarks"` + Official bool `bq:"official"` + Timestamp time.Time `bq:"timestamp"` +} + +// Benchmark represents an individual benchmark in a suite. type Benchmark struct { Name string `bq:"name"` Condition []*Condition `bq:"condition"` - Timestamp time.Time `bq:"timestamp"` - Official bool `bq:"official"` Metric []*Metric `bq:"metric"` - Metadata *Metadata `bq:"metadata"` } -// Condition represents qualifiers for the benchmark. For example: +// Condition represents qualifiers for the benchmark or suite. For example: // Get_Pid/1/real_time would have Benchmark Name "Get_Pid" with "1" -// and "real_time" parameters as conditions. +// and "real_time" parameters as conditions. Suite conditions include +// information such as the CL number and platform name. type Condition struct { Name string `bq:"name"` Value string `bq:"value"` @@ -53,19 +61,9 @@ type Metric struct { Sample float64 `bq:"sample"` } -// Metadata about this benchmark. -type Metadata struct { - CL string `bq:"changelist"` - IterationID string `bq:"iteration_id"` - PendingCL string `bq:"pending_cl"` - Workflow string `bq:"workflow"` - Platform string `bq:"platform"` - Gofer string `bq:"gofer"` -} - // InitBigQuery initializes a BigQuery dataset/table in the project. If the dataset/table already exists, it is not duplicated. -func InitBigQuery(ctx context.Context, projectID, datasetID, tableID string) error { - client, err := bq.NewClient(ctx, projectID) +func InitBigQuery(ctx context.Context, projectID, datasetID, tableID string, opts []option.ClientOption) error { + client, err := bq.NewClient(ctx, projectID, opts...) if err != nil { return fmt.Errorf("failed to initialize client on project %s: %v", projectID, err) } @@ -77,7 +75,7 @@ func InitBigQuery(ctx context.Context, projectID, datasetID, tableID string) err } table := dataset.Table(tableID) - schema, err := bq.InferSchema(Benchmark{}) + schema, err := bq.InferSchema(Suite{}) if err != nil { return fmt.Errorf("failed to infer schema: %v", err) } @@ -109,24 +107,32 @@ func (bm *Benchmark) AddMetric(metricName, unit string, sample float64) { // NewBenchmark initializes a new benchmark. func NewBenchmark(name string, iters int, official bool) *Benchmark { return &Benchmark{ - Name: name, - Timestamp: time.Now().UTC(), - Official: official, - Metric: make([]*Metric, 0), + Name: name, + Metric: make([]*Metric, 0), + } +} + +// NewSuite initializes a new Suite. +func NewSuite(name string) *Suite { + return &Suite{ + Name: name, + Timestamp: time.Now().UTC(), + Benchmarks: make([]*Benchmark, 0), + Conditions: make([]*Condition, 0), } } // SendBenchmarks sends the slice of benchmarks to the BigQuery dataset/table. -func SendBenchmarks(ctx context.Context, benchmarks []*Benchmark, projectID, datasetID, tableID string) error { - client, err := bq.NewClient(ctx, projectID) +func SendBenchmarks(ctx context.Context, suite *Suite, projectID, datasetID, tableID string, opts []option.ClientOption) error { + client, err := bq.NewClient(ctx, projectID, opts...) if err != nil { return fmt.Errorf("failed to initialize client on project: %s: %v", projectID, err) } defer client.Close() uploader := client.Dataset(datasetID).Table(tableID).Uploader() - if err = uploader.Put(ctx, benchmarks); err != nil { - return fmt.Errorf("failed to upload benchmarks to proejct %s, table %s.%s: %v", projectID, datasetID, tableID, err) + if err = uploader.Put(ctx, suite); err != nil { + return fmt.Errorf("failed to upload benchmarks %s to project %s, table %s.%s: %v", suite.Name, projectID, datasetID, tableID, err) } return nil diff --git a/tools/parsers/BUILD b/tools/parsers/BUILD index 8038be606..dab954e25 100644 --- a/tools/parsers/BUILD +++ b/tools/parsers/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_binary", "go_library", "go_test") package(licenses = ["notice"]) @@ -27,3 +27,15 @@ go_library( "//tools/bigquery", ], ) + +go_binary( + name = "parser", + testonly = 1, + srcs = ["parser_main.go"], + nogo = False, + deps = [ + ":parsers", + "//runsc/flag", + "//tools/bigquery", + ], +) diff --git a/tools/parsers/go_parser.go b/tools/parsers/go_parser.go index 2cf74c883..df4875e6a 100644 --- a/tools/parsers/go_parser.go +++ b/tools/parsers/go_parser.go @@ -27,20 +27,21 @@ import ( "gvisor.dev/gvisor/tools/bigquery" ) -// parseOutput expects golang benchmark output returns a Benchmark struct formatted for BigQuery. -func parseOutput(output string, metadata *bigquery.Metadata, official bool) ([]*bigquery.Benchmark, error) { - var benchmarks []*bigquery.Benchmark +// ParseOutput expects golang benchmark output and returns a struct formatted +// for BigQuery. +func ParseOutput(output string, name string, official bool) (*bigquery.Suite, error) { + suite := bigquery.NewSuite(name) lines := strings.Split(output, "\n") for _, line := range lines { - bm, err := parseLine(line, metadata, official) + bm, err := parseLine(line, official) if err != nil { return nil, fmt.Errorf("failed to parse line '%s': %v", line, err) } if bm != nil { - benchmarks = append(benchmarks, bm) + suite.Benchmarks = append(suite.Benchmarks, bm) } } - return benchmarks, nil + return suite, nil } // parseLine handles parsing a benchmark line into a bigquery.Benchmark. @@ -58,9 +59,8 @@ func parseOutput(output string, metadata *bigquery.Metadata, official bool) ([]* // {Name: ns/op, Unit: ns/op, Sample: 1397875880} // {Name: requests_per_second, Unit: QPS, Sample: 140 } // } -// Metadata: metadata //} -func parseLine(line string, metadata *bigquery.Metadata, official bool) (*bigquery.Benchmark, error) { +func parseLine(line string, official bool) (*bigquery.Benchmark, error) { fields := strings.Fields(line) // Check if this line is a Benchmark line. Otherwise ignore the line. @@ -79,7 +79,6 @@ func parseLine(line string, metadata *bigquery.Metadata, official bool) (*bigque } bm := bigquery.NewBenchmark(name, iters, official) - bm.Metadata = metadata for _, p := range params { bm.AddCondition(p.Name, p.Value) } diff --git a/tools/parsers/go_parser_test.go b/tools/parsers/go_parser_test.go index 36996b7c8..0aa1152a2 100644 --- a/tools/parsers/go_parser_test.go +++ b/tools/parsers/go_parser_test.go @@ -94,13 +94,11 @@ func TestParseLine(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - got, err := parseLine(tc.data, nil, false) + got, err := parseLine(tc.data, false) if err != nil { t.Fatalf("parseLine failed with: %v", err) } - tc.want.Timestamp = got.Timestamp - if !cmp.Equal(tc.want, got, nil) { for _, c := range got.Condition { t.Logf("Cond: %+v", c) @@ -150,14 +148,14 @@ BenchmarkRuby/server_threads.5-6 1 1416003331 ns/op 0.00950 average_latency.s 46 for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - bms, err := parseOutput(tc.data, nil, false) + suite, err := ParseOutput(tc.data, "", false) if err != nil { t.Fatalf("parseOutput failed: %v", err) - } else if len(bms) != tc.numBenchmarks { - t.Fatalf("NumBenchmarks failed want: %d got: %d %+v", tc.numBenchmarks, len(bms), bms) + } else if len(suite.Benchmarks) != tc.numBenchmarks { + t.Fatalf("NumBenchmarks failed want: %d got: %d %+v", tc.numBenchmarks, len(suite.Benchmarks), suite.Benchmarks) } - for _, bm := range bms { + for _, bm := range suite.Benchmarks { if len(bm.Metric) != tc.numMetrics { t.Fatalf("NumMetrics failed want: %d got: %d %+v", tc.numMetrics, len(bm.Metric), bm.Metric) } diff --git a/tools/parsers/parser_main.go b/tools/parsers/parser_main.go new file mode 100644 index 000000000..6c6182464 --- /dev/null +++ b/tools/parsers/parser_main.go @@ -0,0 +1,129 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary parser parses Benchmark data from golang benchmarks, +// puts it into a Schema for BigQuery, and sends it to BigQuery. +// parser will also initialize a table with the Benchmarks BigQuery schema. +package main + +import ( + "context" + "fmt" + "io/ioutil" + "os" + + "gvisor.dev/gvisor/runsc/flag" + bq "gvisor.dev/gvisor/tools/bigquery" + "gvisor.dev/gvisor/tools/parsers" +) + +const ( + initString = "init" + initDescription = "initializes a new table with benchmarks schema" + parseString = "parse" + parseDescription = "parses given benchmarks file and sends it to BigQuery table." +) + +var ( + // The init command will create a new dataset/table in the given project and initialize + // the table with the schema in //tools/bigquery/bigquery.go. If the table/dataset exists + // or has been initialized, init has no effect and successfully returns. + initCmd = flag.NewFlagSet(initString, flag.ContinueOnError) + initProject = initCmd.String("project", "", "GCP project to send benchmarks.") + initDataset = initCmd.String("dataset", "", "dataset to send benchmarks data.") + initTable = initCmd.String("table", "", "table to send benchmarks data.") + + // The parse command parses benchmark data in `file` and sends it to the + // requested table. + parseCmd = flag.NewFlagSet(parseString, flag.ContinueOnError) + file = parseCmd.String("file", "", "file to parse for benchmarks") + name = parseCmd.String("suite_name", "", "name of the benchmark suite") + clNumber = parseCmd.String("cl", "", "changelist number of this run") + gitCommit = parseCmd.String("git_commit", "", "git commit sha for this run") + parseProject = parseCmd.String("project", "", "GCP project to send benchmarks.") + parseDataset = parseCmd.String("dataset", "", "dataset to send benchmarks data.") + parseTable = parseCmd.String("table", "", "table to send benchmarks data.") + official = parseCmd.Bool("official", false, "mark input data as official.") +) + +// initBenchmarks initializes a dataset/table in a BigQuery project. +func initBenchmarks(ctx context.Context) error { + return bq.InitBigQuery(ctx, *initProject, *initDataset, *initTable, nil) +} + +// parseBenchmarks parses the given file into the BigQuery schema, +// adds some custom data for the commit, and sends the data to BigQuery. +func parseBenchmarks(ctx context.Context) error { + data, err := ioutil.ReadFile(*file) + if err != nil { + return fmt.Errorf("failed to read file: %v", err) + } + suite, err := parsers.ParseOutput(string(data), *name, *official) + if err != nil { + return fmt.Errorf("failed parse data: %v", err) + } + extraConditions := []*bq.Condition{ + { + Name: "change_list", + Value: *clNumber, + }, + { + Name: "commit", + Value: *gitCommit, + }, + } + + suite.Conditions = append(suite.Conditions, extraConditions...) + return bq.SendBenchmarks(ctx, suite, *parseProject, *parseDataset, *parseTable, nil) +} + +func main() { + ctx := context.Background() + switch { + // the "init" command + case len(os.Args) >= 2 && os.Args[1] == initString: + if err := initCmd.Parse(os.Args[2:]); err != nil { + fmt.Fprintf(os.Stderr, "failed parse flags: %v", err) + os.Exit(1) + } + if err := initBenchmarks(ctx); err != nil { + failure := "failed to initialize project: %s dataset: %s table: %s: %v" + fmt.Fprintf(os.Stderr, failure, *parseProject, *parseDataset, *parseTable, err) + os.Exit(1) + } + // the "parse" command. + case len(os.Args) >= 2 && os.Args[1] == parseString: + if err := parseCmd.Parse(os.Args[2:]); err != nil { + fmt.Fprintf(os.Stderr, "failed parse flags: %v", err) + os.Exit(1) + } + if err := parseBenchmarks(ctx); err != nil { + fmt.Fprintf(os.Stderr, "failed parse benchmarks: %v", err) + os.Exit(1) + } + default: + printUsage() + } +} + +// printUsage prints the top level usage string. +func printUsage() { + usage := `Usage: parser <command> <flags> ... + +Available commands: + %s %s + %s %s +` + fmt.Fprintf(os.Stderr, usage, initCmd.Name(), initDescription, parseCmd.Name(), parseDescription) +} |