diff options
author | Adin Scannell <ascannell@google.com> | 2021-04-26 11:40:10 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-04-26 11:42:49 -0700 |
commit | 5b7b7daa425ffc93e98c12cbd37ea7b15a8bcc8d (patch) | |
tree | 82079eb5110b5e36f7e657c076a12ba45c669475 /tools/nogo | |
parent | bf64560681182b0024790f683f4c9aea142e70c5 (diff) |
nogo: enable bazel workers and other optimizations.
This is a suite of changes intended to dramatically speed up nogo speed.
First, there are minor changes that help efficiency significantly.
* Gob-based encoding is used internally, and JSON only used for the final
set of findings. This is done to preserve the existing format (which is
consumed by external tooling), and to facilitate manual debugging.
* Unnecessary regex compilation is elided in the configuration, and care is
taken for merges to prevent redundant entries. I'm not sure quite sure how,
but it turns out that this was consumed a significant amount of time,
presumably compiling the same regexes over and over again.
Second, this change enables bazel workers for nogo analyzers.
Workers enable persistent processes instead of creating and tearing down a
sandbox every invocation. A library is introduced to abstraction these details,
and allow the tools to still be written using standard flags, etc.
The key here is that these binaries and the core of nogo become aware of
caches with worker.Cache. This allows us to save significant time loading the
same set of files and findings over and over again. These caches are keyed by
the digests that are provided by bazel, and are capped in overall size.
Note that the worker package attempts to capture output during each run, but
tools are no longer permitted to write to stdout. This necessitated dropping
some spurious output from checklocks.
PiperOrigin-RevId: 370505732
Diffstat (limited to 'tools/nogo')
-rw-r--r-- | tools/nogo/BUILD | 1 | ||||
-rw-r--r-- | tools/nogo/check/BUILD | 5 | ||||
-rw-r--r-- | tools/nogo/check/main.go | 14 | ||||
-rw-r--r-- | tools/nogo/config.go | 80 | ||||
-rw-r--r-- | tools/nogo/defs.bzl | 118 | ||||
-rw-r--r-- | tools/nogo/filter/BUILD | 1 | ||||
-rw-r--r-- | tools/nogo/filter/main.go | 128 | ||||
-rw-r--r-- | tools/nogo/findings.go | 90 | ||||
-rw-r--r-- | tools/nogo/nogo.go | 175 |
9 files changed, 425 insertions, 187 deletions
diff --git a/tools/nogo/BUILD b/tools/nogo/BUILD index 5fc60d8d8..6c6f604b5 100644 --- a/tools/nogo/BUILD +++ b/tools/nogo/BUILD @@ -37,6 +37,7 @@ go_library( "//tools/checkescape", "//tools/checklocks", "//tools/checkunsafe", + "//tools/worker", "@co_honnef_go_tools//staticcheck:go_default_library", "@co_honnef_go_tools//stylecheck:go_default_library", "@org_golang_x_tools//go/analysis:go_default_library", diff --git a/tools/nogo/check/BUILD b/tools/nogo/check/BUILD index e18483a18..666780dd3 100644 --- a/tools/nogo/check/BUILD +++ b/tools/nogo/check/BUILD @@ -7,5 +7,8 @@ go_binary( srcs = ["main.go"], nogo = False, visibility = ["//visibility:public"], - deps = ["//tools/nogo"], + deps = [ + "//tools/nogo", + "//tools/worker", + ], ) diff --git a/tools/nogo/check/main.go b/tools/nogo/check/main.go index 4194770be..3a6c3fb08 100644 --- a/tools/nogo/check/main.go +++ b/tools/nogo/check/main.go @@ -24,6 +24,7 @@ import ( "os" "gvisor.dev/gvisor/tools/nogo" + "gvisor.dev/gvisor/tools/worker" ) var ( @@ -49,9 +50,10 @@ func loadConfig(file string, config interface{}) interface{} { } func main() { - // Parse all flags. - flag.Parse() + worker.Work(run) +} +func run([]string) int { var ( findings []nogo.Finding factData []byte @@ -90,7 +92,11 @@ func main() { // Write all findings. if *findingsOutput != "" { - if err := nogo.WriteFindingsToFile(findings, *findingsOutput); err != nil { + w, err := os.OpenFile(*findingsOutput, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + log.Fatalf("error opening output file %q: %v", *findingsOutput, err) + } + if err := nogo.WriteFindingsTo(w, findings, false /* json */); err != nil { log.Fatalf("error writing findings to %q: %v", *findingsOutput, err) } } else { @@ -98,4 +104,6 @@ func main() { fmt.Fprintf(os.Stdout, "%s\n", finding.String()) } } + + return 0 } diff --git a/tools/nogo/config.go b/tools/nogo/config.go index 2fea5b3e1..6436f9d34 100644 --- a/tools/nogo/config.go +++ b/tools/nogo/config.go @@ -73,17 +73,28 @@ type ItemConfig struct { } func compileRegexps(ss []string, rs *[]*regexp.Regexp) error { - *rs = make([]*regexp.Regexp, 0, len(ss)) - for _, s := range ss { + *rs = make([]*regexp.Regexp, len(ss)) + for i, s := range ss { r, err := regexp.Compile(s) if err != nil { return err } - *rs = append(*rs, r) + (*rs)[i] = r } return nil } +// RegexpCount is used by AnalyzerConfig.RegexpCount. +func (i *ItemConfig) RegexpCount() int64 { + if i == nil { + // See compile. + return 0 + } + // Return the number of regular expressions compiled for these items. + // This is how the cache size of the configuration is measured. + return int64(len(i.exclude) + len(i.suppress)) +} + func (i *ItemConfig) compile() error { if i == nil { // This may be nil if nothing is included in the @@ -100,9 +111,25 @@ func (i *ItemConfig) compile() error { return nil } +func merge(a, b []string) []string { + found := make(map[string]struct{}) + result := make([]string, 0, len(a)+len(b)) + for _, elem := range a { + found[elem] = struct{}{} + result = append(result, elem) + } + for _, elem := range b { + if _, ok := found[elem]; ok { + continue + } + result = append(result, elem) + } + return result +} + func (i *ItemConfig) merge(other *ItemConfig) { - i.Exclude = append(i.Exclude, other.Exclude...) - i.Suppress = append(i.Suppress, other.Suppress...) + i.Exclude = merge(i.Exclude, other.Exclude) + i.Suppress = merge(i.Suppress, other.Suppress) } func (i *ItemConfig) shouldReport(fullPos, msg string) bool { @@ -129,6 +156,15 @@ func (i *ItemConfig) shouldReport(fullPos, msg string) bool { // configurations depending on what Group the file belongs to. type AnalyzerConfig map[GroupName]*ItemConfig +// RegexpCount is used by Config.Size. +func (a AnalyzerConfig) RegexpCount() int64 { + count := int64(0) + for _, gc := range a { + count += gc.RegexpCount() + } + return count +} + func (a AnalyzerConfig) compile() error { for name, gc := range a { if err := gc.compile(); err != nil { @@ -179,22 +215,36 @@ type Config struct { Analyzers map[AnalyzerName]AnalyzerConfig `yaml:"analyzers"` } +// Size implements worker.Sizer.Size. +func (c *Config) Size() int64 { + count := c.Global.RegexpCount() + for _, config := range c.Analyzers { + count += config.RegexpCount() + } + // The size is measured as the number of regexps that are compiled + // here. We multiply by 1k to produce an estimate. + return 1024 * count +} + // Merge merges two configurations. func (c *Config) Merge(other *Config) { // Merge all groups. + // + // Select the other first, as the order provided in the second will + // provide precendence over the same group defined in the first one. + seenGroups := make(map[GroupName]struct{}) + newGroups := make([]Group, 0, len(c.Groups)+len(other.Groups)) for _, g := range other.Groups { - // Is there a matching group? If yes, we just delete - // it. This will preserve the order provided in the - // overriding file, even if it differs. - for i := 0; i < len(c.Groups); i++ { - if g.Name == c.Groups[i].Name { - copy(c.Groups[i:], c.Groups[i+1:]) - c.Groups = c.Groups[:len(c.Groups)-1] - break - } + newGroups = append(newGroups, g) + seenGroups[g.Name] = struct{}{} + } + for _, g := range c.Groups { + if _, ok := seenGroups[g.Name]; ok { + continue } - c.Groups = append(c.Groups, g) + newGroups = append(newGroups, g) } + c.Groups = newGroups // Merge global configurations. c.Global.merge(other.Global) diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl index be8b82f9c..ddf5816a6 100644 --- a/tools/nogo/defs.bzl +++ b/tools/nogo/defs.bzl @@ -29,6 +29,7 @@ NogoTargetInfo = provider( fields = { "goarch": "the build architecture (GOARCH)", "goos": "the build OS target (GOOS)", + "worker_debug": "transitive debugging", }, ) @@ -36,6 +37,7 @@ def _nogo_target_impl(ctx): return [NogoTargetInfo( goarch = ctx.attr.goarch, goos = ctx.attr.goos, + worker_debug = ctx.attr.worker_debug, )] nogo_target = go_rule( @@ -50,6 +52,10 @@ nogo_target = go_rule( doc = "the Go OS target (propagated to other rules).", mandatory = True, ), + "worker_debug": attr.bool( + doc = "whether worker debugging should be enabled.", + default = False, + ), }, ) @@ -61,7 +67,7 @@ def _nogo_objdump_tool_impl(ctx): # we need the tool to handle this case by creating a temporary file. # # [1] https://github.com/golang/go/issues/41051 - nogo_target_info = ctx.attr._nogo_target[NogoTargetInfo] + nogo_target_info = ctx.attr._target[NogoTargetInfo] go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch) env_prefix = " ".join(["%s=%s" % (key, value) for (key, value) in go_ctx.env.items()]) dumper = ctx.actions.declare_file(ctx.label.name) @@ -94,7 +100,7 @@ nogo_objdump_tool = go_rule( rule, implementation = _nogo_objdump_tool_impl, attrs = { - "_nogo_target": attr.label( + "_target": attr.label( default = "//tools/nogo:target", cfg = "target", ), @@ -112,7 +118,7 @@ NogoStdlibInfo = provider( def _nogo_stdlib_impl(ctx): # Build the standard library facts. - nogo_target_info = ctx.attr._nogo_target[NogoTargetInfo] + nogo_target_info = ctx.attr._target[NogoTargetInfo] go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch) facts = ctx.actions.declare_file(ctx.label.name + ".facts") raw_findings = ctx.actions.declare_file(ctx.label.name + ".raw_findings") @@ -124,18 +130,29 @@ def _nogo_stdlib_impl(ctx): ) config_file = ctx.actions.declare_file(ctx.label.name + ".cfg") ctx.actions.write(config_file, config.to_json()) - ctx.actions.run( - inputs = [config_file] + go_ctx.stdlib_srcs, - outputs = [facts, raw_findings], - tools = depset(go_ctx.runfiles.to_list() + ctx.files._nogo_objdump_tool), - executable = ctx.files._nogo_check[0], - mnemonic = "NogoStandardLibraryAnalysis", - progress_message = "Analyzing Go Standard Library", - arguments = go_ctx.nogo_args + [ - "-objdump_tool=%s" % ctx.files._nogo_objdump_tool[0].path, + args_file = ctx.actions.declare_file(ctx.label.name + "_args_file") + ctx.actions.write( + output = args_file, + content = "\n".join(go_ctx.nogo_args + [ + "-objdump_tool=%s" % ctx.files._objdump_tool[0].path, "-stdlib=%s" % config_file.path, "-findings=%s" % raw_findings.path, "-facts=%s" % facts.path, + ]), + ) + ctx.actions.run( + inputs = [config_file] + go_ctx.stdlib_srcs + [args_file], + outputs = [facts, raw_findings], + tools = depset(go_ctx.runfiles.to_list() + ctx.files._objdump_tool), + executable = ctx.files._check[0], + mnemonic = "GoStandardLibraryAnalysis", + # Note that this does not support work execution currently. There is an + # issue with stdout pollution that is not yet resolved, so this is kept + # as a separate menomic. + progress_message = "Analyzing Go Standard Library", + arguments = [ + "--worker_debug=%s" % nogo_target_info.worker_debug, + "@%s" % args_file.path, ], ) @@ -149,15 +166,15 @@ nogo_stdlib = go_rule( rule, implementation = _nogo_stdlib_impl, attrs = { - "_nogo_check": attr.label( + "_check": attr.label( default = "//tools/nogo/check:check", cfg = "host", ), - "_nogo_objdump_tool": attr.label( + "_objdump_tool": attr.label( default = "//tools/nogo:objdump_tool", cfg = "host", ), - "_nogo_target": attr.label( + "_target": attr.label( default = "//tools/nogo:target", cfg = "target", ), @@ -276,7 +293,7 @@ def _nogo_aspect_impl(target, ctx): inputs.append(stdlib_facts) # The nogo tool operates on a configuration serialized in JSON format. - nogo_target_info = ctx.attr._nogo_target[NogoTargetInfo] + nogo_target_info = ctx.attr._target[NogoTargetInfo] go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch) facts = ctx.actions.declare_file(target.label.name + ".facts") raw_findings = ctx.actions.declare_file(target.label.name + ".raw_findings") @@ -294,19 +311,28 @@ def _nogo_aspect_impl(target, ctx): config_file = ctx.actions.declare_file(target.label.name + ".cfg") ctx.actions.write(config_file, config.to_json()) inputs.append(config_file) - ctx.actions.run( - inputs = inputs, - outputs = [facts, raw_findings], - tools = depset(go_ctx.runfiles.to_list() + ctx.files._nogo_objdump_tool), - executable = ctx.files._nogo_check[0], - mnemonic = "NogoAnalysis", - progress_message = "Analyzing %s" % target.label, - arguments = go_ctx.nogo_args + [ + args_file = ctx.actions.declare_file(ctx.label.name + "_args_file") + ctx.actions.write( + output = args_file, + content = "\n".join(go_ctx.nogo_args + [ "-binary=%s" % target_objfile.path, - "-objdump_tool=%s" % ctx.files._nogo_objdump_tool[0].path, + "-objdump_tool=%s" % ctx.files._objdump_tool[0].path, "-package=%s" % config_file.path, "-findings=%s" % raw_findings.path, "-facts=%s" % facts.path, + ]), + ) + ctx.actions.run( + inputs = inputs + [args_file], + outputs = [facts, raw_findings], + tools = depset(go_ctx.runfiles.to_list() + ctx.files._objdump_tool), + executable = ctx.files._check[0], + mnemonic = "GoStaticAnalysis", + progress_message = "Analyzing %s" % target.label, + execution_requirements = {"supports-workers": "1"}, + arguments = [ + "--worker_debug=%s" % nogo_target_info.worker_debug, + "@%s" % args_file.path, ], ) @@ -339,27 +365,30 @@ nogo_aspect = go_rule( "embed", ], attrs = { - "_nogo_check": attr.label( + "_check": attr.label( default = "//tools/nogo/check:check", cfg = "host", ), - "_nogo_stdlib": attr.label( - default = "//tools/nogo:stdlib", - cfg = "host", - ), - "_nogo_objdump_tool": attr.label( + "_objdump_tool": attr.label( default = "//tools/nogo:objdump_tool", cfg = "host", ), - "_nogo_target": attr.label( + "_target": attr.label( default = "//tools/nogo:target", cfg = "target", ), + # The name of this attribute must not be _stdlib, since that + # appears to be reserved for some internal bazel use. + "_nogo_stdlib": attr.label( + default = "//tools/nogo:stdlib", + cfg = "host", + ), }, ) def _nogo_test_impl(ctx): """Check nogo findings.""" + nogo_target_info = ctx.attr._target[NogoTargetInfo] # Ensure there's a single dependency. if len(ctx.attr.deps) != 1: @@ -369,16 +398,27 @@ def _nogo_test_impl(ctx): # Build a step that applies the configuration. config_srcs = ctx.attr.config[NogoConfigInfo].srcs findings = ctx.actions.declare_file(ctx.label.name + ".findings") + args_file = ctx.actions.declare_file(ctx.label.name + "_args_file") + ctx.actions.write( + output = args_file, + content = "\n".join( + ["-input=%s" % f.path for f in raw_findings] + + ["-config=%s" % f.path for f in config_srcs] + + ["-output=%s" % findings.path], + ), + ) ctx.actions.run( - inputs = raw_findings + ctx.files.srcs + config_srcs, + inputs = raw_findings + ctx.files.srcs + config_srcs + [args_file], outputs = [findings], tools = depset(ctx.files._filter), executable = ctx.files._filter[0], mnemonic = "GoStaticAnalysis", progress_message = "Generating %s" % ctx.label, - arguments = ["-input=%s" % f.path for f in raw_findings] + - ["-config=%s" % f.path for f in config_srcs] + - ["-output=%s" % findings.path], + execution_requirements = {"supports-workers": "1"}, + arguments = [ + "--worker_debug=%s" % nogo_target_info.worker_debug, + "@%s" % args_file.path, + ], ) # Build a runner that checks the filtered facts. @@ -389,7 +429,7 @@ def _nogo_test_impl(ctx): runner = ctx.actions.declare_file(ctx.label.name) runner_content = [ "#!/bin/bash", - "exec %s -input=%s" % (ctx.files._filter[0].short_path, findings.short_path), + "exec %s -check -input=%s" % (ctx.files._filter[0].short_path, findings.short_path), "", ] ctx.actions.write(runner, "\n".join(runner_content), is_executable = True) @@ -423,6 +463,10 @@ nogo_test = rule( allow_files = True, doc = "Relevant src files. This is ignored except to make the nogo_test directly affected by the files.", ), + "_target": attr.label( + default = "//tools/nogo:target", + cfg = "target", + ), "_filter": attr.label(default = "//tools/nogo/filter:filter"), }, test = True, diff --git a/tools/nogo/filter/BUILD b/tools/nogo/filter/BUILD index e56a783e2..e3049521e 100644 --- a/tools/nogo/filter/BUILD +++ b/tools/nogo/filter/BUILD @@ -9,6 +9,7 @@ go_binary( visibility = ["//visibility:public"], deps = [ "//tools/nogo", + "//tools/worker", "@in_gopkg_yaml_v2//:go_default_library", ], ) diff --git a/tools/nogo/filter/main.go b/tools/nogo/filter/main.go index 8be38ca6d..d50336b9b 100644 --- a/tools/nogo/filter/main.go +++ b/tools/nogo/filter/main.go @@ -26,6 +26,7 @@ import ( yaml "gopkg.in/yaml.v2" "gvisor.dev/gvisor/tools/nogo" + "gvisor.dev/gvisor/tools/worker" ) type stringList []string @@ -44,34 +45,44 @@ var ( configFiles stringList outputFile string showConfig bool + check bool ) func init() { - flag.Var(&inputFiles, "input", "findings input files") - flag.StringVar(&outputFile, "output", "", "findings output file") + flag.Var(&inputFiles, "input", "findings input files (gob format)") + flag.StringVar(&outputFile, "output", "", "findings output file (json format)") flag.Var(&configFiles, "config", "findings configuration files") flag.BoolVar(&showConfig, "show-config", false, "dump configuration only") + flag.BoolVar(&check, "check", false, "assume input is in json format") } func main() { - flag.Parse() + worker.Work(run) +} - // Load all available findings. - var findings []nogo.Finding - for _, filename := range inputFiles { - inputFindings, err := nogo.ExtractFindingsFromFile(filename) +var ( + cachedFindings = worker.NewCache("findings") // With nogo.FindingSet. + cachedFiltered = worker.NewCache("filtered") // With nogo.FindingSet. + cachedConfigs = worker.NewCache("configs") // With nogo.Config. + cachedFullConfigs = worker.NewCache("compiled") // With nogo.Config. +) + +func loadFindings(filename string) nogo.FindingSet { + return cachedFindings.Lookup([]string{filename}, func() worker.Sizer { + r, err := os.Open(filename) + if err != nil { + log.Fatalf("unable to open input %q: %v", filename, err) + } + inputFindings, err := nogo.ExtractFindingsFrom(r, check /* json */) if err != nil { log.Fatalf("unable to extract findings from %s: %v", filename, err) } - findings = append(findings, inputFindings...) - } + return inputFindings + }).(nogo.FindingSet) +} - // Open and merge all configuations. - config := &nogo.Config{ - Global: make(nogo.AnalyzerConfig), - Analyzers: make(map[nogo.AnalyzerName]nogo.AnalyzerConfig), - } - for _, filename := range configFiles { +func loadConfig(filename string) *nogo.Config { + return cachedConfigs.Lookup([]string{filename}, func() worker.Sizer { content, err := ioutil.ReadFile(filename) if err != nil { log.Fatalf("unable to read %s: %v", filename, err) @@ -82,53 +93,98 @@ func main() { if err := dec.Decode(&newConfig); err != nil { log.Fatalf("unable to decode %s: %v", filename, err) } - config.Merge(&newConfig) if showConfig { content, err := yaml.Marshal(&newConfig) if err != nil { log.Fatalf("error marshalling config: %v", err) } - mergedBytes, err := yaml.Marshal(config) - if err != nil { - log.Fatalf("error marshalling config: %v", err) - } fmt.Fprintf(os.Stdout, "Loaded configuration from %s:\n%s\n", filename, string(content)) - fmt.Fprintf(os.Stdout, "Merged configuration:\n%s\n", string(mergedBytes)) } - } - if err := config.Compile(); err != nil { - log.Fatalf("error compiling config: %v", err) - } + return &newConfig + }).(*nogo.Config) +} + +func loadConfigs(filenames []string) *nogo.Config { + return cachedFullConfigs.Lookup(filenames, func() worker.Sizer { + config := &nogo.Config{ + Global: make(nogo.AnalyzerConfig), + Analyzers: make(map[nogo.AnalyzerName]nogo.AnalyzerConfig), + } + for _, filename := range configFiles { + config.Merge(loadConfig(filename)) + if showConfig { + mergedBytes, err := yaml.Marshal(config) + if err != nil { + log.Fatalf("error marshalling config: %v", err) + } + fmt.Fprintf(os.Stdout, "Merged configuration:\n%s\n", string(mergedBytes)) + } + } + if err := config.Compile(); err != nil { + log.Fatalf("error compiling config: %v", err) + } + return config + }).(*nogo.Config) +} + +func run([]string) int { + // Open and merge all configuations. + config := loadConfigs(configFiles) if showConfig { - os.Exit(0) + return 0 } - // Filter the findings (and aggregate by group). - filteredFindings := make([]nogo.Finding, 0, len(findings)) - for _, finding := range findings { - if ok := config.ShouldReport(finding); ok { - filteredFindings = append(filteredFindings, finding) - } + // Load and filer available findings. + var filteredFindings []nogo.Finding + for _, filename := range inputFiles { + // Note that this applies a caching strategy to the filtered + // findings, because *this is by far the most expensive part of + // evaluation*. The set of findings is large and applying the + // configuration is complex. Therefore, we segment this cache + // on each individual raw findings input file and the + // configuration files. Note that this cache is keyed on all + // the configuration files and each individual raw findings, so + // is guaranteed to be safe. This allows us to reuse the same + // filter result many times over, because e.g. all standard + // library findings will be available to all packages. + filteredFindings = append(filteredFindings, + cachedFiltered.Lookup(append(configFiles, filename), func() worker.Sizer { + inputFindings := loadFindings(filename) + filteredFindings := make(nogo.FindingSet, 0, len(inputFindings)) + for _, finding := range inputFindings { + if ok := config.ShouldReport(finding); ok { + filteredFindings = append(filteredFindings, finding) + } + } + return filteredFindings + }).(nogo.FindingSet)...) } // Write the output (if required). // // If the outputFile is specified, then we exit here. Otherwise, // we continue to write to stdout and treat like a test. + // + // Note that the output of the filter is always json, which is + // human readable and the format that is consumed by tricorder. if outputFile != "" { - if err := nogo.WriteFindingsToFile(filteredFindings, outputFile); err != nil { + w, err := os.OpenFile(outputFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + log.Fatalf("unable to open output file %q: %v", outputFile, err) + } + if err := nogo.WriteFindingsTo(w, filteredFindings, true /* json */); err != nil { log.Fatalf("unable to write findings: %v", err) } - return + return 0 } // Treat the run as a test. if len(filteredFindings) == 0 { fmt.Fprintf(os.Stdout, "PASS\n") - os.Exit(0) + return 0 } for _, finding := range filteredFindings { fmt.Fprintf(os.Stdout, "%s\n", finding.String()) } - os.Exit(1) + return 1 } diff --git a/tools/nogo/findings.go b/tools/nogo/findings.go index a00cfe813..329a7062e 100644 --- a/tools/nogo/findings.go +++ b/tools/nogo/findings.go @@ -15,10 +15,13 @@ package nogo import ( + "encoding/gob" "encoding/json" "fmt" "go/token" - "io/ioutil" + "io" + "os" + "reflect" "sort" ) @@ -29,63 +32,96 @@ type Finding struct { Message string } +// findingSize is the size of the finding struct itself. +var findingSize = int64(reflect.TypeOf(Finding{}).Size()) + +// Size implements worker.Sizer.Size. +func (f *Finding) Size() int64 { + return int64(len(f.Category)) + int64(len(f.Message)) + findingSize +} + // String implements fmt.Stringer.String. func (f *Finding) String() string { return fmt.Sprintf("%s: %s: %s", f.Category, f.Position.String(), f.Message) } -// WriteFindingsToFile writes findings to a file. -func WriteFindingsToFile(findings []Finding, filename string) error { - content, err := WriteFindingsToBytes(findings) - if err != nil { - return err +// FindingSet is a collection of findings. +type FindingSet []Finding + +// Size implmements worker.Sizer.Size. +func (fs FindingSet) Size() int64 { + size := int64(0) + for _, finding := range fs { + size += finding.Size() } - return ioutil.WriteFile(filename, content, 0644) + return size } -// WriteFindingsToBytes serializes findings as bytes. -func WriteFindingsToBytes(findings []Finding) ([]byte, error) { - // N.B. Sort all the findings in order to maximize cacheability. - sort.Slice(findings, func(i, j int) bool { +// Sort sorts all findings. +func (fs FindingSet) Sort() { + sort.Slice(fs, func(i, j int) bool { switch { - case findings[i].Position.Filename < findings[j].Position.Filename: + case fs[i].Position.Filename < fs[j].Position.Filename: return true - case findings[i].Position.Filename > findings[j].Position.Filename: + case fs[i].Position.Filename > fs[j].Position.Filename: return false - case findings[i].Position.Line < findings[j].Position.Line: + case fs[i].Position.Line < fs[j].Position.Line: return true - case findings[i].Position.Line > findings[j].Position.Line: + case fs[i].Position.Line > fs[j].Position.Line: return false - case findings[i].Position.Column < findings[j].Position.Column: + case fs[i].Position.Column < fs[j].Position.Column: return true - case findings[i].Position.Column > findings[j].Position.Column: + case fs[i].Position.Column > fs[j].Position.Column: return false - case findings[i].Category < findings[j].Category: + case fs[i].Category < fs[j].Category: return true - case findings[i].Category > findings[j].Category: + case fs[i].Category > fs[j].Category: return false - case findings[i].Message < findings[j].Message: + case fs[i].Message < fs[j].Message: return true - case findings[i].Message > findings[j].Message: + case fs[i].Message > fs[j].Message: return false default: return false } }) - return json.Marshal(findings) +} + +// WriteFindingsTo serializes findings. +func WriteFindingsTo(w io.Writer, findings FindingSet, asJSON bool) error { + // N.B. Sort all the findings in order to maximize cacheability. + findings.Sort() + if asJSON { + enc := json.NewEncoder(w) + return enc.Encode(findings) + } + enc := gob.NewEncoder(w) + return enc.Encode(findings) } // ExtractFindingsFromFile loads findings from a file. -func ExtractFindingsFromFile(filename string) ([]Finding, error) { - content, err := ioutil.ReadFile(filename) +func ExtractFindingsFromFile(filename string, asJSON bool) (FindingSet, error) { + r, err := os.Open(filename) if err != nil { return nil, err } - return ExtractFindingsFromBytes(content) + defer r.Close() + return ExtractFindingsFrom(r, asJSON) } // ExtractFindingsFromBytes loads findings from bytes. -func ExtractFindingsFromBytes(content []byte) (findings []Finding, err error) { - err = json.Unmarshal(content, &findings) +func ExtractFindingsFrom(r io.Reader, asJSON bool) (findings FindingSet, err error) { + if asJSON { + dec := json.NewDecoder(r) + err = dec.Decode(&findings) + } else { + dec := gob.NewDecoder(r) + err = dec.Decode(&findings) + } return findings, err } + +func init() { + gob.Register((*Finding)(nil)) + gob.Register((*FindingSet)(nil)) +} diff --git a/tools/nogo/nogo.go b/tools/nogo/nogo.go index c1b88e89f..acee7c8bc 100644 --- a/tools/nogo/nogo.go +++ b/tools/nogo/nogo.go @@ -19,7 +19,8 @@ package nogo import ( - "encoding/json" + "bytes" + "encoding/gob" "errors" "fmt" "go/ast" @@ -43,6 +44,7 @@ import ( // Special case: flags live here and change overall behavior. "gvisor.dev/gvisor/tools/checkescape" + "gvisor.dev/gvisor/tools/worker" ) // StdlibConfig is serialized as the configuration. @@ -82,46 +84,94 @@ type stdlibFact struct { Facts []byte } -// factLoader returns a function that loads facts. -// -// This resolves all standard library facts and imported package facts up -// front. The returned loader function will never return an error, only -// empty facts. -// -// This is done because all stdlib data is stored together, and we don't want -// to load this data many times over. -func (c *PackageConfig) factLoader() (loader, error) { - allFacts := make(map[string][]byte) - if c.StdlibFacts != "" { - data, err := ioutil.ReadFile(c.StdlibFacts) - if err != nil { - return nil, fmt.Errorf("error loading stdlib facts from %q: %w", c.StdlibFacts, err) - } - var ( - stdlibFactsSorted []stdlibFact - stdlibFacts = make(map[string][]byte) - ) - // See below re: sorted serialization. - if err := json.Unmarshal(data, &stdlibFactsSorted); err != nil { - return nil, fmt.Errorf("error loading stdlib facts: %w", err) - } - for _, stdlibFact := range stdlibFactsSorted { - stdlibFacts[stdlibFact.Package] = stdlibFact.Facts - } - for pkg, data := range stdlibFacts { - allFacts[pkg] = data +// stdlibFacts is a set of standard library facts. +type stdlibFacts map[string][]byte + +// Size implements worker.Sizer.Size. +func (sf stdlibFacts) Size() int64 { + size := int64(0) + for filename, data := range sf { + size += int64(len(filename)) + size += int64(len(data)) + } + return size +} + +// EncodeTo serializes stdlibFacts. +func (sf stdlibFacts) EncodeTo(w io.Writer) error { + stdlibFactsSorted := make([]stdlibFact, 0, len(sf)) + for pkg, facts := range sf { + stdlibFactsSorted = append(stdlibFactsSorted, stdlibFact{ + Package: pkg, + Facts: facts, + }) + } + sort.Slice(stdlibFactsSorted, func(i, j int) bool { + return stdlibFactsSorted[i].Package < stdlibFactsSorted[j].Package + }) + enc := gob.NewEncoder(w) + if err := enc.Encode(stdlibFactsSorted); err != nil { + return err + } + return nil +} + +// DecodeFrom deserializes stdlibFacts. +func (sf stdlibFacts) DecodeFrom(r io.Reader) error { + var stdlibFactsSorted []stdlibFact + dec := gob.NewDecoder(r) + if err := dec.Decode(&stdlibFactsSorted); err != nil { + return err + } + for _, stdlibFact := range stdlibFactsSorted { + sf[stdlibFact.Package] = stdlibFact.Facts + } + return nil +} + +var ( + // cachedFacts caches by file (just byte data). + cachedFacts = worker.NewCache("facts") + + // stdlibCachedFacts caches the standard library (stdlibFacts). + stdlibCachedFacts = worker.NewCache("stdlib") +) + +// factLoader loads facts. +func (c *PackageConfig) factLoader(path string) (data []byte, err error) { + filename, ok := c.FactMap[path] + if ok { + cb := cachedFacts.Lookup([]string{filename}, func() worker.Sizer { + data, readErr := ioutil.ReadFile(filename) + if readErr != nil { + err = fmt.Errorf("error loading %q: %w", filename, readErr) + return nil + } + return worker.CacheBytes(data) + }) + if cb != nil { + return []byte(cb.(worker.CacheBytes)), err } + return nil, err } - for pkg, file := range c.FactMap { - data, err := ioutil.ReadFile(file) - if err != nil { - return nil, fmt.Errorf("error loading %q: %w", file, err) + cb := stdlibCachedFacts.Lookup([]string{c.StdlibFacts}, func() worker.Sizer { + r, openErr := os.Open(c.StdlibFacts) + if openErr != nil { + err = fmt.Errorf("error loading stdlib facts from %q: %w", c.StdlibFacts, openErr) + return nil + } + defer r.Close() + sf := make(stdlibFacts) + if readErr := sf.DecodeFrom(r); readErr != nil { + err = fmt.Errorf("error loading stdlib facts: %w", readErr) + return nil } - allFacts[pkg] = data + return sf + }) + if cb != nil { + return (cb.(stdlibFacts))[path], err } - return func(path string) ([]byte, error) { - return allFacts[path], nil - }, nil + return nil, err } // shouldInclude indicates whether the file should be included. @@ -205,7 +255,7 @@ var ErrSkip = errors.New("skipped") // // Note that not all parts of the source are expected to build. We skip obvious // test files, and cmd files, which should not be dependencies. -func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindings []Finding, facts []byte, err error) { +func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindings FindingSet, facts []byte, err error) { if len(config.Srcs) == 0 { return nil, nil, nil } @@ -275,16 +325,16 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi } // Closure to check a single package. - stdlibFacts := make(map[string][]byte) - stdlibErrs := make(map[string]error) + localStdlibFacts := make(stdlibFacts) + localStdlibErrs := make(map[string]error) var checkOne func(pkg string) error // Recursive. checkOne = func(pkg string) error { // Is this already done? - if _, ok := stdlibFacts[pkg]; ok { + if _, ok := localStdlibFacts[pkg]; ok { return nil } // Did this fail previously? - if _, ok := stdlibErrs[pkg]; ok { + if _, ok := localStdlibErrs[pkg]; ok { return nil } @@ -300,7 +350,7 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi // If there's no binary for this package, it is likely // not built with the distribution. That's fine, we can // just skip analysis. - stdlibErrs[pkg] = err + localStdlibErrs[pkg] = err return nil } @@ -317,10 +367,10 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi if err != nil { // If we can't analyze a package from the standard library, // then we skip it. It will simply not have any findings. - stdlibErrs[pkg] = err + localStdlibErrs[pkg] = err return nil } - stdlibFacts[pkg] = factData + localStdlibFacts[pkg] = factData allFindings = append(allFindings, findings...) return nil } @@ -337,34 +387,23 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi } // Sanity check. - if len(stdlibFacts) == 0 { + if len(localStdlibFacts) == 0 { return nil, nil, fmt.Errorf("no stdlib facts found: misconfiguration?") } - // Write out all findings. Note that the standard library facts - // must be serialized in a sorted order to ensure cacheability. - stdlibFactsSorted := make([]stdlibFact, 0, len(stdlibFacts)) - for pkg, facts := range stdlibFacts { - stdlibFactsSorted = append(stdlibFactsSorted, stdlibFact{ - Package: pkg, - Facts: facts, - }) - } - sort.Slice(stdlibFactsSorted, func(i, j int) bool { - return stdlibFactsSorted[i].Package < stdlibFactsSorted[j].Package - }) - factData, err := json.Marshal(stdlibFactsSorted) - if err != nil { - return nil, nil, fmt.Errorf("error saving stdlib facts: %w", err) + // Write out all findings. + buf := bytes.NewBuffer(nil) + if err := localStdlibFacts.EncodeTo(buf); err != nil { + return nil, nil, fmt.Errorf("error serialized stdlib facts: %v", err) } // Write out all errors. - for pkg, err := range stdlibErrs { + for pkg, err := range localStdlibErrs { log.Printf("WARNING: error while processing %v: %v", pkg, err) } // Return all findings. - return allFindings, factData, nil + return allFindings, buf.Bytes(), nil } // CheckPackage runs all given analyzers. @@ -417,11 +456,7 @@ func CheckPackage(config *PackageConfig, analyzers []*analysis.Analyzer, importC } // Load all package facts. - loader, err := config.factLoader() - if err != nil { - return nil, nil, fmt.Errorf("error loading facts: %w", err) - } - facts, err := facts.Decode(types, loader) + facts, err := facts.Decode(types, config.factLoader) if err != nil { return nil, nil, fmt.Errorf("error decoding facts: %w", err) } @@ -496,3 +531,7 @@ func CheckPackage(config *PackageConfig, analyzers []*analysis.Analyzer, importC // Return all findings. return findings, facts.Encode(), nil } + +func init() { + gob.Register((*stdlibFact)(nil)) +} |