nogo: enable bazel workers and other optimizations.

This is a suite of changes intended to dramatically speed up nogo speed. First, there are minor changes that help efficiency significantly. * Gob-based encoding is used internally, and JSON only used for the final set of findings. This is done to preserve the existing format (which is consumed by external tooling), and to facilitate manual debugging. * Unnecessary regex compilation is elided in the configuration, and care is taken for merges to prevent redundant entries. I'm not sure quite sure how, but it turns out that this was consumed a significant amount of time, presumably compiling the same regexes over and over again. Second, this change enables bazel workers for nogo analyzers. Workers enable persistent processes instead of creating and tearing down a sandbox every invocation. A library is introduced to abstraction these details, and allow the tools to still be written using standard flags, etc. The key here is that these binaries and the core of nogo become aware of caches with worker.Cache. This allows us to save significant time loading the same set of files and findings over and over again. These caches are keyed by the digests that are provided by bazel, and are capped in overall size. Note that the worker package attempts to capture output during each run, but tools are no longer permitted to write to stdout. This necessitated dropping some spurious output from checklocks. PiperOrigin-RevId: 370505732
author: Adin Scannell <ascannell@google.com> 2021-04-26 11:40:10 -0700
committer: gVisor bot <gvisor-bot@google.com> 2021-04-26 11:42:49 -0700
commit: 5b7b7daa425ffc93e98c12cbd37ea7b15a8bcc8d (patch)
tree: 82079eb5110b5e36f7e657c076a12ba45c669475 /tools
parent: bf64560681182b0024790f683f4c9aea142e70c5 (diff)
15 files changed, 790 insertions, 193 deletions
diff --git a/tools/bazeldefs/BUILD b/tools/bazeldefs/BUILD
index c2c1287a1..24e6f8a94 100644
--- a/tools/bazeldefs/BUILD
+++ b/tools/bazeldefs/BUILD
@@ -1,6 +1,9 @@
-load("//tools:defs.bzl", "bzl_library")
+load("//tools:defs.bzl", "bzl_library", "go_proto_library")
 
-package(licenses = ["notice"])
+package(
+    default_visibility = ["//:sandbox"],
+    licenses = ["notice"],
+)
 
 bzl_library(
     name = "platforms_bzl",
@@ -45,3 +48,9 @@ genrule(
     stamp = True,
     visibility = ["//:sandbox"],
 )
+
+go_proto_library(
+    name = "worker_protocol_go_proto",
+    importpath = "gvisor.dev/bazel/worker_protocol_go_proto",
+    proto = "@bazel_tools//src/main/protobuf:worker_protocol_proto",
+)
diff --git a/tools/bazeldefs/go.bzl b/tools/bazeldefs/go.bzl
index d16376032..da027846b 100644
--- a/tools/bazeldefs/go.bzl
+++ b/tools/bazeldefs/go.bzl
@@ -8,8 +8,13 @@ load("//tools/bazeldefs:defs.bzl", "select_arch", "select_system")
 gazelle = _gazelle
 go_embed_data = _go_embed_data
 go_path = _go_path
+bazel_worker_proto = "//tools/bazeldefs:worker_protocol_go_proto"
 
 def _go_proto_or_grpc_library(go_library_func, name, **kwargs):
+    if "importpath" in kwargs:
+        # If importpath is explicit, pass straight through.
+        go_library_func(name = name, **kwargs)
+        return
     deps = [
         dep.replace("_proto", "_go_proto")
         for dep in (kwargs.pop("deps", []) or [])
diff --git a/tools/checklocks/checklocks.go b/tools/checklocks/checklocks.go
index 4ec2918f6..1e877d394 100644
--- a/tools/checklocks/checklocks.go
+++ b/tools/checklocks/checklocks.go
@@ -563,9 +563,7 @@ func (pc *passContext) checkFunctionCall(call *ssa.Call, isExempted bool, lh *lo
 	if !ok {
 		return
 	}
-
 	if fn.Object() == nil {
-		log.Warningf("fn w/ nil Object is: %+v", fn)
 		return
 	}
 
@@ -579,7 +577,6 @@ func (pc *passContext) checkFunctionCall(call *ssa.Call, isExempted bool, lh *lo
 			r := (*call.Value().Operands(nil)[fg.ParameterNumber+1])
 			guardObj := findField(r, fg.FieldNumber)
 			if guardObj == nil {
-				log.Infof("guardObj nil but funcFact: %+v", funcFact)
 				continue
 			}
 			var fieldFacts lockFieldFacts
diff --git a/tools/defs.bzl b/tools/defs.bzl
index d2c697c0d..27542a2f5 100644
--- a/tools/defs.bzl
+++ b/tools/defs.bzl
@@ -10,7 +10,7 @@ load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_
 load("//tools/nogo:defs.bzl", "nogo_test")
 load("//tools/bazeldefs:defs.bzl", _arch_genrule = "arch_genrule", _build_test = "build_test", _bzl_library = "bzl_library", _coreutil = "coreutil", _default_installer = "default_installer", _default_net_util = "default_net_util", _more_shards = "more_shards", _most_shards = "most_shards", _proto_library = "proto_library", _select_arch = "select_arch", _select_system = "select_system", _short_path = "short_path", _version = "version")
 load("//tools/bazeldefs:cc.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_grpc_library = "cc_grpc_library", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _gbenchmark = "gbenchmark", _grpcpp = "grpcpp", _gtest = "gtest", _vdso_linker_option = "vdso_linker_option")
-load("//tools/bazeldefs:go.bzl", _gazelle = "gazelle", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_library = "go_library", _go_path = "go_path", _go_proto_library = "go_proto_library", _go_rule = "go_rule", _go_test = "go_test", _select_goarch = "select_goarch", _select_goos = "select_goos")
+load("//tools/bazeldefs:go.bzl", _bazel_worker_proto = "bazel_worker_proto", _gazelle = "gazelle", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_library = "go_library", _go_path = "go_path", _go_proto_library = "go_proto_library", _go_rule = "go_rule", _go_test = "go_test", _select_goarch = "select_goarch", _select_goos = "select_goos")
 load("//tools/bazeldefs:pkg.bzl", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar")
 load("//tools/bazeldefs:platforms.bzl", _default_platform = "default_platform", _platforms = "platforms")
 load("//tools/bazeldefs:tags.bzl", "go_suffixes")
@@ -47,6 +47,8 @@ go_path = _go_path
 select_goos = _select_goos
 select_goarch = _select_goarch
 go_embed_data = _go_embed_data
+go_proto_library = _go_proto_library
+bazel_worker_proto = _bazel_worker_proto
 
 # Packaging rules.
 pkg_deb = _pkg_deb
diff --git a/tools/nogo/BUILD b/tools/nogo/BUILD
index 5fc60d8d8..6c6f604b5 100644
--- a/tools/nogo/BUILD
+++ b/tools/nogo/BUILD
@@ -37,6 +37,7 @@ go_library(
         "//tools/checkescape",
         "//tools/checklocks",
         "//tools/checkunsafe",
+        "//tools/worker",
         "@co_honnef_go_tools//staticcheck:go_default_library",
         "@co_honnef_go_tools//stylecheck:go_default_library",
         "@org_golang_x_tools//go/analysis:go_default_library",
diff --git a/tools/nogo/check/BUILD b/tools/nogo/check/BUILD
index e18483a18..666780dd3 100644
--- a/tools/nogo/check/BUILD
+++ b/tools/nogo/check/BUILD
@@ -7,5 +7,8 @@ go_binary(
     srcs = ["main.go"],
     nogo = False,
     visibility = ["//visibility:public"],
-    deps = ["//tools/nogo"],
+    deps = [
+        "//tools/nogo",
+        "//tools/worker",
+    ],
 )
diff --git a/tools/nogo/check/main.go b/tools/nogo/check/main.go
index 4194770be..3a6c3fb08 100644
--- a/tools/nogo/check/main.go
+++ b/tools/nogo/check/main.go
@@ -24,6 +24,7 @@ import (
 	"os"
 
 	"gvisor.dev/gvisor/tools/nogo"
+	"gvisor.dev/gvisor/tools/worker"
 )
 
 var (
@@ -49,9 +50,10 @@ func loadConfig(file string, config interface{}) interface{} {
 }
 
 func main() {
-	// Parse all flags.
-	flag.Parse()
+	worker.Work(run)
+}
 
+func run([]string) int {
 	var (
 		findings []nogo.Finding
 		factData []byte
@@ -90,7 +92,11 @@ func main() {
 
 	// Write all findings.
 	if *findingsOutput != "" {
-		if err := nogo.WriteFindingsToFile(findings, *findingsOutput); err != nil {
+		w, err := os.OpenFile(*findingsOutput, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
+		if err != nil {
+			log.Fatalf("error opening output file %q: %v", *findingsOutput, err)
+		}
+		if err := nogo.WriteFindingsTo(w, findings, false /* json */); err != nil {
 			log.Fatalf("error writing findings to %q: %v", *findingsOutput, err)
 		}
 	} else {
@@ -98,4 +104,6 @@ func main() {
 			fmt.Fprintf(os.Stdout, "%s\n", finding.String())
 		}
 	}
+
+	return 0
 }
diff --git a/tools/nogo/config.go b/tools/nogo/config.go
index 2fea5b3e1..6436f9d34 100644
--- a/tools/nogo/config.go
+++ b/tools/nogo/config.go
@@ -73,17 +73,28 @@ type ItemConfig struct {
 }
 
 func compileRegexps(ss []string, rs *[]*regexp.Regexp) error {
-	*rs = make([]*regexp.Regexp, 0, len(ss))
-	for _, s := range ss {
+	*rs = make([]*regexp.Regexp, len(ss))
+	for i, s := range ss {
 		r, err := regexp.Compile(s)
 		if err != nil {
 			return err
 		}
-		*rs = append(*rs, r)
+		(*rs)[i] = r
 	}
 	return nil
 }
 
+// RegexpCount is used by AnalyzerConfig.RegexpCount.
+func (i *ItemConfig) RegexpCount() int64 {
+	if i == nil {
+		// See compile.
+		return 0
+	}
+	// Return the number of regular expressions compiled for these items.
+	// This is how the cache size of the configuration is measured.
+	return int64(len(i.exclude) + len(i.suppress))
+}
+
 func (i *ItemConfig) compile() error {
 	if i == nil {
 		// This may be nil if nothing is included in the
@@ -100,9 +111,25 @@ func (i *ItemConfig) compile() error {
 	return nil
 }
 
+func merge(a, b []string) []string {
+	found := make(map[string]struct{})
+	result := make([]string, 0, len(a)+len(b))
+	for _, elem := range a {
+		found[elem] = struct{}{}
+		result = append(result, elem)
+	}
+	for _, elem := range b {
+		if _, ok := found[elem]; ok {
+			continue
+		}
+		result = append(result, elem)
+	}
+	return result
+}
+
 func (i *ItemConfig) merge(other *ItemConfig) {
-	i.Exclude = append(i.Exclude, other.Exclude...)
-	i.Suppress = append(i.Suppress, other.Suppress...)
+	i.Exclude = merge(i.Exclude, other.Exclude)
+	i.Suppress = merge(i.Suppress, other.Suppress)
 }
 
 func (i *ItemConfig) shouldReport(fullPos, msg string) bool {
@@ -129,6 +156,15 @@ func (i *ItemConfig) shouldReport(fullPos, msg string) bool {
 // configurations depending on what Group the file belongs to.
 type AnalyzerConfig map[GroupName]*ItemConfig
 
+// RegexpCount is used by Config.Size.
+func (a AnalyzerConfig) RegexpCount() int64 {
+	count := int64(0)
+	for _, gc := range a {
+		count += gc.RegexpCount()
+	}
+	return count
+}
+
 func (a AnalyzerConfig) compile() error {
 	for name, gc := range a {
 		if err := gc.compile(); err != nil {
@@ -179,22 +215,36 @@ type Config struct {
 	Analyzers map[AnalyzerName]AnalyzerConfig `yaml:"analyzers"`
 }
 
+// Size implements worker.Sizer.Size.
+func (c *Config) Size() int64 {
+	count := c.Global.RegexpCount()
+	for _, config := range c.Analyzers {
+		count += config.RegexpCount()
+	}
+	// The size is measured as the number of regexps that are compiled
+	// here. We multiply by 1k to produce an estimate.
+	return 1024 * count
+}
+
 // Merge merges two configurations.
 func (c *Config) Merge(other *Config) {
 	// Merge all groups.
+	//
+	// Select the other first, as the order provided in the second will
+	// provide precendence over the same group defined in the first one.
+	seenGroups := make(map[GroupName]struct{})
+	newGroups := make([]Group, 0, len(c.Groups)+len(other.Groups))
 	for _, g := range other.Groups {
-		// Is there a matching group? If yes, we just delete
-		// it. This will preserve the order provided in the
-		// overriding file, even if it differs.
-		for i := 0; i < len(c.Groups); i++ {
-			if g.Name == c.Groups[i].Name {
-				copy(c.Groups[i:], c.Groups[i+1:])
-				c.Groups = c.Groups[:len(c.Groups)-1]
-				break
-			}
+		newGroups = append(newGroups, g)
+		seenGroups[g.Name] = struct{}{}
+	}
+	for _, g := range c.Groups {
+		if _, ok := seenGroups[g.Name]; ok {
+			continue
 		}
-		c.Groups = append(c.Groups, g)
+		newGroups = append(newGroups, g)
 	}
+	c.Groups = newGroups
 
 	// Merge global configurations.
 	c.Global.merge(other.Global)
diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl
index be8b82f9c..ddf5816a6 100644
--- a/tools/nogo/defs.bzl
+++ b/tools/nogo/defs.bzl
@@ -29,6 +29,7 @@ NogoTargetInfo = provider(
     fields = {
         "goarch": "the build architecture (GOARCH)",
         "goos": "the build OS target (GOOS)",
+        "worker_debug": "transitive debugging",
     },
 )
 
@@ -36,6 +37,7 @@ def _nogo_target_impl(ctx):
     return [NogoTargetInfo(
         goarch = ctx.attr.goarch,
         goos = ctx.attr.goos,
+        worker_debug = ctx.attr.worker_debug,
     )]
 
 nogo_target = go_rule(
@@ -50,6 +52,10 @@ nogo_target = go_rule(
             doc = "the Go OS target (propagated to other rules).",
             mandatory = True,
         ),
+        "worker_debug": attr.bool(
+            doc = "whether worker debugging should be enabled.",
+            default = False,
+        ),
     },
 )
 
@@ -61,7 +67,7 @@ def _nogo_objdump_tool_impl(ctx):
     # we need the tool to handle this case by creating a temporary file.
     #
     # [1] https://github.com/golang/go/issues/41051
-    nogo_target_info = ctx.attr._nogo_target[NogoTargetInfo]
+    nogo_target_info = ctx.attr._target[NogoTargetInfo]
     go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch)
     env_prefix = " ".join(["%s=%s" % (key, value) for (key, value) in go_ctx.env.items()])
     dumper = ctx.actions.declare_file(ctx.label.name)
@@ -94,7 +100,7 @@ nogo_objdump_tool = go_rule(
     rule,
     implementation = _nogo_objdump_tool_impl,
     attrs = {
-        "_nogo_target": attr.label(
+        "_target": attr.label(
             default = "//tools/nogo:target",
             cfg = "target",
         ),
@@ -112,7 +118,7 @@ NogoStdlibInfo = provider(
 
 def _nogo_stdlib_impl(ctx):
     # Build the standard library facts.
-    nogo_target_info = ctx.attr._nogo_target[NogoTargetInfo]
+    nogo_target_info = ctx.attr._target[NogoTargetInfo]
     go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch)
     facts = ctx.actions.declare_file(ctx.label.name + ".facts")
     raw_findings = ctx.actions.declare_file(ctx.label.name + ".raw_findings")
@@ -124,18 +130,29 @@ def _nogo_stdlib_impl(ctx):
     )
     config_file = ctx.actions.declare_file(ctx.label.name + ".cfg")
     ctx.actions.write(config_file, config.to_json())
-    ctx.actions.run(
-        inputs = [config_file] + go_ctx.stdlib_srcs,
-        outputs = [facts, raw_findings],
-        tools = depset(go_ctx.runfiles.to_list() + ctx.files._nogo_objdump_tool),
-        executable = ctx.files._nogo_check[0],
-        mnemonic = "NogoStandardLibraryAnalysis",
-        progress_message = "Analyzing Go Standard Library",
-        arguments = go_ctx.nogo_args + [
-            "-objdump_tool=%s" % ctx.files._nogo_objdump_tool[0].path,
+    args_file = ctx.actions.declare_file(ctx.label.name + "_args_file")
+    ctx.actions.write(
+        output = args_file,
+        content = "\n".join(go_ctx.nogo_args + [
+            "-objdump_tool=%s" % ctx.files._objdump_tool[0].path,
             "-stdlib=%s" % config_file.path,
             "-findings=%s" % raw_findings.path,
             "-facts=%s" % facts.path,
+        ]),
+    )
+    ctx.actions.run(
+        inputs = [config_file] + go_ctx.stdlib_srcs + [args_file],
+        outputs = [facts, raw_findings],
+        tools = depset(go_ctx.runfiles.to_list() + ctx.files._objdump_tool),
+        executable = ctx.files._check[0],
+        mnemonic = "GoStandardLibraryAnalysis",
+        # Note that this does not support work execution currently. There is an
+        # issue with stdout pollution that is not yet resolved, so this is kept
+        # as a separate menomic.
+        progress_message = "Analyzing Go Standard Library",
+        arguments = [
+            "--worker_debug=%s" % nogo_target_info.worker_debug,
+            "@%s" % args_file.path,
         ],
     )
 
@@ -149,15 +166,15 @@ nogo_stdlib = go_rule(
     rule,
     implementation = _nogo_stdlib_impl,
     attrs = {
-        "_nogo_check": attr.label(
+        "_check": attr.label(
             default = "//tools/nogo/check:check",
             cfg = "host",
         ),
-        "_nogo_objdump_tool": attr.label(
+        "_objdump_tool": attr.label(
             default = "//tools/nogo:objdump_tool",
             cfg = "host",
         ),
-        "_nogo_target": attr.label(
+        "_target": attr.label(
             default = "//tools/nogo:target",
             cfg = "target",
         ),
@@ -276,7 +293,7 @@ def _nogo_aspect_impl(target, ctx):
     inputs.append(stdlib_facts)
 
     # The nogo tool operates on a configuration serialized in JSON format.
-    nogo_target_info = ctx.attr._nogo_target[NogoTargetInfo]
+    nogo_target_info = ctx.attr._target[NogoTargetInfo]
     go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch)
     facts = ctx.actions.declare_file(target.label.name + ".facts")
     raw_findings = ctx.actions.declare_file(target.label.name + ".raw_findings")
@@ -294,19 +311,28 @@ def _nogo_aspect_impl(target, ctx):
     config_file = ctx.actions.declare_file(target.label.name + ".cfg")
     ctx.actions.write(config_file, config.to_json())
     inputs.append(config_file)
-    ctx.actions.run(
-        inputs = inputs,
-        outputs = [facts, raw_findings],
-        tools = depset(go_ctx.runfiles.to_list() + ctx.files._nogo_objdump_tool),
-        executable = ctx.files._nogo_check[0],
-        mnemonic = "NogoAnalysis",
-        progress_message = "Analyzing %s" % target.label,
-        arguments = go_ctx.nogo_args + [
+    args_file = ctx.actions.declare_file(ctx.label.name + "_args_file")
+    ctx.actions.write(
+        output = args_file,
+        content = "\n".join(go_ctx.nogo_args + [
             "-binary=%s" % target_objfile.path,
-            "-objdump_tool=%s" % ctx.files._nogo_objdump_tool[0].path,
+            "-objdump_tool=%s" % ctx.files._objdump_tool[0].path,
             "-package=%s" % config_file.path,
             "-findings=%s" % raw_findings.path,
             "-facts=%s" % facts.path,
+        ]),
+    )
+    ctx.actions.run(
+        inputs = inputs + [args_file],
+        outputs = [facts, raw_findings],
+        tools = depset(go_ctx.runfiles.to_list() + ctx.files._objdump_tool),
+        executable = ctx.files._check[0],
+        mnemonic = "GoStaticAnalysis",
+        progress_message = "Analyzing %s" % target.label,
+        execution_requirements = {"supports-workers": "1"},
+        arguments = [
+            "--worker_debug=%s" % nogo_target_info.worker_debug,
+            "@%s" % args_file.path,
         ],
     )
 
@@ -339,27 +365,30 @@ nogo_aspect = go_rule(
         "embed",
     ],
     attrs = {
-        "_nogo_check": attr.label(
+        "_check": attr.label(
             default = "//tools/nogo/check:check",
             cfg = "host",
         ),
-        "_nogo_stdlib": attr.label(
-            default = "//tools/nogo:stdlib",
-            cfg = "host",
-        ),
-        "_nogo_objdump_tool": attr.label(
+        "_objdump_tool": attr.label(
             default = "//tools/nogo:objdump_tool",
             cfg = "host",
         ),
-        "_nogo_target": attr.label(
+        "_target": attr.label(
             default = "//tools/nogo:target",
             cfg = "target",
         ),
+        # The name of this attribute must not be _stdlib, since that
+        # appears to be reserved for some internal bazel use.
+        "_nogo_stdlib": attr.label(
+            default = "//tools/nogo:stdlib",
+            cfg = "host",
+        ),
     },
 )
 
 def _nogo_test_impl(ctx):
     """Check nogo findings."""
+    nogo_target_info = ctx.attr._target[NogoTargetInfo]
 
     # Ensure there's a single dependency.
     if len(ctx.attr.deps) != 1:
@@ -369,16 +398,27 @@ def _nogo_test_impl(ctx):
     # Build a step that applies the configuration.
     config_srcs = ctx.attr.config[NogoConfigInfo].srcs
     findings = ctx.actions.declare_file(ctx.label.name + ".findings")
+    args_file = ctx.actions.declare_file(ctx.label.name + "_args_file")
+    ctx.actions.write(
+        output = args_file,
+        content = "\n".join(
+            ["-input=%s" % f.path for f in raw_findings] +
+            ["-config=%s" % f.path for f in config_srcs] +
+            ["-output=%s" % findings.path],
+        ),
+    )
     ctx.actions.run(
-        inputs = raw_findings + ctx.files.srcs + config_srcs,
+        inputs = raw_findings + ctx.files.srcs + config_srcs + [args_file],
         outputs = [findings],
         tools = depset(ctx.files._filter),
         executable = ctx.files._filter[0],
         mnemonic = "GoStaticAnalysis",
         progress_message = "Generating %s" % ctx.label,
-        arguments = ["-input=%s" % f.path for f in raw_findings] +
-                    ["-config=%s" % f.path for f in config_srcs] +
-                    ["-output=%s" % findings.path],
+        execution_requirements = {"supports-workers": "1"},
+        arguments = [
+            "--worker_debug=%s" % nogo_target_info.worker_debug,
+            "@%s" % args_file.path,
+        ],
     )
 
     # Build a runner that checks the filtered facts.
@@ -389,7 +429,7 @@ def _nogo_test_impl(ctx):
     runner = ctx.actions.declare_file(ctx.label.name)
     runner_content = [
         "#!/bin/bash",
-        "exec %s -input=%s" % (ctx.files._filter[0].short_path, findings.short_path),
+        "exec %s -check -input=%s" % (ctx.files._filter[0].short_path, findings.short_path),
         "",
     ]
     ctx.actions.write(runner, "\n".join(runner_content), is_executable = True)
@@ -423,6 +463,10 @@ nogo_test = rule(
             allow_files = True,
             doc = "Relevant src files. This is ignored except to make the nogo_test directly affected by the files.",
         ),
+        "_target": attr.label(
+            default = "//tools/nogo:target",
+            cfg = "target",
+        ),
         "_filter": attr.label(default = "//tools/nogo/filter:filter"),
     },
     test = True,
diff --git a/tools/nogo/filter/BUILD b/tools/nogo/filter/BUILD
index e56a783e2..e3049521e 100644
--- a/tools/nogo/filter/BUILD
+++ b/tools/nogo/filter/BUILD
@@ -9,6 +9,7 @@ go_binary(
     visibility = ["//visibility:public"],
     deps = [
         "//tools/nogo",
+        "//tools/worker",
         "@in_gopkg_yaml_v2//:go_default_library",
     ],
 )
diff --git a/tools/nogo/filter/main.go b/tools/nogo/filter/main.go
index 8be38ca6d..d50336b9b 100644
--- a/tools/nogo/filter/main.go
+++ b/tools/nogo/filter/main.go
@@ -26,6 +26,7 @@ import (
 
 	yaml "gopkg.in/yaml.v2"
 	"gvisor.dev/gvisor/tools/nogo"
+	"gvisor.dev/gvisor/tools/worker"
 )
 
 type stringList []string
@@ -44,34 +45,44 @@ var (
 	configFiles stringList
 	outputFile  string
 	showConfig  bool
+	check       bool
 )
 
 func init() {
-	flag.Var(&inputFiles, "input", "findings input files")
-	flag.StringVar(&outputFile, "output", "", "findings output file")
+	flag.Var(&inputFiles, "input", "findings input files (gob format)")
+	flag.StringVar(&outputFile, "output", "", "findings output file (json format)")
 	flag.Var(&configFiles, "config", "findings configuration files")
 	flag.BoolVar(&showConfig, "show-config", false, "dump configuration only")
+	flag.BoolVar(&check, "check", false, "assume input is in json format")
 }
 
 func main() {
-	flag.Parse()
+	worker.Work(run)
+}
 
-	// Load all available findings.
-	var findings []nogo.Finding
-	for _, filename := range inputFiles {
-		inputFindings, err := nogo.ExtractFindingsFromFile(filename)
+var (
+	cachedFindings    = worker.NewCache("findings") // With nogo.FindingSet.
+	cachedFiltered    = worker.NewCache("filtered") // With nogo.FindingSet.
+	cachedConfigs     = worker.NewCache("configs")  // With nogo.Config.
+	cachedFullConfigs = worker.NewCache("compiled") // With nogo.Config.
+)
+
+func loadFindings(filename string) nogo.FindingSet {
+	return cachedFindings.Lookup([]string{filename}, func() worker.Sizer {
+		r, err := os.Open(filename)
+		if err != nil {
+			log.Fatalf("unable to open input %q: %v", filename, err)
+		}
+		inputFindings, err := nogo.ExtractFindingsFrom(r, check /* json */)
 		if err != nil {
 			log.Fatalf("unable to extract findings from %s: %v", filename, err)
 		}
-		findings = append(findings, inputFindings...)
-	}
+		return inputFindings
+	}).(nogo.FindingSet)
+}
 
-	// Open and merge all configuations.
-	config := &nogo.Config{
-		Global:    make(nogo.AnalyzerConfig),
-		Analyzers: make(map[nogo.AnalyzerName]nogo.AnalyzerConfig),
-	}
-	for _, filename := range configFiles {
+func loadConfig(filename string) *nogo.Config {
+	return cachedConfigs.Lookup([]string{filename}, func() worker.Sizer {
 		content, err := ioutil.ReadFile(filename)
 		if err != nil {
 			log.Fatalf("unable to read %s: %v", filename, err)
@@ -82,53 +93,98 @@ func main() {
 		if err := dec.Decode(&newConfig); err != nil {
 			log.Fatalf("unable to decode %s: %v", filename, err)
 		}
-		config.Merge(&newConfig)
 		if showConfig {
 			content, err := yaml.Marshal(&newConfig)
 			if err != nil {
 				log.Fatalf("error marshalling config: %v", err)
 			}
-			mergedBytes, err := yaml.Marshal(config)
-			if err != nil {
-				log.Fatalf("error marshalling config: %v", err)
-			}
 			fmt.Fprintf(os.Stdout, "Loaded configuration from %s:\n%s\n", filename, string(content))
-			fmt.Fprintf(os.Stdout, "Merged configuration:\n%s\n", string(mergedBytes))
 		}
-	}
-	if err := config.Compile(); err != nil {
-		log.Fatalf("error compiling config: %v", err)
-	}
+		return &newConfig
+	}).(*nogo.Config)
+}
+
+func loadConfigs(filenames []string) *nogo.Config {
+	return cachedFullConfigs.Lookup(filenames, func() worker.Sizer {
+		config := &nogo.Config{
+			Global:    make(nogo.AnalyzerConfig),
+			Analyzers: make(map[nogo.AnalyzerName]nogo.AnalyzerConfig),
+		}
+		for _, filename := range configFiles {
+			config.Merge(loadConfig(filename))
+			if showConfig {
+				mergedBytes, err := yaml.Marshal(config)
+				if err != nil {
+					log.Fatalf("error marshalling config: %v", err)
+				}
+				fmt.Fprintf(os.Stdout, "Merged configuration:\n%s\n", string(mergedBytes))
+			}
+		}
+		if err := config.Compile(); err != nil {
+			log.Fatalf("error compiling config: %v", err)
+		}
+		return config
+	}).(*nogo.Config)
+}
+
+func run([]string) int {
+	// Open and merge all configuations.
+	config := loadConfigs(configFiles)
 	if showConfig {
-		os.Exit(0)
+		return 0
 	}
 
-	// Filter the findings (and aggregate by group).
-	filteredFindings := make([]nogo.Finding, 0, len(findings))
-	for _, finding := range findings {
-		if ok := config.ShouldReport(finding); ok {
-			filteredFindings = append(filteredFindings, finding)
-		}
+	// Load and filer available findings.
+	var filteredFindings []nogo.Finding
+	for _, filename := range inputFiles {
+		// Note that this applies a caching strategy to the filtered
+		// findings, because *this is by far the most expensive part of
+		// evaluation*. The set of findings is large and applying the
+		// configuration is complex. Therefore, we segment this cache
+		// on each individual raw findings input file and the
+		// configuration files. Note that this cache is keyed on all
+		// the configuration files and each individual raw findings, so
+		// is guaranteed to be safe. This allows us to reuse the same
+		// filter result many times over, because e.g. all standard
+		// library findings will be available to all packages.
+		filteredFindings = append(filteredFindings,
+			cachedFiltered.Lookup(append(configFiles, filename), func() worker.Sizer {
+				inputFindings := loadFindings(filename)
+				filteredFindings := make(nogo.FindingSet, 0, len(inputFindings))
+				for _, finding := range inputFindings {
+					if ok := config.ShouldReport(finding); ok {
+						filteredFindings = append(filteredFindings, finding)
+					}
+				}
+				return filteredFindings
+			}).(nogo.FindingSet)...)
 	}
 
 	// Write the output (if required).
 	//
 	// If the outputFile is specified, then we exit here. Otherwise,
 	// we continue to write to stdout and treat like a test.
+	//
+	// Note that the output of the filter is always json, which is
+	// human readable and the format that is consumed by tricorder.
 	if outputFile != "" {
-		if err := nogo.WriteFindingsToFile(filteredFindings, outputFile); err != nil {
+		w, err := os.OpenFile(outputFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
+		if err != nil {
+			log.Fatalf("unable to open output file %q: %v", outputFile, err)
+		}
+		if err := nogo.WriteFindingsTo(w, filteredFindings, true /* json */); err != nil {
 			log.Fatalf("unable to write findings: %v", err)
 		}
-		return
+		return 0
 	}
 
 	// Treat the run as a test.
 	if len(filteredFindings) == 0 {
 		fmt.Fprintf(os.Stdout, "PASS\n")
-		os.Exit(0)
+		return 0
 	}
 	for _, finding := range filteredFindings {
 		fmt.Fprintf(os.Stdout, "%s\n", finding.String())
 	}
-	os.Exit(1)
+	return 1
 }
diff --git a/tools/nogo/findings.go b/tools/nogo/findings.go
index a00cfe813..329a7062e 100644
--- a/tools/nogo/findings.go
+++ b/tools/nogo/findings.go
@@ -15,10 +15,13 @@
 package nogo
 
 import (
+	"encoding/gob"
 	"encoding/json"
 	"fmt"
 	"go/token"
-	"io/ioutil"
+	"io"
+	"os"
+	"reflect"
 	"sort"
 )
 
@@ -29,63 +32,96 @@ type Finding struct {
 	Message  string
 }
 
+// findingSize is the size of the finding struct itself.
+var findingSize = int64(reflect.TypeOf(Finding{}).Size())
+
+// Size implements worker.Sizer.Size.
+func (f *Finding) Size() int64 {
+	return int64(len(f.Category)) + int64(len(f.Message)) + findingSize
+}
+
 // String implements fmt.Stringer.String.
 func (f *Finding) String() string {
 	return fmt.Sprintf("%s: %s: %s", f.Category, f.Position.String(), f.Message)
 }
 
-// WriteFindingsToFile writes findings to a file.
-func WriteFindingsToFile(findings []Finding, filename string) error {
-	content, err := WriteFindingsToBytes(findings)
-	if err != nil {
-		return err
+// FindingSet is a collection of findings.
+type FindingSet []Finding
+
+// Size implmements worker.Sizer.Size.
+func (fs FindingSet) Size() int64 {
+	size := int64(0)
+	for _, finding := range fs {
+		size += finding.Size()
 	}
-	return ioutil.WriteFile(filename, content, 0644)
+	return size
 }
 
-// WriteFindingsToBytes serializes findings as bytes.
-func WriteFindingsToBytes(findings []Finding) ([]byte, error) {
-	// N.B. Sort all the findings in order to maximize cacheability.
-	sort.Slice(findings, func(i, j int) bool {
+// Sort sorts all findings.
+func (fs FindingSet) Sort() {
+	sort.Slice(fs, func(i, j int) bool {
 		switch {
-		case findings[i].Position.Filename < findings[j].Position.Filename:
+		case fs[i].Position.Filename < fs[j].Position.Filename:
 			return true
-		case findings[i].Position.Filename > findings[j].Position.Filename:
+		case fs[i].Position.Filename > fs[j].Position.Filename:
 			return false
-		case findings[i].Position.Line < findings[j].Position.Line:
+		case fs[i].Position.Line < fs[j].Position.Line:
 			return true
-		case findings[i].Position.Line > findings[j].Position.Line:
+		case fs[i].Position.Line > fs[j].Position.Line:
 			return false
-		case findings[i].Position.Column < findings[j].Position.Column:
+		case fs[i].Position.Column < fs[j].Position.Column:
 			return true
-		case findings[i].Position.Column > findings[j].Position.Column:
+		case fs[i].Position.Column > fs[j].Position.Column:
 			return false
-		case findings[i].Category < findings[j].Category:
+		case fs[i].Category < fs[j].Category:
 			return true
-		case findings[i].Category > findings[j].Category:
+		case fs[i].Category > fs[j].Category:
 			return false
-		case findings[i].Message < findings[j].Message:
+		case fs[i].Message < fs[j].Message:
 			return true
-		case findings[i].Message > findings[j].Message:
+		case fs[i].Message > fs[j].Message:
 			return false
 		default:
 			return false
 		}
 	})
-	return json.Marshal(findings)
+}
+
+// WriteFindingsTo serializes findings.
+func WriteFindingsTo(w io.Writer, findings FindingSet, asJSON bool) error {
+	// N.B. Sort all the findings in order to maximize cacheability.
+	findings.Sort()
+	if asJSON {
+		enc := json.NewEncoder(w)
+		return enc.Encode(findings)
+	}
+	enc := gob.NewEncoder(w)
+	return enc.Encode(findings)
 }
 
 // ExtractFindingsFromFile loads findings from a file.
-func ExtractFindingsFromFile(filename string) ([]Finding, error) {
-	content, err := ioutil.ReadFile(filename)
+func ExtractFindingsFromFile(filename string, asJSON bool) (FindingSet, error) {
+	r, err := os.Open(filename)
 	if err != nil {
 		return nil, err
 	}
-	return ExtractFindingsFromBytes(content)
+	defer r.Close()
+	return ExtractFindingsFrom(r, asJSON)
 }
 
 // ExtractFindingsFromBytes loads findings from bytes.
-func ExtractFindingsFromBytes(content []byte) (findings []Finding, err error) {
-	err = json.Unmarshal(content, &findings)
+func ExtractFindingsFrom(r io.Reader, asJSON bool) (findings FindingSet, err error) {
+	if asJSON {
+		dec := json.NewDecoder(r)
+		err = dec.Decode(&findings)
+	} else {
+		dec := gob.NewDecoder(r)
+		err = dec.Decode(&findings)
+	}
 	return findings, err
 }
+
+func init() {
+	gob.Register((*Finding)(nil))
+	gob.Register((*FindingSet)(nil))
+}
diff --git a/tools/nogo/nogo.go b/tools/nogo/nogo.go
index c1b88e89f..acee7c8bc 100644
--- a/tools/nogo/nogo.go
+++ b/tools/nogo/nogo.go
@@ -19,7 +19,8 @@
 package nogo
 
 import (
-	"encoding/json"
+	"bytes"
+	"encoding/gob"
 	"errors"
 	"fmt"
 	"go/ast"
@@ -43,6 +44,7 @@ import (
 
 	// Special case: flags live here and change overall behavior.
 	"gvisor.dev/gvisor/tools/checkescape"
+	"gvisor.dev/gvisor/tools/worker"
 )
 
 // StdlibConfig is serialized as the configuration.
@@ -82,46 +84,94 @@ type stdlibFact struct {
 	Facts   []byte
 }
 
-// factLoader returns a function that loads facts.
-//
-// This resolves all standard library facts and imported package facts up
-// front. The returned loader function will never return an error, only
-// empty facts.
-//
-// This is done because all stdlib data is stored together, and we don't want
-// to load this data many times over.
-func (c *PackageConfig) factLoader() (loader, error) {
-	allFacts := make(map[string][]byte)
-	if c.StdlibFacts != "" {
-		data, err := ioutil.ReadFile(c.StdlibFacts)
-		if err != nil {
-			return nil, fmt.Errorf("error loading stdlib facts from %q: %w", c.StdlibFacts, err)
-		}
-		var (
-			stdlibFactsSorted []stdlibFact
-			stdlibFacts       = make(map[string][]byte)
-		)
-		// See below re: sorted serialization.
-		if err := json.Unmarshal(data, &stdlibFactsSorted); err != nil {
-			return nil, fmt.Errorf("error loading stdlib facts: %w", err)
-		}
-		for _, stdlibFact := range stdlibFactsSorted {
-			stdlibFacts[stdlibFact.Package] = stdlibFact.Facts
-		}
-		for pkg, data := range stdlibFacts {
-			allFacts[pkg] = data
+// stdlibFacts is a set of standard library facts.
+type stdlibFacts map[string][]byte
+
+// Size implements worker.Sizer.Size.
+func (sf stdlibFacts) Size() int64 {
+	size := int64(0)
+	for filename, data := range sf {
+		size += int64(len(filename))
+		size += int64(len(data))
+	}
+	return size
+}
+
+// EncodeTo serializes stdlibFacts.
+func (sf stdlibFacts) EncodeTo(w io.Writer) error {
+	stdlibFactsSorted := make([]stdlibFact, 0, len(sf))
+	for pkg, facts := range sf {
+		stdlibFactsSorted = append(stdlibFactsSorted, stdlibFact{
+			Package: pkg,
+			Facts:   facts,
+		})
+	}
+	sort.Slice(stdlibFactsSorted, func(i, j int) bool {
+		return stdlibFactsSorted[i].Package < stdlibFactsSorted[j].Package
+	})
+	enc := gob.NewEncoder(w)
+	if err := enc.Encode(stdlibFactsSorted); err != nil {
+		return err
+	}
+	return nil
+}
+
+// DecodeFrom deserializes stdlibFacts.
+func (sf stdlibFacts) DecodeFrom(r io.Reader) error {
+	var stdlibFactsSorted []stdlibFact
+	dec := gob.NewDecoder(r)
+	if err := dec.Decode(&stdlibFactsSorted); err != nil {
+		return err
+	}
+	for _, stdlibFact := range stdlibFactsSorted {
+		sf[stdlibFact.Package] = stdlibFact.Facts
+	}
+	return nil
+}
+
+var (
+	// cachedFacts caches by file (just byte data).
+	cachedFacts = worker.NewCache("facts")
+
+	// stdlibCachedFacts caches the standard library (stdlibFacts).
+	stdlibCachedFacts = worker.NewCache("stdlib")
+)
+
+// factLoader loads facts.
+func (c *PackageConfig) factLoader(path string) (data []byte, err error) {
+	filename, ok := c.FactMap[path]
+	if ok {
+		cb := cachedFacts.Lookup([]string{filename}, func() worker.Sizer {
+			data, readErr := ioutil.ReadFile(filename)
+			if readErr != nil {
+				err = fmt.Errorf("error loading %q: %w", filename, readErr)
+				return nil
+			}
+			return worker.CacheBytes(data)
+		})
+		if cb != nil {
+			return []byte(cb.(worker.CacheBytes)), err
 		}
+		return nil, err
 	}
-	for pkg, file := range c.FactMap {
-		data, err := ioutil.ReadFile(file)
-		if err != nil {
-			return nil, fmt.Errorf("error loading %q: %w", file, err)
+	cb := stdlibCachedFacts.Lookup([]string{c.StdlibFacts}, func() worker.Sizer {
+		r, openErr := os.Open(c.StdlibFacts)
+		if openErr != nil {
+			err = fmt.Errorf("error loading stdlib facts from %q: %w", c.StdlibFacts, openErr)
+			return nil
+		}
+		defer r.Close()
+		sf := make(stdlibFacts)
+		if readErr := sf.DecodeFrom(r); readErr != nil {
+			err = fmt.Errorf("error loading stdlib facts: %w", readErr)
+			return nil
 		}
-		allFacts[pkg] = data
+		return sf
+	})
+	if cb != nil {
+		return (cb.(stdlibFacts))[path], err
 	}
-	return func(path string) ([]byte, error) {
-		return allFacts[path], nil
-	}, nil
+	return nil, err
 }
 
 // shouldInclude indicates whether the file should be included.
@@ -205,7 +255,7 @@ var ErrSkip = errors.New("skipped")
 //
 // Note that not all parts of the source are expected to build. We skip obvious
 // test files, and cmd files, which should not be dependencies.
-func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindings []Finding, facts []byte, err error) {
+func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindings FindingSet, facts []byte, err error) {
 	if len(config.Srcs) == 0 {
 		return nil, nil, nil
 	}
@@ -275,16 +325,16 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi
 	}
 
 	// Closure to check a single package.
-	stdlibFacts := make(map[string][]byte)
-	stdlibErrs := make(map[string]error)
+	localStdlibFacts := make(stdlibFacts)
+	localStdlibErrs := make(map[string]error)
 	var checkOne func(pkg string) error // Recursive.
 	checkOne = func(pkg string) error {
 		// Is this already done?
-		if _, ok := stdlibFacts[pkg]; ok {
+		if _, ok := localStdlibFacts[pkg]; ok {
 			return nil
 		}
 		// Did this fail previously?
-		if _, ok := stdlibErrs[pkg]; ok {
+		if _, ok := localStdlibErrs[pkg]; ok {
 			return nil
 		}
 
@@ -300,7 +350,7 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi
 			// If there's no binary for this package, it is likely
 			// not built with the distribution. That's fine, we can
 			// just skip analysis.
-			stdlibErrs[pkg] = err
+			localStdlibErrs[pkg] = err
 			return nil
 		}
 
@@ -317,10 +367,10 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi
 		if err != nil {
 			// If we can't analyze a package from the standard library,
 			// then we skip it. It will simply not have any findings.
-			stdlibErrs[pkg] = err
+			localStdlibErrs[pkg] = err
 			return nil
 		}
-		stdlibFacts[pkg] = factData
+		localStdlibFacts[pkg] = factData
 		allFindings = append(allFindings, findings...)
 		return nil
 	}
@@ -337,34 +387,23 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi
 	}
 
 	// Sanity check.
-	if len(stdlibFacts) == 0 {
+	if len(localStdlibFacts) == 0 {
 		return nil, nil, fmt.Errorf("no stdlib facts found: misconfiguration?")
 	}
 
-	// Write out all findings. Note that the standard library facts
-	// must be serialized in a sorted order to ensure cacheability.
-	stdlibFactsSorted := make([]stdlibFact, 0, len(stdlibFacts))
-	for pkg, facts := range stdlibFacts {
-		stdlibFactsSorted = append(stdlibFactsSorted, stdlibFact{
-			Package: pkg,
-			Facts:   facts,
-		})
-	}
-	sort.Slice(stdlibFactsSorted, func(i, j int) bool {
-		return stdlibFactsSorted[i].Package < stdlibFactsSorted[j].Package
-	})
-	factData, err := json.Marshal(stdlibFactsSorted)
-	if err != nil {
-		return nil, nil, fmt.Errorf("error saving stdlib facts: %w", err)
+	// Write out all findings.
+	buf := bytes.NewBuffer(nil)
+	if err := localStdlibFacts.EncodeTo(buf); err != nil {
+		return nil, nil, fmt.Errorf("error serialized stdlib facts: %v", err)
 	}
 
 	// Write out all errors.
-	for pkg, err := range stdlibErrs {
+	for pkg, err := range localStdlibErrs {
 		log.Printf("WARNING: error while processing %v: %v", pkg, err)
 	}
 
 	// Return all findings.
-	return allFindings, factData, nil
+	return allFindings, buf.Bytes(), nil
 }
 
 // CheckPackage runs all given analyzers.
@@ -417,11 +456,7 @@ func CheckPackage(config *PackageConfig, analyzers []*analysis.Analyzer, importC
 	}
 
 	// Load all package facts.
-	loader, err := config.factLoader()
-	if err != nil {
-		return nil, nil, fmt.Errorf("error loading facts: %w", err)
-	}
-	facts, err := facts.Decode(types, loader)
+	facts, err := facts.Decode(types, config.factLoader)
 	if err != nil {
 		return nil, nil, fmt.Errorf("error decoding facts: %w", err)
 	}
@@ -496,3 +531,7 @@ func CheckPackage(config *PackageConfig, analyzers []*analysis.Analyzer, importC
 	// Return all findings.
 	return findings, facts.Encode(), nil
 }
+
+func init() {
+	gob.Register((*stdlibFact)(nil))
+}
diff --git a/tools/worker/BUILD b/tools/worker/BUILD
new file mode 100644
index 000000000..dc03ce11e
--- /dev/null
+++ b/tools/worker/BUILD
@@ -0,0 +1,21 @@
+load("//tools:defs.bzl", "bazel_worker_proto", "go_library")
+
+package(licenses = ["notice"])
+
+# For Google-tooling.
+# @unused
+glaze_ignore = [
+    "worker.go",
+]
+
+go_library(
+    name = "worker",
+    srcs = ["worker.go"],
+    visibility = ["//tools:__subpackages__"],
+    deps = [
+        bazel_worker_proto,
+        "@org_golang_google_protobuf//encoding/protowire:go_default_library",
+        "@org_golang_google_protobuf//proto:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
diff --git a/tools/worker/worker.go b/tools/worker/worker.go
new file mode 100644
index 000000000..669a5f203
--- /dev/null
+++ b/tools/worker/worker.go
@@ -0,0 +1,325 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package worker provides an implementation of the bazel worker protocol.
+//
+// Tools may be written as a normal command line utility, except the passed
+// run function may be invoked multiple times.
+package worker
+
+import (
+	"bufio"
+	"bytes"
+	"flag"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"net"
+	"net/http"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+
+	_ "net/http/pprof" // For profiling.
+
+	"golang.org/x/sys/unix"
+	"google.golang.org/protobuf/encoding/protowire"
+	"google.golang.org/protobuf/proto"
+	wpb "gvisor.dev/bazel/worker_protocol_go_proto"
+)
+
+var (
+	persistentWorker  = flag.Bool("persistent_worker", false, "enable persistent worker.")
+	workerDebug       = flag.Bool("worker_debug", false, "debug persistent workers.")
+	maximumCacheUsage = flag.Int64("maximum_cache_usage", 1024*1024*1024, "maximum cache size.")
+)
+
+var (
+	// inputFiles is the last set of input files.
+	//
+	// This is used for cache invalidation. The key is the *absolute* path
+	// name, and the value is the digest in the current run.
+	inputFiles = make(map[string]string)
+
+	// activeCaches is the set of active caches.
+	activeCaches = make(map[*Cache]struct{})
+
+	// totalCacheUsage is the total usage of all caches.
+	totalCacheUsage int64
+)
+
+// mustAbs returns the absolute path of a filename or dies.
+func mustAbs(filename string) string {
+	abs, err := filepath.Abs(filename)
+	if err != nil {
+		log.Fatalf("error getting absolute path: %v", err)
+	}
+	return abs
+}
+
+// updateInputFiles creates an entry in inputFiles.
+func updateInputFile(filename, digest string) {
+	inputFiles[mustAbs(filename)] = digest
+}
+
+// Sizer returns a size.
+type Sizer interface {
+	Size() int64
+}
+
+// CacheBytes is an example of a Sizer.
+type CacheBytes []byte
+
+// Size implements Sizer.Size.
+func (cb CacheBytes) Size() int64 {
+	return int64(len(cb))
+}
+
+// Cache is a worker cache.
+//
+// They can be created via NewCache.
+type Cache struct {
+	name    string
+	entries map[string]Sizer
+	size    int64
+	hits    int64
+	misses  int64
+}
+
+// NewCache returns a new cache.
+func NewCache(name string) *Cache {
+	return &Cache{
+		name: name,
+	}
+}
+
+// Lookup looks up an entry in the cache.
+//
+// It is a function of the given files.
+func (c *Cache) Lookup(filenames []string, generate func() Sizer) Sizer {
+	digests := make([]string, 0, len(filenames))
+	for _, filename := range filenames {
+		digest, ok := inputFiles[mustAbs(filename)]
+		if !ok {
+			// This is not a valid input. We may not be running as
+			// persistent worker in this cache. If that's the case,
+			// then the file's contents will not change across the
+			// run, and we just use the filename itself.
+			digest = filename
+		}
+		digests = append(digests, digest)
+	}
+
+	// Attempt the lookup.
+	sort.Slice(digests, func(i, j int) bool {
+		return digests[i] < digests[j]
+	})
+	cacheKey := strings.Join(digests, "+")
+	if c.entries == nil {
+		c.entries = make(map[string]Sizer)
+		activeCaches[c] = struct{}{}
+	}
+	entry, ok := c.entries[cacheKey]
+	if ok {
+		c.hits++
+		return entry
+	}
+
+	// Generate a new entry.
+	entry = generate()
+	c.misses++
+	c.entries[cacheKey] = entry
+	if entry != nil {
+		sz := entry.Size()
+		c.size += sz
+		totalCacheUsage += sz
+	}
+
+	// Check the capacity of all caches. If it greater than the maximum, we
+	// flush everything but still return this entry.
+	if totalCacheUsage > *maximumCacheUsage {
+		for entry, _ := range activeCaches {
+			// Drop all entries.
+			entry.size = 0
+			entry.entries = nil
+		}
+		totalCacheUsage = 0 // Reset.
+	}
+
+	return entry
+}
+
+// allCacheStats returns stats for all caches.
+func allCacheStats() string {
+	var sb strings.Builder
+	for entry, _ := range activeCaches {
+		ratio := float64(entry.hits) / float64(entry.hits+entry.misses)
+		fmt.Fprintf(&sb,
+			"% 10s: count: % 5d  size: % 10d  hits: % 7d  misses: % 7d  ratio: %2.2f\n",
+			entry.name, len(entry.entries), entry.size, entry.hits, entry.misses, ratio)
+	}
+	if len(activeCaches) > 0 {
+		fmt.Fprintf(&sb, "total: % 10d\n", totalCacheUsage)
+	}
+	return sb.String()
+}
+
+// LookupDigest returns a digest for the given file.
+func LookupDigest(filename string) (string, bool) {
+	digest, ok := inputFiles[filename]
+	return digest, ok
+}
+
+// Work invokes the main function.
+func Work(run func([]string) int) {
+	flag.CommandLine.Parse(os.Args[1:])
+	if !*persistentWorker {
+		// Handle the argument file.
+		args := flag.CommandLine.Args()
+		if len(args) == 1 && len(args[0]) > 1 && args[0][0] == '@' {
+			content, err := ioutil.ReadFile(args[0][1:])
+			if err != nil {
+				log.Fatalf("unable to parse args file: %v", err)
+			}
+			// Pull arguments from the file.
+			args = strings.Split(string(content), "\n")
+			flag.CommandLine.Parse(args)
+			args = flag.CommandLine.Args()
+		}
+		os.Exit(run(args))
+	}
+
+	var listenHeader string // Emitted always.
+	if *workerDebug {
+		// Bind a server for profiling.
+		listener, err := net.Listen("tcp", "localhost:0")
+		if err != nil {
+			log.Fatalf("unable to bind a server: %v", err)
+		}
+		// Construct the header for stats output, below.
+		listenHeader = fmt.Sprintf("Listening @ http://localhost:%d\n", listener.Addr().(*net.TCPAddr).Port)
+		go http.Serve(listener, nil)
+	}
+
+	// Move stdout. This is done to prevent anything else from accidentally
+	// printing to stdout, which must contain only the valid WorkerResponse
+	// serialized protos.
+	newOutput, err := unix.Dup(1)
+	if err != nil {
+		log.Fatalf("unable to move stdout: %v", err)
+	}
+	// Stderr may be closed or may be a copy of stdout. We make sure that
+	// we have an output that is in a completely separate range.
+	for newOutput <= 2 {
+		newOutput, err = unix.Dup(newOutput)
+		if err != nil {
+			log.Fatalf("unable to move stdout: %v", err)
+		}
+	}
+
+	// Best-effort: collect logs.
+	rPipe, wPipe, err := os.Pipe()
+	if err != nil {
+		log.Fatalf("unable to create pipe: %v", err)
+	}
+	if err := unix.Dup2(int(wPipe.Fd()), 1); err != nil {
+		log.Fatalf("error duping over stdout: %v", err)
+	}
+	if err := unix.Dup2(int(wPipe.Fd()), 2); err != nil {
+		log.Fatalf("error duping over stderr: %v", err)
+	}
+	wPipe.Close()
+	defer rPipe.Close()
+
+	// Read requests from stdin.
+	input := bufio.NewReader(os.NewFile(0, "input"))
+	output := bufio.NewWriter(os.NewFile(uintptr(newOutput), "output"))
+	for {
+		szBuf, err := input.Peek(4)
+		if err != nil {
+			log.Fatalf("unabel to read header: %v", err)
+		}
+
+		// Parse the size, and discard bits.
+		sz, szBytes := protowire.ConsumeVarint(szBuf)
+		if szBytes < 0 {
+			szBytes = 0
+		}
+		if _, err := input.Discard(szBytes); err != nil {
+			log.Fatalf("error discarding size: %v", err)
+		}
+
+		// Read a full message.
+		msg := make([]byte, int(sz))
+		if _, err := io.ReadFull(input, msg); err != nil {
+			log.Fatalf("error reading worker request: %v", err)
+		}
+		var wreq wpb.WorkRequest
+		if err := proto.Unmarshal(msg, &wreq); err != nil {
+			log.Fatalf("error unmarshaling worker request: %v", err)
+		}
+
+		// Flush relevant caches.
+		inputFiles = make(map[string]string)
+		for _, input := range wreq.GetInputs() {
+			updateInputFile(input.GetPath(), string(input.GetDigest()))
+		}
+
+		// Prepare logging.
+		outputBuffer := bytes.NewBuffer(nil)
+		outputBuffer.WriteString(listenHeader)
+		log.SetOutput(outputBuffer)
+
+		// Parse all arguments.
+		flag.CommandLine.Parse(wreq.GetArguments())
+		var exitCode int
+		exitChan := make(chan int)
+		go func() { exitChan <- run(flag.CommandLine.Args()) }()
+		for running := true; running; {
+			select {
+			case exitCode = <-exitChan:
+				running = false
+			default:
+			}
+			// N.B. rPipe is given a read deadline of 1ms. We expect
+			// this to turn a copy error after 1ms, and we just keep
+			// flushing this buffer while the task is running.
+			rPipe.SetReadDeadline(time.Now().Add(time.Millisecond))
+			outputBuffer.ReadFrom(rPipe)
+		}
+
+		if *workerDebug {
+			// Attach all cache stats.
+			outputBuffer.WriteString(allCacheStats())
+		}
+
+		// Send the response.
+		var wresp wpb.WorkResponse
+		wresp.ExitCode = int32(exitCode)
+		wresp.Output = string(outputBuffer.Bytes())
+		rmsg, err := proto.Marshal(&wresp)
+		if err != nil {
+			log.Fatalf("error marshaling response: %v", err)
+		}
+		if _, err := output.Write(append(protowire.AppendVarint(nil, uint64(len(rmsg))), rmsg...)); err != nil {
+			log.Fatalf("error sending worker response: %v", err)
+		}
+		if err := output.Flush(); err != nil {
+			log.Fatalf("error flushing output: %v", err)
+		}
+	}
+}
author	Adin Scannell <ascannell@google.com>	2021-04-26 11:40:10 -0700
committer	gVisor bot <gvisor-bot@google.com>	2021-04-26 11:42:49 -0700
commit	5b7b7daa425ffc93e98c12cbd37ea7b15a8bcc8d (patch)
tree	82079eb5110b5e36f7e657c076a12ba45c669475 /tools
parent	bf64560681182b0024790f683f4c9aea142e70c5 (diff)