 test/syscalls/linux/mount.cc   |   7
 test/util/BUILD                |  12
 test/util/mount_util.cc        |  32
 test/util/mount_util.h         |  10
 test/util/mount_util_test.cc   |  47
 tools/bazeldefs/BUILD          |  13
 tools/bazeldefs/go.bzl         |   5
 tools/checklocks/checklocks.go |   3
 tools/defs.bzl                 |   4
 tools/nogo/BUILD               |   1
 tools/nogo/check/BUILD         |   5
 tools/nogo/check/main.go       |  14
 tools/nogo/config.go           |  80
 tools/nogo/defs.bzl            | 118
 tools/nogo/filter/BUILD        |   1
 tools/nogo/filter/main.go      | 128
 tools/nogo/findings.go         |  90
 tools/nogo/nogo.go             | 175
 tools/worker/BUILD             |  21
 tools/worker/worker.go         | 325

 20 files changed, 885 insertions(+), 206 deletions(-)
diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc
index 92d0fb05c..3c7311782 100644
--- a/test/syscalls/linux/mount.cc
+++ b/test/syscalls/linux/mount.cc
@@ -45,6 +45,7 @@ namespace testing {
 
 namespace {
 
+using ::testing::AnyOf;
 using ::testing::Contains;
 using ::testing::Pair;
 
@@ -360,7 +361,8 @@ TEST(MountTest, MountInfo) {
     if (e.mount_point == dir.path()) {
       EXPECT_EQ(e.fstype, "tmpfs");
       auto mopts = ParseMountOptions(e.mount_opts);
-      EXPECT_THAT(mopts, Contains(Pair("mode", "0123")));
+      EXPECT_THAT(mopts, AnyOf(Contains(Pair("mode", "0123")),
+                               Contains(Pair("mode", "123"))));
     }
   }
 
@@ -371,7 +373,8 @@ TEST(MountTest, MountInfo) {
     if (e.mount_point == dir.path()) {
      EXPECT_EQ(e.fstype, "tmpfs");
       auto mopts = ParseMountOptions(e.super_opts);
-      EXPECT_THAT(mopts, Contains(Pair("mode", "0123")));
+      EXPECT_THAT(mopts, AnyOf(Contains(Pair("mode", "0123")),
+                               Contains(Pair("mode", "123"))));
     }
   }
 }
diff --git a/test/util/BUILD b/test/util/BUILD
index 6feda0e26..8985b54af 100644
--- a/test/util/BUILD
+++ b/test/util/BUILD
@@ -149,6 +149,18 @@ cc_library(
     ],
 )
 
+cc_test(
+    name = "mount_util_test",
+    size = "small",
+    srcs = ["mount_util_test.cc"],
+    deps = [
+        ":mount_util",
+        ":test_main",
+        ":test_util",
+        gtest,
+    ],
+)
+
 cc_library(
     name = "save_util",
     testonly = 1,
diff --git a/test/util/mount_util.cc b/test/util/mount_util.cc
index a79ce6420..48640d6a1 100644
--- a/test/util/mount_util.cc
+++ b/test/util/mount_util.cc
@@ -26,9 +26,14 @@ namespace testing {
 
 PosixErrorOr<std::vector<ProcMountsEntry>> ProcSelfMountsEntries() {
   std::string content;
   RETURN_IF_ERRNO(GetContents("/proc/self/mounts", &content));
+  return ProcSelfMountsEntriesFrom(content);
+}
 
+PosixErrorOr<std::vector<ProcMountsEntry>> ProcSelfMountsEntriesFrom(
+    const std::string& content) {
   std::vector<ProcMountsEntry> entries;
-  std::vector<std::string> lines = absl::StrSplit(content, '\n');
+  std::vector<std::string> lines =
+      absl::StrSplit(content, absl::ByChar('\n'), absl::AllowEmpty());
 
   std::cerr << "<contents of /proc/self/mounts>" << std::endl;
   for (const std::string& line : lines) {
     std::cerr << line << std::endl;
@@ -47,11 +52,11 @@ PosixErrorOr<std::vector<ProcMountsEntry>> ProcSelfMountsEntries() {
 
     ProcMountsEntry entry;
     std::vector<std::string> fields =
-        absl::StrSplit(line, absl::ByChar(' '), absl::SkipEmpty());
+        absl::StrSplit(line, absl::ByChar(' '), absl::AllowEmpty());
     if (fields.size() != 6) {
-      return PosixError(EINVAL,
-                        absl::StrFormat("Not enough tokens, got %d, line: %s",
-                                        fields.size(), line));
+      return PosixError(
+          EINVAL, absl::StrFormat("Not enough tokens, got %d, content: <<%s>>",
+                                  fields.size(), content));
     }
 
     entry.spec = fields[0];
@@ -71,9 +76,14 @@ PosixErrorOr<std::vector<ProcMountsEntry>> ProcSelfMountsEntries() {
 
 PosixErrorOr<std::vector<ProcMountInfoEntry>> ProcSelfMountInfoEntries() {
   std::string content;
   RETURN_IF_ERRNO(GetContents("/proc/self/mountinfo", &content));
+  return ProcSelfMountInfoEntriesFrom(content);
+}
 
+PosixErrorOr<std::vector<ProcMountInfoEntry>> ProcSelfMountInfoEntriesFrom(
+    const std::string& content) {
   std::vector<ProcMountInfoEntry> entries;
-  std::vector<std::string> lines = absl::StrSplit(content, '\n');
+  std::vector<std::string> lines =
+      absl::StrSplit(content, absl::ByChar('\n'), absl::AllowEmpty());
 
   std::cerr << "<contents of /proc/self/mountinfo>" << std::endl;
   for (const std::string& line : lines) {
     std::cerr << line << std::endl;
@@ -92,12 +102,12 @@ PosixErrorOr<std::vector<ProcMountInfoEntry>> ProcSelfMountInfoEntries() {
 
     ProcMountInfoEntry entry;
     std::vector<std::string> fields =
-        absl::StrSplit(line, absl::ByChar(' '), absl::SkipEmpty());
+        absl::StrSplit(line, absl::ByChar(' '), absl::AllowEmpty());
     if (fields.size() < 10 || fields.size() > 11) {
       return PosixError(
-          EINVAL,
-          absl::StrFormat("Unexpected number of tokens, got %d, line: %s",
-                          fields.size(), line));
+          EINVAL, absl::StrFormat(
+                      "Unexpected number of tokens, got %d, content: <<%s>>",
+                      fields.size(), content));
     }
 
     ASSIGN_OR_RETURN_ERRNO(entry.id, Atoi<uint64_t>(fields[0]));
@@ -142,7 +152,7 @@ absl::flat_hash_map<std::string, std::string> ParseMountOptions(
     std::string mopts) {
   absl::flat_hash_map<std::string, std::string> entries;
   const std::vector<std::string> tokens =
-      absl::StrSplit(mopts, absl::ByChar(','), absl::SkipEmpty());
+      absl::StrSplit(mopts, absl::ByChar(','), absl::AllowEmpty());
   for (const auto& token : tokens) {
     std::vector<std::string> kv =
         absl::StrSplit(token, absl::MaxSplits('=', 1));
diff --git a/test/util/mount_util.h b/test/util/mount_util.h
index b75a490fb..3f8a1c0f1 100644
--- a/test/util/mount_util.h
+++ b/test/util/mount_util.h
@@ -58,6 +58,11 @@ struct ProcMountsEntry {
 // ProcSelfMountsEntries returns a parsed representation of /proc/self/mounts.
 PosixErrorOr<std::vector<ProcMountsEntry>> ProcSelfMountsEntries();
 
+// ProcSelfMountsEntriesFrom returns a parsed representation of mounts from the
+// provided content.
+PosixErrorOr<std::vector<ProcMountsEntry>> ProcSelfMountsEntriesFrom(
+    const std::string& content);
+
 struct ProcMountInfoEntry {
   uint64_t id;
   uint64_t parent_id;
@@ -76,6 +81,11 @@ struct ProcMountInfoEntry {
 // /proc/self/mountinfo.
 PosixErrorOr<std::vector<ProcMountInfoEntry>> ProcSelfMountInfoEntries();
 
+// ProcSelfMountInfoEntriesFrom returns a parsed representation of
+// mountinfo from the provided content.
+PosixErrorOr<std::vector<ProcMountInfoEntry>> ProcSelfMountInfoEntriesFrom(
+    const std::string&);
+
 // Interprets the input string mopts as a comma separated list of mount
 // options. A mount option can either be just a value, or a key=value pair. For
 // example, the string "rw,relatime,fd=7" will be parsed into a map like { "rw":
diff --git a/test/util/mount_util_test.cc b/test/util/mount_util_test.cc
new file mode 100644
index 000000000..2bcb6cc43
--- /dev/null
+++ b/test/util/mount_util_test.cc
@@ -0,0 +1,47 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/util/mount_util.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ParseMounts, Mounts) {
+  auto entries = ASSERT_NO_ERRNO_AND_VALUE(ProcSelfMountsEntriesFrom(
+      R"proc(sysfs /sys sysfs rw,nosuid,nodev,noexec,relatime 0 0
+proc /proc proc rw,nosuid,nodev,noexec,relatime 0 0
+ /mnt tmpfs rw,noexec 0 0
+)proc"));
+  EXPECT_EQ(entries.size(), 3);
+}
+
+TEST(ParseMounts, MountInfo) {
+  auto entries = ASSERT_NO_ERRNO_AND_VALUE(ProcSelfMountInfoEntriesFrom(
+      R"proc(22 28 0:20 / /sys rw,relatime shared:7 - sysfs sysfs rw
+23 28 0:21 / /proc rw,relatime shared:14 - proc proc rw
+2007 8844 0:278 / /mnt rw,noexec - tmpfs rw,mode=123,uid=268601820,gid=5000
+)proc"));
+  EXPECT_EQ(entries.size(), 3);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/tools/bazeldefs/BUILD b/tools/bazeldefs/BUILD
index c2c1287a1..24e6f8a94 100644
--- a/tools/bazeldefs/BUILD
+++ b/tools/bazeldefs/BUILD
@@ -1,6 +1,9 @@
-load("//tools:defs.bzl", "bzl_library")
+load("//tools:defs.bzl", "bzl_library", "go_proto_library")
 
-package(licenses = ["notice"])
+package(
+    default_visibility = ["//:sandbox"],
+    licenses = ["notice"],
+)
 
 bzl_library(
     name = "platforms_bzl",
@@ -45,3 +48,9 @@ genrule(
     stamp = True,
     visibility = ["//:sandbox"],
 )
+
+go_proto_library(
+    name = "worker_protocol_go_proto",
+    importpath = "gvisor.dev/bazel/worker_protocol_go_proto",
+    proto = "@bazel_tools//src/main/protobuf:worker_protocol_proto",
+)
diff --git a/tools/bazeldefs/go.bzl b/tools/bazeldefs/go.bzl
index d16376032..da027846b 100644
--- a/tools/bazeldefs/go.bzl
+++ b/tools/bazeldefs/go.bzl
@@ -8,8 +8,13 @@ load("//tools/bazeldefs:defs.bzl", "select_arch", "select_system")
 gazelle = _gazelle
 go_embed_data = _go_embed_data
 go_path = _go_path
+bazel_worker_proto = "//tools/bazeldefs:worker_protocol_go_proto"
 
 def _go_proto_or_grpc_library(go_library_func, name, **kwargs):
+    if "importpath" in kwargs:
+        # If importpath is explicit, pass straight through.
+        go_library_func(name = name, **kwargs)
+        return
     deps = [
         dep.replace("_proto", "_go_proto")
         for dep in (kwargs.pop("deps", []) or [])
     ]
diff --git a/tools/checklocks/checklocks.go b/tools/checklocks/checklocks.go
index 4ec2918f6..1e877d394 100644
--- a/tools/checklocks/checklocks.go
+++ b/tools/checklocks/checklocks.go
@@ -563,9 +563,7 @@ func (pc *passContext) checkFunctionCall(call *ssa.Call, isExempted bool, lh *lo
     if !ok {
         return
     }
     if fn.Object() == nil {
-        log.Warningf("fn w/ nil Object is: %+v", fn)
         return
     }
 
@@ -579,7 +577,6 @@ func (pc *passContext) checkFunctionCall(call *ssa.Call, isExempted bool, lh *lo
         r := (*call.Value().Operands(nil)[fg.ParameterNumber+1])
         guardObj := findField(r, fg.FieldNumber)
         if guardObj == nil {
-            log.Infof("guardObj nil but funcFact: %+v", funcFact)
             continue
         }
         var fieldFacts lockFieldFacts
diff --git a/tools/defs.bzl b/tools/defs.bzl
index d2c697c0d..27542a2f5 100644
--- a/tools/defs.bzl
+++ b/tools/defs.bzl
@@ -10,7 +10,7 @@ load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_
 load("//tools/nogo:defs.bzl", "nogo_test")
 load("//tools/bazeldefs:defs.bzl", _arch_genrule = "arch_genrule", _build_test = "build_test", _bzl_library = "bzl_library", _coreutil = "coreutil", _default_installer = "default_installer", _default_net_util = "default_net_util", _more_shards = "more_shards", _most_shards = "most_shards", _proto_library = "proto_library", _select_arch = "select_arch", _select_system = "select_system", _short_path = "short_path", _version = "version")
 load("//tools/bazeldefs:cc.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_grpc_library = "cc_grpc_library", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _gbenchmark = "gbenchmark", _grpcpp = "grpcpp", _gtest = "gtest", _vdso_linker_option = "vdso_linker_option")
-load("//tools/bazeldefs:go.bzl", _gazelle = "gazelle", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_library = "go_library", _go_path = "go_path", _go_proto_library = "go_proto_library", _go_rule = "go_rule", _go_test = "go_test", _select_goarch = "select_goarch", _select_goos = "select_goos")
+load("//tools/bazeldefs:go.bzl", _bazel_worker_proto = "bazel_worker_proto", _gazelle = "gazelle", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_library = "go_library", _go_path = "go_path", _go_proto_library = "go_proto_library", _go_rule = "go_rule", _go_test = "go_test", _select_goarch = "select_goarch", _select_goos = "select_goos")
 load("//tools/bazeldefs:pkg.bzl", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar")
 load("//tools/bazeldefs:platforms.bzl", _default_platform = "default_platform", _platforms = "platforms")
 load("//tools/bazeldefs:tags.bzl", "go_suffixes")
@@ -47,6 +47,8 @@ go_path = _go_path
 select_goos = _select_goos
 select_goarch = _select_goarch
 go_embed_data = _go_embed_data
+go_proto_library = _go_proto_library
+bazel_worker_proto = _bazel_worker_proto
 
 # Packaging rules.
 pkg_deb = _pkg_deb
diff --git a/tools/nogo/BUILD b/tools/nogo/BUILD
index 5fc60d8d8..6c6f604b5 100644
--- a/tools/nogo/BUILD
+++ b/tools/nogo/BUILD
@@ -37,6 +37,7 @@ go_library(
         "//tools/checkescape",
         "//tools/checklocks",
         "//tools/checkunsafe",
+        "//tools/worker",
         "@co_honnef_go_tools//staticcheck:go_default_library",
         "@co_honnef_go_tools//stylecheck:go_default_library",
         "@org_golang_x_tools//go/analysis:go_default_library",
diff --git a/tools/nogo/check/BUILD b/tools/nogo/check/BUILD
index e18483a18..666780dd3 100644
--- a/tools/nogo/check/BUILD
+++ b/tools/nogo/check/BUILD
@@ -7,5 +7,8 @@ go_binary(
     srcs = ["main.go"],
     nogo = False,
     visibility = ["//visibility:public"],
-    deps = ["//tools/nogo"],
+    deps = [
+        "//tools/nogo",
+        "//tools/worker",
+    ],
 )
diff --git a/tools/nogo/check/main.go b/tools/nogo/check/main.go
index 4194770be..3a6c3fb08 100644
--- a/tools/nogo/check/main.go
+++ b/tools/nogo/check/main.go
@@ -24,6 +24,7 @@ import (
     "os"
 
     "gvisor.dev/gvisor/tools/nogo"
+    "gvisor.dev/gvisor/tools/worker"
 )
 
 var (
@@ -49,9 +50,10 @@ func loadConfig(file string, config interface{}) interface{} {
 }
 
 func main() {
-    // Parse all flags.
-    flag.Parse()
+    worker.Work(run)
+}
 
+func run([]string) int {
     var (
         findings []nogo.Finding
         factData []byte
@@ -90,7 +92,11 @@ func main() {
 
     // Write all findings.
     if *findingsOutput != "" {
-        if err := nogo.WriteFindingsToFile(findings, *findingsOutput); err != nil {
+        w, err := os.OpenFile(*findingsOutput, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
+        if err != nil {
+            log.Fatalf("error opening output file %q: %v", *findingsOutput, err)
+        }
+        if err := nogo.WriteFindingsTo(w, findings, false /* json */); err != nil {
            log.Fatalf("error writing findings to %q: %v", *findingsOutput, err)
         }
     } else {
@@ -98,4 +104,6 @@ func main() {
             fmt.Fprintf(os.Stdout, "%s\n", finding.String())
         }
     }
+
+    return 0
 }
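The main.go change above is the whole conversion recipe for a worker-compatible tool: main() shrinks to worker.Work(run), and the old body becomes a run function that may be invoked many times in one process. A minimal sketch of a new tool adopting the same pattern (the tool and its flag are illustrative, not part of this commit):

    // Hypothetical minimal tool built on the tools/worker package added below.
    package main

    import (
        "flag"
        "fmt"

        "gvisor.dev/gvisor/tools/worker"
    )

    var name = flag.String("name", "world", "who to greet")

    // run may be called once per work request; worker.Work re-parses the
    // flags from each request's arguments before invoking it.
    func run(args []string) int {
        fmt.Printf("hello, %s (args: %v)\n", *name, args)
        return 0 // Becomes the WorkResponse exit code (or the os.Exit code).
    }

    func main() {
        // Work handles --persistent_worker, @argsfile expansion and protocol
        // framing; without --persistent_worker it simply runs once and exits.
        worker.Work(run)
    }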
diff --git a/tools/nogo/config.go b/tools/nogo/config.go
index 2fea5b3e1..6436f9d34 100644
--- a/tools/nogo/config.go
+++ b/tools/nogo/config.go
@@ -73,17 +73,28 @@ type ItemConfig struct {
 }
 
 func compileRegexps(ss []string, rs *[]*regexp.Regexp) error {
-    *rs = make([]*regexp.Regexp, 0, len(ss))
-    for _, s := range ss {
+    *rs = make([]*regexp.Regexp, len(ss))
+    for i, s := range ss {
         r, err := regexp.Compile(s)
         if err != nil {
             return err
         }
-        *rs = append(*rs, r)
+        (*rs)[i] = r
     }
     return nil
 }
 
+// RegexpCount is used by AnalyzerConfig.RegexpCount.
+func (i *ItemConfig) RegexpCount() int64 {
+    if i == nil {
+        // See compile.
+        return 0
+    }
+    // Return the number of regular expressions compiled for these items.
+    // This is how the cache size of the configuration is measured.
+    return int64(len(i.exclude) + len(i.suppress))
+}
+
 func (i *ItemConfig) compile() error {
     if i == nil {
         // This may be nil if nothing is included in the
@@ -100,9 +111,25 @@ func (i *ItemConfig) compile() error {
     return nil
 }
 
+func merge(a, b []string) []string {
+    found := make(map[string]struct{})
+    result := make([]string, 0, len(a)+len(b))
+    for _, elem := range a {
+        found[elem] = struct{}{}
+        result = append(result, elem)
+    }
+    for _, elem := range b {
+        if _, ok := found[elem]; ok {
+            continue
+        }
+        result = append(result, elem)
+    }
+    return result
+}
+
 func (i *ItemConfig) merge(other *ItemConfig) {
-    i.Exclude = append(i.Exclude, other.Exclude...)
-    i.Suppress = append(i.Suppress, other.Suppress...)
+    i.Exclude = merge(i.Exclude, other.Exclude)
+    i.Suppress = merge(i.Suppress, other.Suppress)
 }
 
 func (i *ItemConfig) shouldReport(fullPos, msg string) bool {
@@ -129,6 +156,15 @@ func (i *ItemConfig) shouldReport(fullPos, msg string) bool {
 // configurations depending on what Group the file belongs to.
 type AnalyzerConfig map[GroupName]*ItemConfig
 
+// RegexpCount is used by Config.Size.
+func (a AnalyzerConfig) RegexpCount() int64 {
+    count := int64(0)
+    for _, gc := range a {
+        count += gc.RegexpCount()
+    }
+    return count
+}
+
 func (a AnalyzerConfig) compile() error {
     for name, gc := range a {
         if err := gc.compile(); err != nil {
@@ -179,22 +215,36 @@ type Config struct {
     Analyzers map[AnalyzerName]AnalyzerConfig `yaml:"analyzers"`
 }
 
+// Size implements worker.Sizer.Size.
+func (c *Config) Size() int64 {
+    count := c.Global.RegexpCount()
+    for _, config := range c.Analyzers {
+        count += config.RegexpCount()
+    }
+    // The size is measured as the number of regexps that are compiled
+    // here. We multiply by 1k to produce an estimate.
+    return 1024 * count
+}
+
 // Merge merges two configurations.
 func (c *Config) Merge(other *Config) {
     // Merge all groups.
+    //
+    // Process the other configuration first, as the order provided in the
+    // second takes precedence over the same groups defined in the first one.
+    seenGroups := make(map[GroupName]struct{})
+    newGroups := make([]Group, 0, len(c.Groups)+len(other.Groups))
     for _, g := range other.Groups {
-        // Is there a matching group? If yes, we just delete
-        // it. This will preserve the order provided in the
-        // overriding file, even if it differs.
-        for i := 0; i < len(c.Groups); i++ {
-            if g.Name == c.Groups[i].Name {
-                copy(c.Groups[i:], c.Groups[i+1:])
-                c.Groups = c.Groups[:len(c.Groups)-1]
-                break
-            }
+        newGroups = append(newGroups, g)
+        seenGroups[g.Name] = struct{}{}
+    }
+    for _, g := range c.Groups {
+        if _, ok := seenGroups[g.Name]; ok {
+            continue
         }
-        c.Groups = append(c.Groups, g)
+        newGroups = append(newGroups, g)
     }
+    c.Groups = newGroups
 
     // Merge global configurations.
     c.Global.merge(other.Global)
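The new merge helper matters because merged *Config values are now cached and may be merged repeatedly: the operation is order-preserving and deduplicating, so re-merging the same files is idempotent. A small standalone illustration of the intended semantics (mirroring the helper above):

    package main

    import "fmt"

    // merge mirrors the helper added to config.go: keep a's order, then
    // append only those elements of b not already present.
    func merge(a, b []string) []string {
        found := make(map[string]struct{})
        result := make([]string, 0, len(a)+len(b))
        for _, elem := range a {
            found[elem] = struct{}{}
            result = append(result, elem)
        }
        for _, elem := range b {
            if _, ok := found[elem]; ok {
                continue
            }
            result = append(result, elem)
        }
        return result
    }

    func main() {
        fmt.Println(merge([]string{"a", "b"}, []string{"b", "c"}))      // [a b c]
        fmt.Println(merge([]string{"a", "b", "c"}, []string{"b", "c"})) // [a b c] again
    }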
diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl
index be8b82f9c..ddf5816a6 100644
--- a/tools/nogo/defs.bzl
+++ b/tools/nogo/defs.bzl
@@ -29,6 +29,7 @@ NogoTargetInfo = provider(
     fields = {
         "goarch": "the build architecture (GOARCH)",
         "goos": "the build OS target (GOOS)",
+        "worker_debug": "transitive debugging",
     },
 )
 
@@ -36,6 +37,7 @@ def _nogo_target_impl(ctx):
     return [NogoTargetInfo(
         goarch = ctx.attr.goarch,
         goos = ctx.attr.goos,
+        worker_debug = ctx.attr.worker_debug,
     )]
 
 nogo_target = go_rule(
@@ -50,6 +52,10 @@ nogo_target = go_rule(
             doc = "the Go OS target (propagated to other rules).",
             mandatory = True,
         ),
+        "worker_debug": attr.bool(
+            doc = "whether worker debugging should be enabled.",
+            default = False,
+        ),
     },
 )
 
@@ -61,7 +67,7 @@ def _nogo_objdump_tool_impl(ctx):
     # we need the tool to handle this case by creating a temporary file.
     #
     # [1] https://github.com/golang/go/issues/41051
-    nogo_target_info = ctx.attr._nogo_target[NogoTargetInfo]
+    nogo_target_info = ctx.attr._target[NogoTargetInfo]
     go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch)
     env_prefix = " ".join(["%s=%s" % (key, value) for (key, value) in go_ctx.env.items()])
     dumper = ctx.actions.declare_file(ctx.label.name)
@@ -94,7 +100,7 @@ nogo_objdump_tool = go_rule(
     rule,
     implementation = _nogo_objdump_tool_impl,
    attrs = {
-        "_nogo_target": attr.label(
+        "_target": attr.label(
             default = "//tools/nogo:target",
             cfg = "target",
         ),
@@ -112,7 +118,7 @@ NogoStdlibInfo = provider(
 
 def _nogo_stdlib_impl(ctx):
     # Build the standard library facts.
-    nogo_target_info = ctx.attr._nogo_target[NogoTargetInfo]
+    nogo_target_info = ctx.attr._target[NogoTargetInfo]
     go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch)
     facts = ctx.actions.declare_file(ctx.label.name + ".facts")
     raw_findings = ctx.actions.declare_file(ctx.label.name + ".raw_findings")
@@ -124,18 +130,29 @@ def _nogo_stdlib_impl(ctx):
     )
     config_file = ctx.actions.declare_file(ctx.label.name + ".cfg")
     ctx.actions.write(config_file, config.to_json())
-    ctx.actions.run(
-        inputs = [config_file] + go_ctx.stdlib_srcs,
-        outputs = [facts, raw_findings],
-        tools = depset(go_ctx.runfiles.to_list() + ctx.files._nogo_objdump_tool),
-        executable = ctx.files._nogo_check[0],
-        mnemonic = "NogoStandardLibraryAnalysis",
-        progress_message = "Analyzing Go Standard Library",
-        arguments = go_ctx.nogo_args + [
-            "-objdump_tool=%s" % ctx.files._nogo_objdump_tool[0].path,
+    args_file = ctx.actions.declare_file(ctx.label.name + "_args_file")
+    ctx.actions.write(
+        output = args_file,
+        content = "\n".join(go_ctx.nogo_args + [
+            "-objdump_tool=%s" % ctx.files._objdump_tool[0].path,
             "-stdlib=%s" % config_file.path,
             "-findings=%s" % raw_findings.path,
             "-facts=%s" % facts.path,
+        ]),
+    )
+    ctx.actions.run(
+        inputs = [config_file] + go_ctx.stdlib_srcs + [args_file],
+        outputs = [facts, raw_findings],
+        tools = depset(go_ctx.runfiles.to_list() + ctx.files._objdump_tool),
+        executable = ctx.files._check[0],
+        mnemonic = "GoStandardLibraryAnalysis",
+        # Note that this does not support work execution currently. There is an
+        # issue with stdout pollution that is not yet resolved, so this is kept
+        # as a separate mnemonic.
+        progress_message = "Analyzing Go Standard Library",
+        arguments = [
+            "--worker_debug=%s" % nogo_target_info.worker_debug,
+            "@%s" % args_file.path,
         ],
     )
 
@@ -149,15 +166,15 @@ nogo_stdlib = go_rule(
     rule,
     implementation = _nogo_stdlib_impl,
     attrs = {
-        "_nogo_check": attr.label(
+        "_check": attr.label(
             default = "//tools/nogo/check:check",
             cfg = "host",
         ),
-        "_nogo_objdump_tool": attr.label(
+        "_objdump_tool": attr.label(
             default = "//tools/nogo:objdump_tool",
             cfg = "host",
         ),
-        "_nogo_target": attr.label(
+        "_target": attr.label(
             default = "//tools/nogo:target",
             cfg = "target",
         ),
@@ -276,7 +293,7 @@ def _nogo_aspect_impl(target, ctx):
         inputs.append(stdlib_facts)
 
     # The nogo tool operates on a configuration serialized in JSON format.
-    nogo_target_info = ctx.attr._nogo_target[NogoTargetInfo]
+    nogo_target_info = ctx.attr._target[NogoTargetInfo]
     go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch)
     facts = ctx.actions.declare_file(target.label.name + ".facts")
     raw_findings = ctx.actions.declare_file(target.label.name + ".raw_findings")
@@ -294,19 +311,28 @@ def _nogo_aspect_impl(target, ctx):
     config_file = ctx.actions.declare_file(target.label.name + ".cfg")
     ctx.actions.write(config_file, config.to_json())
     inputs.append(config_file)
-    ctx.actions.run(
-        inputs = inputs,
-        outputs = [facts, raw_findings],
-        tools = depset(go_ctx.runfiles.to_list() + ctx.files._nogo_objdump_tool),
-        executable = ctx.files._nogo_check[0],
-        mnemonic = "NogoAnalysis",
-        progress_message = "Analyzing %s" % target.label,
-        arguments = go_ctx.nogo_args + [
+    args_file = ctx.actions.declare_file(ctx.label.name + "_args_file")
+    ctx.actions.write(
+        output = args_file,
+        content = "\n".join(go_ctx.nogo_args + [
             "-binary=%s" % target_objfile.path,
-            "-objdump_tool=%s" % ctx.files._nogo_objdump_tool[0].path,
+            "-objdump_tool=%s" % ctx.files._objdump_tool[0].path,
             "-package=%s" % config_file.path,
             "-findings=%s" % raw_findings.path,
             "-facts=%s" % facts.path,
+        ]),
+    )
+    ctx.actions.run(
+        inputs = inputs + [args_file],
+        outputs = [facts, raw_findings],
+        tools = depset(go_ctx.runfiles.to_list() + ctx.files._objdump_tool),
+        executable = ctx.files._check[0],
+        mnemonic = "GoStaticAnalysis",
+        progress_message = "Analyzing %s" % target.label,
+        execution_requirements = {"supports-workers": "1"},
+        arguments = [
+            "--worker_debug=%s" % nogo_target_info.worker_debug,
+            "@%s" % args_file.path,
         ],
     )
 
@@ -339,27 +365,30 @@ nogo_aspect = go_rule(
         "embed",
     ],
     attrs = {
-        "_nogo_check": attr.label(
+        "_check": attr.label(
             default = "//tools/nogo/check:check",
             cfg = "host",
         ),
-        "_nogo_stdlib": attr.label(
-            default = "//tools/nogo:stdlib",
-            cfg = "host",
-        ),
-        "_nogo_objdump_tool": attr.label(
+        "_objdump_tool": attr.label(
             default = "//tools/nogo:objdump_tool",
             cfg = "host",
         ),
-        "_nogo_target": attr.label(
+        "_target": attr.label(
             default = "//tools/nogo:target",
             cfg = "target",
         ),
+        # The name of this attribute must not be _stdlib, since that
+        # appears to be reserved for some internal bazel use.
+        "_nogo_stdlib": attr.label(
+            default = "//tools/nogo:stdlib",
+            cfg = "host",
+        ),
     },
 )
 
 def _nogo_test_impl(ctx):
     """Check nogo findings."""
+    nogo_target_info = ctx.attr._target[NogoTargetInfo]
 
     # Ensure there's a single dependency.
     if len(ctx.attr.deps) != 1:
@@ -369,16 +398,27 @@ def _nogo_test_impl(ctx):
 
     # Build a step that applies the configuration.
     config_srcs = ctx.attr.config[NogoConfigInfo].srcs
     findings = ctx.actions.declare_file(ctx.label.name + ".findings")
+    args_file = ctx.actions.declare_file(ctx.label.name + "_args_file")
+    ctx.actions.write(
+        output = args_file,
+        content = "\n".join(
+            ["-input=%s" % f.path for f in raw_findings] +
+            ["-config=%s" % f.path for f in config_srcs] +
+            ["-output=%s" % findings.path],
+        ),
+    )
     ctx.actions.run(
-        inputs = raw_findings + ctx.files.srcs + config_srcs,
+        inputs = raw_findings + ctx.files.srcs + config_srcs + [args_file],
         outputs = [findings],
         tools = depset(ctx.files._filter),
         executable = ctx.files._filter[0],
         mnemonic = "GoStaticAnalysis",
         progress_message = "Generating %s" % ctx.label,
-        arguments = ["-input=%s" % f.path for f in raw_findings] +
-                    ["-config=%s" % f.path for f in config_srcs] +
-                    ["-output=%s" % findings.path],
+        execution_requirements = {"supports-workers": "1"},
+        arguments = [
+            "--worker_debug=%s" % nogo_target_info.worker_debug,
+            "@%s" % args_file.path,
        ],
     )
 
     # Build a runner that checks the filtered facts.
@@ -389,7 +429,7 @@ def _nogo_test_impl(ctx):
     runner = ctx.actions.declare_file(ctx.label.name)
     runner_content = [
         "#!/bin/bash",
-        "exec %s -input=%s" % (ctx.files._filter[0].short_path, findings.short_path),
+        "exec %s -check -input=%s" % (ctx.files._filter[0].short_path, findings.short_path),
         "",
     ]
     ctx.actions.write(runner, "\n".join(runner_content), is_executable = True)
@@ -423,6 +463,10 @@ nogo_test = rule(
             allow_files = True,
             doc = "Relevant src files. This is ignored except to make the nogo_test directly affected by the files.",
         ),
+        "_target": attr.label(
+            default = "//tools/nogo:target",
+            cfg = "target",
+        ),
         "_filter": attr.label(default = "//tools/nogo/filter:filter"),
     },
     test = True,
diff --git a/tools/nogo/filter/BUILD b/tools/nogo/filter/BUILD
index e56a783e2..e3049521e 100644
--- a/tools/nogo/filter/BUILD
+++ b/tools/nogo/filter/BUILD
@@ -9,6 +9,7 @@ go_binary(
     visibility = ["//visibility:public"],
     deps = [
         "//tools/nogo",
+        "//tools/worker",
         "@in_gopkg_yaml_v2//:go_default_library",
     ],
 )
diff --git a/tools/nogo/filter/main.go b/tools/nogo/filter/main.go
index 8be38ca6d..d50336b9b 100644
--- a/tools/nogo/filter/main.go
+++ b/tools/nogo/filter/main.go
@@ -26,6 +26,7 @@ import (
     yaml "gopkg.in/yaml.v2"
 
     "gvisor.dev/gvisor/tools/nogo"
+    "gvisor.dev/gvisor/tools/worker"
 )
 
 type stringList []string
@@ -44,34 +45,44 @@ var (
     configFiles stringList
     outputFile  string
     showConfig  bool
+    check       bool
 )
 
 func init() {
-    flag.Var(&inputFiles, "input", "findings input files")
-    flag.StringVar(&outputFile, "output", "", "findings output file")
+    flag.Var(&inputFiles, "input", "findings input files (gob format)")
+    flag.StringVar(&outputFile, "output", "", "findings output file (json format)")
     flag.Var(&configFiles, "config", "findings configuration files")
     flag.BoolVar(&showConfig, "show-config", false, "dump configuration only")
+    flag.BoolVar(&check, "check", false, "assume input is in json format")
 }
 
 func main() {
-    flag.Parse()
+    worker.Work(run)
+}
 
-    // Load all available findings.
-    var findings []nogo.Finding
-    for _, filename := range inputFiles {
-        inputFindings, err := nogo.ExtractFindingsFromFile(filename)
+var (
+    cachedFindings    = worker.NewCache("findings") // With nogo.FindingSet.
+    cachedFiltered    = worker.NewCache("filtered") // With nogo.FindingSet.
+    cachedConfigs     = worker.NewCache("configs")  // With nogo.Config.
+    cachedFullConfigs = worker.NewCache("compiled") // With nogo.Config.
+)
+
+func loadFindings(filename string) nogo.FindingSet {
+    return cachedFindings.Lookup([]string{filename}, func() worker.Sizer {
+        r, err := os.Open(filename)
+        if err != nil {
+            log.Fatalf("unable to open input %q: %v", filename, err)
+        }
+        inputFindings, err := nogo.ExtractFindingsFrom(r, check /* json */)
         if err != nil {
             log.Fatalf("unable to extract findings from %s: %v", filename, err)
         }
-        findings = append(findings, inputFindings...)
-    }
+        return inputFindings
+    }).(nogo.FindingSet)
+}
 
-    // Open and merge all configuations.
-    config := &nogo.Config{
-        Global:    make(nogo.AnalyzerConfig),
-        Analyzers: make(map[nogo.AnalyzerName]nogo.AnalyzerConfig),
-    }
-    for _, filename := range configFiles {
+func loadConfig(filename string) *nogo.Config {
+    return cachedConfigs.Lookup([]string{filename}, func() worker.Sizer {
         content, err := ioutil.ReadFile(filename)
         if err != nil {
             log.Fatalf("unable to read %s: %v", filename, err)
         }
@@ -82,53 +93,98 @@ func main() {
         if err := dec.Decode(&newConfig); err != nil {
             log.Fatalf("unable to decode %s: %v", filename, err)
         }
-        config.Merge(&newConfig)
         if showConfig {
             content, err := yaml.Marshal(&newConfig)
             if err != nil {
                 log.Fatalf("error marshalling config: %v", err)
             }
-            mergedBytes, err := yaml.Marshal(config)
-            if err != nil {
-                log.Fatalf("error marshalling config: %v", err)
-            }
             fmt.Fprintf(os.Stdout, "Loaded configuration from %s:\n%s\n", filename, string(content))
-            fmt.Fprintf(os.Stdout, "Merged configuration:\n%s\n", string(mergedBytes))
         }
-    }
-    if err := config.Compile(); err != nil {
-        log.Fatalf("error compiling config: %v", err)
-    }
+        return &newConfig
+    }).(*nogo.Config)
+}
+
+func loadConfigs(filenames []string) *nogo.Config {
+    return cachedFullConfigs.Lookup(filenames, func() worker.Sizer {
+        config := &nogo.Config{
+            Global:    make(nogo.AnalyzerConfig),
+            Analyzers: make(map[nogo.AnalyzerName]nogo.AnalyzerConfig),
+        }
+        for _, filename := range configFiles {
+            config.Merge(loadConfig(filename))
+            if showConfig {
+                mergedBytes, err := yaml.Marshal(config)
+                if err != nil {
+                    log.Fatalf("error marshalling config: %v", err)
+                }
+                fmt.Fprintf(os.Stdout, "Merged configuration:\n%s\n", string(mergedBytes))
+            }
+        }
+        if err := config.Compile(); err != nil {
+            log.Fatalf("error compiling config: %v", err)
+        }
+        return config
+    }).(*nogo.Config)
+}
+
+func run([]string) int {
+    // Open and merge all configurations.
+    config := loadConfigs(configFiles)
     if showConfig {
-        os.Exit(0)
+        return 0
     }
 
-    // Filter the findings (and aggregate by group).
-    filteredFindings := make([]nogo.Finding, 0, len(findings))
-    for _, finding := range findings {
-        if ok := config.ShouldReport(finding); ok {
-            filteredFindings = append(filteredFindings, finding)
-        }
+    // Load and filter available findings.
+    var filteredFindings []nogo.Finding
+    for _, filename := range inputFiles {
+        // Note that this applies a caching strategy to the filtered
+        // findings, because *this is by far the most expensive part of
+        // evaluation*. The set of findings is large and applying the
+        // configuration is complex. Therefore, we segment this cache
+        // on each individual raw findings input file and the
+        // configuration files. Note that this cache is keyed on all
+        // the configuration files and each individual raw findings
+        // file, so it is guaranteed to be safe. This allows us to
+        // reuse the same filter result many times over, because e.g.
+        // all standard library findings will be available to all
+        // packages.
+        filteredFindings = append(filteredFindings,
+            cachedFiltered.Lookup(append(configFiles, filename), func() worker.Sizer {
+                inputFindings := loadFindings(filename)
+                filteredFindings := make(nogo.FindingSet, 0, len(inputFindings))
+                for _, finding := range inputFindings {
+                    if ok := config.ShouldReport(finding); ok {
+                        filteredFindings = append(filteredFindings, finding)
+                    }
+                }
+                return filteredFindings
+            }).(nogo.FindingSet)...)
     }
 
     // Write the output (if required).
     //
     // If the outputFile is specified, then we exit here. Otherwise,
     // we continue to write to stdout and treat like a test.
+    //
+    // Note that the output of the filter is always json, which is
+    // human readable and the format that is consumed by tricorder.
     if outputFile != "" {
-        if err := nogo.WriteFindingsToFile(filteredFindings, outputFile); err != nil {
+        w, err := os.OpenFile(outputFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
+        if err != nil {
+            log.Fatalf("unable to open output file %q: %v", outputFile, err)
+        }
+        if err := nogo.WriteFindingsTo(w, filteredFindings, true /* json */); err != nil {
             log.Fatalf("unable to write findings: %v", err)
         }
-        return
+        return 0
     }
 
     // Treat the run as a test.
     if len(filteredFindings) == 0 {
         fmt.Fprintf(os.Stdout, "PASS\n")
-        os.Exit(0)
+        return 0
     }
     for _, finding := range filteredFindings {
         fmt.Fprintf(os.Stdout, "%s\n", finding.String())
     }
-    os.Exit(1)
+    return 1
 }
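The cachedFiltered.Lookup call above shows the general worker-cache pattern: the key is a list of input files (resolved to bazel-provided digests inside Lookup when running as a persistent worker), and the generate callback runs only on a miss. A hedged standalone sketch of the same pattern — the cache name and countBytes helper are illustrative, not from this commit:

    package main

    import (
        "fmt"
        "io/ioutil"

        "gvisor.dev/gvisor/tools/worker"
    )

    // byteCache caches derived data keyed by input files. Under a persistent
    // worker, keys incorporate the request's file digests, so entries are
    // invalidated automatically when an input changes between requests.
    var byteCache = worker.NewCache("bytes")

    func countBytes(filename string) int64 {
        v := byteCache.Lookup([]string{filename}, func() worker.Sizer {
            data, err := ioutil.ReadFile(filename)
            if err != nil {
                return worker.CacheBytes(nil) // Cache the failure as empty.
            }
            return worker.CacheBytes(data) // Size() is len(data).
        })
        return v.(worker.CacheBytes).Size()
    }

    func main() {
        fmt.Println(countBytes("/etc/hostname"))
        fmt.Println(countBytes("/etc/hostname")) // Second call hits the cache.
    }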
diff --git a/tools/nogo/findings.go b/tools/nogo/findings.go
index a00cfe813..329a7062e 100644
--- a/tools/nogo/findings.go
+++ b/tools/nogo/findings.go
@@ -15,10 +15,13 @@
 package nogo
 
 import (
+    "encoding/gob"
     "encoding/json"
     "fmt"
     "go/token"
-    "io/ioutil"
+    "io"
+    "os"
+    "reflect"
     "sort"
 )
 
@@ -29,63 +32,96 @@ type Finding struct {
     Message string
 }
 
+// findingSize is the size of the finding struct itself.
+var findingSize = int64(reflect.TypeOf(Finding{}).Size())
+
+// Size implements worker.Sizer.Size.
+func (f *Finding) Size() int64 {
+    return int64(len(f.Category)) + int64(len(f.Message)) + findingSize
+}
+
 // String implements fmt.Stringer.String.
 func (f *Finding) String() string {
     return fmt.Sprintf("%s: %s: %s", f.Category, f.Position.String(), f.Message)
 }
 
-// WriteFindingsToFile writes findings to a file.
-func WriteFindingsToFile(findings []Finding, filename string) error {
-    content, err := WriteFindingsToBytes(findings)
-    if err != nil {
-        return err
+// FindingSet is a collection of findings.
+type FindingSet []Finding
+
+// Size implements worker.Sizer.Size.
+func (fs FindingSet) Size() int64 {
+    size := int64(0)
+    for _, finding := range fs {
+        size += finding.Size()
     }
-    return ioutil.WriteFile(filename, content, 0644)
+    return size
 }
 
-// WriteFindingsToBytes serializes findings as bytes.
-func WriteFindingsToBytes(findings []Finding) ([]byte, error) {
-    // N.B. Sort all the findings in order to maximize cacheability.
-    sort.Slice(findings, func(i, j int) bool {
+// Sort sorts all findings.
+func (fs FindingSet) Sort() {
+    sort.Slice(fs, func(i, j int) bool {
         switch {
-        case findings[i].Position.Filename < findings[j].Position.Filename:
+        case fs[i].Position.Filename < fs[j].Position.Filename:
             return true
-        case findings[i].Position.Filename > findings[j].Position.Filename:
+        case fs[i].Position.Filename > fs[j].Position.Filename:
             return false
-        case findings[i].Position.Line < findings[j].Position.Line:
+        case fs[i].Position.Line < fs[j].Position.Line:
             return true
-        case findings[i].Position.Line > findings[j].Position.Line:
+        case fs[i].Position.Line > fs[j].Position.Line:
             return false
-        case findings[i].Position.Column < findings[j].Position.Column:
+        case fs[i].Position.Column < fs[j].Position.Column:
             return true
-        case findings[i].Position.Column > findings[j].Position.Column:
+        case fs[i].Position.Column > fs[j].Position.Column:
             return false
-        case findings[i].Category < findings[j].Category:
+        case fs[i].Category < fs[j].Category:
             return true
-        case findings[i].Category > findings[j].Category:
+        case fs[i].Category > fs[j].Category:
             return false
-        case findings[i].Message < findings[j].Message:
+        case fs[i].Message < fs[j].Message:
             return true
-        case findings[i].Message > findings[j].Message:
+        case fs[i].Message > fs[j].Message:
             return false
         default:
             return false
         }
     })
-    return json.Marshal(findings)
+}
+
+// WriteFindingsTo serializes findings.
+func WriteFindingsTo(w io.Writer, findings FindingSet, asJSON bool) error {
+    // N.B. Sort all the findings in order to maximize cacheability.
+    findings.Sort()
+    if asJSON {
+        enc := json.NewEncoder(w)
+        return enc.Encode(findings)
+    }
+    enc := gob.NewEncoder(w)
+    return enc.Encode(findings)
 }
 
 // ExtractFindingsFromFile loads findings from a file.
-func ExtractFindingsFromFile(filename string) ([]Finding, error) {
-    content, err := ioutil.ReadFile(filename)
+func ExtractFindingsFromFile(filename string, asJSON bool) (FindingSet, error) {
+    r, err := os.Open(filename)
     if err != nil {
         return nil, err
     }
-    return ExtractFindingsFromBytes(content)
+    defer r.Close()
+    return ExtractFindingsFrom(r, asJSON)
 }
 
-// ExtractFindingsFromBytes loads findings from bytes.
-func ExtractFindingsFromBytes(content []byte) (findings []Finding, err error) {
-    err = json.Unmarshal(content, &findings)
+// ExtractFindingsFrom loads findings from the given reader.
+func ExtractFindingsFrom(r io.Reader, asJSON bool) (findings FindingSet, err error) {
+    if asJSON {
+        dec := json.NewDecoder(r)
+        err = dec.Decode(&findings)
+    } else {
+        dec := gob.NewDecoder(r)
+        err = dec.Decode(&findings)
+    }
    return findings, err
 }
+
+func init() {
+    gob.Register((*Finding)(nil))
+    gob.Register((*FindingSet)(nil))
+}
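Findings now travel in two formats: gob between build actions (compact and fast to decode) and JSON at the edges (human-readable, consumed by other tooling). A hedged round-trip sketch using the WriteFindingsTo/ExtractFindingsFrom pair above, assuming the Finding fields visible in this diff (Category, Position, Message):

    package main

    import (
        "bytes"
        "fmt"
        "go/token"

        "gvisor.dev/gvisor/tools/nogo"
    )

    func main() {
        fs := nogo.FindingSet{{
            Category: "checklocks",
            Position: token.Position{Filename: "foo.go", Line: 10},
            Message:  "may require checkMutex",
        }}

        // Gob (asJSON=false) for intermediate files; pass true for JSON.
        var buf bytes.Buffer
        if err := nogo.WriteFindingsTo(&buf, fs, false /* json */); err != nil {
            panic(err)
        }
        back, err := nogo.ExtractFindingsFrom(&buf, false /* json */)
        if err != nil {
            panic(err)
        }
        fmt.Println(back[0].String()) // checklocks: foo.go:10: may require checkMutex
    }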
diff --git a/tools/nogo/nogo.go b/tools/nogo/nogo.go
index c1b88e89f..acee7c8bc 100644
--- a/tools/nogo/nogo.go
+++ b/tools/nogo/nogo.go
@@ -19,7 +19,8 @@ package nogo
 
 import (
-    "encoding/json"
+    "bytes"
+    "encoding/gob"
     "errors"
     "fmt"
     "go/ast"
@@ -43,6 +44,7 @@ import (
 
     // Special case: flags live here and change overall behavior.
     "gvisor.dev/gvisor/tools/checkescape"
+    "gvisor.dev/gvisor/tools/worker"
 )
 
 // StdlibConfig is serialized as the configuration.
@@ -82,46 +84,94 @@ type stdlibFact struct {
     Facts []byte
 }
 
-// factLoader returns a function that loads facts.
-//
-// This resolves all standard library facts and imported package facts up
-// front. The returned loader function will never return an error, only
-// empty facts.
-//
-// This is done because all stdlib data is stored together, and we don't want
-// to load this data many times over.
-func (c *PackageConfig) factLoader() (loader, error) {
-    allFacts := make(map[string][]byte)
-    if c.StdlibFacts != "" {
-        data, err := ioutil.ReadFile(c.StdlibFacts)
-        if err != nil {
-            return nil, fmt.Errorf("error loading stdlib facts from %q: %w", c.StdlibFacts, err)
-        }
-        var (
-            stdlibFactsSorted []stdlibFact
-            stdlibFacts       = make(map[string][]byte)
-        )
-        // See below re: sorted serialization.
-        if err := json.Unmarshal(data, &stdlibFactsSorted); err != nil {
-            return nil, fmt.Errorf("error loading stdlib facts: %w", err)
-        }
-        for _, stdlibFact := range stdlibFactsSorted {
-            stdlibFacts[stdlibFact.Package] = stdlibFact.Facts
-        }
-        for pkg, data := range stdlibFacts {
-            allFacts[pkg] = data
+// stdlibFacts is a set of standard library facts.
+type stdlibFacts map[string][]byte
+
+// Size implements worker.Sizer.Size.
+func (sf stdlibFacts) Size() int64 {
+    size := int64(0)
+    for filename, data := range sf {
+        size += int64(len(filename))
+        size += int64(len(data))
+    }
+    return size
+}
+
+// EncodeTo serializes stdlibFacts.
+func (sf stdlibFacts) EncodeTo(w io.Writer) error {
+    stdlibFactsSorted := make([]stdlibFact, 0, len(sf))
+    for pkg, facts := range sf {
+        stdlibFactsSorted = append(stdlibFactsSorted, stdlibFact{
+            Package: pkg,
+            Facts:   facts,
+        })
+    }
+    sort.Slice(stdlibFactsSorted, func(i, j int) bool {
+        return stdlibFactsSorted[i].Package < stdlibFactsSorted[j].Package
+    })
+    enc := gob.NewEncoder(w)
+    if err := enc.Encode(stdlibFactsSorted); err != nil {
+        return err
+    }
+    return nil
+}
+
+// DecodeFrom deserializes stdlibFacts.
+func (sf stdlibFacts) DecodeFrom(r io.Reader) error {
+    var stdlibFactsSorted []stdlibFact
+    dec := gob.NewDecoder(r)
+    if err := dec.Decode(&stdlibFactsSorted); err != nil {
+        return err
+    }
+    for _, stdlibFact := range stdlibFactsSorted {
+        sf[stdlibFact.Package] = stdlibFact.Facts
+    }
+    return nil
+}
+
+var (
+    // cachedFacts caches by file (just byte data).
+    cachedFacts = worker.NewCache("facts")
+
+    // stdlibCachedFacts caches the standard library (stdlibFacts).
+    stdlibCachedFacts = worker.NewCache("stdlib")
+)
+
+// factLoader loads facts.
+func (c *PackageConfig) factLoader(path string) (data []byte, err error) {
+    filename, ok := c.FactMap[path]
+    if ok {
+        cb := cachedFacts.Lookup([]string{filename}, func() worker.Sizer {
+            data, readErr := ioutil.ReadFile(filename)
+            if readErr != nil {
+                err = fmt.Errorf("error loading %q: %w", filename, readErr)
+                return nil
+            }
+            return worker.CacheBytes(data)
+        })
+        if cb != nil {
+            return []byte(cb.(worker.CacheBytes)), err
         }
+        return nil, err
     }
-    for pkg, file := range c.FactMap {
-        data, err := ioutil.ReadFile(file)
-        if err != nil {
-            return nil, fmt.Errorf("error loading %q: %w", file, err)
+    cb := stdlibCachedFacts.Lookup([]string{c.StdlibFacts}, func() worker.Sizer {
+        r, openErr := os.Open(c.StdlibFacts)
+        if openErr != nil {
+            err = fmt.Errorf("error loading stdlib facts from %q: %w", c.StdlibFacts, openErr)
+            return nil
+        }
+        defer r.Close()
+        sf := make(stdlibFacts)
+        if readErr := sf.DecodeFrom(r); readErr != nil {
+            err = fmt.Errorf("error loading stdlib facts: %w", readErr)
+            return nil
         }
-        allFacts[pkg] = data
+        return sf
+    })
+    if cb != nil {
+        return (cb.(stdlibFacts))[path], err
     }
-    return func(path string) ([]byte, error) {
-        return allFacts[path], nil
-    }, nil
+    return nil, err
 }
 
 // shouldInclude indicates whether the file should be included.
@@ -205,7 +255,7 @@ var ErrSkip = errors.New("skipped")
 //
 // Note that not all parts of the source are expected to build. We skip obvious
 // test files, and cmd files, which should not be dependencies.
-func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindings []Finding, facts []byte, err error) {
+func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindings FindingSet, facts []byte, err error) {
     if len(config.Srcs) == 0 {
         return nil, nil, nil
     }
@@ -275,16 +325,16 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi
     }
 
     // Closure to check a single package.
-    stdlibFacts := make(map[string][]byte)
-    stdlibErrs := make(map[string]error)
+    localStdlibFacts := make(stdlibFacts)
+    localStdlibErrs := make(map[string]error)
     var checkOne func(pkg string) error // Recursive.
     checkOne = func(pkg string) error {
         // Is this already done?
-        if _, ok := stdlibFacts[pkg]; ok {
+        if _, ok := localStdlibFacts[pkg]; ok {
             return nil
         }
         // Did this fail previously?
-        if _, ok := stdlibErrs[pkg]; ok {
+        if _, ok := localStdlibErrs[pkg]; ok {
             return nil
         }
 
@@ -300,7 +350,7 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi
             // If there's no binary for this package, it is likely
             // not built with the distribution. That's fine, we can
             // just skip analysis.
-            stdlibErrs[pkg] = err
+            localStdlibErrs[pkg] = err
             return nil
         }
 
@@ -317,10 +367,10 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi
         if err != nil {
             // If we can't analyze a package from the standard library,
             // then we skip it. It will simply not have any findings.
-            stdlibErrs[pkg] = err
+            localStdlibErrs[pkg] = err
             return nil
         }
-        stdlibFacts[pkg] = factData
+        localStdlibFacts[pkg] = factData
         allFindings = append(allFindings, findings...)
         return nil
     }
@@ -337,34 +387,23 @@ func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindi
     }
 
     // Sanity check.
-    if len(stdlibFacts) == 0 {
+    if len(localStdlibFacts) == 0 {
         return nil, nil, fmt.Errorf("no stdlib facts found: misconfiguration?")
     }
 
-    // Write out all findings. Note that the standard library facts
-    // must be serialized in a sorted order to ensure cacheability.
-    stdlibFactsSorted := make([]stdlibFact, 0, len(stdlibFacts))
-    for pkg, facts := range stdlibFacts {
-        stdlibFactsSorted = append(stdlibFactsSorted, stdlibFact{
-            Package: pkg,
-            Facts:   facts,
-        })
-    }
-    sort.Slice(stdlibFactsSorted, func(i, j int) bool {
-        return stdlibFactsSorted[i].Package < stdlibFactsSorted[j].Package
-    })
-    factData, err := json.Marshal(stdlibFactsSorted)
-    if err != nil {
-        return nil, nil, fmt.Errorf("error saving stdlib facts: %w", err)
+    // Write out all findings.
+    buf := bytes.NewBuffer(nil)
+    if err := localStdlibFacts.EncodeTo(buf); err != nil {
+        return nil, nil, fmt.Errorf("error serializing stdlib facts: %v", err)
     }
 
     // Write out all errors.
-    for pkg, err := range stdlibErrs {
+    for pkg, err := range localStdlibErrs {
         log.Printf("WARNING: error while processing %v: %v", pkg, err)
     }
 
     // Return all findings.
-    return allFindings, factData, nil
+    return allFindings, buf.Bytes(), nil
 }
 
 // CheckPackage runs all given analyzers.
@@ -417,11 +456,7 @@ func CheckPackage(config *PackageConfig, analyzers []*analysis.Analyzer, importC
     }
 
     // Load all package facts.
-    loader, err := config.factLoader()
-    if err != nil {
-        return nil, nil, fmt.Errorf("error loading facts: %w", err)
-    }
-    facts, err := facts.Decode(types, loader)
+    facts, err := facts.Decode(types, config.factLoader)
     if err != nil {
         return nil, nil, fmt.Errorf("error decoding facts: %w", err)
     }
@@ -496,3 +531,7 @@ func CheckPackage(config *PackageConfig, analyzers []*analysis.Analyzer, importC
     // Return all findings.
     return findings, facts.Encode(), nil
 }
+
+func init() {
+    gob.Register((*stdlibFact)(nil))
+}
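Standard-library facts are now gob-encoded through stdlibFacts.EncodeTo, which sorts by package name first so that byte-identical output (and therefore bazel action-cache hits) does not depend on Go's randomized map iteration order. A small self-contained sketch of that sorted-encode idea (types mirrored from the diff, not the actual package):

    package main

    import (
        "bytes"
        "encoding/gob"
        "fmt"
        "sort"
    )

    // Mirrors the stdlibFact pair from tools/nogo/nogo.go.
    type stdlibFact struct {
        Package string
        Facts   []byte
    }

    func encode(sf map[string][]byte) ([]byte, error) {
        sorted := make([]stdlibFact, 0, len(sf))
        for pkg, facts := range sf {
            sorted = append(sorted, stdlibFact{Package: pkg, Facts: facts})
        }
        // Sorting makes the encoding deterministic: the same facts always
        // produce the same bytes, maximizing cacheability.
        sort.Slice(sorted, func(i, j int) bool { return sorted[i].Package < sorted[j].Package })
        var buf bytes.Buffer
        if err := gob.NewEncoder(&buf).Encode(sorted); err != nil {
            return nil, err
        }
        return buf.Bytes(), nil
    }

    func main() {
        a, _ := encode(map[string][]byte{"os": []byte("x"), "io": []byte("y")})
        b, _ := encode(map[string][]byte{"io": []byte("y"), "os": []byte("x")})
        fmt.Println(bytes.Equal(a, b)) // true, regardless of insertion order
    }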
diff --git a/tools/worker/BUILD b/tools/worker/BUILD
new file mode 100644
index 000000000..dc03ce11e
--- /dev/null
+++ b/tools/worker/BUILD
@@ -0,0 +1,21 @@
+load("//tools:defs.bzl", "bazel_worker_proto", "go_library")
+
+package(licenses = ["notice"])
+
+# For Google-tooling.
+# @unused
+glaze_ignore = [
+    "worker.go",
+]
+
+go_library(
+    name = "worker",
+    srcs = ["worker.go"],
+    visibility = ["//tools:__subpackages__"],
+    deps = [
+        bazel_worker_proto,
+        "@org_golang_google_protobuf//encoding/protowire:go_default_library",
+        "@org_golang_google_protobuf//proto:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
diff --git a/tools/worker/worker.go b/tools/worker/worker.go
new file mode 100644
index 000000000..669a5f203
--- /dev/null
+++ b/tools/worker/worker.go
@@ -0,0 +1,325 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package worker provides an implementation of the bazel worker protocol.
+//
+// Tools may be written as a normal command line utility, except the passed
+// run function may be invoked multiple times.
+package worker
+
+import (
+    "bufio"
+    "bytes"
+    "flag"
+    "fmt"
+    "io"
+    "io/ioutil"
+    "log"
+    "net"
+    "net/http"
+    "os"
+    "path/filepath"
+    "sort"
+    "strings"
+    "time"
+
+    _ "net/http/pprof" // For profiling.
+
+    "golang.org/x/sys/unix"
+    "google.golang.org/protobuf/encoding/protowire"
+    "google.golang.org/protobuf/proto"
+    wpb "gvisor.dev/bazel/worker_protocol_go_proto"
+)
+
+var (
+    persistentWorker  = flag.Bool("persistent_worker", false, "enable persistent worker.")
+    workerDebug       = flag.Bool("worker_debug", false, "debug persistent workers.")
+    maximumCacheUsage = flag.Int64("maximum_cache_usage", 1024*1024*1024, "maximum cache size.")
+)
+
+var (
+    // inputFiles is the last set of input files.
+    //
+    // This is used for cache invalidation. The key is the *absolute* path
+    // name, and the value is the digest in the current run.
+    inputFiles = make(map[string]string)
+
+    // activeCaches is the set of active caches.
+    activeCaches = make(map[*Cache]struct{})
+
+    // totalCacheUsage is the total usage of all caches.
+    totalCacheUsage int64
+)
+
+// mustAbs returns the absolute path of a filename or dies.
+func mustAbs(filename string) string {
+    abs, err := filepath.Abs(filename)
+    if err != nil {
+        log.Fatalf("error getting absolute path: %v", err)
+    }
+    return abs
+}
+
+// updateInputFile creates an entry in inputFiles.
+func updateInputFile(filename, digest string) {
+    inputFiles[mustAbs(filename)] = digest
+}
+
+// Sizer returns a size.
+type Sizer interface {
+    Size() int64
+}
+
+// CacheBytes is an example of a Sizer.
+type CacheBytes []byte
+
+// Size implements Sizer.Size.
+func (cb CacheBytes) Size() int64 {
+    return int64(len(cb))
+}
+
+// Cache is a worker cache.
+//
+// They can be created via NewCache.
+type Cache struct {
+    name    string
+    entries map[string]Sizer
+    size    int64
+    hits    int64
+    misses  int64
+}
+
+// NewCache returns a new cache.
+func NewCache(name string) *Cache {
+    return &Cache{
+        name: name,
+    }
+}
+
+// Lookup looks up an entry in the cache.
+//
+// It is a function of the given files.
+func (c *Cache) Lookup(filenames []string, generate func() Sizer) Sizer {
+    digests := make([]string, 0, len(filenames))
+    for _, filename := range filenames {
+        digest, ok := inputFiles[mustAbs(filename)]
+        if !ok {
+            // This is not a valid input. We may not be running as
+            // persistent worker in this cache. If that's the case,
+            // then the file's contents will not change across the
+            // run, and we just use the filename itself.
+            digest = filename
+        }
+        digests = append(digests, digest)
+    }
+
+    // Attempt the lookup.
+    sort.Slice(digests, func(i, j int) bool {
+        return digests[i] < digests[j]
+    })
+    cacheKey := strings.Join(digests, "+")
+    if c.entries == nil {
+        c.entries = make(map[string]Sizer)
+        activeCaches[c] = struct{}{}
+    }
+    entry, ok := c.entries[cacheKey]
+    if ok {
+        c.hits++
+        return entry
+    }
+
+    // Generate a new entry.
+    entry = generate()
+    c.misses++
+    c.entries[cacheKey] = entry
+    if entry != nil {
+        sz := entry.Size()
+        c.size += sz
+        totalCacheUsage += sz
+    }
+
+    // Check the capacity of all caches. If it is greater than the maximum,
+    // we flush everything but still return this entry.
+    if totalCacheUsage > *maximumCacheUsage {
+        for entry, _ := range activeCaches {
+            // Drop all entries.
+            entry.size = 0
+            entry.entries = nil
+        }
+        totalCacheUsage = 0 // Reset.
+    }
+
+    return entry
+}
+
+// allCacheStats returns stats for all caches.
+func allCacheStats() string {
+    var sb strings.Builder
+    for entry, _ := range activeCaches {
+        ratio := float64(entry.hits) / float64(entry.hits+entry.misses)
+        fmt.Fprintf(&sb,
+            "% 10s: count: % 5d size: % 10d hits: % 7d misses: % 7d ratio: %2.2f\n",
+            entry.name, len(entry.entries), entry.size, entry.hits, entry.misses, ratio)
+    }
+    if len(activeCaches) > 0 {
+        fmt.Fprintf(&sb, "total: % 10d\n", totalCacheUsage)
+    }
+    return sb.String()
+}
+
+// LookupDigest returns a digest for the given file.
+func LookupDigest(filename string) (string, bool) {
+    digest, ok := inputFiles[filename]
+    return digest, ok
+}
+
+// Work invokes the main function.
+func Work(run func([]string) int) {
+    flag.CommandLine.Parse(os.Args[1:])
+    if !*persistentWorker {
+        // Handle the argument file.
+        args := flag.CommandLine.Args()
+        if len(args) == 1 && len(args[0]) > 1 && args[0][0] == '@' {
+            content, err := ioutil.ReadFile(args[0][1:])
+            if err != nil {
+                log.Fatalf("unable to parse args file: %v", err)
+            }
+            // Pull arguments from the file.
+            args = strings.Split(string(content), "\n")
+            flag.CommandLine.Parse(args)
+            args = flag.CommandLine.Args()
+        }
+        os.Exit(run(args))
+    }
+
+    var listenHeader string // Emitted always.
+    if *workerDebug {
+        // Bind a server for profiling.
+        listener, err := net.Listen("tcp", "localhost:0")
+        if err != nil {
+            log.Fatalf("unable to bind a server: %v", err)
+        }
+        // Construct the header for stats output, below.
+        listenHeader = fmt.Sprintf("Listening @ http://localhost:%d\n", listener.Addr().(*net.TCPAddr).Port)
+        go http.Serve(listener, nil)
+    }
+
+    // Move stdout. This is done to prevent anything else from accidentally
+    // printing to stdout, which must contain only the valid WorkerResponse
+    // serialized protos.
+    newOutput, err := unix.Dup(1)
+    if err != nil {
+        log.Fatalf("unable to move stdout: %v", err)
+    }
+    // Stderr may be closed or may be a copy of stdout. We make sure that
+    // we have an output that is in a completely separate range.
+    for newOutput <= 2 {
+        newOutput, err = unix.Dup(newOutput)
+        if err != nil {
+            log.Fatalf("unable to move stdout: %v", err)
+        }
+    }
+
+    // Best-effort: collect logs.
+    rPipe, wPipe, err := os.Pipe()
+    if err != nil {
+        log.Fatalf("unable to create pipe: %v", err)
+    }
+    if err := unix.Dup2(int(wPipe.Fd()), 1); err != nil {
+        log.Fatalf("error duping over stdout: %v", err)
+    }
+    if err := unix.Dup2(int(wPipe.Fd()), 2); err != nil {
+        log.Fatalf("error duping over stderr: %v", err)
+    }
+    wPipe.Close()
+    defer rPipe.Close()
+
+    // Read requests from stdin.
+    input := bufio.NewReader(os.NewFile(0, "input"))
+    output := bufio.NewWriter(os.NewFile(uintptr(newOutput), "output"))
+    for {
+        szBuf, err := input.Peek(4)
+        if err != nil {
+            log.Fatalf("unable to read header: %v", err)
+        }
+
+        // Parse the size, and discard bits.
+        sz, szBytes := protowire.ConsumeVarint(szBuf)
+        if szBytes < 0 {
+            szBytes = 0
+        }
+        if _, err := input.Discard(szBytes); err != nil {
+            log.Fatalf("error discarding size: %v", err)
+        }
+
+        // Read a full message.
+        msg := make([]byte, int(sz))
+        if _, err := io.ReadFull(input, msg); err != nil {
+            log.Fatalf("error reading worker request: %v", err)
+        }
+        var wreq wpb.WorkRequest
+        if err := proto.Unmarshal(msg, &wreq); err != nil {
+            log.Fatalf("error unmarshaling worker request: %v", err)
+        }
+
+        // Flush relevant caches.
+        inputFiles = make(map[string]string)
+        for _, input := range wreq.GetInputs() {
+            updateInputFile(input.GetPath(), string(input.GetDigest()))
+        }
+
+        // Prepare logging.
+        outputBuffer := bytes.NewBuffer(nil)
+        outputBuffer.WriteString(listenHeader)
+        log.SetOutput(outputBuffer)
+
+        // Parse all arguments.
+        flag.CommandLine.Parse(wreq.GetArguments())
+        var exitCode int
+        exitChan := make(chan int)
+        go func() { exitChan <- run(flag.CommandLine.Args()) }()
+        for running := true; running; {
+            select {
+            case exitCode = <-exitChan:
+                running = false
+            default:
+            }
+            // N.B. rPipe is given a read deadline of 1ms. We expect
+            // this to return a copy error after 1ms, and we just keep
+            // flushing this buffer while the task is running.
+            rPipe.SetReadDeadline(time.Now().Add(time.Millisecond))
+            outputBuffer.ReadFrom(rPipe)
+        }
+
+        if *workerDebug {
+            // Attach all cache stats.
+            outputBuffer.WriteString(allCacheStats())
+        }
+
+        // Send the response.
+        var wresp wpb.WorkResponse
+        wresp.ExitCode = int32(exitCode)
+        wresp.Output = string(outputBuffer.Bytes())
+        rmsg, err := proto.Marshal(&wresp)
+        if err != nil {
+            log.Fatalf("error marshaling response: %v", err)
+        }
+        if _, err := output.Write(append(protowire.AppendVarint(nil, uint64(len(rmsg))), rmsg...)); err != nil {
+            log.Fatalf("error sending worker response: %v", err)
+        }
+        if err := output.Flush(); err != nil {
+            log.Fatalf("error flushing output: %v", err)
+        }
+    }
+}
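For reference, the loop above implements bazel's worker wire format: every WorkRequest and WorkResponse proto is preceded by its varint-encoded length on stdin/stdout. A hedged client-side sketch of the same framing, useful for driving a worker binary by hand; the "./check" path and args-file name are illustrative, and bazel normally performs this exchange itself (including populating the request's Inputs with file digests):

    package main

    import (
        "fmt"
        "io"
        "os/exec"

        "google.golang.org/protobuf/encoding/protowire"
        "google.golang.org/protobuf/proto"
        wpb "gvisor.dev/bazel/worker_protocol_go_proto"
    )

    // writeFrame emits one varint-length-prefixed proto, matching the framing
    // that Work reads via input.Peek and protowire.ConsumeVarint above.
    func writeFrame(w io.Writer, m proto.Message) error {
        msg, err := proto.Marshal(m)
        if err != nil {
            return err
        }
        _, err = w.Write(append(protowire.AppendVarint(nil, uint64(len(msg))), msg...))
        return err
    }

    func main() {
        cmd := exec.Command("./check", "--persistent_worker")
        stdin, _ := cmd.StdinPipe()
        stdout, _ := cmd.StdoutPipe()
        if err := cmd.Start(); err != nil {
            panic(err)
        }
        writeFrame(stdin, &wpb.WorkRequest{Arguments: []string{"@args_file"}})
        buf := make([]byte, 4096) // Response: varint length + WorkResponse proto.
        n, _ := stdout.Read(buf)
        fmt.Printf("got %d response bytes\n", n)
    }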