summaryrefslogtreecommitdiffhomepage
path: root/tools/checkescape/checkescape.go
blob: f8def48234e4baa14aba00df095f731cb5289ece (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package checkescape allows recursive escape analysis for hot paths.
//
// The analysis tracks multiple types of escapes, in two categories. First,
// 'hard' escapes are explicit allocations. Second, 'soft' escapes are
// interface dispatches or dynamic function dispatches; these don't necessarily
// escape but they *may* escape. The analysis is capable of making assertions
// recursively: soft escapes cannot be analyzed in this way, and therefore
// count as escapes for recursive purposes.
//
// The different types of escapes are as follows, with the category in
// parentheses:
//
// 	heap:      A direct allocation is made on the heap (hard).
// 	builtin:   A call is made to a built-in allocation function (hard).
// 	stack:     A stack split as part of a function preamble (soft).
// 	interface: A call is made via an interface which *may* escape (soft).
// 	dynamic:   A dynamic function is dispatched which *may* escape (soft).
//
// To use the package, annotate a function-level comment with either the
// line "// +checkescape" or "// +checkescape:OPTION[,OPTION]". In the second
// case, the OPTION field is either a type above, or one of:
//
//	local: Escape analysis is limited to local hard escapes only.
//	all: All the escapes are included.
//	hard: All hard escapes are included.
//
// If the "// +checkescape" annotation is provided, this is equivalent to
// providing the local and hard options.
//
// Some examples of this syntax are:
//
// +checkescape:all               - Analyzes for all escapes in this function and all calls.
// +checkescape:local             - Analyzes only for default local hard escapes.
// +checkescape:heap              - Only analyzes for heap escapes.
// +checkescape:interface,dynamic - Only checks for dynamic calls and interface calls.
// +checkescape                   - Does the same as +checkescape:local,hard.
//
// Note that all of the above can be inverted by using +mustescape. The
// +checkescape keyword will ensure failure if the class of escape occurs,
// whereas +mustescape will fail if the given class of escape does not occur.
//
// Local exemptions can be made by a comment of the form "// escapes: reason."
// This must appear on the line of the escape and will also apply to callers of
// the function as well (for non-local escape analysis).
package checkescape

import (
	"bufio"
	"bytes"
	"fmt"
	"go/ast"
	"go/token"
	"go/types"
	"io"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"golang.org/x/tools/go/analysis"
	"golang.org/x/tools/go/analysis/passes/buildssa"
	"golang.org/x/tools/go/ssa"
	"gvisor.dev/gvisor/tools/nogo/data"
)

const (
	// magic is the magic annotation. A doc-comment line that is exactly
	// this string selects the default check (hard reasons, local only).
	magic = "// +checkescape"

	// magicParams is the magic annotation with specific parameters. The
	// text after the colon is split on commas into individual options.
	magicParams = magic + ":"

	// testMagic is the test magic annotation (parameters required). The
	// listed escape reasons are expected to be found for the function.
	testMagic = "// +mustescape:"

	// exempt is the exemption annotation. It is matched as a prefix of the
	// lower-cased comment text, so the match is case-insensitive.
	exempt = "// escapes"
)

// escapingBuiltins are builtins known to escape.
//
// These are lowered at an earlier stage of compilation to explicit function
// calls, but are not available for recursive analysis.
//
// The names are compared for equality against the name of an *ssa.Builtin
// callee; a match is reported as a 'builtin' escape.
var escapingBuiltins = []string{
	"append",
	"makemap",
	"newobject",
	"mallocgc",
}

// Analyzer defines the entrypoint.
//
// It requires the buildssa analyzer (whose result is consumed by run) and
// registers packageEscapeFacts as its fact type, which is how per-package
// results are exported by run and imported when analyzing dependents.
var Analyzer = &analysis.Analyzer{
	Name:      "checkescape",
	Doc:       "surfaces recursive escape analysis results",
	Run:       run,
	Requires:  []*analysis.Analyzer{buildssa.Analyzer},
	FactTypes: []analysis.Fact{(*packageEscapeFacts)(nil)},
}

// packageEscapeFacts is the set of all functions in a package, and whether or
// not they recursively pass escape analysis.
//
// All the type names for receivers are encoded in the full key. The key
// represents the fully qualified package and type name used at link time.
//
// Funcs maps a function key to the escapes recorded for that function. A
// function that was analyzed and found to have no escapes maps to a nil or
// empty slice.
type packageEscapeFacts struct {
	Funcs map[string][]Escape
}

// AFact implements analysis.Fact.AFact.
//
// It is a marker method with an empty body; it exists only so that
// *packageEscapeFacts can be used as an analysis.Fact.
func (*packageEscapeFacts) AFact() {}

// CallSite is a single call site.
//
// These can be chained.
//
// LocalPos is the token position of the instruction as seen by the pass that
// recorded it; Resolved is the same location reduced to a base file name and
// line number, which is what exemptions are matched against.
type CallSite struct {
	LocalPos token.Pos
	Resolved LinePosition
}

// Escape is a single escape instance.
//
// Reason classifies the escape, Detail carries free-form text describing it
// (for example the CALL instruction text taken from the objdump output), and
// Chain lists the call sites leading to the escape, outermost caller first.
// An escape with a single Chain entry is local to the function it was
// recorded for.
type Escape struct {
	Reason EscapeReason
	Detail string
	Chain  []CallSite
}

// LinePosition is a coarse version of token.Position that keeps only the
// file name and the line number.
//
// Values of this type are compared against the exemptions found in the
// source, and are used as map keys.
type LinePosition struct {
	Filename string
	Line     int
}

// String implements fmt.Stringer.String.
//
// The result has the form "filename:line".
func (lp *LinePosition) String() string {
	file, line := lp.Filename, lp.Line
	return fmt.Sprintf("%s:%d", file, line)
}

// String implements fmt.Stringer.String.
//
// The output starts with the reason, followed by one line per call site in
// the chain. Intermediate call sites are prefixed with "+"; the final one is
// prefixed with "@" and is followed by the escape detail. The result
// therefore contains new lines whenever the chain is non-empty.
func (e *Escape) String() string {
	var out bytes.Buffer
	fmt.Fprintf(&out, "%s", e.Reason.String())
	last := len(e.Chain) - 1
	for idx := range e.Chain {
		resolved := e.Chain[idx].Resolved.String()
		if idx != last {
			// Not the innermost site: just a link in the chain.
			fmt.Fprintf(&out, "\n + %s", resolved)
			continue
		}
		// Innermost site: this is where the escape actually happens.
		fmt.Fprintf(&out, "\n @ %s → %s", resolved, e.Detail)
	}
	return out.String()
}

// EscapeReason is an escape reason.
//
// This is a simple enum.
type EscapeReason int

const (
	interfaceInvoke EscapeReason = iota
	unknownPackage
	allocation
	builtin
	dynamicCall
	stackSplit
	reasonCount // Count for below.
)

// String returns the string for the EscapeReason.
//
// Note that this also implicitly defines the reverse string -> EscapeReason
// mapping, which is the word before the colon (computed below).
func (e EscapeReason) String() string {
	switch e {
	case interfaceInvoke:
		return "interface: function invocation via interface"
	case unknownPackage:
		return "unknown: no package information available"
	case allocation:
		return "heap: call to runtime heap allocation"
	case builtin:
		return "builtin: call to runtime builtin"
	case dynamicCall:
		return "dynamic: call via dynamic function"
	case stackSplit:
		return "stack: stack split on function entry"
	default:
		panic(fmt.Sprintf("unknown reason: %d", e))
	}
}

// hardReasons are the escape reasons in the 'hard' category: explicit
// allocations.
var hardReasons = []EscapeReason{
	allocation,
	builtin,
}

// softReasons are the escape reasons in the 'soft' category: constructs that
// may escape but cannot be analyzed further.
var softReasons = []EscapeReason{
	interfaceInvoke,
	unknownPackage,
	dynamicCall,
	stackSplit,
}

// allReasons is every reason: hardReasons followed by softReasons.
var allReasons = append(hardReasons, softReasons...)

// escapeTypes maps an annotation option name to its EscapeReason.
//
// The option name for a reason is the portion of its String form that
// precedes the first colon.
var escapeTypes = func() map[string]EscapeReason {
	byKey := make(map[string]EscapeReason)
	for idx := range allReasons {
		reason := allReasons[idx]
		key := strings.SplitN(reason.String(), ":", 2)[0]
		byKey[key] = reason
	}
	return byKey
}()

// EscapeCount counts escapes.
//
// It is used to avoid accumulating too many escapes for the same reason, for
// the same function. Each class is limited to maxRecordsPerReason instances
// (an arbitrary bound); see Record.
//
// byReason is indexed by EscapeReason and holds the number of times Record
// has been called for that reason. The zero value is ready to use.
type EscapeCount struct {
	byReason [reasonCount]uint32
}

// maxRecordsPerReason is the number of explicit records kept per escape
// reason for a single EscapeCount; further records of the same reason are
// rejected.
//
// See EscapeCount (and usage), and Record implementation.
const maxRecordsPerReason = 5

// Record notes one more escape for the given reason.
//
// It returns true while the running count for that reason is within
// maxRecordsPerReason, and false once the limit has been exceeded, in which
// case the caller should not add the escape. The counter is incremented in
// either case.
func (ec *EscapeCount) Record(reason EscapeReason) bool {
	count := &ec.byReason[reason]
	*count++
	return *count <= maxRecordsPerReason
}

// loadObjdump reads the objdump output.
//
// This records whether there is a call to any function for every source
// line. It is used only to remove false positives for escape analysis: the
// call will be elided if escape analysis is able to put the object on the
// stack exclusively.
//
// The returned map is keyed by (base file name, line) and holds the text of
// the first CALL instruction seen for that line (mnemonic plus operands).
// Lines that do not have the expected shape are skipped. An error is
// returned if the objdump file cannot be opened or read, or if a line number
// cannot be parsed.
func loadObjdump() (map[LinePosition]string, error) {
	f, err := os.Open(data.Objdump)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Build the map.
	m := make(map[LinePosition]string)
	r := bufio.NewReader(f)
	var (
		// lastField caches the most recently parsed "file:line" token
		// so that consecutive instructions for the same source line
		// are not re-parsed; lastPos is its parsed form.
		lastField string
		lastPos   LinePosition
	)
	for {
		line, err := r.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, err
		}

		// We recognize lines corresponding to actual code (not the
		// symbol name or other metadata) and annotate them if they
		// correspond to an explicit CALL instruction. We assume that
		// the lack of a CALL for a given line is evidence that escape
		// analysis has eliminated an allocation.
		//
		// Lines look like this (including the first space):
		//  gohacks_unsafe.go:33  0xa39                   488b442408              MOVQ 0x8(SP), AX
		//
		// N.B. A continue below on the final (unterminated) line is
		// safe: the next read returns an empty line with io.EOF again,
		// which falls through to the break at the bottom of the loop.
		if len(line) > 0 && line[0] == ' ' {
			fields := strings.Fields(line)

			// The mnemonic is the fourth field. Anything shorter is
			// not an instruction line and cannot be a CALL; skip it
			// rather than indexing out of range.
			if len(fields) < 4 || !strings.Contains(fields[3], "CALL") {
				continue
			}

			// Ignore strings containing duffzero, which is just
			// used by stack allocations for types that are large
			// enough to warrant Duff's device.
			if strings.Contains(line, "runtime.duffzero") {
				continue
			}

			// Ignore the racefuncenter call, which is used for
			// race builds. This does not escape.
			if strings.Contains(line, "runtime.racefuncenter") {
				continue
			}

			// Calculate the filename and line. Note that per the
			// example above, the filename is not a fully qualified
			// base, just the basename (what we require).
			if fields[0] != lastField {
				parts := strings.SplitN(fields[0], ":", 2)
				if len(parts) != 2 {
					// No "file:line" prefix; this line cannot be
					// attributed to a source position.
					continue
				}
				lineNum, err := strconv.ParseInt(parts[1], 10, 64)
				if err != nil {
					return nil, fmt.Errorf("parsing objdump position %q: %w", fields[0], err)
				}
				lastPos = LinePosition{
					Filename: parts[0],
					Line:     int(lineNum),
				}
				lastField = fields[0]
			}
			if _, ok := m[lastPos]; ok {
				continue // Already marked.
			}

			// Save the actual call for the detail.
			m[lastPos] = strings.Join(fields[3:], " ")
		}
		if err == io.EOF {
			break
		}
	}

	return m, nil
}

// poser is a type that implements Pos.
//
// It lets the position helpers in run accept both SSA instructions and SSA
// functions, which are the values passed to them.
type poser interface {
	Pos() token.Pos
}

// run performs the analysis.
//
// The steps are:
//
//  1. Load the objdump call information (see loadObjdump).
//  2. Compute the escapes of every source function in the package, following
//     calls into local functions and importing the exported facts of other
//     packages.
//  3. Remove any escape whose call chain touches a line carrying an
//     exemption comment.
//  4. Export the resulting facts for dependent packages.
//  5. For every function declaration with a doc comment, parse the
//     +checkescape and +mustescape annotations and report violations.
//
// The result value is always nil; a non-nil error is returned only if the
// objdump data cannot be loaded.
func run(pass *analysis.Pass) (interface{}, error) {
	calls, err := loadObjdump()
	if err != nil {
		return nil, err
	}
	pef := packageEscapeFacts{
		Funcs: make(map[string][]Escape),
	}

	// linePosition reduces the position of inst to a base file name and
	// line. If inst has no usable position and a parent is supplied, the
	// parent's position is used instead.
	linePosition := func(inst, parent poser) LinePosition {
		p := pass.Fset.Position(inst.Pos())
		if (p.Filename == "" || p.Line == 0) && parent != nil {
			p = pass.Fset.Position(parent.Pos())
		}
		return LinePosition{
			Filename: filepath.Base(p.Filename),
			Line:     p.Line,
		}
	}

	// hasCall reports whether the objdump data has a CALL on the line of
	// inst, and if so returns the text of that call.
	hasCall := func(inst poser) (string, bool) {
		p := linePosition(inst, nil)
		s, ok := calls[p]
		return s, ok
	}

	// callSite builds the CallSite for a single instruction.
	callSite := func(inst ssa.Instruction) CallSite {
		return CallSite{
			LocalPos: inst.Pos(),
			Resolved: linePosition(inst, inst.Parent()),
		}
	}

	// escapes returns a one-element list describing a local escape at
	// inst, or nil if the per-reason limit in ec has been exceeded.
	escapes := func(reason EscapeReason, detail string, inst ssa.Instruction, ec *EscapeCount) []Escape {
		if !ec.Record(reason) {
			return nil // Skip.
		}
		es := Escape{
			Reason: reason,
			Detail: detail,
			Chain:  []CallSite{callSite(inst)},
		}
		return []Escape{es}
	}

	// resolve lifts the escapes of a callee into the caller by prefixing
	// each chain with the calling instruction. Escapes beyond the
	// per-reason limit in ec are dropped.
	resolve := func(sub []Escape, inst ssa.Instruction, ec *EscapeCount) (es []Escape) {
		for _, e := range sub {
			if !ec.Record(e.Reason) {
				continue // Skip.
			}
			es = append(es, Escape{
				Reason: e.Reason,
				Detail: e.Detail,
				Chain:  append([]CallSite{callSite(inst)}, e.Chain...),
			})
		}
		return es
	}
	state := pass.ResultOf[buildssa.Analyzer].(*buildssa.SSA)

	var loadFunc func(*ssa.Function) []Escape // Used below.

	// analyzeInstruction returns the escapes attributable to a single
	// instruction.
	analyzeInstruction := func(inst ssa.Instruction, ec *EscapeCount) []Escape {
		switch x := inst.(type) {
		case *ssa.Call:
			if x.Call.IsInvoke() {
				// This is an interface dispatch. There is no
				// way to know if this is actually escaping or
				// not, since we don't know the underlying
				// type.
				call, _ := hasCall(inst)
				return escapes(interfaceInvoke, call, inst, ec)
			}
			switch x := x.Call.Value.(type) {
			case *ssa.Function:
				if x.Pkg == nil {
					// Can't resolve the package.
					return escapes(unknownPackage, "no package", inst, ec)
				}

				// Atomic functions are intrinsics. We can
				// assume that they don't escape.
				if x.Pkg.Pkg.Name() == "atomic" {
					return nil
				}

				// Is this a local function? If yes, call the
				// function to load the local function. The
				// local escapes are the escapes found in the
				// local function.
				if x.Pkg.Pkg == pass.Pkg {
					return resolve(loadFunc(x), inst, ec)
				}

				// Recursively collect information from
				// the other analyzers.
				var imp packageEscapeFacts
				if !pass.ImportPackageFact(x.Pkg.Pkg, &imp) {
					// Unable to import the dependency; we must
					// declare these as escaping.
					return escapes(unknownPackage, "no analysis", inst, ec)
				}

				// The escapes of this instruction are the
				// escapes of the called function directly.
				return resolve(imp.Funcs[x.RelString(x.Pkg.Pkg)], inst, ec)
			case *ssa.Builtin:
				// Ignore elided escapes.
				if _, has := hasCall(inst); !has {
					return nil
				}

				// Check if the builtin is escaping.
				for _, name := range escapingBuiltins {
					if x.Name() == name {
						return escapes(builtin, name, inst, ec)
					}
				}
			default:
				// All dynamic calls are counted as soft
				// escapes. They are similar to interface
				// dispatches. We cannot actually look up what
				// this refers to using static analysis alone.
				call, _ := hasCall(inst)
				return escapes(dynamicCall, call, inst, ec)
			}
		case *ssa.Alloc:
			// Ignore non-heap allocations.
			if !x.Heap {
				return nil
			}

			// Ignore elided escapes.
			call, has := hasCall(inst)
			if !has {
				return nil
			}

			// This is a real heap allocation.
			return escapes(allocation, call, inst, ec)
		case *ssa.MakeMap:
			return escapes(builtin, "makemap", inst, ec)
		case *ssa.MakeSlice:
			return escapes(builtin, "makeslice", inst, ec)
		case *ssa.MakeClosure:
			return escapes(builtin, "makeclosure", inst, ec)
		case *ssa.MakeChan:
			return escapes(builtin, "makechan", inst, ec)
		}
		return nil // No escapes.
	}

	// analyzeBasicBlock concatenates the escapes of every instruction in
	// the block, in instruction order.
	var analyzeBasicBlock func(*ssa.BasicBlock, *EscapeCount) []Escape // Recursive.
	analyzeBasicBlock = func(block *ssa.BasicBlock, ec *EscapeCount) (rval []Escape) {
		for _, inst := range block.Instrs {
			rval = append(rval, analyzeInstruction(inst, ec)...)
		}
		return rval // N.B. may be empty.
	}

	// loadFunc returns the escapes for fn, computing and caching them in
	// pef.Funcs on first use.
	loadFunc = func(fn *ssa.Function) []Escape {
		// Is this already available?
		name := fn.RelString(pass.Pkg)
		if es, ok := pef.Funcs[name]; ok {
			return es
		}

		// In the case of a true cycle, we assume that the current
		// function itself has no escapes until the rest of the
		// analysis is complete. This will trip the above in the case
		// of a cycle of any kind.
		pef.Funcs[name] = nil

		// Perform the basic analysis.
		var (
			es []Escape
			ec EscapeCount
		)
		if fn.Recover != nil {
			es = append(es, analyzeBasicBlock(fn.Recover, &ec)...)
		}
		for _, block := range fn.Blocks {
			es = append(es, analyzeBasicBlock(block, &ec)...)
		}

		// Check for a stack split: a CALL attributed to the line of
		// the function itself. Note that this is not subject to the
		// per-reason limit in ec.
		if call, has := hasCall(fn); has {
			es = append(es, Escape{
				Reason: stackSplit,
				Detail: call,
				Chain: []CallSite{{
					LocalPos: fn.Pos(),
					Resolved: linePosition(fn, fn.Parent()),
				}},
			})
		}

		// Save the result and return.
		pef.Funcs[name] = es
		return es
	}

	// Complete all local functions.
	for _, fn := range state.SrcFuncs {
		loadFunc(fn)
	}

	// Build the exemption list, keyed by the line on which the exemption
	// comment appears. The value is the remainder of the comment text.
	exemptions := make(map[LinePosition]string)
	for _, f := range pass.Files {
		for _, cg := range f.Comments {
			for _, c := range cg.List {
				p := pass.Fset.Position(c.Slash)
				if strings.HasPrefix(strings.ToLower(c.Text), exempt) {
					exemptions[LinePosition{
						Filename: filepath.Base(p.Filename),
						Line:     p.Line,
					}] = c.Text[len(exempt):]
				}
			}
		}
	}

	// Delete everything matching the exemptions.
	//
	// This has the implication that exemptions are applied recursively,
	// since this now modified set is what will be saved.
	for name, escapes := range pef.Funcs {
		var newEscapes []Escape
		for _, escape := range escapes {
			// Note that an exemption applies if it is marked as
			// an exemption anywhere in the call chain. It need
			// not be marked as escapes in the function itself,
			// nor in the top-level caller.
			isExempt := false
			for _, cs := range escape.Chain {
				if _, ok := exemptions[cs.Resolved]; ok {
					isExempt = true
					break
				}
			}
			if !isExempt {
				// Record this escape; not an exemption.
				newEscapes = append(newEscapes, escape)
			}
		}
		pef.Funcs[name] = newEscapes // Update.
	}

	// Export all findings for future packages.
	pass.ExportPackageFact(&pef)

	// Scan all functions for violations.
	for _, f := range pass.Files {
		// Scan all declarations.
		for _, decl := range f.Decls {
			fdecl, ok := decl.(*ast.FuncDecl)
			// Function declaration?
			if !ok {
				continue
			}
			// Is there a comment?
			if fdecl.Doc == nil {
				continue
			}
			var (
				reasons     []EscapeReason
				found       bool
				local       bool
				testReasons = make(map[EscapeReason]bool) // reason -> local?
			)
			// Does the comment contain a +checkescape line?
			for _, c := range fdecl.Doc.List {
				if !strings.HasPrefix(c.Text, magic) && !strings.HasPrefix(c.Text, testMagic) {
					continue
				}
				if c.Text == magic {
					// Default: hard reasons, local only.
					reasons = hardReasons
					local = true
				} else if strings.HasPrefix(c.Text, magicParams) {
					// Extract specific reasons.
					types := strings.Split(c.Text[len(magicParams):], ",")
					found = true // For below.
					for i := 0; i < len(types); i++ {
						if types[i] == "local" {
							// Limit search to local escapes.
							local = true
						} else if types[i] == "all" {
							// Append all reasons.
							reasons = append(reasons, allReasons...)
						} else if types[i] == "hard" {
							// Append all hard reasons.
							reasons = append(reasons, hardReasons...)
						} else {
							r, ok := escapeTypes[types[i]]
							if !ok {
								// This is not a valid escape reason.
								pass.Reportf(fdecl.Pos(), "unknown reason: %v", types[i])
								continue
							}
							reasons = append(reasons, r)
						}
					}
				} else if strings.HasPrefix(c.Text, testMagic) {
					types := strings.Split(c.Text[len(testMagic):], ",")
					// N.B. This deliberately shadows the outer
					// local: for +mustescape, locality is tracked
					// per reason in testReasons and applies only
					// to reasons listed after "local".
					local := false
					for i := 0; i < len(types); i++ {
						if types[i] == "local" {
							local = true
						} else {
							r, ok := escapeTypes[types[i]]
							if !ok {
								// This is not a valid escape reason.
								pass.Reportf(fdecl.Pos(), "unknown reason: %v", types[i])
								continue
							}
							if v, ok := testReasons[r]; ok && v {
								// Already registered as local.
								continue
							}
							testReasons[r] = local
						}
					}
				}
			}
			if len(reasons) == 0 && found {
				// A magic annotation was provided, but no reasons.
				pass.Reportf(fdecl.Pos(), "no reasons provided")
				continue
			}

			// Scan for matches.
			fn := pass.TypesInfo.Defs[fdecl.Name].(*types.Func)
			name := state.Pkg.Prog.FuncValue(fn).RelString(pass.Pkg)
			es, ok := pef.Funcs[name]
			if !ok {
				pass.Reportf(fdecl.Pos(), "internal error: function %s not found.", name)
				continue
			}
			for _, e := range es {
				// Does this meet our local requirement?
				if local && len(e.Chain) > 1 {
					continue
				}
				for _, r := range reasons {
					// Does this match the reason? Emit
					// with a full stack trace that
					// explains why this violates our
					// constraints.
					if e.Reason == r {
						pass.Reportf(e.Chain[0].LocalPos, "%s", e.String())
					}
				}
			}

			// Scan for test (required) matches. Every satisfied
			// requirement is removed from testReasons, so whatever
			// remains afterwards was not found.
			for _, e := range es {
				// Is this local?
				local, ok := testReasons[e.Reason]
				if !ok {
					continue
				}
				wantLocal := len(e.Chain) == 1
				if local == wantLocal {
					delete(testReasons, e.Reason)
				}
			}
			for reason, local := range testReasons {
				// We didn't find the escapes we wanted.
				pass.Reportf(fdecl.Pos(), "testescapes not found: reason=%s, local=%t", reason, local)
			}
			if len(testReasons) > 0 {
				// Dump all reasons found to help in debugging.
				for _, e := range es {
					pass.Reportf(e.Chain[0].LocalPos, "escape found: %s", e.String())
				}
			}
		}
	}

	return nil, nil
}