summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJamie Liu <jamieliu@google.com>2020-09-02 11:35:18 -0700
committergVisor bot <gvisor-bot@google.com>2020-09-02 11:37:31 -0700
commit9bd01642377eef9d91f9243091b6da28f37d164a (patch)
tree62954e9e307ec32836f42e8000688df94d62ae0f
parentb9b6660dc4ec0cee77bb30ac7a44c061ada1b3d6 (diff)
Improve sync.SeqCount performance.
- Make sync.SeqCountEpoch not a struct. This allows sync.SeqCount.BeginRead() to be inlined. - Mark sync.SeqAtomicLoad<T> nosplit to mitigate the Go compiler's refusal to inline it. (Best I could get was "cost 92 exceeds budget 80".) - Use runtime-guided spinning in SeqCount.BeginRead(). Benchmarks: name old time/op new time/op delta pkg:pkg/sync/sync goos:linux goarch:amd64 SeqCountWriteUncontended-12 8.24ns ± 0% 11.40ns ± 0% +38.35% (p=0.000 n=10+10) SeqCountReadUncontended-12 0.33ns ± 0% 0.14ns ± 3% -57.77% (p=0.000 n=7+8) pkg:pkg/sync/seqatomictest/seqatomic goos:linux goarch:amd64 SeqAtomicLoadIntUncontended-12 0.64ns ± 1% 0.41ns ± 1% -36.40% (p=0.000 n=10+8) SeqAtomicTryLoadIntUncontended-12 0.18ns ± 4% 0.18ns ± 1% ~ (p=0.206 n=10+8) AtomicValueLoadIntUncontended-12 0.27ns ± 3% 0.27ns ± 0% -1.77% (p=0.000 n=10+8) (atomic.Value.Load is, of course, inlined. We would expect an uncontended inline SeqAtomicLoad<int> to perform identically to SeqAtomicTryLoad<int>.) The "regression" in BenchmarkSeqCountWriteUncontended, despite this CL changing nothing in that path, is attributed to microarchitectural subtlety; the benchmark loop is unchanged except for its address: Before this CL: :0 0x4e62d1 48ffc2 INCQ DX :0 0x4e62d4 48399110010000 CMPQ DX, 0x110(CX) :0 0x4e62db 7e26 JLE 0x4e6303 :0 0x4e62dd 90 NOPL :0 0x4e62de bb01000000 MOVL $0x1, BX :0 0x4e62e3 f00fc118 LOCK XADDL BX, 0(AX) :0 0x4e62e7 ffc3 INCL BX :0 0x4e62e9 0fbae300 BTL $0x0, BX :0 0x4e62ed 733a JAE 0x4e6329 :0 0x4e62ef 90 NOPL :0 0x4e62f0 bb01000000 MOVL $0x1, BX :0 0x4e62f5 f00fc118 LOCK XADDL BX, 0(AX) :0 0x4e62f9 ffc3 INCL BX :0 0x4e62fb 0fbae300 BTL $0x0, BX :0 0x4e62ff 73d0 JAE 0x4e62d1 After this CL: :0 0x4e6361 48ffc2 INCQ DX :0 0x4e6364 48399110010000 CMPQ DX, 0x110(CX) :0 0x4e636b 7e26 JLE 0x4e6393 :0 0x4e636d 90 NOPL :0 0x4e636e bb01000000 MOVL $0x1, BX :0 0x4e6373 f00fc118 LOCK XADDL BX, 0(AX) :0 0x4e6377 ffc3 INCL BX :0 0x4e6379 0fbae300 BTL $0x0, BX :0 0x4e637d 733a JAE 0x4e63b9 :0 0x4e637f 90 NOPL :0 0x4e6380 bb01000000 MOVL $0x1, BX :0 0x4e6385 f00fc118 LOCK XADDL BX, 0(AX) :0 0x4e6389 ffc3 INCL BX :0 0x4e638b 0fbae300 BTL $0x0, BX :0 0x4e638f 73d0 JAE 0x4e6361 PiperOrigin-RevId: 329754148
-rw-r--r--pkg/sync/BUILD1
-rw-r--r--pkg/sync/seqatomic_unsafe.go40
-rw-r--r--pkg/sync/seqcount.go30
-rw-r--r--pkg/sync/spin_unsafe.go24
4 files changed, 62 insertions, 33 deletions
diff --git a/pkg/sync/BUILD b/pkg/sync/BUILD
index 4d47207f7..68535c3b1 100644
--- a/pkg/sync/BUILD
+++ b/pkg/sync/BUILD
@@ -38,6 +38,7 @@ go_library(
"race_unsafe.go",
"rwmutex_unsafe.go",
"seqcount.go",
+ "spin_unsafe.go",
"sync.go",
],
marshal = False,
diff --git a/pkg/sync/seqatomic_unsafe.go b/pkg/sync/seqatomic_unsafe.go
index eda6fb131..2184cb5ab 100644
--- a/pkg/sync/seqatomic_unsafe.go
+++ b/pkg/sync/seqatomic_unsafe.go
@@ -25,41 +25,35 @@ import (
type Value struct{}
// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race
-// with any writer critical sections in sc.
-func SeqAtomicLoad(sc *sync.SeqCount, ptr *Value) Value {
- // This function doesn't use SeqAtomicTryLoad because doing so is
- // measurably, significantly (~20%) slower; Go is awful at inlining.
- var val Value
+// with any writer critical sections in seq.
+//
+//go:nosplit
+func SeqAtomicLoad(seq *sync.SeqCount, ptr *Value) Value {
for {
- epoch := sc.BeginRead()
- if sync.RaceEnabled {
- // runtime.RaceDisable() doesn't actually stop the race detector,
- // so it can't help us here. Instead, call runtime.memmove
- // directly, which is not instrumented by the race detector.
- sync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val))
- } else {
- // This is ~40% faster for short reads than going through memmove.
- val = *ptr
- }
- if sc.ReadOk(epoch) {
- break
+ if val, ok := SeqAtomicTryLoad(seq, seq.BeginRead(), ptr); ok {
+ return val
}
}
- return val
}
// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section
-// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read
-// would race with a writer critical section, SeqAtomicTryLoad returns
+// in seq initiated by a call to seq.BeginRead() that returned epoch. If the
+// read would race with a writer critical section, SeqAtomicTryLoad returns
// (unspecified, false).
-func SeqAtomicTryLoad(sc *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *Value) (Value, bool) {
- var val Value
+//
+//go:nosplit
+func SeqAtomicTryLoad(seq *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *Value) (val Value, ok bool) {
if sync.RaceEnabled {
+ // runtime.RaceDisable() doesn't actually stop the race detector, so it
+ // can't help us here. Instead, call runtime.memmove directly, which is
+ // not instrumented by the race detector.
sync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val))
} else {
+ // This is ~40% faster for short reads than going through memmove.
val = *ptr
}
- return val, sc.ReadOk(epoch)
+ ok = seq.ReadOk(epoch)
+ return
}
func init() {
diff --git a/pkg/sync/seqcount.go b/pkg/sync/seqcount.go
index a1e895352..2c5d3df99 100644
--- a/pkg/sync/seqcount.go
+++ b/pkg/sync/seqcount.go
@@ -8,7 +8,6 @@ package sync
import (
"fmt"
"reflect"
- "runtime"
"sync/atomic"
)
@@ -43,9 +42,7 @@ type SeqCount struct {
}
// SeqCountEpoch tracks writer critical sections in a SeqCount.
-type SeqCountEpoch struct {
- val uint32
-}
+type SeqCountEpoch uint32
// We assume that:
//
@@ -83,12 +80,25 @@ type SeqCountEpoch struct {
// using this pattern. Most users of SeqCount will need to use the
// SeqAtomicLoad function template in seqatomic.go.
func (s *SeqCount) BeginRead() SeqCountEpoch {
- epoch := atomic.LoadUint32(&s.epoch)
- for epoch&1 != 0 {
- runtime.Gosched()
- epoch = atomic.LoadUint32(&s.epoch)
+ if epoch := atomic.LoadUint32(&s.epoch); epoch&1 == 0 {
+ return SeqCountEpoch(epoch)
+ }
+ return s.beginReadSlow()
+}
+
+func (s *SeqCount) beginReadSlow() SeqCountEpoch {
+ i := 0
+ for {
+ if canSpin(i) {
+ i++
+ doSpin()
+ } else {
+ goyield()
+ }
+ if epoch := atomic.LoadUint32(&s.epoch); epoch&1 == 0 {
+ return SeqCountEpoch(epoch)
+ }
}
- return SeqCountEpoch{epoch}
}
// ReadOk returns true if the reader critical section initiated by a previous
@@ -99,7 +109,7 @@ func (s *SeqCount) BeginRead() SeqCountEpoch {
// Reader critical sections do not need to be explicitly terminated; the last
// call to ReadOk is implicitly the end of the reader critical section.
func (s *SeqCount) ReadOk(epoch SeqCountEpoch) bool {
- return atomic.LoadUint32(&s.epoch) == epoch.val
+ return atomic.LoadUint32(&s.epoch) == uint32(epoch)
}
// BeginWrite indicates the beginning of a writer critical section.
diff --git a/pkg/sync/spin_unsafe.go b/pkg/sync/spin_unsafe.go
new file mode 100644
index 000000000..f721449e3
--- /dev/null
+++ b/pkg/sync/spin_unsafe.go
@@ -0,0 +1,24 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.13
+// +build !go1.16
+
+// Check go:linkname function signatures when updating Go version.
+
+package sync
+
+import (
+ _ "unsafe" // for go:linkname
+)
+
+//go:linkname canSpin sync.runtime_canSpin
+func canSpin(i int) bool
+
+//go:linkname doSpin sync.runtime_doSpin
+func doSpin()
+
+//go:linkname goyield runtime.goyield
+func goyield()