// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tcp

import (
"encoding/binary"
"gvisor.dev/gvisor/pkg/rand"
"gvisor.dev/gvisor/pkg/sleep"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip/hash/jenkins"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)

// epQueue is a queue of endpoints.
type epQueue struct {
mu sync.Mutex
list endpointList
}

// enqueue adds e to the queue if the endpoint is not already on the queue.
func (q *epQueue) enqueue(e *endpoint) {
q.mu.Lock()
if e.pendingProcessing {
q.mu.Unlock()
return
}
q.list.PushBack(e)
e.pendingProcessing = true
q.mu.Unlock()
}

// dequeue removes and returns the first element from the queue if one is
// available; it returns nil otherwise.
func (q *epQueue) dequeue() *endpoint {
q.mu.Lock()
if e := q.list.Front(); e != nil {
q.list.Remove(e)
e.pendingProcessing = false
q.mu.Unlock()
return e
}
q.mu.Unlock()
return nil
}

// empty returns true if the queue is empty, false otherwise.
func (q *epQueue) empty() bool {
q.mu.Lock()
v := q.list.Empty()
q.mu.Unlock()
return v
}

// processor is responsible for processing packets queued to a TCP endpoint.
type processor struct {
epQ epQueue
sleeper sleep.Sleeper
newEndpointWaker sleep.Waker
closeWaker sleep.Waker
}
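
// close asserts the close waker, signalling the processor goroutine to exit.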
func (p *processor) close() {
p.closeWaker.Assert()
}
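
// queueEndpoint hands ep to this processor and notifies its goroutine that
// there is work to do.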
func (p *processor) queueEndpoint(ep *endpoint) {
// Queue an endpoint for processing by the processor goroutine.
p.epQ.enqueue(ep)
p.newEndpointWaker.Assert()
}
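
// Waker IDs used by a processor's sleeper to identify which waker fired.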
const (
newEndpointWaker = 1
closeWaker = 2
)
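
// start runs the processor's main loop: it sleeps until woken, then drains
// the endpoint queue, delivering queued segments directly on the fast path
// when the endpoint is established, and deferring to the endpoint's worker
// goroutine otherwise. It returns when the close waker is asserted.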
func (p *processor) start(wg *sync.WaitGroup) {
defer wg.Done()
defer p.sleeper.Done()
for {
if id, _ := p.sleeper.Fetch(true); id == closeWaker {
break
}
for {
ep := p.epQ.dequeue()
if ep == nil {
break
}
if ep.segmentQueue.empty() {
continue
}
// If the socket has transitioned out of the connected state, just let the
// worker goroutine handle the packet.
//
// NOTE: We read the state outside of the e.mu lock, which means that by the
// time we get to handleSegments the endpoint may no longer be in ESTABLISHED.
// This is fine, as all normal shutdown states are handled by handleSegments,
// and if the endpoint moves to a CLOSED/ERROR state then handleSegments is a
// no-op.
if ep.EndpointState() == StateEstablished && ep.mu.TryLock() {
// If the endpoint is in a connected state then we do direct delivery
// to ensure low latency and avoid scheduler interactions.
switch err := ep.handleSegmentsLocked(true /* fastPath */); {
case err != nil:
// Send any active resets if required.
ep.resetConnectionLocked(err)
fallthrough
case ep.EndpointState() == StateClose:
ep.notifyProtocolGoroutine(notifyTickleWorker)
case !ep.segmentQueue.empty():
p.epQ.enqueue(ep)
}
ep.mu.Unlock()
} else {
ep.newSegmentWaker.Assert()
}
}
}
}

// dispatcher manages a pool of TCP endpoint processors that are responsible
// for processing inbound segments. This fixed pool of processor goroutines
// does full TCP processing. The processor for a segment is selected based on
// a hash of the endpoint ID, which ensures that segments for the same
// endpoint are always delivered in order.
type dispatcher struct {
processors []processor
seed uint32
wg sync.WaitGroup
}
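
// init (re)initializes the dispatcher with nProcessors processor goroutines,
// first stopping and waiting for any previously started processors.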
func (d *dispatcher) init(nProcessors int) {
d.close()
d.wait()
d.processors = make([]processor, nProcessors)
d.seed = generateRandUint32()
for i := range d.processors {
p := &d.processors[i]
p.sleeper.AddWaker(&p.newEndpointWaker, newEndpointWaker)
p.sleeper.AddWaker(&p.closeWaker, closeWaker)
d.wg.Add(1)
// NB: sleeper-waker registration must happen synchronously to avoid races
// with `close`. It's possible to pull all this logic into `start`, but
// that results in a heap-allocated function literal.
go p.start(&d.wg)
}
}
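
// close signals every processor goroutine to exit.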
func (d *dispatcher) close() {
for i := range d.processors {
d.processors[i].close()
}
}
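
// wait blocks until all processor goroutines have exited.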
func (d *dispatcher) wait() {
d.wg.Wait()
}
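
// queuePacket parses and validates an inbound segment, updates the relevant
// stats, and queues the segment on the endpoint. Established endpoints are
// handed to a processor for fast-path handling; all other endpoints are left
// to their worker goroutine.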
func (d *dispatcher) queuePacket(stackEP stack.TransportEndpoint, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
ep := stackEP.(*endpoint)
s := newIncomingSegment(id, pkt)
if !s.parse(pkt.RXTransportChecksumValidated) {
ep.stack.Stats().TCP.InvalidSegmentsReceived.Increment()
ep.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
s.decRef()
return
}
if !s.csumValid {
ep.stack.Stats().TCP.ChecksumErrors.Increment()
ep.stats.ReceiveErrors.ChecksumErrors.Increment()
s.decRef()
return
}
ep.stack.Stats().TCP.ValidSegmentsReceived.Increment()
ep.stats.SegmentsReceived.Increment()
if (s.flags & header.TCPFlagRst) != 0 {
ep.stack.Stats().TCP.ResetsReceived.Increment()
}
if !ep.enqueueSegment(s) {
s.decRef()
return
}
// For sockets that are not in the established state, let the worker
// goroutine handle the packets.
if ep.EndpointState() != StateEstablished {
ep.newSegmentWaker.Assert()
return
}
d.selectProcessor(id).queueEndpoint(ep)
}
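
// generateRandUint32 returns a random uint32, panicking if the random source
// cannot be read.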
func generateRandUint32() uint32 {
b := make([]byte, 4)
if _, err := rand.Read(b); err != nil {
panic(err)
}
return binary.LittleEndian.Uint32(b)
}
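
// selectProcessor maps a connection's 4-tuple (local and remote ports and
// addresses) to one of the processors using a seeded Jenkins hash, so that
// all segments for a given endpoint are handled by the same processor and
// therefore remain in order.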
func (d *dispatcher) selectProcessor(id stack.TransportEndpointID) *processor {
var payload [4]byte
binary.LittleEndian.PutUint16(payload[0:], id.LocalPort)
binary.LittleEndian.PutUint16(payload[2:], id.RemotePort)
h := jenkins.Sum32(d.seed)
h.Write(payload[:])
h.Write([]byte(id.LocalAddress))
h.Write([]byte(id.RemoteAddress))
return &d.processors[h.Sum32()%uint32(len(d.processors))]
}