1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
|
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tcp
import (
"fmt"
"sync/atomic"
"time"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/seqnum"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
// queueFlags are used to indicate which queue of an endpoint a particular segment
// belongs to. This is used to track memory accounting correctly.
type queueFlags uint8
const (
recvQ queueFlags = 1 << iota
sendQ
)
// segment represents a TCP segment. It holds the payload and parsed TCP segment
// information, and can be added to intrusive lists.
// segment is mostly immutable, the only field allowed to change is viewToDeliver.
//
// +stateify savable
type segment struct {
segmentEntry
refCnt int32
ep *endpoint
qFlags queueFlags
id stack.TransportEndpointID `state:"manual"`
// TODO(gvisor.dev/issue/4417): Hold a stack.PacketBuffer instead of
// individual members for link/network packet info.
srcAddr tcpip.Address
dstAddr tcpip.Address
netProto tcpip.NetworkProtocolNumber
nicID tcpip.NICID
remoteLinkAddr tcpip.LinkAddress
data buffer.VectorisedView `state:".(buffer.VectorisedView)"`
hdr header.TCP
// views is used as buffer for data when its length is large
// enough to store a VectorisedView.
views [8]buffer.View `state:"nosave"`
// viewToDeliver keeps track of the next View that should be
// delivered by the Read endpoint.
viewToDeliver int
sequenceNumber seqnum.Value
ackNumber seqnum.Value
flags uint8
window seqnum.Size
// csum is only populated for received segments.
csum uint16
// csumValid is true if the csum in the received segment is valid.
csumValid bool
// parsedOptions stores the parsed values from the options in the segment.
parsedOptions header.TCPOptions
options []byte `state:".([]byte)"`
hasNewSACKInfo bool
rcvdTime time.Time `state:".(unixTime)"`
// xmitTime is the last transmit time of this segment.
xmitTime time.Time `state:".(unixTime)"`
xmitCount uint32
// acked indicates if the segment has already been SACKed.
acked bool
}
func newIncomingSegment(id stack.TransportEndpointID, pkt *stack.PacketBuffer) *segment {
netHdr := pkt.Network()
s := &segment{
refCnt: 1,
id: id,
srcAddr: netHdr.SourceAddress(),
dstAddr: netHdr.DestinationAddress(),
netProto: pkt.NetworkProtocolNumber,
nicID: pkt.NICID,
remoteLinkAddr: pkt.SourceLinkAddress(),
}
s.data = pkt.Data.Clone(s.views[:])
s.hdr = header.TCP(pkt.TransportHeader().View())
s.rcvdTime = time.Now()
return s
}
func newOutgoingSegment(id stack.TransportEndpointID, v buffer.View) *segment {
s := &segment{
refCnt: 1,
id: id,
}
s.rcvdTime = time.Now()
if len(v) != 0 {
s.views[0] = v
s.data = buffer.NewVectorisedView(len(v), s.views[:1])
}
return s
}
func (s *segment) clone() *segment {
t := &segment{
refCnt: 1,
id: s.id,
sequenceNumber: s.sequenceNumber,
ackNumber: s.ackNumber,
flags: s.flags,
window: s.window,
netProto: s.netProto,
nicID: s.nicID,
remoteLinkAddr: s.remoteLinkAddr,
viewToDeliver: s.viewToDeliver,
rcvdTime: s.rcvdTime,
xmitTime: s.xmitTime,
xmitCount: s.xmitCount,
ep: s.ep,
qFlags: s.qFlags,
}
t.data = s.data.Clone(t.views[:])
return t
}
// flagIsSet checks if at least one flag in flags is set in s.flags.
func (s *segment) flagIsSet(flags uint8) bool {
return s.flags&flags != 0
}
// flagsAreSet checks if all flags in flags are set in s.flags.
func (s *segment) flagsAreSet(flags uint8) bool {
return s.flags&flags == flags
}
// setOwner sets the owning endpoint for this segment. Its required
// to be called to ensure memory accounting for receive/send buffer
// queues is done properly.
func (s *segment) setOwner(ep *endpoint, qFlags queueFlags) {
switch qFlags {
case recvQ:
ep.updateReceiveMemUsed(s.segMemSize())
case sendQ:
// no memory account for sendQ yet.
default:
panic(fmt.Sprintf("unexpected queue flag %b", qFlags))
}
s.ep = ep
s.qFlags = qFlags
}
func (s *segment) decRef() {
if atomic.AddInt32(&s.refCnt, -1) == 0 {
if s.ep != nil {
switch s.qFlags {
case recvQ:
s.ep.updateReceiveMemUsed(-s.segMemSize())
case sendQ:
// no memory accounting for sendQ yet.
default:
panic(fmt.Sprintf("unexpected queue flag %b set for segment", s.qFlags))
}
}
}
}
func (s *segment) incRef() {
atomic.AddInt32(&s.refCnt, 1)
}
// logicalLen is the segment length in the sequence number space. It's defined
// as the data length plus one for each of the SYN and FIN bits set.
func (s *segment) logicalLen() seqnum.Size {
l := seqnum.Size(s.data.Size())
if s.flagIsSet(header.TCPFlagSyn) {
l++
}
if s.flagIsSet(header.TCPFlagFin) {
l++
}
return l
}
// payloadSize is the size of s.data.
func (s *segment) payloadSize() int {
return s.data.Size()
}
// segMemSize is the amount of memory used to hold the segment data and
// the associated metadata.
func (s *segment) segMemSize() int {
return segSize + s.data.Size()
}
// parse populates the sequence & ack numbers, flags, and window fields of the
// segment from the TCP header stored in the data. It then updates the view to
// skip the header.
//
// Returns boolean indicating if the parsing was successful.
//
// If checksum verification may not be skipped, parse also verifies the
// TCP checksum and stores the checksum and result of checksum verification in
// the csum and csumValid fields of the segment.
func (s *segment) parse(skipChecksumValidation bool) bool {
// h is the header followed by the payload. We check that the offset to
// the data respects the following constraints:
// 1. That it's at least the minimum header size; if we don't do this
// then part of the header would be delivered to user.
// 2. That the header fits within the buffer; if we don't do this, we
// would panic when we tried to access data beyond the buffer.
//
// N.B. The segment has already been validated as having at least the
// minimum TCP size before reaching here, so it's safe to read the
// fields.
offset := int(s.hdr.DataOffset())
if offset < header.TCPMinimumSize || offset > len(s.hdr) {
return false
}
s.options = []byte(s.hdr[header.TCPMinimumSize:])
s.parsedOptions = header.ParseTCPOptions(s.options)
verifyChecksum := true
if skipChecksumValidation {
s.csumValid = true
verifyChecksum = false
}
if verifyChecksum {
s.csum = s.hdr.Checksum()
xsum := header.PseudoHeaderChecksum(ProtocolNumber, s.srcAddr, s.dstAddr, uint16(s.data.Size()+len(s.hdr)))
xsum = s.hdr.CalculateChecksum(xsum)
xsum = header.ChecksumVV(s.data, xsum)
s.csumValid = xsum == 0xffff
}
s.sequenceNumber = seqnum.Value(s.hdr.SequenceNumber())
s.ackNumber = seqnum.Value(s.hdr.AckNumber())
s.flags = s.hdr.Flags()
s.window = seqnum.Size(s.hdr.WindowSize())
return true
}
// sackBlock returns a header.SACKBlock that represents this segment.
func (s *segment) sackBlock() header.SACKBlock {
return header.SACKBlock{s.sequenceNumber, s.sequenceNumber.Add(s.logicalLen())}
}
|