summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip/socketops.go
blob: 4149b91da578fbd04410db4250bdc81f667f0254 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tcpip

import (
	"math"
	"sync/atomic"

	"gvisor.dev/gvisor/pkg/sync"
)

// PacketOverheadFactor is used to multiply the value provided by the user on a
// SetSockOpt for setting the send/receive buffer sizes sockets.
const PacketOverheadFactor = 2

// SocketOptionsHandler holds methods that help define endpoint specific
// behavior for socket level socket options. These must be implemented by
// endpoints to get notified when socket level options are set.
type SocketOptionsHandler interface {
	// OnReuseAddressSet is invoked when SO_REUSEADDR is set for an endpoint.
	OnReuseAddressSet(v bool)

	// OnReusePortSet is invoked when SO_REUSEPORT is set for an endpoint.
	OnReusePortSet(v bool)

	// OnKeepAliveSet is invoked when SO_KEEPALIVE is set for an endpoint.
	OnKeepAliveSet(v bool)

	// OnDelayOptionSet is invoked when TCP_NODELAY is set for an endpoint.
	// Note that v will be the inverse of TCP_NODELAY option.
	OnDelayOptionSet(v bool)

	// OnCorkOptionSet is invoked when TCP_CORK is set for an endpoint.
	OnCorkOptionSet(v bool)

	// LastError is invoked when SO_ERROR is read for an endpoint.
	LastError() *Error

	// UpdateLastError updates the endpoint specific last error field.
	UpdateLastError(err *Error)

	// HasNIC is invoked to check if the NIC is valid for SO_BINDTODEVICE.
	HasNIC(v int32) bool

	// GetSendBufferSize is invoked to get the SO_SNDBUFSIZE.
	GetSendBufferSize() (int64, *Error)

	// IsUnixSocket is invoked to check if the socket is of unix domain.
	IsUnixSocket() bool
}

// DefaultSocketOptionsHandler is an embeddable type that implements no-op
// implementations for SocketOptionsHandler methods.
type DefaultSocketOptionsHandler struct{}

var _ SocketOptionsHandler = (*DefaultSocketOptionsHandler)(nil)

// OnReuseAddressSet implements SocketOptionsHandler.OnReuseAddressSet.
func (*DefaultSocketOptionsHandler) OnReuseAddressSet(bool) {}

// OnReusePortSet implements SocketOptionsHandler.OnReusePortSet.
func (*DefaultSocketOptionsHandler) OnReusePortSet(bool) {}

// OnKeepAliveSet implements SocketOptionsHandler.OnKeepAliveSet.
func (*DefaultSocketOptionsHandler) OnKeepAliveSet(bool) {}

// OnDelayOptionSet implements SocketOptionsHandler.OnDelayOptionSet.
func (*DefaultSocketOptionsHandler) OnDelayOptionSet(bool) {}

// OnCorkOptionSet implements SocketOptionsHandler.OnCorkOptionSet.
func (*DefaultSocketOptionsHandler) OnCorkOptionSet(bool) {}

// LastError implements SocketOptionsHandler.LastError.
func (*DefaultSocketOptionsHandler) LastError() *Error {
	return nil
}

// UpdateLastError implements SocketOptionsHandler.UpdateLastError.
func (*DefaultSocketOptionsHandler) UpdateLastError(*Error) {}

// HasNIC implements SocketOptionsHandler.HasNIC.
func (*DefaultSocketOptionsHandler) HasNIC(int32) bool {
	return false
}

// GetSendBufferSize implements SocketOptionsHandler.GetSendBufferSize.
func (*DefaultSocketOptionsHandler) GetSendBufferSize() (int64, *Error) {
	return 0, nil
}

// IsUnixSocket implements SocketOptionsHandler.IsUnixSocket.
func (*DefaultSocketOptionsHandler) IsUnixSocket() bool {
	return false
}

// StackHandler holds methods to access the stack options. These must be
// implemented by the stack.
type StackHandler interface {
	// Option allows retrieving stack wide options.
	Option(option interface{}) *Error

	// TransportProtocolOption allows retrieving individual protocol level
	// option values.
	TransportProtocolOption(proto TransportProtocolNumber, option GettableTransportProtocolOption) *Error
}

// SocketOptions contains all the variables which store values for SOL_SOCKET,
// SOL_IP, SOL_IPV6 and SOL_TCP level options.
//
// +stateify savable
type SocketOptions struct {
	handler SocketOptionsHandler

	// StackHandler is initialized at the creation time and will not change.
	stackHandler StackHandler `state:"manual"`

	// These fields are accessed and modified using atomic operations.

	// broadcastEnabled determines whether datagram sockets are allowed to
	// send packets to a broadcast address.
	broadcastEnabled uint32

	// passCredEnabled determines whether SCM_CREDENTIALS socket control
	// messages are enabled.
	passCredEnabled uint32

	// noChecksumEnabled determines whether UDP checksum is disabled while
	// transmitting for this socket.
	noChecksumEnabled uint32

	// reuseAddressEnabled determines whether Bind() should allow reuse of
	// local address.
	reuseAddressEnabled uint32

	// reusePortEnabled determines whether to permit multiple sockets to be
	// bound to an identical socket address.
	reusePortEnabled uint32

	// keepAliveEnabled determines whether TCP keepalive is enabled for this
	// socket.
	keepAliveEnabled uint32

	// multicastLoopEnabled determines whether multicast packets sent over a
	// non-loopback interface will be looped back.
	multicastLoopEnabled uint32

	// receiveTOSEnabled is used to specify if the TOS ancillary message is
	// passed with incoming packets.
	receiveTOSEnabled uint32

	// receiveTClassEnabled is used to specify if the IPV6_TCLASS ancillary
	// message is passed with incoming packets.
	receiveTClassEnabled uint32

	// receivePacketInfoEnabled is used to specify if more inforamtion is
	// provided with incoming packets such as interface index and address.
	receivePacketInfoEnabled uint32

	// hdrIncludeEnabled is used to indicate for a raw endpoint that all packets
	// being written have an IP header and the endpoint should not attach an IP
	// header.
	hdrIncludedEnabled uint32

	// v6OnlyEnabled is used to determine whether an IPv6 socket is to be
	// restricted to sending and receiving IPv6 packets only.
	v6OnlyEnabled uint32

	// quickAckEnabled is used to represent the value of TCP_QUICKACK option.
	// It currently does not have any effect on the TCP endpoint.
	quickAckEnabled uint32

	// delayOptionEnabled is used to specify if data should be sent out immediately
	// by the transport protocol. For TCP, it determines if the Nagle algorithm
	// is on or off.
	delayOptionEnabled uint32

	// corkOptionEnabled is used to specify if data should be held until segments
	// are full by the TCP transport protocol.
	corkOptionEnabled uint32

	// receiveOriginalDstAddress is used to specify if the original destination of
	// the incoming packet should be returned as an ancillary message.
	receiveOriginalDstAddress uint32

	// recvErrEnabled determines whether extended reliable error message passing
	// is enabled.
	recvErrEnabled uint32

	// errQueue is the per-socket error queue. It is protected by errQueueMu.
	errQueueMu sync.Mutex `state:"nosave"`
	errQueue   sockErrorList

	// bindToDevice determines the device to which the socket is bound.
	bindToDevice int32

	// sendBufferSize determines the send buffer size for this socket.
	sendBufferSize int64

	// mu protects the access to the below fields.
	mu sync.Mutex `state:"nosave"`

	// linger determines the amount of time the socket should linger before
	// close. We currently implement this option for TCP socket only.
	linger LingerOption
}

// InitHandler initializes the handler. This must be called before using the
// socket options utility.
func (so *SocketOptions) InitHandler(handler SocketOptionsHandler, stack StackHandler) {
	so.handler = handler
	so.stackHandler = stack
}

func storeAtomicBool(addr *uint32, v bool) {
	var val uint32
	if v {
		val = 1
	}
	atomic.StoreUint32(addr, val)
}

// SetLastError sets the last error for a socket.
func (so *SocketOptions) SetLastError(err *Error) {
	so.handler.UpdateLastError(err)
}

// GetBroadcast gets value for SO_BROADCAST option.
func (so *SocketOptions) GetBroadcast() bool {
	return atomic.LoadUint32(&so.broadcastEnabled) != 0
}

// SetBroadcast sets value for SO_BROADCAST option.
func (so *SocketOptions) SetBroadcast(v bool) {
	storeAtomicBool(&so.broadcastEnabled, v)
}

// GetPassCred gets value for SO_PASSCRED option.
func (so *SocketOptions) GetPassCred() bool {
	return atomic.LoadUint32(&so.passCredEnabled) != 0
}

// SetPassCred sets value for SO_PASSCRED option.
func (so *SocketOptions) SetPassCred(v bool) {
	storeAtomicBool(&so.passCredEnabled, v)
}

// GetNoChecksum gets value for SO_NO_CHECK option.
func (so *SocketOptions) GetNoChecksum() bool {
	return atomic.LoadUint32(&so.noChecksumEnabled) != 0
}

// SetNoChecksum sets value for SO_NO_CHECK option.
func (so *SocketOptions) SetNoChecksum(v bool) {
	storeAtomicBool(&so.noChecksumEnabled, v)
}

// GetReuseAddress gets value for SO_REUSEADDR option.
func (so *SocketOptions) GetReuseAddress() bool {
	return atomic.LoadUint32(&so.reuseAddressEnabled) != 0
}

// SetReuseAddress sets value for SO_REUSEADDR option.
func (so *SocketOptions) SetReuseAddress(v bool) {
	storeAtomicBool(&so.reuseAddressEnabled, v)
	so.handler.OnReuseAddressSet(v)
}

// GetReusePort gets value for SO_REUSEPORT option.
func (so *SocketOptions) GetReusePort() bool {
	return atomic.LoadUint32(&so.reusePortEnabled) != 0
}

// SetReusePort sets value for SO_REUSEPORT option.
func (so *SocketOptions) SetReusePort(v bool) {
	storeAtomicBool(&so.reusePortEnabled, v)
	so.handler.OnReusePortSet(v)
}

// GetKeepAlive gets value for SO_KEEPALIVE option.
func (so *SocketOptions) GetKeepAlive() bool {
	return atomic.LoadUint32(&so.keepAliveEnabled) != 0
}

// SetKeepAlive sets value for SO_KEEPALIVE option.
func (so *SocketOptions) SetKeepAlive(v bool) {
	storeAtomicBool(&so.keepAliveEnabled, v)
	so.handler.OnKeepAliveSet(v)
}

// GetMulticastLoop gets value for IP_MULTICAST_LOOP option.
func (so *SocketOptions) GetMulticastLoop() bool {
	return atomic.LoadUint32(&so.multicastLoopEnabled) != 0
}

// SetMulticastLoop sets value for IP_MULTICAST_LOOP option.
func (so *SocketOptions) SetMulticastLoop(v bool) {
	storeAtomicBool(&so.multicastLoopEnabled, v)
}

// GetReceiveTOS gets value for IP_RECVTOS option.
func (so *SocketOptions) GetReceiveTOS() bool {
	return atomic.LoadUint32(&so.receiveTOSEnabled) != 0
}

// SetReceiveTOS sets value for IP_RECVTOS option.
func (so *SocketOptions) SetReceiveTOS(v bool) {
	storeAtomicBool(&so.receiveTOSEnabled, v)
}

// GetReceiveTClass gets value for IPV6_RECVTCLASS option.
func (so *SocketOptions) GetReceiveTClass() bool {
	return atomic.LoadUint32(&so.receiveTClassEnabled) != 0
}

// SetReceiveTClass sets value for IPV6_RECVTCLASS option.
func (so *SocketOptions) SetReceiveTClass(v bool) {
	storeAtomicBool(&so.receiveTClassEnabled, v)
}

// GetReceivePacketInfo gets value for IP_PKTINFO option.
func (so *SocketOptions) GetReceivePacketInfo() bool {
	return atomic.LoadUint32(&so.receivePacketInfoEnabled) != 0
}

// SetReceivePacketInfo sets value for IP_PKTINFO option.
func (so *SocketOptions) SetReceivePacketInfo(v bool) {
	storeAtomicBool(&so.receivePacketInfoEnabled, v)
}

// GetHeaderIncluded gets value for IP_HDRINCL option.
func (so *SocketOptions) GetHeaderIncluded() bool {
	return atomic.LoadUint32(&so.hdrIncludedEnabled) != 0
}

// SetHeaderIncluded sets value for IP_HDRINCL option.
func (so *SocketOptions) SetHeaderIncluded(v bool) {
	storeAtomicBool(&so.hdrIncludedEnabled, v)
}

// GetV6Only gets value for IPV6_V6ONLY option.
func (so *SocketOptions) GetV6Only() bool {
	return atomic.LoadUint32(&so.v6OnlyEnabled) != 0
}

// SetV6Only sets value for IPV6_V6ONLY option.
//
// Preconditions: the backing TCP or UDP endpoint must be in initial state.
func (so *SocketOptions) SetV6Only(v bool) {
	storeAtomicBool(&so.v6OnlyEnabled, v)
}

// GetQuickAck gets value for TCP_QUICKACK option.
func (so *SocketOptions) GetQuickAck() bool {
	return atomic.LoadUint32(&so.quickAckEnabled) != 0
}

// SetQuickAck sets value for TCP_QUICKACK option.
func (so *SocketOptions) SetQuickAck(v bool) {
	storeAtomicBool(&so.quickAckEnabled, v)
}

// GetDelayOption gets inverted value for TCP_NODELAY option.
func (so *SocketOptions) GetDelayOption() bool {
	return atomic.LoadUint32(&so.delayOptionEnabled) != 0
}

// SetDelayOption sets inverted value for TCP_NODELAY option.
func (so *SocketOptions) SetDelayOption(v bool) {
	storeAtomicBool(&so.delayOptionEnabled, v)
	so.handler.OnDelayOptionSet(v)
}

// GetCorkOption gets value for TCP_CORK option.
func (so *SocketOptions) GetCorkOption() bool {
	return atomic.LoadUint32(&so.corkOptionEnabled) != 0
}

// SetCorkOption sets value for TCP_CORK option.
func (so *SocketOptions) SetCorkOption(v bool) {
	storeAtomicBool(&so.corkOptionEnabled, v)
	so.handler.OnCorkOptionSet(v)
}

// GetReceiveOriginalDstAddress gets value for IP(V6)_RECVORIGDSTADDR option.
func (so *SocketOptions) GetReceiveOriginalDstAddress() bool {
	return atomic.LoadUint32(&so.receiveOriginalDstAddress) != 0
}

// SetReceiveOriginalDstAddress sets value for IP(V6)_RECVORIGDSTADDR option.
func (so *SocketOptions) SetReceiveOriginalDstAddress(v bool) {
	storeAtomicBool(&so.receiveOriginalDstAddress, v)
}

// GetRecvError gets value for IP*_RECVERR option.
func (so *SocketOptions) GetRecvError() bool {
	return atomic.LoadUint32(&so.recvErrEnabled) != 0
}

// SetRecvError sets value for IP*_RECVERR option.
func (so *SocketOptions) SetRecvError(v bool) {
	storeAtomicBool(&so.recvErrEnabled, v)
	if !v {
		so.pruneErrQueue()
	}
}

// GetLastError gets value for SO_ERROR option.
func (so *SocketOptions) GetLastError() *Error {
	return so.handler.LastError()
}

// GetOutOfBandInline gets value for SO_OOBINLINE option.
func (*SocketOptions) GetOutOfBandInline() bool {
	return true
}

// SetOutOfBandInline sets value for SO_OOBINLINE option. We currently do not
// support disabling this option.
func (*SocketOptions) SetOutOfBandInline(bool) {}

// GetLinger gets value for SO_LINGER option.
func (so *SocketOptions) GetLinger() LingerOption {
	so.mu.Lock()
	linger := so.linger
	so.mu.Unlock()
	return linger
}

// SetLinger sets value for SO_LINGER option.
func (so *SocketOptions) SetLinger(linger LingerOption) {
	so.mu.Lock()
	so.linger = linger
	so.mu.Unlock()
}

// SockErrOrigin represents the constants for error origin.
type SockErrOrigin uint8

const (
	// SockExtErrorOriginNone represents an unknown error origin.
	SockExtErrorOriginNone SockErrOrigin = iota

	// SockExtErrorOriginLocal indicates a local error.
	SockExtErrorOriginLocal

	// SockExtErrorOriginICMP indicates an IPv4 ICMP error.
	SockExtErrorOriginICMP

	// SockExtErrorOriginICMP6 indicates an IPv6 ICMP error.
	SockExtErrorOriginICMP6
)

// IsICMPErr indicates if the error originated from an ICMP error.
func (origin SockErrOrigin) IsICMPErr() bool {
	return origin == SockExtErrorOriginICMP || origin == SockExtErrorOriginICMP6
}

// SockError represents a queue entry in the per-socket error queue.
//
// +stateify savable
type SockError struct {
	sockErrorEntry

	// Err is the error caused by the errant packet.
	Err *Error
	// ErrOrigin indicates the error origin.
	ErrOrigin SockErrOrigin
	// ErrType is the type in the ICMP header.
	ErrType uint8
	// ErrCode is the code in the ICMP header.
	ErrCode uint8
	// ErrInfo is additional info about the error.
	ErrInfo uint32

	// Payload is the errant packet's payload.
	Payload []byte
	// Dst is the original destination address of the errant packet.
	Dst FullAddress
	// Offender is the original sender address of the errant packet.
	Offender FullAddress
	// NetProto is the network protocol being used to transmit the packet.
	NetProto NetworkProtocolNumber
}

// pruneErrQueue resets the queue.
func (so *SocketOptions) pruneErrQueue() {
	so.errQueueMu.Lock()
	so.errQueue.Reset()
	so.errQueueMu.Unlock()
}

// DequeueErr dequeues a socket extended error from the error queue and returns
// it. Returns nil if queue is empty.
func (so *SocketOptions) DequeueErr() *SockError {
	so.errQueueMu.Lock()
	defer so.errQueueMu.Unlock()

	err := so.errQueue.Front()
	if err != nil {
		so.errQueue.Remove(err)
	}
	return err
}

// PeekErr returns the error in the front of the error queue. Returns nil if
// the error queue is empty.
func (so *SocketOptions) PeekErr() *SockError {
	so.errQueueMu.Lock()
	defer so.errQueueMu.Unlock()
	return so.errQueue.Front()
}

// QueueErr inserts the error at the back of the error queue.
//
// Preconditions: so.GetRecvError() == true.
func (so *SocketOptions) QueueErr(err *SockError) {
	so.errQueueMu.Lock()
	defer so.errQueueMu.Unlock()
	so.errQueue.PushBack(err)
}

// QueueLocalErr queues a local error onto the local queue.
func (so *SocketOptions) QueueLocalErr(err *Error, net NetworkProtocolNumber, info uint32, dst FullAddress, payload []byte) {
	so.QueueErr(&SockError{
		Err:       err,
		ErrOrigin: SockExtErrorOriginLocal,
		ErrInfo:   info,
		Payload:   payload,
		Dst:       dst,
		NetProto:  net,
	})
}

// GetBindToDevice gets value for SO_BINDTODEVICE option.
func (so *SocketOptions) GetBindToDevice() int32 {
	return atomic.LoadInt32(&so.bindToDevice)
}

// SetBindToDevice sets value for SO_BINDTODEVICE option.
func (so *SocketOptions) SetBindToDevice(bindToDevice int32) *Error {
	if !so.handler.HasNIC(bindToDevice) {
		return ErrUnknownDevice
	}

	atomic.StoreInt32(&so.bindToDevice, bindToDevice)
	return nil
}

// GetSendBufferSize gets value for SO_SNDBUF option.
func (so *SocketOptions) GetSendBufferSize() (int64, *Error) {
	if so.handler.IsUnixSocket() {
		return so.handler.GetSendBufferSize()
	}
	return atomic.LoadInt64(&so.sendBufferSize), nil
}

// SetSendBufferSize sets value for SO_SNDBUF option. notify indicates if the
// stack handler should be invoked to set the send buffer size.
func (so *SocketOptions) SetSendBufferSize(sendBufferSize int64, notify bool, getBufferLimits GetSendBufferLimits) {
	v := sendBufferSize
	ss := getBufferLimits(so.stackHandler)

	if notify {
		// TODO(b/176170271): Notify waiters after size has grown.
		// Make sure the send buffer size is within the min and max
		// allowed.
		min := int64(ss.Min)
		max := int64(ss.Max)
		// Validate the send buffer size with min and max values.
		// Multiply it by factor of 2.
		if v > max {
			v = max
		}

		if v < math.MaxInt32/PacketOverheadFactor {
			v *= PacketOverheadFactor
			if v < min {
				v = min
			}
		} else {
			v = math.MaxInt32
		}
	}
	atomic.StoreInt64(&so.sendBufferSize, v)
}