summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip')
-rw-r--r--pkg/tcpip/buffer/view.go2
-rw-r--r--pkg/tcpip/iptables/BUILD18
-rw-r--r--pkg/tcpip/iptables/iptables.go81
-rw-r--r--pkg/tcpip/iptables/targets.go35
-rw-r--r--pkg/tcpip/iptables/types.go183
-rw-r--r--pkg/tcpip/link/rawfile/errors.go2
-rw-r--r--pkg/tcpip/link/rawfile/rawfile_unsafe.go2
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem_test.go2
-rw-r--r--pkg/tcpip/network/fragmentation/fragmentation.go4
-rw-r--r--pkg/tcpip/tcpip.go81
-rw-r--r--pkg/tcpip/transport/icmp/endpoint.go5
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go7
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go55
-rw-r--r--pkg/tcpip/transport/tcp/endpoint_state.go1
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go42
-rw-r--r--pkg/tcpip/transport/udp/endpoint_state.go2
16 files changed, 459 insertions, 63 deletions
diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go
index 1a9d40778..150310c11 100644
--- a/pkg/tcpip/buffer/view.go
+++ b/pkg/tcpip/buffer/view.go
@@ -50,7 +50,7 @@ func (v View) ToVectorisedView() VectorisedView {
return NewVectorisedView(len(v), []View{v})
}
-// VectorisedView is a vectorised version of View using non contigous memory.
+// VectorisedView is a vectorised version of View using non contiguous memory.
// It supports all the convenience methods supported by View.
//
// +stateify savable
diff --git a/pkg/tcpip/iptables/BUILD b/pkg/tcpip/iptables/BUILD
new file mode 100644
index 000000000..fc9abbb55
--- /dev/null
+++ b/pkg/tcpip/iptables/BUILD
@@ -0,0 +1,18 @@
+package(licenses = ["notice"])
+
+load("//tools/go_stateify:defs.bzl", "go_library")
+
+go_library(
+ name = "iptables",
+ srcs = [
+ "iptables.go",
+ "targets.go",
+ "types.go",
+ ],
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/iptables",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//pkg/tcpip",
+ "//pkg/tcpip/buffer",
+ ],
+)
diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go
new file mode 100644
index 000000000..f1e1d1fad
--- /dev/null
+++ b/pkg/tcpip/iptables/iptables.go
@@ -0,0 +1,81 @@
+// Copyright 2019 The gVisor authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package iptables supports packet filtering and manipulation via the iptables
+// tool.
+package iptables
+
+const (
+ tablenameNat = "nat"
+ tablenameMangle = "mangle"
+)
+
+// Chain names as defined by net/ipv4/netfilter/ip_tables.c.
+const (
+ chainNamePrerouting = "PREROUTING"
+ chainNameInput = "INPUT"
+ chainNameForward = "FORWARD"
+ chainNameOutput = "OUTPUT"
+ chainNamePostrouting = "POSTROUTING"
+)
+
+// DefaultTables returns a default set of tables. Each chain is set to accept
+// all packets.
+func DefaultTables() *IPTables {
+ return &IPTables{
+ Tables: map[string]Table{
+ tablenameNat: Table{
+ BuiltinChains: map[Hook]Chain{
+ Prerouting: unconditionalAcceptChain(chainNamePrerouting),
+ Input: unconditionalAcceptChain(chainNameInput),
+ Output: unconditionalAcceptChain(chainNameOutput),
+ Postrouting: unconditionalAcceptChain(chainNamePostrouting),
+ },
+ DefaultTargets: map[Hook]Target{
+ Prerouting: UnconditionalAcceptTarget{},
+ Input: UnconditionalAcceptTarget{},
+ Output: UnconditionalAcceptTarget{},
+ Postrouting: UnconditionalAcceptTarget{},
+ },
+ UserChains: map[string]Chain{},
+ },
+ tablenameMangle: Table{
+ BuiltinChains: map[Hook]Chain{
+ Prerouting: unconditionalAcceptChain(chainNamePrerouting),
+ Output: unconditionalAcceptChain(chainNameOutput),
+ },
+ DefaultTargets: map[Hook]Target{
+ Prerouting: UnconditionalAcceptTarget{},
+ Output: UnconditionalAcceptTarget{},
+ },
+ UserChains: map[string]Chain{},
+ },
+ },
+ Priorities: map[Hook][]string{
+ Prerouting: []string{tablenameMangle, tablenameNat},
+ Output: []string{tablenameMangle, tablenameNat},
+ },
+ }
+}
+
+func unconditionalAcceptChain(name string) Chain {
+ return Chain{
+ Name: name,
+ Rules: []Rule{
+ Rule{
+ Target: UnconditionalAcceptTarget{},
+ },
+ },
+ }
+}
diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go
new file mode 100644
index 000000000..19a7f77e3
--- /dev/null
+++ b/pkg/tcpip/iptables/targets.go
@@ -0,0 +1,35 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains various Targets.
+
+package iptables
+
+import "gvisor.dev/gvisor/pkg/tcpip/buffer"
+
+// UnconditionalAcceptTarget accepts all packets.
+type UnconditionalAcceptTarget struct{}
+
+// Action implements Target.Action.
+func (UnconditionalAcceptTarget) Action(packet buffer.VectorisedView) (Verdict, string) {
+ return Accept, ""
+}
+
+// UnconditionalDropTarget denies all packets.
+type UnconditionalDropTarget struct{}
+
+// Action implements Target.Action.
+func (UnconditionalDropTarget) Action(packet buffer.VectorisedView) (Verdict, string) {
+ return Drop, ""
+}
diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go
new file mode 100644
index 000000000..600bd9a10
--- /dev/null
+++ b/pkg/tcpip/iptables/types.go
@@ -0,0 +1,183 @@
+// Copyright 2019 The gVisor authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package iptables
+
+import (
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+)
+
+// A Hook specifies one of the hooks built into the network stack.
+//
+// Userspace app Userspace app
+// ^ |
+// | v
+// [Input] [Output]
+// ^ |
+// | v
+// | routing
+// | |
+// | v
+// ----->[Prerouting]----->routing----->[Forward]---------[Postrouting]----->
+type Hook uint
+
+// These values correspond to values in include/uapi/linux/netfilter.h.
+const (
+ // Prerouting happens before a packet is routed to applications or to
+ // be forwarded.
+ Prerouting Hook = iota
+
+ // Input happens before a packet reaches an application.
+ Input
+
+ // Forward happens once it's decided that a packet should be forwarded
+ // to another host.
+ Forward
+
+ // Output happens after a packet is written by an application to be
+ // sent out.
+ Output
+
+ // Postrouting happens just before a packet goes out on the wire.
+ Postrouting
+
+ // The total number of hooks.
+ NumHooks
+)
+
+// A Verdict is returned by a rule's target to indicate how traversal of rules
+// should (or should not) continue.
+type Verdict int
+
+const (
+ // Accept indicates the packet should continue traversing netstack as
+ // normal.
+ Accept Verdict = iota
+
+ // Drop inicates the packet should be dropped, stopping traversing
+ // netstack.
+ Drop
+
+ // Stolen indicates the packet was co-opted by the target and should
+ // stop traversing netstack.
+ Stolen
+
+ // Queue indicates the packet should be queued for userspace processing.
+ Queue
+
+ // Repeat indicates the packet should re-traverse the chains for the
+ // current hook.
+ Repeat
+
+ // None indicates no verdict was reached.
+ None
+
+ // Jump indicates a jump to another chain.
+ Jump
+
+ // Continue indicates that traversal should continue at the next rule.
+ Continue
+
+ // Return indicates that traversal should return to the calling chain.
+ Return
+)
+
+// IPTables holds all the tables for a netstack.
+type IPTables struct {
+ // Tables maps table names to tables. User tables have arbitrary names.
+ Tables map[string]Table
+
+ // Priorities maps each hook to a list of table names. The order of the
+ // list is the order in which each table should be visited for that
+ // hook.
+ Priorities map[Hook][]string
+}
+
+// A Table defines a set of chains and hooks into the network stack. The
+// currently supported tables are:
+// * nat
+// * mangle
+type Table struct {
+ // BuiltinChains holds the un-deletable chains built into netstack. If
+ // a hook isn't present in the map, this table doesn't utilize that
+ // hook.
+ BuiltinChains map[Hook]Chain
+
+ // DefaultTargets holds a target for each hook that will be executed if
+ // chain traversal doesn't yield a verdict.
+ DefaultTargets map[Hook]Target
+
+ // UserChains holds user-defined chains for the keyed by name. Users
+ // can give their chains arbitrary names.
+ UserChains map[string]Chain
+
+ // Chains maps names to chains for both builtin and user-defined chains.
+ // Its entries point to Chains already either in BuiltinChains or
+ // UserChains, and its purpose is to make looking up tables by name
+ // fast.
+ Chains map[string]*Chain
+}
+
+// ValidHooks returns a bitmap of the builtin hooks for the given table.
+func (table *Table) ValidHooks() (uint32, *tcpip.Error) {
+ hooks := uint32(0)
+ for hook, _ := range table.BuiltinChains {
+ hooks |= 1 << hook
+ }
+ return hooks, nil
+}
+
+// A Chain defines a list of rules for packet processing. When a packet
+// traverses a chain, it is checked against each rule until either a rule
+// returns a verdict or the chain ends.
+//
+// By convention, builtin chains end with a rule that matches everything and
+// returns either Accept or Drop. User-defined chains end with Return. These
+// aren't strictly necessary here, but the iptables tool writes tables this way.
+type Chain struct {
+ // Name is the chain name.
+ Name string
+
+ // Rules is the list of rules to traverse.
+ Rules []Rule
+}
+
+// A Rule is a packet processing rule. It consists of two pieces. First it
+// contains zero or more matchers, each of which is a specification of which
+// packets this rule applies to. If there are no matchers in the rule, it
+// applies to any packet.
+type Rule struct {
+ // Matchers is the list of matchers for this rule.
+ Matchers []Matcher
+
+ // Target is the action to invoke if all the matchers match the packet.
+ Target Target
+}
+
+// A Matcher is the interface for matching packets.
+type Matcher interface {
+ // Match returns whether the packet matches and whether the packet
+ // should be "hotdropped", i.e. dropped immediately. This is usually
+ // used for suspicious packets.
+ Match(hook Hook, packet buffer.VectorisedView, interfaceName string) (matches bool, hotdrop bool)
+}
+
+// A Target is the interface for taking an action for a packet.
+type Target interface {
+ // Action takes an action on the packet and returns a verdict on how
+ // traversal should (or should not) continue. If the return value is
+ // Jump, it also returns the name of the chain to jump to.
+ Action(packet buffer.VectorisedView) (Verdict, string)
+}
diff --git a/pkg/tcpip/link/rawfile/errors.go b/pkg/tcpip/link/rawfile/errors.go
index 80e91bb34..a0a873c84 100644
--- a/pkg/tcpip/link/rawfile/errors.go
+++ b/pkg/tcpip/link/rawfile/errors.go
@@ -30,7 +30,7 @@ var translations [maxErrno]*tcpip.Error
// TranslateErrno translate an errno from the syscall package into a
// *tcpip.Error.
//
-// Valid, but unreconigized errnos will be translated to
+// Valid, but unrecognized errnos will be translated to
// tcpip.ErrInvalidEndpointState (EINVAL). Panics on invalid errnos.
func TranslateErrno(e syscall.Errno) *tcpip.Error {
if err := translations[e]; err != nil {
diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
index 08847f95f..e3fbb15c2 100644
--- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go
+++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
@@ -110,7 +110,7 @@ type PollEvent struct {
// BlockingRead reads from a file descriptor that is set up as non-blocking. If
// no data is available, it will block in a poll() syscall until the file
-// descirptor becomes readable.
+// descriptor becomes readable.
func BlockingRead(fd int, b []byte) (int, *tcpip.Error) {
for {
n, _, e := syscall.RawSyscall(syscall.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)))
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index 75f08dfb0..98036f367 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -636,7 +636,7 @@ func TestSimpleReceive(t *testing.T) {
syscall.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})
// Wait for packet to be received, then check it.
- c.waitForPackets(1, time.After(time.Second), "Error waiting for packet")
+ c.waitForPackets(1, time.After(5*time.Second), "Timeout waiting for packet")
c.mu.Lock()
rcvd := []byte(c.packets[0].vv.First())
c.packets = c.packets[:0]
diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go
index 6822059d6..1628a82be 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation.go
@@ -60,7 +60,7 @@ type Fragmentation struct {
// lowMemoryLimit specifies the limit on which we will reach by dropping
// fragments after reaching highMemoryLimit.
//
-// reassemblingTimeout specifes the maximum time allowed to reassemble a packet.
+// reassemblingTimeout specifies the maximum time allowed to reassemble a packet.
// Fragments are lazily evicted only when a new a packet with an
// already existing fragmentation-id arrives after the timeout.
func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration) *Fragmentation {
@@ -80,7 +80,7 @@ func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout t
}
}
-// Process processes an incoming fragment beloning to an ID
+// Process processes an incoming fragment belonging to an ID
// and returns a complete packet when all the packets belonging to that ID have been received.
func (f *Fragmentation) Process(id uint32, first, last uint16, more bool, vv buffer.VectorisedView) (buffer.VectorisedView, bool) {
f.mu.Lock()
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 4aafb51ab..c61f96fb0 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -66,43 +66,44 @@ func (e *Error) IgnoreStats() bool {
// Errors that can be returned by the network stack.
var (
- ErrUnknownProtocol = &Error{msg: "unknown protocol"}
- ErrUnknownNICID = &Error{msg: "unknown nic id"}
- ErrUnknownDevice = &Error{msg: "unknown device"}
- ErrUnknownProtocolOption = &Error{msg: "unknown option for protocol"}
- ErrDuplicateNICID = &Error{msg: "duplicate nic id"}
- ErrDuplicateAddress = &Error{msg: "duplicate address"}
- ErrNoRoute = &Error{msg: "no route"}
- ErrBadLinkEndpoint = &Error{msg: "bad link layer endpoint"}
- ErrAlreadyBound = &Error{msg: "endpoint already bound", ignoreStats: true}
- ErrInvalidEndpointState = &Error{msg: "endpoint is in invalid state"}
- ErrAlreadyConnecting = &Error{msg: "endpoint is already connecting", ignoreStats: true}
- ErrAlreadyConnected = &Error{msg: "endpoint is already connected", ignoreStats: true}
- ErrNoPortAvailable = &Error{msg: "no ports are available"}
- ErrPortInUse = &Error{msg: "port is in use"}
- ErrBadLocalAddress = &Error{msg: "bad local address"}
- ErrClosedForSend = &Error{msg: "endpoint is closed for send"}
- ErrClosedForReceive = &Error{msg: "endpoint is closed for receive"}
- ErrWouldBlock = &Error{msg: "operation would block", ignoreStats: true}
- ErrConnectionRefused = &Error{msg: "connection was refused"}
- ErrTimeout = &Error{msg: "operation timed out"}
- ErrAborted = &Error{msg: "operation aborted"}
- ErrConnectStarted = &Error{msg: "connection attempt started", ignoreStats: true}
- ErrDestinationRequired = &Error{msg: "destination address is required"}
- ErrNotSupported = &Error{msg: "operation not supported"}
- ErrQueueSizeNotSupported = &Error{msg: "queue size querying not supported"}
- ErrNotConnected = &Error{msg: "endpoint not connected"}
- ErrConnectionReset = &Error{msg: "connection reset by peer"}
- ErrConnectionAborted = &Error{msg: "connection aborted"}
- ErrNoSuchFile = &Error{msg: "no such file"}
- ErrInvalidOptionValue = &Error{msg: "invalid option value specified"}
- ErrNoLinkAddress = &Error{msg: "no remote link address"}
- ErrBadAddress = &Error{msg: "bad address"}
- ErrNetworkUnreachable = &Error{msg: "network is unreachable"}
- ErrMessageTooLong = &Error{msg: "message too long"}
- ErrNoBufferSpace = &Error{msg: "no buffer space available"}
- ErrBroadcastDisabled = &Error{msg: "broadcast socket option disabled"}
- ErrNotPermitted = &Error{msg: "operation not permitted"}
+ ErrUnknownProtocol = &Error{msg: "unknown protocol"}
+ ErrUnknownNICID = &Error{msg: "unknown nic id"}
+ ErrUnknownDevice = &Error{msg: "unknown device"}
+ ErrUnknownProtocolOption = &Error{msg: "unknown option for protocol"}
+ ErrDuplicateNICID = &Error{msg: "duplicate nic id"}
+ ErrDuplicateAddress = &Error{msg: "duplicate address"}
+ ErrNoRoute = &Error{msg: "no route"}
+ ErrBadLinkEndpoint = &Error{msg: "bad link layer endpoint"}
+ ErrAlreadyBound = &Error{msg: "endpoint already bound", ignoreStats: true}
+ ErrInvalidEndpointState = &Error{msg: "endpoint is in invalid state"}
+ ErrAlreadyConnecting = &Error{msg: "endpoint is already connecting", ignoreStats: true}
+ ErrAlreadyConnected = &Error{msg: "endpoint is already connected", ignoreStats: true}
+ ErrNoPortAvailable = &Error{msg: "no ports are available"}
+ ErrPortInUse = &Error{msg: "port is in use"}
+ ErrBadLocalAddress = &Error{msg: "bad local address"}
+ ErrClosedForSend = &Error{msg: "endpoint is closed for send"}
+ ErrClosedForReceive = &Error{msg: "endpoint is closed for receive"}
+ ErrWouldBlock = &Error{msg: "operation would block", ignoreStats: true}
+ ErrConnectionRefused = &Error{msg: "connection was refused"}
+ ErrTimeout = &Error{msg: "operation timed out"}
+ ErrAborted = &Error{msg: "operation aborted"}
+ ErrConnectStarted = &Error{msg: "connection attempt started", ignoreStats: true}
+ ErrDestinationRequired = &Error{msg: "destination address is required"}
+ ErrNotSupported = &Error{msg: "operation not supported"}
+ ErrQueueSizeNotSupported = &Error{msg: "queue size querying not supported"}
+ ErrNotConnected = &Error{msg: "endpoint not connected"}
+ ErrConnectionReset = &Error{msg: "connection reset by peer"}
+ ErrConnectionAborted = &Error{msg: "connection aborted"}
+ ErrNoSuchFile = &Error{msg: "no such file"}
+ ErrInvalidOptionValue = &Error{msg: "invalid option value specified"}
+ ErrNoLinkAddress = &Error{msg: "no remote link address"}
+ ErrBadAddress = &Error{msg: "bad address"}
+ ErrNetworkUnreachable = &Error{msg: "network is unreachable"}
+ ErrMessageTooLong = &Error{msg: "message too long"}
+ ErrNoBufferSpace = &Error{msg: "no buffer space available"}
+ ErrBroadcastDisabled = &Error{msg: "broadcast socket option disabled"}
+ ErrNotPermitted = &Error{msg: "operation not permitted"}
+ ErrAddressFamilyNotSupported = &Error{msg: "address family not supported by protocol"}
)
// Errors related to Subnet
@@ -339,6 +340,10 @@ type Endpoint interface {
// get the actual result. The first call to Connect after the socket has
// connected returns nil. Calling connect again results in ErrAlreadyConnected.
// Anything else -- the attempt to connect failed.
+ //
+ // If address.Addr is empty, this means that Enpoint has to be
+ // disconnected if this is supported, otherwise
+ // ErrAddressFamilyNotSupported must be returned.
Connect(address FullAddress) *Error
// Shutdown closes the read and/or write end of the endpoint connection
@@ -534,7 +539,7 @@ type BroadcastOption int
// Route is a row in the routing table. It specifies through which NIC (and
// gateway) sets of packets should be routed. A row is considered viable if the
-// masked target address matches the destination adddress in the row.
+// masked target address matches the destination address in the row.
type Route struct {
// Destination is the address that must be matched against the masked
// target address to check if this row is viable.
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index 33cefd937..ab9e80747 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -422,6 +422,11 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
e.mu.Lock()
defer e.mu.Unlock()
+ if addr.Addr == "" {
+ // AF_UNSPEC isn't supported.
+ return tcpip.ErrAddressFamilyNotSupported
+ }
+
nicid := addr.NIC
localPort := uint16(0)
switch e.state {
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index 03f495e48..42aded77f 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -16,7 +16,7 @@
// sockets allow applications to:
//
// * manually write and inspect transport layer headers and payloads
-// * receive all traffic of a given transport protcol (e.g. ICMP or UDP)
+// * receive all traffic of a given transport protocol (e.g. ICMP or UDP)
// * optionally write and inspect network layer and link layer headers for
// packets
//
@@ -298,6 +298,11 @@ func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
ep.mu.Lock()
defer ep.mu.Unlock()
+ if addr.Addr == "" {
+ // AF_UNSPEC isn't supported.
+ return tcpip.ErrAddressFamilyNotSupported
+ }
+
if ep.closed {
return tcpip.ErrInvalidEndpointState
}
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 1aa1f12b4..cb40fea94 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -207,6 +207,12 @@ type endpoint struct {
rcvBufSize int
rcvBufUsed int
rcvAutoParams rcvBufAutoTuneParams
+ // zeroWindow indicates that the window was closed due to receive buffer
+ // space being filled up. This is set by the worker goroutine before
+ // moving a segment to the rcvList. This setting is cleared by the
+ // endpoint when a Read() call reads enough data for the new window to
+ // be non-zero.
+ zeroWindow bool
// The following fields are protected by the mutex.
mu sync.RWMutex `state:"nosave"`
@@ -719,10 +725,12 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) {
s.decRef()
}
- scale := e.rcv.rcvWndScale
- wasZero := e.zeroReceiveWindow(scale)
e.rcvBufUsed -= len(v)
- if wasZero && !e.zeroReceiveWindow(scale) {
+ // If the window was zero before this read and if the read freed up
+ // enough buffer space for the scaled window to be non-zero then notify
+ // the protocol goroutine to send a window update.
+ if e.zeroWindow && !e.zeroReceiveWindow(e.rcv.rcvWndScale) {
+ e.zeroWindow = false
e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow)
}
@@ -942,10 +950,10 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
size = math.MaxInt32 / 2
}
- wasZero := e.zeroReceiveWindow(scale)
e.rcvBufSize = size
e.rcvAutoParams.disabled = true
- if wasZero && !e.zeroReceiveWindow(scale) {
+ if e.zeroWindow && !e.zeroReceiveWindow(scale) {
+ e.zeroWindow = false
mask |= notifyNonZeroReceiveWindow
}
e.rcvListMu.Unlock()
@@ -1263,6 +1271,11 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocol
// Connect connects the endpoint to its peer.
func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
+ if addr.Addr == "" && addr.Port == 0 {
+ // AF_UNSPEC isn't supported.
+ return tcpip.ErrAddressFamilyNotSupported
+ }
+
return e.connect(addr, true, true)
}
@@ -1747,6 +1760,13 @@ func (e *endpoint) readyToRead(s *segment) {
if s != nil {
s.incRef()
e.rcvBufUsed += s.data.Size()
+ // Check if the receive window is now closed. If so make sure
+ // we set the zero window before we deliver the segment to ensure
+ // that a subsequent read of the segment will correctly trigger
+ // a non-zero notification.
+ if avail := e.receiveBufferAvailableLocked(); avail>>e.rcv.rcvWndScale == 0 {
+ e.zeroWindow = true
+ }
e.rcvList.PushBack(s)
} else {
e.rcvClosed = true
@@ -1756,21 +1776,26 @@ func (e *endpoint) readyToRead(s *segment) {
e.waiterQueue.Notify(waiter.EventIn)
}
-// receiveBufferAvailable calculates how many bytes are still available in the
-// receive buffer.
-func (e *endpoint) receiveBufferAvailable() int {
- e.rcvListMu.Lock()
- size := e.rcvBufSize
- used := e.rcvBufUsed
- e.rcvListMu.Unlock()
-
+// receiveBufferAvailableLocked calculates how many bytes are still available
+// in the receive buffer.
+// rcvListMu must be held when this function is called.
+func (e *endpoint) receiveBufferAvailableLocked() int {
// We may use more bytes than the buffer size when the receive buffer
// shrinks.
- if used >= size {
+ if e.rcvBufUsed >= e.rcvBufSize {
return 0
}
- return size - used
+ return e.rcvBufSize - e.rcvBufUsed
+}
+
+// receiveBufferAvailable calculates how many bytes are still available in the
+// receive buffer.
+func (e *endpoint) receiveBufferAvailable() int {
+ e.rcvListMu.Lock()
+ available := e.receiveBufferAvailableLocked()
+ e.rcvListMu.Unlock()
+ return available
}
func (e *endpoint) receiveBufferSize() int {
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index ec61a3886..b93959034 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -342,6 +342,7 @@ func loadError(s string) *tcpip.Error {
tcpip.ErrNoBufferSpace,
tcpip.ErrBroadcastDisabled,
tcpip.ErrNotPermitted,
+ tcpip.ErrAddressFamilyNotSupported,
}
messageToError = make(map[string]*tcpip.Error)
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 99fdfb795..cb0ea83a6 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -698,8 +698,44 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (t
return netProto, nil
}
+func (e *endpoint) disconnect() *tcpip.Error {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+
+ if e.state != stateConnected {
+ return nil
+ }
+ id := stack.TransportEndpointID{}
+ // Exclude ephemerally bound endpoints.
+ if e.bindNICID != 0 || e.id.LocalAddress == "" {
+ var err *tcpip.Error
+ id = stack.TransportEndpointID{
+ LocalPort: e.id.LocalPort,
+ LocalAddress: e.id.LocalAddress,
+ }
+ id, err = e.registerWithStack(e.regNICID, e.effectiveNetProtos, id)
+ if err != nil {
+ return err
+ }
+ e.state = stateBound
+ } else {
+ e.state = stateInitial
+ }
+
+ e.stack.UnregisterTransportEndpoint(e.regNICID, e.effectiveNetProtos, ProtocolNumber, e.id, e)
+ e.id = id
+ e.route.Release()
+ e.route = stack.Route{}
+ e.dstPort = 0
+
+ return nil
+}
+
// Connect connects the endpoint to its peer. Specifying a NIC is optional.
func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
+ if addr.Addr == "" {
+ return e.disconnect()
+ }
if addr.Port == 0 {
// We don't support connecting to port zero.
return tcpip.ErrInvalidEndpointState
@@ -734,12 +770,16 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
defer r.Release()
id := stack.TransportEndpointID{
- LocalAddress: r.LocalAddress,
+ LocalAddress: e.id.LocalAddress,
LocalPort: localPort,
RemotePort: addr.Port,
RemoteAddress: r.RemoteAddress,
}
+ if e.state == stateInitial {
+ id.LocalAddress = r.LocalAddress
+ }
+
// Even if we're connected, this endpoint can still be used to send
// packets on a different network protocol, so we register both even if
// v6only is set to false and this is an ipv6 endpoint.
diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go
index 701bdd72b..18e786397 100644
--- a/pkg/tcpip/transport/udp/endpoint_state.go
+++ b/pkg/tcpip/transport/udp/endpoint_state.go
@@ -92,8 +92,6 @@ func (e *endpoint) afterLoad() {
if err != nil {
panic(*err)
}
-
- e.id.LocalAddress = e.route.LocalAddress
} else if len(e.id.LocalAddress) != 0 { // stateBound
if e.stack.CheckLocalAddress(e.regNICID, netProto, e.id.LocalAddress) == 0 {
panic(tcpip.ErrBadLocalAddress)