diff options
Diffstat (limited to 'pkg/tcpip')
137 files changed, 2152 insertions, 716 deletions
diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD index 83524cc8a..047f8329a 100644 --- a/pkg/tcpip/BUILD +++ b/pkg/tcpip/BUILD @@ -8,7 +8,7 @@ go_library( "tcpip.go", "time_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip", + importpath = "gvisor.dev/gvisor/pkg/tcpip", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/adapters/gonet/BUILD b/pkg/tcpip/adapters/gonet/BUILD index ee2417238..c40924852 100644 --- a/pkg/tcpip/adapters/gonet/BUILD +++ b/pkg/tcpip/adapters/gonet/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "gonet", srcs = ["gonet.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/adapters/gonet", + importpath = "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go index 2153222cf..308f620e5 100644 --- a/pkg/tcpip/adapters/gonet/gonet.go +++ b/pkg/tcpip/adapters/gonet/gonet.go @@ -23,12 +23,12 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" ) var ( diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go index 2552004a9..39efe44c7 100644 --- a/pkg/tcpip/adapters/gonet/gonet_test.go +++ b/pkg/tcpip/adapters/gonet/gonet_test.go @@ -25,14 +25,14 @@ import ( "time" "golang.org/x/net/nettest" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" ) const ( diff --git a/pkg/tcpip/buffer/BUILD b/pkg/tcpip/buffer/BUILD index 648d12cdf..3301967fb 100644 --- a/pkg/tcpip/buffer/BUILD +++ b/pkg/tcpip/buffer/BUILD @@ -8,7 +8,7 @@ go_library( "prependable.go", "view.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer", + importpath = "gvisor.dev/gvisor/pkg/tcpip/buffer", visibility = ["//visibility:public"], ) diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go index 1a9d40778..150310c11 100644 --- a/pkg/tcpip/buffer/view.go +++ b/pkg/tcpip/buffer/view.go @@ -50,7 +50,7 @@ func (v View) ToVectorisedView() VectorisedView { return NewVectorisedView(len(v), []View{v}) } -// VectorisedView is a vectorised version of View using non contigous memory. +// VectorisedView is a vectorised version of View using non contiguous memory. // It supports all the convenience methods supported by View. // // +stateify savable diff --git a/pkg/tcpip/checker/BUILD b/pkg/tcpip/checker/BUILD index f597d0b24..4cecfb989 100644 --- a/pkg/tcpip/checker/BUILD +++ b/pkg/tcpip/checker/BUILD @@ -6,7 +6,7 @@ go_library( name = "checker", testonly = 1, srcs = ["checker.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/checker", + importpath = "gvisor.dev/gvisor/pkg/tcpip/checker", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go index 6e7edf3ab..afcabd51d 100644 --- a/pkg/tcpip/checker/checker.go +++ b/pkg/tcpip/checker/checker.go @@ -21,9 +21,9 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) // NetworkChecker is a function to check a property of a network packet. diff --git a/pkg/tcpip/hash/jenkins/BUILD b/pkg/tcpip/hash/jenkins/BUILD index ce2194a4d..29b30be9c 100644 --- a/pkg/tcpip/hash/jenkins/BUILD +++ b/pkg/tcpip/hash/jenkins/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "jenkins", srcs = ["jenkins.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/hash/jenkins", + importpath = "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/header/BUILD b/pkg/tcpip/header/BUILD index a5c7290ee..76ef02f13 100644 --- a/pkg/tcpip/header/BUILD +++ b/pkg/tcpip/header/BUILD @@ -18,7 +18,7 @@ go_library( "tcp.go", "udp.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/header", + importpath = "gvisor.dev/gvisor/pkg/tcpip/header", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/header/arp.go b/pkg/tcpip/header/arp.go index 55fe7292c..718a4720a 100644 --- a/pkg/tcpip/header/arp.go +++ b/pkg/tcpip/header/arp.go @@ -14,7 +14,7 @@ package header -import "gvisor.googlesource.com/gvisor/pkg/tcpip" +import "gvisor.dev/gvisor/pkg/tcpip" const ( // ARPProtocolNumber is the ARP network protocol number. diff --git a/pkg/tcpip/header/checksum.go b/pkg/tcpip/header/checksum.go index 2eaa7938a..39a4d69be 100644 --- a/pkg/tcpip/header/checksum.go +++ b/pkg/tcpip/header/checksum.go @@ -19,8 +19,8 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) func calculateChecksum(buf []byte, initial uint32) uint16 { diff --git a/pkg/tcpip/header/eth.go b/pkg/tcpip/header/eth.go index 76143f454..4c3d3311f 100644 --- a/pkg/tcpip/header/eth.go +++ b/pkg/tcpip/header/eth.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/header/icmpv4.go b/pkg/tcpip/header/icmpv4.go index 782e1053c..c52c0d851 100644 --- a/pkg/tcpip/header/icmpv4.go +++ b/pkg/tcpip/header/icmpv4.go @@ -17,22 +17,18 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) // ICMPv4 represents an ICMPv4 header stored in a byte array. type ICMPv4 []byte const ( - // ICMPv4MinimumSize is the minimum size of a valid ICMP packet. - ICMPv4MinimumSize = 4 - - // ICMPv4EchoMinimumSize is the minimum size of a valid ICMP echo packet. - ICMPv4EchoMinimumSize = 6 + // ICMPv4PayloadOffset defines the start of ICMP payload. + ICMPv4PayloadOffset = 4 - // ICMPv4DstUnreachableMinimumSize is the minimum size of a valid ICMP - // destination unreachable packet. - ICMPv4DstUnreachableMinimumSize = ICMPv4MinimumSize + 4 + // ICMPv4MinimumSize is the minimum size of a valid ICMP packet. + ICMPv4MinimumSize = 8 // ICMPv4ProtocolNumber is the ICMP transport protocol number. ICMPv4ProtocolNumber tcpip.TransportProtocolNumber = 1 @@ -104,5 +100,5 @@ func (ICMPv4) SetDestinationPort(uint16) { // Payload implements Transport.Payload. func (b ICMPv4) Payload() []byte { - return b[ICMPv4MinimumSize:] + return b[ICMPv4PayloadOffset:] } diff --git a/pkg/tcpip/header/icmpv6.go b/pkg/tcpip/header/icmpv6.go index d0b10d849..3cc57e234 100644 --- a/pkg/tcpip/header/icmpv6.go +++ b/pkg/tcpip/header/icmpv6.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) // ICMPv6 represents an ICMPv6 header stored in a byte array. diff --git a/pkg/tcpip/header/interfaces.go b/pkg/tcpip/header/interfaces.go index fb250ea30..861cbbb70 100644 --- a/pkg/tcpip/header/interfaces.go +++ b/pkg/tcpip/header/interfaces.go @@ -15,7 +15,7 @@ package header import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go index 96e461491..94a3af289 100644 --- a/pkg/tcpip/header/ipv4.go +++ b/pkg/tcpip/header/ipv4.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( @@ -85,6 +85,10 @@ const ( // units, the header cannot exceed 15*4 = 60 bytes. IPv4MaximumHeaderSize = 60 + // MinIPFragmentPayloadSize is the minimum number of payload bytes that + // the first fragment must carry when an IPv4 packet is fragmented. + MinIPFragmentPayloadSize = 8 + // IPv4AddressSize is the size, in bytes, of an IPv4 address. IPv4AddressSize = 4 @@ -268,6 +272,10 @@ func (b IPv4) IsValid(pktSize int) bool { return false } + if IPVersion(b) != IPv4Version { + return false + } + return true } diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go index 66820a466..95fe8bfc3 100644 --- a/pkg/tcpip/header/ipv6.go +++ b/pkg/tcpip/header/ipv6.go @@ -18,7 +18,7 @@ import ( "encoding/binary" "strings" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( @@ -184,6 +184,10 @@ func (b IPv6) IsValid(pktSize int) bool { return false } + if IPVersion(b) != IPv6Version { + return false + } + return true } diff --git a/pkg/tcpip/header/ipv6_fragment.go b/pkg/tcpip/header/ipv6_fragment.go index 6d896355a..018555a26 100644 --- a/pkg/tcpip/header/ipv6_fragment.go +++ b/pkg/tcpip/header/ipv6_fragment.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/header/ipversion_test.go b/pkg/tcpip/header/ipversion_test.go index 0c830180e..b5540bf66 100644 --- a/pkg/tcpip/header/ipversion_test.go +++ b/pkg/tcpip/header/ipversion_test.go @@ -17,7 +17,7 @@ package header_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/header" ) func TestIPv4(t *testing.T) { diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go index 0cd89b992..82cfe785c 100644 --- a/pkg/tcpip/header/tcp.go +++ b/pkg/tcpip/header/tcp.go @@ -18,8 +18,8 @@ import ( "encoding/binary" "github.com/google/btree" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) // These constants are the offsets of the respective fields in the TCP header. @@ -176,6 +176,21 @@ const ( // TCPProtocolNumber is TCP's transport protocol number. TCPProtocolNumber tcpip.TransportProtocolNumber = 6 + + // TCPMinimumMSS is the minimum acceptable value for MSS. This is the + // same as the value TCP_MIN_MSS defined net/tcp.h. + TCPMinimumMSS = IPv4MaximumHeaderSize + TCPHeaderMaximumSize + MinIPFragmentPayloadSize - IPv4MinimumSize - TCPMinimumSize + + // TCPMaximumMSS is the maximum acceptable value for MSS. + TCPMaximumMSS = 0xffff + + // TCPDefaultMSS is the MSS value that should be used if an MSS option + // is not received from the peer. It's also the value returned by + // TCP_MAXSEG option for a socket in an unconnected state. + // + // Per RFC 1122, page 85: "If an MSS option is not received at + // connection setup, TCP MUST assume a default send MSS of 536." + TCPDefaultMSS = 536 ) // SourcePort returns the "source port" field of the tcp header. @@ -306,7 +321,7 @@ func ParseSynOptions(opts []byte, isAck bool) TCPSynOptions { synOpts := TCPSynOptions{ // Per RFC 1122, page 85: "If an MSS option is not received at // connection setup, TCP MUST assume a default send MSS of 536." - MSS: 536, + MSS: TCPDefaultMSS, // If no window scale option is specified, WS in options is // returned as -1; this is because the absence of the option // indicates that the we cannot use window scaling on the diff --git a/pkg/tcpip/header/tcp_test.go b/pkg/tcpip/header/tcp_test.go index 9a2b99489..72563837b 100644 --- a/pkg/tcpip/header/tcp_test.go +++ b/pkg/tcpip/header/tcp_test.go @@ -18,7 +18,7 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/header" ) func TestEncodeSACKBlocks(t *testing.T) { diff --git a/pkg/tcpip/header/udp.go b/pkg/tcpip/header/udp.go index 2205fec18..c1f454805 100644 --- a/pkg/tcpip/header/udp.go +++ b/pkg/tcpip/header/udp.go @@ -17,7 +17,7 @@ package header import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/iptables/BUILD b/pkg/tcpip/iptables/BUILD new file mode 100644 index 000000000..fc9abbb55 --- /dev/null +++ b/pkg/tcpip/iptables/BUILD @@ -0,0 +1,18 @@ +package(licenses = ["notice"]) + +load("//tools/go_stateify:defs.bzl", "go_library") + +go_library( + name = "iptables", + srcs = [ + "iptables.go", + "targets.go", + "types.go", + ], + importpath = "gvisor.dev/gvisor/pkg/tcpip/iptables", + visibility = ["//visibility:public"], + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/buffer", + ], +) diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go new file mode 100644 index 000000000..f1e1d1fad --- /dev/null +++ b/pkg/tcpip/iptables/iptables.go @@ -0,0 +1,81 @@ +// Copyright 2019 The gVisor authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package iptables supports packet filtering and manipulation via the iptables +// tool. +package iptables + +const ( + tablenameNat = "nat" + tablenameMangle = "mangle" +) + +// Chain names as defined by net/ipv4/netfilter/ip_tables.c. +const ( + chainNamePrerouting = "PREROUTING" + chainNameInput = "INPUT" + chainNameForward = "FORWARD" + chainNameOutput = "OUTPUT" + chainNamePostrouting = "POSTROUTING" +) + +// DefaultTables returns a default set of tables. Each chain is set to accept +// all packets. +func DefaultTables() *IPTables { + return &IPTables{ + Tables: map[string]Table{ + tablenameNat: Table{ + BuiltinChains: map[Hook]Chain{ + Prerouting: unconditionalAcceptChain(chainNamePrerouting), + Input: unconditionalAcceptChain(chainNameInput), + Output: unconditionalAcceptChain(chainNameOutput), + Postrouting: unconditionalAcceptChain(chainNamePostrouting), + }, + DefaultTargets: map[Hook]Target{ + Prerouting: UnconditionalAcceptTarget{}, + Input: UnconditionalAcceptTarget{}, + Output: UnconditionalAcceptTarget{}, + Postrouting: UnconditionalAcceptTarget{}, + }, + UserChains: map[string]Chain{}, + }, + tablenameMangle: Table{ + BuiltinChains: map[Hook]Chain{ + Prerouting: unconditionalAcceptChain(chainNamePrerouting), + Output: unconditionalAcceptChain(chainNameOutput), + }, + DefaultTargets: map[Hook]Target{ + Prerouting: UnconditionalAcceptTarget{}, + Output: UnconditionalAcceptTarget{}, + }, + UserChains: map[string]Chain{}, + }, + }, + Priorities: map[Hook][]string{ + Prerouting: []string{tablenameMangle, tablenameNat}, + Output: []string{tablenameMangle, tablenameNat}, + }, + } +} + +func unconditionalAcceptChain(name string) Chain { + return Chain{ + Name: name, + Rules: []Rule{ + Rule{ + Target: UnconditionalAcceptTarget{}, + }, + }, + } +} diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go new file mode 100644 index 000000000..19a7f77e3 --- /dev/null +++ b/pkg/tcpip/iptables/targets.go @@ -0,0 +1,35 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains various Targets. + +package iptables + +import "gvisor.dev/gvisor/pkg/tcpip/buffer" + +// UnconditionalAcceptTarget accepts all packets. +type UnconditionalAcceptTarget struct{} + +// Action implements Target.Action. +func (UnconditionalAcceptTarget) Action(packet buffer.VectorisedView) (Verdict, string) { + return Accept, "" +} + +// UnconditionalDropTarget denies all packets. +type UnconditionalDropTarget struct{} + +// Action implements Target.Action. +func (UnconditionalDropTarget) Action(packet buffer.VectorisedView) (Verdict, string) { + return Drop, "" +} diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go new file mode 100644 index 000000000..600bd9a10 --- /dev/null +++ b/pkg/tcpip/iptables/types.go @@ -0,0 +1,183 @@ +// Copyright 2019 The gVisor authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iptables + +import ( + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" +) + +// A Hook specifies one of the hooks built into the network stack. +// +// Userspace app Userspace app +// ^ | +// | v +// [Input] [Output] +// ^ | +// | v +// | routing +// | | +// | v +// ----->[Prerouting]----->routing----->[Forward]---------[Postrouting]-----> +type Hook uint + +// These values correspond to values in include/uapi/linux/netfilter.h. +const ( + // Prerouting happens before a packet is routed to applications or to + // be forwarded. + Prerouting Hook = iota + + // Input happens before a packet reaches an application. + Input + + // Forward happens once it's decided that a packet should be forwarded + // to another host. + Forward + + // Output happens after a packet is written by an application to be + // sent out. + Output + + // Postrouting happens just before a packet goes out on the wire. + Postrouting + + // The total number of hooks. + NumHooks +) + +// A Verdict is returned by a rule's target to indicate how traversal of rules +// should (or should not) continue. +type Verdict int + +const ( + // Accept indicates the packet should continue traversing netstack as + // normal. + Accept Verdict = iota + + // Drop inicates the packet should be dropped, stopping traversing + // netstack. + Drop + + // Stolen indicates the packet was co-opted by the target and should + // stop traversing netstack. + Stolen + + // Queue indicates the packet should be queued for userspace processing. + Queue + + // Repeat indicates the packet should re-traverse the chains for the + // current hook. + Repeat + + // None indicates no verdict was reached. + None + + // Jump indicates a jump to another chain. + Jump + + // Continue indicates that traversal should continue at the next rule. + Continue + + // Return indicates that traversal should return to the calling chain. + Return +) + +// IPTables holds all the tables for a netstack. +type IPTables struct { + // Tables maps table names to tables. User tables have arbitrary names. + Tables map[string]Table + + // Priorities maps each hook to a list of table names. The order of the + // list is the order in which each table should be visited for that + // hook. + Priorities map[Hook][]string +} + +// A Table defines a set of chains and hooks into the network stack. The +// currently supported tables are: +// * nat +// * mangle +type Table struct { + // BuiltinChains holds the un-deletable chains built into netstack. If + // a hook isn't present in the map, this table doesn't utilize that + // hook. + BuiltinChains map[Hook]Chain + + // DefaultTargets holds a target for each hook that will be executed if + // chain traversal doesn't yield a verdict. + DefaultTargets map[Hook]Target + + // UserChains holds user-defined chains for the keyed by name. Users + // can give their chains arbitrary names. + UserChains map[string]Chain + + // Chains maps names to chains for both builtin and user-defined chains. + // Its entries point to Chains already either in BuiltinChains or + // UserChains, and its purpose is to make looking up tables by name + // fast. + Chains map[string]*Chain +} + +// ValidHooks returns a bitmap of the builtin hooks for the given table. +func (table *Table) ValidHooks() (uint32, *tcpip.Error) { + hooks := uint32(0) + for hook, _ := range table.BuiltinChains { + hooks |= 1 << hook + } + return hooks, nil +} + +// A Chain defines a list of rules for packet processing. When a packet +// traverses a chain, it is checked against each rule until either a rule +// returns a verdict or the chain ends. +// +// By convention, builtin chains end with a rule that matches everything and +// returns either Accept or Drop. User-defined chains end with Return. These +// aren't strictly necessary here, but the iptables tool writes tables this way. +type Chain struct { + // Name is the chain name. + Name string + + // Rules is the list of rules to traverse. + Rules []Rule +} + +// A Rule is a packet processing rule. It consists of two pieces. First it +// contains zero or more matchers, each of which is a specification of which +// packets this rule applies to. If there are no matchers in the rule, it +// applies to any packet. +type Rule struct { + // Matchers is the list of matchers for this rule. + Matchers []Matcher + + // Target is the action to invoke if all the matchers match the packet. + Target Target +} + +// A Matcher is the interface for matching packets. +type Matcher interface { + // Match returns whether the packet matches and whether the packet + // should be "hotdropped", i.e. dropped immediately. This is usually + // used for suspicious packets. + Match(hook Hook, packet buffer.VectorisedView, interfaceName string) (matches bool, hotdrop bool) +} + +// A Target is the interface for taking an action for a packet. +type Target interface { + // Action takes an action on the packet and returns a verdict on how + // traversal should (or should not) continue. If the return value is + // Jump, it also returns the name of the chain to jump to. + Action(packet buffer.VectorisedView) (Verdict, string) +} diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD index ae285e495..97a794986 100644 --- a/pkg/tcpip/link/channel/BUILD +++ b/pkg/tcpip/link/channel/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "channel", srcs = ["channel.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/channel", visibility = ["//:sandbox"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go index ee9dd8700..c40744b8e 100644 --- a/pkg/tcpip/link/channel/channel.go +++ b/pkg/tcpip/link/channel/channel.go @@ -18,9 +18,9 @@ package channel import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // PacketInfo holds all the information about an outbound packet. diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD index cef98c353..d786d8fdf 100644 --- a/pkg/tcpip/link/fdbased/BUILD +++ b/pkg/tcpip/link/fdbased/BUILD @@ -12,7 +12,7 @@ go_library( "mmap_amd64_unsafe.go", "packet_dispatchers.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/fdbased", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index b88e2e7bf..77f988b9f 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -44,11 +44,11 @@ import ( "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // linkDispatcher reads packets from the link FD and dispatches them to the diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go index ba3e09192..e305252d6 100644 --- a/pkg/tcpip/link/fdbased/endpoint_test.go +++ b/pkg/tcpip/link/fdbased/endpoint_test.go @@ -26,11 +26,11 @@ import ( "time" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go index 6b7f2a185..fe19c2bc2 100644 --- a/pkg/tcpip/link/fdbased/mmap.go +++ b/pkg/tcpip/link/fdbased/mmap.go @@ -16,7 +16,7 @@ package fdbased -import "gvisor.googlesource.com/gvisor/pkg/tcpip" +import "gvisor.dev/gvisor/pkg/tcpip" // Stubbed out version for non-linux/non-amd64 platforms. diff --git a/pkg/tcpip/link/fdbased/mmap_amd64.go b/pkg/tcpip/link/fdbased/mmap_amd64.go index 1c2d8c468..8bbb4f9ab 100644 --- a/pkg/tcpip/link/fdbased/mmap_amd64.go +++ b/pkg/tcpip/link/fdbased/mmap_amd64.go @@ -21,10 +21,10 @@ import ( "syscall" "golang.org/x/sys/unix" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" ) const ( diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go index 1ae0e3359..7ca217e5b 100644 --- a/pkg/tcpip/link/fdbased/packet_dispatchers.go +++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go @@ -19,11 +19,11 @@ package fdbased import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // BufConfig defines the shape of the vectorised view used to read packets from the NIC. diff --git a/pkg/tcpip/link/loopback/BUILD b/pkg/tcpip/link/loopback/BUILD index 710a05ede..47a54845c 100644 --- a/pkg/tcpip/link/loopback/BUILD +++ b/pkg/tcpip/link/loopback/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "loopback", srcs = ["loopback.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/loopback", visibility = ["//:sandbox"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go index 2c1148123..ab6a53988 100644 --- a/pkg/tcpip/link/loopback/loopback.go +++ b/pkg/tcpip/link/loopback/loopback.go @@ -21,9 +21,9 @@ package loopback import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) type endpoint struct { diff --git a/pkg/tcpip/link/muxed/BUILD b/pkg/tcpip/link/muxed/BUILD index 84cfae784..ea12ef1ac 100644 --- a/pkg/tcpip/link/muxed/BUILD +++ b/pkg/tcpip/link/muxed/BUILD @@ -1,13 +1,11 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test") -package( - licenses = ["notice"], # Apache 2.0 -) +package(licenses = ["notice"]) go_library( name = "muxed", srcs = ["injectable.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/muxed", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/muxed", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go index be07b7c29..a577a3d52 100644 --- a/pkg/tcpip/link/muxed/injectable.go +++ b/pkg/tcpip/link/muxed/injectable.go @@ -16,9 +16,9 @@ package muxed import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // InjectableEndpoint is an injectable multi endpoint. The endpoint has diff --git a/pkg/tcpip/link/muxed/injectable_test.go b/pkg/tcpip/link/muxed/injectable_test.go index 5d40dfacc..174b9330f 100644 --- a/pkg/tcpip/link/muxed/injectable_test.go +++ b/pkg/tcpip/link/muxed/injectable_test.go @@ -21,11 +21,11 @@ import ( "syscall" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) func TestInjectableEndpointRawDispatch(t *testing.T) { diff --git a/pkg/tcpip/link/rawfile/BUILD b/pkg/tcpip/link/rawfile/BUILD index f01bb2c07..6e3a7a9d7 100644 --- a/pkg/tcpip/link/rawfile/BUILD +++ b/pkg/tcpip/link/rawfile/BUILD @@ -11,7 +11,7 @@ go_library( "errors.go", "rawfile_unsafe.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/rawfile", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/rawfile/errors.go b/pkg/tcpip/link/rawfile/errors.go index 8bde41637..a0a873c84 100644 --- a/pkg/tcpip/link/rawfile/errors.go +++ b/pkg/tcpip/link/rawfile/errors.go @@ -20,7 +20,7 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const maxErrno = 134 @@ -30,7 +30,7 @@ var translations [maxErrno]*tcpip.Error // TranslateErrno translate an errno from the syscall package into a // *tcpip.Error. // -// Valid, but unreconigized errnos will be translated to +// Valid, but unrecognized errnos will be translated to // tcpip.ErrInvalidEndpointState (EINVAL). Panics on invalid errnos. func TranslateErrno(e syscall.Errno) *tcpip.Error { if err := translations[e]; err != nil { diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go index 86db7a487..e3fbb15c2 100644 --- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go +++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go @@ -22,7 +22,7 @@ import ( "syscall" "unsafe" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) // GetMTU determines the MTU of a network interface device. @@ -110,7 +110,7 @@ type PollEvent struct { // BlockingRead reads from a file descriptor that is set up as non-blocking. If // no data is available, it will block in a poll() syscall until the file -// descirptor becomes readable. +// descriptor becomes readable. func BlockingRead(fd int, b []byte) (int, *tcpip.Error) { for { n, _, e := syscall.RawSyscall(syscall.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b))) diff --git a/pkg/tcpip/link/sharedmem/BUILD b/pkg/tcpip/link/sharedmem/BUILD index dc8f1543e..f2998aa98 100644 --- a/pkg/tcpip/link/sharedmem/BUILD +++ b/pkg/tcpip/link/sharedmem/BUILD @@ -10,7 +10,7 @@ go_library( "sharedmem_unsafe.go", "tx.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem", visibility = [ "//:sandbox", ], diff --git a/pkg/tcpip/link/sharedmem/pipe/BUILD b/pkg/tcpip/link/sharedmem/pipe/BUILD index 85deafa38..94725cb11 100644 --- a/pkg/tcpip/link/sharedmem/pipe/BUILD +++ b/pkg/tcpip/link/sharedmem/pipe/BUILD @@ -10,7 +10,7 @@ go_library( "rx.go", "tx.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe", visibility = ["//:sandbox"], ) diff --git a/pkg/tcpip/link/sharedmem/queue/BUILD b/pkg/tcpip/link/sharedmem/queue/BUILD index d7dc631eb..160a8f864 100644 --- a/pkg/tcpip/link/sharedmem/queue/BUILD +++ b/pkg/tcpip/link/sharedmem/queue/BUILD @@ -8,7 +8,7 @@ go_library( "rx.go", "tx.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/tcpip/link/sharedmem/queue/queue_test.go b/pkg/tcpip/link/sharedmem/queue/queue_test.go index d3f8f4b8b..9a0aad5d7 100644 --- a/pkg/tcpip/link/sharedmem/queue/queue_test.go +++ b/pkg/tcpip/link/sharedmem/queue/queue_test.go @@ -19,7 +19,7 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe" ) func TestBasicTxQueue(t *testing.T) { diff --git a/pkg/tcpip/link/sharedmem/queue/rx.go b/pkg/tcpip/link/sharedmem/queue/rx.go index d9aecf2d9..696e6c9e5 100644 --- a/pkg/tcpip/link/sharedmem/queue/rx.go +++ b/pkg/tcpip/link/sharedmem/queue/rx.go @@ -20,8 +20,8 @@ import ( "encoding/binary" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe" ) const ( diff --git a/pkg/tcpip/link/sharedmem/queue/tx.go b/pkg/tcpip/link/sharedmem/queue/tx.go index a24dccd11..beffe807b 100644 --- a/pkg/tcpip/link/sharedmem/queue/tx.go +++ b/pkg/tcpip/link/sharedmem/queue/tx.go @@ -17,8 +17,8 @@ package queue import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe" ) const ( diff --git a/pkg/tcpip/link/sharedmem/rx.go b/pkg/tcpip/link/sharedmem/rx.go index 215cb607f..eec11e4cb 100644 --- a/pkg/tcpip/link/sharedmem/rx.go +++ b/pkg/tcpip/link/sharedmem/rx.go @@ -20,8 +20,8 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue" ) // rx holds all state associated with an rx queue. diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go index e34b780f8..834ea5c40 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem.go +++ b/pkg/tcpip/link/sharedmem/sharedmem.go @@ -27,12 +27,12 @@ import ( "sync/atomic" "syscall" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // QueueConfig holds all the file descriptors needed to describe a tx or rx diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go index 65b9d7085..98036f367 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem_test.go +++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go @@ -27,12 +27,12 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( @@ -636,7 +636,7 @@ func TestSimpleReceive(t *testing.T) { syscall.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0}) // Wait for packet to be received, then check it. - c.waitForPackets(1, time.After(time.Second), "Error waiting for packet") + c.waitForPackets(1, time.After(5*time.Second), "Timeout waiting for packet") c.mu.Lock() rcvd := []byte(c.packets[0].vv.First()) c.packets = c.packets[:0] diff --git a/pkg/tcpip/link/sharedmem/tx.go b/pkg/tcpip/link/sharedmem/tx.go index ac3577aa6..6b8d7859d 100644 --- a/pkg/tcpip/link/sharedmem/tx.go +++ b/pkg/tcpip/link/sharedmem/tx.go @@ -18,7 +18,7 @@ import ( "math" "syscall" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue" + "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue" ) const ( diff --git a/pkg/tcpip/link/sniffer/BUILD b/pkg/tcpip/link/sniffer/BUILD index 7d0d1781e..1756114e6 100644 --- a/pkg/tcpip/link/sniffer/BUILD +++ b/pkg/tcpip/link/sniffer/BUILD @@ -8,7 +8,7 @@ go_library( "pcap.go", "sniffer.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sniffer", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go index 98581e50e..fc584c6a4 100644 --- a/pkg/tcpip/link/sniffer/sniffer.go +++ b/pkg/tcpip/link/sniffer/sniffer.go @@ -29,11 +29,11 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // LogPackets is a flag used to enable or disable packet logging via the log diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD index e54852d3f..92dce8fac 100644 --- a/pkg/tcpip/link/tun/BUILD +++ b/pkg/tcpip/link/tun/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "tun", srcs = ["tun_unsafe.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/tun", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/tun", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/waitable/BUILD b/pkg/tcpip/link/waitable/BUILD index 89a9eee23..2597d4b3e 100644 --- a/pkg/tcpip/link/waitable/BUILD +++ b/pkg/tcpip/link/waitable/BUILD @@ -7,7 +7,7 @@ go_library( srcs = [ "waitable.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/waitable", + importpath = "gvisor.dev/gvisor/pkg/tcpip/link/waitable", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go index 21690a226..3b6ac2ff7 100644 --- a/pkg/tcpip/link/waitable/waitable.go +++ b/pkg/tcpip/link/waitable/waitable.go @@ -22,10 +22,10 @@ package waitable import ( - "gvisor.googlesource.com/gvisor/pkg/gate" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/gate" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // Endpoint is a waitable link-layer endpoint. diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go index 62054fb7f..56e18ecb0 100644 --- a/pkg/tcpip/link/waitable/waitable_test.go +++ b/pkg/tcpip/link/waitable/waitable_test.go @@ -17,9 +17,9 @@ package waitable import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) type countedEndpoint struct { diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD index 2a355e689..d95d44f56 100644 --- a/pkg/tcpip/network/arp/BUILD +++ b/pkg/tcpip/network/arp/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "arp", srcs = ["arp.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp", + importpath = "gvisor.dev/gvisor/pkg/tcpip/network/arp", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go index a3f2bce3e..cb35635fc 100644 --- a/pkg/tcpip/network/arp/arp.go +++ b/pkg/tcpip/network/arp/arp.go @@ -26,10 +26,10 @@ package arp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( @@ -83,6 +83,10 @@ func (e *endpoint) WritePacket(*stack.Route, *stack.GSO, buffer.Prependable, buf return tcpip.ErrNotSupported } +func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.VectorisedView, loop stack.PacketLooping) *tcpip.Error { + return tcpip.ErrNotSupported +} + func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { v := vv.First() h := header.ARP(v) diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go index 1b971b1a3..66c55821b 100644 --- a/pkg/tcpip/network/arp/arp_test.go +++ b/pkg/tcpip/network/arp/arp_test.go @@ -18,15 +18,15 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" ) const ( diff --git a/pkg/tcpip/network/fragmentation/BUILD b/pkg/tcpip/network/fragmentation/BUILD index bf0a7b99c..118bfc763 100644 --- a/pkg/tcpip/network/fragmentation/BUILD +++ b/pkg/tcpip/network/fragmentation/BUILD @@ -23,7 +23,7 @@ go_library( "reassembler.go", "reassembler_list.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/fragmentation", + importpath = "gvisor.dev/gvisor/pkg/tcpip/network/fragmentation", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/tcpip/network/fragmentation/frag_heap.go b/pkg/tcpip/network/fragmentation/frag_heap.go index 9ad3e5a8a..0b570d25a 100644 --- a/pkg/tcpip/network/fragmentation/frag_heap.go +++ b/pkg/tcpip/network/fragmentation/frag_heap.go @@ -18,7 +18,7 @@ import ( "container/heap" "fmt" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) type fragment struct { diff --git a/pkg/tcpip/network/fragmentation/frag_heap_test.go b/pkg/tcpip/network/fragmentation/frag_heap_test.go index 3a2486ba8..9ececcb9f 100644 --- a/pkg/tcpip/network/fragmentation/frag_heap_test.go +++ b/pkg/tcpip/network/fragmentation/frag_heap_test.go @@ -19,7 +19,7 @@ import ( "reflect" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) var reassambleTestCases = []struct { diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go index e90edb375..1628a82be 100644 --- a/pkg/tcpip/network/fragmentation/fragmentation.go +++ b/pkg/tcpip/network/fragmentation/fragmentation.go @@ -21,7 +21,7 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) // DefaultReassembleTimeout is based on the linux stack: net.ipv4.ipfrag_time. @@ -60,7 +60,7 @@ type Fragmentation struct { // lowMemoryLimit specifies the limit on which we will reach by dropping // fragments after reaching highMemoryLimit. // -// reassemblingTimeout specifes the maximum time allowed to reassemble a packet. +// reassemblingTimeout specifies the maximum time allowed to reassemble a packet. // Fragments are lazily evicted only when a new a packet with an // already existing fragmentation-id arrives after the timeout. func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration) *Fragmentation { @@ -80,7 +80,7 @@ func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout t } } -// Process processes an incoming fragment beloning to an ID +// Process processes an incoming fragment belonging to an ID // and returns a complete packet when all the packets belonging to that ID have been received. func (f *Fragmentation) Process(id uint32, first, last uint16, more bool, vv buffer.VectorisedView) (buffer.VectorisedView, bool) { f.mu.Lock() diff --git a/pkg/tcpip/network/fragmentation/fragmentation_test.go b/pkg/tcpip/network/fragmentation/fragmentation_test.go index 99ded68a3..799798544 100644 --- a/pkg/tcpip/network/fragmentation/fragmentation_test.go +++ b/pkg/tcpip/network/fragmentation/fragmentation_test.go @@ -19,7 +19,7 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) // vv is a helper to build VectorisedView from different strings. diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go index 04f9ab964..8037f734b 100644 --- a/pkg/tcpip/network/fragmentation/reassembler.go +++ b/pkg/tcpip/network/fragmentation/reassembler.go @@ -21,7 +21,7 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) type hole struct { diff --git a/pkg/tcpip/network/hash/BUILD b/pkg/tcpip/network/hash/BUILD index ea520c6ed..e6db5c0b0 100644 --- a/pkg/tcpip/network/hash/BUILD +++ b/pkg/tcpip/network/hash/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "hash", srcs = ["hash.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/hash", + importpath = "gvisor.dev/gvisor/pkg/tcpip/network/hash", visibility = ["//visibility:public"], deps = [ "//pkg/rand", diff --git a/pkg/tcpip/network/hash/hash.go b/pkg/tcpip/network/hash/hash.go index 0c91905dc..6a215938b 100644 --- a/pkg/tcpip/network/hash/hash.go +++ b/pkg/tcpip/network/hash/hash.go @@ -18,8 +18,8 @@ package hash import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/tcpip/header" ) var hashIV = RandN32(1)[0] diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go index 4b822e2c6..8ff428445 100644 --- a/pkg/tcpip/network/ip_test.go +++ b/pkg/tcpip/network/ip_test.go @@ -17,15 +17,15 @@ package ip_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" ) const ( @@ -282,10 +282,10 @@ func TestIPv4ReceiveControl(t *testing.T) { {"Truncated (10 bytes missing)", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, 10}, {"Truncated (missing IPv4 header)", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, header.IPv4MinimumSize + 8}, {"Truncated (missing 'extra info')", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, 4 + header.IPv4MinimumSize + 8}, - {"Truncated (missing ICMP header)", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, header.ICMPv4DstUnreachableMinimumSize + header.IPv4MinimumSize + 8}, + {"Truncated (missing ICMP header)", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, header.ICMPv4MinimumSize + header.IPv4MinimumSize + 8}, {"Port unreachable", 1, 0, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 0}, {"Non-zero fragment offset", 0, 100, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 0}, - {"Zero-length packet", 0, 0, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 2*header.IPv4MinimumSize + header.ICMPv4DstUnreachableMinimumSize + 8}, + {"Zero-length packet", 0, 0, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 2*header.IPv4MinimumSize + header.ICMPv4MinimumSize + 8}, } r, err := buildIPv4Route(localIpv4Addr, "\x0a\x00\x00\xbb") if err != nil { @@ -301,7 +301,7 @@ func TestIPv4ReceiveControl(t *testing.T) { } defer ep.Close() - const dataOffset = header.IPv4MinimumSize*2 + header.ICMPv4MinimumSize + 4 + const dataOffset = header.IPv4MinimumSize*2 + header.ICMPv4MinimumSize view := buffer.NewView(dataOffset + 8) // Create the outer IPv4 header. @@ -319,10 +319,10 @@ func TestIPv4ReceiveControl(t *testing.T) { icmp := header.ICMPv4(view[header.IPv4MinimumSize:]) icmp.SetType(header.ICMPv4DstUnreachable) icmp.SetCode(c.code) - copy(view[header.IPv4MinimumSize+header.ICMPv4MinimumSize:], []byte{0xde, 0xad, 0xbe, 0xef}) + copy(view[header.IPv4MinimumSize+header.ICMPv4PayloadOffset:], []byte{0xde, 0xad, 0xbe, 0xef}) // Create the inner IPv4 header. - ip = header.IPv4(view[header.IPv4MinimumSize+header.ICMPv4MinimumSize+4:]) + ip = header.IPv4(view[header.IPv4MinimumSize+header.ICMPv4MinimumSize:]) ip.Encode(&header.IPv4Fields{ IHL: header.IPv4MinimumSize, TotalLength: 100, diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD index 1b4f29e0c..be84fa63d 100644 --- a/pkg/tcpip/network/ipv4/BUILD +++ b/pkg/tcpip/network/ipv4/BUILD @@ -8,7 +8,7 @@ go_library( "icmp.go", "ipv4.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4", + importpath = "gvisor.dev/gvisor/pkg/tcpip/network/ipv4", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go index 770f56c3d..fbef6947d 100644 --- a/pkg/tcpip/network/ipv4/icmp.go +++ b/pkg/tcpip/network/ipv4/icmp.go @@ -17,9 +17,9 @@ package ipv4 import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // handleControl handles the case when an ICMP packet contains the headers of @@ -68,10 +68,6 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V switch h.Type() { case header.ICMPv4Echo: received.Echo.Increment() - if len(v) < header.ICMPv4EchoMinimumSize { - received.Invalid.Increment() - return - } // Only send a reply if the checksum is valid. wantChecksum := h.Checksum() @@ -93,9 +89,9 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, netHeader, vv) vv := vv.Clone(nil) - vv.TrimFront(header.ICMPv4EchoMinimumSize) - hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv4EchoMinimumSize) - pkt := header.ICMPv4(hdr.Prepend(header.ICMPv4EchoMinimumSize)) + vv.TrimFront(header.ICMPv4MinimumSize) + hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv4MinimumSize) + pkt := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize)) copy(pkt, h) pkt.SetType(header.ICMPv4EchoReply) pkt.SetChecksum(^header.Checksum(pkt, header.ChecksumVV(vv, 0))) @@ -108,25 +104,19 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V case header.ICMPv4EchoReply: received.EchoReply.Increment() - if len(v) < header.ICMPv4EchoMinimumSize { - received.Invalid.Increment() - return - } + e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, netHeader, vv) case header.ICMPv4DstUnreachable: received.DstUnreachable.Increment() - if len(v) < header.ICMPv4DstUnreachableMinimumSize { - received.Invalid.Increment() - return - } - vv.TrimFront(header.ICMPv4DstUnreachableMinimumSize) + + vv.TrimFront(header.ICMPv4MinimumSize) switch h.Code() { case header.ICMPv4PortUnreachable: e.handleControl(stack.ControlPortUnreachable, 0, vv) case header.ICMPv4FragmentationNeeded: - mtu := uint32(binary.BigEndian.Uint16(v[header.ICMPv4DstUnreachableMinimumSize-2:])) + mtu := uint32(binary.BigEndian.Uint16(v[header.ICMPv4PayloadOffset+2:])) e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), vv) } diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 44b1d5b9b..e44a73d96 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -23,12 +23,12 @@ package ipv4 import ( "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/fragmentation" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/hash" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/network/fragmentation" + "gvisor.dev/gvisor/pkg/tcpip/network/hash" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( @@ -232,6 +232,55 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen return nil } +// WriteHeaderIncludedPacket writes a packet already containing a network +// header through the given route. +func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.VectorisedView, loop stack.PacketLooping) *tcpip.Error { + // The packet already has an IP header, but there are a few required + // checks. + ip := header.IPv4(payload.First()) + if !ip.IsValid(payload.Size()) { + return tcpip.ErrInvalidOptionValue + } + + // Always set the total length. + ip.SetTotalLength(uint16(payload.Size())) + + // Set the source address when zero. + if ip.SourceAddress() == tcpip.Address(([]byte{0, 0, 0, 0})) { + ip.SetSourceAddress(r.LocalAddress) + } + + // Set the destination. If the packet already included a destination, + // it will be part of the route. + ip.SetDestinationAddress(r.RemoteAddress) + + // Set the packet ID when zero. + if ip.ID() == 0 { + id := uint32(0) + if payload.Size() > header.IPv4MaximumHeaderSize+8 { + // Packets of 68 bytes or less are required by RFC 791 to not be + // fragmented, so we only assign ids to larger packets. + id = atomic.AddUint32(&ids[hashRoute(r, 0 /* protocol */)%buckets], 1) + } + ip.SetID(uint16(id)) + } + + // Always set the checksum. + ip.SetChecksum(0) + ip.SetChecksum(^ip.CalculateChecksum()) + + if loop&stack.PacketLoop != 0 { + e.HandlePacket(r, payload) + } + if loop&stack.PacketOut == 0 { + return nil + } + + hdr := buffer.NewPrependableFromView(payload.ToView()) + r.Stats().IP.PacketsSent.Increment() + return e.linkEP.WritePacket(r, nil /* gso */, hdr, buffer.VectorisedView{}, ProtocolNumber) +} + // HandlePacket is called by the link layer when new ipv4 packets arrive for // this endpoint. func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go index 10a939287..3207a3d46 100644 --- a/pkg/tcpip/network/ipv4/ipv4_test.go +++ b/pkg/tcpip/network/ipv4/ipv4_test.go @@ -20,16 +20,16 @@ import ( "math/rand" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" ) func TestExcludeBroadcast(t *testing.T) { diff --git a/pkg/tcpip/network/ipv6/BUILD b/pkg/tcpip/network/ipv6/BUILD index 247e14e37..fae7f4507 100644 --- a/pkg/tcpip/network/ipv6/BUILD +++ b/pkg/tcpip/network/ipv6/BUILD @@ -8,7 +8,7 @@ go_library( "icmp.go", "ipv6.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6", + importpath = "gvisor.dev/gvisor/pkg/tcpip/network/ipv6", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go index 9c011e107..5e6a59e91 100644 --- a/pkg/tcpip/network/ipv6/icmp.go +++ b/pkg/tcpip/network/ipv6/icmp.go @@ -17,10 +17,10 @@ package ipv6 import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // handleControl handles the case when an ICMP packet contains the headers of diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go index d8737a616..d46d68e73 100644 --- a/pkg/tcpip/network/ipv6/icmp_test.go +++ b/pkg/tcpip/network/ipv6/icmp_test.go @@ -20,14 +20,14 @@ import ( "strings" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" + "gvisor.dev/gvisor/pkg/waiter" ) const ( diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index bcae98e1f..e3e8739fd 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -21,10 +21,10 @@ package ipv6 import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( @@ -120,6 +120,13 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen return e.linkEP.WritePacket(r, gso, hdr, payload, ProtocolNumber) } +// WriteHeaderIncludedPacker implements stack.NetworkEndpoint. It is not yet +// supported by IPv6. +func (*endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.VectorisedView, loop stack.PacketLooping) *tcpip.Error { + // TODO(b/119580726): Support IPv6 header-included packets. + return tcpip.ErrNotSupported +} + // HandlePacket is called by the link layer when new ipv6 packets arrive for // this endpoint. func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { diff --git a/pkg/tcpip/ports/BUILD b/pkg/tcpip/ports/BUILD index 3ee80c62b..989058413 100644 --- a/pkg/tcpip/ports/BUILD +++ b/pkg/tcpip/ports/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "ports", srcs = ["ports.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/ports", + importpath = "gvisor.dev/gvisor/pkg/tcpip/ports", visibility = ["//:sandbox"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go index a1712b590..315780c0c 100644 --- a/pkg/tcpip/ports/ports.go +++ b/pkg/tcpip/ports/ports.go @@ -20,7 +20,7 @@ import ( "math/rand" "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/ports/ports_test.go b/pkg/tcpip/ports/ports_test.go index 8466c661b..689401661 100644 --- a/pkg/tcpip/ports/ports_test.go +++ b/pkg/tcpip/ports/ports_test.go @@ -17,7 +17,7 @@ package ports import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" ) const ( diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go index 1fa899e7e..3ac381631 100644 --- a/pkg/tcpip/sample/tun_tcp_connect/main.go +++ b/pkg/tcpip/sample/tun_tcp_connect/main.go @@ -50,16 +50,16 @@ import ( "strconv" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/tun" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/link/tun" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/waiter" ) // writer reads from standard input and writes to the endpoint until standard diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go index d47085581..da425394a 100644 --- a/pkg/tcpip/sample/tun_tcp_echo/main.go +++ b/pkg/tcpip/sample/tun_tcp_echo/main.go @@ -29,16 +29,16 @@ import ( "strings" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/tun" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" + "gvisor.dev/gvisor/pkg/tcpip/link/tun" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/waiter" ) var tap = flag.Bool("tap", false, "use tap istead of tun") diff --git a/pkg/tcpip/seqnum/BUILD b/pkg/tcpip/seqnum/BUILD index a63665efc..76b5f4ffa 100644 --- a/pkg/tcpip/seqnum/BUILD +++ b/pkg/tcpip/seqnum/BUILD @@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library") go_library( name = "seqnum", srcs = ["seqnum.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum", + importpath = "gvisor.dev/gvisor/pkg/tcpip/seqnum", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD index 551c3c73e..28d11c797 100644 --- a/pkg/tcpip/stack/BUILD +++ b/pkg/tcpip/stack/BUILD @@ -13,7 +13,7 @@ go_library( "stack_global_state.go", "transport_demuxer.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/stack", + importpath = "gvisor.dev/gvisor/pkg/tcpip/stack", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/stack/linkaddrcache.go b/pkg/tcpip/stack/linkaddrcache.go index b952ad20f..77bb0ccb9 100644 --- a/pkg/tcpip/stack/linkaddrcache.go +++ b/pkg/tcpip/stack/linkaddrcache.go @@ -19,8 +19,8 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" ) const linkAddrCacheSize = 512 // max cache entries diff --git a/pkg/tcpip/stack/linkaddrcache_test.go b/pkg/tcpip/stack/linkaddrcache_test.go index 91b2ffea8..924f4d240 100644 --- a/pkg/tcpip/stack/linkaddrcache_test.go +++ b/pkg/tcpip/stack/linkaddrcache_test.go @@ -20,8 +20,8 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" ) type testaddr struct { diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 50d35de88..30c0dee42 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -19,10 +19,10 @@ import ( "sync" "sync/atomic" - "gvisor.googlesource.com/gvisor/pkg/ilist" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/ilist" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" ) // NIC represents a "network interface card" to which the networking stack is diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index c70533a35..462265281 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -17,10 +17,10 @@ package stack import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/waiter" ) // NetworkEndpointID is the identifier of a network layer protocol endpoint. @@ -174,6 +174,10 @@ type NetworkEndpoint interface { // protocol. WritePacket(r *Route, gso *GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.TransportProtocolNumber, ttl uint8, loop PacketLooping) *tcpip.Error + // WriteHeaderIncludedPacket writes a packet that includes a network + // header to the given destination address. + WriteHeaderIncludedPacket(r *Route, payload buffer.VectorisedView, loop PacketLooping) *tcpip.Error + // ID returns the network protocol endpoint ID. ID() *NetworkEndpointID @@ -357,10 +361,19 @@ type TransportProtocolFactory func() TransportProtocol // instantiate network protocols. type NetworkProtocolFactory func() NetworkProtocol +// UnassociatedEndpointFactory produces endpoints for writing packets not +// associated with a particular transport protocol. Such endpoints can be used +// to write arbitrary packets that include the IP header. +type UnassociatedEndpointFactory interface { + NewUnassociatedRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) +} + var ( transportProtocols = make(map[string]TransportProtocolFactory) networkProtocols = make(map[string]NetworkProtocolFactory) + unassociatedFactory UnassociatedEndpointFactory + linkEPMu sync.RWMutex nextLinkEndpointID tcpip.LinkEndpointID = 1 linkEndpoints = make(map[tcpip.LinkEndpointID]LinkEndpoint) @@ -380,6 +393,13 @@ func RegisterNetworkProtocolFactory(name string, p NetworkProtocolFactory) { networkProtocols[name] = p } +// RegisterUnassociatedFactory registers a factory to produce endpoints not +// associated with any particular transport protocol. This function is intended +// to be called by init() functions of the protocols. +func RegisterUnassociatedFactory(f UnassociatedEndpointFactory) { + unassociatedFactory = f +} + // RegisterLinkEndpoint register a link-layer protocol endpoint and returns an // ID that can be used to refer to it. func RegisterLinkEndpoint(linkEP LinkEndpoint) tcpip.LinkEndpointID { diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go index 55ed02479..391ab4344 100644 --- a/pkg/tcpip/stack/route.go +++ b/pkg/tcpip/stack/route.go @@ -15,10 +15,10 @@ package stack import ( - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" ) // Route represents a route through the networking stack to a given destination. @@ -163,6 +163,18 @@ func (r *Route) WritePacket(gso *GSO, hdr buffer.Prependable, payload buffer.Vec return err } +// WriteHeaderIncludedPacket writes a packet already containing a network +// header through the given route. +func (r *Route) WriteHeaderIncludedPacket(payload buffer.VectorisedView) *tcpip.Error { + if err := r.ref.ep.WriteHeaderIncludedPacket(r, payload, r.loop); err != nil { + r.Stats().IP.OutgoingPacketErrors.Increment() + return err + } + r.ref.nic.stats.Tx.Packets.Increment() + r.ref.nic.stats.Tx.Bytes.IncrementBy(uint64(payload.Size())) + return nil +} + // DefaultTTL returns the default TTL of the underlying network endpoint. func (r *Route) DefaultTTL() uint8 { return r.ref.ep.DefaultTTL() diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 9d8e8cda5..3e8fb2a6c 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -28,13 +28,13 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/ports" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/ports" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -225,6 +225,45 @@ type TCPSACKInfo struct { MaxSACKED seqnum.Value } +// RcvBufAutoTuneParams holds state related to TCP receive buffer auto-tuning. +type RcvBufAutoTuneParams struct { + // MeasureTime is the time at which the current measurement + // was started. + MeasureTime time.Time + + // CopiedBytes is the number of bytes copied to user space since + // this measure began. + CopiedBytes int + + // PrevCopiedBytes is the number of bytes copied to user space in + // the previous RTT period. + PrevCopiedBytes int + + // RcvBufSize is the auto tuned receive buffer size. + RcvBufSize int + + // RTT is the smoothed RTT as measured by observing the time between + // when a byte is first acknowledged and the receipt of data that is at + // least one window beyond the sequence number that was acknowledged. + RTT time.Duration + + // RTTVar is the "round-trip time variation" as defined in section 2 + // of RFC6298. + RTTVar time.Duration + + // RTTMeasureSeqNumber is the highest acceptable sequence number at the + // time this RTT measurement period began. + RTTMeasureSeqNumber seqnum.Value + + // RTTMeasureTime is the absolute time at which the current RTT + // measurement period began. + RTTMeasureTime time.Time + + // Disabled is true if an explicit receive buffer is set for the + // endpoint. + Disabled bool +} + // TCPEndpointState is a copy of the internal state of a TCP endpoint. type TCPEndpointState struct { // ID is a copy of the TransportEndpointID for the endpoint. @@ -240,6 +279,10 @@ type TCPEndpointState struct { // buffer for the endpoint. RcvBufUsed int + // RcvBufAutoTuneParams is used to hold state variables to compute + // the auto tuned receive buffer size. + RcvAutoParams RcvBufAutoTuneParams + // RcvClosed if true, indicates the endpoint has been closed for reading. RcvClosed bool @@ -297,6 +340,8 @@ type Stack struct { networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol linkAddrResolvers map[tcpip.NetworkProtocolNumber]LinkAddressResolver + unassociatedFactory UnassociatedEndpointFactory + demux *transportDemuxer stats tcpip.Stats @@ -399,6 +444,8 @@ func New(network []string, transport []string, opts Options) *Stack { } } + s.unassociatedFactory = unassociatedFactory + // Create the global transport demuxer. s.demux = newTransportDemuxer(s) @@ -531,11 +578,15 @@ func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcp // NewRawEndpoint creates a new raw transport layer endpoint of the given // protocol. Raw endpoints receive all traffic for a given protocol regardless // of address. -func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) { if !s.raw { return nil, tcpip.ErrNotPermitted } + if !associated { + return s.unassociatedFactory.NewUnassociatedRawEndpoint(s, network, transport, waiterQueue) + } + t, ok := s.transportProtocols[transport] if !ok { return nil, tcpip.ErrUnknownProtocol diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 351f63221..959071dbe 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -24,11 +24,11 @@ import ( "strings" "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) const ( @@ -137,6 +137,10 @@ func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr bu return f.linkEP.WritePacket(r, gso, hdr, payload, fakeNetNumber) } +func (*fakeNetworkEndpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.VectorisedView, loop stack.PacketLooping) *tcpip.Error { + return tcpip.ErrNotSupported +} + func (*fakeNetworkEndpoint) Close() {} type fakeNetGoodOption bool diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go index 605bfadeb..cf8a6d129 100644 --- a/pkg/tcpip/stack/transport_demuxer.go +++ b/pkg/tcpip/stack/transport_demuxer.go @@ -19,10 +19,10 @@ import ( "math/rand" "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/hash/jenkins" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" + "gvisor.dev/gvisor/pkg/tcpip/header" ) type protocolIDs struct { diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go index e8a9392b5..788ffcc8c 100644 --- a/pkg/tcpip/stack/transport_test.go +++ b/pkg/tcpip/stack/transport_test.go @@ -17,12 +17,12 @@ package stack_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -192,6 +192,9 @@ func (f *fakeTransportEndpoint) State() uint32 { return 0 } +func (f *fakeTransportEndpoint) ModerateRecvBuf(copied int) { +} + type fakeTransportGoodOption bool type fakeTransportBadOption bool diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 85ef014d0..c4076666a 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -38,8 +38,8 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/waiter" ) // Error represents an error in the netstack error space. Using a special type @@ -66,43 +66,44 @@ func (e *Error) IgnoreStats() bool { // Errors that can be returned by the network stack. var ( - ErrUnknownProtocol = &Error{msg: "unknown protocol"} - ErrUnknownNICID = &Error{msg: "unknown nic id"} - ErrUnknownDevice = &Error{msg: "unknown device"} - ErrUnknownProtocolOption = &Error{msg: "unknown option for protocol"} - ErrDuplicateNICID = &Error{msg: "duplicate nic id"} - ErrDuplicateAddress = &Error{msg: "duplicate address"} - ErrNoRoute = &Error{msg: "no route"} - ErrBadLinkEndpoint = &Error{msg: "bad link layer endpoint"} - ErrAlreadyBound = &Error{msg: "endpoint already bound", ignoreStats: true} - ErrInvalidEndpointState = &Error{msg: "endpoint is in invalid state"} - ErrAlreadyConnecting = &Error{msg: "endpoint is already connecting", ignoreStats: true} - ErrAlreadyConnected = &Error{msg: "endpoint is already connected", ignoreStats: true} - ErrNoPortAvailable = &Error{msg: "no ports are available"} - ErrPortInUse = &Error{msg: "port is in use"} - ErrBadLocalAddress = &Error{msg: "bad local address"} - ErrClosedForSend = &Error{msg: "endpoint is closed for send"} - ErrClosedForReceive = &Error{msg: "endpoint is closed for receive"} - ErrWouldBlock = &Error{msg: "operation would block", ignoreStats: true} - ErrConnectionRefused = &Error{msg: "connection was refused"} - ErrTimeout = &Error{msg: "operation timed out"} - ErrAborted = &Error{msg: "operation aborted"} - ErrConnectStarted = &Error{msg: "connection attempt started", ignoreStats: true} - ErrDestinationRequired = &Error{msg: "destination address is required"} - ErrNotSupported = &Error{msg: "operation not supported"} - ErrQueueSizeNotSupported = &Error{msg: "queue size querying not supported"} - ErrNotConnected = &Error{msg: "endpoint not connected"} - ErrConnectionReset = &Error{msg: "connection reset by peer"} - ErrConnectionAborted = &Error{msg: "connection aborted"} - ErrNoSuchFile = &Error{msg: "no such file"} - ErrInvalidOptionValue = &Error{msg: "invalid option value specified"} - ErrNoLinkAddress = &Error{msg: "no remote link address"} - ErrBadAddress = &Error{msg: "bad address"} - ErrNetworkUnreachable = &Error{msg: "network is unreachable"} - ErrMessageTooLong = &Error{msg: "message too long"} - ErrNoBufferSpace = &Error{msg: "no buffer space available"} - ErrBroadcastDisabled = &Error{msg: "broadcast socket option disabled"} - ErrNotPermitted = &Error{msg: "operation not permitted"} + ErrUnknownProtocol = &Error{msg: "unknown protocol"} + ErrUnknownNICID = &Error{msg: "unknown nic id"} + ErrUnknownDevice = &Error{msg: "unknown device"} + ErrUnknownProtocolOption = &Error{msg: "unknown option for protocol"} + ErrDuplicateNICID = &Error{msg: "duplicate nic id"} + ErrDuplicateAddress = &Error{msg: "duplicate address"} + ErrNoRoute = &Error{msg: "no route"} + ErrBadLinkEndpoint = &Error{msg: "bad link layer endpoint"} + ErrAlreadyBound = &Error{msg: "endpoint already bound", ignoreStats: true} + ErrInvalidEndpointState = &Error{msg: "endpoint is in invalid state"} + ErrAlreadyConnecting = &Error{msg: "endpoint is already connecting", ignoreStats: true} + ErrAlreadyConnected = &Error{msg: "endpoint is already connected", ignoreStats: true} + ErrNoPortAvailable = &Error{msg: "no ports are available"} + ErrPortInUse = &Error{msg: "port is in use"} + ErrBadLocalAddress = &Error{msg: "bad local address"} + ErrClosedForSend = &Error{msg: "endpoint is closed for send"} + ErrClosedForReceive = &Error{msg: "endpoint is closed for receive"} + ErrWouldBlock = &Error{msg: "operation would block", ignoreStats: true} + ErrConnectionRefused = &Error{msg: "connection was refused"} + ErrTimeout = &Error{msg: "operation timed out"} + ErrAborted = &Error{msg: "operation aborted"} + ErrConnectStarted = &Error{msg: "connection attempt started", ignoreStats: true} + ErrDestinationRequired = &Error{msg: "destination address is required"} + ErrNotSupported = &Error{msg: "operation not supported"} + ErrQueueSizeNotSupported = &Error{msg: "queue size querying not supported"} + ErrNotConnected = &Error{msg: "endpoint not connected"} + ErrConnectionReset = &Error{msg: "connection reset by peer"} + ErrConnectionAborted = &Error{msg: "connection aborted"} + ErrNoSuchFile = &Error{msg: "no such file"} + ErrInvalidOptionValue = &Error{msg: "invalid option value specified"} + ErrNoLinkAddress = &Error{msg: "no remote link address"} + ErrBadAddress = &Error{msg: "bad address"} + ErrNetworkUnreachable = &Error{msg: "network is unreachable"} + ErrMessageTooLong = &Error{msg: "message too long"} + ErrNoBufferSpace = &Error{msg: "no buffer space available"} + ErrBroadcastDisabled = &Error{msg: "broadcast socket option disabled"} + ErrNotPermitted = &Error{msg: "operation not permitted"} + ErrAddressFamilyNotSupported = &Error{msg: "address family not supported by protocol"} ) // Errors related to Subnet @@ -339,6 +340,10 @@ type Endpoint interface { // get the actual result. The first call to Connect after the socket has // connected returns nil. Calling connect again results in ErrAlreadyConnected. // Anything else -- the attempt to connect failed. + // + // If address.Addr is empty, this means that Enpoint has to be + // disconnected if this is supported, otherwise + // ErrAddressFamilyNotSupported must be returned. Connect(address FullAddress) *Error // Shutdown closes the read and/or write end of the endpoint connection @@ -381,6 +386,13 @@ type Endpoint interface { // State returns a socket's lifecycle state. The returned value is // protocol-specific and is primarily used for diagnostics. State() uint32 + + // ModerateRecvBuf should be called everytime data is copied to the user + // space. This allows for dynamic tuning of recv buffer space for a + // given socket. + // + // NOTE: This method is a no-op for sockets other than TCP. + ModerateRecvBuf(copied int) } // WriteOptions contains options for Endpoint.Write. @@ -472,6 +484,22 @@ type KeepaliveIntervalOption time.Duration // closed. type KeepaliveCountOption int +// CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get +// the current congestion control algorithm. +type CongestionControlOption string + +// AvailableCongestionControlOption is used to query the supported congestion +// control algorithms. +type AvailableCongestionControlOption string + +// ModerateReceiveBufferOption allows the caller to enable/disable TCP receive +// buffer moderation. +type ModerateReceiveBufferOption bool + +// MaxSegOption is used by SetSockOpt/GetSockOpt to set/get the current +// Maximum Segment Size(MSS) value as specified using the TCP_MAXSEG option. +type MaxSegOption int + // MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default // TTL value for multicast messages. The default is 1. type MulticastTTLOption uint8 @@ -515,7 +543,7 @@ type BroadcastOption int // Route is a row in the routing table. It specifies through which NIC (and // gateway) sets of packets should be routed. A row is considered viable if the -// masked target address matches the destination adddress in the row. +// masked target address matches the destination address in the row. type Route struct { // Destination is the address that must be matched against the masked // target address to check if this row is viable. diff --git a/pkg/tcpip/transport/icmp/BUILD b/pkg/tcpip/transport/icmp/BUILD index 84a2b53b7..62182a3e6 100644 --- a/pkg/tcpip/transport/icmp/BUILD +++ b/pkg/tcpip/transport/icmp/BUILD @@ -23,8 +23,8 @@ go_library( "icmp_packet_list.go", "protocol.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp", - imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"], + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/icmp", + imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ "//pkg/sleep", diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index b8005093a..a80ceafd0 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -18,11 +18,11 @@ import ( "encoding/binary" "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // +stateify savable @@ -127,6 +127,9 @@ func (e *endpoint) Close() { e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } +// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. +func (e *endpoint) ModerateRecvBuf(copied int) {} + // Read reads data from the endpoint. This method does not block if // there is no data pending. func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { @@ -288,7 +291,7 @@ func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, <-c switch e.netProto { case header.IPv4ProtocolNumber: - err = e.send4(route, v) + err = send4(route, e.id.LocalPort, v) case header.IPv6ProtocolNumber: err = send6(route, e.id.LocalPort, v) @@ -349,20 +352,20 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { } } -func (e *endpoint) send4(r *stack.Route, data buffer.View) *tcpip.Error { - if len(data) < header.ICMPv4EchoMinimumSize { +func send4(r *stack.Route, ident uint16, data buffer.View) *tcpip.Error { + if len(data) < header.ICMPv4MinimumSize { return tcpip.ErrInvalidEndpointState } // Set the ident to the user-specified port. Sequence number should // already be set by the user. - binary.BigEndian.PutUint16(data[header.ICMPv4MinimumSize:], e.id.LocalPort) + binary.BigEndian.PutUint16(data[header.ICMPv4PayloadOffset:], ident) - hdr := buffer.NewPrependable(header.ICMPv4EchoMinimumSize + int(r.MaxHeaderLength())) + hdr := buffer.NewPrependable(header.ICMPv4MinimumSize + int(r.MaxHeaderLength())) - icmpv4 := header.ICMPv4(hdr.Prepend(header.ICMPv4EchoMinimumSize)) + icmpv4 := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize)) copy(icmpv4, data) - data = data[header.ICMPv4EchoMinimumSize:] + data = data[header.ICMPv4MinimumSize:] // Linux performs these basic checks. if icmpv4.Type() != header.ICMPv4Echo || icmpv4.Code() != 0 { @@ -419,6 +422,11 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { e.mu.Lock() defer e.mu.Unlock() + if addr.Addr == "" { + // AF_UNSPEC isn't supported. + return tcpip.ErrAddressFamilyNotSupported + } + nicid := addr.NIC localPort := uint16(0) switch e.state { diff --git a/pkg/tcpip/transport/icmp/endpoint_state.go b/pkg/tcpip/transport/icmp/endpoint_state.go index 332b3cd33..99b8c4093 100644 --- a/pkg/tcpip/transport/icmp/endpoint_state.go +++ b/pkg/tcpip/transport/icmp/endpoint_state.go @@ -15,9 +15,9 @@ package icmp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // saveData saves icmpPacket.data field. diff --git a/pkg/tcpip/transport/icmp/protocol.go b/pkg/tcpip/transport/icmp/protocol.go index 954fde9d8..7fdba5d56 100644 --- a/pkg/tcpip/transport/icmp/protocol.go +++ b/pkg/tcpip/transport/icmp/protocol.go @@ -26,12 +26,12 @@ import ( "encoding/binary" "fmt" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/raw" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -90,19 +90,18 @@ func (p *protocol) NewRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProt func (p *protocol) MinimumPacketSize() int { switch p.number { case ProtocolNumber4: - return header.ICMPv4EchoMinimumSize + return header.ICMPv4MinimumSize case ProtocolNumber6: return header.ICMPv6EchoMinimumSize } panic(fmt.Sprint("unknown protocol number: ", p.number)) } -// ParsePorts returns the source and destination ports stored in the given icmp -// packet. +// ParsePorts in case of ICMP sets src to 0, dst to ICMP ID, and err to nil. func (p *protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { switch p.number { case ProtocolNumber4: - return 0, binary.BigEndian.Uint16(v[header.ICMPv4MinimumSize:]), nil + return 0, binary.BigEndian.Uint16(v[header.ICMPv4PayloadOffset:]), nil case ProtocolNumber6: return 0, binary.BigEndian.Uint16(v[header.ICMPv6MinimumSize:]), nil } diff --git a/pkg/tcpip/transport/raw/BUILD b/pkg/tcpip/transport/raw/BUILD index 6d3f0130e..bc4b255b4 100644 --- a/pkg/tcpip/transport/raw/BUILD +++ b/pkg/tcpip/transport/raw/BUILD @@ -1,6 +1,4 @@ -package( - licenses = ["notice"], # Apache 2.0 -) +package(licenses = ["notice"]) load("//tools/go_generics:defs.bzl", "go_template_instance") load("//tools/go_stateify:defs.bzl", "go_library") @@ -23,9 +21,10 @@ go_library( "endpoint.go", "endpoint_state.go", "packet_list.go", + "protocol.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw", - imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"], + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/raw", + imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ "//pkg/log", diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index e4ff50c91..a29587658 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -16,7 +16,7 @@ // sockets allow applications to: // // * manually write and inspect transport layer headers and payloads -// * receive all traffic of a given transport protcol (e.g. ICMP or UDP) +// * receive all traffic of a given transport protocol (e.g. ICMP or UDP) // * optionally write and inspect network layer and link layer headers for // packets // @@ -29,11 +29,11 @@ package raw import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // +stateify savable @@ -67,6 +67,7 @@ type endpoint struct { netProto tcpip.NetworkProtocolNumber transProto tcpip.TransportProtocolNumber waiterQueue *waiter.Queue + associated bool // The following fields are used to manage the receive queue and are // protected by rcvMu. @@ -97,8 +98,12 @@ type endpoint struct { } // NewEndpoint returns a raw endpoint for the given protocols. -// TODO(b/129292371): IP_HDRINCL, IPPROTO_RAW, and AF_PACKET. +// TODO(b/129292371): IP_HDRINCL and AF_PACKET. func NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { + return newEndpoint(stack, netProto, transProto, waiterQueue, true /* associated */) +} + +func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) { if netProto != header.IPv4ProtocolNumber { return nil, tcpip.ErrUnknownProtocol } @@ -110,6 +115,16 @@ func NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, trans waiterQueue: waiterQueue, rcvBufSizeMax: 32 * 1024, sndBufSize: 32 * 1024, + associated: associated, + } + + // Unassociated endpoints are write-only and users call Write() with IP + // headers included. Because they're write-only, We don't need to + // register with the stack. + if !associated { + ep.rcvBufSizeMax = 0 + ep.waiterQueue = nil + return ep, nil } if err := ep.stack.RegisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep); err != nil { @@ -124,7 +139,7 @@ func (ep *endpoint) Close() { ep.mu.Lock() defer ep.mu.Unlock() - if ep.closed { + if ep.closed || !ep.associated { return } @@ -142,13 +157,23 @@ func (ep *endpoint) Close() { if ep.connected { ep.route.Release() + ep.connected = false } + ep.closed = true + ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } +// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. +func (ep *endpoint) ModerateRecvBuf(copied int) {} + // Read implements tcpip.Endpoint.Read. func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { + if !ep.associated { + return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidOptionValue + } + ep.rcvMu.Lock() // If there's no data to read, return that read would block or that the @@ -189,6 +214,33 @@ func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (uintp return 0, nil, tcpip.ErrInvalidEndpointState } + payloadBytes, err := payload.Get(payload.Size()) + if err != nil { + ep.mu.RUnlock() + return 0, nil, err + } + + // If this is an unassociated socket and callee provided a nonzero + // destination address, route using that address. + if !ep.associated { + ip := header.IPv4(payloadBytes) + if !ip.IsValid(payload.Size()) { + ep.mu.RUnlock() + return 0, nil, tcpip.ErrInvalidOptionValue + } + dstAddr := ip.DestinationAddress() + // Update dstAddr with the address in the IP header, unless + // opts.To is set (e.g. if sendto specifies a specific + // address). + if dstAddr != tcpip.Address([]byte{0, 0, 0, 0}) && opts.To == nil { + opts.To = &tcpip.FullAddress{ + NIC: 0, // NIC is unset. + Addr: dstAddr, // The address from the payload. + Port: 0, // There are no ports here. + } + } + } + // Did the user caller provide a destination? If not, use the connected // destination. if opts.To == nil { @@ -213,12 +265,12 @@ func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (uintp return 0, nil, tcpip.ErrInvalidEndpointState } - n, ch, err := ep.finishWrite(payload, savedRoute) + n, ch, err := ep.finishWrite(payloadBytes, savedRoute) ep.mu.Unlock() return n, ch, err } - n, ch, err := ep.finishWrite(payload, &ep.route) + n, ch, err := ep.finishWrite(payloadBytes, &ep.route) ep.mu.RUnlock() return n, ch, err } @@ -245,7 +297,7 @@ func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (uintp return 0, nil, err } - n, ch, err := ep.finishWrite(payload, &route) + n, ch, err := ep.finishWrite(payloadBytes, &route) route.Release() ep.mu.RUnlock() return n, ch, err @@ -253,7 +305,7 @@ func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (uintp // finishWrite writes the payload to a route. It resolves the route if // necessary. It's really just a helper to make defer unnecessary in Write. -func (ep *endpoint) finishWrite(payload tcpip.Payload, route *stack.Route) (uintptr, <-chan struct{}, *tcpip.Error) { +func (ep *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (uintptr, <-chan struct{}, *tcpip.Error) { // We may need to resolve the route (match a link layer address to the // network address). If that requires blocking (e.g. to use ARP), // return a channel on which the caller can wait. @@ -266,13 +318,14 @@ func (ep *endpoint) finishWrite(payload tcpip.Payload, route *stack.Route) (uint } } - payloadBytes, err := payload.Get(payload.Size()) - if err != nil { - return 0, nil, err - } - switch ep.netProto { case header.IPv4ProtocolNumber: + if !ep.associated { + if err := route.WriteHeaderIncludedPacket(buffer.View(payloadBytes).ToVectorisedView()); err != nil { + return 0, nil, err + } + break + } hdr := buffer.NewPrependable(len(payloadBytes) + int(route.MaxHeaderLength())) if err := route.WritePacket(nil /* gso */, hdr, buffer.View(payloadBytes).ToVectorisedView(), ep.transProto, route.DefaultTTL()); err != nil { return 0, nil, err @@ -295,6 +348,11 @@ func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { ep.mu.Lock() defer ep.mu.Unlock() + if addr.Addr == "" { + // AF_UNSPEC isn't supported. + return tcpip.ErrAddressFamilyNotSupported + } + if ep.closed { return tcpip.ErrInvalidEndpointState } @@ -327,15 +385,17 @@ func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { } defer route.Release() - // Re-register the endpoint with the appropriate NIC. - if err := ep.stack.RegisterRawTransportEndpoint(addr.NIC, ep.netProto, ep.transProto, ep); err != nil { - return err + if ep.associated { + // Re-register the endpoint with the appropriate NIC. + if err := ep.stack.RegisterRawTransportEndpoint(addr.NIC, ep.netProto, ep.transProto, ep); err != nil { + return err + } + ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep) + ep.registeredNIC = nic } - ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep) - // Save the route and NIC we've connected via. + // Save the route we've connected via. ep.route = route.Clone() - ep.registeredNIC = nic ep.connected = true return nil @@ -378,14 +438,16 @@ func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { return tcpip.ErrBadLocalAddress } - // Re-register the endpoint with the appropriate NIC. - if err := ep.stack.RegisterRawTransportEndpoint(addr.NIC, ep.netProto, ep.transProto, ep); err != nil { - return err + if ep.associated { + // Re-register the endpoint with the appropriate NIC. + if err := ep.stack.RegisterRawTransportEndpoint(addr.NIC, ep.netProto, ep.transProto, ep); err != nil { + return err + } + ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep) + ep.registeredNIC = addr.NIC + ep.boundNIC = addr.NIC } - ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep) - ep.registeredNIC = addr.NIC - ep.boundNIC = addr.NIC ep.boundAddr = addr.Addr ep.bound = true diff --git a/pkg/tcpip/transport/raw/endpoint_state.go b/pkg/tcpip/transport/raw/endpoint_state.go index e8907ebb1..cb5534d90 100644 --- a/pkg/tcpip/transport/raw/endpoint_state.go +++ b/pkg/tcpip/transport/raw/endpoint_state.go @@ -15,9 +15,9 @@ package raw import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // saveData saves packet.data field. diff --git a/pkg/tcpip/transport/raw/protocol.go b/pkg/tcpip/transport/raw/protocol.go new file mode 100644 index 000000000..783c21e6b --- /dev/null +++ b/pkg/tcpip/transport/raw/protocol.go @@ -0,0 +1,32 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raw + +import ( + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" +) + +type factory struct{} + +// NewUnassociatedRawEndpoint implements stack.UnassociatedEndpointFactory. +func (factory) NewUnassociatedRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { + return newEndpoint(stack, netProto, transProto, waiterQueue, false /* associated */) +} + +func init() { + stack.RegisterUnassociatedFactory(factory{}) +} diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index 9db38196b..4cd25e8e2 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -21,6 +21,7 @@ go_library( "accept.go", "connect.go", "cubic.go", + "cubic_state.go", "endpoint.go", "endpoint_state.go", "forwarder.go", @@ -38,8 +39,8 @@ go_library( "tcp_segment_list.go", "timer.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp", - imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"], + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcp", + imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ "//pkg/rand", diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index d05259c0a..52fd1bfa3 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -22,13 +22,13 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -213,6 +213,7 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i n.route = s.route.Clone() n.effectiveNetProtos = []tcpip.NetworkProtocolNumber{s.route.NetProto} n.rcvBufSize = int(l.rcvWnd) + n.amss = mssForRoute(&n.route) n.maybeEnableTimestamp(rcvdSynOpts) n.maybeEnableSACKPermitted(rcvdSynOpts) @@ -232,7 +233,11 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i // The receiver at least temporarily has a zero receive window scale, // but the caller may change it (before starting the protocol loop). n.snd = newSender(n, iss, irs, s.window, rcvdSynOpts.MSS, rcvdSynOpts.WS) - n.rcv = newReceiver(n, irs, l.rcvWnd, 0) + n.rcv = newReceiver(n, irs, seqnum.Size(n.initialReceiveWindow()), 0, seqnum.Size(n.receiveBufferSize())) + // Bootstrap the auto tuning algorithm. Starting at zero will result in + // a large step function on the first window adjustment causing the + // window to grow to a really large value. + n.rcvAutoParams.prevCopied = n.initialReceiveWindow() return n, nil } @@ -249,7 +254,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head } // Perform the 3-way handshake. - h := newHandshake(ep, l.rcvWnd) + h := newHandshake(ep, seqnum.Size(ep.initialReceiveWindow())) h.resetToSynRcvd(cookie, irs, opts) if err := h.execute(); err != nil { @@ -359,16 +364,19 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { return } cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS)) - // Send SYN with window scaling because we currently + + // Send SYN without window scaling because we currently // dont't encode this information in the cookie. // // Enable Timestamp option if the original syn did have // the timestamp option specified. + mss := mssForRoute(&s.route) synOpts := header.TCPSynOptions{ WS: -1, TS: opts.TS, TSVal: tcpTimeStamp(timeStampOffset()), TSEcr: opts.TSVal, + MSS: uint16(mss), } sendSynTCP(&s.route, s.id, header.TCPFlagSyn|header.TCPFlagAck, cookie, s.sequenceNumber+1, ctx.rcvWnd, synOpts) e.stack.Stats().TCP.ListenOverflowSynCookieSent.Increment() diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index dd671f7ce..00d2ae524 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -18,14 +18,14 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // maxSegmentsPerWake is the maximum number of segments to process in the main @@ -78,6 +78,9 @@ type handshake struct { // mss is the maximum segment size received from the peer. mss uint16 + // amss is the maximum segment size advertised by us to the peer. + amss uint16 + // sndWndScale is the send window scale, as defined in RFC 1323. A // negative value means no scaling is supported by the peer. sndWndScale int @@ -87,11 +90,24 @@ type handshake struct { } func newHandshake(ep *endpoint, rcvWnd seqnum.Size) handshake { + rcvWndScale := ep.rcvWndScaleForHandshake() + + // Round-down the rcvWnd to a multiple of wndScale. This ensures that the + // window offered in SYN won't be reduced due to the loss of precision if + // window scaling is enabled after the handshake. + rcvWnd = (rcvWnd >> uint8(rcvWndScale)) << uint8(rcvWndScale) + + // Ensure we can always accept at least 1 byte if the scale specified + // was too high for the provided rcvWnd. + if rcvWnd == 0 { + rcvWnd = 1 + } + h := handshake{ ep: ep, active: true, rcvWnd: rcvWnd, - rcvWndScale: FindWndScale(rcvWnd), + rcvWndScale: int(rcvWndScale), } h.resetState() return h @@ -224,7 +240,7 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error { h.ep.state = StateSynRecv h.ep.mu.Unlock() synOpts := header.TCPSynOptions{ - WS: h.rcvWndScale, + WS: int(h.effectiveRcvWndScale()), TS: rcvSynOpts.TS, TSVal: h.ep.timestamp(), TSEcr: h.ep.recentTS, @@ -233,6 +249,7 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error { // permits SACK. This is not explicitly defined in the RFC but // this is the behaviour implemented by Linux. SACKPermitted: rcvSynOpts.SACKPermitted, + MSS: h.ep.amss, } sendSynTCP(&s.route, h.ep.id, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts) @@ -277,6 +294,7 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { TSVal: h.ep.timestamp(), TSEcr: h.ep.recentTS, SACKPermitted: h.ep.sackPermitted, + MSS: h.ep.amss, } sendSynTCP(&s.route, h.ep.id, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts) return nil @@ -419,12 +437,15 @@ func (h *handshake) execute() *tcpip.Error { // Send the initial SYN segment and loop until the handshake is // completed. + h.ep.amss = mssForRoute(&h.ep.route) + synOpts := header.TCPSynOptions{ WS: h.rcvWndScale, TS: true, TSVal: h.ep.timestamp(), TSEcr: h.ep.recentTS, SACKPermitted: bool(sackEnabled), + MSS: h.ep.amss, } // Execute is also called in a listen context so we want to make sure we @@ -433,6 +454,11 @@ func (h *handshake) execute() *tcpip.Error { if h.state == handshakeSynRcvd { synOpts.TS = h.ep.sendTSOk synOpts.SACKPermitted = h.ep.sackPermitted && bool(sackEnabled) + if h.sndWndScale < 0 { + // Disable window scaling if the peer did not send us + // the window scaling option. + synOpts.WS = -1 + } } sendSynTCP(&h.ep.route, h.ep.id, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts) for h.state != handshakeCompleted { @@ -554,13 +580,6 @@ func makeSynOptions(opts header.TCPSynOptions) []byte { } func sendSynTCP(r *stack.Route, id stack.TransportEndpointID, flags byte, seq, ack seqnum.Value, rcvWnd seqnum.Size, opts header.TCPSynOptions) *tcpip.Error { - // The MSS in opts is automatically calculated as this function is - // called from many places and we don't want every call point being - // embedded with the MSS calculation. - if opts.MSS == 0 { - opts.MSS = uint16(r.MTU() - header.TCPMinimumSize) - } - options := makeSynOptions(opts) err := sendTCP(r, id, buffer.VectorisedView{}, r.DefaultTTL(), flags, seq, ack, rcvWnd, options, nil) putOptions(options) @@ -861,7 +880,8 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { // This is an active connection, so we must initiate the 3-way // handshake, and then inform potential waiters about its // completion. - h := newHandshake(e, seqnum.Size(e.receiveBufferAvailable())) + initialRcvWnd := e.initialReceiveWindow() + h := newHandshake(e, seqnum.Size(initialRcvWnd)) e.mu.Lock() h.ep.state = StateSynSent e.mu.Unlock() @@ -886,8 +906,14 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { // (indicated by a negative send window scale). e.snd = newSender(e, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale) + rcvBufSize := seqnum.Size(e.receiveBufferSize()) e.rcvListMu.Lock() - e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale()) + e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale(), rcvBufSize) + // boot strap the auto tuning algorithm. Starting at zero will + // result in a large step function on the first proper causing + // the window to just go to a really large value after the first + // RTT itself. + e.rcvAutoParams.prevCopied = initialRcvWnd e.rcvListMu.Unlock() } diff --git a/pkg/tcpip/transport/tcp/cubic.go b/pkg/tcpip/transport/tcp/cubic.go index e618cd2b9..7b1f5e763 100644 --- a/pkg/tcpip/transport/tcp/cubic.go +++ b/pkg/tcpip/transport/tcp/cubic.go @@ -23,6 +23,7 @@ import ( // control algorithm state. // // See: https://tools.ietf.org/html/rfc8312. +// +stateify savable type cubicState struct { // wLastMax is the previous wMax value. wLastMax float64 @@ -33,7 +34,7 @@ type cubicState struct { // t denotes the time when the current congestion avoidance // was entered. - t time.Time + t time.Time `state:".(unixTime)"` // numCongestionEvents tracks the number of congestion events since last // RTO. diff --git a/pkg/tcpip/transport/tcp/cubic_state.go b/pkg/tcpip/transport/tcp/cubic_state.go new file mode 100644 index 000000000..d0f58cfaf --- /dev/null +++ b/pkg/tcpip/transport/tcp/cubic_state.go @@ -0,0 +1,29 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp + +import ( + "time" +) + +// saveT is invoked by stateify. +func (c *cubicState) saveT() unixTime { + return unixTime{c.t.Unix(), c.t.UnixNano()} +} + +// loadT is invoked by stateify. +func (c *cubicState) loadT(unix unixTime) { + c.t = time.Unix(unix.second, unix.nano) +} diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go index 43bcfa070..d9f79e8c5 100644 --- a/pkg/tcpip/transport/tcp/dual_stack_test.go +++ b/pkg/tcpip/transport/tcp/dual_stack_test.go @@ -18,15 +18,15 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/checker" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" + "gvisor.dev/gvisor/pkg/waiter" ) func TestV4MappedConnectOnV6Only(t *testing.T) { diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 23422ca5e..beb90afb5 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -17,19 +17,20 @@ package tcp import ( "fmt" "math" + "strings" "sync" "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tmutex" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tmutex" + "gvisor.dev/gvisor/pkg/waiter" ) // EndpointState represents the state of a TCP endpoint. @@ -116,6 +117,7 @@ const ( notifyDrain notifyReset notifyKeepaliveChanged + notifyMSSChanged ) // SACKInfo holds TCP SACK related information for a given endpoint. @@ -131,6 +133,42 @@ type SACKInfo struct { NumBlocks int } +// rcvBufAutoTuneParams are used to hold state variables to compute +// the auto tuned recv buffer size. +// +// +stateify savable +type rcvBufAutoTuneParams struct { + // measureTime is the time at which the current measurement + // was started. + measureTime time.Time `state:".(unixTime)"` + + // copied is the number of bytes copied out of the receive + // buffers since this measure began. + copied int + + // prevCopied is the number of bytes copied out of the receive + // buffers in the previous RTT period. + prevCopied int + + // rtt is the non-smoothed minimum RTT as measured by observing the time + // between when a byte is first acknowledged and the receipt of data + // that is at least one window beyond the sequence number that was + // acknowledged. + rtt time.Duration + + // rttMeasureSeqNumber is the highest acceptable sequence number at the + // time this RTT measurement period began. + rttMeasureSeqNumber seqnum.Value + + // rttMeasureTime is the absolute time at which the current rtt + // measurement period began. + rttMeasureTime time.Time `state:".(unixTime)"` + + // disabled is true if an explicit receive buffer is set for the + // endpoint. + disabled bool +} + // endpoint represents a TCP endpoint. This struct serves as the interface // between users of the endpoint and the protocol implementation; it is legal to // have concurrent goroutines make calls into the endpoint, they are properly @@ -164,18 +202,23 @@ type endpoint struct { // to indicate to users that no more data is coming. // // rcvListMu can be taken after the endpoint mu below. - rcvListMu sync.Mutex `state:"nosave"` - rcvList segmentList `state:"wait"` - rcvClosed bool - rcvBufSize int - rcvBufUsed int + rcvListMu sync.Mutex `state:"nosave"` + rcvList segmentList `state:"wait"` + rcvClosed bool + rcvBufSize int + rcvBufUsed int + rcvAutoParams rcvBufAutoTuneParams + // zeroWindow indicates that the window was closed due to receive buffer + // space being filled up. This is set by the worker goroutine before + // moving a segment to the rcvList. This setting is cleared by the + // endpoint when a Read() call reads enough data for the new window to + // be non-zero. + zeroWindow bool // The following fields are protected by the mutex. mu sync.RWMutex `state:"nosave"` id stack.TransportEndpointID - // state endpointState `state:".(endpointState)"` - // pState ProtocolState state EndpointState `state:".(EndpointState)"` isPortReserved bool `state:"manual"` @@ -269,6 +312,10 @@ type endpoint struct { // in SYN-RCVD state. synRcvdCount int + // userMSS if non-zero is the MSS value explicitly set by the user + // for this endpoint using the TCP_MAXSEG setsockopt. + userMSS int + // The following fields are used to manage the send buffer. When // segments are ready to be sent, they are added to sndQueue and the // protocol goroutine is signaled via sndWaker. @@ -286,7 +333,7 @@ type endpoint struct { // cc stores the name of the Congestion Control algorithm to use for // this endpoint. - cc CongestionControlOption + cc tcpip.CongestionControlOption // The following are used when a "packet too big" control packet is // received. They are protected by sndBufMu. They are used to @@ -338,6 +385,9 @@ type endpoint struct { bindAddress tcpip.Address connectingAddress tcpip.Address + // amss is the advertised MSS to the peer by this endpoint. + amss uint16 + gso *stack.GSO } @@ -372,8 +422,8 @@ func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waite netProto: netProto, waiterQueue: waiterQueue, state: StateInitial, - rcvBufSize: DefaultBufferSize, - sndBufSize: DefaultBufferSize, + rcvBufSize: DefaultReceiveBufferSize, + sndBufSize: DefaultSendBufferSize, sndMTU: int(math.MaxInt32), reuseAddr: true, keepalive: keepalive{ @@ -394,11 +444,16 @@ func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waite e.rcvBufSize = rs.Default } - var cs CongestionControlOption + var cs tcpip.CongestionControlOption if err := stack.TransportProtocolOption(ProtocolNumber, &cs); err == nil { e.cc = cs } + var mrb tcpip.ModerateReceiveBufferOption + if err := stack.TransportProtocolOption(ProtocolNumber, &mrb); err == nil { + e.rcvAutoParams.disabled = !bool(mrb) + } + if p := stack.GetTCPProbe(); p != nil { e.probe = p } @@ -407,6 +462,7 @@ func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waite e.workMu.Init() e.workMu.Lock() e.tsOffset = timeStampOffset() + return e } @@ -550,6 +606,83 @@ func (e *endpoint) cleanupLocked() { tcpip.DeleteDanglingEndpoint(e) } +// initialReceiveWindow returns the initial receive window to advertise in the +// SYN/SYN-ACK. +func (e *endpoint) initialReceiveWindow() int { + rcvWnd := e.receiveBufferAvailable() + if rcvWnd > math.MaxUint16 { + rcvWnd = math.MaxUint16 + } + routeWnd := InitialCwnd * int(mssForRoute(&e.route)) * 2 + if rcvWnd > routeWnd { + rcvWnd = routeWnd + } + return rcvWnd +} + +// ModerateRecvBuf adjusts the receive buffer and the advertised window +// based on the number of bytes copied to user space. +func (e *endpoint) ModerateRecvBuf(copied int) { + e.rcvListMu.Lock() + if e.rcvAutoParams.disabled { + e.rcvListMu.Unlock() + return + } + now := time.Now() + if rtt := e.rcvAutoParams.rtt; rtt == 0 || now.Sub(e.rcvAutoParams.measureTime) < rtt { + e.rcvAutoParams.copied += copied + e.rcvListMu.Unlock() + return + } + prevRTTCopied := e.rcvAutoParams.copied + copied + prevCopied := e.rcvAutoParams.prevCopied + rcvWnd := 0 + if prevRTTCopied > prevCopied { + // The minimal receive window based on what was copied by the app + // in the immediate preceding RTT and some extra buffer for 16 + // segments to account for variations. + // We multiply by 2 to account for packet losses. + rcvWnd = prevRTTCopied*2 + 16*int(e.amss) + + // Scale for slow start based on bytes copied in this RTT vs previous. + grow := (rcvWnd * (prevRTTCopied - prevCopied)) / prevCopied + + // Multiply growth factor by 2 again to account for sender being + // in slow-start where the sender grows it's congestion window + // by 100% per RTT. + rcvWnd += grow * 2 + + // Make sure auto tuned buffer size can always receive upto 2x + // the initial window of 10 segments. + if minRcvWnd := int(e.amss) * InitialCwnd * 2; rcvWnd < minRcvWnd { + rcvWnd = minRcvWnd + } + + // Cap the auto tuned buffer size by the maximum permissible + // receive buffer size. + if max := e.maxReceiveBufferSize(); rcvWnd > max { + rcvWnd = max + } + + // We do not adjust downwards as that can cause the receiver to + // reject valid data that might already be in flight as the + // acceptable window will shrink. + if rcvWnd > e.rcvBufSize { + e.rcvBufSize = rcvWnd + e.notifyProtocolGoroutine(notifyReceiveWindowChanged) + } + + // We only update prevCopied when we grow the buffer because in cases + // where prevCopied > prevRTTCopied the existing buffer is already big + // enough to handle the current rate and we don't need to do any + // adjustments. + e.rcvAutoParams.prevCopied = prevRTTCopied + } + e.rcvAutoParams.measureTime = now + e.rcvAutoParams.copied = 0 + e.rcvListMu.Unlock() +} + // Read reads data from the endpoint. func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { e.mu.RLock() @@ -595,10 +728,12 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) { s.decRef() } - scale := e.rcv.rcvWndScale - wasZero := e.zeroReceiveWindow(scale) e.rcvBufUsed -= len(v) - if wasZero && !e.zeroReceiveWindow(scale) { + // If the window was zero before this read and if the read freed up + // enough buffer space for the scaled window to be non-zero then notify + // the protocol goroutine to send a window update. + if e.zeroWindow && !e.zeroReceiveWindow(e.rcv.rcvWndScale) { + e.zeroWindow = false e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow) } @@ -785,6 +920,17 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { } return nil + case tcpip.MaxSegOption: + userMSS := v + if userMSS < header.TCPMinimumMSS || userMSS > header.TCPMaximumMSS { + return tcpip.ErrInvalidOptionValue + } + e.mu.Lock() + e.userMSS = int(userMSS) + e.mu.Unlock() + e.notifyProtocolGoroutine(notifyMSSChanged) + return nil + case tcpip.ReceiveBufferSizeOption: // Make sure the receive buffer size is within the min and max // allowed. @@ -818,9 +964,10 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { size = math.MaxInt32 / 2 } - wasZero := e.zeroReceiveWindow(scale) e.rcvBufSize = size - if wasZero && !e.zeroReceiveWindow(scale) { + e.rcvAutoParams.disabled = true + if e.zeroWindow && !e.zeroReceiveWindow(scale) { + e.zeroWindow = false mask |= notifyNonZeroReceiveWindow } e.rcvListMu.Unlock() @@ -898,6 +1045,40 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { e.mu.Unlock() return nil + case tcpip.CongestionControlOption: + // Query the available cc algorithms in the stack and + // validate that the specified algorithm is actually + // supported in the stack. + var avail tcpip.AvailableCongestionControlOption + if err := e.stack.TransportProtocolOption(ProtocolNumber, &avail); err != nil { + return err + } + availCC := strings.Split(string(avail), " ") + for _, cc := range availCC { + if v == tcpip.CongestionControlOption(cc) { + // Acquire the work mutex as we may need to + // reinitialize the congestion control state. + e.mu.Lock() + state := e.state + e.cc = v + e.mu.Unlock() + switch state { + case StateEstablished: + e.workMu.Lock() + e.mu.Lock() + if e.state == state { + e.snd.cc = e.snd.initCongestionControl(e.cc) + } + e.mu.Unlock() + e.workMu.Unlock() + } + return nil + } + } + + // Linux returns ENOENT when an invalid congestion + // control algorithm is specified. + return tcpip.ErrNoSuchFile default: return nil } @@ -929,6 +1110,14 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.lastErrorMu.Unlock() return err + case *tcpip.MaxSegOption: + // This is just stubbed out. Linux never returns the user_mss + // value as it either returns the defaultMSS or returns the + // actual current MSS. Netstack just returns the defaultMSS + // always for now. + *o = header.TCPDefaultMSS + return nil + case *tcpip.SendBufferSizeOption: e.sndBufMu.Lock() *o = tcpip.SendBufferSizeOption(e.sndBufSize) @@ -1067,6 +1256,12 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { } return nil + case *tcpip.CongestionControlOption: + e.mu.Lock() + *o = e.cc + e.mu.Unlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -1098,6 +1293,11 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocol // Connect connects the endpoint to its peer. func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { + if addr.Addr == "" && addr.Port == 0 { + // AF_UNSPEC isn't supported. + return tcpip.ErrAddressFamilyNotSupported + } + return e.connect(addr, true, true) } @@ -1582,6 +1782,13 @@ func (e *endpoint) readyToRead(s *segment) { if s != nil { s.incRef() e.rcvBufUsed += s.data.Size() + // Check if the receive window is now closed. If so make sure + // we set the zero window before we deliver the segment to ensure + // that a subsequent read of the segment will correctly trigger + // a non-zero notification. + if avail := e.receiveBufferAvailableLocked(); avail>>e.rcv.rcvWndScale == 0 { + e.zeroWindow = true + } e.rcvList.PushBack(s) } else { e.rcvClosed = true @@ -1591,21 +1798,26 @@ func (e *endpoint) readyToRead(s *segment) { e.waiterQueue.Notify(waiter.EventIn) } -// receiveBufferAvailable calculates how many bytes are still available in the -// receive buffer. -func (e *endpoint) receiveBufferAvailable() int { - e.rcvListMu.Lock() - size := e.rcvBufSize - used := e.rcvBufUsed - e.rcvListMu.Unlock() - +// receiveBufferAvailableLocked calculates how many bytes are still available +// in the receive buffer. +// rcvListMu must be held when this function is called. +func (e *endpoint) receiveBufferAvailableLocked() int { // We may use more bytes than the buffer size when the receive buffer // shrinks. - if used >= size { + if e.rcvBufUsed >= e.rcvBufSize { return 0 } - return size - used + return e.rcvBufSize - e.rcvBufUsed +} + +// receiveBufferAvailable calculates how many bytes are still available in the +// receive buffer. +func (e *endpoint) receiveBufferAvailable() int { + e.rcvListMu.Lock() + available := e.receiveBufferAvailableLocked() + e.rcvListMu.Unlock() + return available } func (e *endpoint) receiveBufferSize() int { @@ -1616,6 +1828,33 @@ func (e *endpoint) receiveBufferSize() int { return size } +func (e *endpoint) maxReceiveBufferSize() int { + var rs ReceiveBufferSizeOption + if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil { + // As a fallback return the hardcoded max buffer size. + return MaxBufferSize + } + return rs.Max +} + +// rcvWndScaleForHandshake computes the receive window scale to offer to the +// peer when window scaling is enabled (true by default). If auto-tuning is +// disabled then the window scaling factor is based on the size of the +// receiveBuffer otherwise we use the max permissible receive buffer size to +// compute the scale. +func (e *endpoint) rcvWndScaleForHandshake() int { + bufSizeForScale := e.receiveBufferSize() + + e.rcvListMu.Lock() + autoTuningDisabled := e.rcvAutoParams.disabled + e.rcvListMu.Unlock() + if autoTuningDisabled { + return FindWndScale(seqnum.Size(bufSizeForScale)) + } + + return FindWndScale(seqnum.Size(e.maxReceiveBufferSize())) +} + // updateRecentTimestamp updates the recent timestamp using the algorithm // described in https://tools.ietf.org/html/rfc7323#section-4.3 func (e *endpoint) updateRecentTimestamp(tsVal uint32, maxSentAck seqnum.Value, segSeq seqnum.Value) { @@ -1708,6 +1947,13 @@ func (e *endpoint) completeState() stack.TCPEndpointState { s.RcvBufSize = e.rcvBufSize s.RcvBufUsed = e.rcvBufUsed s.RcvClosed = e.rcvClosed + s.RcvAutoParams.MeasureTime = e.rcvAutoParams.measureTime + s.RcvAutoParams.CopiedBytes = e.rcvAutoParams.copied + s.RcvAutoParams.PrevCopiedBytes = e.rcvAutoParams.prevCopied + s.RcvAutoParams.RTT = e.rcvAutoParams.rtt + s.RcvAutoParams.RTTMeasureSeqNumber = e.rcvAutoParams.rttMeasureSeqNumber + s.RcvAutoParams.RTTMeasureTime = e.rcvAutoParams.rttMeasureTime + s.RcvAutoParams.Disabled = e.rcvAutoParams.disabled e.rcvListMu.Unlock() // Endpoint TCP Option state. @@ -1761,13 +2007,13 @@ func (e *endpoint) completeState() stack.TCPEndpointState { RTTMeasureTime: e.snd.rttMeasureTime, Closed: e.snd.closed, RTO: e.snd.rto, - SRTTInited: e.snd.srttInited, MaxPayloadSize: e.snd.maxPayloadSize, SndWndScale: e.snd.sndWndScale, MaxSentAck: e.snd.maxSentAck, } e.snd.rtt.Lock() s.Sender.SRTT = e.snd.rtt.srtt + s.Sender.SRTTInited = e.snd.rtt.srttInited e.snd.rtt.Unlock() if cubic, ok := e.snd.cc.(*cubicState); ok { @@ -1815,3 +2061,7 @@ func (e *endpoint) State() uint32 { defer e.mu.Unlock() return uint32(e.state) } + +func mssForRoute(r *stack.Route) uint16 { + return uint16(r.MTU() - header.TCPMinimumSize) +} diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index 5f30c2374..b93959034 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -19,9 +19,9 @@ import ( "sync" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) func (e *endpoint) drainSegmentLocked() { @@ -342,6 +342,7 @@ func loadError(s string) *tcpip.Error { tcpip.ErrNoBufferSpace, tcpip.ErrBroadcastDisabled, tcpip.ErrNotPermitted, + tcpip.ErrAddressFamilyNotSupported, } messageToError = make(map[string]*tcpip.Error) @@ -360,3 +361,23 @@ func loadError(s string) *tcpip.Error { return e } + +// saveMeasureTime is invoked by stateify. +func (r *rcvBufAutoTuneParams) saveMeasureTime() unixTime { + return unixTime{r.measureTime.Unix(), r.measureTime.UnixNano()} +} + +// loadMeasureTime is invoked by stateify. +func (r *rcvBufAutoTuneParams) loadMeasureTime(unix unixTime) { + r.measureTime = time.Unix(unix.second, unix.nano) +} + +// saveRttMeasureTime is invoked by stateify. +func (r *rcvBufAutoTuneParams) saveRttMeasureTime() unixTime { + return unixTime{r.rttMeasureTime.Unix(), r.rttMeasureTime.UnixNano()} +} + +// loadRttMeasureTime is invoked by stateify. +func (r *rcvBufAutoTuneParams) loadRttMeasureTime(unix unixTime) { + r.rttMeasureTime = time.Unix(unix.second, unix.nano) +} diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index c30b45c2c..63666f0b3 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -17,12 +17,12 @@ package tcp import ( "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // Forwarder is a connection request forwarder, which allows clients to decide @@ -47,7 +47,7 @@ type Forwarder struct { // If rcvWnd is set to zero, the default buffer size is used instead. func NewForwarder(s *stack.Stack, rcvWnd, maxInFlight int, handler func(*ForwarderRequest)) *Forwarder { if rcvWnd == 0 { - rcvWnd = DefaultBufferSize + rcvWnd = DefaultReceiveBufferSize } return &Forwarder{ maxInFlight: maxInFlight, diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index b31bcccfa..ee04dcfcc 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -24,13 +24,13 @@ import ( "strings" "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/raw" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -41,13 +41,18 @@ const ( ProtocolNumber = header.TCPProtocolNumber // MinBufferSize is the smallest size of a receive or send buffer. - minBufferSize = 4 << 10 // 4096 bytes. + MinBufferSize = 4 << 10 // 4096 bytes. - // DefaultBufferSize is the default size of the receive and send buffers. - DefaultBufferSize = 1 << 20 // 1MB + // DefaultSendBufferSize is the default size of the send buffer for + // an endpoint. + DefaultSendBufferSize = 1 << 20 // 1MB - // MaxBufferSize is the largest size a receive and send buffer can grow to. - maxBufferSize = 4 << 20 // 4MB + // DefaultReceiveBufferSize is the default size of the receive buffer + // for an endpoint. + DefaultReceiveBufferSize = 1 << 20 // 1MB + + // MaxBufferSize is the largest size a receive/send buffer can grow to. + MaxBufferSize = 4 << 20 // 4MB // MaxUnprocessedSegments is the maximum number of unprocessed segments // that can be queued for a given endpoint. @@ -79,13 +84,6 @@ const ( ccCubic = "cubic" ) -// CongestionControlOption sets the current congestion control algorithm. -type CongestionControlOption string - -// AvailableCongestionControlOption returns the supported congestion control -// algorithms. -type AvailableCongestionControlOption string - type protocol struct { mu sync.Mutex sackEnabled bool @@ -93,7 +91,7 @@ type protocol struct { recvBufferSize ReceiveBufferSizeOption congestionControl string availableCongestionControl []string - allowedCongestionControl []string + moderateReceiveBuffer bool } // Number returns the tcp protocol number. @@ -188,7 +186,7 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { p.mu.Unlock() return nil - case CongestionControlOption: + case tcpip.CongestionControlOption: for _, c := range p.availableCongestionControl { if string(v) == c { p.mu.Lock() @@ -197,7 +195,16 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { return nil } } - return tcpip.ErrInvalidOptionValue + // linux returns ENOENT when an invalid congestion control + // is specified. + return tcpip.ErrNoSuchFile + + case tcpip.ModerateReceiveBufferOption: + p.mu.Lock() + p.moderateReceiveBuffer = bool(v) + p.mu.Unlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -223,16 +230,25 @@ func (p *protocol) Option(option interface{}) *tcpip.Error { *v = p.recvBufferSize p.mu.Unlock() return nil - case *CongestionControlOption: + + case *tcpip.CongestionControlOption: p.mu.Lock() - *v = CongestionControlOption(p.congestionControl) + *v = tcpip.CongestionControlOption(p.congestionControl) p.mu.Unlock() return nil - case *AvailableCongestionControlOption: + + case *tcpip.AvailableCongestionControlOption: p.mu.Lock() - *v = AvailableCongestionControlOption(strings.Join(p.availableCongestionControl, " ")) + *v = tcpip.AvailableCongestionControlOption(strings.Join(p.availableCongestionControl, " ")) p.mu.Unlock() return nil + + case *tcpip.ModerateReceiveBufferOption: + p.mu.Lock() + *v = tcpip.ModerateReceiveBufferOption(p.moderateReceiveBuffer) + p.mu.Unlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -241,8 +257,8 @@ func (p *protocol) Option(option interface{}) *tcpip.Error { func init() { stack.RegisterTransportProtocolFactory(ProtocolName, func() stack.TransportProtocol { return &protocol{ - sendBufferSize: SendBufferSizeOption{minBufferSize, DefaultBufferSize, maxBufferSize}, - recvBufferSize: ReceiveBufferSizeOption{minBufferSize, DefaultBufferSize, maxBufferSize}, + sendBufferSize: SendBufferSizeOption{MinBufferSize, DefaultSendBufferSize, MaxBufferSize}, + recvBufferSize: ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize}, congestionControl: ccReno, availableCongestionControl: []string{ccReno, ccCubic}, } diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index f02fa6105..e90f9a7d9 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -16,9 +16,10 @@ package tcp import ( "container/heap" + "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) // receiver holds the state necessary to receive TCP segments and turn them @@ -38,6 +39,9 @@ type receiver struct { // shrinking it. rcvAcc seqnum.Value + // rcvWnd is the non-scaled receive window last advertised to the peer. + rcvWnd seqnum.Size + rcvWndScale uint8 closed bool @@ -47,13 +51,14 @@ type receiver struct { pendingBufSize seqnum.Size } -func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8) *receiver { +func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8, pendingBufSize seqnum.Size) *receiver { return &receiver{ ep: ep, rcvNxt: irs + 1, rcvAcc: irs.Add(rcvWnd + 1), + rcvWnd: rcvWnd, rcvWndScale: rcvWndScale, - pendingBufSize: rcvWnd, + pendingBufSize: pendingBufSize, } } @@ -72,14 +77,16 @@ func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool { // getSendParams returns the parameters needed by the sender when building // segments to send. func (r *receiver) getSendParams() (rcvNxt seqnum.Value, rcvWnd seqnum.Size) { - // Calculate the window size based on the current buffer size. - n := r.ep.receiveBufferAvailable() - acc := r.rcvNxt.Add(seqnum.Size(n)) + // Calculate the window size based on the available buffer space. + receiveBufferAvailable := r.ep.receiveBufferAvailable() + acc := r.rcvNxt.Add(seqnum.Size(receiveBufferAvailable)) if r.rcvAcc.LessThan(acc) { r.rcvAcc = acc } - - return r.rcvNxt, r.rcvNxt.Size(r.rcvAcc) >> r.rcvWndScale + // Stash away the non-scaled receive window as we use it for measuring + // receiver's estimated RTT. + r.rcvWnd = r.rcvNxt.Size(r.rcvAcc) + return r.rcvNxt, r.rcvWnd >> r.rcvWndScale } // nonZeroWindow is called when the receive window grows from zero to nonzero; @@ -130,6 +137,21 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum // Update the segment that we're expecting to consume. r.rcvNxt = segSeq.Add(segLen) + // In cases of a misbehaving sender which could send more than the + // advertised window, we could end up in a situation where we get a + // segment that exceeds the window advertised. Instead of partially + // accepting the segment and discarding bytes beyond the advertised + // window, we accept the whole segment and make sure r.rcvAcc is moved + // forward to match r.rcvNxt to indicate that the window is now closed. + // + // In absence of this check the r.acceptable() check fails and accepts + // segments that should be dropped because rcvWnd is calculated as + // the size of the interval (rcvNxt, rcvAcc] which becomes extremely + // large if rcvAcc is ever less than rcvNxt. + if r.rcvAcc.LessThan(r.rcvNxt) { + r.rcvAcc = r.rcvNxt + } + // Trim SACK Blocks to remove any SACK information that covers // sequence numbers that have been consumed. TrimSACKBlockList(&r.ep.sack, r.rcvNxt) @@ -198,6 +220,39 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum return true } +// updateRTT updates the receiver RTT measurement based on the sequence number +// of the received segment. +func (r *receiver) updateRTT() { + // From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf + // + // A system that is only transmitting acknowledgements can still + // estimate the round-trip time by observing the time between when a byte + // is first acknowledged and the receipt of data that is at least one + // window beyond the sequence number that was acknowledged. + r.ep.rcvListMu.Lock() + if r.ep.rcvAutoParams.rttMeasureTime.IsZero() { + // New measurement. + r.ep.rcvAutoParams.rttMeasureTime = time.Now() + r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd) + r.ep.rcvListMu.Unlock() + return + } + if r.rcvNxt.LessThan(r.ep.rcvAutoParams.rttMeasureSeqNumber) { + r.ep.rcvListMu.Unlock() + return + } + rtt := time.Since(r.ep.rcvAutoParams.rttMeasureTime) + // We only store the minimum observed RTT here as this is only used in + // absence of a SRTT available from either timestamps or a sender + // measurement of RTT. + if r.ep.rcvAutoParams.rtt == 0 || rtt < r.ep.rcvAutoParams.rtt { + r.ep.rcvAutoParams.rtt = rtt + } + r.ep.rcvAutoParams.rttMeasureTime = time.Now() + r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd) + r.ep.rcvListMu.Unlock() +} + // handleRcvdSegment handles TCP segments directed at the connection managed by // r as they arrive. It is called by the protocol main loop. func (r *receiver) handleRcvdSegment(s *segment) { @@ -226,10 +281,9 @@ func (r *receiver) handleRcvdSegment(s *segment) { r.pendingBufUsed += s.logicalLen() s.incRef() heap.Push(&r.pendingRcvdSegments, s) + UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt) } - UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt) - // Immediately send an ack so that the peer knows it may // have to retransmit. r.ep.snd.sendAck() @@ -237,6 +291,12 @@ func (r *receiver) handleRcvdSegment(s *segment) { return } + // Since we consumed a segment update the receiver's RTT estimate + // if required. + if segLen > 0 { + r.updateRTT() + } + // By consuming the current segment, we may have filled a gap in the // sequence number domain that allows pending segments to be consumed // now. So try to do it. diff --git a/pkg/tcpip/transport/tcp/sack.go b/pkg/tcpip/transport/tcp/sack.go index 6a013d99b..7be86d68e 100644 --- a/pkg/tcpip/transport/tcp/sack.go +++ b/pkg/tcpip/transport/tcp/sack.go @@ -15,8 +15,8 @@ package tcp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) const ( @@ -31,6 +31,13 @@ const ( // segment identified by segStart->segEnd. func UpdateSACKBlocks(sack *SACKInfo, segStart seqnum.Value, segEnd seqnum.Value, rcvNxt seqnum.Value) { newSB := header.SACKBlock{Start: segStart, End: segEnd} + + // Ignore any invalid SACK blocks or blocks that are before rcvNxt as + // those bytes have already been acked. + if newSB.End.LessThanEq(newSB.Start) || newSB.End.LessThan(rcvNxt) { + return + } + if sack.NumBlocks == 0 { sack.Blocks[0] = newSB sack.NumBlocks = 1 @@ -39,9 +46,8 @@ func UpdateSACKBlocks(sack *SACKInfo, segStart seqnum.Value, segEnd seqnum.Value var n = 0 for i := 0; i < sack.NumBlocks; i++ { start, end := sack.Blocks[i].Start, sack.Blocks[i].End - if end.LessThanEq(start) || start.LessThanEq(rcvNxt) { - // Discard any invalid blocks where end is before start - // and discard any sack blocks that are before rcvNxt as + if end.LessThanEq(rcvNxt) { + // Discard any sack blocks that are before rcvNxt as // those have already been acked. continue } diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard.go b/pkg/tcpip/transport/tcp/sack_scoreboard.go index 1c5766a42..7ef2df377 100644 --- a/pkg/tcpip/transport/tcp/sack_scoreboard.go +++ b/pkg/tcpip/transport/tcp/sack_scoreboard.go @@ -19,8 +19,8 @@ import ( "strings" "github.com/google/btree" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) const ( diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard_test.go b/pkg/tcpip/transport/tcp/sack_scoreboard_test.go index b59eedc9d..b4e5ba0df 100644 --- a/pkg/tcpip/transport/tcp/sack_scoreboard_test.go +++ b/pkg/tcpip/transport/tcp/sack_scoreboard_test.go @@ -17,9 +17,9 @@ package tcp_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" ) const smss = 1500 diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go index 450d9fbc1..ea725d513 100644 --- a/pkg/tcpip/transport/tcp/segment.go +++ b/pkg/tcpip/transport/tcp/segment.go @@ -18,10 +18,10 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // segment represents a TCP segment. It holds the payload and parsed TCP segment diff --git a/pkg/tcpip/transport/tcp/segment_state.go b/pkg/tcpip/transport/tcp/segment_state.go index dd7e14aa6..7dc2741a6 100644 --- a/pkg/tcpip/transport/tcp/segment_state.go +++ b/pkg/tcpip/transport/tcp/segment_state.go @@ -17,7 +17,7 @@ package tcp import ( "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/buffer" ) // saveData is invoked by stateify. diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index b236d7af2..0fee7ab72 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -20,11 +20,11 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/sleep" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) const ( @@ -121,9 +121,8 @@ type sender struct { // rtt.srtt, rtt.rttvar, and rto are the "smoothed round-trip time", // "round-trip time variation" and "retransmit timeout", as defined in // section 2 of RFC 6298. - rtt rtt - rto time.Duration - srttInited bool + rtt rtt + rto time.Duration // maxPayloadSize is the maximum size of the payload of a given segment. // It is initialized on demand. @@ -150,8 +149,9 @@ type sender struct { type rtt struct { sync.Mutex `state:"nosave"` - srtt time.Duration - rttvar time.Duration + srtt time.Duration + rttvar time.Duration + srttInited bool } // fastRecovery holds information related to fast recovery from a packet loss. @@ -194,8 +194,6 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint s := &sender{ ep: ep, - sndCwnd: InitialCwnd, - sndSsthresh: math.MaxInt64, sndWnd: sndWnd, sndUna: iss + 1, sndNxt: iss + 1, @@ -238,7 +236,13 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint return s } -func (s *sender) initCongestionControl(congestionControlName CongestionControlOption) congestionControl { +// initCongestionControl initializes the specified congestion control module and +// returns a handle to it. It also initializes the sndCwnd and sndSsThresh to +// their initial values. +func (s *sender) initCongestionControl(congestionControlName tcpip.CongestionControlOption) congestionControl { + s.sndCwnd = InitialCwnd + s.sndSsthresh = math.MaxInt64 + switch congestionControlName { case ccCubic: return newCubicCC(s) @@ -319,10 +323,10 @@ func (s *sender) sendAck() { // available. This is done in accordance with section 2 of RFC 6298. func (s *sender) updateRTO(rtt time.Duration) { s.rtt.Lock() - if !s.srttInited { + if !s.rtt.srttInited { s.rtt.rttvar = rtt / 2 s.rtt.srtt = rtt - s.srttInited = true + s.rtt.srttInited = true } else { diff := s.rtt.srtt - rtt if diff < 0 { diff --git a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go index 4d1519860..272bbcdbd 100644 --- a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go +++ b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go @@ -26,11 +26,11 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" ) func TestFastRecovery(t *testing.T) { diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go index 025d133be..4e7f1a740 100644 --- a/pkg/tcpip/transport/tcp/tcp_sack_test.go +++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go @@ -21,13 +21,13 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" ) // createConnectedWithSACKPermittedOption creates and connects c.ep with the diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 779ca8b76..915a98047 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -21,20 +21,20 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/checker" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/ports" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/ports" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -1110,8 +1110,9 @@ func TestNonScaledWindowAccept(t *testing.T) { t.Fatalf("Listen failed: %v", err) } - // Do 3-way handshake. - c.PassiveConnect(100, 2, header.TCPSynOptions{MSS: defaultIPv4MSS}) + // Do 3-way handshake w/ window scaling disabled. The SYN-ACK to the SYN + // should not carry the window scaling option. + c.PassiveConnect(100, -1, header.TCPSynOptions{MSS: defaultIPv4MSS}) // Try to accept the connection. we, ch := waiter.NewChannelEntry(nil) @@ -1600,7 +1601,6 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) { // Set the buffer size to a deterministic size so that we can check the // window scaling option. const rcvBufferSize = 0x20000 - const wndScale = 2 if err := ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(rcvBufferSize)); err != nil { t.Fatalf("SetSockOpt failed failed: %v", err) } @@ -1614,7 +1614,7 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) { } // Do 3-way handshake. - c.PassiveConnect(maxPayload, wndScale, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize}) + c.PassiveConnect(maxPayload, -1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize}) // Try to accept the connection. we, ch := waiter.NewChannelEntry(nil) @@ -1713,7 +1713,7 @@ func TestForwarderSendMSSLessThanMTU(t *testing.T) { s.SetTransportProtocolHandler(tcp.ProtocolNumber, f.HandlePacket) // Do 3-way handshake. - c.PassiveConnect(maxPayload, 1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize}) + c.PassiveConnect(maxPayload, -1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize}) // Wait for connection to be available. select { @@ -2767,11 +2767,11 @@ func TestDefaultBufferSizes(t *testing.T) { } }() - checkSendBufferSize(t, ep, tcp.DefaultBufferSize) - checkRecvBufferSize(t, ep, tcp.DefaultBufferSize) + checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize) + checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize) // Change the default send buffer size. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultBufferSize * 2, tcp.DefaultBufferSize * 20}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize * 2, tcp.DefaultSendBufferSize * 20}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } @@ -2781,11 +2781,11 @@ func TestDefaultBufferSizes(t *testing.T) { t.Fatalf("NewEndpoint failed; %v", err) } - checkSendBufferSize(t, ep, tcp.DefaultBufferSize*2) - checkRecvBufferSize(t, ep, tcp.DefaultBufferSize) + checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*2) + checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize) // Change the default receive buffer size. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultBufferSize * 3, tcp.DefaultBufferSize * 30}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize * 3, tcp.DefaultReceiveBufferSize * 30}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } @@ -2795,8 +2795,8 @@ func TestDefaultBufferSizes(t *testing.T) { t.Fatalf("NewEndpoint failed; %v", err) } - checkSendBufferSize(t, ep, tcp.DefaultBufferSize*2) - checkRecvBufferSize(t, ep, tcp.DefaultBufferSize*3) + checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*2) + checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*3) } func TestMinMaxBufferSizes(t *testing.T) { @@ -2810,11 +2810,11 @@ func TestMinMaxBufferSizes(t *testing.T) { defer ep.Close() // Change the min/max values for send/receive - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{200, tcp.DefaultBufferSize * 2, tcp.DefaultBufferSize * 20}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{200, tcp.DefaultReceiveBufferSize * 2, tcp.DefaultReceiveBufferSize * 20}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{300, tcp.DefaultBufferSize * 3, tcp.DefaultBufferSize * 30}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{300, tcp.DefaultSendBufferSize * 3, tcp.DefaultSendBufferSize * 30}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } @@ -2832,17 +2832,17 @@ func TestMinMaxBufferSizes(t *testing.T) { checkSendBufferSize(t, ep, 300) // Set values above the max. - if err := ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(1 + tcp.DefaultBufferSize*20)); err != nil { + if err := ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(1 + tcp.DefaultReceiveBufferSize*20)); err != nil { t.Fatalf("GetSockOpt failed: %v", err) } - checkRecvBufferSize(t, ep, tcp.DefaultBufferSize*20) + checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*20) - if err := ep.SetSockOpt(tcpip.SendBufferSizeOption(1 + tcp.DefaultBufferSize*30)); err != nil { + if err := ep.SetSockOpt(tcpip.SendBufferSizeOption(1 + tcp.DefaultSendBufferSize*30)); err != nil { t.Fatalf("GetSockOpt failed: %v", err) } - checkSendBufferSize(t, ep, tcp.DefaultBufferSize*30) + checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*30) } func makeStack() (*stack.Stack, *tcpip.Error) { @@ -3205,13 +3205,14 @@ func TestTCPEndpointProbe(t *testing.T) { } } -func TestSetCongestionControl(t *testing.T) { +func TestStackSetCongestionControl(t *testing.T) { testCases := []struct { - cc tcp.CongestionControlOption - mustPass bool + cc tcpip.CongestionControlOption + err *tcpip.Error }{ - {"reno", true}, - {"cubic", true}, + {"reno", nil}, + {"cubic", nil}, + {"blahblah", tcpip.ErrNoSuchFile}, } for _, tc := range testCases { @@ -3221,62 +3222,135 @@ func TestSetCongestionControl(t *testing.T) { s := c.Stack() - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tc.cc); err != nil && tc.mustPass { - t.Fatalf("s.SetTransportProtocolOption(%v, %v) = %v, want not-nil", tcp.ProtocolNumber, tc.cc, err) + var oldCC tcpip.CongestionControlOption + if err := s.TransportProtocolOption(tcp.ProtocolNumber, &oldCC); err != nil { + t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &oldCC, err) + } + + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tc.cc); err != tc.err { + t.Fatalf("s.SetTransportProtocolOption(%v, %v) = %v, want %v", tcp.ProtocolNumber, tc.cc, err, tc.err) } - var cc tcp.CongestionControlOption + var cc tcpip.CongestionControlOption if err := s.TransportProtocolOption(tcp.ProtocolNumber, &cc); err != nil { t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &cc, err) } - if got, want := cc, tc.cc; got != want { + + got, want := cc, oldCC + // If SetTransportProtocolOption is expected to succeed + // then the returned value for congestion control should + // match the one specified in the + // SetTransportProtocolOption call above, else it should + // be what it was before the call to + // SetTransportProtocolOption. + if tc.err == nil { + want = tc.cc + } + if got != want { t.Fatalf("got congestion control: %v, want: %v", got, want) } }) } } -func TestAvailableCongestionControl(t *testing.T) { +func TestStackAvailableCongestionControl(t *testing.T) { c := context.New(t, 1500) defer c.Cleanup() s := c.Stack() // Query permitted congestion control algorithms. - var aCC tcp.AvailableCongestionControlOption + var aCC tcpip.AvailableCongestionControlOption if err := s.TransportProtocolOption(tcp.ProtocolNumber, &aCC); err != nil { t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &aCC, err) } - if got, want := aCC, tcp.AvailableCongestionControlOption("reno cubic"); got != want { - t.Fatalf("got tcp.AvailableCongestionControlOption: %v, want: %v", got, want) + if got, want := aCC, tcpip.AvailableCongestionControlOption("reno cubic"); got != want { + t.Fatalf("got tcpip.AvailableCongestionControlOption: %v, want: %v", got, want) } } -func TestSetAvailableCongestionControl(t *testing.T) { +func TestStackSetAvailableCongestionControl(t *testing.T) { c := context.New(t, 1500) defer c.Cleanup() s := c.Stack() // Setting AvailableCongestionControlOption should fail. - aCC := tcp.AvailableCongestionControlOption("xyz") + aCC := tcpip.AvailableCongestionControlOption("xyz") if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &aCC); err == nil { t.Fatalf("s.TransportProtocolOption(%v, %v) = nil, want non-nil", tcp.ProtocolNumber, &aCC) } // Verify that we still get the expected list of congestion control options. - var cc tcp.AvailableCongestionControlOption + var cc tcpip.AvailableCongestionControlOption if err := s.TransportProtocolOption(tcp.ProtocolNumber, &cc); err != nil { t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &cc, err) } - if got, want := cc, tcp.AvailableCongestionControlOption("reno cubic"); got != want { - t.Fatalf("got tcp.AvailableCongestionControlOption: %v, want: %v", got, want) + if got, want := cc, tcpip.AvailableCongestionControlOption("reno cubic"); got != want { + t.Fatalf("got tcpip.AvailableCongestionControlOption: %v, want: %v", got, want) + } +} + +func TestEndpointSetCongestionControl(t *testing.T) { + testCases := []struct { + cc tcpip.CongestionControlOption + err *tcpip.Error + }{ + {"reno", nil}, + {"cubic", nil}, + {"blahblah", tcpip.ErrNoSuchFile}, + } + + for _, connected := range []bool{false, true} { + for _, tc := range testCases { + t.Run(fmt.Sprintf("SetSockOpt(.., %v) w/ connected = %v", tc.cc, connected), func(t *testing.T) { + c := context.New(t, 1500) + defer c.Cleanup() + + // Create TCP endpoint. + var err *tcpip.Error + c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) + if err != nil { + t.Fatalf("NewEndpoint failed: %v", err) + } + + var oldCC tcpip.CongestionControlOption + if err := c.EP.GetSockOpt(&oldCC); err != nil { + t.Fatalf("c.EP.SockOpt(%v) = %v", &oldCC, err) + } + + if connected { + c.Connect(789 /* iss */, 32768 /* rcvWnd */, nil) + } + + if err := c.EP.SetSockOpt(tc.cc); err != tc.err { + t.Fatalf("c.EP.SetSockOpt(%v) = %v, want %v", tc.cc, err, tc.err) + } + + var cc tcpip.CongestionControlOption + if err := c.EP.GetSockOpt(&cc); err != nil { + t.Fatalf("c.EP.SockOpt(%v) = %v", &cc, err) + } + + got, want := cc, oldCC + // If SetSockOpt is expected to succeed then the + // returned value for congestion control should match + // the one specified in the SetSockOpt above, else it + // should be what it was before the call to SetSockOpt. + if tc.err == nil { + want = tc.cc + } + if got != want { + t.Fatalf("got congestion control: %v, want: %v", got, want) + } + }) + } } } func enableCUBIC(t *testing.T, c *context.Context) { t.Helper() - opt := tcp.CongestionControlOption("cubic") + opt := tcpip.CongestionControlOption("cubic") if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, opt); err != nil { t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, %v = %v", opt, err) } @@ -3902,3 +3976,273 @@ func TestEndpointBindListenAcceptState(t *testing.T) { } } + +// This test verifies that the auto tuning does not grow the receive buffer if +// the application is not reading the data actively. +func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) { + const mtu = 1500 + const mss = mtu - header.IPv4MinimumSize - header.TCPMinimumSize + + c := context.New(t, mtu) + defer c.Cleanup() + + stk := c.Stack() + // Set lower limits for auto-tuning tests. This is required because the + // test stops the worker which can cause packets to be dropped because + // the segment queue holding unprocessed packets is limited to 500. + const receiveBufferSize = 80 << 10 // 80KB. + const maxReceiveBufferSize = receiveBufferSize * 10 + if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %v", err) + } + + // Enable auto-tuning. + if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %v", err) + } + // Change the expected window scale to match the value needed for the + // maximum buffer size defined above. + c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize)) + + rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4}) + + // NOTE: The timestamp values in the sent packets are meaningless to the + // peer so we just increment the timestamp value by 1 every batch as we + // are not really using them for anything. Send a single byte to verify + // the advertised window. + tsVal := rawEP.TSVal + 1 + + // Introduce a 25ms latency by delaying the first byte. + latency := 25 * time.Millisecond + time.Sleep(latency) + rawEP.SendPacketWithTS([]byte{1}, tsVal) + + // Verify that the ACK has the expected window. + wantRcvWnd := receiveBufferSize + wantRcvWnd = (wantRcvWnd >> uint32(c.WindowScale)) + rawEP.VerifyACKRcvWnd(uint16(wantRcvWnd - 1)) + time.Sleep(25 * time.Millisecond) + + // Allocate a large enough payload for the test. + b := make([]byte, int(receiveBufferSize)*2) + offset := 0 + payloadSize := receiveBufferSize - 1 + worker := (c.EP).(interface { + StopWork() + ResumeWork() + }) + tsVal++ + + // Stop the worker goroutine. + worker.StopWork() + start := offset + end := offset + payloadSize + packetsSent := 0 + for ; start < end; start += mss { + rawEP.SendPacketWithTS(b[start:start+mss], tsVal) + packetsSent++ + } + // Resume the worker so that it only sees the packets once all of them + // are waiting to be read. + worker.ResumeWork() + + // Since we read no bytes the window should goto zero till the + // application reads some of the data. + // Discard all intermediate acks except the last one. + if packetsSent > 100 { + for i := 0; i < (packetsSent / 100); i++ { + _ = c.GetPacket() + } + } + rawEP.VerifyACKRcvWnd(0) + + time.Sleep(25 * time.Millisecond) + // Verify that sending more data when window is closed is dropped and + // not acked. + rawEP.SendPacketWithTS(b[start:start+mss], tsVal) + + // Verify that the stack sends us back an ACK with the sequence number + // of the last packet sent indicating it was dropped. + p := c.GetPacket() + checker.IPv4(t, p, checker.TCP( + checker.AckNum(uint32(rawEP.NextSeqNum)-uint32(mss)), + checker.Window(0), + )) + + // Now read all the data from the endpoint and verify that advertised + // window increases to the full available buffer size. + for { + _, _, err := c.EP.Read(nil) + if err == tcpip.ErrWouldBlock { + break + } + } + + // Verify that we receive a non-zero window update ACK. When running + // under thread santizer this test can end up sending more than 1 + // ack, 1 for the non-zero window + p = c.GetPacket() + checker.IPv4(t, p, checker.TCP( + checker.AckNum(uint32(rawEP.NextSeqNum)-uint32(mss)), + func(t *testing.T, h header.Transport) { + tcp, ok := h.(header.TCP) + if !ok { + return + } + if w := tcp.WindowSize(); w == 0 || w > uint16(wantRcvWnd) { + t.Errorf("expected a non-zero window: got %d, want <= wantRcvWnd", w, wantRcvWnd) + } + }, + )) +} + +// This test verifies that the auto tuning does not grow the receive buffer if +// the application is not reading the data actively. +func TestReceiveBufferAutoTuning(t *testing.T) { + const mtu = 1500 + const mss = mtu - header.IPv4MinimumSize - header.TCPMinimumSize + + c := context.New(t, mtu) + defer c.Cleanup() + + // Enable Auto-tuning. + stk := c.Stack() + // Set lower limits for auto-tuning tests. This is required because the + // test stops the worker which can cause packets to be dropped because + // the segment queue holding unprocessed packets is limited to 500. + const receiveBufferSize = 80 << 10 // 80KB. + const maxReceiveBufferSize = receiveBufferSize * 10 + if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %v", err) + } + + // Enable auto-tuning. + if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %v", err) + } + // Change the expected window scale to match the value needed for the + // maximum buffer size used by stack. + c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize)) + + rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4}) + + wantRcvWnd := receiveBufferSize + scaleRcvWnd := func(rcvWnd int) uint16 { + return uint16(rcvWnd >> uint16(c.WindowScale)) + } + // Allocate a large array to send to the endpoint. + b := make([]byte, receiveBufferSize*48) + + // In every iteration we will send double the number of bytes sent in + // the previous iteration and read the same from the app. The received + // window should grow by at least 2x of bytes read by the app in every + // RTT. + offset := 0 + payloadSize := receiveBufferSize / 8 + worker := (c.EP).(interface { + StopWork() + ResumeWork() + }) + tsVal := rawEP.TSVal + // We are going to do our own computation of what the moderated receive + // buffer should be based on sent/copied data per RTT and verify that + // the advertised window by the stack matches our calculations. + prevCopied := 0 + done := false + latency := 1 * time.Millisecond + for i := 0; !done; i++ { + tsVal++ + + // Stop the worker goroutine. + worker.StopWork() + start := offset + end := offset + payloadSize + totalSent := 0 + packetsSent := 0 + for ; start < end; start += mss { + rawEP.SendPacketWithTS(b[start:start+mss], tsVal) + totalSent += mss + packetsSent++ + } + // Resume it so that it only sees the packets once all of them + // are waiting to be read. + worker.ResumeWork() + + // Give 1ms for the worker to process the packets. + time.Sleep(1 * time.Millisecond) + + // Verify that the advertised window on the ACK is reduced by + // the total bytes sent. + expectedWnd := wantRcvWnd - totalSent + if packetsSent > 100 { + for i := 0; i < (packetsSent / 100); i++ { + _ = c.GetPacket() + } + } + rawEP.VerifyACKRcvWnd(scaleRcvWnd(expectedWnd)) + + // Now read all the data from the endpoint and invoke the + // moderation API to allow for receive buffer auto-tuning + // to happen before we measure the new window. + totalCopied := 0 + for { + b, _, err := c.EP.Read(nil) + if err == tcpip.ErrWouldBlock { + break + } + totalCopied += len(b) + } + + // Invoke the moderation API. This is required for auto-tuning + // to happen. This method is normally expected to be invoked + // from a higher layer than tcpip.Endpoint. So we simulate + // copying to user-space by invoking it explicitly here. + c.EP.ModerateRecvBuf(totalCopied) + + // Now send a keep-alive packet to trigger an ACK so that we can + // measure the new window. + rawEP.NextSeqNum-- + rawEP.SendPacketWithTS(nil, tsVal) + rawEP.NextSeqNum++ + + if i == 0 { + // In the first iteration the receiver based RTT is not + // yet known as a result the moderation code should not + // increase the advertised window. + rawEP.VerifyACKRcvWnd(scaleRcvWnd(wantRcvWnd)) + prevCopied = totalCopied + } else { + rttCopied := totalCopied + if i == 1 { + // The moderation code accumulates copied bytes till + // RTT is established. So add in the bytes sent in + // the first iteration to the total bytes for this + // RTT. + rttCopied += prevCopied + // Now reset it to the initial value used by the + // auto tuning logic. + prevCopied = tcp.InitialCwnd * mss * 2 + } + newWnd := rttCopied<<1 + 16*mss + grow := (newWnd * (rttCopied - prevCopied)) / prevCopied + newWnd += (grow << 1) + if newWnd > maxReceiveBufferSize { + newWnd = maxReceiveBufferSize + done = true + } + rawEP.VerifyACKRcvWnd(scaleRcvWnd(newWnd)) + wantRcvWnd = newWnd + prevCopied = rttCopied + // Increase the latency after first two iterations to + // establish a low RTT value in the receiver since it + // only tracks the lowest value. This ensures that when + // ModerateRcvBuf is called the elapsed time is always > + // rtt. Without this the test is flaky due to delays due + // to scheduling/wakeup etc. + latency += 50 * time.Millisecond + } + time.Sleep(latency) + offset += payloadSize + payloadSize *= 2 + } +} diff --git a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go index 039bbcfba..a641e953d 100644 --- a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go @@ -20,13 +20,13 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/checker" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" + "gvisor.dev/gvisor/pkg/waiter" ) // createConnectedWithTimestampOption creates and connects c.ep with the @@ -182,7 +182,7 @@ func TestTimeStampEnabledAccept(t *testing.T) { wndSize uint16 }{ {true, -1, 0xffff}, // When cookie is used window scaling is disabled. - {false, 5, 0x8000}, // 0x8000 * 2^5 = 1<<20 = 1MB window (the default). + {false, 5, 0x8000}, // DefaultReceiveBufferSize is 1MB >> 5. } for _, tc := range testCases { timeStampEnabledAccept(t, tc.cookieEnabled, tc.wndScale, tc.wndSize) @@ -239,7 +239,7 @@ func TestTimeStampDisabledAccept(t *testing.T) { wndSize uint16 }{ {true, -1, 0xffff}, // When cookie is used window scaling is disabled. - {false, 5, 0x8000}, // 0x8000 * 2^5 = 1<<20 = 1MB window (the default). + {false, 5, 0x8000}, // DefaultReceiveBufferSize is 1MB >> 5. } for _, tc := range testCases { timeStampDisabledAccept(t, tc.cookieEnabled, tc.wndScale, tc.wndSize) diff --git a/pkg/tcpip/transport/tcp/testing/context/BUILD b/pkg/tcpip/transport/tcp/testing/context/BUILD index 1584e4095..19b0d31c5 100644 --- a/pkg/tcpip/transport/tcp/testing/context/BUILD +++ b/pkg/tcpip/transport/tcp/testing/context/BUILD @@ -6,7 +6,7 @@ go_library( name = "context", testonly = 1, srcs = ["context.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context", + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context", visibility = [ "//:sandbox", ], diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index 69a43b6f4..bcc0f3e28 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -21,18 +21,18 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/checker" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -72,12 +72,6 @@ const ( testInitialSequenceNumber = 789 ) -// defaultWindowScale value specified here depends on the tcp.DefaultBufferSize -// constant defined in the tcp/endpoint.go because the tcp.DefaultBufferSize is -// used in tcp.newHandshake to determine the window scale to use when sending a -// SYN/SYN-ACK. -var defaultWindowScale = tcp.FindWndScale(tcp.DefaultBufferSize) - // Headers is used to represent the TCP header fields when building a // new packet. type Headers struct { @@ -134,6 +128,10 @@ type Context struct { // TimeStampEnabled is true if ep is connected with the timestamp option // enabled. TimeStampEnabled bool + + // WindowScale is the expected window scale in SYN packets sent by + // the stack. + WindowScale uint8 } // New allocates and initializes a test context containing a new @@ -142,11 +140,11 @@ func New(t *testing.T, mtu uint32) *Context { s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{tcp.ProtocolName}, stack.Options{}) // Allow minimum send/receive buffer sizes to be 1 during tests. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultBufferSize, tcp.DefaultBufferSize * 10}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize, 10 * tcp.DefaultSendBufferSize}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultBufferSize, tcp.DefaultBufferSize * 10}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize, 10 * tcp.DefaultReceiveBufferSize}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %v", err) } @@ -184,9 +182,10 @@ func New(t *testing.T, mtu uint32) *Context { }) return &Context{ - t: t, - s: s, - linkEP: linkEP, + t: t, + s: s, + linkEP: linkEP, + WindowScale: uint8(tcp.FindWndScale(tcp.DefaultReceiveBufferSize)), } } @@ -272,7 +271,7 @@ func (c *Context) GetPacketNonBlocking() []byte { // SendICMPPacket builds and sends an ICMPv4 packet via the link layer endpoint. func (c *Context) SendICMPPacket(typ header.ICMPv4Type, code uint8, p1, p2 []byte, maxTotalSize int) { // Allocate a buffer data and headers. - buf := buffer.NewView(header.IPv4MinimumSize + header.ICMPv4MinimumSize + len(p1) + len(p2)) + buf := buffer.NewView(header.IPv4MinimumSize + header.ICMPv4PayloadOffset + len(p1) + len(p2)) if len(buf) > maxTotalSize { buf = buf[:maxTotalSize] } @@ -292,8 +291,8 @@ func (c *Context) SendICMPPacket(typ header.ICMPv4Type, code uint8, p1, p2 []byt icmp.SetType(typ) icmp.SetCode(code) - copy(icmp[header.ICMPv4MinimumSize:], p1) - copy(icmp[header.ICMPv4MinimumSize+len(p1):], p2) + copy(icmp[header.ICMPv4PayloadOffset:], p1) + copy(icmp[header.ICMPv4PayloadOffset+len(p1):], p2) // Inject packet. c.linkEP.Inject(ipv4.ProtocolNumber, buf.ToVectorisedView()) @@ -520,35 +519,21 @@ func (c *Context) CreateConnected(iss seqnum.Value, rcvWnd seqnum.Size, epRcvBuf c.CreateConnectedWithRawOptions(iss, rcvWnd, epRcvBuf, nil) } -// CreateConnectedWithRawOptions creates a connected TCP endpoint and sends -// the specified option bytes as the Option field in the initial SYN packet. +// Connect performs the 3-way handshake for c.EP with the provided Initial +// Sequence Number (iss) and receive window(rcvWnd) and any options if +// specified. // // It also sets the receive buffer for the endpoint to the specified // value in epRcvBuf. -func (c *Context) CreateConnectedWithRawOptions(iss seqnum.Value, rcvWnd seqnum.Size, epRcvBuf *tcpip.ReceiveBufferSizeOption, options []byte) { - // Create TCP endpoint. - var err *tcpip.Error - c.EP, err = c.s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) - if err != nil { - c.t.Fatalf("NewEndpoint failed: %v", err) - } - if got, want := tcp.EndpointState(c.EP.State()), tcp.StateInitial; got != want { - c.t.Errorf("Unexpected endpoint state: want %v, got %v", want, got) - } - - if epRcvBuf != nil { - if err := c.EP.SetSockOpt(*epRcvBuf); err != nil { - c.t.Fatalf("SetSockOpt failed failed: %v", err) - } - } - +// +// PreCondition: c.EP must already be created. +func (c *Context) Connect(iss seqnum.Value, rcvWnd seqnum.Size, options []byte) { // Start connection attempt. waitEntry, notifyCh := waiter.NewChannelEntry(nil) c.WQ.EventRegister(&waitEntry, waiter.EventOut) defer c.WQ.EventUnregister(&waitEntry) - err = c.EP.Connect(tcpip.FullAddress{Addr: TestAddr, Port: TestPort}) - if err != tcpip.ErrConnectStarted { + if err := c.EP.Connect(tcpip.FullAddress{Addr: TestAddr, Port: TestPort}); err != tcpip.ErrConnectStarted { c.t.Fatalf("Unexpected return value from Connect: %v", err) } @@ -590,8 +575,7 @@ func (c *Context) CreateConnectedWithRawOptions(iss seqnum.Value, rcvWnd seqnum. // Wait for connection to be established. select { case <-notifyCh: - err = c.EP.GetSockOpt(tcpip.ErrorOption{}) - if err != nil { + if err := c.EP.GetSockOpt(tcpip.ErrorOption{}); err != nil { c.t.Fatalf("Unexpected error when connecting: %v", err) } case <-time.After(1 * time.Second): @@ -604,6 +588,27 @@ func (c *Context) CreateConnectedWithRawOptions(iss seqnum.Value, rcvWnd seqnum. c.Port = tcpHdr.SourcePort() } +// CreateConnectedWithRawOptions creates a connected TCP endpoint and sends +// the specified option bytes as the Option field in the initial SYN packet. +// +// It also sets the receive buffer for the endpoint to the specified +// value in epRcvBuf. +func (c *Context) CreateConnectedWithRawOptions(iss seqnum.Value, rcvWnd seqnum.Size, epRcvBuf *tcpip.ReceiveBufferSizeOption, options []byte) { + // Create TCP endpoint. + var err *tcpip.Error + c.EP, err = c.s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) + if err != nil { + c.t.Fatalf("NewEndpoint failed: %v", err) + } + + if epRcvBuf != nil { + if err := c.EP.SetSockOpt(*epRcvBuf); err != nil { + c.t.Fatalf("SetSockOpt failed failed: %v", err) + } + } + c.Connect(iss, rcvWnd, options) +} + // RawEndpoint is just a small wrapper around a TCP endpoint's state to make // sending data and ACK packets easy while being able to manipulate the sequence // numbers and timestamp values as needed. @@ -666,6 +671,21 @@ func (r *RawEndpoint) VerifyACKWithTS(tsVal uint32) { r.RecentTS = opts.TSVal } +// VerifyACKRcvWnd verifies that the window advertised by the incoming ACK +// matches the provided rcvWnd. +func (r *RawEndpoint) VerifyACKRcvWnd(rcvWnd uint16) { + ackPacket := r.C.GetPacket() + checker.IPv4(r.C.t, ackPacket, + checker.TCP( + checker.DstPort(r.SrcPort), + checker.TCPFlags(header.TCPFlagAck), + checker.SeqNum(uint32(r.AckNum)), + checker.AckNum(uint32(r.NextSeqNum)), + checker.Window(rcvWnd), + ), + ) +} + // VerifyACKNoSACK verifies that the ACK does not contain a SACK block. func (r *RawEndpoint) VerifyACKNoSACK() { r.VerifyACKHasSACK(nil) @@ -726,7 +746,7 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) * checker.TCPSynOptions(header.TCPSynOptions{ MSS: mss, TS: true, - WS: defaultWindowScale, + WS: int(c.WindowScale), SACKPermitted: c.SACKEnabled(), }), ), @@ -741,6 +761,9 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) * // Build options w/ tsVal to be sent in the SYN-ACK. synAckOptions := make([]byte, header.TCPOptionsMaximumSize) offset := 0 + if wantOptions.WS != -1 { + offset += header.EncodeWSOption(wantOptions.WS, synAckOptions[offset:]) + } if wantOptions.TS { offset += header.EncodeTSOption(wantOptions.TSVal, synOptions.TSVal, synAckOptions[offset:]) } diff --git a/pkg/tcpip/transport/tcp/timer.go b/pkg/tcpip/transport/tcp/timer.go index fc1c7cbd2..c70525f27 100644 --- a/pkg/tcpip/transport/tcp/timer.go +++ b/pkg/tcpip/transport/tcp/timer.go @@ -17,7 +17,7 @@ package tcp import ( "time" - "gvisor.googlesource.com/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/sleep" ) type timerState int diff --git a/pkg/tcpip/transport/tcpconntrack/BUILD b/pkg/tcpip/transport/tcpconntrack/BUILD index 31a845dee..4bec48c0f 100644 --- a/pkg/tcpip/transport/tcpconntrack/BUILD +++ b/pkg/tcpip/transport/tcpconntrack/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_library( name = "tcpconntrack", srcs = ["tcp_conntrack.go"], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcpconntrack", + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcpconntrack", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip/header", diff --git a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go index f1dcd36d5..93712cd45 100644 --- a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go +++ b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go @@ -18,8 +18,8 @@ package tcpconntrack import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) // Result is returned when the state of a TCB is updated in response to an diff --git a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go index 435e136de..5e271b7ca 100644 --- a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go +++ b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go @@ -17,8 +17,8 @@ package tcpconntrack_test import ( "testing" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcpconntrack" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcpconntrack" ) // connected creates a connection tracker TCB and sets it to a connected state diff --git a/pkg/tcpip/transport/udp/BUILD b/pkg/tcpip/transport/udp/BUILD index b9520d6e0..6dac66b50 100644 --- a/pkg/tcpip/transport/udp/BUILD +++ b/pkg/tcpip/transport/udp/BUILD @@ -24,8 +24,8 @@ go_library( "protocol.go", "udp_packet_list.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp", - imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"], + importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/udp", + imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ "//pkg/sleep", diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index fa7278286..cb0ea83a6 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -18,11 +18,11 @@ import ( "math" "sync" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // +stateify savable @@ -169,6 +169,9 @@ func (e *endpoint) Close() { e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } +// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. +func (e *endpoint) ModerateRecvBuf(copied int) {} + // Read reads data from the endpoint. This method does not block if // there is no data pending. func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { @@ -695,8 +698,44 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (t return netProto, nil } +func (e *endpoint) disconnect() *tcpip.Error { + e.mu.Lock() + defer e.mu.Unlock() + + if e.state != stateConnected { + return nil + } + id := stack.TransportEndpointID{} + // Exclude ephemerally bound endpoints. + if e.bindNICID != 0 || e.id.LocalAddress == "" { + var err *tcpip.Error + id = stack.TransportEndpointID{ + LocalPort: e.id.LocalPort, + LocalAddress: e.id.LocalAddress, + } + id, err = e.registerWithStack(e.regNICID, e.effectiveNetProtos, id) + if err != nil { + return err + } + e.state = stateBound + } else { + e.state = stateInitial + } + + e.stack.UnregisterTransportEndpoint(e.regNICID, e.effectiveNetProtos, ProtocolNumber, e.id, e) + e.id = id + e.route.Release() + e.route = stack.Route{} + e.dstPort = 0 + + return nil +} + // Connect connects the endpoint to its peer. Specifying a NIC is optional. func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { + if addr.Addr == "" { + return e.disconnect() + } if addr.Port == 0 { // We don't support connecting to port zero. return tcpip.ErrInvalidEndpointState @@ -731,12 +770,16 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { defer r.Release() id := stack.TransportEndpointID{ - LocalAddress: r.LocalAddress, + LocalAddress: e.id.LocalAddress, LocalPort: localPort, RemotePort: addr.Port, RemoteAddress: r.RemoteAddress, } + if e.state == stateInitial { + id.LocalAddress = r.LocalAddress + } + // Even if we're connected, this endpoint can still be used to send // packets on a different network protocol, so we register both even if // v6only is set to false and this is an ipv6 endpoint. diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go index 74e8e9fd5..18e786397 100644 --- a/pkg/tcpip/transport/udp/endpoint_state.go +++ b/pkg/tcpip/transport/udp/endpoint_state.go @@ -15,10 +15,10 @@ package udp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" ) // saveData saves udpPacket.data field. @@ -92,8 +92,6 @@ func (e *endpoint) afterLoad() { if err != nil { panic(*err) } - - e.id.LocalAddress = e.route.LocalAddress } else if len(e.id.LocalAddress) != 0 { // stateBound if e.stack.CheckLocalAddress(e.regNICID, netProto, e.id.LocalAddress) == 0 { panic(tcpip.ErrBadLocalAddress) diff --git a/pkg/tcpip/transport/udp/forwarder.go b/pkg/tcpip/transport/udp/forwarder.go index 25bdd2929..a874fc9fd 100644 --- a/pkg/tcpip/transport/udp/forwarder.go +++ b/pkg/tcpip/transport/udp/forwarder.go @@ -15,10 +15,10 @@ package udp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" ) // Forwarder is a session request forwarder, which allows clients to decide diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go index 3d31dfbf1..f76e7fbe1 100644 --- a/pkg/tcpip/transport/udp/protocol.go +++ b/pkg/tcpip/transport/udp/protocol.go @@ -21,12 +21,12 @@ package udp import ( - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/raw" + "gvisor.dev/gvisor/pkg/waiter" ) const ( diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 86a8fa19b..75129a2ff 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -21,17 +21,17 @@ import ( "testing" "time" - "gvisor.googlesource.com/gvisor/pkg/tcpip" - "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/checker" - "gvisor.googlesource.com/gvisor/pkg/tcpip/header" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel" - "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4" - "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6" - "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" - "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/waiter" ) const ( |