diff options
Diffstat (limited to 'pkg')
215 files changed, 6736 insertions, 4293 deletions
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go index 8591acbf2..185eee0bb 100644 --- a/pkg/abi/linux/socket.go +++ b/pkg/abi/linux/socket.go @@ -347,26 +347,57 @@ const SizeOfLinger = 8 // // +marshal type TCPInfo struct { - State uint8 - CaState uint8 + // State is the state of the connection. + State uint8 + + // CaState is the congestion control state. + CaState uint8 + + // Retransmits is the number of retransmissions triggered by RTO. Retransmits uint8 - Probes uint8 - Backoff uint8 - Options uint8 - // WindowScale is the combination of snd_wscale (first 4 bits) and rcv_wscale (second 4 bits) + + // Probes is the number of unanswered zero window probes. + Probes uint8 + + // BackOff indicates exponential backoff. + Backoff uint8 + + // Options indicates the options enabled for the connection. + Options uint8 + + // WindowScale is the combination of snd_wscale (first 4 bits) and + // rcv_wscale (second 4 bits) WindowScale uint8 - // DeliveryRateAppLimited is a boolean and only the first bit is meaningful. + + // DeliveryRateAppLimited is a boolean and only the first bit is + // meaningful. DeliveryRateAppLimited uint8 - RTO uint32 - ATO uint32 + // RTO is the retransmission timeout. + RTO uint32 + + // ATO is the acknowledgement timeout interval. + ATO uint32 + + // SndMss is the send maximum segment size. SndMss uint32 + + // RcvMss is the receive maximum segment size. RcvMss uint32 + // Unacked is the number of packets sent but not acknowledged. Unacked uint32 - Sacked uint32 - Lost uint32 + + // Sacked is the number of packets which are selectively acknowledged. + Sacked uint32 + + // Lost is the number of packets marked as lost. + Lost uint32 + + // Retrans is the number of retransmitted packets. Retrans uint32 + + // Fackets is not used and is always zero. Fackets uint32 // Times. @@ -385,37 +416,78 @@ type TCPInfo struct { Advmss uint32 Reordering uint32 - RcvRTT uint32 + // RcvRTT is the receiver round trip time. + RcvRTT uint32 + + // RcvSpace is the current buffer space available for receiving data. RcvSpace uint32 + // TotalRetrans is the total number of retransmits seen since the start + // of the connection. TotalRetrans uint32 - PacingRate uint64 + // PacingRate is the pacing rate in bytes per second. + PacingRate uint64 + + // MaxPacingRate is the maximum pacing rate. MaxPacingRate uint64 + // BytesAcked is RFC4898 tcpEStatsAppHCThruOctetsAcked. BytesAcked uint64 + // BytesReceived is RFC4898 tcpEStatsAppHCThruOctetsReceived. BytesReceived uint64 + // SegsOut is RFC4898 tcpEStatsPerfSegsOut. SegsOut uint32 + // SegsIn is RFC4898 tcpEStatsPerfSegsIn. SegsIn uint32 + // NotSentBytes is the amount of bytes in the write queue that are not + // yet sent. NotSentBytes uint32 - MinRTT uint32 + + // MinRTT is the minimum round trip time seen in the connection. + MinRTT uint32 + // DataSegsIn is RFC4898 tcpEStatsDataSegsIn. DataSegsIn uint32 + // DataSegsOut is RFC4898 tcpEStatsDataSegsOut. DataSegsOut uint32 + // DeliveryRate is the most recent delivery rate in bytes per second. DeliveryRate uint64 // BusyTime is the time in microseconds busy sending data. BusyTime uint64 + // RwndLimited is the time in microseconds limited by receive window. RwndLimited uint64 + // SndBufLimited is the time in microseconds limited by send buffer. SndBufLimited uint64 + + // Delivered is the total data packets delivered including retransmits. + Delivered uint32 + + // DeliveredCE is the total ECE marked data packets delivered including + // retransmits. + DeliveredCE uint32 + + // BytesSent is RFC4898 tcpEStatsPerfHCDataOctetsOut. + BytesSent uint64 + + // BytesRetrans is RFC4898 tcpEStatsPerfOctetsRetrans. + BytesRetrans uint64 + + // DSACKDups is RFC4898 tcpEStatsStackDSACKDups. + DSACKDups uint32 + + // ReordSeen is the number of reordering events seen since the start of + // the connection. + ReordSeen uint32 } // SizeOfTCPInfo is the binary size of a TCPInfo struct. diff --git a/pkg/abi/linux/tcp.go b/pkg/abi/linux/tcp.go index 2a8d4708b..1a3c0916f 100644 --- a/pkg/abi/linux/tcp.go +++ b/pkg/abi/linux/tcp.go @@ -59,3 +59,12 @@ const ( MAX_TCP_KEEPINTVL = 32767 MAX_TCP_KEEPCNT = 127 ) + +// Congestion control states from include/uapi/linux/tcp.h. +const ( + TCP_CA_Open = 0 + TCP_CA_Disorder = 1 + TCP_CA_CWR = 2 + TCP_CA_Recovery = 3 + TCP_CA_Loss = 4 +) diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/ring0/BUILD index 2852b7387..d1b14efdb 100644 --- a/pkg/sentry/platform/ring0/BUILD +++ b/pkg/ring0/BUILD @@ -43,16 +43,16 @@ arch_genrule( name = "entry_impl_amd64", srcs = ["entry_amd64.s"], outs = ["entry_impl_amd64.s"], - cmd = "(echo -e '// build +amd64\\n' && QEMU $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(location entry_amd64.s)) > $@", - tools = ["//pkg/sentry/platform/ring0/gen_offsets"], + cmd = "(echo -e '// build +amd64\\n' && QEMU $(location //pkg/ring0/gen_offsets) && cat $(location entry_amd64.s)) > $@", + tools = ["//pkg/ring0/gen_offsets"], ) arch_genrule( name = "entry_impl_arm64", srcs = ["entry_arm64.s"], outs = ["entry_impl_arm64.s"], - cmd = "(echo -e '// build +arm64\\n' && QEMU $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(location entry_arm64.s)) > $@", - tools = ["//pkg/sentry/platform/ring0/gen_offsets"], + cmd = "(echo -e '// build +arm64\\n' && QEMU $(location //pkg/ring0/gen_offsets) && cat $(location entry_arm64.s)) > $@", + tools = ["//pkg/ring0/gen_offsets"], ) go_library( @@ -77,9 +77,9 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/cpuid", + "//pkg/ring0/pagetables", "//pkg/safecopy", "//pkg/sentry/arch", - "//pkg/sentry/platform/ring0/pagetables", "//pkg/usermem", ], ) diff --git a/pkg/sentry/platform/ring0/aarch64.go b/pkg/ring0/aarch64.go index 3bda594f9..3bda594f9 100644 --- a/pkg/sentry/platform/ring0/aarch64.go +++ b/pkg/ring0/aarch64.go diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/ring0/defs.go index f9765771e..e2561e4c2 100644 --- a/pkg/sentry/platform/ring0/defs.go +++ b/pkg/ring0/defs.go @@ -15,8 +15,8 @@ package ring0 import ( + "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" ) // Kernel is a global kernel object. diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/ring0/defs_amd64.go index 7a2275558..ceddf719d 100644 --- a/pkg/sentry/platform/ring0/defs_amd64.go +++ b/pkg/ring0/defs_amd64.go @@ -17,7 +17,6 @@ package ring0 import ( - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/usermem" ) diff --git a/pkg/sentry/platform/ring0/defs_arm64.go b/pkg/ring0/defs_arm64.go index a014dcbc0..dcb255fc8 100644 --- a/pkg/sentry/platform/ring0/defs_arm64.go +++ b/pkg/ring0/defs_arm64.go @@ -17,7 +17,6 @@ package ring0 import ( - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/usermem" ) diff --git a/pkg/sentry/platform/ring0/entry_amd64.go b/pkg/ring0/entry_amd64.go index d87b1fd00..d87b1fd00 100644 --- a/pkg/sentry/platform/ring0/entry_amd64.go +++ b/pkg/ring0/entry_amd64.go diff --git a/pkg/sentry/platform/ring0/entry_amd64.s b/pkg/ring0/entry_amd64.s index f59747df3..f59747df3 100644 --- a/pkg/sentry/platform/ring0/entry_amd64.s +++ b/pkg/ring0/entry_amd64.s diff --git a/pkg/sentry/platform/ring0/entry_arm64.go b/pkg/ring0/entry_arm64.go index 62a93f3d6..62a93f3d6 100644 --- a/pkg/sentry/platform/ring0/entry_arm64.go +++ b/pkg/ring0/entry_arm64.go diff --git a/pkg/sentry/platform/ring0/entry_arm64.s b/pkg/ring0/entry_arm64.s index b2bb18257..b2bb18257 100644 --- a/pkg/sentry/platform/ring0/entry_arm64.s +++ b/pkg/ring0/entry_arm64.s diff --git a/pkg/sentry/platform/ring0/gen_offsets/BUILD b/pkg/ring0/gen_offsets/BUILD index a9703baf6..15b93d61c 100644 --- a/pkg/sentry/platform/ring0/gen_offsets/BUILD +++ b/pkg/ring0/gen_offsets/BUILD @@ -7,14 +7,14 @@ go_template_instance( name = "defs_impl_arm64", out = "defs_impl_arm64.go", package = "main", - template = "//pkg/sentry/platform/ring0:defs_arm64", + template = "//pkg/ring0:defs_arm64", ) go_template_instance( name = "defs_impl_amd64", out = "defs_impl_amd64.go", package = "main", - template = "//pkg/sentry/platform/ring0:defs_amd64", + template = "//pkg/ring0:defs_amd64", ) go_binary( @@ -28,13 +28,13 @@ go_binary( # pass the sentry deps test. system_malloc = True, visibility = [ + "//pkg/ring0:__pkg__", "//pkg/sentry/platform/kvm:__pkg__", - "//pkg/sentry/platform/ring0:__pkg__", ], deps = [ "//pkg/cpuid", + "//pkg/ring0/pagetables", "//pkg/sentry/arch", - "//pkg/sentry/platform/ring0/pagetables", "//pkg/usermem", ], ) diff --git a/pkg/sentry/platform/ring0/gen_offsets/main.go b/pkg/ring0/gen_offsets/main.go index a4927da2f..a4927da2f 100644 --- a/pkg/sentry/platform/ring0/gen_offsets/main.go +++ b/pkg/ring0/gen_offsets/main.go diff --git a/pkg/sentry/platform/ring0/kernel.go b/pkg/ring0/kernel.go index 292f9d0cc..292f9d0cc 100644 --- a/pkg/sentry/platform/ring0/kernel.go +++ b/pkg/ring0/kernel.go diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/ring0/kernel_amd64.go index 36a60700e..36a60700e 100644 --- a/pkg/sentry/platform/ring0/kernel_amd64.go +++ b/pkg/ring0/kernel_amd64.go diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/ring0/kernel_arm64.go index c05284641..c05284641 100644 --- a/pkg/sentry/platform/ring0/kernel_arm64.go +++ b/pkg/ring0/kernel_arm64.go diff --git a/pkg/sentry/platform/ring0/kernel_unsafe.go b/pkg/ring0/kernel_unsafe.go index 16955ad91..16955ad91 100644 --- a/pkg/sentry/platform/ring0/kernel_unsafe.go +++ b/pkg/ring0/kernel_unsafe.go diff --git a/pkg/sentry/platform/ring0/lib_amd64.go b/pkg/ring0/lib_amd64.go index 0ec5c3bc5..0ec5c3bc5 100644 --- a/pkg/sentry/platform/ring0/lib_amd64.go +++ b/pkg/ring0/lib_amd64.go diff --git a/pkg/sentry/platform/ring0/lib_amd64.s b/pkg/ring0/lib_amd64.s index 2fe83568a..2fe83568a 100644 --- a/pkg/sentry/platform/ring0/lib_amd64.s +++ b/pkg/ring0/lib_amd64.s diff --git a/pkg/sentry/platform/ring0/lib_arm64.go b/pkg/ring0/lib_arm64.go index a490bf3af..a490bf3af 100644 --- a/pkg/sentry/platform/ring0/lib_arm64.go +++ b/pkg/ring0/lib_arm64.go diff --git a/pkg/sentry/platform/ring0/lib_arm64.s b/pkg/ring0/lib_arm64.s index e39b32841..e39b32841 100644 --- a/pkg/sentry/platform/ring0/lib_arm64.s +++ b/pkg/ring0/lib_arm64.s diff --git a/pkg/sentry/platform/ring0/offsets_amd64.go b/pkg/ring0/offsets_amd64.go index ca4075b09..ca4075b09 100644 --- a/pkg/sentry/platform/ring0/offsets_amd64.go +++ b/pkg/ring0/offsets_amd64.go diff --git a/pkg/sentry/platform/ring0/offsets_arm64.go b/pkg/ring0/offsets_arm64.go index 164db6d5a..164db6d5a 100644 --- a/pkg/sentry/platform/ring0/offsets_arm64.go +++ b/pkg/ring0/offsets_arm64.go diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/ring0/pagetables/BUILD index 9e3539e4c..65a978cbb 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/ring0/pagetables/BUILD @@ -9,7 +9,10 @@ package(licenses = ["notice"]) # architecture builds. go_template( name = "generic_walker_%s" % arch, - srcs = ["walker_%s.go" % arch], + srcs = [ + "walker_generic.go", + "walker_%s.go" % arch, + ], opt_types = [ "Visitor", ], @@ -50,6 +53,7 @@ go_library( "pcids_x86.go", "walker_amd64.go", "walker_arm64.go", + "walker_generic.go", ":walker_empty_amd64", ":walker_empty_arm64", ":walker_lookup_amd64", @@ -60,8 +64,8 @@ go_library( ":walker_unmap_arm64", ], visibility = [ + "//pkg/ring0:__subpackages__", "//pkg/sentry/platform/kvm:__subpackages__", - "//pkg/sentry/platform/ring0:__subpackages__", ], deps = [ "//pkg/sync", diff --git a/pkg/sentry/platform/ring0/pagetables/allocator.go b/pkg/ring0/pagetables/allocator.go index 8d75b7599..8d75b7599 100644 --- a/pkg/sentry/platform/ring0/pagetables/allocator.go +++ b/pkg/ring0/pagetables/allocator.go diff --git a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go b/pkg/ring0/pagetables/allocator_unsafe.go index d08bfdeb3..d08bfdeb3 100644 --- a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go +++ b/pkg/ring0/pagetables/allocator_unsafe.go diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/ring0/pagetables/pagetables.go index 7605d0cb2..8c0a6aa82 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables.go +++ b/pkg/ring0/pagetables/pagetables.go @@ -60,6 +60,7 @@ type PageTables struct { // Init initializes a set of PageTables. // +// +checkescape:hard,stack //go:nosplit func (p *PageTables) Init(allocator Allocator) { p.Allocator = allocator @@ -92,7 +93,6 @@ func NewWithUpper(a Allocator, upperSharedPageTables *PageTables, upperStart uin } p.InitArch(a) - return p } @@ -112,7 +112,7 @@ type mapVisitor struct { // visit is used for map. // //go:nosplit -func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) { +func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { p := v.physical + (start - uintptr(v.target)) if pte.Valid() && (pte.Address() != p || pte.Opts() != v.opts) { v.prev = true @@ -122,9 +122,10 @@ func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) { // install a valid entry here, however we must zap any existing // entry to ensure this happens. pte.Clear() - return + return true } pte.Set(p, v.opts) + return true } //go:nosplit @@ -140,7 +141,6 @@ func (*mapVisitor) requiresSplit() bool { return true } // Precondition: addr & length must be page-aligned, their sum must not overflow. // // +checkescape:hard,stack -// //go:nosplit func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physical uintptr) bool { if p.readOnlyShared { @@ -158,9 +158,6 @@ func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physic length = p.upperStart - uintptr(addr) } } - if !opts.AccessType.Any() { - return p.Unmap(addr, length) - } w := mapWalker{ pageTables: p, visitor: mapVisitor{ @@ -187,9 +184,10 @@ func (*unmapVisitor) requiresSplit() bool { return true } // visit unmaps the given entry. // //go:nosplit -func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) { +func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { pte.Clear() v.count++ + return true } // Unmap unmaps the given range. @@ -199,7 +197,6 @@ func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) { // Precondition: addr & length must be page-aligned, their sum must not overflow. // // +checkescape:hard,stack -// //go:nosplit func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool { if p.readOnlyShared { @@ -241,8 +238,9 @@ func (*emptyVisitor) requiresSplit() bool { return false } // visit unmaps the given entry. // //go:nosplit -func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) { +func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { v.count++ + return true } // IsEmpty checks if the given range is empty. @@ -250,7 +248,6 @@ func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) { // Precondition: addr & length must be page-aligned. // // +checkescape:hard,stack -// //go:nosplit func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool { w := emptyWalker{ @@ -262,20 +259,28 @@ func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool { // lookupVisitor is used for lookup. type lookupVisitor struct { - target uintptr // Input. - physical uintptr // Output. - opts MapOpts // Output. + target uintptr // Input & Output. + findFirst bool // Input. + physical uintptr // Output. + size uintptr // Output. + opts MapOpts // Output. } // visit matches the given address. // //go:nosplit -func (v *lookupVisitor) visit(start uintptr, pte *PTE, align uintptr) { +func (v *lookupVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { if !pte.Valid() { - return + // If looking for the first, then we just keep iterating until + // we find a valid entry. + return v.findFirst } - v.physical = pte.Address() + (start - uintptr(v.target)) + // Is this within the current range? + v.target = start + v.physical = pte.Address() + v.size = (align + 1) v.opts = pte.Opts() + return false } //go:nosplit @@ -286,20 +291,29 @@ func (*lookupVisitor) requiresSplit() bool { return false } // Lookup returns the physical address for the given virtual address. // -// +checkescape:hard,stack +// If findFirst is true, then the next valid address after addr is returned. +// If findFirst is false, then only a mapping for addr will be returned. +// +// Note that if size is zero, then no matching entry was found. // +// +checkescape:hard,stack //go:nosplit -func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts) { +func (p *PageTables) Lookup(addr usermem.Addr, findFirst bool) (virtual usermem.Addr, physical, size uintptr, opts MapOpts) { mask := uintptr(usermem.PageSize - 1) - offset := uintptr(addr) & mask + addr &^= usermem.Addr(mask) w := lookupWalker{ pageTables: p, visitor: lookupVisitor{ - target: uintptr(addr &^ usermem.Addr(mask)), + target: uintptr(addr), + findFirst: findFirst, }, } - w.iterateRange(uintptr(addr), uintptr(addr)+1) - return w.visitor.physical + offset, w.visitor.opts + end := ^usermem.Addr(0) &^ usermem.Addr(mask) + if !findFirst { + end = addr + 1 + } + w.iterateRange(uintptr(addr), uintptr(end)) + return usermem.Addr(w.visitor.target), w.visitor.physical, w.visitor.size, w.visitor.opts } // MarkReadOnlyShared marks the pagetables read-only and can be shared. diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go b/pkg/ring0/pagetables/pagetables_aarch64.go index 520161755..163a3aea3 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go +++ b/pkg/ring0/pagetables/pagetables_aarch64.go @@ -156,12 +156,7 @@ func (p *PTE) IsSect() bool { // //go:nosplit func (p *PTE) Set(addr uintptr, opts MapOpts) { - if !opts.AccessType.Any() { - p.Clear() - return - } - v := (addr &^ optionMask) | protDefault | nG | readOnly - + v := (addr &^ optionMask) | nG | readOnly | protDefault if p.IsSect() { // Note that this is inherited from the previous instance. Set // does not change the value of Sect. See above. @@ -169,6 +164,10 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) { } else { v |= typePage } + if !opts.AccessType.Any() { + // Leave as non-valid if no access is available. + v &^= pteValid + } if opts.Global { v = v &^ nG diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go b/pkg/ring0/pagetables/pagetables_amd64.go index 4bdde8448..a217f404c 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go +++ b/pkg/ring0/pagetables/pagetables_amd64.go @@ -43,6 +43,7 @@ const ( // InitArch does some additional initialization related to the architecture. // +// +checkescape:hard,stack //go:nosplit func (p *PageTables) InitArch(allocator Allocator) { if p.upperSharedPageTables != nil { @@ -50,6 +51,7 @@ func (p *PageTables) InitArch(allocator Allocator) { } } +//go:nosplit func pgdIndex(upperStart uintptr) uintptr { if upperStart&(pgdSize-1) != 0 { panic("upperStart should be pgd size aligned") diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/ring0/pagetables/pagetables_amd64_test.go index 54e8e554f..54e8e554f 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go +++ b/pkg/ring0/pagetables/pagetables_amd64_test.go diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go b/pkg/ring0/pagetables/pagetables_arm64.go index ad0e30c88..fef7a0fd1 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go +++ b/pkg/ring0/pagetables/pagetables_arm64.go @@ -44,6 +44,7 @@ const ( // InitArch does some additional initialization related to the architecture. // +// +checkescape:hard,stack //go:nosplit func (p *PageTables) InitArch(allocator Allocator) { if p.upperSharedPageTables != nil { diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go b/pkg/ring0/pagetables/pagetables_arm64_test.go index 2f73d424f..2f73d424f 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go +++ b/pkg/ring0/pagetables/pagetables_arm64_test.go diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/ring0/pagetables/pagetables_test.go index 5c88d087d..772f4fc5e 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go +++ b/pkg/ring0/pagetables/pagetables_test.go @@ -34,7 +34,7 @@ type checkVisitor struct { failed string // Output. } -func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) { +func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { v.found = append(v.found, mapping{ start: start, length: align + 1, @@ -43,7 +43,7 @@ func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) { }) if v.failed != "" { // Don't keep looking for errors. - return + return false } if v.current >= len(v.expected) { @@ -58,6 +58,7 @@ func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) { v.failed = "opts didn't match" } v.current++ + return true } func (*checkVisitor) requiresAlloc() bool { return false } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/ring0/pagetables/pagetables_x86.go index 157438d9b..32edd2f0a 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go +++ b/pkg/ring0/pagetables/pagetables_x86.go @@ -137,7 +137,10 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) { p.Clear() return } - v := (addr &^ optionMask) | present | accessed + v := (addr &^ optionMask) + if opts.AccessType.Any() { + v |= present | accessed + } if opts.User { v |= user } diff --git a/pkg/sentry/platform/ring0/pagetables/pcids.go b/pkg/ring0/pagetables/pcids.go index 964496aac..964496aac 100644 --- a/pkg/sentry/platform/ring0/pagetables/pcids.go +++ b/pkg/ring0/pagetables/pcids.go diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go b/pkg/ring0/pagetables/pcids_aarch64.go index fbfd41d83..fbfd41d83 100644 --- a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go +++ b/pkg/ring0/pagetables/pcids_aarch64.go diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s b/pkg/ring0/pagetables/pcids_aarch64.s index e9d62d768..e9d62d768 100644 --- a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s +++ b/pkg/ring0/pagetables/pcids_aarch64.s diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/ring0/pagetables/pcids_x86.go index 91fc5e8dd..91fc5e8dd 100644 --- a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go +++ b/pkg/ring0/pagetables/pcids_x86.go diff --git a/pkg/sentry/platform/ring0/pagetables/walker_amd64.go b/pkg/ring0/pagetables/walker_amd64.go index 8f9dacd93..eb4fbcc31 100644 --- a/pkg/sentry/platform/ring0/pagetables/walker_amd64.go +++ b/pkg/ring0/pagetables/walker_amd64.go @@ -16,104 +16,10 @@ package pagetables -// Visitor is a generic type. -type Visitor interface { - // visit is called on each PTE. - visit(start uintptr, pte *PTE, align uintptr) - - // requiresAlloc indicates that new entries should be allocated within - // the walked range. - requiresAlloc() bool - - // requiresSplit indicates that entries in the given range should be - // split if they are huge or jumbo pages. - requiresSplit() bool -} - -// Walker walks page tables. -type Walker struct { - // pageTables are the tables to walk. - pageTables *PageTables - - // Visitor is the set of arguments. - visitor Visitor -} - -// iterateRange iterates over all appropriate levels of page tables for the given range. -// -// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The -// exception is super pages. If a valid super page (huge or jumbo) cannot be -// installed, then the walk will continue to individual entries. -// -// This algorithm will attempt to maximize the use of super pages whenever -// possible. Whether a super page is provided will be clear through the range -// provided in the callback. -// -// Note that if requiresAlloc is true, then no gaps will be present. However, -// if alloc is not set, then the iteration will likely be full of gaps. -// -// Note that this function should generally be avoided in favor of Map, Unmap, -// etc. when not necessary. -// -// Precondition: start must be page-aligned. -// -// Precondition: start must be less than end. -// -// Precondition: If requiresAlloc is true, then start and end should not span -// non-canonical ranges. If they do, a panic will result. -// -//go:nosplit -func (w *Walker) iterateRange(start, end uintptr) { - if start%pteSize != 0 { - panic("unaligned start") - } - if end < start { - panic("start > end") - } - if start < lowerTop { - if end <= lowerTop { - w.iterateRangeCanonical(start, end) - } else if end > lowerTop && end <= upperBottom { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(start, lowerTop) - } else { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(start, lowerTop) - w.iterateRangeCanonical(upperBottom, end) - } - } else if start < upperBottom { - if end <= upperBottom { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - } else { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(upperBottom, end) - } - } else { - w.iterateRangeCanonical(start, end) - } -} - -// next returns the next address quantized by the given size. -// -//go:nosplit -func next(start uintptr, size uintptr) uintptr { - start &= ^(size - 1) - start += size - return start -} - // iterateRangeCanonical walks a canonical range. // //go:nosplit -func (w *Walker) iterateRangeCanonical(start, end uintptr) { +func (w *Walker) iterateRangeCanonical(start, end uintptr) bool { for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ { var ( pgdEntry = &w.pageTables.root[pgdIndex] @@ -127,10 +33,10 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } // Allocate a new pgd. - pudEntries = w.pageTables.Allocator.NewPTEs() + pudEntries = w.pageTables.Allocator.NewPTEs() // escapes: depends on allocator. pgdEntry.setPageTable(w.pageTables, pudEntries) } else { - pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) + pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) // escapes: see above. } // Map the next level. @@ -155,7 +61,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // new page for the pmd. if start&(pudSize-1) == 0 && end-start >= pudSize { pudEntry.SetSuper() - w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) { + return false + } if pudEntry.Valid() { start = next(start, pudSize) continue @@ -163,14 +71,14 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } // Allocate a new pud. - pmdEntries = w.pageTables.Allocator.NewPTEs() + pmdEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above. pudEntry.setPageTable(w.pageTables, pmdEntries) } else if pudEntry.IsSuper() { // Does this page need to be split? if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) { // Install the relevant entries. - pmdEntries = w.pageTables.Allocator.NewPTEs() + pmdEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above. for index := uint16(0); index < entriesPerPage; index++ { pmdEntries[index].SetSuper() pmdEntries[index].Set( @@ -180,7 +88,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { pudEntry.setPageTable(w.pageTables, pmdEntries) } else { // A super page to be checked directly. - w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) { + return false + } // Might have been cleared. if !pudEntry.Valid() { @@ -192,7 +102,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { continue } } else { - pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address()) + pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address()) // escapes: see above. } // Map the next level, since this is valid. @@ -216,7 +126,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // As above, we can skip allocating a new page. if start&(pmdSize-1) == 0 && end-start >= pmdSize { pmdEntry.SetSuper() - w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) { + return false + } if pmdEntry.Valid() { start = next(start, pmdSize) continue @@ -224,7 +136,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } // Allocate a new pmd. - pteEntries = w.pageTables.Allocator.NewPTEs() + pteEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above. pmdEntry.setPageTable(w.pageTables, pteEntries) } else if pmdEntry.IsSuper() { @@ -240,7 +152,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { pmdEntry.setPageTable(w.pageTables, pteEntries) } else { // A huge page to be checked directly. - w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) { + return false + } // Might have been cleared. if !pmdEntry.Valid() { @@ -252,7 +166,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { continue } } else { - pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address()) + pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address()) // escapes: see above. } // Map the next level, since this is valid. @@ -269,11 +183,10 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } // At this point, we are guaranteed that start%pteSize == 0. - w.visitor.visit(uintptr(start), pteEntry, pteSize-1) - if !pteEntry.Valid() { - if w.visitor.requiresAlloc() { - panic("PTE not set after iteration with requiresAlloc!") - } + if !w.visitor.visit(uintptr(start&^(pteSize-1)), pteEntry, pteSize-1) { + return false + } + if !pteEntry.Valid() && !w.visitor.requiresAlloc() { clearPTEEntries++ } @@ -285,7 +198,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // Check if we no longer need this page. if clearPTEEntries == entriesPerPage { pmdEntry.Clear() - w.pageTables.Allocator.FreePTEs(pteEntries) + w.pageTables.Allocator.FreePTEs(pteEntries) // escapes: see above. clearPMDEntries++ } } @@ -293,7 +206,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // Check if we no longer need this page. if clearPMDEntries == entriesPerPage { pudEntry.Clear() - w.pageTables.Allocator.FreePTEs(pmdEntries) + w.pageTables.Allocator.FreePTEs(pmdEntries) // escapes: see above. clearPUDEntries++ } } @@ -301,7 +214,8 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // Check if we no longer need this page. if clearPUDEntries == entriesPerPage { pgdEntry.Clear() - w.pageTables.Allocator.FreePTEs(pudEntries) + w.pageTables.Allocator.FreePTEs(pudEntries) // escapes: see above. } } + return true } diff --git a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go b/pkg/ring0/pagetables/walker_arm64.go index c261d393a..5ed881c7a 100644 --- a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go +++ b/pkg/ring0/pagetables/walker_arm64.go @@ -16,104 +16,10 @@ package pagetables -// Visitor is a generic type. -type Visitor interface { - // visit is called on each PTE. - visit(start uintptr, pte *PTE, align uintptr) - - // requiresAlloc indicates that new entries should be allocated within - // the walked range. - requiresAlloc() bool - - // requiresSplit indicates that entries in the given range should be - // split if they are huge or jumbo pages. - requiresSplit() bool -} - -// Walker walks page tables. -type Walker struct { - // pageTables are the tables to walk. - pageTables *PageTables - - // Visitor is the set of arguments. - visitor Visitor -} - -// iterateRange iterates over all appropriate levels of page tables for the given range. -// -// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The -// exception is sect pages. If a valid sect page (huge or jumbo) cannot be -// installed, then the walk will continue to individual entries. -// -// This algorithm will attempt to maximize the use of sect pages whenever -// possible. Whether a sect page is provided will be clear through the range -// provided in the callback. -// -// Note that if requiresAlloc is true, then no gaps will be present. However, -// if alloc is not set, then the iteration will likely be full of gaps. -// -// Note that this function should generally be avoided in favor of Map, Unmap, -// etc. when not necessary. -// -// Precondition: start must be page-aligned. -// -// Precondition: start must be less than end. -// -// Precondition: If requiresAlloc is true, then start and end should not span -// non-canonical ranges. If they do, a panic will result. -// -//go:nosplit -func (w *Walker) iterateRange(start, end uintptr) { - if start%pteSize != 0 { - panic("unaligned start") - } - if end < start { - panic("start > end") - } - if start < lowerTop { - if end <= lowerTop { - w.iterateRangeCanonical(start, end) - } else if end > lowerTop && end <= upperBottom { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(start, lowerTop) - } else { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(start, lowerTop) - w.iterateRangeCanonical(upperBottom, end) - } - } else if start < upperBottom { - if end <= upperBottom { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - } else { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(upperBottom, end) - } - } else { - w.iterateRangeCanonical(start, end) - } -} - -// next returns the next address quantized by the given size. -// -//go:nosplit -func next(start uintptr, size uintptr) uintptr { - start &= ^(size - 1) - start += size - return start -} - // iterateRangeCanonical walks a canonical range. // //go:nosplit -func (w *Walker) iterateRangeCanonical(start, end uintptr) { +func (w *Walker) iterateRangeCanonical(start, end uintptr) bool { pgdEntryIndex := w.pageTables.root if start >= upperBottom { pgdEntryIndex = w.pageTables.archPageTables.root @@ -160,7 +66,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // new page for the pmd. if start&(pudSize-1) == 0 && end-start >= pudSize { pudEntry.SetSect() - w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) { + return false + } if pudEntry.Valid() { start = next(start, pudSize) continue @@ -185,7 +93,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { pudEntry.setPageTable(w.pageTables, pmdEntries) } else { // A sect page to be checked directly. - w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) { + return false + } // Might have been cleared. if !pudEntry.Valid() { @@ -222,7 +132,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // As above, we can skip allocating a new page. if start&(pmdSize-1) == 0 && end-start >= pmdSize { pmdEntry.SetSect() - w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) { + return false + } if pmdEntry.Valid() { start = next(start, pmdSize) continue @@ -246,7 +158,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { pmdEntry.setPageTable(w.pageTables, pteEntries) } else { // A huge page to be checked directly. - w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) { + return false + } // Might have been cleared. if !pmdEntry.Valid() { @@ -276,7 +190,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } // At this point, we are guaranteed that start%pteSize == 0. - w.visitor.visit(uintptr(start), pteEntry, pteSize-1) + if !w.visitor.visit(uintptr(start), pteEntry, pteSize-1) { + return false + } if !pteEntry.Valid() { if w.visitor.requiresAlloc() { panic("PTE not set after iteration with requiresAlloc!") @@ -311,4 +227,5 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { w.pageTables.Allocator.FreePTEs(pudEntries) } } + return true } diff --git a/pkg/ring0/pagetables/walker_generic.go b/pkg/ring0/pagetables/walker_generic.go new file mode 100644 index 000000000..34fba7b84 --- /dev/null +++ b/pkg/ring0/pagetables/walker_generic.go @@ -0,0 +1,110 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pagetables + +// Visitor is a generic type. +type Visitor interface { + // visit is called on each PTE. The returned boolean indicates whether + // the walk should continue. + visit(start uintptr, pte *PTE, align uintptr) bool + + // requiresAlloc indicates that new entries should be allocated within + // the walked range. + requiresAlloc() bool + + // requiresSplit indicates that entries in the given range should be + // split if they are huge or jumbo pages. + requiresSplit() bool +} + +// Walker walks page tables. +type Walker struct { + // pageTables are the tables to walk. + pageTables *PageTables + + // Visitor is the set of arguments. + visitor Visitor +} + +// iterateRange iterates over all appropriate levels of page tables for the given range. +// +// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The +// exception is super pages. If a valid super page (huge or jumbo) cannot be +// installed, then the walk will continue to individual entries. +// +// This algorithm will attempt to maximize the use of super/sect pages whenever +// possible. Whether a super page is provided will be clear through the range +// provided in the callback. +// +// Note that if requiresAlloc is true, then no gaps will be present. However, +// if alloc is not set, then the iteration will likely be full of gaps. +// +// Note that this function should generally be avoided in favor of Map, Unmap, +// etc. when not necessary. +// +// Precondition: start must be page-aligned. +// Precondition: start must be less than end. +// Precondition: If requiresAlloc is true, then start and end should not span +// non-canonical ranges. If they do, a panic will result. +// +//go:nosplit +func (w *Walker) iterateRange(start, end uintptr) { + if start%pteSize != 0 { + panic("unaligned start") + } + if end < start { + panic("start > end") + } + if start < lowerTop { + if end <= lowerTop { + w.iterateRangeCanonical(start, end) + } else if end > lowerTop && end <= upperBottom { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(start, lowerTop) + } else { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + if !w.iterateRangeCanonical(start, lowerTop) { + return + } + w.iterateRangeCanonical(upperBottom, end) + } + } else if start < upperBottom { + if end <= upperBottom { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + } else { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(upperBottom, end) + } + } else { + w.iterateRangeCanonical(start, end) + } +} + +// next returns the next address quantized by the given size. +// +//go:nosplit +func next(start uintptr, size uintptr) uintptr { + start &= ^(size - 1) + start += size + return start +} diff --git a/pkg/sentry/platform/ring0/ring0.go b/pkg/ring0/ring0.go index cdeb1b43a..cdeb1b43a 100644 --- a/pkg/sentry/platform/ring0/ring0.go +++ b/pkg/ring0/ring0.go diff --git a/pkg/sentry/platform/ring0/x86.go b/pkg/ring0/x86.go index 34fbc1c35..34fbc1c35 100644 --- a/pkg/sentry/platform/ring0/x86.go +++ b/pkg/ring0/x86.go diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index 1d88db12f..de7a0f3ab 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -404,3 +404,16 @@ func ttyName(tty *kernel.TTY) string { } return fmt.Sprintf("pts/%d", tty.Index) } + +// ContainerUsage retrieves per-container CPU usage. +func ContainerUsage(kr *kernel.Kernel) map[string]uint64 { + cusage := make(map[string]uint64) + for _, tg := range kr.TaskSet().Root.ThreadGroups() { + // We want each tg's usage including reaped children. + cid := tg.Leader().ContainerID() + stats := tg.CPUStats() + stats.Accumulate(tg.JoinedChildCPUStats()) + cusage[cid] += uint64(stats.UserTime.Nanoseconds()) + uint64(stats.SysTime.Nanoseconds()) + } + return cusage +} diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go index a2f3d5918..07b4fb70f 100644 --- a/pkg/sentry/fs/host/socket.go +++ b/pkg/sentry/fs/host/socket.go @@ -257,7 +257,7 @@ func (c *ConnectedEndpoint) Passcred() bool { } // GetLocalAddress implements transport.ConnectedEndpoint.GetLocalAddress. -func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { +func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { return tcpip.FullAddress{Addr: tcpip.Address(c.path)}, nil } diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index 089955a96..ae972fcb5 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -299,10 +299,15 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off src = src.TakeFirst64(limit) } - // Do a buffered write. See rationale in PRead. if d.cachedMetadataAuthoritative() { - d.touchCMtime() + if fd.isRegularFile { + d.touchCMtimeLocked() + } else { + d.touchCMtime() + } } + + // Do a buffered write. See rationale in PRead. buf := make([]byte, src.NumBytes()) copied, copyErr := src.CopyIn(ctx, buf) if copied == 0 && copyErr != nil { diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go index 60acc367f..72aa535f8 100644 --- a/pkg/sentry/fsimpl/host/socket.go +++ b/pkg/sentry/fsimpl/host/socket.go @@ -201,7 +201,7 @@ func (c *ConnectedEndpoint) Passcred() bool { } // GetLocalAddress implements transport.ConnectedEndpoint.GetLocalAddress. -func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { +func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { return tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, nil } diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index eac578f25..8139bff76 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -371,6 +371,8 @@ type OrderedChildrenOptions struct { // OrderedChildren may modify the tracked children. This applies to // operations related to rename, unlink and rmdir. If an OrderedChildren is // not writable, these operations all fail with EPERM. + // + // Note that writable users must implement the sticky bit (I_SVTX). Writable bool } @@ -556,7 +558,6 @@ func (o *OrderedChildren) Unlink(ctx context.Context, name string, child Inode) return err } - // TODO(gvisor.dev/issue/3027): Check sticky bit before removing. o.removeLocked(name) return nil } @@ -603,8 +604,8 @@ func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, c if err := o.checkExistingLocked(oldname, child); err != nil { return err } + o.removeLocked(oldname) - // TODO(gvisor.dev/issue/3027): Check sticky bit before removing. dst.replaceChildLocked(ctx, newname, child) return nil } diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 3b6336e94..09c0ccaf2 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -368,17 +368,15 @@ func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst }) } -// CopyOutFrom implements usermem.IO.CopyOutFrom. +// CopyOutFrom implements usermem.IO.CopyOutFrom. Note that it is the caller's +// responsibility to call fd.pipe.Notify(waiter.EventIn) after the write is +// completed. // // Preconditions: fd.pipe.mu must be locked. func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { - n, err := fd.pipe.writeLocked(ars.NumBytes(), func(dsts safemem.BlockSeq) (uint64, error) { + return fd.pipe.writeLocked(ars.NumBytes(), func(dsts safemem.BlockSeq) (uint64, error) { return src.ReadToBlocks(dsts) }) - if n > 0 { - fd.pipe.Notify(waiter.EventIn) - } - return n, err } // SwapUint32 implements usermem.IO.SwapUint32. diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index 8ce411102..b3290917e 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -45,14 +45,14 @@ go_library( "//pkg/cpuid", "//pkg/log", "//pkg/procid", + "//pkg/ring0", + "//pkg/ring0/pagetables", "//pkg/safecopy", "//pkg/seccomp", "//pkg/sentry/arch", "//pkg/sentry/memmap", "//pkg/sentry/platform", "//pkg/sentry/platform/interrupt", - "//pkg/sentry/platform/ring0", - "//pkg/sentry/platform/ring0/pagetables", "//pkg/sentry/time", "//pkg/sync", "//pkg/usermem", @@ -75,11 +75,11 @@ go_test( "requires-kvm", ], deps = [ + "//pkg/ring0", + "//pkg/ring0/pagetables", "//pkg/sentry/arch", "//pkg/sentry/platform", "//pkg/sentry/platform/kvm/testutil", - "//pkg/sentry/platform/ring0", - "//pkg/sentry/platform/ring0/pagetables", "//pkg/sentry/time", "//pkg/usermem", ], @@ -89,6 +89,6 @@ genrule( name = "bluepill_impl_amd64", srcs = ["bluepill_amd64.s"], outs = ["bluepill_impl_amd64.s"], - cmd = "(echo -e '// build +amd64\\n' && $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(SRCS)) > $@", - tools = ["//pkg/sentry/platform/ring0/gen_offsets"], + cmd = "(echo -e '// build +amd64\\n' && $(location //pkg/ring0/gen_offsets) && cat $(SRCS)) > $@", + tools = ["//pkg/ring0/gen_offsets"], ) diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index af5c5e191..25c21e843 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -18,9 +18,9 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/usermem" ) diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go index 4b23f7803..2c970162e 100644 --- a/pkg/sentry/platform/kvm/bluepill.go +++ b/pkg/sentry/platform/kvm/bluepill.go @@ -19,9 +19,9 @@ import ( "reflect" "syscall" + "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/safecopy" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) // bluepill enters guest mode. diff --git a/pkg/sentry/platform/kvm/bluepill_allocator.go b/pkg/sentry/platform/kvm/bluepill_allocator.go index 9485e1301..1825edc3a 100644 --- a/pkg/sentry/platform/kvm/bluepill_allocator.go +++ b/pkg/sentry/platform/kvm/bluepill_allocator.go @@ -17,7 +17,7 @@ package kvm import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/ring0/pagetables" ) type allocator struct { diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go index ddc1554d5..83a4766fb 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64.go @@ -19,8 +19,8 @@ package kvm import ( "syscall" + "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) var ( diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go index f8ccb7430..0063e947b 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go @@ -20,8 +20,8 @@ import ( "syscall" "unsafe" + "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) // dieArchSetup initializes the state for dieTrampoline. diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go index 1f09813ba..35298135a 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64.go @@ -19,8 +19,8 @@ package kvm import ( "syscall" + "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) var ( diff --git a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go index 4d912769a..dbbf2a897 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go @@ -20,8 +20,8 @@ import ( "syscall" "unsafe" + "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) // fpsimdPtr returns a fpsimd64 for the given address. diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go index 17268d127..aeae01dbd 100644 --- a/pkg/sentry/platform/kvm/context.go +++ b/pkg/sentry/platform/kvm/context.go @@ -18,10 +18,10 @@ import ( "sync/atomic" pkgcontext "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/interrupt" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" "gvisor.dev/gvisor/pkg/usermem" ) diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index 5979aef97..7bdf57436 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -20,9 +20,9 @@ import ( "os" "syscall" + "gvisor.dev/gvisor/pkg/ring0" + "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/usermem" ) diff --git a/pkg/sentry/platform/kvm/kvm_amd64.go b/pkg/sentry/platform/kvm/kvm_amd64.go index 093497bc4..b9ed4a706 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64.go +++ b/pkg/sentry/platform/kvm/kvm_amd64.go @@ -18,7 +18,7 @@ package kvm import ( "gvisor.dev/gvisor/pkg/cpuid" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/ring0" ) // userRegs represents KVM user registers. diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.go b/pkg/sentry/platform/kvm/kvm_amd64_test.go index c0b4fd374..76fc594a0 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64_test.go +++ b/pkg/sentry/platform/kvm/kvm_amd64_test.go @@ -19,11 +19,11 @@ package kvm import ( "testing" + "gvisor.dev/gvisor/pkg/ring0" + "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" ) func TestSegments(t *testing.T) { diff --git a/pkg/sentry/platform/kvm/kvm_arm64.go b/pkg/sentry/platform/kvm/kvm_arm64.go index 9db1db4e9..b73340f0e 100644 --- a/pkg/sentry/platform/kvm/kvm_arm64.go +++ b/pkg/sentry/platform/kvm/kvm_arm64.go @@ -17,8 +17,8 @@ package kvm import ( + "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) type kvmOneReg struct { diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index a650877d6..11ca1f0ea 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -22,11 +22,11 @@ import ( "testing" "time" + "gvisor.dev/gvisor/pkg/ring0" + "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" ktime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/usermem" ) diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index e2fffc99b..1ece1b8d8 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -23,8 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/procid" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/ring0" + "gvisor.dev/gvisor/pkg/ring0/pagetables" ktime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/usermem" diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index 8e03c310d..59c752d73 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -24,10 +24,10 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/ring0" + "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" ktime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/usermem" ) diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go index aa2d21748..7d7857067 100644 --- a/pkg/sentry/platform/kvm/machine_arm64.go +++ b/pkg/sentry/platform/kvm/machine_arm64.go @@ -17,10 +17,10 @@ package kvm import ( + "gvisor.dev/gvisor/pkg/ring0" + "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/usermem" ) diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index a466acf4d..dca0cdb60 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -23,10 +23,10 @@ import ( "syscall" "unsafe" + "gvisor.dev/gvisor/pkg/ring0" + "gvisor.dev/gvisor/pkg/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/usermem" ) diff --git a/pkg/sentry/platform/kvm/physical_map.go b/pkg/sentry/platform/kvm/physical_map.go index f7fa2f98d..8bdec93ae 100644 --- a/pkg/sentry/platform/kvm/physical_map.go +++ b/pkg/sentry/platform/kvm/physical_map.go @@ -20,7 +20,7 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/platform/ring0" + "gvisor.dev/gvisor/pkg/ring0" "gvisor.dev/gvisor/pkg/usermem" ) diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 7065a0e46..69693f263 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -251,11 +251,11 @@ var errStackType = syserr.New("expected but did not receive a netstack.Stack", l type commonEndpoint interface { // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress and // transport.Endpoint.GetLocalAddress. - GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) + GetLocalAddress() (tcpip.FullAddress, tcpip.Error) // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress and // transport.Endpoint.GetRemoteAddress. - GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) + GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) // Readiness implements tcpip.Endpoint.Readiness and // transport.Endpoint.Readiness. @@ -263,19 +263,19 @@ type commonEndpoint interface { // SetSockOpt implements tcpip.Endpoint.SetSockOpt and // transport.Endpoint.SetSockOpt. - SetSockOpt(tcpip.SettableSocketOption) *tcpip.Error + SetSockOpt(tcpip.SettableSocketOption) tcpip.Error // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt and // transport.Endpoint.SetSockOptInt. - SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error + SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error // GetSockOpt implements tcpip.Endpoint.GetSockOpt and // transport.Endpoint.GetSockOpt. - GetSockOpt(tcpip.GettableSocketOption) *tcpip.Error + GetSockOpt(tcpip.GettableSocketOption) tcpip.Error // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt and // transport.Endpoint.GetSockOpt. - GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) + GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) // State returns a socket's lifecycle state. The returned value is // protocol-specific and is primarily used for diagnostics. @@ -283,7 +283,7 @@ type commonEndpoint interface { // LastError implements tcpip.Endpoint.LastError and // transport.Endpoint.LastError. - LastError() *tcpip.Error + LastError() tcpip.Error // SocketOptions implements tcpip.Endpoint.SocketOptions and // transport.Endpoint.SocketOptions. @@ -442,7 +442,7 @@ func (s *SocketOperations) WriteTo(ctx context.Context, _ *fs.File, dst io.Write func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { r := src.Reader(ctx) n, err := s.Endpoint.Write(r, tcpip.WriteOptions{}) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { return 0, syserror.ErrWouldBlock } if err != nil { @@ -459,17 +459,24 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO var _ tcpip.Payloader = (*limitedPayloader)(nil) type limitedPayloader struct { - io.LimitedReader + inner io.LimitedReader + err error } -func (l limitedPayloader) Len() int { - return int(l.N) +func (l *limitedPayloader) Read(p []byte) (int, error) { + n, err := l.inner.Read(p) + l.err = err + return n, err +} + +func (l *limitedPayloader) Len() int { + return int(l.inner.N) } // ReadFrom implements fs.FileOperations.ReadFrom. func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader, count int64) (int64, error) { f := limitedPayloader{ - LimitedReader: io.LimitedReader{ + inner: io.LimitedReader{ R: r, N: count, }, @@ -479,8 +486,8 @@ func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader // so we can't release the lock while copying data. Atomic: true, }) - if err == tcpip.ErrBadBuffer { - err = nil + if _, ok := err.(*tcpip.ErrBadBuffer); ok { + return n, f.err } return n, syserr.TranslateNetstackError(err).ToError() } @@ -526,7 +533,7 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool if family == linux.AF_UNSPEC { err := s.Endpoint.Disconnect() - if err == tcpip.ErrNotSupported { + if _, ok := err.(*tcpip.ErrNotSupported); ok { return syserr.ErrAddressFamilyNotSupported } return syserr.TranslateNetstackError(err) @@ -548,15 +555,16 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool s.EventRegister(&e, waiter.EventOut) defer s.EventUnregister(&e) - if err := s.Endpoint.Connect(addr); err != tcpip.ErrConnectStarted && err != tcpip.ErrAlreadyConnecting { + switch err := s.Endpoint.Connect(addr); err.(type) { + case *tcpip.ErrConnectStarted, *tcpip.ErrAlreadyConnecting: + case *tcpip.ErrNoPortAvailable: if (s.family == unix.AF_INET || s.family == unix.AF_INET6) && s.skType == linux.SOCK_STREAM { // TCP unlike UDP returns EADDRNOTAVAIL when it can't // find an available local ephemeral port. - if err == tcpip.ErrNoPortAvailable { - return syserr.ErrAddressNotAvailable - } + return syserr.ErrAddressNotAvailable } - + return syserr.TranslateNetstackError(err) + default: return syserr.TranslateNetstackError(err) } @@ -614,16 +622,16 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { // Issue the bind request to the endpoint. err := s.Endpoint.Bind(addr) - if err == tcpip.ErrNoPortAvailable { + if _, ok := err.(*tcpip.ErrNoPortAvailable); ok { // Bind always returns EADDRINUSE irrespective of if the specified port was // already bound or if an ephemeral port was requested but none were // available. // - // tcpip.ErrNoPortAvailable is mapped to EAGAIN in syserr package because + // *tcpip.ErrNoPortAvailable is mapped to EAGAIN in syserr package because // UDP connect returns EAGAIN on ephemeral port exhaustion. // // TCP connect returns EADDRNOTAVAIL on ephemeral port exhaustion. - err = tcpip.ErrPortInUse + err = &tcpip.ErrPortInUse{} } return syserr.TranslateNetstackError(err) @@ -646,7 +654,8 @@ func (s *socketOpsCommon) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAdd // Try to accept the connection again; if it fails, then wait until we // get a notification. for { - if ep, wq, err := s.Endpoint.Accept(peerAddr); err != tcpip.ErrWouldBlock { + ep, wq, err := s.Endpoint.Accept(peerAddr) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { return ep, wq, syserr.TranslateNetstackError(err) } @@ -665,7 +674,7 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, } ep, wq, terr := s.Endpoint.Accept(peerAddr) if terr != nil { - if terr != tcpip.ErrWouldBlock || !blocking { + if _, ok := terr.(*tcpip.ErrWouldBlock); !ok || !blocking { return 0, nil, 0, syserr.TranslateNetstackError(terr) } @@ -1098,6 +1107,29 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name, // TODO(b/64800844): Translate fields once they are added to // tcpip.TCPInfoOption. info := linux.TCPInfo{} + switch v.CcState { + case tcpip.RTORecovery: + info.CaState = linux.TCP_CA_Loss + case tcpip.FastRecovery, tcpip.SACKRecovery: + info.CaState = linux.TCP_CA_Recovery + case tcpip.Disorder: + info.CaState = linux.TCP_CA_Disorder + case tcpip.Open: + info.CaState = linux.TCP_CA_Open + } + info.RTO = uint32(v.RTO / time.Microsecond) + info.RTT = uint32(v.RTT / time.Microsecond) + info.RTTVar = uint32(v.RTTVar / time.Microsecond) + info.SndSsthresh = v.SndSsthresh + info.SndCwnd = v.SndCwnd + + // In netstack reorderSeen is updated only when RACK is enabled. + // We only track whether the reordering is seen, which is + // different than Linux where reorderSeen is not specific to + // RACK and is incremented when a reordering event is seen. + if v.ReorderSeen { + info.ReordSeen = 1 + } // Linux truncates the output binary to outLen. buf := t.CopyScratchBuffer(info.SizeBytes()) @@ -2534,7 +2566,7 @@ func (s *socketOpsCommon) nonBlockingRead(ctx context.Context, dst usermem.IOSeq defer s.readMu.Unlock() res, err := s.Endpoint.Read(w, readOptions) - if err == tcpip.ErrBadBuffer && dst.NumBytes() == 0 { + if _, ok := err.(*tcpip.ErrBadBuffer); ok && dst.NumBytes() == 0 { err = nil } if err != nil { @@ -2634,9 +2666,9 @@ func (s *socketOpsCommon) dequeueErr() *tcpip.SockError { } // Update socket error to reflect ICMP errors in queue. - if nextErr := so.PeekErr(); nextErr != nil && nextErr.ErrOrigin.IsICMPErr() { + if nextErr := so.PeekErr(); nextErr != nil && nextErr.Cause.Origin().IsICMPErr() { so.SetLastError(nextErr.Err) - } else if err.ErrOrigin.IsICMPErr() { + } else if err.Cause.Origin().IsICMPErr() { so.SetLastError(nil) } return err @@ -2790,13 +2822,15 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b if flags&linux.MSG_DONTWAIT != 0 { return int(total), syserr.TranslateNetstackError(err) } - switch err { + block := true + switch err.(type) { case nil: - if total == src.NumBytes() { - break - } - fallthrough - case tcpip.ErrWouldBlock: + block = total != src.NumBytes() + case *tcpip.ErrWouldBlock: + default: + block = false + } + if block { if ch == nil { // We'll have to block. Register for notification and keep trying to // send all the data. diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index 3bbdf552e..24922c400 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -130,7 +130,7 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs r := src.Reader(ctx) n, err := s.Endpoint.Write(r, tcpip.WriteOptions{}) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { return 0, syserror.ErrWouldBlock } if err != nil { @@ -154,7 +154,7 @@ func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, block } ep, wq, terr := s.Endpoint.Accept(peerAddr) if terr != nil { - if terr != tcpip.ErrWouldBlock || !blocking { + if _, ok := terr.(*tcpip.ErrWouldBlock); !ok || !blocking { return 0, nil, 0, syserr.TranslateNetstackError(terr) } diff --git a/pkg/sentry/socket/netstack/provider.go b/pkg/sentry/socket/netstack/provider.go index c847ff1c7..2515dda80 100644 --- a/pkg/sentry/socket/netstack/provider.go +++ b/pkg/sentry/socket/netstack/provider.go @@ -118,7 +118,7 @@ func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (* // Create the endpoint. var ep tcpip.Endpoint - var e *tcpip.Error + var e tcpip.Error wq := &waiter.Queue{} if stype == linux.SOCK_RAW { ep, e = eps.Stack.NewRawEndpoint(transProto, p.netProto, wq, associated) diff --git a/pkg/sentry/socket/netstack/provider_vfs2.go b/pkg/sentry/socket/netstack/provider_vfs2.go index 0af805246..ba1cc79e9 100644 --- a/pkg/sentry/socket/netstack/provider_vfs2.go +++ b/pkg/sentry/socket/netstack/provider_vfs2.go @@ -62,7 +62,7 @@ func (p *providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int // Create the endpoint. var ep tcpip.Endpoint - var e *tcpip.Error + var e tcpip.Error wq := &waiter.Queue{} if stype == linux.SOCK_RAW { ep, e = eps.Stack.NewRawEndpoint(transProto, p.netProto, wq, associated) diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index 97729dacc..cc535d794 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -81,10 +81,10 @@ func sockErrCmsgToLinux(sockErr *tcpip.SockError) linux.SockErrCMsg { ee := linux.SockExtendedErr{ Errno: uint32(syserr.TranslateNetstackError(sockErr.Err).ToLinux().Number()), - Origin: errOriginToLinux(sockErr.ErrOrigin), - Type: sockErr.ErrType, - Code: sockErr.ErrCode, - Info: sockErr.ErrInfo, + Origin: errOriginToLinux(sockErr.Cause.Origin()), + Type: sockErr.Cause.Type(), + Code: sockErr.Cause.Code(), + Info: sockErr.Cause.Info(), } switch sockErr.NetProto { diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go index b011082dc..fc5b823b0 100644 --- a/pkg/sentry/socket/unix/transport/connectioned.go +++ b/pkg/sentry/socket/unix/transport/connectioned.go @@ -48,7 +48,7 @@ type ConnectingEndpoint interface { Type() linux.SockType // GetLocalAddress returns the bound path. - GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) + GetLocalAddress() (tcpip.FullAddress, tcpip.Error) // Locker protects the following methods. While locked, only the holder of // the lock can change the return value of the protected methods. diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go index 0e3889c6d..70227bbd2 100644 --- a/pkg/sentry/socket/unix/transport/unix.go +++ b/pkg/sentry/socket/unix/transport/unix.go @@ -169,32 +169,32 @@ type Endpoint interface { Type() linux.SockType // GetLocalAddress returns the address to which the endpoint is bound. - GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) + GetLocalAddress() (tcpip.FullAddress, tcpip.Error) // GetRemoteAddress returns the address to which the endpoint is // connected. - GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) + GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) // SetSockOpt sets a socket option. - SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error + SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error // SetSockOptInt sets a socket option for simple cases when a value has // the int type. - SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error + SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error // GetSockOpt gets a socket option. - GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error + GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error // GetSockOptInt gets a socket option for simple cases when a return // value has the int type. - GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) + GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) // State returns the current state of the socket, as represented by Linux in // procfs. State() uint32 // LastError clears and returns the last error reported by the endpoint. - LastError() *tcpip.Error + LastError() tcpip.Error // SocketOptions returns the structure which contains all the socket // level options. @@ -580,7 +580,7 @@ type ConnectedEndpoint interface { Passcred() bool // GetLocalAddress implements Endpoint.GetLocalAddress. - GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) + GetLocalAddress() (tcpip.FullAddress, tcpip.Error) // Send sends a single message. This method does not block. // @@ -640,7 +640,7 @@ type connectedEndpoint struct { Passcred() bool // GetLocalAddress implements Endpoint.GetLocalAddress. - GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) + GetLocalAddress() (tcpip.FullAddress, tcpip.Error) // Type implements Endpoint.Type. Type() linux.SockType @@ -655,7 +655,7 @@ func (e *connectedEndpoint) Passcred() bool { } // GetLocalAddress implements ConnectedEndpoint.GetLocalAddress. -func (e *connectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { +func (e *connectedEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { return e.endpoint.GetLocalAddress() } @@ -836,11 +836,11 @@ func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMess } // SetSockOpt sets a socket option. -func (e *baseEndpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { +func (e *baseEndpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { return nil } -func (e *baseEndpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { +func (e *baseEndpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { switch opt { case tcpip.ReceiveBufferSizeOption: default: @@ -855,34 +855,34 @@ func (e *baseEndpoint) IsUnixSocket() bool { } // GetSendBufferSize implements tcpip.SocketOptionsHandler.GetSendBufferSize. -func (e *baseEndpoint) GetSendBufferSize() (int64, *tcpip.Error) { +func (e *baseEndpoint) GetSendBufferSize() (int64, tcpip.Error) { e.Lock() defer e.Unlock() if !e.Connected() { - return -1, tcpip.ErrNotConnected + return -1, &tcpip.ErrNotConnected{} } v := e.connected.SendMaxQueueSize() if v < 0 { - return -1, tcpip.ErrQueueSizeNotSupported + return -1, &tcpip.ErrQueueSizeNotSupported{} } return v, nil } -func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { +func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { switch opt { case tcpip.ReceiveQueueSizeOption: v := 0 e.Lock() if !e.Connected() { e.Unlock() - return -1, tcpip.ErrNotConnected + return -1, &tcpip.ErrNotConnected{} } v = int(e.receiver.RecvQueuedSize()) e.Unlock() if v < 0 { - return -1, tcpip.ErrQueueSizeNotSupported + return -1, &tcpip.ErrQueueSizeNotSupported{} } return v, nil @@ -890,12 +890,12 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { e.Lock() if !e.Connected() { e.Unlock() - return -1, tcpip.ErrNotConnected + return -1, &tcpip.ErrNotConnected{} } v := e.connected.SendQueuedSize() e.Unlock() if v < 0 { - return -1, tcpip.ErrQueueSizeNotSupported + return -1, &tcpip.ErrQueueSizeNotSupported{} } return int(v), nil @@ -903,29 +903,29 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { e.Lock() if e.receiver == nil { e.Unlock() - return -1, tcpip.ErrNotConnected + return -1, &tcpip.ErrNotConnected{} } v := e.receiver.RecvMaxQueueSize() e.Unlock() if v < 0 { - return -1, tcpip.ErrQueueSizeNotSupported + return -1, &tcpip.ErrQueueSizeNotSupported{} } return int(v), nil default: log.Warningf("Unsupported socket option: %d", opt) - return -1, tcpip.ErrUnknownProtocolOption + return -1, &tcpip.ErrUnknownProtocolOption{} } } // GetSockOpt implements tcpip.Endpoint.GetSockOpt. -func (e *baseEndpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error { +func (e *baseEndpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error { log.Warningf("Unsupported socket option: %T", opt) - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } // LastError implements Endpoint.LastError. -func (*baseEndpoint) LastError() *tcpip.Error { +func (*baseEndpoint) LastError() tcpip.Error { return nil } @@ -965,7 +965,7 @@ func (e *baseEndpoint) Shutdown(flags tcpip.ShutdownFlags) *syserr.Error { } // GetLocalAddress returns the bound path. -func (e *baseEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { +func (e *baseEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { e.Lock() defer e.Unlock() return tcpip.FullAddress{Addr: tcpip.Address(e.path)}, nil @@ -973,14 +973,14 @@ func (e *baseEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { // GetRemoteAddress returns the local address of the connected endpoint (if // available). -func (e *baseEndpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { +func (e *baseEndpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) { e.Lock() c := e.connected e.Unlock() if c != nil { return c.GetLocalAddress() } - return tcpip.FullAddress{}, tcpip.ErrNotConnected + return tcpip.FullAddress{}, &tcpip.ErrNotConnected{} } // Release implements BoundEndpoint.Release. diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go index dab6207c0..d1778d029 100644 --- a/pkg/sentry/syscalls/linux/error.go +++ b/pkg/sentry/syscalls/linux/error.go @@ -134,8 +134,8 @@ func handleIOErrorImpl(t *kernel.Task, partialResult bool, err, intr error, op s // Similar to EPIPE. Return what we wrote this time, and let // ENOSPC be returned on the next call. return true, nil - case syserror.ECONNRESET: - // For TCP sendfile connections, we may have a reset. But we + case syserror.ECONNRESET, syserror.ETIMEDOUT: + // For TCP sendfile connections, we may have a reset or timeout. But we // should just return n as the result. return true, nil case syserror.ErrWouldBlock: diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index e39f074f2..1a31898e8 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -123,6 +123,15 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } defer file.DecRef(t) + if file.StatusFlags()&linux.O_PATH != 0 { + switch cmd { + case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC, linux.F_GETFD, linux.F_SETFD, linux.F_GETFL: + // allowed + default: + return 0, nil, syserror.EBADF + } + } + switch cmd { case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC: minfd := args[2].Int() @@ -395,6 +404,10 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys } defer file.DecRef(t) + if file.StatusFlags()&linux.O_PATH != 0 { + return 0, nil, syserror.EBADF + } + // If the FD refers to a pipe or FIFO, return error. if _, isPipe := file.Impl().(*pipe.VFSPipeFD); isPipe { return 0, nil, syserror.ESPIPE diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go index 20c264fef..c7c3fed57 100644 --- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go +++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go @@ -32,6 +32,10 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } defer file.DecRef(t) + if file.StatusFlags()&linux.O_PATH != 0 { + return 0, nil, syserror.EBADF + } + // Handle ioctls that apply to all FDs. switch args[1].Int() { case linux.FIONCLEX: diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go index 6e9b599e2..1f8a5878c 100644 --- a/pkg/sentry/syscalls/linux/vfs2/sync.go +++ b/pkg/sentry/syscalls/linux/vfs2/sync.go @@ -36,6 +36,10 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } defer file.DecRef(t) + if file.StatusFlags()&linux.O_PATH != 0 { + return 0, nil, syserror.EBADF + } + return 0, nil, file.SyncFS(t) } diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index a3868bf16..df4990854 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -83,6 +83,7 @@ go_library( "mount.go", "mount_namespace_refs.go", "mount_unsafe.go", + "opath.go", "options.go", "pathname.go", "permissions.go", diff --git a/pkg/sentry/vfs/opath.go b/pkg/sentry/vfs/opath.go new file mode 100644 index 000000000..39fbac987 --- /dev/null +++ b/pkg/sentry/vfs/opath.go @@ -0,0 +1,139 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// opathFD implements vfs.FileDescriptionImpl for a file description opened with O_PATH. +// +// +stateify savable +type opathFD struct { + vfsfd FileDescription + FileDescriptionDefaultImpl + NoLockFD +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *opathFD) Release(context.Context) { + // noop +} + +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (fd *opathFD) Allocate(ctx context.Context, mode, offset, length uint64) error { + return syserror.EBADF +} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *opathFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { + return 0, syserror.EBADF +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *opathFD) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { + return 0, syserror.EBADF +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *opathFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { + return 0, syserror.EBADF +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *opathFD) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { + return 0, syserror.EBADF +} + +// Ioctl implements vfs.FileDescriptionImpl.Ioctl. +func (fd *opathFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { + return 0, syserror.EBADF +} + +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. +func (fd *opathFD) IterDirents(ctx context.Context, cb IterDirentsCallback) error { + return syserror.EBADF +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *opathFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + return 0, syserror.EBADF +} + +// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. +func (fd *opathFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + return syserror.EBADF +} + +// ListXattr implements vfs.FileDescriptionImpl.ListXattr. +func (fd *opathFD) ListXattr(ctx context.Context, size uint64) ([]string, error) { + return nil, syserror.EBADF +} + +// GetXattr implements vfs.FileDescriptionImpl.GetXattr. +func (fd *opathFD) GetXattr(ctx context.Context, opts GetXattrOptions) (string, error) { + return "", syserror.EBADF +} + +// SetXattr implements vfs.FileDescriptionImpl.SetXattr. +func (fd *opathFD) SetXattr(ctx context.Context, opts SetXattrOptions) error { + return syserror.EBADF +} + +// RemoveXattr implements vfs.FileDescriptionImpl.RemoveXattr. +func (fd *opathFD) RemoveXattr(ctx context.Context, name string) error { + return syserror.EBADF +} + +// Sync implements vfs.FileDescriptionImpl.Sync. +func (fd *opathFD) Sync(ctx context.Context) error { + return syserror.EBADF +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (fd *opathFD) SetStat(ctx context.Context, opts SetStatOptions) error { + return syserror.EBADF +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (fd *opathFD) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) { + vfsObj := fd.vfsfd.vd.mount.vfs + rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ + Root: fd.vfsfd.vd, + Start: fd.vfsfd.vd, + }) + stat, err := fd.vfsfd.vd.mount.fs.impl.StatAt(ctx, rp, opts) + vfsObj.putResolvingPath(ctx, rp) + return stat, err +} + +// StatFS returns metadata for the filesystem containing the file represented +// by fd. +func (fd *opathFD) StatFS(ctx context.Context) (linux.Statfs, error) { + vfsObj := fd.vfsfd.vd.mount.vfs + rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ + Root: fd.vfsfd.vd, + Start: fd.vfsfd.vd, + }) + statfs, err := fd.vfsfd.vd.mount.fs.impl.StatFSAt(ctx, rp) + vfsObj.putResolvingPath(ctx, rp) + return statfs, err +} diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go index bc79e5ecc..c9907843c 100644 --- a/pkg/sentry/vfs/options.go +++ b/pkg/sentry/vfs/options.go @@ -129,7 +129,7 @@ type OpenOptions struct { // // FilesystemImpls are responsible for implementing the following flags: // O_RDONLY, O_WRONLY, O_RDWR, O_APPEND, O_CREAT, O_DIRECT, O_DSYNC, - // O_EXCL, O_NOATIME, O_NOCTTY, O_NONBLOCK, O_PATH, O_SYNC, O_TMPFILE, and + // O_EXCL, O_NOATIME, O_NOCTTY, O_NONBLOCK, O_SYNC, O_TMPFILE, and // O_TRUNC. VFS is responsible for handling O_DIRECTORY, O_LARGEFILE, and // O_NOFOLLOW. VFS users are responsible for handling O_CLOEXEC, since file // descriptors are mostly outside the scope of VFS. diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 6fd1bb0b2..0aff2dd92 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -425,6 +425,18 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential rp.mustBeDir = true rp.mustBeDirOrig = true } + if opts.Flags&linux.O_PATH != 0 { + vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{}) + if err != nil { + return nil, err + } + fd := &opathFD{} + if err := fd.vfsfd.Init(fd, opts.Flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{}); err != nil { + return nil, err + } + vd.DecRef(ctx) + return &fd.vfsfd, err + } for { fd, err := rp.mount.fs.impl.OpenAt(ctx, rp, *opts) if err == nil { diff --git a/pkg/sync/BUILD b/pkg/sync/BUILD index 28e62abbb..2e2395807 100644 --- a/pkg/sync/BUILD +++ b/pkg/sync/BUILD @@ -67,6 +67,7 @@ go_library( ], marshal = False, stateify = False, + visibility = ["//:sandbox"], deps = [ "//pkg/goid", ], diff --git a/pkg/syserr/netstack.go b/pkg/syserr/netstack.go index cb8981633..a6a91e064 100644 --- a/pkg/syserr/netstack.go +++ b/pkg/syserr/netstack.go @@ -23,95 +23,114 @@ import ( // Mapping for tcpip.Error types. var ( - ErrUnknownProtocol = New(tcpip.ErrUnknownProtocol.String(), linux.EINVAL) - ErrUnknownNICID = New(tcpip.ErrUnknownNICID.String(), linux.ENODEV) - ErrUnknownDevice = New(tcpip.ErrUnknownDevice.String(), linux.ENODEV) - ErrUnknownProtocolOption = New(tcpip.ErrUnknownProtocolOption.String(), linux.ENOPROTOOPT) - ErrDuplicateNICID = New(tcpip.ErrDuplicateNICID.String(), linux.EEXIST) - ErrDuplicateAddress = New(tcpip.ErrDuplicateAddress.String(), linux.EEXIST) - ErrBadLinkEndpoint = New(tcpip.ErrBadLinkEndpoint.String(), linux.EINVAL) - ErrAlreadyBound = New(tcpip.ErrAlreadyBound.String(), linux.EINVAL) - ErrInvalidEndpointState = New(tcpip.ErrInvalidEndpointState.String(), linux.EINVAL) - ErrAlreadyConnecting = New(tcpip.ErrAlreadyConnecting.String(), linux.EALREADY) - ErrNoPortAvailable = New(tcpip.ErrNoPortAvailable.String(), linux.EAGAIN) - ErrPortInUse = New(tcpip.ErrPortInUse.String(), linux.EADDRINUSE) - ErrBadLocalAddress = New(tcpip.ErrBadLocalAddress.String(), linux.EADDRNOTAVAIL) - ErrClosedForSend = New(tcpip.ErrClosedForSend.String(), linux.EPIPE) - ErrClosedForReceive = New(tcpip.ErrClosedForReceive.String(), nil) - ErrTimeout = New(tcpip.ErrTimeout.String(), linux.ETIMEDOUT) - ErrAborted = New(tcpip.ErrAborted.String(), linux.EPIPE) - ErrConnectStarted = New(tcpip.ErrConnectStarted.String(), linux.EINPROGRESS) - ErrDestinationRequired = New(tcpip.ErrDestinationRequired.String(), linux.EDESTADDRREQ) - ErrNotSupported = New(tcpip.ErrNotSupported.String(), linux.EOPNOTSUPP) - ErrQueueSizeNotSupported = New(tcpip.ErrQueueSizeNotSupported.String(), linux.ENOTTY) - ErrNoSuchFile = New(tcpip.ErrNoSuchFile.String(), linux.ENOENT) - ErrInvalidOptionValue = New(tcpip.ErrInvalidOptionValue.String(), linux.EINVAL) - ErrBroadcastDisabled = New(tcpip.ErrBroadcastDisabled.String(), linux.EACCES) - ErrNotPermittedNet = New(tcpip.ErrNotPermitted.String(), linux.EPERM) - ErrBadBuffer = New(tcpip.ErrBadBuffer.String(), linux.EFAULT) + ErrUnknownProtocol = New((&tcpip.ErrUnknownProtocol{}).String(), linux.EINVAL) + ErrUnknownNICID = New((&tcpip.ErrUnknownNICID{}).String(), linux.ENODEV) + ErrUnknownDevice = New((&tcpip.ErrUnknownDevice{}).String(), linux.ENODEV) + ErrUnknownProtocolOption = New((&tcpip.ErrUnknownProtocolOption{}).String(), linux.ENOPROTOOPT) + ErrDuplicateNICID = New((&tcpip.ErrDuplicateNICID{}).String(), linux.EEXIST) + ErrDuplicateAddress = New((&tcpip.ErrDuplicateAddress{}).String(), linux.EEXIST) + ErrAlreadyBound = New((&tcpip.ErrAlreadyBound{}).String(), linux.EINVAL) + ErrInvalidEndpointState = New((&tcpip.ErrInvalidEndpointState{}).String(), linux.EINVAL) + ErrAlreadyConnecting = New((&tcpip.ErrAlreadyConnecting{}).String(), linux.EALREADY) + ErrNoPortAvailable = New((&tcpip.ErrNoPortAvailable{}).String(), linux.EAGAIN) + ErrPortInUse = New((&tcpip.ErrPortInUse{}).String(), linux.EADDRINUSE) + ErrBadLocalAddress = New((&tcpip.ErrBadLocalAddress{}).String(), linux.EADDRNOTAVAIL) + ErrClosedForSend = New((&tcpip.ErrClosedForSend{}).String(), linux.EPIPE) + ErrClosedForReceive = New((&tcpip.ErrClosedForReceive{}).String(), nil) + ErrTimeout = New((&tcpip.ErrTimeout{}).String(), linux.ETIMEDOUT) + ErrAborted = New((&tcpip.ErrAborted{}).String(), linux.EPIPE) + ErrConnectStarted = New((&tcpip.ErrConnectStarted{}).String(), linux.EINPROGRESS) + ErrDestinationRequired = New((&tcpip.ErrDestinationRequired{}).String(), linux.EDESTADDRREQ) + ErrNotSupported = New((&tcpip.ErrNotSupported{}).String(), linux.EOPNOTSUPP) + ErrQueueSizeNotSupported = New((&tcpip.ErrQueueSizeNotSupported{}).String(), linux.ENOTTY) + ErrNoSuchFile = New((&tcpip.ErrNoSuchFile{}).String(), linux.ENOENT) + ErrInvalidOptionValue = New((&tcpip.ErrInvalidOptionValue{}).String(), linux.EINVAL) + ErrBroadcastDisabled = New((&tcpip.ErrBroadcastDisabled{}).String(), linux.EACCES) + ErrNotPermittedNet = New((&tcpip.ErrNotPermitted{}).String(), linux.EPERM) + ErrBadBuffer = New((&tcpip.ErrBadBuffer{}).String(), linux.EFAULT) ) -var netstackErrorTranslations map[string]*Error - -func addErrMapping(tcpipErr *tcpip.Error, netstackErr *Error) { - key := tcpipErr.String() - if _, ok := netstackErrorTranslations[key]; ok { - panic(fmt.Sprintf("duplicate error key: %s", key)) - } - netstackErrorTranslations[key] = netstackErr -} - -func init() { - netstackErrorTranslations = make(map[string]*Error) - addErrMapping(tcpip.ErrUnknownProtocol, ErrUnknownProtocol) - addErrMapping(tcpip.ErrUnknownNICID, ErrUnknownNICID) - addErrMapping(tcpip.ErrUnknownDevice, ErrUnknownDevice) - addErrMapping(tcpip.ErrUnknownProtocolOption, ErrUnknownProtocolOption) - addErrMapping(tcpip.ErrDuplicateNICID, ErrDuplicateNICID) - addErrMapping(tcpip.ErrDuplicateAddress, ErrDuplicateAddress) - addErrMapping(tcpip.ErrNoRoute, ErrNoRoute) - addErrMapping(tcpip.ErrBadLinkEndpoint, ErrBadLinkEndpoint) - addErrMapping(tcpip.ErrAlreadyBound, ErrAlreadyBound) - addErrMapping(tcpip.ErrInvalidEndpointState, ErrInvalidEndpointState) - addErrMapping(tcpip.ErrAlreadyConnecting, ErrAlreadyConnecting) - addErrMapping(tcpip.ErrAlreadyConnected, ErrAlreadyConnected) - addErrMapping(tcpip.ErrNoPortAvailable, ErrNoPortAvailable) - addErrMapping(tcpip.ErrPortInUse, ErrPortInUse) - addErrMapping(tcpip.ErrBadLocalAddress, ErrBadLocalAddress) - addErrMapping(tcpip.ErrClosedForSend, ErrClosedForSend) - addErrMapping(tcpip.ErrClosedForReceive, ErrClosedForReceive) - addErrMapping(tcpip.ErrWouldBlock, ErrWouldBlock) - addErrMapping(tcpip.ErrConnectionRefused, ErrConnectionRefused) - addErrMapping(tcpip.ErrTimeout, ErrTimeout) - addErrMapping(tcpip.ErrAborted, ErrAborted) - addErrMapping(tcpip.ErrConnectStarted, ErrConnectStarted) - addErrMapping(tcpip.ErrDestinationRequired, ErrDestinationRequired) - addErrMapping(tcpip.ErrNotSupported, ErrNotSupported) - addErrMapping(tcpip.ErrQueueSizeNotSupported, ErrQueueSizeNotSupported) - addErrMapping(tcpip.ErrNotConnected, ErrNotConnected) - addErrMapping(tcpip.ErrConnectionReset, ErrConnectionReset) - addErrMapping(tcpip.ErrConnectionAborted, ErrConnectionAborted) - addErrMapping(tcpip.ErrNoSuchFile, ErrNoSuchFile) - addErrMapping(tcpip.ErrInvalidOptionValue, ErrInvalidOptionValue) - addErrMapping(tcpip.ErrBadAddress, ErrBadAddress) - addErrMapping(tcpip.ErrNetworkUnreachable, ErrNetworkUnreachable) - addErrMapping(tcpip.ErrMessageTooLong, ErrMessageTooLong) - addErrMapping(tcpip.ErrNoBufferSpace, ErrNoBufferSpace) - addErrMapping(tcpip.ErrBroadcastDisabled, ErrBroadcastDisabled) - addErrMapping(tcpip.ErrNotPermitted, ErrNotPermittedNet) - addErrMapping(tcpip.ErrAddressFamilyNotSupported, ErrAddressFamilyNotSupported) - addErrMapping(tcpip.ErrBadBuffer, ErrBadBuffer) -} - // TranslateNetstackError converts an error from the tcpip package to a sentry // internal error. -func TranslateNetstackError(err *tcpip.Error) *Error { - if err == nil { +func TranslateNetstackError(err tcpip.Error) *Error { + switch err.(type) { + case nil: return nil + case *tcpip.ErrUnknownProtocol: + return ErrUnknownProtocol + case *tcpip.ErrUnknownNICID: + return ErrUnknownNICID + case *tcpip.ErrUnknownDevice: + return ErrUnknownDevice + case *tcpip.ErrUnknownProtocolOption: + return ErrUnknownProtocolOption + case *tcpip.ErrDuplicateNICID: + return ErrDuplicateNICID + case *tcpip.ErrDuplicateAddress: + return ErrDuplicateAddress + case *tcpip.ErrNoRoute: + return ErrNoRoute + case *tcpip.ErrAlreadyBound: + return ErrAlreadyBound + case *tcpip.ErrInvalidEndpointState: + return ErrInvalidEndpointState + case *tcpip.ErrAlreadyConnecting: + return ErrAlreadyConnecting + case *tcpip.ErrAlreadyConnected: + return ErrAlreadyConnected + case *tcpip.ErrNoPortAvailable: + return ErrNoPortAvailable + case *tcpip.ErrPortInUse: + return ErrPortInUse + case *tcpip.ErrBadLocalAddress: + return ErrBadLocalAddress + case *tcpip.ErrClosedForSend: + return ErrClosedForSend + case *tcpip.ErrClosedForReceive: + return ErrClosedForReceive + case *tcpip.ErrWouldBlock: + return ErrWouldBlock + case *tcpip.ErrConnectionRefused: + return ErrConnectionRefused + case *tcpip.ErrTimeout: + return ErrTimeout + case *tcpip.ErrAborted: + return ErrAborted + case *tcpip.ErrConnectStarted: + return ErrConnectStarted + case *tcpip.ErrDestinationRequired: + return ErrDestinationRequired + case *tcpip.ErrNotSupported: + return ErrNotSupported + case *tcpip.ErrQueueSizeNotSupported: + return ErrQueueSizeNotSupported + case *tcpip.ErrNotConnected: + return ErrNotConnected + case *tcpip.ErrConnectionReset: + return ErrConnectionReset + case *tcpip.ErrConnectionAborted: + return ErrConnectionAborted + case *tcpip.ErrNoSuchFile: + return ErrNoSuchFile + case *tcpip.ErrInvalidOptionValue: + return ErrInvalidOptionValue + case *tcpip.ErrBadAddress: + return ErrBadAddress + case *tcpip.ErrNetworkUnreachable: + return ErrNetworkUnreachable + case *tcpip.ErrMessageTooLong: + return ErrMessageTooLong + case *tcpip.ErrNoBufferSpace: + return ErrNoBufferSpace + case *tcpip.ErrBroadcastDisabled: + return ErrBroadcastDisabled + case *tcpip.ErrNotPermitted: + return ErrNotPermittedNet + case *tcpip.ErrAddressFamilyNotSupported: + return ErrAddressFamilyNotSupported + case *tcpip.ErrBadBuffer: + return ErrBadBuffer + default: + panic(fmt.Sprintf("unknown error %T", err)) } - se, ok := netstackErrorTranslations[err.String()] - if !ok { - panic("Unknown error: " + err.String()) - } - return se } diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD index e7924e5c2..f979d22f0 100644 --- a/pkg/tcpip/BUILD +++ b/pkg/tcpip/BUILD @@ -18,6 +18,7 @@ go_template_instance( go_library( name = "tcpip", srcs = [ + "errors.go", "sock_err_list.go", "socketops.go", "tcpip.go", diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go index 7c7495c30..c188aaa18 100644 --- a/pkg/tcpip/adapters/gonet/gonet.go +++ b/pkg/tcpip/adapters/gonet/gonet.go @@ -248,7 +248,7 @@ func NewTCPConn(wq *waiter.Queue, ep tcpip.Endpoint) *TCPConn { func (l *TCPListener) Accept() (net.Conn, error) { n, wq, err := l.ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Create wait queue entry that notifies a channel. waitEntry, notifyCh := waiter.NewChannelEntry(nil) l.wq.EventRegister(&waitEntry, waiter.EventIn) @@ -257,7 +257,7 @@ func (l *TCPListener) Accept() (net.Conn, error) { for { n, wq, err = l.ep.Accept(nil) - if err != tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { break } @@ -298,14 +298,14 @@ func commonRead(b []byte, ep tcpip.Endpoint, wq *waiter.Queue, deadline <-chan s opts := tcpip.ReadOptions{NeedRemoteAddr: addr != nil} res, err := ep.Read(&w, opts) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Create wait queue entry that notifies a channel. waitEntry, notifyCh := waiter.NewChannelEntry(nil) wq.EventRegister(&waitEntry, waiter.EventIn) defer wq.EventUnregister(&waitEntry) for { res, err = ep.Read(&w, opts) - if err != tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { break } select { @@ -316,7 +316,7 @@ func commonRead(b []byte, ep tcpip.Endpoint, wq *waiter.Queue, deadline <-chan s } } - if err == tcpip.ErrClosedForReceive { + if _, ok := err.(*tcpip.ErrClosedForReceive); ok { return 0, io.EOF } @@ -356,7 +356,7 @@ func (c *TCPConn) Write(b []byte) (int, error) { } // We must handle two soft failure conditions simultaneously: - // 1. Write may write nothing and return tcpip.ErrWouldBlock. + // 1. Write may write nothing and return *tcpip.ErrWouldBlock. // If this happens, we need to register for notifications if we have // not already and wait to try again. // 2. Write may write fewer than the full number of bytes and return @@ -376,9 +376,9 @@ func (c *TCPConn) Write(b []byte) (int, error) { r.Reset(b[nbytes:]) n, err := c.ep.Write(&r, tcpip.WriteOptions{}) nbytes += int(n) - switch err { + switch err.(type) { case nil: - case tcpip.ErrWouldBlock: + case *tcpip.ErrWouldBlock: if ch == nil { entry, ch = waiter.NewChannelEntry(nil) @@ -495,7 +495,7 @@ func DialContextTCP(ctx context.Context, s *stack.Stack, addr tcpip.FullAddress, } err = ep.Connect(addr) - if err == tcpip.ErrConnectStarted { + if _, ok := err.(*tcpip.ErrConnectStarted); ok { select { case <-ctx.Done(): ep.Close() @@ -649,7 +649,7 @@ func (c *UDPConn) WriteTo(b []byte, addr net.Addr) (int, error) { var r bytes.Reader r.Reset(b) n, err := c.ep.Write(&r, writeOptions) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Create wait queue entry that notifies a channel. waitEntry, notifyCh := waiter.NewChannelEntry(nil) c.wq.EventRegister(&waitEntry, waiter.EventOut) @@ -662,7 +662,7 @@ func (c *UDPConn) WriteTo(b []byte, addr net.Addr) (int, error) { } n, err = c.ep.Write(&r, writeOptions) - if err != tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { break } } diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go index b196324c7..2b3ea4bdf 100644 --- a/pkg/tcpip/adapters/gonet/gonet_test.go +++ b/pkg/tcpip/adapters/gonet/gonet_test.go @@ -58,7 +58,7 @@ func TestTimeouts(t *testing.T) { } } -func newLoopbackStack() (*stack.Stack, *tcpip.Error) { +func newLoopbackStack() (*stack.Stack, tcpip.Error) { // Create the stack and add a NIC. s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol}, @@ -94,7 +94,7 @@ type testConnection struct { ep tcpip.Endpoint } -func connect(s *stack.Stack, addr tcpip.FullAddress) (*testConnection, *tcpip.Error) { +func connect(s *stack.Stack, addr tcpip.FullAddress) (*testConnection, tcpip.Error) { wq := &waiter.Queue{} ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq) if err != nil { @@ -105,7 +105,7 @@ func connect(s *stack.Stack, addr tcpip.FullAddress) (*testConnection, *tcpip.Er wq.EventRegister(&entry, waiter.EventOut) err = ep.Connect(addr) - if err == tcpip.ErrConnectStarted { + if _, ok := err.(*tcpip.ErrConnectStarted); ok { <-ch err = ep.LastError() } @@ -660,11 +660,13 @@ func TestTCPDialError(t *testing.T) { ip := tcpip.Address(net.IPv4(169, 254, 10, 1).To4()) addr := tcpip.FullAddress{NICID, ip, 11211} - _, err := DialTCP(s, addr, ipv4.ProtocolNumber) - got, ok := err.(*net.OpError) - want := tcpip.ErrNoRoute - if !ok || got.Err.Error() != want.String() { - t.Errorf("Got DialTCP() = %v, want = %v", err, tcpip.ErrNoRoute) + switch _, err := DialTCP(s, addr, ipv4.ProtocolNumber); err := err.(type) { + case *net.OpError: + if err.Err.Error() != (&tcpip.ErrNoRoute{}).String() { + t.Errorf("got DialTCP() = %s, want = %s", err, &tcpip.ErrNoRoute{}) + } + default: + t.Errorf("got DialTCP(...) = %v, want %s", err, &tcpip.ErrNoRoute{}) } } diff --git a/pkg/tcpip/buffer/BUILD b/pkg/tcpip/buffer/BUILD index c9bcf9326..23aa0ad05 100644 --- a/pkg/tcpip/buffer/BUILD +++ b/pkg/tcpip/buffer/BUILD @@ -7,6 +7,7 @@ go_library( srcs = [ "prependable.go", "view.go", + "view_unsafe.go", ], visibility = ["//visibility:public"], ) diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go index 91cc62cc8..b05e81526 100644 --- a/pkg/tcpip/buffer/view.go +++ b/pkg/tcpip/buffer/view.go @@ -239,6 +239,16 @@ func (vv *VectorisedView) Size() int { return vv.size } +// MemSize returns the estimation size of the vv in memory, including backing +// buffer data. +func (vv *VectorisedView) MemSize() int { + var size int + for _, v := range vv.views { + size += cap(v) + } + return size + cap(vv.views)*viewStructSize + vectorisedViewStructSize +} + // ToView returns a single view containing the content of the vectorised view. // // If the vectorised view contains a single view, that view will be returned diff --git a/pkg/tcpip/buffer/view_test.go b/pkg/tcpip/buffer/view_test.go index e7f7cc9f1..78b2faa26 100644 --- a/pkg/tcpip/buffer/view_test.go +++ b/pkg/tcpip/buffer/view_test.go @@ -20,6 +20,7 @@ import ( "io" "reflect" "testing" + "unsafe" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -578,3 +579,15 @@ func TestAppendView(t *testing.T) { } } } + +func TestMemSize(t *testing.T) { + const perViewCap = 128 + views := make([]buffer.View, 2, 32) + views[0] = make(buffer.View, 10, perViewCap) + views[1] = make(buffer.View, 20, perViewCap) + vv := buffer.NewVectorisedView(30, views) + want := int(unsafe.Sizeof(vv)) + cap(views)*int(unsafe.Sizeof(views)) + 2*perViewCap + if got := vv.MemSize(); got != want { + t.Errorf("vv.MemSize() = %d, want %d", got, want) + } +} diff --git a/pkg/tcpip/buffer/view_unsafe.go b/pkg/tcpip/buffer/view_unsafe.go new file mode 100644 index 000000000..75ccd40f8 --- /dev/null +++ b/pkg/tcpip/buffer/view_unsafe.go @@ -0,0 +1,22 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package buffer + +import "unsafe" + +const ( + vectorisedViewStructSize = int(unsafe.Sizeof(VectorisedView{})) + viewStructSize = int(unsafe.Sizeof(View{})) +) diff --git a/pkg/tcpip/errors.go b/pkg/tcpip/errors.go new file mode 100644 index 000000000..af46da1d2 --- /dev/null +++ b/pkg/tcpip/errors.go @@ -0,0 +1,538 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcpip + +import ( + "fmt" +) + +// Error represents an error in the netstack error space. +// +// The error interface is intentionally omitted to avoid loss of type +// information that would occur if these errors were passed as error. +type Error interface { + isError() + + // IgnoreStats indicates whether this error should be included in failure + // counts in tcpip.Stats structs. + IgnoreStats() bool + + fmt.Stringer +} + +// ErrAborted indicates the operation was aborted. +// +// +stateify savable +type ErrAborted struct{} + +func (*ErrAborted) isError() {} + +// IgnoreStats implements Error. +func (*ErrAborted) IgnoreStats() bool { + return false +} +func (*ErrAborted) String() string { + return "operation aborted" +} + +// ErrAddressFamilyNotSupported indicates the operation does not support the +// given address family. +// +// +stateify savable +type ErrAddressFamilyNotSupported struct{} + +func (*ErrAddressFamilyNotSupported) isError() {} + +// IgnoreStats implements Error. +func (*ErrAddressFamilyNotSupported) IgnoreStats() bool { + return false +} +func (*ErrAddressFamilyNotSupported) String() string { + return "address family not supported by protocol" +} + +// ErrAlreadyBound indicates the endpoint is already bound. +// +// +stateify savable +type ErrAlreadyBound struct{} + +func (*ErrAlreadyBound) isError() {} + +// IgnoreStats implements Error. +func (*ErrAlreadyBound) IgnoreStats() bool { + return true +} +func (*ErrAlreadyBound) String() string { return "endpoint already bound" } + +// ErrAlreadyConnected indicates the endpoint is already connected. +// +// +stateify savable +type ErrAlreadyConnected struct{} + +func (*ErrAlreadyConnected) isError() {} + +// IgnoreStats implements Error. +func (*ErrAlreadyConnected) IgnoreStats() bool { + return true +} +func (*ErrAlreadyConnected) String() string { return "endpoint is already connected" } + +// ErrAlreadyConnecting indicates the endpoint is already connecting. +// +// +stateify savable +type ErrAlreadyConnecting struct{} + +func (*ErrAlreadyConnecting) isError() {} + +// IgnoreStats implements Error. +func (*ErrAlreadyConnecting) IgnoreStats() bool { + return true +} +func (*ErrAlreadyConnecting) String() string { return "endpoint is already connecting" } + +// ErrBadAddress indicates a bad address was provided. +// +// +stateify savable +type ErrBadAddress struct{} + +func (*ErrBadAddress) isError() {} + +// IgnoreStats implements Error. +func (*ErrBadAddress) IgnoreStats() bool { + return false +} +func (*ErrBadAddress) String() string { return "bad address" } + +// ErrBadBuffer indicates a bad buffer was provided. +// +// +stateify savable +type ErrBadBuffer struct{} + +func (*ErrBadBuffer) isError() {} + +// IgnoreStats implements Error. +func (*ErrBadBuffer) IgnoreStats() bool { + return false +} +func (*ErrBadBuffer) String() string { return "bad buffer" } + +// ErrBadLocalAddress indicates a bad local address was provided. +// +// +stateify savable +type ErrBadLocalAddress struct{} + +func (*ErrBadLocalAddress) isError() {} + +// IgnoreStats implements Error. +func (*ErrBadLocalAddress) IgnoreStats() bool { + return false +} +func (*ErrBadLocalAddress) String() string { return "bad local address" } + +// ErrBroadcastDisabled indicates broadcast is not enabled on the endpoint. +// +// +stateify savable +type ErrBroadcastDisabled struct{} + +func (*ErrBroadcastDisabled) isError() {} + +// IgnoreStats implements Error. +func (*ErrBroadcastDisabled) IgnoreStats() bool { + return false +} +func (*ErrBroadcastDisabled) String() string { return "broadcast socket option disabled" } + +// ErrClosedForReceive indicates the endpoint is closed for incoming data. +// +// +stateify savable +type ErrClosedForReceive struct{} + +func (*ErrClosedForReceive) isError() {} + +// IgnoreStats implements Error. +func (*ErrClosedForReceive) IgnoreStats() bool { + return false +} +func (*ErrClosedForReceive) String() string { return "endpoint is closed for receive" } + +// ErrClosedForSend indicates the endpoint is closed for outgoing data. +// +// +stateify savable +type ErrClosedForSend struct{} + +func (*ErrClosedForSend) isError() {} + +// IgnoreStats implements Error. +func (*ErrClosedForSend) IgnoreStats() bool { + return false +} +func (*ErrClosedForSend) String() string { return "endpoint is closed for send" } + +// ErrConnectStarted indicates the endpoint is connecting asynchronously. +// +// +stateify savable +type ErrConnectStarted struct{} + +func (*ErrConnectStarted) isError() {} + +// IgnoreStats implements Error. +func (*ErrConnectStarted) IgnoreStats() bool { + return true +} +func (*ErrConnectStarted) String() string { return "connection attempt started" } + +// ErrConnectionAborted indicates the connection was aborted. +// +// +stateify savable +type ErrConnectionAborted struct{} + +func (*ErrConnectionAborted) isError() {} + +// IgnoreStats implements Error. +func (*ErrConnectionAborted) IgnoreStats() bool { + return false +} +func (*ErrConnectionAborted) String() string { return "connection aborted" } + +// ErrConnectionRefused indicates the connection was refused. +// +// +stateify savable +type ErrConnectionRefused struct{} + +func (*ErrConnectionRefused) isError() {} + +// IgnoreStats implements Error. +func (*ErrConnectionRefused) IgnoreStats() bool { + return false +} +func (*ErrConnectionRefused) String() string { return "connection was refused" } + +// ErrConnectionReset indicates the connection was reset. +// +// +stateify savable +type ErrConnectionReset struct{} + +func (*ErrConnectionReset) isError() {} + +// IgnoreStats implements Error. +func (*ErrConnectionReset) IgnoreStats() bool { + return false +} +func (*ErrConnectionReset) String() string { return "connection reset by peer" } + +// ErrDestinationRequired indicates the operation requires a destination +// address, and one was not provided. +// +// +stateify savable +type ErrDestinationRequired struct{} + +func (*ErrDestinationRequired) isError() {} + +// IgnoreStats implements Error. +func (*ErrDestinationRequired) IgnoreStats() bool { + return false +} +func (*ErrDestinationRequired) String() string { return "destination address is required" } + +// ErrDuplicateAddress indicates the operation encountered a duplicate address. +// +// +stateify savable +type ErrDuplicateAddress struct{} + +func (*ErrDuplicateAddress) isError() {} + +// IgnoreStats implements Error. +func (*ErrDuplicateAddress) IgnoreStats() bool { + return false +} +func (*ErrDuplicateAddress) String() string { return "duplicate address" } + +// ErrDuplicateNICID indicates the operation encountered a duplicate NIC ID. +// +// +stateify savable +type ErrDuplicateNICID struct{} + +func (*ErrDuplicateNICID) isError() {} + +// IgnoreStats implements Error. +func (*ErrDuplicateNICID) IgnoreStats() bool { + return false +} +func (*ErrDuplicateNICID) String() string { return "duplicate nic id" } + +// ErrInvalidEndpointState indicates the endpoint is in an invalid state. +// +// +stateify savable +type ErrInvalidEndpointState struct{} + +func (*ErrInvalidEndpointState) isError() {} + +// IgnoreStats implements Error. +func (*ErrInvalidEndpointState) IgnoreStats() bool { + return false +} +func (*ErrInvalidEndpointState) String() string { return "endpoint is in invalid state" } + +// ErrInvalidOptionValue indicates an invalid option value was provided. +// +// +stateify savable +type ErrInvalidOptionValue struct{} + +func (*ErrInvalidOptionValue) isError() {} + +// IgnoreStats implements Error. +func (*ErrInvalidOptionValue) IgnoreStats() bool { + return false +} +func (*ErrInvalidOptionValue) String() string { return "invalid option value specified" } + +// ErrMalformedHeader indicates the operation encountered a malformed header. +// +// +stateify savable +type ErrMalformedHeader struct{} + +func (*ErrMalformedHeader) isError() {} + +// IgnoreStats implements Error. +func (*ErrMalformedHeader) IgnoreStats() bool { + return false +} +func (*ErrMalformedHeader) String() string { return "header is malformed" } + +// ErrMessageTooLong indicates the operation encountered a message whose length +// exceeds the maximum permitted. +// +// +stateify savable +type ErrMessageTooLong struct{} + +func (*ErrMessageTooLong) isError() {} + +// IgnoreStats implements Error. +func (*ErrMessageTooLong) IgnoreStats() bool { + return false +} +func (*ErrMessageTooLong) String() string { return "message too long" } + +// ErrNetworkUnreachable indicates the operation is not able to reach the +// destination network. +// +// +stateify savable +type ErrNetworkUnreachable struct{} + +func (*ErrNetworkUnreachable) isError() {} + +// IgnoreStats implements Error. +func (*ErrNetworkUnreachable) IgnoreStats() bool { + return false +} +func (*ErrNetworkUnreachable) String() string { return "network is unreachable" } + +// ErrNoBufferSpace indicates no buffer space is available. +// +// +stateify savable +type ErrNoBufferSpace struct{} + +func (*ErrNoBufferSpace) isError() {} + +// IgnoreStats implements Error. +func (*ErrNoBufferSpace) IgnoreStats() bool { + return false +} +func (*ErrNoBufferSpace) String() string { return "no buffer space available" } + +// ErrNoPortAvailable indicates no port could be allocated for the operation. +// +// +stateify savable +type ErrNoPortAvailable struct{} + +func (*ErrNoPortAvailable) isError() {} + +// IgnoreStats implements Error. +func (*ErrNoPortAvailable) IgnoreStats() bool { + return false +} +func (*ErrNoPortAvailable) String() string { return "no ports are available" } + +// ErrNoRoute indicates the operation is not able to find a route to the +// destination. +// +// +stateify savable +type ErrNoRoute struct{} + +func (*ErrNoRoute) isError() {} + +// IgnoreStats implements Error. +func (*ErrNoRoute) IgnoreStats() bool { + return false +} +func (*ErrNoRoute) String() string { return "no route" } + +// ErrNoSuchFile is used to indicate that ENOENT should be returned the to +// calling application. +// +// +stateify savable +type ErrNoSuchFile struct{} + +func (*ErrNoSuchFile) isError() {} + +// IgnoreStats implements Error. +func (*ErrNoSuchFile) IgnoreStats() bool { + return false +} +func (*ErrNoSuchFile) String() string { return "no such file" } + +// ErrNotConnected indicates the endpoint is not connected. +// +// +stateify savable +type ErrNotConnected struct{} + +func (*ErrNotConnected) isError() {} + +// IgnoreStats implements Error. +func (*ErrNotConnected) IgnoreStats() bool { + return false +} +func (*ErrNotConnected) String() string { return "endpoint not connected" } + +// ErrNotPermitted indicates the operation is not permitted. +// +// +stateify savable +type ErrNotPermitted struct{} + +func (*ErrNotPermitted) isError() {} + +// IgnoreStats implements Error. +func (*ErrNotPermitted) IgnoreStats() bool { + return false +} +func (*ErrNotPermitted) String() string { return "operation not permitted" } + +// ErrNotSupported indicates the operation is not supported. +// +// +stateify savable +type ErrNotSupported struct{} + +func (*ErrNotSupported) isError() {} + +// IgnoreStats implements Error. +func (*ErrNotSupported) IgnoreStats() bool { + return false +} +func (*ErrNotSupported) String() string { return "operation not supported" } + +// ErrPortInUse indicates the provided port is in use. +// +// +stateify savable +type ErrPortInUse struct{} + +func (*ErrPortInUse) isError() {} + +// IgnoreStats implements Error. +func (*ErrPortInUse) IgnoreStats() bool { + return false +} +func (*ErrPortInUse) String() string { return "port is in use" } + +// ErrQueueSizeNotSupported indicates the endpoint does not allow queue size +// operation. +// +// +stateify savable +type ErrQueueSizeNotSupported struct{} + +func (*ErrQueueSizeNotSupported) isError() {} + +// IgnoreStats implements Error. +func (*ErrQueueSizeNotSupported) IgnoreStats() bool { + return false +} +func (*ErrQueueSizeNotSupported) String() string { return "queue size querying not supported" } + +// ErrTimeout indicates the operation timed out. +// +// +stateify savable +type ErrTimeout struct{} + +func (*ErrTimeout) isError() {} + +// IgnoreStats implements Error. +func (*ErrTimeout) IgnoreStats() bool { + return false +} +func (*ErrTimeout) String() string { return "operation timed out" } + +// ErrUnknownDevice indicates an unknown device identifier was provided. +// +// +stateify savable +type ErrUnknownDevice struct{} + +func (*ErrUnknownDevice) isError() {} + +// IgnoreStats implements Error. +func (*ErrUnknownDevice) IgnoreStats() bool { + return false +} +func (*ErrUnknownDevice) String() string { return "unknown device" } + +// ErrUnknownNICID indicates an unknown NIC ID was provided. +// +// +stateify savable +type ErrUnknownNICID struct{} + +func (*ErrUnknownNICID) isError() {} + +// IgnoreStats implements Error. +func (*ErrUnknownNICID) IgnoreStats() bool { + return false +} +func (*ErrUnknownNICID) String() string { return "unknown nic id" } + +// ErrUnknownProtocol indicates an unknown protocol was requested. +// +// +stateify savable +type ErrUnknownProtocol struct{} + +func (*ErrUnknownProtocol) isError() {} + +// IgnoreStats implements Error. +func (*ErrUnknownProtocol) IgnoreStats() bool { + return false +} +func (*ErrUnknownProtocol) String() string { return "unknown protocol" } + +// ErrUnknownProtocolOption indicates an unknown protocol option was provided. +// +// +stateify savable +type ErrUnknownProtocolOption struct{} + +func (*ErrUnknownProtocolOption) isError() {} + +// IgnoreStats implements Error. +func (*ErrUnknownProtocolOption) IgnoreStats() bool { + return false +} +func (*ErrUnknownProtocolOption) String() string { return "unknown option for protocol" } + +// ErrWouldBlock indicates the operation would block. +// +// +stateify savable +type ErrWouldBlock struct{} + +func (*ErrWouldBlock) isError() {} + +// IgnoreStats implements Error. +func (*ErrWouldBlock) IgnoreStats() bool { + return true +} +func (*ErrWouldBlock) String() string { return "operation would block" } diff --git a/pkg/tcpip/faketime/BUILD b/pkg/tcpip/faketime/BUILD index 114d43df3..bb9d44aff 100644 --- a/pkg/tcpip/faketime/BUILD +++ b/pkg/tcpip/faketime/BUILD @@ -6,10 +6,7 @@ go_library( name = "faketime", srcs = ["faketime.go"], visibility = ["//visibility:public"], - deps = [ - "//pkg/tcpip", - "@com_github_dpjacques_clockwork//:go_default_library", - ], + deps = ["//pkg/tcpip"], ) go_test( diff --git a/pkg/tcpip/faketime/faketime.go b/pkg/tcpip/faketime/faketime.go index f7a4fbde1..fb819d7a8 100644 --- a/pkg/tcpip/faketime/faketime.go +++ b/pkg/tcpip/faketime/faketime.go @@ -17,10 +17,10 @@ package faketime import ( "container/heap" + "fmt" "sync" "time" - "github.com/dpjacques/clockwork" "gvisor.dev/gvisor/pkg/tcpip" ) @@ -44,38 +44,85 @@ func (*NullClock) AfterFunc(time.Duration, func()) tcpip.Timer { return nil } +type notificationChannels struct { + mu struct { + sync.Mutex + + ch []<-chan struct{} + } +} + +func (n *notificationChannels) add(ch <-chan struct{}) { + n.mu.Lock() + defer n.mu.Unlock() + n.mu.ch = append(n.mu.ch, ch) +} + +// wait returns once all the notification channels are readable. +// +// Channels that are added while waiting on existing channels will be waited on +// as well. +func (n *notificationChannels) wait() { + for { + n.mu.Lock() + ch := n.mu.ch + n.mu.ch = nil + n.mu.Unlock() + + if len(ch) == 0 { + break + } + + for _, c := range ch { + <-c + } + } +} + // ManualClock implements tcpip.Clock and only advances manually with Advance // method. type ManualClock struct { - clock clockwork.FakeClock + // runningTimers tracks the completion of timer callbacks that began running + // immediately upon their scheduling. It is used to ensure the proper ordering + // of timer callback dispatch. + runningTimers notificationChannels + + mu struct { + sync.RWMutex - // mu protects the fields below. - mu sync.RWMutex + // now is the current (fake) time of the clock. + now time.Time - // times is min-heap of times. A heap is used for quick retrieval of the next - // upcoming time of scheduled work. - times *timeHeap + // times is min-heap of times. + times timeHeap - // waitGroups stores one WaitGroup for all work scheduled to execute at the - // same time via AfterFunc. This allows parallel execution of all functions - // passed to AfterFunc scheduled for the same time. - waitGroups map[time.Time]*sync.WaitGroup + // timers holds the timers scheduled for each time. + timers map[time.Time]map[*manualTimer]struct{} + } } // NewManualClock creates a new ManualClock instance. func NewManualClock() *ManualClock { - return &ManualClock{ - clock: clockwork.NewFakeClock(), - times: &timeHeap{}, - waitGroups: make(map[time.Time]*sync.WaitGroup), - } + c := &ManualClock{} + + c.mu.Lock() + defer c.mu.Unlock() + + // Set the initial time to a non-zero value since the zero value is used to + // detect inactive timers. + c.mu.now = time.Unix(0, 0) + c.mu.timers = make(map[time.Time]map[*manualTimer]struct{}) + + return c } var _ tcpip.Clock = (*ManualClock)(nil) // NowNanoseconds implements tcpip.Clock.NowNanoseconds. func (mc *ManualClock) NowNanoseconds() int64 { - return mc.clock.Now().UnixNano() + mc.mu.RLock() + defer mc.mu.RUnlock() + return mc.mu.now.UnixNano() } // NowMonotonic implements tcpip.Clock.NowMonotonic. @@ -85,128 +132,203 @@ func (mc *ManualClock) NowMonotonic() int64 { // AfterFunc implements tcpip.Clock.AfterFunc. func (mc *ManualClock) AfterFunc(d time.Duration, f func()) tcpip.Timer { - until := mc.clock.Now().Add(d) - wg := mc.addWait(until) - return &manualTimer{ + mt := &manualTimer{ clock: mc, - until: until, - timer: mc.clock.AfterFunc(d, func() { - defer wg.Done() - f() - }), + f: f, } -} -// addWait adds an additional wait to the WaitGroup for parallel execution of -// all work scheduled for t. Returns a reference to the WaitGroup modified. -func (mc *ManualClock) addWait(t time.Time) *sync.WaitGroup { - mc.mu.RLock() - wg, ok := mc.waitGroups[t] - mc.mu.RUnlock() + mc.mu.Lock() + defer mc.mu.Unlock() + + mt.mu.Lock() + defer mt.mu.Unlock() - if ok { - wg.Add(1) - return wg + mc.resetTimerLocked(mt, d) + return mt +} + +// resetTimerLocked schedules a timer to be fired after the given duration. +// +// Precondition: mc.mu and mt.mu must be locked. +func (mc *ManualClock) resetTimerLocked(mt *manualTimer, d time.Duration) { + if !mt.mu.firesAt.IsZero() { + panic("tried to reset an active timer") } - mc.mu.Lock() - heap.Push(mc.times, t) - mc.mu.Unlock() + t := mc.mu.now.Add(d) - wg = &sync.WaitGroup{} - wg.Add(1) + if !mc.mu.now.Before(t) { + // If the timer is scheduled to fire immediately, call its callback + // in a new goroutine immediately. + // + // It needs to be called in its own goroutine to escape its current + // execution context - like an actual timer. + ch := make(chan struct{}) + mc.runningTimers.add(ch) - mc.mu.Lock() - mc.waitGroups[t] = wg - mc.mu.Unlock() + go func() { + defer close(ch) + + mt.f() + }() - return wg + return + } + + mt.mu.firesAt = t + + timers, ok := mc.mu.timers[t] + if !ok { + timers = make(map[*manualTimer]struct{}) + mc.mu.timers[t] = timers + heap.Push(&mc.mu.times, t) + } + + timers[mt] = struct{}{} } -// removeWait removes a wait from the WaitGroup for parallel execution of all -// work scheduled for t. -func (mc *ManualClock) removeWait(t time.Time) { - mc.mu.RLock() - defer mc.mu.RUnlock() +// stopTimerLocked stops a timer from firing. +// +// Precondition: mc.mu and mt.mu must be locked. +func (mc *ManualClock) stopTimerLocked(mt *manualTimer) { + t := mt.mu.firesAt + mt.mu.firesAt = time.Time{} + + if t.IsZero() { + panic("tried to stop an inactive timer") + } - wg := mc.waitGroups[t] - wg.Done() + timers, ok := mc.mu.timers[t] + if !ok { + err := fmt.Sprintf("tried to stop an active timer but the clock does not have anything scheduled for the timer @ t = %s %p\nScheduled timers @:", t.UTC(), mt) + for t := range mc.mu.timers { + err += fmt.Sprintf("%s\n", t.UTC()) + } + panic(err) + } + + if _, ok := timers[mt]; !ok { + panic(fmt.Sprintf("did not have an entry in timers for an active timer @ t = %s", t.UTC())) + } + + delete(timers, mt) + + if len(timers) == 0 { + delete(mc.mu.timers, t) + } } // Advance executes all work that have been scheduled to execute within d from -// the current time. Blocks until all work has completed execution. +// the current time. Blocks until all work has completed execution. func (mc *ManualClock) Advance(d time.Duration) { - // Block until all the work is done - until := mc.clock.Now().Add(d) - for { - mc.mu.Lock() - if mc.times.Len() == 0 { - mc.mu.Unlock() - break - } + // We spawn goroutines for timers that were scheduled to fire at the time of + // being reset. Wait for those goroutines to complete before proceeding so + // that timer callbacks are called in the right order. + mc.runningTimers.wait() - t := heap.Pop(mc.times).(time.Time) + mc.mu.Lock() + defer mc.mu.Unlock() + + until := mc.mu.now.Add(d) + for mc.mu.times.Len() > 0 { + t := heap.Pop(&mc.mu.times).(time.Time) if t.After(until) { // No work to do - heap.Push(mc.times, t) - mc.mu.Unlock() + heap.Push(&mc.mu.times, t) break } - mc.mu.Unlock() - diff := t.Sub(mc.clock.Now()) - mc.clock.Advance(diff) + timers := mc.mu.timers[t] + delete(mc.mu.timers, t) + + mc.mu.now = t + + // Mark the timers as inactive since they will be fired. + // + // This needs to be done while holding mc's lock because we remove the entry + // in the map of timers for the current time. If an attempt to stop a + // timer is made after mc's lock was dropped but before the timer is + // marked inactive, we would panic since no entry exists for the time when + // the timer was expected to fire. + for mt := range timers { + mt.mu.Lock() + mt.mu.firesAt = time.Time{} + mt.mu.Unlock() + } - mc.mu.RLock() - wg := mc.waitGroups[t] - mc.mu.RUnlock() + // Release the lock before calling the timer's callback fn since the + // callback fn might try to schedule a timer which requires obtaining + // mc's lock. + mc.mu.Unlock() - wg.Wait() + for mt := range timers { + mt.f() + } + // The timer callbacks may have scheduled a timer to fire immediately. + // We spawn goroutines for these timers and need to wait for them to + // finish before proceeding so that timer callbacks are called in the + // right order. + mc.runningTimers.wait() mc.mu.Lock() - delete(mc.waitGroups, t) - mc.mu.Unlock() } - if now := mc.clock.Now(); until.After(now) { - mc.clock.Advance(until.Sub(now)) + + mc.mu.now = until +} + +func (mc *ManualClock) resetTimer(mt *manualTimer, d time.Duration) { + mc.mu.Lock() + defer mc.mu.Unlock() + + mt.mu.Lock() + defer mt.mu.Unlock() + + if !mt.mu.firesAt.IsZero() { + mc.stopTimerLocked(mt) } + + mc.resetTimerLocked(mt, d) +} + +func (mc *ManualClock) stopTimer(mt *manualTimer) bool { + mc.mu.Lock() + defer mc.mu.Unlock() + + mt.mu.Lock() + defer mt.mu.Unlock() + + if mt.mu.firesAt.IsZero() { + return false + } + + mc.stopTimerLocked(mt) + return true } type manualTimer struct { clock *ManualClock - timer clockwork.Timer + f func() - mu sync.RWMutex - until time.Time + mu struct { + sync.Mutex + + // firesAt is the time when the timer will fire. + // + // Zero only when the timer is not active. + firesAt time.Time + } } var _ tcpip.Timer = (*manualTimer)(nil) // Reset implements tcpip.Timer.Reset. -func (t *manualTimer) Reset(d time.Duration) { - if !t.timer.Reset(d) { - return - } - - t.mu.Lock() - defer t.mu.Unlock() - - t.clock.removeWait(t.until) - t.until = t.clock.clock.Now().Add(d) - t.clock.addWait(t.until) +func (mt *manualTimer) Reset(d time.Duration) { + mt.clock.resetTimer(mt, d) } // Stop implements tcpip.Timer.Stop. -func (t *manualTimer) Stop() bool { - if !t.timer.Stop() { - return false - } - - t.mu.RLock() - defer t.mu.RUnlock() - - t.clock.removeWait(t.until) - return true +func (mt *manualTimer) Stop() bool { + return mt.clock.stopTimer(mt) } type timeHeap []time.Time diff --git a/pkg/tcpip/header/icmpv4.go b/pkg/tcpip/header/icmpv4.go index 5f9b8e9e2..f840a4322 100644 --- a/pkg/tcpip/header/icmpv4.go +++ b/pkg/tcpip/header/icmpv4.go @@ -16,7 +16,6 @@ package header import ( "encoding/binary" - "fmt" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -208,16 +207,3 @@ func ICMPv4Checksum(h ICMPv4, vv buffer.VectorisedView) uint16 { return ^xsum } - -// ICMPOriginFromNetProto returns the appropriate SockErrOrigin to use when -// a packet having a `net` header causing an ICMP error. -func ICMPOriginFromNetProto(net tcpip.NetworkProtocolNumber) tcpip.SockErrOrigin { - switch net { - case IPv4ProtocolNumber: - return tcpip.SockExtErrorOriginICMP - case IPv6ProtocolNumber: - return tcpip.SockExtErrorOriginICMP6 - default: - panic(fmt.Sprintf("unsupported net proto to extract ICMP error origin: %d", net)) - } -} diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go index 5580d6a78..f2403978c 100644 --- a/pkg/tcpip/header/ipv6.go +++ b/pkg/tcpip/header/ipv6.go @@ -453,9 +453,9 @@ const ( ) // ScopeForIPv6Address returns the scope for an IPv6 address. -func ScopeForIPv6Address(addr tcpip.Address) (IPv6AddressScope, *tcpip.Error) { +func ScopeForIPv6Address(addr tcpip.Address) (IPv6AddressScope, tcpip.Error) { if len(addr) != IPv6AddressSize { - return GlobalScope, tcpip.ErrBadAddress + return GlobalScope, &tcpip.ErrBadAddress{} } switch { diff --git a/pkg/tcpip/header/ipv6_test.go b/pkg/tcpip/header/ipv6_test.go index e3fbd64f3..f10f446a6 100644 --- a/pkg/tcpip/header/ipv6_test.go +++ b/pkg/tcpip/header/ipv6_test.go @@ -299,7 +299,7 @@ func TestScopeForIPv6Address(t *testing.T) { name string addr tcpip.Address scope header.IPv6AddressScope - err *tcpip.Error + err tcpip.Error }{ { name: "Unique Local", @@ -329,15 +329,15 @@ func TestScopeForIPv6Address(t *testing.T) { name: "IPv4", addr: "\x01\x02\x03\x04", scope: header.GlobalScope, - err: tcpip.ErrBadAddress, + err: &tcpip.ErrBadAddress{}, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { got, err := header.ScopeForIPv6Address(test.addr) - if err != test.err { - t.Errorf("got header.IsV6UniqueLocalAddress(%s) = (_, %v), want = (_, %v)", test.addr, err, test.err) + if diff := cmp.Diff(test.err, err); diff != "" { + t.Errorf("unexpected error from header.IsV6UniqueLocalAddress(%s), (-want, +got):\n%s", test.addr, diff) } if got != test.scope { t.Errorf("got header.IsV6UniqueLocalAddress(%s) = (%d, _), want = (%d, _)", test.addr, got, test.scope) diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go index a068d93a4..cd76272de 100644 --- a/pkg/tcpip/link/channel/channel.go +++ b/pkg/tcpip/link/channel/channel.go @@ -229,7 +229,7 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress { } // WritePacket stores outbound packets into the channel. -func (e *Endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *Endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { p := PacketInfo{ Pkt: pkt, Proto: protocol, @@ -243,7 +243,7 @@ func (e *Endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip } // WritePackets stores outbound packets into the channel. -func (e *Endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *Endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { n := 0 for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { p := PacketInfo{ diff --git a/pkg/tcpip/link/ethernet/ethernet.go b/pkg/tcpip/link/ethernet/ethernet.go index 2f2d9d4ac..d873766a6 100644 --- a/pkg/tcpip/link/ethernet/ethernet.go +++ b/pkg/tcpip/link/ethernet/ethernet.go @@ -61,13 +61,13 @@ func (e *Endpoint) Capabilities() stack.LinkEndpointCapabilities { } // WritePacket implements stack.LinkEndpoint. -func (e *Endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *Endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { e.AddHeader(e.Endpoint.LinkAddress(), r.RemoteLinkAddress, proto, pkt) return e.Endpoint.WritePacket(r, gso, proto, pkt) } // WritePackets implements stack.LinkEndpoint. -func (e *Endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, proto tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *Endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, proto tcpip.NetworkProtocolNumber) (int, tcpip.Error) { linkAddr := e.Endpoint.LinkAddress() for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index f86c383d8..0164d851b 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -57,7 +57,7 @@ import ( // linkDispatcher reads packets from the link FD and dispatches them to the // NetworkDispatcher. type linkDispatcher interface { - dispatch() (bool, *tcpip.Error) + dispatch() (bool, tcpip.Error) } // PacketDispatchMode are the various supported methods of receiving and @@ -118,7 +118,7 @@ type endpoint struct { // closed is a function to be called when the FD's peer (if any) closes // its end of the communication pipe. - closed func(*tcpip.Error) + closed func(tcpip.Error) inboundDispatchers []linkDispatcher dispatcher stack.NetworkDispatcher @@ -149,7 +149,7 @@ type Options struct { // ClosedFunc is a function to be called when an endpoint's peer (if // any) closes its end of the communication pipe. - ClosedFunc func(*tcpip.Error) + ClosedFunc func(tcpip.Error) // Address is the link address for this endpoint. Only used if // EthernetHeader is true. @@ -411,7 +411,7 @@ func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.Net // WritePacket writes outbound packets to the file descriptor. If it is not // currently writable, the packet is dropped. -func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { if e.hdrSize > 0 { e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt) } @@ -451,7 +451,7 @@ func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip return rawfile.NonBlockingWriteIovec(fd, builder.Build()) } -func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tcpip.Error) { +func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, tcpip.Error) { // Send a batch of packets through batchFD. mmsgHdrs := make([]rawfile.MMsgHdr, 0, len(batch)) for _, pkt := range batch { @@ -518,7 +518,7 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc // - pkt.EgressRoute // - pkt.GSOOptions // - pkt.NetworkProtocolNumber -func (e *endpoint) WritePackets(_ stack.RouteInfo, _ *stack.GSO, pkts stack.PacketBufferList, _ tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *endpoint) WritePackets(_ stack.RouteInfo, _ *stack.GSO, pkts stack.PacketBufferList, _ tcpip.NetworkProtocolNumber) (int, tcpip.Error) { // Preallocate to avoid repeated reallocation as we append to batch. // batchSz is 47 because when SWGSO is in use then a single 65KB TCP // segment can get split into 46 segments of 1420 bytes and a single 216 @@ -562,13 +562,13 @@ func viewsEqual(vs1, vs2 []buffer.View) bool { } // InjectOutobund implements stack.InjectableEndpoint.InjectOutbound. -func (e *endpoint) InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error { +func (e *endpoint) InjectOutbound(dest tcpip.Address, packet []byte) tcpip.Error { return rawfile.NonBlockingWrite(e.fds[0], packet) } // dispatchLoop reads packets from the file descriptor in a loop and dispatches // them to the network stack. -func (e *endpoint) dispatchLoop(inboundDispatcher linkDispatcher) *tcpip.Error { +func (e *endpoint) dispatchLoop(inboundDispatcher linkDispatcher) tcpip.Error { for { cont, err := inboundDispatcher.dispatch() if err != nil || !cont { diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go index e2985cb84..e82371798 100644 --- a/pkg/tcpip/link/fdbased/endpoint_test.go +++ b/pkg/tcpip/link/fdbased/endpoint_test.go @@ -95,7 +95,7 @@ func newContext(t *testing.T, opt *Options) *context { } done := make(chan struct{}, 2) - opt.ClosedFunc = func(*tcpip.Error) { + opt.ClosedFunc = func(tcpip.Error) { done <- struct{}{} } diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go index c475dda20..a2b63fe6b 100644 --- a/pkg/tcpip/link/fdbased/mmap.go +++ b/pkg/tcpip/link/fdbased/mmap.go @@ -129,7 +129,7 @@ type packetMMapDispatcher struct { ringOffset int } -func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, *tcpip.Error) { +func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, tcpip.Error) { hdr := tPacketHdr(d.ringBuffer[d.ringOffset*tpFrameSize:]) for hdr.tpStatus()&tpStatusUser == 0 { event := rawfile.PollEvent{ @@ -163,7 +163,7 @@ func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, *tcpip.Error) { // dispatch reads packets from an mmaped ring buffer and dispatches them to the // network stack. -func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) { +func (d *packetMMapDispatcher) dispatch() (bool, tcpip.Error) { pkt, err := d.readMMappedPacket() if err != nil { return false, err diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go index edab110b5..ecae1ad2d 100644 --- a/pkg/tcpip/link/fdbased/packet_dispatchers.go +++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go @@ -139,7 +139,7 @@ func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) { } // dispatch reads one packet from the file descriptor and dispatches it. -func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) { +func (d *readVDispatcher) dispatch() (bool, tcpip.Error) { n, err := rawfile.BlockingReadv(d.fd, d.buf.nextIovecs()) if n == 0 || err != nil { return false, err @@ -226,7 +226,7 @@ func newRecvMMsgDispatcher(fd int, e *endpoint) (linkDispatcher, error) { // recvMMsgDispatch reads more than one packet at a time from the file // descriptor and dispatches it. -func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) { +func (d *recvMMsgDispatcher) dispatch() (bool, tcpip.Error) { // Fill message headers. for k := range d.msgHdrs { if d.msgHdrs[k].Msg.Iovlen > 0 { diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go index ac6a6be87..691467870 100644 --- a/pkg/tcpip/link/loopback/loopback.go +++ b/pkg/tcpip/link/loopback/loopback.go @@ -76,7 +76,7 @@ func (*endpoint) Wait() {} // WritePacket implements stack.LinkEndpoint.WritePacket. It delivers outbound // packets to the network-layer dispatcher. -func (e *endpoint) WritePacket(_ stack.RouteInfo, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) WritePacket(_ stack.RouteInfo, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { // Construct data as the unparsed portion for the loopback packet. data := buffer.NewVectorisedView(pkt.Size(), pkt.Views()) @@ -92,7 +92,7 @@ func (e *endpoint) WritePacket(_ stack.RouteInfo, _ *stack.GSO, protocol tcpip.N } // WritePackets implements stack.LinkEndpoint.WritePackets. -func (e *endpoint) WritePackets(stack.RouteInfo, *stack.GSO, stack.PacketBufferList, tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *endpoint) WritePackets(stack.RouteInfo, *stack.GSO, stack.PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error) { panic("not implemented") } diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go index 316f508e6..668f72eee 100644 --- a/pkg/tcpip/link/muxed/injectable.go +++ b/pkg/tcpip/link/muxed/injectable.go @@ -87,10 +87,10 @@ func (m *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, // WritePackets writes outbound packets to the appropriate // LinkInjectableEndpoint based on the RemoteAddress. HandleLocal only works if // r.RemoteAddress has a route registered in this endpoint. -func (m *InjectableEndpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (m *InjectableEndpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { endpoint, ok := m.routes[r.RemoteAddress] if !ok { - return 0, tcpip.ErrNoRoute + return 0, &tcpip.ErrNoRoute{} } return endpoint.WritePackets(r, gso, pkts, protocol) } @@ -98,19 +98,19 @@ func (m *InjectableEndpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkt // WritePacket writes outbound packets to the appropriate LinkInjectableEndpoint // based on the RemoteAddress. HandleLocal only works if r.RemoteAddress has a // route registered in this endpoint. -func (m *InjectableEndpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (m *InjectableEndpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { if endpoint, ok := m.routes[r.RemoteAddress]; ok { return endpoint.WritePacket(r, gso, protocol, pkt) } - return tcpip.ErrNoRoute + return &tcpip.ErrNoRoute{} } // InjectOutbound writes outbound packets to the appropriate // LinkInjectableEndpoint based on the dest address. -func (m *InjectableEndpoint) InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error { +func (m *InjectableEndpoint) InjectOutbound(dest tcpip.Address, packet []byte) tcpip.Error { endpoint, ok := m.routes[dest] if !ok { - return tcpip.ErrNoRoute + return &tcpip.ErrNoRoute{} } return endpoint.InjectOutbound(dest, packet) } diff --git a/pkg/tcpip/link/nested/nested.go b/pkg/tcpip/link/nested/nested.go index 814a54f23..97ad9fdd5 100644 --- a/pkg/tcpip/link/nested/nested.go +++ b/pkg/tcpip/link/nested/nested.go @@ -113,12 +113,12 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress { } // WritePacket implements stack.LinkEndpoint. -func (e *Endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *Endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { return e.child.WritePacket(r, gso, protocol, pkt) } // WritePackets implements stack.LinkEndpoint. -func (e *Endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *Endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { return e.child.WritePackets(r, gso, pkts, protocol) } diff --git a/pkg/tcpip/link/packetsocket/endpoint.go b/pkg/tcpip/link/packetsocket/endpoint.go index c95cdd681..6cbe18a56 100644 --- a/pkg/tcpip/link/packetsocket/endpoint.go +++ b/pkg/tcpip/link/packetsocket/endpoint.go @@ -35,13 +35,13 @@ func New(lower stack.LinkEndpoint) stack.LinkEndpoint { } // WritePacket implements stack.LinkEndpoint.WritePacket. -func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { e.Endpoint.DeliverOutboundPacket(r.RemoteLinkAddress, r.LocalLinkAddress, protocol, pkt) return e.Endpoint.WritePacket(r, gso, protocol, pkt) } // WritePackets implements stack.LinkEndpoint.WritePackets. -func (e *endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, proto tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, proto tcpip.NetworkProtocolNumber) (int, tcpip.Error) { for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { e.Endpoint.DeliverOutboundPacket(r.RemoteLinkAddress, r.LocalLinkAddress, pkt.NetworkProtocolNumber, pkt) } diff --git a/pkg/tcpip/link/pipe/pipe.go b/pkg/tcpip/link/pipe/pipe.go index 36aa9055c..bbe84f220 100644 --- a/pkg/tcpip/link/pipe/pipe.go +++ b/pkg/tcpip/link/pipe/pipe.go @@ -75,7 +75,7 @@ func (e *Endpoint) deliverPackets(r stack.RouteInfo, proto tcpip.NetworkProtocol } // WritePacket implements stack.LinkEndpoint. -func (e *Endpoint) WritePacket(r stack.RouteInfo, _ *stack.GSO, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *Endpoint) WritePacket(r stack.RouteInfo, _ *stack.GSO, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { if e.linked.IsAttached() { var pkts stack.PacketBufferList pkts.PushBack(pkt) @@ -86,7 +86,7 @@ func (e *Endpoint) WritePacket(r stack.RouteInfo, _ *stack.GSO, proto tcpip.Netw } // WritePackets implements stack.LinkEndpoint. -func (e *Endpoint) WritePackets(r stack.RouteInfo, _ *stack.GSO, pkts stack.PacketBufferList, proto tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *Endpoint) WritePackets(r stack.RouteInfo, _ *stack.GSO, pkts stack.PacketBufferList, proto tcpip.NetworkProtocolNumber) (int, tcpip.Error) { if e.linked.IsAttached() { e.deliverPackets(r, proto, pkts) } diff --git a/pkg/tcpip/link/qdisc/fifo/endpoint.go b/pkg/tcpip/link/qdisc/fifo/endpoint.go index 03efba606..128ef6e87 100644 --- a/pkg/tcpip/link/qdisc/fifo/endpoint.go +++ b/pkg/tcpip/link/qdisc/fifo/endpoint.go @@ -150,7 +150,7 @@ func (e *endpoint) GSOMaxSize() uint32 { } // WritePacket implements stack.LinkEndpoint.WritePacket. -func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { // WritePacket caller's do not set the following fields in PacketBuffer // so we populate them here. pkt.EgressRoute = r @@ -158,7 +158,7 @@ func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip pkt.NetworkProtocolNumber = protocol d := e.dispatchers[int(pkt.Hash)%len(e.dispatchers)] if !d.q.enqueue(pkt) { - return tcpip.ErrNoBufferSpace + return &tcpip.ErrNoBufferSpace{} } d.newPacketWaker.Assert() return nil @@ -171,7 +171,7 @@ func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip // - pkt.EgressRoute // - pkt.GSOOptions // - pkt.NetworkProtocolNumber -func (e *endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { enqueued := 0 for pkt := pkts.Front(); pkt != nil; { d := e.dispatchers[int(pkt.Hash)%len(e.dispatchers)] @@ -180,7 +180,7 @@ func (e *endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.Pa if enqueued > 0 { d.newPacketWaker.Assert() } - return enqueued, tcpip.ErrNoBufferSpace + return enqueued, &tcpip.ErrNoBufferSpace{} } pkt = nxt enqueued++ diff --git a/pkg/tcpip/link/rawfile/BUILD b/pkg/tcpip/link/rawfile/BUILD index 6c410c5a6..e1047da50 100644 --- a/pkg/tcpip/link/rawfile/BUILD +++ b/pkg/tcpip/link/rawfile/BUILD @@ -27,5 +27,6 @@ go_test( library = "rawfile", deps = [ "//pkg/tcpip", + "@com_github_google_go_cmp//cmp:go_default_library", ], ) diff --git a/pkg/tcpip/link/rawfile/errors.go b/pkg/tcpip/link/rawfile/errors.go index 604868fd8..406b97709 100644 --- a/pkg/tcpip/link/rawfile/errors.go +++ b/pkg/tcpip/link/rawfile/errors.go @@ -17,7 +17,6 @@ package rawfile import ( - "fmt" "syscall" "gvisor.dev/gvisor/pkg/tcpip" @@ -25,48 +24,54 @@ import ( const maxErrno = 134 -var translations [maxErrno]*tcpip.Error - // TranslateErrno translate an errno from the syscall package into a -// *tcpip.Error. +// tcpip.Error. // // Valid, but unrecognized errnos will be translated to -// tcpip.ErrInvalidEndpointState (EINVAL). -func TranslateErrno(e syscall.Errno) *tcpip.Error { - if e > 0 && e < syscall.Errno(len(translations)) { - if err := translations[e]; err != nil { - return err - } - } - return tcpip.ErrInvalidEndpointState -} - -func addTranslation(host syscall.Errno, trans *tcpip.Error) { - if translations[host] != nil { - panic(fmt.Sprintf("duplicate translation for host errno %q (%d)", host.Error(), host)) +// *tcpip.ErrInvalidEndpointState (EINVAL). +func TranslateErrno(e syscall.Errno) tcpip.Error { + switch e { + case syscall.EEXIST: + return &tcpip.ErrDuplicateAddress{} + case syscall.ENETUNREACH: + return &tcpip.ErrNoRoute{} + case syscall.EINVAL: + return &tcpip.ErrInvalidEndpointState{} + case syscall.EALREADY: + return &tcpip.ErrAlreadyConnecting{} + case syscall.EISCONN: + return &tcpip.ErrAlreadyConnected{} + case syscall.EADDRINUSE: + return &tcpip.ErrPortInUse{} + case syscall.EADDRNOTAVAIL: + return &tcpip.ErrBadLocalAddress{} + case syscall.EPIPE: + return &tcpip.ErrClosedForSend{} + case syscall.EWOULDBLOCK: + return &tcpip.ErrWouldBlock{} + case syscall.ECONNREFUSED: + return &tcpip.ErrConnectionRefused{} + case syscall.ETIMEDOUT: + return &tcpip.ErrTimeout{} + case syscall.EINPROGRESS: + return &tcpip.ErrConnectStarted{} + case syscall.EDESTADDRREQ: + return &tcpip.ErrDestinationRequired{} + case syscall.ENOTSUP: + return &tcpip.ErrNotSupported{} + case syscall.ENOTTY: + return &tcpip.ErrQueueSizeNotSupported{} + case syscall.ENOTCONN: + return &tcpip.ErrNotConnected{} + case syscall.ECONNRESET: + return &tcpip.ErrConnectionReset{} + case syscall.ECONNABORTED: + return &tcpip.ErrConnectionAborted{} + case syscall.EMSGSIZE: + return &tcpip.ErrMessageTooLong{} + case syscall.ENOBUFS: + return &tcpip.ErrNoBufferSpace{} + default: + return &tcpip.ErrInvalidEndpointState{} } - translations[host] = trans -} - -func init() { - addTranslation(syscall.EEXIST, tcpip.ErrDuplicateAddress) - addTranslation(syscall.ENETUNREACH, tcpip.ErrNoRoute) - addTranslation(syscall.EINVAL, tcpip.ErrInvalidEndpointState) - addTranslation(syscall.EALREADY, tcpip.ErrAlreadyConnecting) - addTranslation(syscall.EISCONN, tcpip.ErrAlreadyConnected) - addTranslation(syscall.EADDRINUSE, tcpip.ErrPortInUse) - addTranslation(syscall.EADDRNOTAVAIL, tcpip.ErrBadLocalAddress) - addTranslation(syscall.EPIPE, tcpip.ErrClosedForSend) - addTranslation(syscall.EWOULDBLOCK, tcpip.ErrWouldBlock) - addTranslation(syscall.ECONNREFUSED, tcpip.ErrConnectionRefused) - addTranslation(syscall.ETIMEDOUT, tcpip.ErrTimeout) - addTranslation(syscall.EINPROGRESS, tcpip.ErrConnectStarted) - addTranslation(syscall.EDESTADDRREQ, tcpip.ErrDestinationRequired) - addTranslation(syscall.ENOTSUP, tcpip.ErrNotSupported) - addTranslation(syscall.ENOTTY, tcpip.ErrQueueSizeNotSupported) - addTranslation(syscall.ENOTCONN, tcpip.ErrNotConnected) - addTranslation(syscall.ECONNRESET, tcpip.ErrConnectionReset) - addTranslation(syscall.ECONNABORTED, tcpip.ErrConnectionAborted) - addTranslation(syscall.EMSGSIZE, tcpip.ErrMessageTooLong) - addTranslation(syscall.ENOBUFS, tcpip.ErrNoBufferSpace) } diff --git a/pkg/tcpip/link/rawfile/errors_test.go b/pkg/tcpip/link/rawfile/errors_test.go index e4cdc66bd..61aea1744 100644 --- a/pkg/tcpip/link/rawfile/errors_test.go +++ b/pkg/tcpip/link/rawfile/errors_test.go @@ -20,34 +20,35 @@ import ( "syscall" "testing" + "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/tcpip" ) func TestTranslateErrno(t *testing.T) { for _, test := range []struct { errno syscall.Errno - translated *tcpip.Error + translated tcpip.Error }{ { errno: syscall.Errno(0), - translated: tcpip.ErrInvalidEndpointState, + translated: &tcpip.ErrInvalidEndpointState{}, }, { errno: syscall.Errno(maxErrno), - translated: tcpip.ErrInvalidEndpointState, + translated: &tcpip.ErrInvalidEndpointState{}, }, { errno: syscall.Errno(514), - translated: tcpip.ErrInvalidEndpointState, + translated: &tcpip.ErrInvalidEndpointState{}, }, { errno: syscall.EEXIST, - translated: tcpip.ErrDuplicateAddress, + translated: &tcpip.ErrDuplicateAddress{}, }, } { got := TranslateErrno(test.errno) - if got != test.translated { - t.Errorf("TranslateErrno(%q) = %q, want %q", test.errno, got, test.translated) + if diff := cmp.Diff(test.translated, got); diff != "" { + t.Errorf("unexpected result from TranslateErrno(%q), (-want, +got):\n%s", test.errno, diff) } } } diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go index f4c32c2da..06f3ee21e 100644 --- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go +++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go @@ -52,7 +52,7 @@ func GetMTU(name string) (uint32, error) { // NonBlockingWrite writes the given buffer to a file descriptor. It fails if // partial data is written. -func NonBlockingWrite(fd int, buf []byte) *tcpip.Error { +func NonBlockingWrite(fd int, buf []byte) tcpip.Error { var ptr unsafe.Pointer if len(buf) > 0 { ptr = unsafe.Pointer(&buf[0]) @@ -68,7 +68,7 @@ func NonBlockingWrite(fd int, buf []byte) *tcpip.Error { // NonBlockingWriteIovec writes iovec to a file descriptor in a single syscall. // It fails if partial data is written. -func NonBlockingWriteIovec(fd int, iovec []syscall.Iovec) *tcpip.Error { +func NonBlockingWriteIovec(fd int, iovec []syscall.Iovec) tcpip.Error { iovecLen := uintptr(len(iovec)) _, _, e := syscall.RawSyscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovec[0])), iovecLen) if e != 0 { @@ -78,7 +78,7 @@ func NonBlockingWriteIovec(fd int, iovec []syscall.Iovec) *tcpip.Error { } // NonBlockingSendMMsg sends multiple messages on a socket. -func NonBlockingSendMMsg(fd int, msgHdrs []MMsgHdr) (int, *tcpip.Error) { +func NonBlockingSendMMsg(fd int, msgHdrs []MMsgHdr) (int, tcpip.Error) { n, _, e := syscall.RawSyscall6(unix.SYS_SENDMMSG, uintptr(fd), uintptr(unsafe.Pointer(&msgHdrs[0])), uintptr(len(msgHdrs)), syscall.MSG_DONTWAIT, 0, 0) if e != 0 { return 0, TranslateErrno(e) @@ -97,7 +97,7 @@ type PollEvent struct { // BlockingRead reads from a file descriptor that is set up as non-blocking. If // no data is available, it will block in a poll() syscall until the file // descriptor becomes readable. -func BlockingRead(fd int, b []byte) (int, *tcpip.Error) { +func BlockingRead(fd int, b []byte) (int, tcpip.Error) { for { n, _, e := syscall.RawSyscall(syscall.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b))) if e == 0 { @@ -119,7 +119,7 @@ func BlockingRead(fd int, b []byte) (int, *tcpip.Error) { // BlockingReadv reads from a file descriptor that is set up as non-blocking and // stores the data in a list of iovecs buffers. If no data is available, it will // block in a poll() syscall until the file descriptor becomes readable. -func BlockingReadv(fd int, iovecs []syscall.Iovec) (int, *tcpip.Error) { +func BlockingReadv(fd int, iovecs []syscall.Iovec) (int, tcpip.Error) { for { n, _, e := syscall.RawSyscall(syscall.SYS_READV, uintptr(fd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(len(iovecs))) if e == 0 { @@ -149,7 +149,7 @@ type MMsgHdr struct { // and stores the received messages in a slice of MMsgHdr structures. If no data // is available, it will block in a poll() syscall until the file descriptor // becomes readable. -func BlockingRecvMMsg(fd int, msgHdrs []MMsgHdr) (int, *tcpip.Error) { +func BlockingRecvMMsg(fd int, msgHdrs []MMsgHdr) (int, tcpip.Error) { for { n, _, e := syscall.RawSyscall6(syscall.SYS_RECVMMSG, uintptr(fd), uintptr(unsafe.Pointer(&msgHdrs[0])), uintptr(len(msgHdrs)), syscall.MSG_DONTWAIT, 0, 0) if e == 0 { diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go index 6c937c858..2599bc406 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem.go +++ b/pkg/tcpip/link/sharedmem/sharedmem.go @@ -203,7 +203,7 @@ func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.Net // WritePacket writes outbound packets to the file descriptor. If it is not // currently writable, the packet is dropped. -func (e *endpoint) WritePacket(r stack.RouteInfo, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) WritePacket(r stack.RouteInfo, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt) views := pkt.Views() @@ -213,14 +213,14 @@ func (e *endpoint) WritePacket(r stack.RouteInfo, _ *stack.GSO, protocol tcpip.N e.mu.Unlock() if !ok { - return tcpip.ErrWouldBlock + return &tcpip.ErrWouldBlock{} } return nil } // WritePackets implements stack.LinkEndpoint.WritePackets. -func (*endpoint) WritePackets(stack.RouteInfo, *stack.GSO, stack.PacketBufferList, tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (*endpoint) WritePackets(stack.RouteInfo, *stack.GSO, stack.PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error) { panic("not implemented") } diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go index 23242b9e0..d480ad656 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem_test.go +++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go @@ -425,8 +425,9 @@ func TestFillTxQueue(t *testing.T) { ReserveHeaderBytes: int(c.ep.MaxHeaderLength()), Data: buf.ToVectorisedView(), }) - if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(r, nil /* gso */, header.IPv4ProtocolNumber, pkt); err != want { - t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want) + err := c.ep.WritePacket(r, nil /* gso */, header.IPv4ProtocolNumber, pkt) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got WritePacket(...) = %v, want %s", err, &tcpip.ErrWouldBlock{}) } } @@ -493,8 +494,9 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) { ReserveHeaderBytes: int(c.ep.MaxHeaderLength()), Data: buf.ToVectorisedView(), }) - if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(r, nil /* gso */, header.IPv4ProtocolNumber, pkt); err != want { - t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want) + err := c.ep.WritePacket(r, nil /* gso */, header.IPv4ProtocolNumber, pkt) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got WritePacket(...) = %v, want %s", err, &tcpip.ErrWouldBlock{}) } } @@ -538,8 +540,8 @@ func TestFillTxMemory(t *testing.T) { Data: buf.ToVectorisedView(), }) err := c.ep.WritePacket(r, nil /* gso */, header.IPv4ProtocolNumber, pkt) - if want := tcpip.ErrWouldBlock; err != want { - t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got WritePacket(...) = %v, want %s", err, &tcpip.ErrWouldBlock{}) } } @@ -579,8 +581,9 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) { ReserveHeaderBytes: int(c.ep.MaxHeaderLength()), Data: buffer.NewView(bufferSize).ToVectorisedView(), }) - if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(r, nil /* gso */, header.IPv4ProtocolNumber, pkt); err != want { - t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want) + err := c.ep.WritePacket(r, nil /* gso */, header.IPv4ProtocolNumber, pkt) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got WritePacket(...) = %v, want %s", err, &tcpip.ErrWouldBlock{}) } } diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go index 5859851d8..bd2b8d4bf 100644 --- a/pkg/tcpip/link/sniffer/sniffer.go +++ b/pkg/tcpip/link/sniffer/sniffer.go @@ -187,7 +187,7 @@ func (e *endpoint) dumpPacket(dir direction, gso *stack.GSO, protocol tcpip.Netw // WritePacket implements the stack.LinkEndpoint interface. It is called by // higher-level protocols to write packets; it just logs the packet and // forwards the request to the lower endpoint. -func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { e.dumpPacket(directionSend, gso, protocol, pkt) return e.Endpoint.WritePacket(r, gso, protocol, pkt) } @@ -195,7 +195,7 @@ func (e *endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip // WritePackets implements the stack.LinkEndpoint interface. It is called by // higher-level protocols to write packets; it just logs the packet and // forwards the request to the lower endpoint. -func (e *endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { e.dumpPacket(directionSend, gso, protocol, pkt) } diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go index bfac358f4..3829ca9c9 100644 --- a/pkg/tcpip/link/tun/device.go +++ b/pkg/tcpip/link/tun/device.go @@ -149,10 +149,10 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE err := s.CreateNICWithOptions(endpoint.nicID, endpoint, stack.NICOptions{ Name: endpoint.name, }) - switch err { + switch err.(type) { case nil: return endpoint, nil - case tcpip.ErrDuplicateNICID: + case *tcpip.ErrDuplicateNICID: // Race detected: A NIC has been created in between. continue default: diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go index 30f1ad540..20259b285 100644 --- a/pkg/tcpip/link/waitable/waitable.go +++ b/pkg/tcpip/link/waitable/waitable.go @@ -108,7 +108,7 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress { // WritePacket implements stack.LinkEndpoint.WritePacket. It is called by // higher-level protocols to write packets. It only forwards packets to the // lower endpoint if Wait or WaitWrite haven't been called. -func (e *Endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *Endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { if !e.writeGate.Enter() { return nil } @@ -121,7 +121,7 @@ func (e *Endpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip // WritePackets implements stack.LinkEndpoint.WritePackets. It is called by // higher-level protocols to write packets. It only forwards packets to the // lower endpoint if Wait or WaitWrite haven't been called. -func (e *Endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *Endpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { if !e.writeGate.Enter() { return pkts.Len(), nil } diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go index b139de7dd..e368a9eaa 100644 --- a/pkg/tcpip/link/waitable/waitable_test.go +++ b/pkg/tcpip/link/waitable/waitable_test.go @@ -69,13 +69,13 @@ func (e *countedEndpoint) LinkAddress() tcpip.LinkAddress { return e.linkAddr } -func (e *countedEndpoint) WritePacket(stack.RouteInfo, *stack.GSO, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) *tcpip.Error { +func (e *countedEndpoint) WritePacket(stack.RouteInfo, *stack.GSO, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) tcpip.Error { e.writeCount++ return nil } // WritePackets implements stack.LinkEndpoint.WritePackets. -func (e *countedEndpoint) WritePackets(_ stack.RouteInfo, _ *stack.GSO, pkts stack.PacketBufferList, _ tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *countedEndpoint) WritePackets(_ stack.RouteInfo, _ *stack.GSO, pkts stack.PacketBufferList, _ tcpip.NetworkProtocolNumber) (int, tcpip.Error) { e.writeCount += pkts.Len() return pkts.Len(), nil } diff --git a/pkg/tcpip/network/BUILD b/pkg/tcpip/network/BUILD index 9ebf31b78..0caa65251 100644 --- a/pkg/tcpip/network/BUILD +++ b/pkg/tcpip/network/BUILD @@ -25,5 +25,6 @@ go_test( "//pkg/tcpip/transport/icmp", "//pkg/tcpip/transport/tcp", "//pkg/tcpip/transport/udp", + "@com_github_google_go_cmp//cmp:go_default_library", ], ) diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD index c7ab876bf..933845269 100644 --- a/pkg/tcpip/network/arp/BUILD +++ b/pkg/tcpip/network/arp/BUILD @@ -10,7 +10,6 @@ go_library( ], visibility = ["//visibility:public"], deps = [ - "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go index 6bc8c5c02..0d7fadc31 100644 --- a/pkg/tcpip/network/arp/arp.go +++ b/pkg/tcpip/network/arp/arp.go @@ -22,7 +22,6 @@ import ( "reflect" "sync/atomic" - "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -35,6 +34,8 @@ const ( ProtocolNumber = header.ARPProtocolNumber ) +var _ stack.LinkAddressResolver = (*endpoint)(nil) + // ARP endpoints need to implement stack.NetworkEndpoint because the stack // considers the layer above the link-layer a network layer; the only // facility provided by the stack to deliver packets to a layer above @@ -49,15 +50,13 @@ type endpoint struct { // Must be accessed using atomic operations. enabled uint32 - nic stack.NetworkInterface - linkAddrCache stack.LinkAddressCache - nud stack.NUDHandler - stats sharedStats + nic stack.NetworkInterface + stats sharedStats } -func (e *endpoint) Enable() *tcpip.Error { +func (e *endpoint) Enable() tcpip.Error { if !e.nic.Enabled() { - return tcpip.ErrNotPermitted + return &tcpip.ErrNotPermitted{} } e.setEnabled(true) @@ -101,12 +100,10 @@ func (e *endpoint) MaxHeaderLength() uint16 { return e.nic.MaxHeaderLength() + header.ARPSize } -func (e *endpoint) Close() { - e.protocol.forgetEndpoint(e.nic.ID()) -} +func (*endpoint) Close() {} -func (*endpoint) WritePacket(*stack.Route, *stack.GSO, stack.NetworkHeaderParams, *stack.PacketBuffer) *tcpip.Error { - return tcpip.ErrNotSupported +func (*endpoint) WritePacket(*stack.Route, *stack.GSO, stack.NetworkHeaderParams, *stack.PacketBuffer) tcpip.Error { + return &tcpip.ErrNotSupported{} } // NetworkProtocolNumber implements stack.NetworkEndpoint.NetworkProtocolNumber. @@ -115,12 +112,12 @@ func (*endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { } // WritePackets implements stack.NetworkEndpoint.WritePackets. -func (*endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList, stack.NetworkHeaderParams) (int, *tcpip.Error) { - return 0, tcpip.ErrNotSupported +func (*endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList, stack.NetworkHeaderParams) (int, tcpip.Error) { + return 0, &tcpip.ErrNotSupported{} } -func (*endpoint) WriteHeaderIncludedPacket(*stack.Route, *stack.PacketBuffer) *tcpip.Error { - return tcpip.ErrNotSupported +func (*endpoint) WriteHeaderIncludedPacket(*stack.Route, *stack.PacketBuffer) tcpip.Error { + return &tcpip.ErrNotSupported{} } func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { @@ -151,10 +148,12 @@ func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { remoteAddr := tcpip.Address(h.ProtocolAddressSender()) remoteLinkAddr := tcpip.LinkAddress(h.HardwareAddressSender()) - if e.nud == nil { - e.linkAddrCache.AddLinkAddress(remoteAddr, remoteLinkAddr) - } else { - e.nud.HandleProbe(remoteAddr, ProtocolNumber, remoteLinkAddr, e.protocol) + switch err := e.nic.HandleNeighborProbe(header.IPv4ProtocolNumber, remoteAddr, remoteLinkAddr); err.(type) { + case nil: + case *tcpip.ErrNotSupported: + // The stack may support ARP but the NIC may not need link resolution. + default: + panic(fmt.Sprintf("unexpected error when informing NIC of neighbor probe message: %s", err)) } respPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ @@ -195,14 +194,9 @@ func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { addr := tcpip.Address(h.ProtocolAddressSender()) linkAddr := tcpip.LinkAddress(h.HardwareAddressSender()) - if e.nud == nil { - e.linkAddrCache.AddLinkAddress(addr, linkAddr) - return - } - // The solicited, override, and isRouter flags are not available for ARP; // they are only available for IPv6 Neighbor Advertisements. - e.nud.HandleConfirmation(addr, linkAddr, stack.ReachabilityConfirmationFlags{ + switch err := e.nic.HandleNeighborConfirmation(header.IPv4ProtocolNumber, addr, linkAddr, stack.ReachabilityConfirmationFlags{ // Solicited and unsolicited (also referred to as gratuitous) ARP Replies // are handled equivalently to a solicited Neighbor Advertisement. Solicited: true, @@ -211,7 +205,13 @@ func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { Override: false, // ARP does not distinguish between router and non-router hosts. IsRouter: false, - }) + }); err.(type) { + case nil: + case *tcpip.ErrNotSupported: + // The stack may support ARP but the NIC may not need link resolution. + default: + panic(fmt.Sprintf("unexpected error when informing NIC of neighbor confirmation message: %s", err)) + } } } @@ -221,19 +221,10 @@ func (e *endpoint) Stats() stack.NetworkEndpointStats { } var _ stack.NetworkProtocol = (*protocol)(nil) -var _ stack.LinkAddressResolver = (*protocol)(nil) // protocol implements stack.NetworkProtocol and stack.LinkAddressResolver. type protocol struct { stack *stack.Stack - - mu struct { - sync.RWMutex - - // eps is keyed by NICID to allow protocol methods to retrieve the correct - // endpoint depending on the NIC. - eps map[tcpip.NICID]*endpoint - } } func (p *protocol) Number() tcpip.NetworkProtocolNumber { return ProtocolNumber } @@ -244,12 +235,10 @@ func (*protocol) ParseAddresses(buffer.View) (src, dst tcpip.Address) { return "", "" } -func (p *protocol) NewEndpoint(nic stack.NetworkInterface, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { +func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { e := &endpoint{ - protocol: p, - nic: nic, - linkAddrCache: linkAddrCache, - nud: nud, + protocol: p, + nic: nic, } tcpip.InitStatCounters(reflect.ValueOf(&e.stats.localStats).Elem()) @@ -257,60 +246,43 @@ func (p *protocol) NewEndpoint(nic stack.NetworkInterface, linkAddrCache stack.L stackStats := p.stack.Stats() e.stats.arp.init(&e.stats.localStats.ARP, &stackStats.ARP) - p.mu.Lock() - p.mu.eps[nic.ID()] = e - p.mu.Unlock() - return e } -func (p *protocol) forgetEndpoint(nicID tcpip.NICID) { - p.mu.Lock() - defer p.mu.Unlock() - delete(p.mu.eps, nicID) -} - // LinkAddressProtocol implements stack.LinkAddressResolver.LinkAddressProtocol. -func (*protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber { +func (*endpoint) LinkAddressProtocol() tcpip.NetworkProtocolNumber { return header.IPv4ProtocolNumber } // LinkAddressRequest implements stack.LinkAddressResolver.LinkAddressRequest. -func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, nic stack.NetworkInterface) *tcpip.Error { - nicID := nic.ID() - - p.mu.Lock() - netEP, ok := p.mu.eps[nicID] - p.mu.Unlock() - if !ok { - return tcpip.ErrNotConnected - } +func (e *endpoint) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) tcpip.Error { + nicID := e.nic.ID() - stats := netEP.stats.arp + stats := e.stats.arp if len(remoteLinkAddr) == 0 { remoteLinkAddr = header.EthernetBroadcastAddress } if len(localAddr) == 0 { - addr, ok := p.stack.GetMainNICAddress(nicID, header.IPv4ProtocolNumber) + addr, ok := e.protocol.stack.GetMainNICAddress(nicID, header.IPv4ProtocolNumber) if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } if len(addr.Address) == 0 { stats.outgoingRequestInterfaceHasNoLocalAddressErrors.Increment() - return tcpip.ErrNetworkUnreachable + return &tcpip.ErrNetworkUnreachable{} } localAddr = addr.Address - } else if p.stack.CheckLocalAddress(nicID, header.IPv4ProtocolNumber, localAddr) == 0 { + } else if e.protocol.stack.CheckLocalAddress(nicID, header.IPv4ProtocolNumber, localAddr) == 0 { stats.outgoingRequestBadLocalAddressErrors.Increment() - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ - ReserveHeaderBytes: int(nic.MaxHeaderLength()) + header.ARPSize, + ReserveHeaderBytes: int(e.nic.MaxHeaderLength()) + header.ARPSize, }) h := header.ARP(pkt.NetworkHeader().Push(header.ARPSize)) pkt.NetworkProtocolNumber = ProtocolNumber @@ -318,14 +290,14 @@ func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remot h.SetOp(header.ARPRequest) // TODO(gvisor.dev/issue/4582): check copied length once TAP devices have a // link address. - _ = copy(h.HardwareAddressSender(), nic.LinkAddress()) + _ = copy(h.HardwareAddressSender(), e.nic.LinkAddress()) if n := copy(h.ProtocolAddressSender(), localAddr); n != header.IPv4AddressSize { panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.IPv4AddressSize)) } if n := copy(h.ProtocolAddressTarget(), targetAddr); n != header.IPv4AddressSize { panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.IPv4AddressSize)) } - if err := nic.WritePacketToRemote(remoteLinkAddr, nil /* gso */, ProtocolNumber, pkt); err != nil { + if err := e.nic.WritePacketToRemote(remoteLinkAddr, nil /* gso */, ProtocolNumber, pkt); err != nil { stats.outgoingRequestsDropped.Increment() return err } @@ -334,7 +306,7 @@ func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remot } // ResolveStaticAddress implements stack.LinkAddressResolver.ResolveStaticAddress. -func (*protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) { +func (*endpoint) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) { if addr == header.IPv4Broadcast { return header.EthernetBroadcastAddress, true } @@ -345,13 +317,13 @@ func (*protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bo } // SetOption implements stack.NetworkProtocol.SetOption. -func (*protocol) SetOption(tcpip.SettableNetworkProtocolOption) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (*protocol) SetOption(tcpip.SettableNetworkProtocolOption) tcpip.Error { + return &tcpip.ErrUnknownProtocolOption{} } // Option implements stack.NetworkProtocol.Option. -func (*protocol) Option(tcpip.GettableNetworkProtocolOption) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (*protocol) Option(tcpip.GettableNetworkProtocolOption) tcpip.Error { + return &tcpip.ErrUnknownProtocolOption{} } // Close implements stack.TransportProtocol.Close. @@ -369,9 +341,5 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNu func NewProtocol(s *stack.Stack) stack.NetworkProtocol { return &protocol{ stack: s, - mu: struct { - sync.RWMutex - eps map[tcpip.NICID]*endpoint - }{eps: make(map[tcpip.NICID]*endpoint)}, } } diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go index 001fca727..24357e15d 100644 --- a/pkg/tcpip/network/arp/arp_test.go +++ b/pkg/tcpip/network/arp/arp_test.go @@ -125,8 +125,8 @@ func (d *arpDispatcher) OnNeighborRemoved(nicID tcpip.NICID, entry stack.Neighbo func (d *arpDispatcher) waitForEvent(ctx context.Context, want eventInfo) error { select { case got := <-d.C: - if diff := cmp.Diff(got, want, cmp.AllowUnexported(got), cmpopts.IgnoreFields(stack.NeighborEntry{}, "UpdatedAtNanos")); diff != "" { - return fmt.Errorf("got invalid event (-got +want):\n%s", diff) + if diff := cmp.Diff(want, got, cmp.AllowUnexported(got), cmpopts.IgnoreFields(stack.NeighborEntry{}, "UpdatedAtNanos")); diff != "" { + return fmt.Errorf("got invalid event (-want +got):\n%s", diff) } case <-ctx.Done(): return fmt.Errorf("%s for %s", ctx.Err(), want) @@ -491,9 +491,9 @@ func TestDirectRequestWithNeighborCache(t *testing.T) { t.Fatal(err) } - neighbors, err := c.s.Neighbors(nicID) + neighbors, err := c.s.Neighbors(nicID, ipv4.ProtocolNumber) if err != nil { - t.Fatalf("c.s.Neighbors(%d): %s", nicID, err) + t.Fatalf("c.s.Neighbors(%d, %d): %s", nicID, ipv4.ProtocolNumber, err) } neighborByAddr := make(map[tcpip.Address]stack.NeighborEntry) @@ -530,52 +530,19 @@ func TestDirectRequestWithNeighborCache(t *testing.T) { } } -var _ stack.NetworkInterface = (*testInterface)(nil) +var _ stack.LinkEndpoint = (*testLinkEndpoint)(nil) -type testInterface struct { +type testLinkEndpoint struct { stack.LinkEndpoint - nicID tcpip.NICID - - writeErr *tcpip.Error -} - -func (t *testInterface) ID() tcpip.NICID { - return t.nicID -} - -func (*testInterface) IsLoopback() bool { - return false -} - -func (*testInterface) Name() string { - return "" -} - -func (*testInterface) Enabled() bool { - return true -} - -func (*testInterface) Promiscuous() bool { - return false -} - -func (t *testInterface) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { - return t.LinkEndpoint.WritePacket(r.Fields(), gso, protocol, pkt) -} - -func (t *testInterface) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { - return t.LinkEndpoint.WritePackets(r.Fields(), gso, pkts, protocol) + writeErr tcpip.Error } -func (t *testInterface) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (t *testLinkEndpoint) WritePacket(r stack.RouteInfo, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { if t.writeErr != nil { return t.writeErr } - var r stack.RouteInfo - r.NetProto = protocol - r.RemoteLinkAddress = remoteLinkAddr return t.LinkEndpoint.WritePacket(r, gso, protocol, pkt) } @@ -589,8 +556,8 @@ func TestLinkAddressRequest(t *testing.T) { nicAddr tcpip.Address localAddr tcpip.Address remoteLinkAddr tcpip.LinkAddress - linkErr *tcpip.Error - expectedErr *tcpip.Error + linkErr tcpip.Error + expectedErr tcpip.Error expectedLocalAddr tcpip.Address expectedRemoteLinkAddr tcpip.LinkAddress expectedRequestsSent uint64 @@ -651,7 +618,7 @@ func TestLinkAddressRequest(t *testing.T) { nicAddr: stackAddr, localAddr: testAddr, remoteLinkAddr: remoteLinkAddr, - expectedErr: tcpip.ErrBadLocalAddress, + expectedErr: &tcpip.ErrBadLocalAddress{}, expectedRequestsSent: 0, expectedRequestBadLocalAddressErrors: 1, expectedRequestInterfaceHasNoLocalAddressErrors: 0, @@ -662,7 +629,7 @@ func TestLinkAddressRequest(t *testing.T) { nicAddr: stackAddr, localAddr: testAddr, remoteLinkAddr: "", - expectedErr: tcpip.ErrBadLocalAddress, + expectedErr: &tcpip.ErrBadLocalAddress{}, expectedRequestsSent: 0, expectedRequestBadLocalAddressErrors: 1, expectedRequestInterfaceHasNoLocalAddressErrors: 0, @@ -673,7 +640,7 @@ func TestLinkAddressRequest(t *testing.T) { nicAddr: "", localAddr: "", remoteLinkAddr: remoteLinkAddr, - expectedErr: tcpip.ErrNetworkUnreachable, + expectedErr: &tcpip.ErrNetworkUnreachable{}, expectedRequestsSent: 0, expectedRequestBadLocalAddressErrors: 0, expectedRequestInterfaceHasNoLocalAddressErrors: 1, @@ -684,7 +651,7 @@ func TestLinkAddressRequest(t *testing.T) { nicAddr: "", localAddr: "", remoteLinkAddr: "", - expectedErr: tcpip.ErrNetworkUnreachable, + expectedErr: &tcpip.ErrNetworkUnreachable{}, expectedRequestsSent: 0, expectedRequestBadLocalAddressErrors: 0, expectedRequestInterfaceHasNoLocalAddressErrors: 1, @@ -695,8 +662,8 @@ func TestLinkAddressRequest(t *testing.T) { nicAddr: stackAddr, localAddr: stackAddr, remoteLinkAddr: remoteLinkAddr, - linkErr: tcpip.ErrInvalidEndpointState, - expectedErr: tcpip.ErrInvalidEndpointState, + linkErr: &tcpip.ErrInvalidEndpointState{}, + expectedErr: &tcpip.ErrInvalidEndpointState{}, expectedRequestsSent: 0, expectedRequestBadLocalAddressErrors: 0, expectedRequestInterfaceHasNoLocalAddressErrors: 0, @@ -709,31 +676,31 @@ func TestLinkAddressRequest(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol}, }) - p := s.NetworkProtocolInstance(arp.ProtocolNumber) - linkRes, ok := p.(stack.LinkAddressResolver) - if !ok { - t.Fatal("expected ARP protocol to implement stack.LinkAddressResolver") - } - linkEP := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr) - if err := s.CreateNIC(nicID, linkEP); err != nil { + if err := s.CreateNIC(nicID, &testLinkEndpoint{LinkEndpoint: linkEP, writeErr: test.linkErr}); err != nil { t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err) } + ep, err := s.GetNetworkEndpoint(nicID, arp.ProtocolNumber) + if err != nil { + t.Fatalf("s.GetNetworkEndpoint(%d, %d): %s", nicID, arp.ProtocolNumber, err) + } + linkRes, ok := ep.(stack.LinkAddressResolver) + if !ok { + t.Fatalf("expected %T to implement stack.LinkAddressResolver", ep) + } + if len(test.nicAddr) != 0 { if err := s.AddAddress(nicID, ipv4.ProtocolNumber, test.nicAddr); err != nil { t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID, ipv4.ProtocolNumber, test.nicAddr, err) } } - // We pass a test network interface to LinkAddressRequest with the same - // NIC ID and link endpoint used by the NIC we created earlier so that we - // can mock a link address request and observe the packets sent to the - // link endpoint even though the stack uses the real NIC to validate the - // local address. - iface := testInterface{LinkEndpoint: linkEP, nicID: nicID, writeErr: test.linkErr} - if err := linkRes.LinkAddressRequest(remoteAddr, test.localAddr, test.remoteLinkAddr, &iface); err != test.expectedErr { - t.Fatalf("got p.LinkAddressRequest(%s, %s, %s, _) = %s, want = %s", remoteAddr, test.localAddr, test.remoteLinkAddr, err, test.expectedErr) + { + err := linkRes.LinkAddressRequest(remoteAddr, test.localAddr, test.remoteLinkAddr) + if diff := cmp.Diff(test.expectedErr, err); diff != "" { + t.Fatalf("unexpected error from p.LinkAddressRequest(%s, %s, %s, _), (-want, +got):\n%s", remoteAddr, test.localAddr, test.remoteLinkAddr, diff) + } } if got := s.Stats().ARP.OutgoingRequestsSent.Value(); got != test.expectedRequestsSent { @@ -781,18 +748,3 @@ func TestLinkAddressRequest(t *testing.T) { }) } } - -func TestLinkAddressRequestWithoutNIC(t *testing.T) { - s := stack.New(stack.Options{ - NetworkProtocols: []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol}, - }) - p := s.NetworkProtocolInstance(arp.ProtocolNumber) - linkRes, ok := p.(stack.LinkAddressResolver) - if !ok { - t.Fatal("expected ARP protocol to implement stack.LinkAddressResolver") - } - - if err := linkRes.LinkAddressRequest(remoteAddr, "", remoteLinkAddr, &testInterface{nicID: nicID}); err != tcpip.ErrNotConnected { - t.Fatalf("got p.LinkAddressRequest(%s, %s, %s, _) = %s, want = %s", remoteAddr, "", remoteLinkAddr, err, tcpip.ErrNotConnected) - } -} diff --git a/pkg/tcpip/network/arp/stats_test.go b/pkg/tcpip/network/arp/stats_test.go index 036fdf739..65c708ac4 100644 --- a/pkg/tcpip/network/arp/stats_test.go +++ b/pkg/tcpip/network/arp/stats_test.go @@ -34,55 +34,13 @@ func (t *testInterface) ID() tcpip.NICID { return t.nicID } -func knownNICIDs(proto *protocol) []tcpip.NICID { - var nicIDs []tcpip.NICID - - for k := range proto.mu.eps { - nicIDs = append(nicIDs, k) - } - - return nicIDs -} - -func TestClearEndpointFromProtocolOnClose(t *testing.T) { - s := stack.New(stack.Options{ - NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol}, - }) - proto := s.NetworkProtocolInstance(ProtocolNumber).(*protocol) - nic := testInterface{nicID: 1} - ep := proto.NewEndpoint(&nic, nil, nil, nil).(*endpoint) - var nicIDs []tcpip.NICID - - proto.mu.Lock() - foundEP, hasEndpointBeforeClose := proto.mu.eps[nic.ID()] - nicIDs = knownNICIDs(proto) - proto.mu.Unlock() - - if !hasEndpointBeforeClose { - t.Fatalf("expected to find the nic id %d in the protocol's endpoint map (%v)", nic.ID(), nicIDs) - } - if foundEP != ep { - t.Fatalf("found an incorrect endpoint mapped to nic id %d", nic.ID()) - } - - ep.Close() - - proto.mu.Lock() - _, hasEndpointAfterClose := proto.mu.eps[nic.ID()] - nicIDs = knownNICIDs(proto) - proto.mu.Unlock() - if hasEndpointAfterClose { - t.Fatalf("unexpectedly found an endpoint mapped to the nic id %d in the protocol's known nic ids (%v)", nic.ID(), nicIDs) - } -} - func TestMultiCounterStatsInitialization(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol}, }) proto := s.NetworkProtocolInstance(ProtocolNumber).(*protocol) var nic testInterface - ep := proto.NewEndpoint(&nic, nil, nil, nil).(*endpoint) + ep := proto.NewEndpoint(&nic, nil).(*endpoint) // At this point, the Stack's stats and the NetworkEndpoint's stats are // expected to be bound by a MultiCounterStat. refStack := s.Stats() diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go index 1af87d713..243738951 100644 --- a/pkg/tcpip/network/fragmentation/fragmentation.go +++ b/pkg/tcpip/network/fragmentation/fragmentation.go @@ -84,7 +84,7 @@ type Fragmentation struct { lowLimit int reassemblers map[FragmentID]*reassembler rList reassemblerList - size int + memSize int timeout time.Duration blockSize uint16 clock tcpip.Clock @@ -156,22 +156,22 @@ func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, rea // the protocol to identify a fragment. func (f *Fragmentation) Process( id FragmentID, first, last uint16, more bool, proto uint8, pkt *stack.PacketBuffer) ( - buffer.VectorisedView, uint8, bool, error) { + *stack.PacketBuffer, uint8, bool, error) { if first > last { - return buffer.VectorisedView{}, 0, false, fmt.Errorf("first=%d is greater than last=%d: %w", first, last, ErrInvalidArgs) + return nil, 0, false, fmt.Errorf("first=%d is greater than last=%d: %w", first, last, ErrInvalidArgs) } if first%f.blockSize != 0 { - return buffer.VectorisedView{}, 0, false, fmt.Errorf("first=%d is not a multiple of block size=%d: %w", first, f.blockSize, ErrInvalidArgs) + return nil, 0, false, fmt.Errorf("first=%d is not a multiple of block size=%d: %w", first, f.blockSize, ErrInvalidArgs) } fragmentSize := last - first + 1 if more && fragmentSize%f.blockSize != 0 { - return buffer.VectorisedView{}, 0, false, fmt.Errorf("fragment size=%d bytes is not a multiple of block size=%d on non-final fragment: %w", fragmentSize, f.blockSize, ErrInvalidArgs) + return nil, 0, false, fmt.Errorf("fragment size=%d bytes is not a multiple of block size=%d on non-final fragment: %w", fragmentSize, f.blockSize, ErrInvalidArgs) } if l := pkt.Data.Size(); l != int(fragmentSize) { - return buffer.VectorisedView{}, 0, false, fmt.Errorf("got fragment size=%d bytes not equal to the expected fragment size=%d bytes (first=%d last=%d): %w", l, fragmentSize, first, last, ErrInvalidArgs) + return nil, 0, false, fmt.Errorf("got fragment size=%d bytes not equal to the expected fragment size=%d bytes (first=%d last=%d): %w", l, fragmentSize, first, last, ErrInvalidArgs) } f.mu.Lock() @@ -190,24 +190,24 @@ func (f *Fragmentation) Process( } f.mu.Unlock() - res, firstFragmentProto, done, consumed, err := r.process(first, last, more, proto, pkt) + resPkt, firstFragmentProto, done, memConsumed, err := r.process(first, last, more, proto, pkt) if err != nil { // We probably got an invalid sequence of fragments. Just // discard the reassembler and move on. f.mu.Lock() f.release(r, false /* timedOut */) f.mu.Unlock() - return buffer.VectorisedView{}, 0, false, fmt.Errorf("fragmentation processing error: %w", err) + return nil, 0, false, fmt.Errorf("fragmentation processing error: %w", err) } f.mu.Lock() - f.size += consumed + f.memSize += memConsumed if done { f.release(r, false /* timedOut */) } // Evict reassemblers if we are consuming more memory than highLimit until // we reach lowLimit. - if f.size > f.highLimit { - for f.size > f.lowLimit { + if f.memSize > f.highLimit { + for f.memSize > f.lowLimit { tail := f.rList.Back() if tail == nil { break @@ -216,7 +216,7 @@ func (f *Fragmentation) Process( } } f.mu.Unlock() - return res, firstFragmentProto, done, nil + return resPkt, firstFragmentProto, done, nil } func (f *Fragmentation) release(r *reassembler, timedOut bool) { @@ -228,10 +228,10 @@ func (f *Fragmentation) release(r *reassembler, timedOut bool) { delete(f.reassemblers, r.id) f.rList.Remove(r) - f.size -= r.size - if f.size < 0 { - log.Printf("memory counter < 0 (%d), this is an accounting bug that requires investigation", f.size) - f.size = 0 + f.memSize -= r.memSize + if f.memSize < 0 { + log.Printf("memory counter < 0 (%d), this is an accounting bug that requires investigation", f.memSize) + f.memSize = 0 } if h := f.timeoutHandler; timedOut && h != nil { diff --git a/pkg/tcpip/network/fragmentation/fragmentation_test.go b/pkg/tcpip/network/fragmentation/fragmentation_test.go index 3a79688a8..905bbc19b 100644 --- a/pkg/tcpip/network/fragmentation/fragmentation_test.go +++ b/pkg/tcpip/network/fragmentation/fragmentation_test.go @@ -16,7 +16,6 @@ package fragmentation import ( "errors" - "reflect" "testing" "time" @@ -112,20 +111,20 @@ func TestFragmentationProcess(t *testing.T) { f := NewFragmentation(minBlockSize, 1024, 512, reassembleTimeout, &faketime.NullClock{}, nil) firstFragmentProto := c.in[0].proto for i, in := range c.in { - vv, proto, done, err := f.Process(in.id, in.first, in.last, in.more, in.proto, in.pkt) + resPkt, proto, done, err := f.Process(in.id, in.first, in.last, in.more, in.proto, in.pkt) if err != nil { t.Fatalf("f.Process(%+v, %d, %d, %t, %d, %#v) failed: %s", in.id, in.first, in.last, in.more, in.proto, in.pkt, err) } - if !reflect.DeepEqual(vv, c.out[i].vv) { - t.Errorf("got Process(%+v, %d, %d, %t, %d, %#v) = (%X, _, _, _), want = (%X, _, _, _)", - in.id, in.first, in.last, in.more, in.proto, in.pkt, vv.ToView(), c.out[i].vv.ToView()) - } if done != c.out[i].done { t.Errorf("got Process(%+v, %d, %d, %t, %d, _) = (_, _, %t, _), want = (_, _, %t, _)", in.id, in.first, in.last, in.more, in.proto, done, c.out[i].done) } if c.out[i].done { + if diff := cmp.Diff(c.out[i].vv.ToOwnedView(), resPkt.Data.ToOwnedView()); diff != "" { + t.Errorf("got Process(%+v, %d, %d, %t, %d, %#v) result mismatch (-want, +got):\n%s", + in.id, in.first, in.last, in.more, in.proto, in.pkt, diff) + } if firstFragmentProto != proto { t.Errorf("got Process(%+v, %d, %d, %t, %d, _) = (_, %d, _, _), want = (_, %d, _, _)", in.id, in.first, in.last, in.more, in.proto, proto, firstFragmentProto) @@ -173,9 +172,17 @@ func TestReassemblingTimeout(t *testing.T) { // reassembly is done after the fragment is processd. expectDone bool - // sizeAfterEvent is the expected size of the fragmentation instance after - // the event. - sizeAfterEvent int + // memSizeAfterEvent is the expected memory size of the fragmentation + // instance after the event. + memSizeAfterEvent int + } + + memSizeOfFrags := func(frags ...*fragment) int { + var size int + for _, frag := range frags { + size += pkt(len(frag.data), frag.data).MemSize() + } + return size } half1 := &fragment{first: 0, last: 0, more: true, data: "0"} @@ -189,16 +196,16 @@ func TestReassemblingTimeout(t *testing.T) { name: "half1 and half2 are reassembled successfully", events: []event{ { - name: "half1", - fragment: half1, - expectDone: false, - sizeAfterEvent: 1, + name: "half1", + fragment: half1, + expectDone: false, + memSizeAfterEvent: memSizeOfFrags(half1), }, { - name: "half2", - fragment: half2, - expectDone: true, - sizeAfterEvent: 0, + name: "half2", + fragment: half2, + expectDone: true, + memSizeAfterEvent: 0, }, }, }, @@ -206,36 +213,36 @@ func TestReassemblingTimeout(t *testing.T) { name: "half1 timeout, half2 timeout", events: []event{ { - name: "half1", - fragment: half1, - expectDone: false, - sizeAfterEvent: 1, + name: "half1", + fragment: half1, + expectDone: false, + memSizeAfterEvent: memSizeOfFrags(half1), }, { - name: "half1 just before reassembly timeout", - clockAdvance: reassemblyTimeout - 1, - sizeAfterEvent: 1, + name: "half1 just before reassembly timeout", + clockAdvance: reassemblyTimeout - 1, + memSizeAfterEvent: memSizeOfFrags(half1), }, { - name: "half1 reassembly timeout", - clockAdvance: 1, - sizeAfterEvent: 0, + name: "half1 reassembly timeout", + clockAdvance: 1, + memSizeAfterEvent: 0, }, { - name: "half2", - fragment: half2, - expectDone: false, - sizeAfterEvent: 1, + name: "half2", + fragment: half2, + expectDone: false, + memSizeAfterEvent: memSizeOfFrags(half2), }, { - name: "half2 just before reassembly timeout", - clockAdvance: reassemblyTimeout - 1, - sizeAfterEvent: 1, + name: "half2 just before reassembly timeout", + clockAdvance: reassemblyTimeout - 1, + memSizeAfterEvent: memSizeOfFrags(half2), }, { - name: "half2 reassembly timeout", - clockAdvance: 1, - sizeAfterEvent: 0, + name: "half2 reassembly timeout", + clockAdvance: 1, + memSizeAfterEvent: 0, }, }, }, @@ -255,8 +262,8 @@ func TestReassemblingTimeout(t *testing.T) { t.Fatalf("%s: got done = %t, want = %t", event.name, done, event.expectDone) } } - if got, want := f.size, event.sizeAfterEvent; got != want { - t.Errorf("%s: got f.size = %d, want = %d", event.name, got, want) + if got, want := f.memSize, event.memSizeAfterEvent; got != want { + t.Errorf("%s: got f.memSize = %d, want = %d", event.name, got, want) } } }) @@ -264,7 +271,9 @@ func TestReassemblingTimeout(t *testing.T) { } func TestMemoryLimits(t *testing.T) { - f := NewFragmentation(minBlockSize, 3, 1, reassembleTimeout, &faketime.NullClock{}, nil) + lowLimit := pkt(1, "0").MemSize() + highLimit := 3 * lowLimit // Allow at most 3 such packets. + f := NewFragmentation(minBlockSize, highLimit, lowLimit, reassembleTimeout, &faketime.NullClock{}, nil) // Send first fragment with id = 0. f.Process(FragmentID{ID: 0}, 0, 0, true, 0xFF, pkt(1, "0")) // Send first fragment with id = 1. @@ -288,15 +297,14 @@ func TestMemoryLimits(t *testing.T) { } func TestMemoryLimitsIgnoresDuplicates(t *testing.T) { - f := NewFragmentation(minBlockSize, 1, 0, reassembleTimeout, &faketime.NullClock{}, nil) + memSize := pkt(1, "0").MemSize() + f := NewFragmentation(minBlockSize, memSize, 0, reassembleTimeout, &faketime.NullClock{}, nil) // Send first fragment with id = 0. f.Process(FragmentID{}, 0, 0, true, 0xFF, pkt(1, "0")) // Send the same packet again. f.Process(FragmentID{}, 0, 0, true, 0xFF, pkt(1, "0")) - got := f.size - want := 1 - if got != want { + if got, want := f.memSize, memSize; got != want { t.Errorf("Wrong size, duplicates are not handled correctly: got=%d, want=%d.", got, want) } } diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go index 9b20bb1d8..933d63d32 100644 --- a/pkg/tcpip/network/fragmentation/reassembler.go +++ b/pkg/tcpip/network/fragmentation/reassembler.go @@ -20,7 +20,6 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -29,13 +28,15 @@ type hole struct { last uint16 filled bool final bool - data buffer.View + // pkt is the fragment packet if hole is filled. We keep the whole pkt rather + // than the fragmented payload to prevent binding to specific buffer types. + pkt *stack.PacketBuffer } type reassembler struct { reassemblerEntry id FragmentID - size int + memSize int proto uint8 mu sync.Mutex holes []hole @@ -59,18 +60,18 @@ func newReassembler(id FragmentID, clock tcpip.Clock) *reassembler { return r } -func (r *reassembler) process(first, last uint16, more bool, proto uint8, pkt *stack.PacketBuffer) (buffer.VectorisedView, uint8, bool, int, error) { +func (r *reassembler) process(first, last uint16, more bool, proto uint8, pkt *stack.PacketBuffer) (*stack.PacketBuffer, uint8, bool, int, error) { r.mu.Lock() defer r.mu.Unlock() if r.done { // A concurrent goroutine might have already reassembled // the packet and emptied the heap while this goroutine // was waiting on the mutex. We don't have to do anything in this case. - return buffer.VectorisedView{}, 0, false, 0, nil + return nil, 0, false, 0, nil } var holeFound bool - var consumed int + var memConsumed int for i := range r.holes { currentHole := &r.holes[i] @@ -90,12 +91,12 @@ func (r *reassembler) process(first, last uint16, more bool, proto uint8, pkt *s // https://github.com/torvalds/linux/blob/38525c6/net/ipv4/inet_fragment.c#L349 if first < currentHole.first || currentHole.last < last { // Incoming fragment only partially fits in the free hole. - return buffer.VectorisedView{}, 0, false, 0, ErrFragmentOverlap + return nil, 0, false, 0, ErrFragmentOverlap } if !more { if !currentHole.final || currentHole.filled && currentHole.last != last { // We have another final fragment, which does not perfectly overlap. - return buffer.VectorisedView{}, 0, false, 0, ErrFragmentConflict + return nil, 0, false, 0, ErrFragmentConflict } } @@ -124,16 +125,15 @@ func (r *reassembler) process(first, last uint16, more bool, proto uint8, pkt *s }) currentHole.final = false } - v := pkt.Data.ToOwnedView() - consumed = v.Size() - r.size += consumed + memConsumed = pkt.MemSize() + r.memSize += memConsumed // Update the current hole to precisely match the incoming fragment. r.holes[i] = hole{ first: first, last: last, filled: true, final: currentHole.final, - data: v, + pkt: pkt, } r.filled++ // For IPv6, it is possible to have different Protocol values between @@ -153,25 +153,24 @@ func (r *reassembler) process(first, last uint16, more bool, proto uint8, pkt *s } if !holeFound { // Incoming fragment is beyond end. - return buffer.VectorisedView{}, 0, false, 0, ErrFragmentConflict + return nil, 0, false, 0, ErrFragmentConflict } // Check if all the holes have been filled and we are ready to reassemble. if r.filled < len(r.holes) { - return buffer.VectorisedView{}, 0, false, consumed, nil + return nil, 0, false, memConsumed, nil } sort.Slice(r.holes, func(i, j int) bool { return r.holes[i].first < r.holes[j].first }) - var size int - views := make([]buffer.View, 0, len(r.holes)) - for _, hole := range r.holes { - views = append(views, hole.data) - size += hole.data.Size() + resPkt := r.holes[0].pkt + for i := 1; i < len(r.holes); i++ { + fragPkt := r.holes[i].pkt + fragPkt.Data.ReadToVV(&resPkt.Data, fragPkt.Data.Size()) } - return buffer.NewVectorisedView(size, views), r.proto, true, consumed, nil + return resPkt, r.proto, true, memConsumed, nil } func (r *reassembler) checkDoneOrMark() bool { diff --git a/pkg/tcpip/network/fragmentation/reassembler_test.go b/pkg/tcpip/network/fragmentation/reassembler_test.go index 2ff03eeeb..214a93709 100644 --- a/pkg/tcpip/network/fragmentation/reassembler_test.go +++ b/pkg/tcpip/network/fragmentation/reassembler_test.go @@ -15,6 +15,7 @@ package fragmentation import ( + "bytes" "math" "testing" @@ -44,16 +45,21 @@ func TestReassemblerProcess(t *testing.T) { return payload } - pkt := func(size int) *stack.PacketBuffer { + pkt := func(sizes ...int) *stack.PacketBuffer { + var vv buffer.VectorisedView + for _, size := range sizes { + vv.AppendView(v(size)) + } return stack.NewPacketBuffer(stack.PacketBufferOptions{ - Data: v(size).ToVectorisedView(), + Data: vv, }) } var tests = []struct { - name string - params []processParams - want []hole + name string + params []processParams + want []hole + wantPkt *stack.PacketBuffer }{ { name: "No fragments", @@ -64,7 +70,7 @@ func TestReassemblerProcess(t *testing.T) { name: "One fragment at beginning", params: []processParams{{first: 0, last: 1, more: true, pkt: pkt(2), wantDone: false, wantError: nil}}, want: []hole{ - {first: 0, last: 1, filled: true, final: false, data: v(2)}, + {first: 0, last: 1, filled: true, final: false, pkt: pkt(2)}, {first: 2, last: math.MaxUint16, filled: false, final: true}, }, }, @@ -72,7 +78,7 @@ func TestReassemblerProcess(t *testing.T) { name: "One fragment in the middle", params: []processParams{{first: 1, last: 2, more: true, pkt: pkt(2), wantDone: false, wantError: nil}}, want: []hole{ - {first: 1, last: 2, filled: true, final: false, data: v(2)}, + {first: 1, last: 2, filled: true, final: false, pkt: pkt(2)}, {first: 0, last: 0, filled: false, final: false}, {first: 3, last: math.MaxUint16, filled: false, final: true}, }, @@ -81,7 +87,7 @@ func TestReassemblerProcess(t *testing.T) { name: "One fragment at the end", params: []processParams{{first: 1, last: 2, more: false, pkt: pkt(2), wantDone: false, wantError: nil}}, want: []hole{ - {first: 1, last: 2, filled: true, final: true, data: v(2)}, + {first: 1, last: 2, filled: true, final: true, pkt: pkt(2)}, {first: 0, last: 0, filled: false}, }, }, @@ -89,8 +95,9 @@ func TestReassemblerProcess(t *testing.T) { name: "One fragment completing a packet", params: []processParams{{first: 0, last: 1, more: false, pkt: pkt(2), wantDone: true, wantError: nil}}, want: []hole{ - {first: 0, last: 1, filled: true, final: true, data: v(2)}, + {first: 0, last: 1, filled: true, final: true}, }, + wantPkt: pkt(2), }, { name: "Two fragments completing a packet", @@ -99,9 +106,10 @@ func TestReassemblerProcess(t *testing.T) { {first: 2, last: 3, more: false, pkt: pkt(2), wantDone: true, wantError: nil}, }, want: []hole{ - {first: 0, last: 1, filled: true, final: false, data: v(2)}, - {first: 2, last: 3, filled: true, final: true, data: v(2)}, + {first: 0, last: 1, filled: true, final: false}, + {first: 2, last: 3, filled: true, final: true}, }, + wantPkt: pkt(2, 2), }, { name: "Two fragments completing a packet with a duplicate", @@ -111,9 +119,10 @@ func TestReassemblerProcess(t *testing.T) { {first: 2, last: 3, more: false, pkt: pkt(2), wantDone: true, wantError: nil}, }, want: []hole{ - {first: 0, last: 1, filled: true, final: false, data: v(2)}, - {first: 2, last: 3, filled: true, final: true, data: v(2)}, + {first: 0, last: 1, filled: true, final: false}, + {first: 2, last: 3, filled: true, final: true}, }, + wantPkt: pkt(2, 2), }, { name: "Two fragments completing a packet with a partial duplicate", @@ -123,9 +132,10 @@ func TestReassemblerProcess(t *testing.T) { {first: 4, last: 5, more: false, pkt: pkt(2), wantDone: true, wantError: nil}, }, want: []hole{ - {first: 0, last: 3, filled: true, final: false, data: v(4)}, - {first: 4, last: 5, filled: true, final: true, data: v(2)}, + {first: 0, last: 3, filled: true, final: false}, + {first: 4, last: 5, filled: true, final: true}, }, + wantPkt: pkt(4, 2), }, { name: "Two overlapping fragments", @@ -134,7 +144,7 @@ func TestReassemblerProcess(t *testing.T) { {first: 5, last: 15, more: false, pkt: pkt(11), wantDone: false, wantError: ErrFragmentOverlap}, }, want: []hole{ - {first: 0, last: 10, filled: true, final: false, data: v(11)}, + {first: 0, last: 10, filled: true, final: false, pkt: pkt(11)}, {first: 11, last: math.MaxUint16, filled: false, final: true}, }, }, @@ -145,7 +155,7 @@ func TestReassemblerProcess(t *testing.T) { {first: 0, last: 9, more: false, pkt: pkt(10), wantDone: false, wantError: ErrFragmentConflict}, }, want: []hole{ - {first: 10, last: 14, filled: true, final: true, data: v(5)}, + {first: 10, last: 14, filled: true, final: true, pkt: pkt(5)}, {first: 0, last: 9, filled: false, final: false}, }, }, @@ -156,7 +166,7 @@ func TestReassemblerProcess(t *testing.T) { {first: 10, last: 14, more: false, pkt: pkt(5), wantDone: false, wantError: nil}, }, want: []hole{ - {first: 5, last: 14, filled: true, final: true, data: v(10)}, + {first: 5, last: 14, filled: true, final: true, pkt: pkt(10)}, {first: 0, last: 4, filled: false, final: false}, }, }, @@ -167,7 +177,7 @@ func TestReassemblerProcess(t *testing.T) { {first: 10, last: 13, more: false, pkt: pkt(4), wantDone: false, wantError: ErrFragmentConflict}, }, want: []hole{ - {first: 5, last: 14, filled: true, final: true, data: v(10)}, + {first: 5, last: 14, filled: true, final: true, pkt: pkt(10)}, {first: 0, last: 4, filled: false, final: false}, }, }, @@ -176,14 +186,47 @@ func TestReassemblerProcess(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { r := newReassembler(FragmentID{}, &faketime.NullClock{}) + var resPkt *stack.PacketBuffer + var isDone bool for _, param := range test.params { - _, _, done, _, err := r.process(param.first, param.last, param.more, proto, param.pkt) + pkt, _, done, _, err := r.process(param.first, param.last, param.more, proto, param.pkt) if done != param.wantDone || err != param.wantError { t.Errorf("got r.process(%d, %d, %t, %d, _) = (_, _, %t, _, %v), want = (%t, %v)", param.first, param.last, param.more, proto, done, err, param.wantDone, param.wantError) } + if done { + resPkt = pkt + isDone = true + } + } + + ignorePkt := func(a, b *stack.PacketBuffer) bool { return true } + cmpPktData := func(a, b *stack.PacketBuffer) bool { + if a == nil || b == nil { + return a == b + } + return bytes.Equal(a.Data.ToOwnedView(), b.Data.ToOwnedView()) } - if diff := cmp.Diff(test.want, r.holes, cmp.AllowUnexported(hole{})); diff != "" { - t.Errorf("r.holes mismatch (-want +got):\n%s", diff) + + if isDone { + if diff := cmp.Diff( + test.want, r.holes, + cmp.AllowUnexported(hole{}), + // Do not compare pkt in hole. Data will be altered. + cmp.Comparer(ignorePkt), + ); diff != "" { + t.Errorf("r.holes mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(test.wantPkt, resPkt, cmp.Comparer(cmpPktData)); diff != "" { + t.Errorf("Reassembled pkt mismatch (-want +got):\n%s", diff) + } + } else { + if diff := cmp.Diff( + test.want, r.holes, + cmp.AllowUnexported(hole{}), + cmp.Comparer(cmpPktData), + ); diff != "" { + t.Errorf("r.holes mismatch (-want +got):\n%s", diff) + } } }) } diff --git a/pkg/tcpip/network/ip/generic_multicast_protocol.go b/pkg/tcpip/network/ip/generic_multicast_protocol.go index f2f0e069c..b9f129728 100644 --- a/pkg/tcpip/network/ip/generic_multicast_protocol.go +++ b/pkg/tcpip/network/ip/generic_multicast_protocol.go @@ -126,6 +126,16 @@ type multicastGroupState struct { // // Must not be nil. delayedReportJob *tcpip.Job + + // delyedReportJobFiresAt is the time when the delayed report job will fire. + // + // A zero value indicates that the job is not scheduled. + delayedReportJobFiresAt time.Time +} + +func (m *multicastGroupState) cancelDelayedReportJob() { + m.delayedReportJob.Cancel() + m.delayedReportJobFiresAt = time.Time{} } // GenericMulticastProtocolOptions holds options for the generic multicast @@ -174,10 +184,10 @@ type MulticastGroupProtocol interface { // // Returns false if the caller should queue the report to be sent later. Note, // returning false does not mean that the receiver hit an error. - SendReport(groupAddress tcpip.Address) (sent bool, err *tcpip.Error) + SendReport(groupAddress tcpip.Address) (sent bool, err tcpip.Error) // SendLeave sends a multicast leave for the specified group address. - SendLeave(groupAddress tcpip.Address) *tcpip.Error + SendLeave(groupAddress tcpip.Address) tcpip.Error } // GenericMulticastProtocolState is the per interface generic multicast protocol @@ -428,7 +438,7 @@ func (g *GenericMulticastProtocolState) HandleReportLocked(groupAddress tcpip.Ad // on that interface, it stops its timer and does not send a Report for // that address, thus suppressing duplicate reports on the link. if info, ok := g.memberships[groupAddress]; ok && info.state.isDelayingMember() { - info.delayedReportJob.Cancel() + info.cancelDelayedReportJob() info.lastToSendReport = false info.state = idleMember g.memberships[groupAddress] = info @@ -603,7 +613,7 @@ func (g *GenericMulticastProtocolState) transitionToNonMemberLocked(groupAddress return } - info.delayedReportJob.Cancel() + info.cancelDelayedReportJob() g.maybeSendLeave(groupAddress, info.lastToSendReport) info.lastToSendReport = false info.state = nonMember @@ -645,14 +655,24 @@ func (g *GenericMulticastProtocolState) setDelayTimerForAddressRLocked(groupAddr // If a timer for any address is already running, it is reset to the new // random value only if the requested Maximum Response Delay is less than // the remaining value of the running timer. + now := time.Unix(0 /* seconds */, g.opts.Clock.NowNanoseconds()) if info.state == delayingMember { - // TODO: Reset the timer if time remaining is greater than maxResponseTime. - return + if info.delayedReportJobFiresAt.IsZero() { + panic(fmt.Sprintf("delayed report unscheduled while in the delaying member state; group = %s", groupAddress)) + } + + if info.delayedReportJobFiresAt.Sub(now) <= maxResponseTime { + // The timer is scheduled to fire before the maximum response time so we + // leave our timer as is. + return + } } info.state = delayingMember - info.delayedReportJob.Cancel() - info.delayedReportJob.Schedule(g.calculateDelayTimerDuration(maxResponseTime)) + info.cancelDelayedReportJob() + maxResponseTime = g.calculateDelayTimerDuration(maxResponseTime) + info.delayedReportJob.Schedule(maxResponseTime) + info.delayedReportJobFiresAt = now.Add(maxResponseTime) } // calculateDelayTimerDuration returns a random time between (0, maxRespTime]. diff --git a/pkg/tcpip/network/ip/generic_multicast_protocol_test.go b/pkg/tcpip/network/ip/generic_multicast_protocol_test.go index 85593f211..60eaea37e 100644 --- a/pkg/tcpip/network/ip/generic_multicast_protocol_test.go +++ b/pkg/tcpip/network/ip/generic_multicast_protocol_test.go @@ -141,7 +141,7 @@ func (m *mockMulticastGroupProtocol) Enabled() bool { // SendReport implements ip.MulticastGroupProtocol. // // Precondition: m.mu must be locked. -func (m *mockMulticastGroupProtocol) SendReport(groupAddress tcpip.Address) (bool, *tcpip.Error) { +func (m *mockMulticastGroupProtocol) SendReport(groupAddress tcpip.Address) (bool, tcpip.Error) { if m.mu.TryLock() { m.mu.Unlock() m.t.Fatalf("got write lock, expected to not take the lock; generic multicast protocol must take the write lock before sending report for %s", groupAddress) @@ -158,7 +158,7 @@ func (m *mockMulticastGroupProtocol) SendReport(groupAddress tcpip.Address) (boo // SendLeave implements ip.MulticastGroupProtocol. // // Precondition: m.mu must be locked. -func (m *mockMulticastGroupProtocol) SendLeave(groupAddress tcpip.Address) *tcpip.Error { +func (m *mockMulticastGroupProtocol) SendLeave(groupAddress tcpip.Address) tcpip.Error { if m.mu.TryLock() { m.mu.Unlock() m.t.Fatalf("got write lock, expected to not take the lock; generic multicast protocol must take the write lock before sending leave for %s", groupAddress) @@ -408,40 +408,46 @@ func TestHandleReport(t *testing.T) { func TestHandleQuery(t *testing.T) { tests := []struct { - name string - queryAddr tcpip.Address - maxDelay time.Duration - expectReportsFor []tcpip.Address + name string + queryAddr tcpip.Address + maxDelay time.Duration + expectQueriedReportsFor []tcpip.Address + expectDelayedReportsFor []tcpip.Address }{ { - name: "Unpecified empty", - queryAddr: "", - maxDelay: 0, - expectReportsFor: []tcpip.Address{addr1, addr2}, + name: "Unpecified empty", + queryAddr: "", + maxDelay: 0, + expectQueriedReportsFor: []tcpip.Address{addr1, addr2}, + expectDelayedReportsFor: nil, }, { - name: "Unpecified any", - queryAddr: "\x00", - maxDelay: 1, - expectReportsFor: []tcpip.Address{addr1, addr2}, + name: "Unpecified any", + queryAddr: "\x00", + maxDelay: 1, + expectQueriedReportsFor: []tcpip.Address{addr1, addr2}, + expectDelayedReportsFor: nil, }, { - name: "Specified", - queryAddr: addr1, - maxDelay: 2, - expectReportsFor: []tcpip.Address{addr1}, + name: "Specified", + queryAddr: addr1, + maxDelay: 2, + expectQueriedReportsFor: []tcpip.Address{addr1}, + expectDelayedReportsFor: []tcpip.Address{addr2}, }, { - name: "Specified all-nodes", - queryAddr: addr3, - maxDelay: 3, - expectReportsFor: nil, + name: "Specified all-nodes", + queryAddr: addr3, + maxDelay: 3, + expectQueriedReportsFor: nil, + expectDelayedReportsFor: []tcpip.Address{addr1, addr2}, }, { - name: "Specified other", - queryAddr: addr4, - maxDelay: 4, - expectReportsFor: nil, + name: "Specified other", + queryAddr: addr4, + maxDelay: 4, + expectQueriedReportsFor: nil, + expectDelayedReportsFor: []tcpip.Address{addr1, addr2}, }, } @@ -469,20 +475,20 @@ func TestHandleQuery(t *testing.T) { if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" { t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff) } - // Generic multicast protocol timers are expected to take the job mutex. - clock.Advance(maxUnsolicitedReportDelay) - if diff := mgp.check([]tcpip.Address{addr1, addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" { - t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff) - } - // Receiving a query should make us schedule a new delayed report if it - // is a query directed at us or a general query. + // Receiving a query should make us reschedule our delayed report timer + // to some time within the new max response delay. mgp.handleQuery(test.queryAddr, test.maxDelay) - if len(test.expectReportsFor) != 0 { - clock.Advance(test.maxDelay) - if diff := mgp.check(test.expectReportsFor /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" { - t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff) - } + clock.Advance(test.maxDelay) + if diff := mgp.check(test.expectQueriedReportsFor /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" { + t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff) + } + + // The groups that were not affected by the query should still send a + // report after the max unsolicited report delay. + clock.Advance(maxUnsolicitedReportDelay) + if diff := mgp.check(test.expectDelayedReportsFor /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" { + t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff) } // Should have no more messages to send. diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go index 2a6ec19dc..6a1f11a36 100644 --- a/pkg/tcpip/network/ip_test.go +++ b/pkg/tcpip/network/ip_test.go @@ -18,6 +18,7 @@ import ( "strings" "testing" + "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -58,6 +59,14 @@ var localIPv6AddrWithPrefix = tcpip.AddressWithPrefix{ PrefixLen: 120, } +type transportError struct { + origin tcpip.SockErrOrigin + typ uint8 + code uint8 + info uint32 + kind stack.TransportErrorKind +} + // testObject implements two interfaces: LinkEndpoint and TransportDispatcher. // The former is used to pretend that it's a link endpoint so that we can // inspect packets written by the network endpoints. The latter is used to @@ -73,8 +82,7 @@ type testObject struct { srcAddr tcpip.Address dstAddr tcpip.Address v4 bool - typ stack.ControlType - extra uint32 + transErr transportError dataCalls int controlCalls int @@ -118,16 +126,23 @@ func (t *testObject) DeliverTransportPacket(protocol tcpip.TransportProtocolNumb return stack.TransportPacketHandled } -// DeliverTransportControlPacket is called by network endpoints after parsing +// DeliverTransportError is called by network endpoints after parsing // incoming control (ICMP) packets. This is used by the test object to verify // that the results of the parsing are expected. -func (t *testObject) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) { +func (t *testObject) DeliverTransportError(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, transErr stack.TransportError, pkt *stack.PacketBuffer) { t.checkValues(trans, pkt.Data, remote, local) - if typ != t.typ { - t.t.Errorf("typ = %v, want %v", typ, t.typ) - } - if extra != t.extra { - t.t.Errorf("extra = %v, want %v", extra, t.extra) + if diff := cmp.Diff( + t.transErr, + transportError{ + origin: transErr.Origin(), + typ: transErr.Type(), + code: transErr.Code(), + info: transErr.Info(), + kind: transErr.Kind(), + }, + cmp.AllowUnexported(transportError{}), + ); diff != "" { + t.t.Errorf("transport error mismatch (-want +got):\n%s", diff) } t.controlCalls++ } @@ -167,7 +182,7 @@ func (*testObject) Wait() {} // WritePacket is called by network endpoints after producing a packet and // writing it to the link endpoint. This is used by the test object to verify // that the produced packet is as expected. -func (t *testObject) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (t *testObject) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { var prot tcpip.TransportProtocolNumber var srcAddr tcpip.Address var dstAddr tcpip.Address @@ -189,7 +204,7 @@ func (t *testObject) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.Ne } // WritePackets implements stack.LinkEndpoint.WritePackets. -func (*testObject) WritePackets(_ *stack.Route, _ *stack.GSO, pkt stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (*testObject) WritePackets(_ *stack.Route, _ *stack.GSO, pkt stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { panic("not implemented") } @@ -203,7 +218,7 @@ func (*testObject) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.Net panic("not implemented") } -func buildIPv4Route(local, remote tcpip.Address) (*stack.Route, *tcpip.Error) { +func buildIPv4Route(local, remote tcpip.Address) (*stack.Route, tcpip.Error) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol}, TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol}, @@ -219,7 +234,7 @@ func buildIPv4Route(local, remote tcpip.Address) (*stack.Route, *tcpip.Error) { return s.FindRoute(nicID, local, remote, ipv4.ProtocolNumber, false /* multicastLoop */) } -func buildIPv6Route(local, remote tcpip.Address) (*stack.Route, *tcpip.Error) { +func buildIPv6Route(local, remote tcpip.Address) (*stack.Route, tcpip.Error) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol}, TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol}, @@ -306,8 +321,16 @@ func (t *testInterface) setEnabled(v bool) { t.mu.disabled = !v } -func (*testInterface) WritePacketToRemote(tcpip.LinkAddress, *stack.GSO, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) *tcpip.Error { - return tcpip.ErrNotSupported +func (*testInterface) WritePacketToRemote(tcpip.LinkAddress, *stack.GSO, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) tcpip.Error { + return &tcpip.ErrNotSupported{} +} + +func (*testInterface) HandleNeighborProbe(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress) tcpip.Error { + return nil +} + +func (*testInterface) HandleNeighborConfirmation(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress, stack.ReachabilityConfirmationFlags) tcpip.Error { + return nil } func TestSourceAddressValidation(t *testing.T) { @@ -464,7 +487,7 @@ func TestEnableWhenNICDisabled(t *testing.T) { // We pass nil for all parameters except the NetworkInterface and Stack // since Enable only depends on these. - ep := p.NewEndpoint(&nic, nil, nil, nil) + ep := p.NewEndpoint(&nic, nil) // The endpoint should initially be disabled, regardless the NIC's enabled // status. @@ -479,8 +502,9 @@ func TestEnableWhenNICDisabled(t *testing.T) { // Attempting to enable the endpoint while the NIC is disabled should // fail. nic.setEnabled(false) - if err := ep.Enable(); err != tcpip.ErrNotPermitted { - t.Fatalf("got ep.Enable() = %s, want = %s", err, tcpip.ErrNotPermitted) + err := ep.Enable() + if _, ok := err.(*tcpip.ErrNotPermitted); !ok { + t.Fatalf("got ep.Enable() = %s, want = %s", err, &tcpip.ErrNotPermitted{}) } // ep should consider the NIC's enabled status when determining its own // enabled status so we "enable" the NIC to read just the endpoint's @@ -525,7 +549,7 @@ func TestIPv4Send(t *testing.T) { v4: true, }, } - ep := proto.NewEndpoint(&nic, nil, nil, nil) + ep := proto.NewEndpoint(&nic, nil) defer ep.Close() // Allocate and initialize the payload view. @@ -659,7 +683,7 @@ func TestReceive(t *testing.T) { v4: test.v4, }, } - ep := s.NetworkProtocolInstance(test.protoNum).NewEndpoint(&nic, nil, nil, &nic.testObject) + ep := s.NetworkProtocolInstance(test.protoNum).NewEndpoint(&nic, &nic.testObject) defer ep.Close() if err := ep.Enable(); err != nil { @@ -692,24 +716,81 @@ func TestReceive(t *testing.T) { } func TestIPv4ReceiveControl(t *testing.T) { - const mtu = 0xbeef - header.IPv4MinimumSize + const ( + mtu = 0xbeef - header.IPv4MinimumSize + dataLen = 8 + ) + cases := []struct { name string expectedCount int fragmentOffset uint16 code header.ICMPv4Code - expectedTyp stack.ControlType - expectedExtra uint32 + transErr transportError trunc int }{ - {"FragmentationNeeded", 1, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, 0}, - {"Truncated (10 bytes missing)", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, 10}, - {"Truncated (missing IPv4 header)", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, header.IPv4MinimumSize + 8}, - {"Truncated (missing 'extra info')", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, 4 + header.IPv4MinimumSize + 8}, - {"Truncated (missing ICMP header)", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, header.ICMPv4MinimumSize + header.IPv4MinimumSize + 8}, - {"Port unreachable", 1, 0, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 0}, - {"Non-zero fragment offset", 0, 100, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 0}, - {"Zero-length packet", 0, 0, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 2*header.IPv4MinimumSize + header.ICMPv4MinimumSize + 8}, + { + name: "FragmentationNeeded", + expectedCount: 1, + fragmentOffset: 0, + code: header.ICMPv4FragmentationNeeded, + transErr: transportError{ + origin: tcpip.SockExtErrorOriginICMP, + typ: uint8(header.ICMPv4DstUnreachable), + code: uint8(header.ICMPv4FragmentationNeeded), + info: mtu, + kind: stack.PacketTooBigTransportError, + }, + trunc: 0, + }, + { + name: "Truncated (missing IPv4 header)", + expectedCount: 0, + fragmentOffset: 0, + code: header.ICMPv4FragmentationNeeded, + trunc: header.IPv4MinimumSize + header.ICMPv4MinimumSize, + }, + { + name: "Truncated (partial offending packet's IP header)", + expectedCount: 0, + fragmentOffset: 0, + code: header.ICMPv4FragmentationNeeded, + trunc: header.IPv4MinimumSize + header.ICMPv4MinimumSize + header.IPv4MinimumSize - 1, + }, + { + name: "Truncated (partial offending packet's data)", + expectedCount: 0, + fragmentOffset: 0, + code: header.ICMPv4FragmentationNeeded, + trunc: header.ICMPv4MinimumSize + header.ICMPv4MinimumSize + header.IPv4MinimumSize + dataLen - 1, + }, + { + name: "Port unreachable", + expectedCount: 1, + fragmentOffset: 0, + code: header.ICMPv4PortUnreachable, + transErr: transportError{ + origin: tcpip.SockExtErrorOriginICMP, + typ: uint8(header.ICMPv4DstUnreachable), + code: uint8(header.ICMPv4PortUnreachable), + kind: stack.DestinationPortUnreachableTransportError, + }, + trunc: 0, + }, + { + name: "Non-zero fragment offset", + expectedCount: 0, + fragmentOffset: 100, + code: header.ICMPv4PortUnreachable, + trunc: 0, + }, + { + name: "Zero-length packet", + expectedCount: 0, + fragmentOffset: 100, + code: header.ICMPv4PortUnreachable, + trunc: 2*header.IPv4MinimumSize + header.ICMPv4MinimumSize + dataLen, + }, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { @@ -720,7 +801,7 @@ func TestIPv4ReceiveControl(t *testing.T) { t: t, }, } - ep := proto.NewEndpoint(&nic, nil, nil, &nic.testObject) + ep := proto.NewEndpoint(&nic, &nic.testObject) defer ep.Close() if err := ep.Enable(); err != nil { @@ -728,7 +809,7 @@ func TestIPv4ReceiveControl(t *testing.T) { } const dataOffset = header.IPv4MinimumSize*2 + header.ICMPv4MinimumSize - view := buffer.NewView(dataOffset + 8) + view := buffer.NewView(dataOffset + dataLen) // Create the outer IPv4 header. ip := header.IPv4(view) @@ -775,8 +856,7 @@ func TestIPv4ReceiveControl(t *testing.T) { nic.testObject.srcAddr = remoteIPv4Addr nic.testObject.dstAddr = localIPv4Addr nic.testObject.contents = view[dataOffset:] - nic.testObject.typ = c.expectedTyp - nic.testObject.extra = c.expectedExtra + nic.testObject.transErr = c.transErr addressableEndpoint, ok := ep.(stack.AddressableEndpoint) if !ok { @@ -809,7 +889,7 @@ func TestIPv4FragmentationReceive(t *testing.T) { v4: true, }, } - ep := proto.NewEndpoint(&nic, nil, nil, &nic.testObject) + ep := proto.NewEndpoint(&nic, &nic.testObject) defer ep.Close() if err := ep.Enable(); err != nil { @@ -904,7 +984,7 @@ func TestIPv6Send(t *testing.T) { t: t, }, } - ep := proto.NewEndpoint(&nic, nil, nil, nil) + ep := proto.NewEndpoint(&nic, nil) defer ep.Close() if err := ep.Enable(); err != nil { @@ -943,30 +1023,112 @@ func TestIPv6Send(t *testing.T) { } func TestIPv6ReceiveControl(t *testing.T) { + const ( + mtu = 0xffff + outerSrcAddr = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaa" + dataLen = 8 + ) + newUint16 := func(v uint16) *uint16 { return &v } - const mtu = 0xffff - const outerSrcAddr = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaa" + portUnreachableTransErr := transportError{ + origin: tcpip.SockExtErrorOriginICMP6, + typ: uint8(header.ICMPv6DstUnreachable), + code: uint8(header.ICMPv6PortUnreachable), + kind: stack.DestinationPortUnreachableTransportError, + } + cases := []struct { name string expectedCount int fragmentOffset *uint16 typ header.ICMPv6Type code header.ICMPv6Code - expectedTyp stack.ControlType - expectedExtra uint32 + transErr transportError trunc int }{ - {"PacketTooBig", 1, nil, header.ICMPv6PacketTooBig, 0, stack.ControlPacketTooBig, mtu, 0}, - {"Truncated (10 bytes missing)", 0, nil, header.ICMPv6PacketTooBig, 0, stack.ControlPacketTooBig, mtu, 10}, - {"Truncated (missing IPv6 header)", 0, nil, header.ICMPv6PacketTooBig, 0, stack.ControlPacketTooBig, mtu, header.IPv6MinimumSize + 8}, - {"Truncated PacketTooBig (missing 'extra info')", 0, nil, header.ICMPv6PacketTooBig, 0, stack.ControlPacketTooBig, mtu, 4 + header.IPv6MinimumSize + 8}, - {"Truncated (missing ICMP header)", 0, nil, header.ICMPv6PacketTooBig, 0, stack.ControlPacketTooBig, mtu, header.ICMPv6PacketTooBigMinimumSize + header.IPv6MinimumSize + 8}, - {"Port unreachable", 1, nil, header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 0}, - {"Truncated DstUnreachable (missing 'extra info')", 0, nil, header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 4 + header.IPv6MinimumSize + 8}, - {"Fragmented, zero offset", 1, newUint16(0), header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 0}, - {"Non-zero fragment offset", 0, newUint16(100), header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 0}, - {"Zero-length packet", 0, nil, header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 2*header.IPv6MinimumSize + header.ICMPv6DstUnreachableMinimumSize + 8}, + { + name: "PacketTooBig", + expectedCount: 1, + fragmentOffset: nil, + typ: header.ICMPv6PacketTooBig, + code: header.ICMPv6UnusedCode, + transErr: transportError{ + origin: tcpip.SockExtErrorOriginICMP6, + typ: uint8(header.ICMPv6PacketTooBig), + code: uint8(header.ICMPv6UnusedCode), + info: mtu, + kind: stack.PacketTooBigTransportError, + }, + trunc: 0, + }, + { + name: "Truncated (missing offending packet's IPv6 header)", + expectedCount: 0, + fragmentOffset: nil, + typ: header.ICMPv6PacketTooBig, + code: header.ICMPv6UnusedCode, + trunc: header.IPv6MinimumSize + header.ICMPv6PacketTooBigMinimumSize, + }, + { + name: "Truncated PacketTooBig (partial offending packet's IPv6 header)", + expectedCount: 0, + fragmentOffset: nil, + typ: header.ICMPv6PacketTooBig, + code: header.ICMPv6UnusedCode, + trunc: header.IPv6MinimumSize + header.ICMPv6PacketTooBigMinimumSize + header.IPv6MinimumSize - 1, + }, + { + name: "Truncated (partial offending packet's data)", + expectedCount: 0, + fragmentOffset: nil, + typ: header.ICMPv6PacketTooBig, + code: header.ICMPv6UnusedCode, + trunc: header.IPv6MinimumSize + header.ICMPv6PacketTooBigMinimumSize + header.IPv6MinimumSize + dataLen - 1, + }, + { + name: "Port unreachable", + expectedCount: 1, + fragmentOffset: nil, + typ: header.ICMPv6DstUnreachable, + code: header.ICMPv6PortUnreachable, + transErr: portUnreachableTransErr, + trunc: 0, + }, + { + name: "Truncated DstPortUnreachable (partial offending packet's IP header)", + expectedCount: 0, + fragmentOffset: nil, + typ: header.ICMPv6DstUnreachable, + code: header.ICMPv6PortUnreachable, + trunc: header.IPv6MinimumSize + header.ICMPv6DstUnreachableMinimumSize + header.IPv6MinimumSize - 1, + }, + { + name: "DstPortUnreachable for Fragmented, zero offset", + expectedCount: 1, + fragmentOffset: newUint16(0), + typ: header.ICMPv6DstUnreachable, + code: header.ICMPv6PortUnreachable, + transErr: portUnreachableTransErr, + trunc: 0, + }, + { + name: "DstPortUnreachable for Non-zero fragment offset", + expectedCount: 0, + fragmentOffset: newUint16(100), + typ: header.ICMPv6DstUnreachable, + code: header.ICMPv6PortUnreachable, + transErr: portUnreachableTransErr, + trunc: 0, + }, + { + name: "Zero-length packet", + expectedCount: 0, + fragmentOffset: nil, + typ: header.ICMPv6DstUnreachable, + code: header.ICMPv6PortUnreachable, + trunc: 2*header.IPv6MinimumSize + header.ICMPv6DstUnreachableMinimumSize + dataLen, + }, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { @@ -977,7 +1139,7 @@ func TestIPv6ReceiveControl(t *testing.T) { t: t, }, } - ep := proto.NewEndpoint(&nic, nil, nil, &nic.testObject) + ep := proto.NewEndpoint(&nic, &nic.testObject) defer ep.Close() if err := ep.Enable(); err != nil { @@ -988,7 +1150,7 @@ func TestIPv6ReceiveControl(t *testing.T) { if c.fragmentOffset != nil { dataOffset += header.IPv6FragmentHeaderSize } - view := buffer.NewView(dataOffset + 8) + view := buffer.NewView(dataOffset + dataLen) // Create the outer IPv6 header. ip := header.IPv6(view) @@ -1039,8 +1201,7 @@ func TestIPv6ReceiveControl(t *testing.T) { nic.testObject.srcAddr = remoteIPv6Addr nic.testObject.dstAddr = localIPv6Addr nic.testObject.contents = view[dataOffset:] - nic.testObject.typ = c.expectedTyp - nic.testObject.extra = c.expectedExtra + nic.testObject.transErr = c.transErr // Set ICMPv6 checksum. icmp.SetChecksum(header.ICMPv6Checksum(icmp, outerSrcAddr, localIPv6Addr, buffer.VectorisedView{})) @@ -1122,7 +1283,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) { remoteAddr tcpip.Address pktGen func(*testing.T, tcpip.Address) buffer.VectorisedView checker func(*testing.T, *stack.PacketBuffer, tcpip.Address) - expectedErr *tcpip.Error + expectedErr tcpip.Error }{ { name: "IPv4", @@ -1187,7 +1348,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) { ip.SetHeaderLength(header.IPv4MinimumSize - 1) return hdr.View().ToVectorisedView() }, - expectedErr: tcpip.ErrMalformedHeader, + expectedErr: &tcpip.ErrMalformedHeader{}, }, { name: "IPv4 too small", @@ -1205,7 +1366,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) { }) return buffer.View(ip[:len(ip)-1]).ToVectorisedView() }, - expectedErr: tcpip.ErrMalformedHeader, + expectedErr: &tcpip.ErrMalformedHeader{}, }, { name: "IPv4 minimum size", @@ -1465,7 +1626,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) { }) return buffer.View(ip[:len(ip)-1]).ToVectorisedView() }, - expectedErr: tcpip.ErrMalformedHeader, + expectedErr: &tcpip.ErrMalformedHeader{}, }, } @@ -1506,10 +1667,13 @@ func TestWriteHeaderIncludedPacket(t *testing.T) { } defer r.Release() - if err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ - Data: test.pktGen(t, subTest.srcAddr), - })); err != test.expectedErr { - t.Fatalf("got r.WriteHeaderIncludedPacket(_) = %s, want = %s", err, test.expectedErr) + { + err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ + Data: test.pktGen(t, subTest.srcAddr), + })) + if diff := cmp.Diff(test.expectedErr, err); diff != "" { + t.Fatalf("unexpected error from r.WriteHeaderIncludedPacket(_), (-want, +got):\n%s", diff) + } } if test.expectedErr != nil { diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD index 330a7d170..9713c4448 100644 --- a/pkg/tcpip/network/ipv4/BUILD +++ b/pkg/tcpip/network/ipv4/BUILD @@ -44,6 +44,7 @@ go_test( "//pkg/tcpip/network/testutil", "//pkg/tcpip/stack", "//pkg/tcpip/transport/icmp", + "//pkg/tcpip/transport/raw", "//pkg/tcpip/transport/tcp", "//pkg/tcpip/transport/udp", "//pkg/waiter", diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go index 6bb97c46a..74e70e283 100644 --- a/pkg/tcpip/network/ipv4/icmp.go +++ b/pkg/tcpip/network/ipv4/icmp.go @@ -23,11 +23,108 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/stack" ) +// icmpv4DestinationUnreachableSockError is a general ICMPv4 Destination +// Unreachable error. +// +// +stateify savable +type icmpv4DestinationUnreachableSockError struct{} + +// Origin implements tcpip.SockErrorCause. +func (*icmpv4DestinationUnreachableSockError) Origin() tcpip.SockErrOrigin { + return tcpip.SockExtErrorOriginICMP +} + +// Type implements tcpip.SockErrorCause. +func (*icmpv4DestinationUnreachableSockError) Type() uint8 { + return uint8(header.ICMPv4DstUnreachable) +} + +// Info implements tcpip.SockErrorCause. +func (*icmpv4DestinationUnreachableSockError) Info() uint32 { + return 0 +} + +var _ stack.TransportError = (*icmpv4DestinationHostUnreachableSockError)(nil) + +// icmpv4DestinationHostUnreachableSockError is an ICMPv4 Destination Host +// Unreachable error. +// +// It indicates that a packet was not able to reach the destination host. +// +// +stateify savable +type icmpv4DestinationHostUnreachableSockError struct { + icmpv4DestinationUnreachableSockError +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv4DestinationHostUnreachableSockError) Code() uint8 { + return uint8(header.ICMPv4HostUnreachable) +} + +// Kind implements stack.TransportError. +func (*icmpv4DestinationHostUnreachableSockError) Kind() stack.TransportErrorKind { + return stack.DestinationHostUnreachableTransportError +} + +var _ stack.TransportError = (*icmpv4DestinationPortUnreachableSockError)(nil) + +// icmpv4DestinationPortUnreachableSockError is an ICMPv4 Destination Port +// Unreachable error. +// +// It indicates that a packet reached the destination host, but the transport +// protocol was not active on the destination port. +// +// +stateify savable +type icmpv4DestinationPortUnreachableSockError struct { + icmpv4DestinationUnreachableSockError +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv4DestinationPortUnreachableSockError) Code() uint8 { + return uint8(header.ICMPv4PortUnreachable) +} + +// Kind implements stack.TransportError. +func (*icmpv4DestinationPortUnreachableSockError) Kind() stack.TransportErrorKind { + return stack.DestinationPortUnreachableTransportError +} + +var _ stack.TransportError = (*icmpv4FragmentationNeededSockError)(nil) + +// icmpv4FragmentationNeededSockError is an ICMPv4 Destination Unreachable error +// due to fragmentation being required but the packet was set to not be +// fragmented. +// +// It indicates that a link exists on the path to the destination with an MTU +// that is too small to carry the packet. +// +// +stateify savable +type icmpv4FragmentationNeededSockError struct { + icmpv4DestinationUnreachableSockError + + mtu uint32 +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv4FragmentationNeededSockError) Code() uint8 { + return uint8(header.ICMPv4FragmentationNeeded) +} + +// Info implements tcpip.SockErrorCause. +func (e *icmpv4FragmentationNeededSockError) Info() uint32 { + return e.mtu +} + +// Kind implements stack.TransportError. +func (*icmpv4FragmentationNeededSockError) Kind() stack.TransportErrorKind { + return stack.PacketTooBigTransportError +} + // handleControl handles the case when an ICMP error packet contains the headers // of the original packet that caused the ICMP one to be sent. This information // is used to find out which transport endpoint must be notified about the ICMP // packet. We only expect the payload, not the enclosing ICMP packet. -func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) { +func (e *endpoint) handleControl(errInfo stack.TransportError, pkt *stack.PacketBuffer) { h, ok := pkt.Data.PullUp(header.IPv4MinimumSize) if !ok { return @@ -54,10 +151,10 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack return } - // Skip the ip header, then deliver control message. + // Skip the ip header, then deliver the error. pkt.Data.TrimFront(hlen) p := hdr.TransportProtocol() - e.dispatcher.DeliverTransportControlPacket(srcAddr, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt) + e.dispatcher.DeliverTransportError(srcAddr, hdr.DestinationAddress(), ProtocolNumber, p, errInfo, pkt) } func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) { @@ -222,19 +319,16 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) { pkt.Data.TrimFront(header.ICMPv4MinimumSize) switch h.Code() { case header.ICMPv4HostUnreachable: - e.handleControl(stack.ControlNoRoute, 0, pkt) - + e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt) case header.ICMPv4PortUnreachable: - e.handleControl(stack.ControlPortUnreachable, 0, pkt) - + e.handleControl(&icmpv4DestinationPortUnreachableSockError{}, pkt) case header.ICMPv4FragmentationNeeded: networkMTU, err := calculateNetworkMTU(uint32(h.MTU()), header.IPv4MinimumSize) if err != nil { networkMTU = 0 } - e.handleControl(stack.ControlPacketTooBig, networkMTU, pkt) + e.handleControl(&icmpv4FragmentationNeededSockError{mtu: networkMTU}, pkt) } - case header.ICMPv4SrcQuench: received.srcQuench.Increment() @@ -310,7 +404,7 @@ func (*icmpReasonParamProblem) isICMPReason() {} // the problematic packet. It incorporates as much of that packet as // possible as well as any error metadata as is available. returnError // expects pkt to hold a valid IPv4 packet as per the wire format. -func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) *tcpip.Error { +func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip.Error { origIPHdr := header.IPv4(pkt.NetworkHeader().View()) origIPHdrSrc := origIPHdr.SourceAddress() origIPHdrDst := origIPHdr.DestinationAddress() @@ -376,7 +470,7 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) *tcpi netEP, ok := p.mu.eps[pkt.NICID] p.mu.Unlock() if !ok { - return tcpip.ErrNotConnected + return &tcpip.ErrNotConnected{} } sent := netEP.stats.icmp.packetsSent diff --git a/pkg/tcpip/network/ipv4/igmp.go b/pkg/tcpip/network/ipv4/igmp.go index 4550aacd6..acc126c3b 100644 --- a/pkg/tcpip/network/ipv4/igmp.go +++ b/pkg/tcpip/network/ipv4/igmp.go @@ -103,7 +103,7 @@ func (igmp *igmpState) Enabled() bool { // SendReport implements ip.MulticastGroupProtocol. // // Precondition: igmp.ep.mu must be read locked. -func (igmp *igmpState) SendReport(groupAddress tcpip.Address) (bool, *tcpip.Error) { +func (igmp *igmpState) SendReport(groupAddress tcpip.Address) (bool, tcpip.Error) { igmpType := header.IGMPv2MembershipReport if igmp.v1Present() { igmpType = header.IGMPv1MembershipReport @@ -114,7 +114,7 @@ func (igmp *igmpState) SendReport(groupAddress tcpip.Address) (bool, *tcpip.Erro // SendLeave implements ip.MulticastGroupProtocol. // // Precondition: igmp.ep.mu must be read locked. -func (igmp *igmpState) SendLeave(groupAddress tcpip.Address) *tcpip.Error { +func (igmp *igmpState) SendLeave(groupAddress tcpip.Address) tcpip.Error { // As per RFC 2236 Section 6, Page 8: "If the interface state says the // Querier is running IGMPv1, this action SHOULD be skipped. If the flag // saying we were the last host to report is cleared, this action MAY be @@ -215,6 +215,11 @@ func (igmp *igmpState) setV1Present(v bool) { } } +func (igmp *igmpState) resetV1Present() { + igmp.igmpV1Job.Cancel() + igmp.setV1Present(false) +} + // handleMembershipQuery handles a membership query. // // Precondition: igmp.ep.mu must be locked. @@ -242,7 +247,7 @@ func (igmp *igmpState) handleMembershipReport(groupAddress tcpip.Address) { // writePacket assembles and sends an IGMP packet. // // Precondition: igmp.ep.mu must be read locked. -func (igmp *igmpState) writePacket(destAddress tcpip.Address, groupAddress tcpip.Address, igmpType header.IGMPType) (bool, *tcpip.Error) { +func (igmp *igmpState) writePacket(destAddress tcpip.Address, groupAddress tcpip.Address, igmpType header.IGMPType) (bool, tcpip.Error) { igmpData := header.IGMP(buffer.NewView(header.IGMPReportMinimumSize)) igmpData.SetType(igmpType) igmpData.SetGroupAddress(groupAddress) @@ -293,7 +298,7 @@ func (igmp *igmpState) writePacket(destAddress tcpip.Address, groupAddress tcpip // messages. // // If the group already exists in the membership map, returns -// tcpip.ErrDuplicateAddress. +// *tcpip.ErrDuplicateAddress. // // Precondition: igmp.ep.mu must be locked. func (igmp *igmpState) joinGroup(groupAddress tcpip.Address) { @@ -312,13 +317,13 @@ func (igmp *igmpState) isInGroup(groupAddress tcpip.Address) bool { // if required. // // Precondition: igmp.ep.mu must be locked. -func (igmp *igmpState) leaveGroup(groupAddress tcpip.Address) *tcpip.Error { +func (igmp *igmpState) leaveGroup(groupAddress tcpip.Address) tcpip.Error { // LeaveGroup returns false only if the group was not joined. if igmp.genericMulticastProtocol.LeaveGroupLocked(groupAddress) { return nil } - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } // softLeaveAll leaves all groups from the perspective of IGMP, but remains diff --git a/pkg/tcpip/network/ipv4/igmp_test.go b/pkg/tcpip/network/ipv4/igmp_test.go index 1ee573ac8..95fd75ab7 100644 --- a/pkg/tcpip/network/ipv4/igmp_test.go +++ b/pkg/tcpip/network/ipv4/igmp_test.go @@ -101,10 +101,10 @@ func createAndInjectIGMPPacket(e *channel.Endpoint, igmpType header.IGMPType, ma }) } -// TestIgmpV1Present tests the handling of the case where an IGMPv1 router is -// present on the network. The IGMP stack will then send IGMPv1 Membership -// reports for backwards compatibility. -func TestIgmpV1Present(t *testing.T) { +// TestIGMPV1Present tests the node's ability to fallback to V1 when a V1 +// router is detected. V1 present status is expected to be reset when the NIC +// cycles. +func TestIGMPV1Present(t *testing.T) { e, s, clock := createStack(t, true) if err := s.AddAddress(nicID, ipv4.ProtocolNumber, addr); err != nil { t.Fatalf("AddAddress(%d, %d, %s): %s", nicID, ipv4.ProtocolNumber, addr, err) @@ -116,14 +116,16 @@ func TestIgmpV1Present(t *testing.T) { // This NIC will send an IGMPv2 report immediately, before this test can get // the IGMPv1 General Membership Query in. - p, ok := e.Read() - if !ok { - t.Fatal("unable to Read IGMP packet, expected V2MembershipReport") - } - if got := s.Stats().IGMP.PacketsSent.V2MembershipReport.Value(); got != 1 { - t.Fatalf("got V2MembershipReport messages sent = %d, want = 1", got) + { + p, ok := e.Read() + if !ok { + t.Fatal("unable to Read IGMP packet, expected V2MembershipReport") + } + if got := s.Stats().IGMP.PacketsSent.V2MembershipReport.Value(); got != 1 { + t.Fatalf("got V2MembershipReport messages sent = %d, want = 1", got) + } + validateIgmpPacket(t, p, multicastAddr, header.IGMPv2MembershipReport, 0, multicastAddr) } - validateIgmpPacket(t, p, multicastAddr, header.IGMPv2MembershipReport, 0, multicastAddr) if t.Failed() { t.FailNow() } @@ -145,19 +147,38 @@ func TestIgmpV1Present(t *testing.T) { // Verify the solicited Membership Report is sent. Now that this NIC has seen // an IGMPv1 query, it should send an IGMPv1 Membership Report. - p, ok = e.Read() - if ok { + if p, ok := e.Read(); ok { t.Fatalf("sent unexpected packet, expected V1MembershipReport only after advancing the clock = %+v", p.Pkt) } clock.Advance(ipv4.UnsolicitedReportIntervalMax) - p, ok = e.Read() - if !ok { - t.Fatal("unable to Read IGMP packet, expected V1MembershipReport") - } - if got := s.Stats().IGMP.PacketsSent.V1MembershipReport.Value(); got != 1 { - t.Fatalf("got V1MembershipReport messages sent = %d, want = 1", got) + { + p, ok := e.Read() + if !ok { + t.Fatal("unable to Read IGMP packet, expected V1MembershipReport") + } + if got := s.Stats().IGMP.PacketsSent.V1MembershipReport.Value(); got != 1 { + t.Fatalf("got V1MembershipReport messages sent = %d, want = 1", got) + } + validateIgmpPacket(t, p, multicastAddr, header.IGMPv1MembershipReport, 0, multicastAddr) + } + + // Cycling the interface should reset the V1 present flag. + if err := s.DisableNIC(nicID); err != nil { + t.Fatalf("s.DisableNIC(%d): %s", nicID, err) + } + if err := s.EnableNIC(nicID); err != nil { + t.Fatalf("s.EnableNIC(%d): %s", nicID, err) + } + { + p, ok := e.Read() + if !ok { + t.Fatal("unable to Read IGMP packet, expected V2MembershipReport") + } + if got := s.Stats().IGMP.PacketsSent.V2MembershipReport.Value(); got != 2 { + t.Fatalf("got V2MembershipReport messages sent = %d, want = 2", got) + } + validateIgmpPacket(t, p, multicastAddr, header.IGMPv2MembershipReport, 0, multicastAddr) } - validateIgmpPacket(t, p, multicastAddr, header.IGMPv1MembershipReport, 0, multicastAddr) } func TestSendQueuedIGMPReports(t *testing.T) { diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index a05275a5b..b2d626107 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -101,11 +101,11 @@ func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) { // Use the same control type as an ICMPv4 destination host unreachable error // since the host is considered unreachable if we cannot resolve the link // address to the next hop. - e.handleControl(stack.ControlNoRoute, 0, pkt) + e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt) } // NewEndpoint creates a new ipv4 endpoint. -func (p *protocol) NewEndpoint(nic stack.NetworkInterface, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { +func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { e := &endpoint{ nic: nic, dispatcher: dispatcher, @@ -137,14 +137,14 @@ func (p *protocol) forgetEndpoint(nicID tcpip.NICID) { } // Enable implements stack.NetworkEndpoint. -func (e *endpoint) Enable() *tcpip.Error { +func (e *endpoint) Enable() tcpip.Error { e.mu.Lock() defer e.mu.Unlock() // If the NIC is not enabled, the endpoint can't do anything meaningful so // don't enable the endpoint. if !e.nic.Enabled() { - return tcpip.ErrNotPermitted + return &tcpip.ErrNotPermitted{} } // If the endpoint is already enabled, there is nothing for it to do. @@ -212,7 +212,9 @@ func (e *endpoint) disableLocked() { } // The endpoint may have already left the multicast group. - if err := e.leaveGroupLocked(header.IPv4AllSystems); err != nil && err != tcpip.ErrBadLocalAddress { + switch err := e.leaveGroupLocked(header.IPv4AllSystems); err.(type) { + case nil, *tcpip.ErrBadLocalAddress: + default: panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err)) } @@ -221,10 +223,18 @@ func (e *endpoint) disableLocked() { e.mu.igmp.softLeaveAll() // The address may have already been removed. - if err := e.mu.addressableEndpointState.RemovePermanentAddress(ipv4BroadcastAddr.Address); err != nil && err != tcpip.ErrBadLocalAddress { + switch err := e.mu.addressableEndpointState.RemovePermanentAddress(ipv4BroadcastAddr.Address); err.(type) { + case nil, *tcpip.ErrBadLocalAddress: + default: panic(fmt.Sprintf("unexpected error when removing address = %s: %s", ipv4BroadcastAddr.Address, err)) } + // Reset the IGMP V1 present flag. + // + // If the node comes back up on the same network, it will re-learn that it + // needs to perform IGMPv1. + e.mu.igmp.resetV1Present() + if !e.setEnabled(false) { panic("should have only done work to disable the endpoint if it was enabled") } @@ -256,7 +266,7 @@ func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { return e.protocol.Number() } -func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, options header.IPv4OptionsSerializer) *tcpip.Error { +func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, options header.IPv4OptionsSerializer) tcpip.Error { hdrLen := header.IPv4MinimumSize var optLen int if options != nil { @@ -264,12 +274,12 @@ func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.Packet } hdrLen += optLen if hdrLen > header.IPv4MaximumHeaderSize { - return tcpip.ErrMessageTooLong + return &tcpip.ErrMessageTooLong{} } ip := header.IPv4(pkt.NetworkHeader().Push(hdrLen)) length := pkt.Size() if length > math.MaxUint16 { - return tcpip.ErrMessageTooLong + return &tcpip.ErrMessageTooLong{} } // RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic // datagrams. Since the DF bit is never being set here, all datagrams @@ -294,7 +304,7 @@ func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.Packet // fragment. It returns the number of fragments handled and the number of // fragments left to be processed. The IP header must already be present in the // original packet. -func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) { +func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) tcpip.Error) (int, int, tcpip.Error) { // Round the MTU down to align to 8 bytes. fragmentPayloadSize := networkMTU &^ 7 networkHeader := header.IPv4(pkt.NetworkHeader().View()) @@ -314,7 +324,7 @@ func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU ui } // WritePacket writes a packet to the given destination address and protocol. -func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error { if err := e.addIPHeader(r.LocalAddress, r.RemoteAddress, pkt, params, nil /* options */); err != nil { return err } @@ -353,7 +363,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw return e.writePacket(r, gso, pkt, false /* headerIncluded */) } -func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.PacketBuffer, headerIncluded bool) *tcpip.Error { +func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.PacketBuffer, headerIncluded bool) tcpip.Error { if r.Loop&stack.PacketLoop != 0 { pkt := pkt.CloneToInbound() if e.protocol.stack.ParsePacketBuffer(ProtocolNumber, pkt) == stack.ParsedOK { @@ -377,7 +387,7 @@ func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.Packet } if packetMustBeFragmented(pkt, networkMTU, gso) { - sent, remain, err := e.handleFragments(r, gso, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error { + sent, remain, err := e.handleFragments(r, gso, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) tcpip.Error { // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each // fragment one by one using WritePacket() (current strategy) or if we // want to create a PacketBufferList from the fragments and feed it to @@ -398,7 +408,7 @@ func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.Packet } // WritePackets implements stack.NetworkEndpoint.WritePackets. -func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, *tcpip.Error) { +func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, tcpip.Error) { if r.Loop&stack.PacketLoop != 0 { panic("multiple packets in local loop") } @@ -423,7 +433,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe // Keep track of the packet that is about to be fragmented so it can be // removed once the fragmentation is done. originalPkt := pkt - if _, _, err := e.handleFragments(r, gso, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error { + if _, _, err := e.handleFragments(r, gso, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) tcpip.Error { // Modify the packet list in place with the new fragments. pkts.InsertAfter(pkt, fragPkt) pkt = fragPkt @@ -488,22 +498,22 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe } // WriteHeaderIncludedPacket implements stack.NetworkEndpoint. -func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { // The packet already has an IP header, but there are a few required // checks. h, ok := pkt.Data.PullUp(header.IPv4MinimumSize) if !ok { - return tcpip.ErrMalformedHeader + return &tcpip.ErrMalformedHeader{} } hdrLen := header.IPv4(h).HeaderLength() if hdrLen < header.IPv4MinimumSize { - return tcpip.ErrMalformedHeader + return &tcpip.ErrMalformedHeader{} } h, ok = pkt.Data.PullUp(int(hdrLen)) if !ok { - return tcpip.ErrMalformedHeader + return &tcpip.ErrMalformedHeader{} } ip := header.IPv4(h) @@ -541,14 +551,14 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu // wire format. We also want to check if the header's fields are valid before // sending the packet. if !parse.IPv4(pkt) || !header.IPv4(pkt.NetworkHeader().View()).IsValid(pktSize) { - return tcpip.ErrMalformedHeader + return &tcpip.ErrMalformedHeader{} } return e.writePacket(r, nil /* gso */, pkt, true /* headerIncluded */) } // forwardPacket attempts to forward a packet to its final destination. -func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error { h := header.IPv4(pkt.NetworkHeader().View()) ttl := h.TTL() if ttl == 0 { @@ -568,7 +578,7 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) *tcpip.Error { networkEndpoint.(*endpoint).handlePacket(pkt) return nil } - if err != tcpip.ErrBadAddress { + if _, ok := err.(*tcpip.ErrBadAddress); !ok { return err } @@ -730,7 +740,7 @@ func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) { } proto := h.Protocol() - data, _, ready, err := e.protocol.fragmentation.Process( + resPkt, _, ready, err := e.protocol.fragmentation.Process( // As per RFC 791 section 2.3, the identification value is unique // for a source-destination pair and protocol. fragmentation.FragmentID{ @@ -753,7 +763,8 @@ func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) { if !ready { return } - pkt.Data = data + pkt = resPkt + h = header.IPv4(pkt.NetworkHeader().View()) // The reassembler doesn't take care of fixing up the header, so we need // to do it here. @@ -825,7 +836,7 @@ func (e *endpoint) Close() { } // AddAndAcquirePermanentAddress implements stack.AddressableEndpoint. -func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, *tcpip.Error) { +func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, tcpip.Error) { e.mu.Lock() defer e.mu.Unlock() @@ -837,7 +848,7 @@ func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, p } // RemovePermanentAddress implements stack.AddressableEndpoint. -func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() return e.mu.addressableEndpointState.RemovePermanentAddress(addr) @@ -894,7 +905,7 @@ func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix { } // JoinGroup implements stack.GroupAddressableEndpoint. -func (e *endpoint) JoinGroup(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() return e.joinGroupLocked(addr) @@ -903,9 +914,9 @@ func (e *endpoint) JoinGroup(addr tcpip.Address) *tcpip.Error { // joinGroupLocked is like JoinGroup but with locking requirements. // // Precondition: e.mu must be locked. -func (e *endpoint) joinGroupLocked(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error { if !header.IsV4MulticastAddress(addr) { - return tcpip.ErrBadAddress + return &tcpip.ErrBadAddress{} } e.mu.igmp.joinGroup(addr) @@ -913,7 +924,7 @@ func (e *endpoint) joinGroupLocked(addr tcpip.Address) *tcpip.Error { } // LeaveGroup implements stack.GroupAddressableEndpoint. -func (e *endpoint) LeaveGroup(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() return e.leaveGroupLocked(addr) @@ -922,7 +933,7 @@ func (e *endpoint) LeaveGroup(addr tcpip.Address) *tcpip.Error { // leaveGroupLocked is like LeaveGroup but with locking requirements. // // Precondition: e.mu must be locked. -func (e *endpoint) leaveGroupLocked(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { return e.mu.igmp.leaveGroup(addr) } @@ -995,24 +1006,24 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) { } // SetOption implements NetworkProtocol.SetOption. -func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error { +func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error { switch v := option.(type) { case *tcpip.DefaultTTLOption: p.SetDefaultTTL(uint8(*v)) return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } // Option implements NetworkProtocol.Option. -func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) *tcpip.Error { +func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error { switch v := option.(type) { case *tcpip.DefaultTTLOption: *v = tcpip.DefaultTTLOption(p.DefaultTTL()) return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } @@ -1058,9 +1069,9 @@ func (p *protocol) SetForwarding(v bool) { // calculateNetworkMTU calculates the network-layer payload MTU based on the // link-layer payload mtu. -func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, *tcpip.Error) { +func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, tcpip.Error) { if linkMTU < header.IPv4MinimumMTU { - return 0, tcpip.ErrInvalidEndpointState + return 0, &tcpip.ErrInvalidEndpointState{} } // As per RFC 791 section 3.1, an IPv4 header cannot exceed 60 bytes in @@ -1068,7 +1079,7 @@ func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, *tcpip.Erro // The maximal internet header is 60 octets, and a typical internet header // is 20 octets, allowing a margin for headers of higher level protocols. if networkHeaderSize > header.IPv4MaximumHeaderSize { - return 0, tcpip.ErrMalformedHeader + return 0, &tcpip.ErrMalformedHeader{} } networkMTU := linkMTU diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go index dac7cbfd4..a296bed79 100644 --- a/pkg/tcpip/network/ipv4/ipv4_test.go +++ b/pkg/tcpip/network/ipv4/ipv4_test.go @@ -38,6 +38,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/network/testutil" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" + "gvisor.dev/gvisor/pkg/tcpip/transport/raw" "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" "gvisor.dev/gvisor/pkg/tcpip/transport/udp" "gvisor.dev/gvisor/pkg/waiter" @@ -78,8 +79,11 @@ func TestExcludeBroadcast(t *testing.T) { defer ep.Close() // Cannot connect using a broadcast address as the source. - if err := ep.Connect(randomAddr); err != tcpip.ErrNoRoute { - t.Errorf("got ep.Connect(...) = %v, want = %v", err, tcpip.ErrNoRoute) + { + err := ep.Connect(randomAddr) + if _, ok := err.(*tcpip.ErrNoRoute); !ok { + t.Errorf("got ep.Connect(...) = %v, want = %v", err, &tcpip.ErrNoRoute{}) + } } // However, we can bind to a broadcast address to listen. @@ -1376,8 +1380,8 @@ func TestFragmentationErrors(t *testing.T) { payloadSize int allowPackets int outgoingErrors int - mockError *tcpip.Error - wantError *tcpip.Error + mockError tcpip.Error + wantError tcpip.Error }{ { description: "No frag", @@ -1386,8 +1390,8 @@ func TestFragmentationErrors(t *testing.T) { transportHeaderLength: 0, allowPackets: 0, outgoingErrors: 1, - mockError: tcpip.ErrAborted, - wantError: tcpip.ErrAborted, + mockError: &tcpip.ErrAborted{}, + wantError: &tcpip.ErrAborted{}, }, { description: "Error on first frag", @@ -1396,8 +1400,8 @@ func TestFragmentationErrors(t *testing.T) { transportHeaderLength: 0, allowPackets: 0, outgoingErrors: 3, - mockError: tcpip.ErrAborted, - wantError: tcpip.ErrAborted, + mockError: &tcpip.ErrAborted{}, + wantError: &tcpip.ErrAborted{}, }, { description: "Error on second frag", @@ -1406,8 +1410,8 @@ func TestFragmentationErrors(t *testing.T) { transportHeaderLength: 0, allowPackets: 1, outgoingErrors: 2, - mockError: tcpip.ErrAborted, - wantError: tcpip.ErrAborted, + mockError: &tcpip.ErrAborted{}, + wantError: &tcpip.ErrAborted{}, }, { description: "Error on first frag MTU smaller than header", @@ -1416,8 +1420,8 @@ func TestFragmentationErrors(t *testing.T) { payloadSize: 500, allowPackets: 0, outgoingErrors: 4, - mockError: tcpip.ErrAborted, - wantError: tcpip.ErrAborted, + mockError: &tcpip.ErrAborted{}, + wantError: &tcpip.ErrAborted{}, }, { description: "Error when MTU is smaller than IPv4 minimum MTU", @@ -1427,7 +1431,7 @@ func TestFragmentationErrors(t *testing.T) { allowPackets: 0, outgoingErrors: 1, mockError: nil, - wantError: tcpip.ErrInvalidEndpointState, + wantError: &tcpip.ErrInvalidEndpointState{}, }, } @@ -1441,8 +1445,8 @@ func TestFragmentationErrors(t *testing.T) { TTL: ttl, TOS: stack.DefaultTOS, }, pkt) - if err != ft.wantError { - t.Errorf("got WritePacket(_, _, _) = %s, want = %s", err, ft.wantError) + if diff := cmp.Diff(ft.wantError, err); diff != "" { + t.Fatalf("unexpected error from r.WritePacket(_, _, _), (-want, +got):\n%s", diff) } if got := int(r.Stats().IP.PacketsSent.Value()); got != ft.allowPackets { t.Errorf("got r.Stats().IP.PacketsSent.Value() = %d, want = %d", got, ft.allowPackets) @@ -2055,7 +2059,7 @@ func TestReceiveFragments(t *testing.T) { // the fragment block size of 8 (RFC 791 section 3.1 page 14). ipv4Payload3Addr1ToAddr2 := udpGen(127, 3, addr1, addr2) udpPayload3Addr1ToAddr2 := ipv4Payload3Addr1ToAddr2[header.UDPMinimumSize:] - // Used to test the max reassembled payload length (65,535 octets). + // Used to test the max reassembled IPv4 payload length. ipv4Payload4Addr1ToAddr2 := udpGen(header.UDPMaximumSize-header.UDPMinimumSize, 4, addr1, addr2) udpPayload4Addr1ToAddr2 := ipv4Payload4Addr1ToAddr2[header.UDPMinimumSize:] @@ -2403,6 +2407,7 @@ func TestReceiveFragments(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol}, TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol}, + RawFactory: raw.EndpointFactory{}, }) e := channel.New(0, 1280, tcpip.LinkAddress("\xf0\x00")) if err := s.CreateNIC(nicID, e); err != nil { @@ -2428,6 +2433,13 @@ func TestReceiveFragments(t *testing.T) { t.Fatalf("Bind(%+v): %s", bindAddr, err) } + // Bring up a raw endpoint so we can examine network headers. + epRaw, err := s.NewRawEndpoint(udp.ProtocolNumber, header.IPv4ProtocolNumber, &wq, true /* associated */) + if err != nil { + t.Fatalf("NewRawEndpoint(%d, %d, _, true): %s", udp.ProtocolNumber, header.IPv4ProtocolNumber, err) + } + defer epRaw.Close() + // Prepare and send the fragments. for _, frag := range test.fragments { hdr := buffer.NewPrependable(header.IPv4MinimumSize) @@ -2459,10 +2471,11 @@ func TestReceiveFragments(t *testing.T) { } for i, expectedPayload := range test.expectedPayloads { + // Check UDP payload delivered by UDP endpoint. var buf bytes.Buffer result, err := ep.Read(&buf, tcpip.ReadOptions{}) if err != nil { - t.Fatalf("(i=%d) Read: %s", i, err) + t.Fatalf("(i=%d) ep.Read: %s", i, err) } if diff := cmp.Diff(tcpip.ReadResult{ Count: len(expectedPayload), @@ -2471,12 +2484,30 @@ func TestReceiveFragments(t *testing.T) { t.Errorf("(i=%d) ep.Read: unexpected result (-want +got):\n%s", i, diff) } if diff := cmp.Diff(expectedPayload, buf.Bytes()); diff != "" { - t.Errorf("(i=%d) got UDP payload mismatch (-want +got):\n%s", i, diff) + t.Errorf("(i=%d) ep.Read: UDP payload mismatch (-want +got):\n%s", i, diff) + } + + // Check IPv4 header in packet delivered by raw endpoint. + buf.Reset() + result, err = epRaw.Read(&buf, tcpip.ReadOptions{}) + if err != nil { + t.Fatalf("(i=%d) epRaw.Read: %s", i, err) + } + // Reassambly does not take care of checksum. Here we write our own + // check routine instead of using checker.IPv4. + ip := header.IPv4(buf.Bytes()) + for _, check := range []checker.NetworkChecker{ + checker.FragmentFlags(0), + checker.FragmentOffset(0), + checker.IPFullLength(uint16(header.IPv4MinimumSize + header.UDPMinimumSize + len(expectedPayload))), + } { + check(t, []header.Network{ip}) } } - if res, err := ep.Read(ioutil.Discard, tcpip.ReadOptions{}); err != tcpip.ErrWouldBlock { - t.Fatalf("(last) got Read = (%v, %v), want = (_, %s)", res, err, tcpip.ErrWouldBlock) + res, err := ep.Read(ioutil.Discard, tcpip.ReadOptions{}) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("(last) got Read = (%#v, %v), want = (_, %s)", res, err, &tcpip.ErrWouldBlock{}) } }) } @@ -2554,11 +2585,11 @@ func TestWriteStats(t *testing.T) { // Parameterize the tests to run with both WritePacket and WritePackets. writers := []struct { name string - writePackets func(*stack.Route, stack.PacketBufferList) (int, *tcpip.Error) + writePackets func(*stack.Route, stack.PacketBufferList) (int, tcpip.Error) }{ { name: "WritePacket", - writePackets: func(rt *stack.Route, pkts stack.PacketBufferList) (int, *tcpip.Error) { + writePackets: func(rt *stack.Route, pkts stack.PacketBufferList) (int, tcpip.Error) { nWritten := 0 for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { if err := rt.WritePacket(nil, stack.NetworkHeaderParams{}, pkt); err != nil { @@ -2570,7 +2601,7 @@ func TestWriteStats(t *testing.T) { }, }, { name: "WritePackets", - writePackets: func(rt *stack.Route, pkts stack.PacketBufferList) (int, *tcpip.Error) { + writePackets: func(rt *stack.Route, pkts stack.PacketBufferList) (int, tcpip.Error) { return rt.WritePackets(nil, pkts, stack.NetworkHeaderParams{}) }, }, @@ -2580,7 +2611,7 @@ func TestWriteStats(t *testing.T) { t.Run(writer.name, func(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - ep := testutil.NewMockLinkEndpoint(header.IPv4MinimumMTU, tcpip.ErrInvalidEndpointState, test.allowPackets) + ep := testutil.NewMockLinkEndpoint(header.IPv4MinimumMTU, &tcpip.ErrInvalidEndpointState{}, test.allowPackets) rt := buildRoute(t, ep) var pkts stack.PacketBufferList diff --git a/pkg/tcpip/network/ipv4/stats_test.go b/pkg/tcpip/network/ipv4/stats_test.go index b28e7dcde..fbbc6e69c 100644 --- a/pkg/tcpip/network/ipv4/stats_test.go +++ b/pkg/tcpip/network/ipv4/stats_test.go @@ -50,7 +50,7 @@ func TestClearEndpointFromProtocolOnClose(t *testing.T) { }) proto := s.NetworkProtocolInstance(ProtocolNumber).(*protocol) nic := testInterface{nicID: 1} - ep := proto.NewEndpoint(&nic, nil, nil, nil).(*endpoint) + ep := proto.NewEndpoint(&nic, nil).(*endpoint) var nicIDs []tcpip.NICID proto.mu.Lock() @@ -82,7 +82,7 @@ func TestMultiCounterStatsInitialization(t *testing.T) { }) proto := s.NetworkProtocolInstance(ProtocolNumber).(*protocol) var nic testInterface - ep := proto.NewEndpoint(&nic, nil, nil, nil).(*endpoint) + ep := proto.NewEndpoint(&nic, nil).(*endpoint) // At this point, the Stack's stats and the NetworkEndpoint's stats are // expected to be bound by a MultiCounterStat. refStack := s.Stats() diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go index 95efada3a..dcfd93bab 100644 --- a/pkg/tcpip/network/ipv6/icmp.go +++ b/pkg/tcpip/network/ipv6/icmp.go @@ -23,11 +23,136 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/stack" ) +// icmpv6DestinationUnreachableSockError is a general ICMPv6 Destination +// Unreachable error. +// +// +stateify savable +type icmpv6DestinationUnreachableSockError struct{} + +// Origin implements tcpip.SockErrorCause. +func (*icmpv6DestinationUnreachableSockError) Origin() tcpip.SockErrOrigin { + return tcpip.SockExtErrorOriginICMP6 +} + +// Type implements tcpip.SockErrorCause. +func (*icmpv6DestinationUnreachableSockError) Type() uint8 { + return uint8(header.ICMPv6DstUnreachable) +} + +// Info implements tcpip.SockErrorCause. +func (*icmpv6DestinationUnreachableSockError) Info() uint32 { + return 0 +} + +var _ stack.TransportError = (*icmpv6DestinationNetworkUnreachableSockError)(nil) + +// icmpv6DestinationNetworkUnreachableSockError is an ICMPv6 Destination Network +// Unreachable error. +// +// It indicates that the destination network is unreachable. +// +// +stateify savable +type icmpv6DestinationNetworkUnreachableSockError struct { + icmpv6DestinationUnreachableSockError +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv6DestinationNetworkUnreachableSockError) Code() uint8 { + return uint8(header.ICMPv6NetworkUnreachable) +} + +// Kind implements stack.TransportError. +func (*icmpv6DestinationNetworkUnreachableSockError) Kind() stack.TransportErrorKind { + return stack.DestinationNetworkUnreachableTransportError +} + +var _ stack.TransportError = (*icmpv6DestinationPortUnreachableSockError)(nil) + +// icmpv6DestinationPortUnreachableSockError is an ICMPv6 Destination Port +// Unreachable error. +// +// It indicates that a packet reached the destination host, but the transport +// protocol was not active on the destination port. +// +// +stateify savable +type icmpv6DestinationPortUnreachableSockError struct { + icmpv6DestinationUnreachableSockError +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv6DestinationPortUnreachableSockError) Code() uint8 { + return uint8(header.ICMPv6PortUnreachable) +} + +// Kind implements stack.TransportError. +func (*icmpv6DestinationPortUnreachableSockError) Kind() stack.TransportErrorKind { + return stack.DestinationPortUnreachableTransportError +} + +var _ stack.TransportError = (*icmpv6DestinationAddressUnreachableSockError)(nil) + +// icmpv6DestinationAddressUnreachableSockError is an ICMPv6 Destination Address +// Unreachable error. +// +// It indicates that a packet was not able to reach the destination. +// +// +stateify savable +type icmpv6DestinationAddressUnreachableSockError struct { + icmpv6DestinationUnreachableSockError +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv6DestinationAddressUnreachableSockError) Code() uint8 { + return uint8(header.ICMPv6AddressUnreachable) +} + +// Kind implements stack.TransportError. +func (*icmpv6DestinationAddressUnreachableSockError) Kind() stack.TransportErrorKind { + return stack.DestinationHostUnreachableTransportError +} + +var _ stack.TransportError = (*icmpv6PacketTooBigSockError)(nil) + +// icmpv6PacketTooBigSockError is an ICMPv6 Packet Too Big error. +// +// It indicates that a link exists on the path to the destination with an MTU +// that is too small to carry the packet. +// +// +stateify savable +type icmpv6PacketTooBigSockError struct { + mtu uint32 +} + +// Origin implements tcpip.SockErrorCause. +func (*icmpv6PacketTooBigSockError) Origin() tcpip.SockErrOrigin { + return tcpip.SockExtErrorOriginICMP6 +} + +// Type implements tcpip.SockErrorCause. +func (*icmpv6PacketTooBigSockError) Type() uint8 { + return uint8(header.ICMPv6PacketTooBig) +} + +// Code implements tcpip.SockErrorCause. +func (*icmpv6PacketTooBigSockError) Code() uint8 { + return uint8(header.ICMPv6UnusedCode) +} + +// Info implements tcpip.SockErrorCause. +func (e *icmpv6PacketTooBigSockError) Info() uint32 { + return e.mtu +} + +// Kind implements stack.TransportError. +func (*icmpv6PacketTooBigSockError) Kind() stack.TransportErrorKind { + return stack.PacketTooBigTransportError +} + // handleControl handles the case when an ICMP packet contains the headers of // the original packet that caused the ICMP one to be sent. This information is // used to find out which transport endpoint must be notified about the ICMP // packet. -func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) { +func (e *endpoint) handleControl(transErr stack.TransportError, pkt *stack.PacketBuffer) { h, ok := pkt.Data.PullUp(header.IPv6MinimumSize) if !ok { return @@ -67,8 +192,7 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack p = fragHdr.TransportProtocol() } - // Deliver the control packet to the transport endpoint. - e.dispatcher.DeliverTransportControlPacket(src, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt) + e.dispatcher.DeliverTransportError(src, hdr.DestinationAddress(), ProtocolNumber, p, transErr, pkt) } // getLinkAddrOption searches NDP options for a given link address option using @@ -175,7 +299,7 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) { if err != nil { networkMTU = 0 } - e.handleControl(stack.ControlPacketTooBig, networkMTU, pkt) + e.handleControl(&icmpv6PacketTooBigSockError{mtu: networkMTU}, pkt) case header.ICMPv6DstUnreachable: received.dstUnreachable.Increment() @@ -187,11 +311,10 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) { pkt.Data.TrimFront(header.ICMPv6DstUnreachableMinimumSize) switch header.ICMPv6(hdr).Code() { case header.ICMPv6NetworkUnreachable: - e.handleControl(stack.ControlNetworkUnreachable, 0, pkt) + e.handleControl(&icmpv6DestinationNetworkUnreachableSockError{}, pkt) case header.ICMPv6PortUnreachable: - e.handleControl(stack.ControlPortUnreachable, 0, pkt) + e.handleControl(&icmpv6DestinationPortUnreachableSockError{}, pkt) } - case header.ICMPv6NeighborSolicit: received.neighborSolicit.Increment() if !isNDPValid() || pkt.Data.Size() < header.ICMPv6NeighborSolicitMinimumSize { @@ -237,7 +360,9 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) { // // TODO(gvisor.dev/issue/4046): Handle the scenario when a duplicate // address is detected for an assigned address. - if err := e.dupTentativeAddrDetected(targetAddr); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState { + switch err := e.dupTentativeAddrDetected(targetAddr); err.(type) { + case nil, *tcpip.ErrBadAddress, *tcpip.ErrInvalidEndpointState: + default: panic(fmt.Sprintf("unexpected error handling duplicate tentative address: %s", err)) } } @@ -287,10 +412,14 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) { } else if unspecifiedSource { received.invalid.Increment() return - } else if e.nud != nil { - e.nud.HandleProbe(srcAddr, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol) } else { - e.linkAddrCache.AddLinkAddress(srcAddr, sourceLinkAddr) + switch err := e.nic.HandleNeighborProbe(ProtocolNumber, srcAddr, sourceLinkAddr); err.(type) { + case nil: + case *tcpip.ErrNotSupported: + // The stack may support ICMPv6 but the NIC may not need link resolution. + default: + panic(fmt.Sprintf("unexpected error when informing NIC of neighbor probe message: %s", err)) + } } // As per RFC 4861 section 7.1.1: @@ -413,10 +542,12 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) { // // TODO(gvisor.dev/issue/4046): Handle the scenario when a duplicate // address is detected for an assigned address. - if err := e.dupTentativeAddrDetected(targetAddr); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState { + switch err := e.dupTentativeAddrDetected(targetAddr); err.(type) { + case nil, *tcpip.ErrBadAddress, *tcpip.ErrInvalidEndpointState: + return + default: panic(fmt.Sprintf("unexpected error handling duplicate tentative address: %s", err)) } - return } it, err := na.Options().Iter(false /* check */) @@ -441,20 +572,30 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) { return } - // If the NA message has the target link layer option, update the link - // address cache with the link address for the target of the message. - if e.nud == nil { - if len(targetLinkAddr) != 0 { - e.linkAddrCache.AddLinkAddress(targetAddr, targetLinkAddr) - } + // As per RFC 4861 section 7.1.2: + // A node MUST silently discard any received Neighbor Advertisement + // messages that do not satisfy all of the following validity checks: + // ... + // - If the IP Destination Address is a multicast address the + // Solicited flag is zero. + if header.IsV6MulticastAddress(dstAddr) && na.SolicitedFlag() { + received.invalid.Increment() return } - e.nud.HandleConfirmation(targetAddr, targetLinkAddr, stack.ReachabilityConfirmationFlags{ + // If the NA message has the target link layer option, update the link + // address cache with the link address for the target of the message. + switch err := e.nic.HandleNeighborConfirmation(ProtocolNumber, targetAddr, targetLinkAddr, stack.ReachabilityConfirmationFlags{ Solicited: na.SolicitedFlag(), Override: na.OverrideFlag(), IsRouter: na.RouterFlag(), - }) + }); err.(type) { + case nil: + case *tcpip.ErrNotSupported: + // The stack may support ICMPv6 but the NIC may not need link resolution. + default: + panic(fmt.Sprintf("unexpected error when informing NIC of neighbor confirmation message: %s", err)) + } case header.ICMPv6EchoRequest: received.echoRequest.Increment() @@ -560,10 +701,14 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) { return } - if e.nud != nil { - // A RS with a specified source IP address modifies the NUD state - // machine in the same way a reachability probe would. - e.nud.HandleProbe(srcAddr, ProtocolNumber, sourceLinkAddr, e.protocol) + // A RS with a specified source IP address modifies the neighbor table + // in the same way a regular probe would. + switch err := e.nic.HandleNeighborProbe(ProtocolNumber, srcAddr, sourceLinkAddr); err.(type) { + case nil: + case *tcpip.ErrNotSupported: + // The stack may support ICMPv6 but the NIC may not need link resolution. + default: + panic(fmt.Sprintf("unexpected error when informing NIC of neighbor probe message: %s", err)) } } @@ -612,8 +757,14 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) { // If the RA has the source link layer option, update the link address // cache with the link address for the advertised router. - if len(sourceLinkAddr) != 0 && e.nud != nil { - e.nud.HandleProbe(routerAddr, ProtocolNumber, sourceLinkAddr, e.protocol) + if len(sourceLinkAddr) != 0 { + switch err := e.nic.HandleNeighborProbe(ProtocolNumber, routerAddr, sourceLinkAddr); err.(type) { + case nil: + case *tcpip.ErrNotSupported: + // The stack may support ICMPv6 but the NIC may not need link resolution. + default: + panic(fmt.Sprintf("unexpected error when informing NIC of neighbor probe message: %s", err)) + } } e.mu.Lock() @@ -679,24 +830,13 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) { } } -var _ stack.LinkAddressResolver = (*protocol)(nil) - // LinkAddressProtocol implements stack.LinkAddressResolver. -func (*protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber { +func (*endpoint) LinkAddressProtocol() tcpip.NetworkProtocolNumber { return header.IPv6ProtocolNumber } // LinkAddressRequest implements stack.LinkAddressResolver. -func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, nic stack.NetworkInterface) *tcpip.Error { - nicID := nic.ID() - - p.mu.Lock() - netEP, ok := p.mu.eps[nicID] - p.mu.Unlock() - if !ok { - return tcpip.ErrNotConnected - } - +func (e *endpoint) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) tcpip.Error { remoteAddr := targetAddr if len(remoteLinkAddr) == 0 { remoteAddr = header.SolicitedNodeAddr(targetAddr) @@ -704,22 +844,22 @@ func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remot } if len(localAddr) == 0 { - addressEndpoint := netEP.AcquireOutgoingPrimaryAddress(remoteAddr, false /* allowExpired */) + addressEndpoint := e.AcquireOutgoingPrimaryAddress(remoteAddr, false /* allowExpired */) if addressEndpoint == nil { - return tcpip.ErrNetworkUnreachable + return &tcpip.ErrNetworkUnreachable{} } localAddr = addressEndpoint.AddressWithPrefix().Address - } else if p.stack.CheckLocalAddress(nicID, ProtocolNumber, localAddr) == 0 { - return tcpip.ErrBadLocalAddress + } else if e.protocol.stack.CheckLocalAddress(e.nic.ID(), ProtocolNumber, localAddr) == 0 { + return &tcpip.ErrBadLocalAddress{} } optsSerializer := header.NDPOptionsSerializer{ - header.NDPSourceLinkLayerAddressOption(nic.LinkAddress()), + header.NDPSourceLinkLayerAddressOption(e.nic.LinkAddress()), } neighborSolicitSize := header.ICMPv6NeighborSolicitMinimumSize + optsSerializer.Length() pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ - ReserveHeaderBytes: int(nic.MaxHeaderLength()) + header.IPv6FixedHeaderSize + neighborSolicitSize, + ReserveHeaderBytes: int(e.nic.MaxHeaderLength()) + header.IPv6FixedHeaderSize + neighborSolicitSize, }) pkt.TransportProtocolNumber = header.ICMPv6ProtocolNumber packet := header.ICMPv6(pkt.TransportHeader().Push(neighborSolicitSize)) @@ -736,9 +876,9 @@ func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remot panic(fmt.Sprintf("failed to add IP header: %s", err)) } - stat := netEP.stats.icmp.packetsSent + stat := e.stats.icmp.packetsSent - if err := nic.WritePacketToRemote(remoteLinkAddr, nil /* gso */, ProtocolNumber, pkt); err != nil { + if err := e.nic.WritePacketToRemote(remoteLinkAddr, nil /* gso */, ProtocolNumber, pkt); err != nil { stat.dropped.Increment() return err } @@ -748,7 +888,7 @@ func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remot } // ResolveStaticAddress implements stack.LinkAddressResolver. -func (*protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) { +func (*endpoint) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) { if header.IsV6MulticastAddress(addr) { return header.EthernetAddressFromMulticastIPv6Address(addr), true } @@ -813,7 +953,7 @@ func (*icmpReasonReassemblyTimeout) isICMPReason() {} // returnError takes an error descriptor and generates the appropriate ICMP // error packet for IPv6 and sends it. -func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) *tcpip.Error { +func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip.Error { origIPHdr := header.IPv6(pkt.NetworkHeader().View()) origIPHdrSrc := origIPHdr.SourceAddress() origIPHdrDst := origIPHdr.DestinationAddress() @@ -884,7 +1024,7 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) *tcpi netEP, ok := p.mu.eps[pkt.NICID] p.mu.Unlock() if !ok { - return tcpip.ErrNotConnected + return &tcpip.ErrNotConnected{} } sent := netEP.stats.icmp.packetsSent diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go index 641c60b7c..92f9ee2c2 100644 --- a/pkg/tcpip/network/ipv6/icmp_test.go +++ b/pkg/tcpip/network/ipv6/icmp_test.go @@ -79,7 +79,7 @@ func (*stubLinkEndpoint) LinkAddress() tcpip.LinkAddress { return "" } -func (*stubLinkEndpoint) WritePacket(stack.RouteInfo, *stack.GSO, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) *tcpip.Error { +func (*stubLinkEndpoint) WritePacket(stack.RouteInfo, *stack.GSO, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) tcpip.Error { return nil } @@ -93,35 +93,14 @@ func (*stubDispatcher) DeliverTransportPacket(tcpip.TransportProtocolNumber, *st return stack.TransportPacketHandled } -var _ stack.LinkAddressCache = (*stubLinkAddressCache)(nil) - -type stubLinkAddressCache struct{} - -func (*stubLinkAddressCache) AddLinkAddress(tcpip.Address, tcpip.LinkAddress) {} - -type stubNUDHandler struct { - probeCount int - confirmationCount int -} - -var _ stack.NUDHandler = (*stubNUDHandler)(nil) - -func (s *stubNUDHandler) HandleProbe(tcpip.Address, tcpip.NetworkProtocolNumber, tcpip.LinkAddress, stack.LinkAddressResolver) { - s.probeCount++ -} - -func (s *stubNUDHandler) HandleConfirmation(tcpip.Address, tcpip.LinkAddress, stack.ReachabilityConfirmationFlags) { - s.confirmationCount++ -} - -func (*stubNUDHandler) HandleUpperLevelConfirmation(tcpip.Address) { -} - var _ stack.NetworkInterface = (*testInterface)(nil) type testInterface struct { stack.LinkEndpoint + probeCount int + confirmationCount int + nicID tcpip.NICID } @@ -145,21 +124,31 @@ func (*testInterface) Promiscuous() bool { return false } -func (t *testInterface) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (t *testInterface) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { return t.LinkEndpoint.WritePacket(r.Fields(), gso, protocol, pkt) } -func (t *testInterface) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (t *testInterface) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { return t.LinkEndpoint.WritePackets(r.Fields(), gso, pkts, protocol) } -func (t *testInterface) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (t *testInterface) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { var r stack.RouteInfo r.NetProto = protocol r.RemoteLinkAddress = remoteLinkAddr return t.LinkEndpoint.WritePacket(r, gso, protocol, pkt) } +func (t *testInterface) HandleNeighborProbe(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress) tcpip.Error { + t.probeCount++ + return nil +} + +func (t *testInterface) HandleNeighborConfirmation(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress, stack.ReachabilityConfirmationFlags) tcpip.Error { + t.confirmationCount++ + return nil +} + func TestICMPCounts(t *testing.T) { tests := []struct { name string @@ -202,7 +191,7 @@ func TestICMPCounts(t *testing.T) { if netProto == nil { t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber) } - ep := netProto.NewEndpoint(&testInterface{}, &stubLinkAddressCache{}, &stubNUDHandler{}, &stubDispatcher{}) + ep := netProto.NewEndpoint(&testInterface{}, &stubDispatcher{}) defer ep.Close() if err := ep.Enable(); err != nil { @@ -360,7 +349,7 @@ func TestICMPCountsWithNeighborCache(t *testing.T) { if netProto == nil { t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber) } - ep := netProto.NewEndpoint(&testInterface{}, nil, &stubNUDHandler{}, &stubDispatcher{}) + ep := netProto.NewEndpoint(&testInterface{}, &stubDispatcher{}) defer ep.Close() if err := ep.Enable(); err != nil { @@ -573,12 +562,19 @@ func newTestContext(t *testing.T) *testContext { }}, ) - return c -} + t.Cleanup(func() { + if err := c.s0.RemoveNIC(nicID); err != nil { + t.Errorf("c.s0.RemoveNIC(%d): %s", nicID, err) + } + if err := c.s1.RemoveNIC(nicID); err != nil { + t.Errorf("c.s1.RemoveNIC(%d): %s", nicID, err) + } -func (c *testContext) cleanup() { - c.linkEP0.Close() - c.linkEP1.Close() + c.linkEP0.Close() + c.linkEP1.Close() + }) + + return c } type routeArgs struct { @@ -628,7 +624,6 @@ func routeICMPv6Packet(t *testing.T, args routeArgs, fn func(*testing.T, header. func TestLinkResolution(t *testing.T) { c := newTestContext(t) - defer c.cleanup() r, err := c.s0.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */) if err != nil { @@ -1283,7 +1278,7 @@ func TestLinkAddressRequest(t *testing.T) { localAddr tcpip.Address remoteLinkAddr tcpip.LinkAddress - expectedErr *tcpip.Error + expectedErr tcpip.Error expectedRemoteAddr tcpip.Address expectedRemoteLinkAddr tcpip.LinkAddress }{ @@ -1321,23 +1316,23 @@ func TestLinkAddressRequest(t *testing.T) { name: "Unicast with unassigned address", localAddr: lladdr1, remoteLinkAddr: linkAddr1, - expectedErr: tcpip.ErrBadLocalAddress, + expectedErr: &tcpip.ErrBadLocalAddress{}, }, { name: "Multicast with unassigned address", localAddr: lladdr1, remoteLinkAddr: "", - expectedErr: tcpip.ErrBadLocalAddress, + expectedErr: &tcpip.ErrBadLocalAddress{}, }, { name: "Unicast with no local address available", remoteLinkAddr: linkAddr1, - expectedErr: tcpip.ErrNetworkUnreachable, + expectedErr: &tcpip.ErrNetworkUnreachable{}, }, { name: "Multicast with no local address available", remoteLinkAddr: "", - expectedErr: tcpip.ErrNetworkUnreachable, + expectedErr: &tcpip.ErrNetworkUnreachable{}, }, } @@ -1346,28 +1341,32 @@ func TestLinkAddressRequest(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol}, }) - p := s.NetworkProtocolInstance(ProtocolNumber) - linkRes, ok := p.(stack.LinkAddressResolver) - if !ok { - t.Fatalf("expected IPv6 protocol to implement stack.LinkAddressResolver") - } linkEP := channel.New(defaultChannelSize, defaultMTU, linkAddr0) if err := s.CreateNIC(nicID, linkEP); err != nil { t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err) } + + ep, err := s.GetNetworkEndpoint(nicID, ProtocolNumber) + if err != nil { + t.Fatalf("s.GetNetworkEndpoint(%d, %d): %s", nicID, ProtocolNumber, err) + } + linkRes, ok := ep.(stack.LinkAddressResolver) + if !ok { + t.Fatalf("expected %T to implement stack.LinkAddressResolver", ep) + } + if len(test.nicAddr) != 0 { if err := s.AddAddress(nicID, ProtocolNumber, test.nicAddr); err != nil { t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID, ProtocolNumber, test.nicAddr, err) } } - // We pass a test network interface to LinkAddressRequest with the same NIC - // ID and link endpoint used by the NIC we created earlier so that we can - // mock a link address request and observe the packets sent to the link - // endpoint even though the stack uses the real NIC. - if err := linkRes.LinkAddressRequest(lladdr0, test.localAddr, test.remoteLinkAddr, &testInterface{LinkEndpoint: linkEP, nicID: nicID}); err != test.expectedErr { - t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s, want = %s", lladdr0, test.localAddr, test.remoteLinkAddr, err, test.expectedErr) + { + err := linkRes.LinkAddressRequest(lladdr0, test.localAddr, test.remoteLinkAddr) + if diff := cmp.Diff(test.expectedErr, err); diff != "" { + t.Fatalf("unexpected error from p.LinkAddressRequest(%s, %s, %s, _), (-want, +got):\n%s", lladdr0, test.localAddr, test.remoteLinkAddr, diff) + } } if test.expectedErr != nil { @@ -1797,8 +1796,9 @@ func TestCallsToNeighborCache(t *testing.T) { if netProto == nil { t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber) } - nudHandler := &stubNUDHandler{} - ep := netProto.NewEndpoint(&testInterface{LinkEndpoint: channel.New(0, header.IPv6MinimumMTU, linkAddr0)}, &stubLinkAddressCache{}, nudHandler, &stubDispatcher{}) + + testInterface := testInterface{LinkEndpoint: channel.New(0, header.IPv6MinimumMTU, linkAddr0)} + ep := netProto.NewEndpoint(&testInterface, &stubDispatcher{}) defer ep.Close() if err := ep.Enable(); err != nil { @@ -1833,11 +1833,11 @@ func TestCallsToNeighborCache(t *testing.T) { ep.HandlePacket(pkt) // Confirm the endpoint calls the correct NUDHandler method. - if nudHandler.probeCount != test.wantProbeCount { - t.Errorf("got nudHandler.probeCount = %d, want = %d", nudHandler.probeCount, test.wantProbeCount) + if testInterface.probeCount != test.wantProbeCount { + t.Errorf("got testInterface.probeCount = %d, want = %d", testInterface.probeCount, test.wantProbeCount) } - if nudHandler.confirmationCount != test.wantConfirmationCount { - t.Errorf("got nudHandler.confirmationCount = %d, want = %d", nudHandler.confirmationCount, test.wantConfirmationCount) + if testInterface.confirmationCount != test.wantConfirmationCount { + t.Errorf("got testInterface.confirmationCount = %d, want = %d", testInterface.confirmationCount, test.wantConfirmationCount) } }) } diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index d658f9bcb..c2e8c3ea7 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -164,6 +164,7 @@ func getLabel(addr tcpip.Address) uint8 { panic(fmt.Sprintf("should have a label for address = %s", addr)) } +var _ stack.LinkAddressResolver = (*endpoint)(nil) var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil) var _ stack.GroupAddressableEndpoint = (*endpoint)(nil) var _ stack.AddressableEndpoint = (*endpoint)(nil) @@ -172,13 +173,11 @@ var _ stack.NDPEndpoint = (*endpoint)(nil) var _ NDPEndpoint = (*endpoint)(nil) type endpoint struct { - nic stack.NetworkInterface - linkAddrCache stack.LinkAddressCache - nud stack.NUDHandler - dispatcher stack.TransportDispatcher - protocol *protocol - stack *stack.Stack - stats sharedStats + nic stack.NetworkInterface + dispatcher stack.TransportDispatcher + protocol *protocol + stack *stack.Stack + stats sharedStats // enabled is set to 1 when the endpoint is enabled and 0 when it is // disabled. @@ -236,7 +235,7 @@ func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) { }) pkt.NICID = e.nic.ID() pkt.NetworkProtocolNumber = ProtocolNumber - e.handleControl(stack.ControlAddressUnreachable, 0, pkt) + e.handleControl(&icmpv6DestinationAddressUnreachableSockError{}, pkt) } // onAddressAssignedLocked handles an address being assigned. @@ -307,17 +306,17 @@ func (e *endpoint) hasTentativeAddr(addr tcpip.Address) bool { // dupTentativeAddrDetected removes the tentative address if it exists. If the // address was generated via SLAAC, an attempt is made to generate a new // address. -func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() addressEndpoint := e.getAddressRLocked(addr) if addressEndpoint == nil { - return tcpip.ErrBadAddress + return &tcpip.ErrBadAddress{} } if addressEndpoint.GetKind() != stack.PermanentTentative { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } // If the address is a SLAAC address, do not invalidate its SLAAC prefix as an @@ -369,14 +368,14 @@ func (e *endpoint) transitionForwarding(forwarding bool) { } // Enable implements stack.NetworkEndpoint. -func (e *endpoint) Enable() *tcpip.Error { +func (e *endpoint) Enable() tcpip.Error { e.mu.Lock() defer e.mu.Unlock() // If the NIC is not enabled, the endpoint can't do anything meaningful so // don't enable the endpoint. if !e.nic.Enabled() { - return tcpip.ErrNotPermitted + return &tcpip.ErrNotPermitted{} } // If the endpoint is already enabled, there is nothing for it to do. @@ -418,7 +417,7 @@ func (e *endpoint) Enable() *tcpip.Error { // // Addresses may have aleady completed DAD but in the time since the endpoint // was last enabled, other devices may have acquired the same addresses. - var err *tcpip.Error + var err tcpip.Error e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool { addr := addressEndpoint.AddressWithPrefix().Address if !header.IsV6UnicastAddress(addr) { @@ -499,7 +498,9 @@ func (e *endpoint) disableLocked() { e.stopDADForPermanentAddressesLocked() // The endpoint may have already left the multicast group. - if err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress); err != nil && err != tcpip.ErrBadLocalAddress { + switch err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress); err.(type) { + case nil, *tcpip.ErrBadLocalAddress: + default: panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv6AllNodesMulticastAddress, err)) } @@ -555,11 +556,11 @@ func (e *endpoint) MaxHeaderLength() uint16 { return e.nic.MaxHeaderLength() + header.IPv6MinimumSize } -func addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, extensionHeaders header.IPv6ExtHdrSerializer) *tcpip.Error { +func addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, extensionHeaders header.IPv6ExtHdrSerializer) tcpip.Error { extHdrsLen := extensionHeaders.Length() length := pkt.Size() + extensionHeaders.Length() if length > math.MaxUint16 { - return tcpip.ErrMessageTooLong + return &tcpip.ErrMessageTooLong{} } ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + extHdrsLen)) ip.Encode(&header.IPv6Fields{ @@ -585,7 +586,7 @@ func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32, gso *sta // fragments left to be processed. The IP header must already be present in the // original packet. The transport header protocol number is required to avoid // parsing the IPv6 extension headers. -func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU uint32, pkt *stack.PacketBuffer, transProto tcpip.TransportProtocolNumber, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) { +func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU uint32, pkt *stack.PacketBuffer, transProto tcpip.TransportProtocolNumber, handler func(*stack.PacketBuffer) tcpip.Error) (int, int, tcpip.Error) { networkHeader := header.IPv6(pkt.NetworkHeader().View()) // TODO(gvisor.dev/issue/3912): Once the Authentication or ESP Headers are @@ -598,13 +599,13 @@ func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU ui // of 8 as per RFC 8200 section 4.5: // Each complete fragment, except possibly the last ("rightmost") one, is // an integer multiple of 8 octets long. - return 0, 1, tcpip.ErrMessageTooLong + return 0, 1, &tcpip.ErrMessageTooLong{} } if fragmentPayloadLen < uint32(pkt.TransportHeader().View().Size()) { // As per RFC 8200 Section 4.5, the Transport Header is expected to be small // enough to fit in the first fragment. - return 0, 1, tcpip.ErrMessageTooLong + return 0, 1, &tcpip.ErrMessageTooLong{} } pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadLen, calculateFragmentReserve(pkt)) @@ -624,7 +625,7 @@ func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU ui } // WritePacket writes a packet to the given destination address and protocol. -func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error { if err := addIPHeader(r.LocalAddress, r.RemoteAddress, pkt, params, nil /* extensionHeaders */); err != nil { return err } @@ -662,7 +663,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw return e.writePacket(r, gso, pkt, params.Protocol, false /* headerIncluded */) } -func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.PacketBuffer, protocol tcpip.TransportProtocolNumber, headerIncluded bool) *tcpip.Error { +func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.PacketBuffer, protocol tcpip.TransportProtocolNumber, headerIncluded bool) tcpip.Error { if r.Loop&stack.PacketLoop != 0 { pkt := pkt.CloneToInbound() if e.protocol.stack.ParsePacketBuffer(ProtocolNumber, pkt) == stack.ParsedOK { @@ -685,7 +686,7 @@ func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.Packet } if packetMustBeFragmented(pkt, networkMTU, gso) { - sent, remain, err := e.handleFragments(r, gso, networkMTU, pkt, protocol, func(fragPkt *stack.PacketBuffer) *tcpip.Error { + sent, remain, err := e.handleFragments(r, gso, networkMTU, pkt, protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error { // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each // fragment one by one using WritePacket() (current strategy) or if we // want to create a PacketBufferList from the fragments and feed it to @@ -707,7 +708,7 @@ func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.Packet } // WritePackets implements stack.NetworkEndpoint.WritePackets. -func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, *tcpip.Error) { +func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, tcpip.Error) { if r.Loop&stack.PacketLoop != 0 { panic("not implemented") } @@ -731,7 +732,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe // Keep track of the packet that is about to be fragmented so it can be // removed once the fragmentation is done. originalPkt := pb - if _, _, err := e.handleFragments(r, gso, networkMTU, pb, params.Protocol, func(fragPkt *stack.PacketBuffer) *tcpip.Error { + if _, _, err := e.handleFragments(r, gso, networkMTU, pb, params.Protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error { // Modify the packet list in place with the new fragments. pkts.InsertAfter(pb, fragPkt) pb = fragPkt @@ -798,11 +799,11 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe } // WriteHeaderIncludedPacket implements stack.NetworkEndpoint. -func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { // The packet already has an IP header, but there are a few required checks. h, ok := pkt.Data.PullUp(header.IPv6MinimumSize) if !ok { - return tcpip.ErrMalformedHeader + return &tcpip.ErrMalformedHeader{} } ip := header.IPv6(h) @@ -827,14 +828,14 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu // sending the packet. proto, _, _, _, ok := parse.IPv6(pkt) if !ok || !header.IPv6(pkt.NetworkHeader().View()).IsValid(pktSize) { - return tcpip.ErrMalformedHeader + return &tcpip.ErrMalformedHeader{} } return e.writePacket(r, nil /* gso */, pkt, proto, true /* headerIncluded */) } // forwardPacket attempts to forward a packet to its final destination. -func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) *tcpip.Error { +func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error { h := header.IPv6(pkt.NetworkHeader().View()) hopLimit := h.HopLimit() if hopLimit <= 1 { @@ -856,7 +857,7 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) *tcpip.Error { networkEndpoint.(*endpoint).handlePacket(pkt) return nil } - if err != tcpip.ErrBadAddress { + if _, ok := err.(*tcpip.ErrBadAddress); !ok { return err } @@ -1165,7 +1166,7 @@ func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) { // Note that pkt doesn't have its transport header set after reassembly, // and won't until DeliverNetworkPacket sets it. - data, proto, ready, err := e.protocol.fragmentation.Process( + resPkt, proto, ready, err := e.protocol.fragmentation.Process( // IPv6 ignores the Protocol field since the ID only needs to be unique // across source-destination pairs, as per RFC 8200 section 4.5. fragmentation.FragmentID{ @@ -1186,7 +1187,7 @@ func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) { } if ready { - pkt.Data = data + pkt = resPkt // We create a new iterator with the reassembled packet because we could // have more extension headers in the reassembled payload, as per RFC @@ -1330,7 +1331,7 @@ func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { } // AddAndAcquirePermanentAddress implements stack.AddressableEndpoint. -func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, *tcpip.Error) { +func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, tcpip.Error) { // TODO(b/169350103): add checks here after making sure we no longer receive // an empty address. e.mu.Lock() @@ -1345,7 +1346,7 @@ func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, p // solicited-node multicast group and start duplicate address detection. // // Precondition: e.mu must be write locked. -func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, *tcpip.Error) { +func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, tcpip.Error) { addressEndpoint, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, peb, configType, deprecated) if err != nil { return nil, err @@ -1374,13 +1375,13 @@ func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPre } // RemovePermanentAddress implements stack.AddressableEndpoint. -func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() addressEndpoint := e.getAddressRLocked(addr) if addressEndpoint == nil || !addressEndpoint.GetKind().IsPermanent() { - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } return e.removePermanentEndpointLocked(addressEndpoint, true) @@ -1390,7 +1391,7 @@ func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) *tcpip.Error { // it works with a stack.AddressEndpoint. // // Precondition: e.mu must be write locked. -func (e *endpoint) removePermanentEndpointLocked(addressEndpoint stack.AddressEndpoint, allowSLAACInvalidation bool) *tcpip.Error { +func (e *endpoint) removePermanentEndpointLocked(addressEndpoint stack.AddressEndpoint, allowSLAACInvalidation bool) tcpip.Error { addr := addressEndpoint.AddressWithPrefix() unicast := header.IsV6UnicastAddress(addr.Address) if unicast { @@ -1415,12 +1416,12 @@ func (e *endpoint) removePermanentEndpointLocked(addressEndpoint stack.AddressEn } snmc := header.SolicitedNodeAddr(addr.Address) + err := e.leaveGroupLocked(snmc) // The endpoint may have already left the multicast group. - if err := e.leaveGroupLocked(snmc); err != nil && err != tcpip.ErrBadLocalAddress { - return err + if _, ok := err.(*tcpip.ErrBadLocalAddress); ok { + err = nil } - - return nil + return err } // hasPermanentAddressLocked returns true if the endpoint has a permanent @@ -1630,7 +1631,7 @@ func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix { } // JoinGroup implements stack.GroupAddressableEndpoint. -func (e *endpoint) JoinGroup(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() return e.joinGroupLocked(addr) @@ -1639,9 +1640,9 @@ func (e *endpoint) JoinGroup(addr tcpip.Address) *tcpip.Error { // joinGroupLocked is like JoinGroup but with locking requirements. // // Precondition: e.mu must be locked. -func (e *endpoint) joinGroupLocked(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error { if !header.IsV6MulticastAddress(addr) { - return tcpip.ErrBadAddress + return &tcpip.ErrBadAddress{} } e.mu.mld.joinGroup(addr) @@ -1649,7 +1650,7 @@ func (e *endpoint) joinGroupLocked(addr tcpip.Address) *tcpip.Error { } // LeaveGroup implements stack.GroupAddressableEndpoint. -func (e *endpoint) LeaveGroup(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() return e.leaveGroupLocked(addr) @@ -1658,7 +1659,7 @@ func (e *endpoint) LeaveGroup(addr tcpip.Address) *tcpip.Error { // leaveGroupLocked is like LeaveGroup but with locking requirements. // // Precondition: e.mu must be locked. -func (e *endpoint) leaveGroupLocked(addr tcpip.Address) *tcpip.Error { +func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { return e.mu.mld.leaveGroup(addr) } @@ -1730,13 +1731,11 @@ func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) { } // NewEndpoint creates a new ipv6 endpoint. -func (p *protocol) NewEndpoint(nic stack.NetworkInterface, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { +func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { e := &endpoint{ - nic: nic, - linkAddrCache: linkAddrCache, - nud: nud, - dispatcher: dispatcher, - protocol: p, + nic: nic, + dispatcher: dispatcher, + protocol: p, } e.mu.Lock() e.mu.addressableEndpointState.Init(e) @@ -1762,24 +1761,24 @@ func (p *protocol) forgetEndpoint(nicID tcpip.NICID) { } // SetOption implements NetworkProtocol.SetOption. -func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error { +func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error { switch v := option.(type) { case *tcpip.DefaultTTLOption: p.SetDefaultTTL(uint8(*v)) return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } // Option implements NetworkProtocol.Option. -func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) *tcpip.Error { +func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error { switch v := option.(type) { case *tcpip.DefaultTTLOption: *v = tcpip.DefaultTTLOption(p.DefaultTTL()) return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } @@ -1842,9 +1841,9 @@ func (p *protocol) SetForwarding(v bool) { // link-layer payload MTU and the length of every IPv6 header. // Note that this is different than the Payload Length field of the IPv6 header, // which includes the length of the extension headers. -func calculateNetworkMTU(linkMTU, networkHeadersLen uint32) (uint32, *tcpip.Error) { +func calculateNetworkMTU(linkMTU, networkHeadersLen uint32) (uint32, tcpip.Error) { if linkMTU < header.IPv6MinimumMTU { - return 0, tcpip.ErrInvalidEndpointState + return 0, &tcpip.ErrInvalidEndpointState{} } // As per RFC 7112 section 5, we should discard packets if their IPv6 header @@ -1855,7 +1854,7 @@ func calculateNetworkMTU(linkMTU, networkHeadersLen uint32) (uint32, *tcpip.Erro // bytes ensures that the header chain length does not exceed the IPv6 // minimum MTU. if networkHeadersLen > header.IPv6MinimumMTU { - return 0, tcpip.ErrMalformedHeader + return 0, &tcpip.ErrMalformedHeader{} } networkMTU := linkMTU - uint32(networkHeadersLen) diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go index 5276878a0..1c6c37c91 100644 --- a/pkg/tcpip/network/ipv6/ipv6_test.go +++ b/pkg/tcpip/network/ipv6/ipv6_test.go @@ -996,8 +996,9 @@ func TestReceiveIPv6ExtHdrs(t *testing.T) { } // Should not have any more UDP packets. - if res, err := ep.Read(ioutil.Discard, tcpip.ReadOptions{}); err != tcpip.ErrWouldBlock { - t.Fatalf("got Read = (%v, %v), want = (_, %s)", res, err, tcpip.ErrWouldBlock) + res, err := ep.Read(ioutil.Discard, tcpip.ReadOptions{}) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got Read = (%v, %v), want = (_, %s)", res, err, &tcpip.ErrWouldBlock{}) } }) } @@ -1988,8 +1989,9 @@ func TestReceiveIPv6Fragments(t *testing.T) { } } - if res, err := ep.Read(ioutil.Discard, tcpip.ReadOptions{}); err != tcpip.ErrWouldBlock { - t.Fatalf("(last) got Read = (%v, %v), want = (_, %s)", res, err, tcpip.ErrWouldBlock) + res, err := ep.Read(ioutil.Discard, tcpip.ReadOptions{}) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("(last) got Read = (%v, %v), want = (_, %s)", res, err, &tcpip.ErrWouldBlock{}) } }) } @@ -2472,11 +2474,11 @@ func TestWriteStats(t *testing.T) { writers := []struct { name string - writePackets func(*stack.Route, stack.PacketBufferList) (int, *tcpip.Error) + writePackets func(*stack.Route, stack.PacketBufferList) (int, tcpip.Error) }{ { name: "WritePacket", - writePackets: func(rt *stack.Route, pkts stack.PacketBufferList) (int, *tcpip.Error) { + writePackets: func(rt *stack.Route, pkts stack.PacketBufferList) (int, tcpip.Error) { nWritten := 0 for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { if err := rt.WritePacket(nil, stack.NetworkHeaderParams{}, pkt); err != nil { @@ -2488,7 +2490,7 @@ func TestWriteStats(t *testing.T) { }, }, { name: "WritePackets", - writePackets: func(rt *stack.Route, pkts stack.PacketBufferList) (int, *tcpip.Error) { + writePackets: func(rt *stack.Route, pkts stack.PacketBufferList) (int, tcpip.Error) { return rt.WritePackets(nil, pkts, stack.NetworkHeaderParams{}) }, }, @@ -2498,7 +2500,7 @@ func TestWriteStats(t *testing.T) { t.Run(writer.name, func(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - ep := testutil.NewMockLinkEndpoint(header.IPv6MinimumMTU, tcpip.ErrInvalidEndpointState, test.allowPackets) + ep := testutil.NewMockLinkEndpoint(header.IPv6MinimumMTU, &tcpip.ErrInvalidEndpointState{}, test.allowPackets) rt := buildRoute(t, ep) var pkts stack.PacketBufferList for i := 0; i < nPackets; i++ { @@ -2597,7 +2599,7 @@ func TestClearEndpointFromProtocolOnClose(t *testing.T) { }) proto := s.NetworkProtocolInstance(ProtocolNumber).(*protocol) var nic testInterface - ep := proto.NewEndpoint(&nic, nil, nil, nil).(*endpoint) + ep := proto.NewEndpoint(&nic, nil).(*endpoint) var nicIDs []tcpip.NICID proto.mu.Lock() @@ -2832,8 +2834,8 @@ func TestFragmentationErrors(t *testing.T) { payloadSize int allowPackets int outgoingErrors int - mockError *tcpip.Error - wantError *tcpip.Error + mockError tcpip.Error + wantError tcpip.Error }{ { description: "No frag", @@ -2842,8 +2844,8 @@ func TestFragmentationErrors(t *testing.T) { transHdrLen: 0, allowPackets: 0, outgoingErrors: 1, - mockError: tcpip.ErrAborted, - wantError: tcpip.ErrAborted, + mockError: &tcpip.ErrAborted{}, + wantError: &tcpip.ErrAborted{}, }, { description: "Error on first frag", @@ -2852,8 +2854,8 @@ func TestFragmentationErrors(t *testing.T) { transHdrLen: 0, allowPackets: 0, outgoingErrors: 3, - mockError: tcpip.ErrAborted, - wantError: tcpip.ErrAborted, + mockError: &tcpip.ErrAborted{}, + wantError: &tcpip.ErrAborted{}, }, { description: "Error on second frag", @@ -2862,8 +2864,8 @@ func TestFragmentationErrors(t *testing.T) { transHdrLen: 0, allowPackets: 1, outgoingErrors: 2, - mockError: tcpip.ErrAborted, - wantError: tcpip.ErrAborted, + mockError: &tcpip.ErrAborted{}, + wantError: &tcpip.ErrAborted{}, }, { description: "Error when MTU is smaller than transport header", @@ -2873,7 +2875,7 @@ func TestFragmentationErrors(t *testing.T) { allowPackets: 0, outgoingErrors: 1, mockError: nil, - wantError: tcpip.ErrMessageTooLong, + wantError: &tcpip.ErrMessageTooLong{}, }, { description: "Error when MTU is smaller than IPv6 minimum MTU", @@ -2883,7 +2885,7 @@ func TestFragmentationErrors(t *testing.T) { allowPackets: 0, outgoingErrors: 1, mockError: nil, - wantError: tcpip.ErrInvalidEndpointState, + wantError: &tcpip.ErrInvalidEndpointState{}, }, } @@ -2897,8 +2899,8 @@ func TestFragmentationErrors(t *testing.T) { TTL: ttl, TOS: stack.DefaultTOS, }, pkt) - if err != ft.wantError { - t.Errorf("got WritePacket(_, _, _) = %s, want = %s", err, ft.wantError) + if diff := cmp.Diff(ft.wantError, err); diff != "" { + t.Errorf("unexpected error from WritePacket(_, _, _), (-want, +got):\n%s", diff) } if got := int(r.Stats().IP.PacketsSent.Value()); got != ft.allowPackets { t.Errorf("got r.Stats().IP.PacketsSent.Value() = %d, want = %d", got, ft.allowPackets) @@ -3073,7 +3075,7 @@ func TestMultiCounterStatsInitialization(t *testing.T) { }) proto := s.NetworkProtocolInstance(ProtocolNumber).(*protocol) var nic testInterface - ep := proto.NewEndpoint(&nic, nil, nil, nil).(*endpoint) + ep := proto.NewEndpoint(&nic, nil).(*endpoint) // At this point, the Stack's stats and the NetworkEndpoint's stats are // supposed to be bound. refStack := s.Stats() diff --git a/pkg/tcpip/network/ipv6/mld.go b/pkg/tcpip/network/ipv6/mld.go index c376016e9..2cc0dfebd 100644 --- a/pkg/tcpip/network/ipv6/mld.go +++ b/pkg/tcpip/network/ipv6/mld.go @@ -68,14 +68,14 @@ func (mld *mldState) Enabled() bool { // SendReport implements ip.MulticastGroupProtocol. // // Precondition: mld.ep.mu must be read locked. -func (mld *mldState) SendReport(groupAddress tcpip.Address) (bool, *tcpip.Error) { +func (mld *mldState) SendReport(groupAddress tcpip.Address) (bool, tcpip.Error) { return mld.writePacket(groupAddress, groupAddress, header.ICMPv6MulticastListenerReport) } // SendLeave implements ip.MulticastGroupProtocol. // // Precondition: mld.ep.mu must be read locked. -func (mld *mldState) SendLeave(groupAddress tcpip.Address) *tcpip.Error { +func (mld *mldState) SendLeave(groupAddress tcpip.Address) tcpip.Error { _, err := mld.writePacket(header.IPv6AllRoutersMulticastAddress, groupAddress, header.ICMPv6MulticastListenerDone) return err } @@ -112,7 +112,7 @@ func (mld *mldState) handleMulticastListenerReport(mldHdr header.MLD) { // joinGroup handles joining a new group and sending and scheduling the required // messages. // -// If the group is already joined, returns tcpip.ErrDuplicateAddress. +// If the group is already joined, returns *tcpip.ErrDuplicateAddress. // // Precondition: mld.ep.mu must be locked. func (mld *mldState) joinGroup(groupAddress tcpip.Address) { @@ -131,13 +131,13 @@ func (mld *mldState) isInGroup(groupAddress tcpip.Address) bool { // required. // // Precondition: mld.ep.mu must be locked. -func (mld *mldState) leaveGroup(groupAddress tcpip.Address) *tcpip.Error { +func (mld *mldState) leaveGroup(groupAddress tcpip.Address) tcpip.Error { // LeaveGroup returns false only if the group was not joined. if mld.genericMulticastProtocol.LeaveGroupLocked(groupAddress) { return nil } - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } // softLeaveAll leaves all groups from the perspective of MLD, but remains @@ -166,7 +166,7 @@ func (mld *mldState) sendQueuedReports() { // writePacket assembles and sends an MLD packet. // // Precondition: mld.ep.mu must be read locked. -func (mld *mldState) writePacket(destAddress, groupAddress tcpip.Address, mldType header.ICMPv6Type) (bool, *tcpip.Error) { +func (mld *mldState) writePacket(destAddress, groupAddress tcpip.Address, mldType header.ICMPv6Type) (bool, tcpip.Error) { sentStats := mld.ep.stats.icmp.packetsSent var mldStat tcpip.MultiCounterStat switch mldType { diff --git a/pkg/tcpip/network/ipv6/ndp.go b/pkg/tcpip/network/ipv6/ndp.go index ca4ff621d..d7dde1767 100644 --- a/pkg/tcpip/network/ipv6/ndp.go +++ b/pkg/tcpip/network/ipv6/ndp.go @@ -241,7 +241,7 @@ type NDPDispatcher interface { // // This function is not permitted to block indefinitely. This function // is also not permitted to call into the stack. - OnDuplicateAddressDetectionStatus(nicID tcpip.NICID, addr tcpip.Address, resolved bool, err *tcpip.Error) + OnDuplicateAddressDetectionStatus(nicID tcpip.NICID, addr tcpip.Address, resolved bool, err tcpip.Error) // OnDefaultRouterDiscovered is called when a new default router is // discovered. Implementations must return true if the newly discovered @@ -614,10 +614,10 @@ type slaacPrefixState struct { // tentative. // // The IPv6 endpoint that ndp belongs to MUST be locked. -func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, addressEndpoint stack.AddressEndpoint) *tcpip.Error { +func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, addressEndpoint stack.AddressEndpoint) tcpip.Error { // addr must be a valid unicast IPv6 address. if !header.IsV6UnicastAddress(addr) { - return tcpip.ErrAddressFamilyNotSupported + return &tcpip.ErrAddressFamilyNotSupported{} } if addressEndpoint.GetKind() != stack.PermanentTentative { @@ -666,7 +666,7 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, addressE dadDone := remaining == 0 - var err *tcpip.Error + var err tcpip.Error if !dadDone { err = ndp.sendDADPacket(addr, addressEndpoint) } @@ -717,7 +717,7 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, addressE // addr. // // addr must be a tentative IPv6 address on ndp's IPv6 endpoint. -func (ndp *ndpState) sendDADPacket(addr tcpip.Address, addressEndpoint stack.AddressEndpoint) *tcpip.Error { +func (ndp *ndpState) sendDADPacket(addr tcpip.Address, addressEndpoint stack.AddressEndpoint) tcpip.Error { snmc := header.SolicitedNodeAddr(addr) icmp := header.ICMPv6(buffer.NewView(header.ICMPv6NeighborSolicitMinimumSize)) diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go index 7a22309e5..8edaa9508 100644 --- a/pkg/tcpip/network/ipv6/ndp_test.go +++ b/pkg/tcpip/network/ipv6/ndp_test.go @@ -63,7 +63,7 @@ func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address, useNeig t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber) } - ep := netProto.NewEndpoint(&testInterface{}, &stubLinkAddressCache{}, &stubNUDHandler{}, &stubDispatcher{}) + ep := netProto.NewEndpoint(&testInterface{}, &stubDispatcher{}) if err := ep.Enable(); err != nil { t.Fatalf("ep.Enable(): %s", err) } @@ -90,7 +90,7 @@ type testNDPDispatcher struct { addr tcpip.Address } -func (*testNDPDispatcher) OnDuplicateAddressDetectionStatus(tcpip.NICID, tcpip.Address, bool, *tcpip.Error) { +func (*testNDPDispatcher) OnDuplicateAddressDetectionStatus(tcpip.NICID, tcpip.Address, bool, tcpip.Error) { } func (t *testNDPDispatcher) OnDefaultRouterDiscovered(_ tcpip.NICID, addr tcpip.Address) bool { @@ -167,10 +167,10 @@ type linkResolutionResult struct { ok bool } -// TestNeighorSolicitationWithSourceLinkLayerOption tests that receiving a +// TestNeighborSolicitationWithSourceLinkLayerOption tests that receiving a // valid NDP NS message with the Source Link Layer Address option results in a // new entry in the link address cache for the sender of the message. -func TestNeighorSolicitationWithSourceLinkLayerOption(t *testing.T) { +func TestNeighborSolicitationWithSourceLinkLayerOption(t *testing.T) { const nicID = 1 tests := []struct { @@ -199,6 +199,7 @@ func TestNeighorSolicitationWithSourceLinkLayerOption(t *testing.T) { NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol}, }) e := channel.New(0, 1280, linkAddr0) + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } @@ -242,17 +243,19 @@ func TestNeighorSolicitationWithSourceLinkLayerOption(t *testing.T) { }) wantInvalid := uint64(0) - wantErr := (*tcpip.Error)(nil) wantSucccess := true if len(test.expectedLinkAddr) == 0 { wantInvalid = 1 - wantErr = tcpip.ErrWouldBlock wantSucccess = false + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got s.GetLinkAddress(%d, %s, %s, %d, _) = %s, want = %s", nicID, lladdr1, lladdr0, ProtocolNumber, err, &tcpip.ErrWouldBlock{}) + } + } else { + if err != nil { + t.Errorf("got s.GetLinkAddress(%d, %s, %s, %d, _) = %s, want = nil", nicID, lladdr1, lladdr0, ProtocolNumber, err) + } } - if err != wantErr { - t.Errorf("got s.GetLinkAddress(%d, %s, %s, %d, _) = %s, want = %s", nicID, lladdr1, lladdr0, ProtocolNumber, err, wantErr) - } if diff := cmp.Diff(stack.LinkResolutionResult{LinkAddress: test.expectedLinkAddr, Success: wantSucccess}, <-ch); diff != "" { t.Errorf("linkResolutionResult mismatch (-want +got):\n%s", diff) } @@ -263,11 +266,11 @@ func TestNeighorSolicitationWithSourceLinkLayerOption(t *testing.T) { } } -// TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache tests +// TestNeighborSolicitationWithSourceLinkLayerOptionUsingNeighborCache tests // that receiving a valid NDP NS message with the Source Link Layer Address // option results in a new entry in the link address cache for the sender of // the message. -func TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testing.T) { +func TestNeighborSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testing.T) { const nicID = 1 tests := []struct { @@ -335,18 +338,18 @@ func TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testi Data: hdr.View().ToVectorisedView(), })) - neighbors, err := s.Neighbors(nicID) + neighbors, err := s.Neighbors(nicID, ProtocolNumber) if err != nil { - t.Fatalf("s.Neighbors(%d): %s", nicID, err) + t.Fatalf("s.Neighbors(%d, %d): %s", nicID, ProtocolNumber, err) } neighborByAddr := make(map[tcpip.Address]stack.NeighborEntry) for _, n := range neighbors { if existing, ok := neighborByAddr[n.Addr]; ok { if diff := cmp.Diff(existing, n); diff != "" { - t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry (-existing +got):\n%s", nicID, diff) + t.Fatalf("s.Neighbors(%d, %d) returned unexpected duplicate neighbor entry (-existing +got):\n%s", nicID, ProtocolNumber, diff) } - t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry: %#v", nicID, existing) + t.Fatalf("s.Neighbors(%d, %d) returned unexpected duplicate neighbor entry: %#v", nicID, ProtocolNumber, existing) } neighborByAddr[n.Addr] = n } @@ -380,7 +383,7 @@ func TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testi } } -func TestNeighorSolicitationResponse(t *testing.T) { +func TestNeighborSolicitationResponse(t *testing.T) { const nicID = 1 nicAddr := lladdr0 remoteAddr := lladdr1 @@ -719,10 +722,10 @@ func TestNeighorSolicitationResponse(t *testing.T) { } } -// TestNeighorAdvertisementWithTargetLinkLayerOption tests that receiving a +// TestNeighborAdvertisementWithTargetLinkLayerOption tests that receiving a // valid NDP NA message with the Target Link Layer Address option results in a // new entry in the link address cache for the target of the message. -func TestNeighorAdvertisementWithTargetLinkLayerOption(t *testing.T) { +func TestNeighborAdvertisementWithTargetLinkLayerOption(t *testing.T) { const nicID = 1 tests := []struct { @@ -756,8 +759,10 @@ func TestNeighorAdvertisementWithTargetLinkLayerOption(t *testing.T) { t.Run(test.name, func(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol}, + UseLinkAddrCache: true, }) e := channel.New(0, 1280, linkAddr0) + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } @@ -801,17 +806,19 @@ func TestNeighorAdvertisementWithTargetLinkLayerOption(t *testing.T) { }) wantInvalid := uint64(0) - wantErr := (*tcpip.Error)(nil) wantSucccess := true if len(test.expectedLinkAddr) == 0 { wantInvalid = 1 - wantErr = tcpip.ErrWouldBlock wantSucccess = false + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got s.GetLinkAddress(%d, %s, %s, %d, _) = %s, want = %s", nicID, lladdr1, lladdr0, ProtocolNumber, err, &tcpip.ErrWouldBlock{}) + } + } else { + if err != nil { + t.Errorf("got s.GetLinkAddress(%d, %s, %s, %d, _) = %s, want = nil", nicID, lladdr1, lladdr0, ProtocolNumber, err) + } } - if err != wantErr { - t.Errorf("got s.GetLinkAddress(%d, %s, %s, %d, _) = %s, want = %s", nicID, lladdr1, lladdr0, ProtocolNumber, err, wantErr) - } if diff := cmp.Diff(stack.LinkResolutionResult{LinkAddress: test.expectedLinkAddr, Success: wantSucccess}, <-ch); diff != "" { t.Errorf("linkResolutionResult mismatch (-want +got):\n%s", diff) } @@ -822,11 +829,11 @@ func TestNeighorAdvertisementWithTargetLinkLayerOption(t *testing.T) { } } -// TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache tests +// TestNeighborAdvertisementWithTargetLinkLayerOptionUsingNeighborCache tests // that receiving a valid NDP NA message with the Target Link Layer Address // option does not result in a new entry in the neighbor cache for the target // of the message. -func TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache(t *testing.T) { +func TestNeighborAdvertisementWithTargetLinkLayerOptionUsingNeighborCache(t *testing.T) { const nicID = 1 tests := []struct { @@ -901,18 +908,18 @@ func TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache(t *test Data: hdr.View().ToVectorisedView(), })) - neighbors, err := s.Neighbors(nicID) + neighbors, err := s.Neighbors(nicID, ProtocolNumber) if err != nil { - t.Fatalf("s.Neighbors(%d): %s", nicID, err) + t.Fatalf("s.Neighbors(%d, %d): %s", nicID, ProtocolNumber, err) } neighborByAddr := make(map[tcpip.Address]stack.NeighborEntry) for _, n := range neighbors { if existing, ok := neighborByAddr[n.Addr]; ok { if diff := cmp.Diff(existing, n); diff != "" { - t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry (-existing +got):\n%s", nicID, diff) + t.Fatalf("s.Neighbors(%d, %d) returned unexpected duplicate neighbor entry (-existing +got):\n%s", nicID, ProtocolNumber, diff) } - t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry: %#v", nicID, existing) + t.Fatalf("s.Neighbors(%d, %d) returned unexpected duplicate neighbor entry: %#v", nicID, ProtocolNumber, existing) } neighborByAddr[n.Addr] = n } @@ -1168,6 +1175,118 @@ func TestNDPValidation(t *testing.T) { } +// TestNeighborAdvertisementValidation tests that the NIC validates received +// Neighbor Advertisements. +// +// In particular, if the IP Destination Address is a multicast address, and the +// Solicited flag is not zero, the Neighbor Advertisement is invalid and should +// be discarded. +func TestNeighborAdvertisementValidation(t *testing.T) { + tests := []struct { + name string + ipDstAddr tcpip.Address + solicitedFlag bool + valid bool + }{ + { + name: "Multicast IP destination address with Solicited flag set", + ipDstAddr: header.IPv6AllNodesMulticastAddress, + solicitedFlag: true, + valid: false, + }, + { + name: "Multicast IP destination address with Solicited flag unset", + ipDstAddr: header.IPv6AllNodesMulticastAddress, + solicitedFlag: false, + valid: true, + }, + { + name: "Unicast IP destination address with Solicited flag set", + ipDstAddr: lladdr0, + solicitedFlag: true, + valid: true, + }, + { + name: "Unicast IP destination address with Solicited flag unset", + ipDstAddr: lladdr0, + solicitedFlag: false, + valid: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol}, + UseNeighborCache: true, + }) + e := channel.New(0, header.IPv6MinimumMTU, linkAddr0) + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) + } + if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil { + t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, lladdr0, err) + } + + ndpNASize := header.ICMPv6NeighborAdvertMinimumSize + hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNASize) + pkt := header.ICMPv6(hdr.Prepend(ndpNASize)) + pkt.SetType(header.ICMPv6NeighborAdvert) + na := header.NDPNeighborAdvert(pkt.MessageBody()) + na.SetTargetAddress(lladdr1) + na.SetSolicitedFlag(test.solicitedFlag) + pkt.SetChecksum(header.ICMPv6Checksum(pkt, lladdr1, test.ipDstAddr, buffer.VectorisedView{})) + payloadLength := hdr.UsedLength() + ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) + ip.Encode(&header.IPv6Fields{ + PayloadLength: uint16(payloadLength), + TransportProtocol: header.ICMPv6ProtocolNumber, + HopLimit: 255, + SrcAddr: lladdr1, + DstAddr: test.ipDstAddr, + }) + + stats := s.Stats().ICMP.V6.PacketsReceived + invalid := stats.Invalid + rxNA := stats.NeighborAdvert + + if got := rxNA.Value(); got != 0 { + t.Fatalf("got rxNA = %d, want = 0", got) + } + if got := invalid.Value(); got != 0 { + t.Fatalf("got invalid = %d, want = 0", got) + } + + e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{ + Data: hdr.View().ToVectorisedView(), + })) + + if got := rxNA.Value(); got != 1 { + t.Fatalf("got rxNA = %d, want = 1", got) + } + var wantInvalid uint64 = 1 + if test.valid { + wantInvalid = 0 + } + if got := invalid.Value(); got != wantInvalid { + t.Fatalf("got invalid = %d, want = %d", got, wantInvalid) + } + // As per RFC 4861 section 7.2.5: + // When a valid Neighbor Advertisement is received ... + // If no entry exists, the advertisement SHOULD be silently discarded. + // There is no need to create an entry if none exists, since the + // recipient has apparently not initiated any communication with the + // target. + if neighbors, err := s.Neighbors(nicID, ProtocolNumber); err != nil { + t.Fatalf("s.Neighbors(%d, %d): %s", nicID, ProtocolNumber, err) + } else if len(neighbors) != 0 { + t.Fatalf("got len(neighbors) = %d, want = 0; neighbors = %#v", len(neighbors), neighbors) + } + }) + } +} + // TestRouterAdvertValidation tests that when the NIC is configured to handle // NDP Router Advertisement packets, it validates the Router Advertisement // properly before handling them. diff --git a/pkg/tcpip/network/testutil/testutil.go b/pkg/tcpip/network/testutil/testutil.go index 9bd009374..f5fa77b65 100644 --- a/pkg/tcpip/network/testutil/testutil.go +++ b/pkg/tcpip/network/testutil/testutil.go @@ -35,7 +35,7 @@ type MockLinkEndpoint struct { WrittenPackets []*stack.PacketBuffer mtu uint32 - err *tcpip.Error + err tcpip.Error allowPackets int } @@ -43,7 +43,7 @@ type MockLinkEndpoint struct { // // err is the error that will be returned once allowPackets packets are written // to the endpoint. -func NewMockLinkEndpoint(mtu uint32, err *tcpip.Error, allowPackets int) *MockLinkEndpoint { +func NewMockLinkEndpoint(mtu uint32, err tcpip.Error, allowPackets int) *MockLinkEndpoint { return &MockLinkEndpoint{ mtu: mtu, err: err, @@ -64,7 +64,7 @@ func (*MockLinkEndpoint) MaxHeaderLength() uint16 { return 0 } func (*MockLinkEndpoint) LinkAddress() tcpip.LinkAddress { return "" } // WritePacket implements LinkEndpoint.WritePacket. -func (ep *MockLinkEndpoint) WritePacket(_ stack.RouteInfo, _ *stack.GSO, _ tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +func (ep *MockLinkEndpoint) WritePacket(_ stack.RouteInfo, _ *stack.GSO, _ tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error { if ep.allowPackets == 0 { return ep.err } @@ -74,7 +74,7 @@ func (ep *MockLinkEndpoint) WritePacket(_ stack.RouteInfo, _ *stack.GSO, _ tcpip } // WritePackets implements LinkEndpoint.WritePackets. -func (ep *MockLinkEndpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (ep *MockLinkEndpoint) WritePackets(r stack.RouteInfo, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { var n int for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { diff --git a/pkg/tcpip/ports/BUILD b/pkg/tcpip/ports/BUILD index 2bad05a2e..57abec5c9 100644 --- a/pkg/tcpip/ports/BUILD +++ b/pkg/tcpip/ports/BUILD @@ -18,5 +18,6 @@ go_test( library = ":ports", deps = [ "//pkg/tcpip", + "@com_github_google_go_cmp//cmp:go_default_library", ], ) diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go index d87193650..11dbdbbcf 100644 --- a/pkg/tcpip/ports/ports.go +++ b/pkg/tcpip/ports/ports.go @@ -329,7 +329,7 @@ func NewPortManager() *PortManager { // possible ephemeral ports, allowing the caller to decide whether a given port // is suitable for its needs, and stopping when a port is found or an error // occurs. -func (s *PortManager) PickEphemeralPort(testPort func(p uint16) (bool, *tcpip.Error)) (port uint16, err *tcpip.Error) { +func (s *PortManager) PickEphemeralPort(testPort func(p uint16) (bool, tcpip.Error)) (port uint16, err tcpip.Error) { offset := uint32(rand.Int31n(numEphemeralPorts)) return s.pickEphemeralPort(offset, numEphemeralPorts, testPort) } @@ -348,7 +348,7 @@ func (s *PortManager) incPortHint() { // iterates over all ephemeral ports, allowing the caller to decide whether a // given port is suitable for its needs and stopping when a port is found or an // error occurs. -func (s *PortManager) PickEphemeralPortStable(offset uint32, testPort func(p uint16) (bool, *tcpip.Error)) (port uint16, err *tcpip.Error) { +func (s *PortManager) PickEphemeralPortStable(offset uint32, testPort func(p uint16) (bool, tcpip.Error)) (port uint16, err tcpip.Error) { p, err := s.pickEphemeralPort(s.portHint()+offset, numEphemeralPorts, testPort) if err == nil { s.incPortHint() @@ -361,7 +361,7 @@ func (s *PortManager) PickEphemeralPortStable(offset uint32, testPort func(p uin // and iterates over the number of ports specified by count and allows the // caller to decide whether a given port is suitable for its needs, and stopping // when a port is found or an error occurs. -func (s *PortManager) pickEphemeralPort(offset, count uint32, testPort func(p uint16) (bool, *tcpip.Error)) (port uint16, err *tcpip.Error) { +func (s *PortManager) pickEphemeralPort(offset, count uint32, testPort func(p uint16) (bool, tcpip.Error)) (port uint16, err tcpip.Error) { for i := uint32(0); i < count; i++ { port = uint16(FirstEphemeral + (offset+i)%count) ok, err := testPort(port) @@ -374,7 +374,7 @@ func (s *PortManager) pickEphemeralPort(offset, count uint32, testPort func(p ui } } - return 0, tcpip.ErrNoPortAvailable + return 0, &tcpip.ErrNoPortAvailable{} } // IsPortAvailable tests if the given port is available on all given protocols. @@ -404,7 +404,7 @@ func (s *PortManager) isPortAvailableLocked(networks []tcpip.NetworkProtocolNumb // An optional testPort closure can be passed in which if provided will be used // to test if the picked port can be used. The function should return true if // the port is safe to use, false otherwise. -func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress, testPort func(port uint16) bool) (reservedPort uint16, err *tcpip.Error) { +func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress, testPort func(port uint16) bool) (reservedPort uint16, err tcpip.Error) { s.mu.Lock() defer s.mu.Unlock() @@ -414,17 +414,17 @@ func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transp // protocols. if port != 0 { if !s.reserveSpecificPort(networks, transport, addr, port, flags, bindToDevice, dst) { - return 0, tcpip.ErrPortInUse + return 0, &tcpip.ErrPortInUse{} } if testPort != nil && !testPort(port) { s.releasePortLocked(networks, transport, addr, port, flags.Bits(), bindToDevice, dst) - return 0, tcpip.ErrPortInUse + return 0, &tcpip.ErrPortInUse{} } return port, nil } // A port wasn't specified, so try to find one. - return s.PickEphemeralPort(func(p uint16) (bool, *tcpip.Error) { + return s.PickEphemeralPort(func(p uint16) (bool, tcpip.Error) { if !s.reserveSpecificPort(networks, transport, addr, p, flags, bindToDevice, dst) { return false, nil } diff --git a/pkg/tcpip/ports/ports_test.go b/pkg/tcpip/ports/ports_test.go index 4bc949fd8..e70fbb72b 100644 --- a/pkg/tcpip/ports/ports_test.go +++ b/pkg/tcpip/ports/ports_test.go @@ -18,6 +18,7 @@ import ( "math/rand" "testing" + "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/tcpip" ) @@ -32,7 +33,7 @@ const ( type portReserveTestAction struct { port uint16 ip tcpip.Address - want *tcpip.Error + want tcpip.Error flags Flags release bool device tcpip.NICID @@ -50,19 +51,19 @@ func TestPortReservation(t *testing.T) { {port: 80, ip: fakeIPAddress, want: nil}, {port: 80, ip: fakeIPAddress1, want: nil}, /* N.B. Order of tests matters! */ - {port: 80, ip: anyIPAddress, want: tcpip.ErrPortInUse}, - {port: 80, ip: fakeIPAddress, want: tcpip.ErrPortInUse, flags: Flags{LoadBalanced: true}}, + {port: 80, ip: anyIPAddress, want: &tcpip.ErrPortInUse{}}, + {port: 80, ip: fakeIPAddress, want: &tcpip.ErrPortInUse{}, flags: Flags{LoadBalanced: true}}, }, }, { tname: "bind to inaddr any", actions: []portReserveTestAction{ {port: 22, ip: anyIPAddress, want: nil}, - {port: 22, ip: fakeIPAddress, want: tcpip.ErrPortInUse}, + {port: 22, ip: fakeIPAddress, want: &tcpip.ErrPortInUse{}}, /* release fakeIPAddress, but anyIPAddress is still inuse */ {port: 22, ip: fakeIPAddress, release: true}, - {port: 22, ip: fakeIPAddress, want: tcpip.ErrPortInUse}, - {port: 22, ip: fakeIPAddress, want: tcpip.ErrPortInUse, flags: Flags{LoadBalanced: true}}, + {port: 22, ip: fakeIPAddress, want: &tcpip.ErrPortInUse{}}, + {port: 22, ip: fakeIPAddress, want: &tcpip.ErrPortInUse{}, flags: Flags{LoadBalanced: true}}, /* Release port 22 from any IP address, then try to reserve fake IP address on 22 */ {port: 22, ip: anyIPAddress, want: nil, release: true}, {port: 22, ip: fakeIPAddress, want: nil}, @@ -80,8 +81,8 @@ func TestPortReservation(t *testing.T) { {port: 25, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, {port: 25, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 25, ip: fakeIPAddress, flags: Flags{}, want: tcpip.ErrPortInUse}, - {port: 25, ip: anyIPAddress, flags: Flags{}, want: tcpip.ErrPortInUse}, + {port: 25, ip: fakeIPAddress, flags: Flags{}, want: &tcpip.ErrPortInUse{}}, + {port: 25, ip: anyIPAddress, flags: Flags{}, want: &tcpip.ErrPortInUse{}}, {port: 25, ip: anyIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, }, @@ -91,14 +92,14 @@ func TestPortReservation(t *testing.T) { {port: 24, ip: anyIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: anyIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 24, ip: anyIPAddress, flags: Flags{}, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, flags: Flags{}, want: tcpip.ErrPortInUse}, + {port: 24, ip: anyIPAddress, flags: Flags{}, want: &tcpip.ErrPortInUse{}}, + {port: 24, ip: fakeIPAddress, flags: Flags{}, want: &tcpip.ErrPortInUse{}}, {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, release: true, want: nil}, {port: 24, ip: anyIPAddress, flags: Flags{LoadBalanced: true}, release: true}, - {port: 24, ip: anyIPAddress, flags: Flags{}, want: tcpip.ErrPortInUse}, + {port: 24, ip: anyIPAddress, flags: Flags{}, want: &tcpip.ErrPortInUse{}}, {port: 24, ip: anyIPAddress, flags: Flags{LoadBalanced: true}, release: true}, {port: 24, ip: anyIPAddress, flags: Flags{}, want: nil}, @@ -107,7 +108,7 @@ func TestPortReservation(t *testing.T) { tname: "bind twice with device fails", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, device: 3, want: nil}, - {port: 24, ip: fakeIPAddress, device: 3, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 3, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind to device", @@ -119,50 +120,50 @@ func TestPortReservation(t *testing.T) { tname: "bind to device and then without device", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, device: 123, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 0, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind without device", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, want: nil}, - {port: 24, ip: fakeIPAddress, device: 123, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 123, want: &tcpip.ErrPortInUse{}}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: &tcpip.ErrPortInUse{}}, + {port: 24, ip: fakeIPAddress, want: &tcpip.ErrPortInUse{}}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind with device", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, device: 123, want: nil}, - {port: 24, ip: fakeIPAddress, device: 123, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 0, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 123, want: &tcpip.ErrPortInUse{}}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: &tcpip.ErrPortInUse{}}, + {port: 24, ip: fakeIPAddress, device: 0, want: &tcpip.ErrPortInUse{}}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: &tcpip.ErrPortInUse{}}, {port: 24, ip: fakeIPAddress, device: 456, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 789, want: nil}, - {port: 24, ip: fakeIPAddress, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, want: &tcpip.ErrPortInUse{}}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind with reuseport", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 24, ip: fakeIPAddress, device: 123, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 123, want: &tcpip.ErrPortInUse{}}, {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 0, want: &tcpip.ErrPortInUse{}}, {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: nil}, }, }, { tname: "binding with reuseport and device", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 24, ip: fakeIPAddress, device: 123, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 123, want: &tcpip.ErrPortInUse{}}, {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 0, want: &tcpip.ErrPortInUse{}}, {port: 24, ip: fakeIPAddress, device: 456, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 789, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 24, ip: fakeIPAddress, device: 999, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 999, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "mixing reuseport and not reuseport by binding to device", @@ -177,14 +178,14 @@ func TestPortReservation(t *testing.T) { actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 456, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind and release", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 24, ip: fakeIPAddress, device: 345, flags: Flags{}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 345, flags: Flags{}, want: &tcpip.ErrPortInUse{}}, {port: 24, ip: fakeIPAddress, device: 789, flags: Flags{LoadBalanced: true}, want: nil}, // Release the bind to device 0 and try again. @@ -195,7 +196,7 @@ func TestPortReservation(t *testing.T) { tname: "bind twice with reuseport once", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{}, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "release an unreserved device", @@ -213,16 +214,16 @@ func TestPortReservation(t *testing.T) { tname: "bind with reuseaddr", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: nil}, - {port: 24, ip: fakeIPAddress, device: 123, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 123, want: &tcpip.ErrPortInUse{}}, {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{MostRecent: true}, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 0, want: &tcpip.ErrPortInUse{}}, {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{MostRecent: true}, want: nil}, }, }, { tname: "bind twice with reuseaddr once", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{}, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{MostRecent: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{MostRecent: true}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind with reuseaddr and reuseport", @@ -236,14 +237,14 @@ func TestPortReservation(t *testing.T) { actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: nil}, - {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind with reuseaddr and reuseport, and then reuseport", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind with reuseaddr and reuseport twice, and then reuseaddr", @@ -264,14 +265,14 @@ func TestPortReservation(t *testing.T) { actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: nil}, {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, - {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind with reuseport, and then reuseaddr and reuseport", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, - {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind tuple with reuseaddr, and then wildcard with reuseaddr", @@ -283,7 +284,7 @@ func TestPortReservation(t *testing.T) { tname: "bind tuple with reuseaddr, and then wildcard", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: nil}, - {port: 24, ip: fakeIPAddress, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind wildcard with reuseaddr, and then tuple with reuseaddr", @@ -295,7 +296,7 @@ func TestPortReservation(t *testing.T) { tname: "bind tuple with reuseaddr, and then wildcard", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, want: nil}, - {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind two tuples with reuseaddr", @@ -313,7 +314,7 @@ func TestPortReservation(t *testing.T) { tname: "bind wildcard, and then tuple with reuseaddr", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, dest: tcpip.FullAddress{}, want: nil}, - {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: &tcpip.ErrPortInUse{}}, }, }, { tname: "bind wildcard twice with reuseaddr", @@ -333,8 +334,8 @@ func TestPortReservation(t *testing.T) { continue } gotPort, err := pm.ReservePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device, test.dest, nil /* testPort */) - if err != test.want { - t.Fatalf("ReservePort(.., .., %s, %d, %+v, %d, %v) = %v, want %v", test.ip, test.port, test.flags, test.device, test.dest, err, test.want) + if diff := cmp.Diff(test.want, err); diff != "" { + t.Fatalf("unexpected error from ReservePort(.., .., %s, %d, %+v, %d, %v), (-want, +got):\n%s", test.ip, test.port, test.flags, test.device, test.dest, diff) } if test.port == 0 && (gotPort == 0 || gotPort < FirstEphemeral) { t.Fatalf("ReservePort(.., .., .., 0, ..) = %d, want port number >= %d to be picked", gotPort, FirstEphemeral) @@ -345,30 +346,29 @@ func TestPortReservation(t *testing.T) { } func TestPickEphemeralPort(t *testing.T) { - customErr := &tcpip.Error{} for _, test := range []struct { name string - f func(port uint16) (bool, *tcpip.Error) - wantErr *tcpip.Error + f func(port uint16) (bool, tcpip.Error) + wantErr tcpip.Error wantPort uint16 }{ { name: "no-port-available", - f: func(port uint16) (bool, *tcpip.Error) { + f: func(port uint16) (bool, tcpip.Error) { return false, nil }, - wantErr: tcpip.ErrNoPortAvailable, + wantErr: &tcpip.ErrNoPortAvailable{}, }, { name: "port-tester-error", - f: func(port uint16) (bool, *tcpip.Error) { - return false, customErr + f: func(port uint16) (bool, tcpip.Error) { + return false, &tcpip.ErrBadBuffer{} }, - wantErr: customErr, + wantErr: &tcpip.ErrBadBuffer{}, }, { name: "only-port-16042-available", - f: func(port uint16) (bool, *tcpip.Error) { + f: func(port uint16) (bool, tcpip.Error) { if port == FirstEphemeral+42 { return true, nil } @@ -378,49 +378,52 @@ func TestPickEphemeralPort(t *testing.T) { }, { name: "only-port-under-16000-available", - f: func(port uint16) (bool, *tcpip.Error) { + f: func(port uint16) (bool, tcpip.Error) { if port < FirstEphemeral { return true, nil } return false, nil }, - wantErr: tcpip.ErrNoPortAvailable, + wantErr: &tcpip.ErrNoPortAvailable{}, }, } { t.Run(test.name, func(t *testing.T) { pm := NewPortManager() - if port, err := pm.PickEphemeralPort(test.f); port != test.wantPort || err != test.wantErr { - t.Errorf("PickEphemeralPort(..) = (port %d, err %v); want (port %d, err %v)", port, err, test.wantPort, test.wantErr) + port, err := pm.PickEphemeralPort(test.f) + if diff := cmp.Diff(test.wantErr, err); diff != "" { + t.Fatalf("unexpected error from PickEphemeralPort(..), (-want, +got):\n%s", diff) + } + if port != test.wantPort { + t.Errorf("got PickEphemeralPort(..) = (%d, nil); want (%d, nil)", port, test.wantPort) } }) } } func TestPickEphemeralPortStable(t *testing.T) { - customErr := &tcpip.Error{} for _, test := range []struct { name string - f func(port uint16) (bool, *tcpip.Error) - wantErr *tcpip.Error + f func(port uint16) (bool, tcpip.Error) + wantErr tcpip.Error wantPort uint16 }{ { name: "no-port-available", - f: func(port uint16) (bool, *tcpip.Error) { + f: func(port uint16) (bool, tcpip.Error) { return false, nil }, - wantErr: tcpip.ErrNoPortAvailable, + wantErr: &tcpip.ErrNoPortAvailable{}, }, { name: "port-tester-error", - f: func(port uint16) (bool, *tcpip.Error) { - return false, customErr + f: func(port uint16) (bool, tcpip.Error) { + return false, &tcpip.ErrBadBuffer{} }, - wantErr: customErr, + wantErr: &tcpip.ErrBadBuffer{}, }, { name: "only-port-16042-available", - f: func(port uint16) (bool, *tcpip.Error) { + f: func(port uint16) (bool, tcpip.Error) { if port == FirstEphemeral+42 { return true, nil } @@ -430,20 +433,24 @@ func TestPickEphemeralPortStable(t *testing.T) { }, { name: "only-port-under-16000-available", - f: func(port uint16) (bool, *tcpip.Error) { + f: func(port uint16) (bool, tcpip.Error) { if port < FirstEphemeral { return true, nil } return false, nil }, - wantErr: tcpip.ErrNoPortAvailable, + wantErr: &tcpip.ErrNoPortAvailable{}, }, } { t.Run(test.name, func(t *testing.T) { pm := NewPortManager() portOffset := uint32(rand.Int31n(int32(numEphemeralPorts))) - if port, err := pm.PickEphemeralPortStable(portOffset, test.f); port != test.wantPort || err != test.wantErr { - t.Errorf("PickEphemeralPort(..) = (port %d, err %v); want (port %d, err %v)", port, err, test.wantPort, test.wantErr) + port, err := pm.PickEphemeralPortStable(portOffset, test.f) + if diff := cmp.Diff(test.wantErr, err); diff != "" { + t.Fatalf("unexpected error from PickEphemeralPort(..), (-want, +got):\n%s", diff) + } + if port != test.wantPort { + t.Errorf("got PickEphemeralPort(..) = (%d, nil); want (%d, nil)", port, test.wantPort) } }) } diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go index 3d9954c84..856ea998d 100644 --- a/pkg/tcpip/sample/tun_tcp_connect/main.go +++ b/pkg/tcpip/sample/tun_tcp_connect/main.go @@ -175,7 +175,7 @@ func main() { waitEntry, notifyCh := waiter.NewChannelEntry(nil) wq.EventRegister(&waitEntry, waiter.EventOut) terr := ep.Connect(remote) - if terr == tcpip.ErrConnectStarted { + if _, ok := terr.(*tcpip.ErrConnectStarted); ok { fmt.Println("Connect is pending...") <-notifyCh terr = ep.LastError() @@ -198,11 +198,11 @@ func main() { for { _, err := ep.Read(os.Stdout, tcpip.ReadOptions{}) if err != nil { - if err == tcpip.ErrClosedForReceive { + if _, ok := err.(*tcpip.ErrClosedForReceive); ok { break } - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { <-notifyCh continue } diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go index ae9cf44e7..9b23df3a9 100644 --- a/pkg/tcpip/sample/tun_tcp_echo/main.go +++ b/pkg/tcpip/sample/tun_tcp_echo/main.go @@ -51,7 +51,7 @@ type endpointWriter struct { } type tcpipError struct { - inner *tcpip.Error + inner tcpip.Error } func (e *tcpipError) Error() string { @@ -89,7 +89,7 @@ func echo(wq *waiter.Queue, ep tcpip.Endpoint) { for { _, err := ep.Read(&w, tcpip.ReadOptions{}) if err != nil { - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { <-notifyCh continue } @@ -217,7 +217,7 @@ func main() { for { n, wq, err := ep.Accept(nil) if err != nil { - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { <-notifyCh continue } diff --git a/pkg/tcpip/socketops.go b/pkg/tcpip/socketops.go index 7eabbc599..1e00144a5 100644 --- a/pkg/tcpip/socketops.go +++ b/pkg/tcpip/socketops.go @@ -46,16 +46,16 @@ type SocketOptionsHandler interface { OnCorkOptionSet(v bool) // LastError is invoked when SO_ERROR is read for an endpoint. - LastError() *Error + LastError() Error // UpdateLastError updates the endpoint specific last error field. - UpdateLastError(err *Error) + UpdateLastError(err Error) // HasNIC is invoked to check if the NIC is valid for SO_BINDTODEVICE. HasNIC(v int32) bool // GetSendBufferSize is invoked to get the SO_SNDBUFSIZE. - GetSendBufferSize() (int64, *Error) + GetSendBufferSize() (int64, Error) // IsUnixSocket is invoked to check if the socket is of unix domain. IsUnixSocket() bool @@ -83,12 +83,12 @@ func (*DefaultSocketOptionsHandler) OnDelayOptionSet(bool) {} func (*DefaultSocketOptionsHandler) OnCorkOptionSet(bool) {} // LastError implements SocketOptionsHandler.LastError. -func (*DefaultSocketOptionsHandler) LastError() *Error { +func (*DefaultSocketOptionsHandler) LastError() Error { return nil } // UpdateLastError implements SocketOptionsHandler.UpdateLastError. -func (*DefaultSocketOptionsHandler) UpdateLastError(*Error) {} +func (*DefaultSocketOptionsHandler) UpdateLastError(Error) {} // HasNIC implements SocketOptionsHandler.HasNIC. func (*DefaultSocketOptionsHandler) HasNIC(int32) bool { @@ -96,7 +96,7 @@ func (*DefaultSocketOptionsHandler) HasNIC(int32) bool { } // GetSendBufferSize implements SocketOptionsHandler.GetSendBufferSize. -func (*DefaultSocketOptionsHandler) GetSendBufferSize() (int64, *Error) { +func (*DefaultSocketOptionsHandler) GetSendBufferSize() (int64, Error) { return 0, nil } @@ -109,11 +109,11 @@ func (*DefaultSocketOptionsHandler) IsUnixSocket() bool { // implemented by the stack. type StackHandler interface { // Option allows retrieving stack wide options. - Option(option interface{}) *Error + Option(option interface{}) Error // TransportProtocolOption allows retrieving individual protocol level // option values. - TransportProtocolOption(proto TransportProtocolNumber, option GettableTransportProtocolOption) *Error + TransportProtocolOption(proto TransportProtocolNumber, option GettableTransportProtocolOption) Error } // SocketOptions contains all the variables which store values for SOL_SOCKET, @@ -238,7 +238,7 @@ func storeAtomicBool(addr *uint32, v bool) { } // SetLastError sets the last error for a socket. -func (so *SocketOptions) SetLastError(err *Error) { +func (so *SocketOptions) SetLastError(err Error) { so.handler.UpdateLastError(err) } @@ -423,7 +423,7 @@ func (so *SocketOptions) SetRecvError(v bool) { } // GetLastError gets value for SO_ERROR option. -func (so *SocketOptions) GetLastError() *Error { +func (so *SocketOptions) GetLastError() Error { return so.handler.LastError() } @@ -473,6 +473,48 @@ func (origin SockErrOrigin) IsICMPErr() bool { return origin == SockExtErrorOriginICMP || origin == SockExtErrorOriginICMP6 } +// SockErrorCause is the cause of a socket error. +type SockErrorCause interface { + // Origin is the source of the error. + Origin() SockErrOrigin + + // Type is the origin specific type of error. + Type() uint8 + + // Code is the origin and type specific error code. + Code() uint8 + + // Info is any extra information about the error. + Info() uint32 +} + +// LocalSockError is a socket error that originated from the local host. +// +// +stateify savable +type LocalSockError struct { + info uint32 +} + +// Origin implements SockErrorCause. +func (*LocalSockError) Origin() SockErrOrigin { + return SockExtErrorOriginLocal +} + +// Type implements SockErrorCause. +func (*LocalSockError) Type() uint8 { + return 0 +} + +// Code implements SockErrorCause. +func (*LocalSockError) Code() uint8 { + return 0 +} + +// Info implements SockErrorCause. +func (l *LocalSockError) Info() uint32 { + return l.info +} + // SockError represents a queue entry in the per-socket error queue. // // +stateify savable @@ -480,15 +522,9 @@ type SockError struct { sockErrorEntry // Err is the error caused by the errant packet. - Err *Error - // ErrOrigin indicates the error origin. - ErrOrigin SockErrOrigin - // ErrType is the type in the ICMP header. - ErrType uint8 - // ErrCode is the code in the ICMP header. - ErrCode uint8 - // ErrInfo is additional info about the error. - ErrInfo uint32 + Err Error + // Cause is the detailed cause of the error. + Cause SockErrorCause // Payload is the errant packet's payload. Payload []byte @@ -538,14 +574,13 @@ func (so *SocketOptions) QueueErr(err *SockError) { } // QueueLocalErr queues a local error onto the local queue. -func (so *SocketOptions) QueueLocalErr(err *Error, net NetworkProtocolNumber, info uint32, dst FullAddress, payload []byte) { +func (so *SocketOptions) QueueLocalErr(err Error, net NetworkProtocolNumber, info uint32, dst FullAddress, payload []byte) { so.QueueErr(&SockError{ - Err: err, - ErrOrigin: SockExtErrorOriginLocal, - ErrInfo: info, - Payload: payload, - Dst: dst, - NetProto: net, + Err: err, + Cause: &LocalSockError{info: info}, + Payload: payload, + Dst: dst, + NetProto: net, }) } @@ -555,9 +590,9 @@ func (so *SocketOptions) GetBindToDevice() int32 { } // SetBindToDevice sets value for SO_BINDTODEVICE option. -func (so *SocketOptions) SetBindToDevice(bindToDevice int32) *Error { +func (so *SocketOptions) SetBindToDevice(bindToDevice int32) Error { if !so.handler.HasNIC(bindToDevice) { - return ErrUnknownDevice + return &ErrUnknownDevice{} } atomic.StoreInt32(&so.bindToDevice, bindToDevice) @@ -565,7 +600,7 @@ func (so *SocketOptions) SetBindToDevice(bindToDevice int32) *Error { } // GetSendBufferSize gets value for SO_SNDBUF option. -func (so *SocketOptions) GetSendBufferSize() (int64, *Error) { +func (so *SocketOptions) GetSendBufferSize() (int64, Error) { if so.handler.IsUnixSocket() { return so.handler.GetSendBufferSize() } diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD index bb30556cf..ee23c9b98 100644 --- a/pkg/tcpip/stack/BUILD +++ b/pkg/tcpip/stack/BUILD @@ -72,6 +72,7 @@ go_library( "nud.go", "packet_buffer.go", "packet_buffer_list.go", + "packet_buffer_unsafe.go", "pending_packets.go", "rand.go", "registration.go", diff --git a/pkg/tcpip/stack/addressable_endpoint_state.go b/pkg/tcpip/stack/addressable_endpoint_state.go index cd423bf71..e5590ecc0 100644 --- a/pkg/tcpip/stack/addressable_endpoint_state.go +++ b/pkg/tcpip/stack/addressable_endpoint_state.go @@ -117,7 +117,7 @@ func (a *AddressableEndpointState) releaseAddressStateLocked(addrState *addressS } // AddAndAcquirePermanentAddress implements AddressableEndpoint. -func (a *AddressableEndpointState) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated bool) (AddressEndpoint, *tcpip.Error) { +func (a *AddressableEndpointState) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated bool) (AddressEndpoint, tcpip.Error) { a.mu.Lock() defer a.mu.Unlock() ep, err := a.addAndAcquireAddressLocked(addr, peb, configType, deprecated, true /* permanent */) @@ -143,10 +143,10 @@ func (a *AddressableEndpointState) AddAndAcquirePermanentAddress(addr tcpip.Addr // AddAndAcquireTemporaryAddress adds a temporary address. // -// Returns tcpip.ErrDuplicateAddress if the address exists. +// Returns *tcpip.ErrDuplicateAddress if the address exists. // // The temporary address's endpoint is acquired and returned. -func (a *AddressableEndpointState) AddAndAcquireTemporaryAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior) (AddressEndpoint, *tcpip.Error) { +func (a *AddressableEndpointState) AddAndAcquireTemporaryAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior) (AddressEndpoint, tcpip.Error) { a.mu.Lock() defer a.mu.Unlock() ep, err := a.addAndAcquireAddressLocked(addr, peb, AddressConfigStatic, false /* deprecated */, false /* permanent */) @@ -176,11 +176,11 @@ func (a *AddressableEndpointState) AddAndAcquireTemporaryAddress(addr tcpip.Addr // If the addressable endpoint already has the address in a non-permanent state, // and addAndAcquireAddressLocked is adding a permanent address, that address is // promoted in place and its properties set to the properties provided. If the -// address already exists in any other state, then tcpip.ErrDuplicateAddress is +// address already exists in any other state, then *tcpip.ErrDuplicateAddress is // returned, regardless the kind of address that is being added. // // Precondition: a.mu must be write locked. -func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated, permanent bool) (*addressState, *tcpip.Error) { +func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated, permanent bool) (*addressState, tcpip.Error) { // attemptAddToPrimary is false when the address is already in the primary // address list. attemptAddToPrimary := true @@ -190,7 +190,7 @@ func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.Address // We are adding a non-permanent address but the address exists. No need // to go any further since we can only promote existing temporary/expired // addresses to permanent. - return nil, tcpip.ErrDuplicateAddress + return nil, &tcpip.ErrDuplicateAddress{} } addrState.mu.Lock() @@ -198,7 +198,7 @@ func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.Address addrState.mu.Unlock() // We are adding a permanent address but a permanent address already // exists. - return nil, tcpip.ErrDuplicateAddress + return nil, &tcpip.ErrDuplicateAddress{} } if addrState.mu.refs == 0 { @@ -293,7 +293,7 @@ func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.Address } // RemovePermanentAddress implements AddressableEndpoint. -func (a *AddressableEndpointState) RemovePermanentAddress(addr tcpip.Address) *tcpip.Error { +func (a *AddressableEndpointState) RemovePermanentAddress(addr tcpip.Address) tcpip.Error { a.mu.Lock() defer a.mu.Unlock() return a.removePermanentAddressLocked(addr) @@ -303,10 +303,10 @@ func (a *AddressableEndpointState) RemovePermanentAddress(addr tcpip.Address) *t // requirements. // // Precondition: a.mu must be write locked. -func (a *AddressableEndpointState) removePermanentAddressLocked(addr tcpip.Address) *tcpip.Error { +func (a *AddressableEndpointState) removePermanentAddressLocked(addr tcpip.Address) tcpip.Error { addrState, ok := a.mu.endpoints[addr] if !ok { - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } return a.removePermanentEndpointLocked(addrState) @@ -314,10 +314,10 @@ func (a *AddressableEndpointState) removePermanentAddressLocked(addr tcpip.Addre // RemovePermanentEndpoint removes the passed endpoint if it is associated with // a and permanent. -func (a *AddressableEndpointState) RemovePermanentEndpoint(ep AddressEndpoint) *tcpip.Error { +func (a *AddressableEndpointState) RemovePermanentEndpoint(ep AddressEndpoint) tcpip.Error { addrState, ok := ep.(*addressState) if !ok || addrState.addressableEndpointState != a { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } a.mu.Lock() @@ -329,9 +329,9 @@ func (a *AddressableEndpointState) RemovePermanentEndpoint(ep AddressEndpoint) * // requirements. // // Precondition: a.mu must be write locked. -func (a *AddressableEndpointState) removePermanentEndpointLocked(addrState *addressState) *tcpip.Error { +func (a *AddressableEndpointState) removePermanentEndpointLocked(addrState *addressState) tcpip.Error { if !addrState.GetKind().IsPermanent() { - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } addrState.SetKind(PermanentExpired) @@ -574,9 +574,11 @@ func (a *AddressableEndpointState) Cleanup() { defer a.mu.Unlock() for _, ep := range a.mu.endpoints { - // removePermanentEndpointLocked returns tcpip.ErrBadLocalAddress if ep is + // removePermanentEndpointLocked returns *tcpip.ErrBadLocalAddress if ep is // not a permanent address. - if err := a.removePermanentEndpointLocked(ep); err != nil && err != tcpip.ErrBadLocalAddress { + switch err := a.removePermanentEndpointLocked(ep); err.(type) { + case nil, *tcpip.ErrBadLocalAddress: + default: panic(fmt.Sprintf("unexpected error from removePermanentEndpointLocked(%s): %s", ep.addr, err)) } } diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go index 5e649cca6..54617f2e6 100644 --- a/pkg/tcpip/stack/conntrack.go +++ b/pkg/tcpip/stack/conntrack.go @@ -198,15 +198,15 @@ type bucket struct { // TCP header. // // Preconditions: pkt.NetworkHeader() is valid. -func packetToTupleID(pkt *PacketBuffer) (tupleID, *tcpip.Error) { +func packetToTupleID(pkt *PacketBuffer) (tupleID, tcpip.Error) { netHeader := pkt.Network() if netHeader.TransportProtocol() != header.TCPProtocolNumber { - return tupleID{}, tcpip.ErrUnknownProtocol + return tupleID{}, &tcpip.ErrUnknownProtocol{} } tcpHeader := header.TCP(pkt.TransportHeader().View()) if len(tcpHeader) < header.TCPMinimumSize { - return tupleID{}, tcpip.ErrUnknownProtocol + return tupleID{}, &tcpip.ErrUnknownProtocol{} } return tupleID{ @@ -617,7 +617,7 @@ func (ct *ConnTrack) reapTupleLocked(tuple *tuple, bucket int, now time.Time) bo return true } -func (ct *ConnTrack) originalDst(epID TransportEndpointID, netProto tcpip.NetworkProtocolNumber) (tcpip.Address, uint16, *tcpip.Error) { +func (ct *ConnTrack) originalDst(epID TransportEndpointID, netProto tcpip.NetworkProtocolNumber) (tcpip.Address, uint16, tcpip.Error) { // Lookup the connection. The reply's original destination // describes the original address. tid := tupleID{ @@ -631,10 +631,10 @@ func (ct *ConnTrack) originalDst(epID TransportEndpointID, netProto tcpip.Networ conn, _ := ct.connForTID(tid) if conn == nil { // Not a tracked connection. - return "", 0, tcpip.ErrNotConnected + return "", 0, &tcpip.ErrNotConnected{} } else if conn.manip == manipNone { // Unmanipulated connection. - return "", 0, tcpip.ErrInvalidOptionValue + return "", 0, &tcpip.ErrInvalidOptionValue{} } return conn.original.dstAddr, conn.original.dstPort, nil diff --git a/pkg/tcpip/stack/forwarding_test.go b/pkg/tcpip/stack/forwarding_test.go index d29c9a49b..c24f56ece 100644 --- a/pkg/tcpip/stack/forwarding_test.go +++ b/pkg/tcpip/stack/forwarding_test.go @@ -41,6 +41,7 @@ const ( protocolNumberOffset = 2 ) +var _ LinkAddressResolver = (*fwdTestNetworkEndpoint)(nil) var _ NetworkEndpoint = (*fwdTestNetworkEndpoint)(nil) // fwdTestNetworkEndpoint is a network-layer protocol endpoint. @@ -55,7 +56,7 @@ type fwdTestNetworkEndpoint struct { dispatcher TransportDispatcher } -func (*fwdTestNetworkEndpoint) Enable() *tcpip.Error { +func (*fwdTestNetworkEndpoint) Enable() tcpip.Error { return nil } @@ -112,7 +113,7 @@ func (f *fwdTestNetworkEndpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNu return f.proto.Number() } -func (f *fwdTestNetworkEndpoint) WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) *tcpip.Error { +func (f *fwdTestNetworkEndpoint) WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) tcpip.Error { // Add the protocol's header to the packet and send it to the link // endpoint. b := pkt.NetworkHeader().Push(fwdTestNetHeaderLen) @@ -124,14 +125,14 @@ func (f *fwdTestNetworkEndpoint) WritePacket(r *Route, gso *GSO, params NetworkH } // WritePackets implements LinkEndpoint.WritePackets. -func (*fwdTestNetworkEndpoint) WritePackets(r *Route, gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, *tcpip.Error) { +func (*fwdTestNetworkEndpoint) WritePackets(r *Route, gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, tcpip.Error) { panic("not implemented") } -func (f *fwdTestNetworkEndpoint) WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) *tcpip.Error { +func (f *fwdTestNetworkEndpoint) WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) tcpip.Error { // The network header should not already be populated. if _, ok := pkt.NetworkHeader().Consume(fwdTestNetHeaderLen); !ok { - return tcpip.ErrMalformedHeader + return &tcpip.ErrMalformedHeader{} } return f.nic.WritePacket(r, nil /* gso */, fwdTestNetNumber, pkt) @@ -153,7 +154,6 @@ type fwdTestNetworkEndpointStats struct{} // IsNetworkEndpointStats implements stack.NetworkEndpointStats. func (*fwdTestNetworkEndpointStats) IsNetworkEndpointStats() {} -var _ LinkAddressResolver = (*fwdTestNetworkProtocol)(nil) var _ NetworkProtocol = (*fwdTestNetworkProtocol)(nil) // fwdTestNetworkProtocol is a network-layer protocol that implements Address @@ -161,10 +161,9 @@ var _ NetworkProtocol = (*fwdTestNetworkProtocol)(nil) type fwdTestNetworkProtocol struct { stack *Stack - addrCache *linkAddrCache - neigh *neighborCache + neighborTable neighborTable addrResolveDelay time.Duration - onLinkAddressResolved func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) + onLinkAddressResolved func(neighborTable, tcpip.Address, tcpip.LinkAddress) onResolveStaticAddress func(tcpip.Address) (tcpip.LinkAddress, bool) mu struct { @@ -197,7 +196,7 @@ func (*fwdTestNetworkProtocol) Parse(pkt *PacketBuffer) (tcpip.TransportProtocol return tcpip.TransportProtocolNumber(netHeader[protocolNumberOffset]), true, true } -func (f *fwdTestNetworkProtocol) NewEndpoint(nic NetworkInterface, _ LinkAddressCache, _ NUDHandler, dispatcher TransportDispatcher) NetworkEndpoint { +func (f *fwdTestNetworkProtocol) NewEndpoint(nic NetworkInterface, dispatcher TransportDispatcher) NetworkEndpoint { e := &fwdTestNetworkEndpoint{ nic: nic, proto: f, @@ -207,35 +206,35 @@ func (f *fwdTestNetworkProtocol) NewEndpoint(nic NetworkInterface, _ LinkAddress return e } -func (*fwdTestNetworkProtocol) SetOption(tcpip.SettableNetworkProtocolOption) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (*fwdTestNetworkProtocol) SetOption(tcpip.SettableNetworkProtocolOption) tcpip.Error { + return &tcpip.ErrUnknownProtocolOption{} } -func (*fwdTestNetworkProtocol) Option(tcpip.GettableNetworkProtocolOption) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (*fwdTestNetworkProtocol) Option(tcpip.GettableNetworkProtocolOption) tcpip.Error { + return &tcpip.ErrUnknownProtocolOption{} } func (*fwdTestNetworkProtocol) Close() {} func (*fwdTestNetworkProtocol) Wait() {} -func (f *fwdTestNetworkProtocol) LinkAddressRequest(addr, _ tcpip.Address, remoteLinkAddr tcpip.LinkAddress, _ NetworkInterface) *tcpip.Error { - if f.onLinkAddressResolved != nil { - time.AfterFunc(f.addrResolveDelay, func() { - f.onLinkAddressResolved(f.addrCache, f.neigh, addr, remoteLinkAddr) +func (f *fwdTestNetworkEndpoint) LinkAddressRequest(addr, _ tcpip.Address, remoteLinkAddr tcpip.LinkAddress) tcpip.Error { + if fn := f.proto.onLinkAddressResolved; fn != nil { + time.AfterFunc(f.proto.addrResolveDelay, func() { + fn(f.proto.neighborTable, addr, remoteLinkAddr) }) } return nil } -func (f *fwdTestNetworkProtocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) { - if f.onResolveStaticAddress != nil { - return f.onResolveStaticAddress(addr) +func (f *fwdTestNetworkEndpoint) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) { + if fn := f.proto.onResolveStaticAddress; fn != nil { + return fn(addr) } return "", false } -func (*fwdTestNetworkProtocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber { +func (*fwdTestNetworkEndpoint) LinkAddressProtocol() tcpip.NetworkProtocolNumber { return fwdTestNetNumber } @@ -319,7 +318,7 @@ func (e *fwdTestLinkEndpoint) LinkAddress() tcpip.LinkAddress { return e.linkAddr } -func (e fwdTestLinkEndpoint) WritePacket(r RouteInfo, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) *tcpip.Error { +func (e fwdTestLinkEndpoint) WritePacket(r RouteInfo, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) tcpip.Error { p := fwdTestPacketInfo{ RemoteLinkAddress: r.RemoteLinkAddress, LocalLinkAddress: r.LocalLinkAddress, @@ -335,7 +334,7 @@ func (e fwdTestLinkEndpoint) WritePacket(r RouteInfo, gso *GSO, protocol tcpip.N } // WritePackets stores outbound packets into the channel. -func (e *fwdTestLinkEndpoint) WritePackets(r RouteInfo, gso *GSO, pkts PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (e *fwdTestLinkEndpoint) WritePackets(r RouteInfo, gso *GSO, pkts PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { n := 0 for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { e.WritePacket(r, gso, protocol, pkt) @@ -401,11 +400,9 @@ func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol, useNeighborC if !ok { t.Fatal("NIC 2 does not exist") } - if useNeighborCache { - // Control the neighbor cache for NIC 2. - proto.neigh = nic.neigh - } else { - proto.addrCache = nic.linkAddrCache + + if l, ok := nic.linkAddrResolvers[fwdTestNetNumber]; ok { + proto.neighborTable = l.neighborTable } // Route all packets to NIC 2. @@ -482,43 +479,35 @@ func TestForwardingWithFakeResolver(t *testing.T) { tests := []struct { name string useNeighborCache bool - proto *fwdTestNetworkProtocol }{ { name: "linkAddrCache", useNeighborCache: false, - proto: &fwdTestNetworkProtocol{ - addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) { - // Any address will be resolved to the link address "c". - cache.AddLinkAddress(addr, "c") - }, - }, }, { name: "neighborCache", useNeighborCache: true, - proto: &fwdTestNetworkProtocol{ + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + proto := fwdTestNetworkProtocol{ addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + onLinkAddressResolved: func(neigh neighborTable, addr tcpip.Address, linkAddr tcpip.LinkAddress) { t.Helper() - if len(remoteLinkAddr) != 0 { - t.Fatalf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) + if len(linkAddr) != 0 { + t.Fatalf("got linkAddr=%q, want unspecified", linkAddr) } // Any address will be resolved to the link address "c". - neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{ + neigh.handleConfirmation(addr, "c", ReachabilityConfirmationFlags{ Solicited: true, Override: false, IsRouter: false, }) }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache) + } + ep1, ep2 := fwdTestNetFactory(t, &proto, test.useNeighborCache) // Inject an inbound packet to address 3 on NIC 1, and see if it is // forwarded to NIC 2. @@ -573,7 +562,7 @@ func TestForwardingWithNoResolver(t *testing.T) { func TestForwardingResolutionFailsForQueuedPackets(t *testing.T) { proto := &fwdTestNetworkProtocol{ addrResolveDelay: 50 * time.Millisecond, - onLinkAddressResolved: func(*linkAddrCache, *neighborCache, tcpip.Address, tcpip.LinkAddress) { + onLinkAddressResolved: func(neighborTable, tcpip.Address, tcpip.LinkAddress) { // Don't resolve the link address. }, } @@ -606,49 +595,38 @@ func TestForwardingWithFakeResolverPartialTimeout(t *testing.T) { tests := []struct { name string useNeighborCache bool - proto *fwdTestNetworkProtocol }{ { name: "linkAddrCache", useNeighborCache: false, - proto: &fwdTestNetworkProtocol{ - addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) { - // Only packets to address 3 will be resolved to the - // link address "c". - if addr == "\x03" { - cache.AddLinkAddress(addr, "c") - } - }, - }, }, { name: "neighborCache", useNeighborCache: true, - proto: &fwdTestNetworkProtocol{ + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + proto := fwdTestNetworkProtocol{ addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + onLinkAddressResolved: func(neigh neighborTable, addr tcpip.Address, linkAddr tcpip.LinkAddress) { t.Helper() - if len(remoteLinkAddr) != 0 { - t.Fatalf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) + if len(linkAddr) != 0 { + t.Fatalf("got linkAddr=%q, want unspecified", linkAddr) } // Only packets to address 3 will be resolved to the // link address "c". if addr == "\x03" { - neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{ + neigh.handleConfirmation(addr, "c", ReachabilityConfirmationFlags{ Solicited: true, Override: false, IsRouter: false, }) } }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache) + } + ep1, ep2 := fwdTestNetFactory(t, &proto, test.useNeighborCache) // Inject an inbound packet to address 4 on NIC 1. This packet should // not be forwarded. @@ -693,43 +671,35 @@ func TestForwardingWithFakeResolverTwoPackets(t *testing.T) { tests := []struct { name string useNeighborCache bool - proto *fwdTestNetworkProtocol }{ { name: "linkAddrCache", useNeighborCache: false, - proto: &fwdTestNetworkProtocol{ - addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) { - // Any packets will be resolved to the link address "c". - cache.AddLinkAddress(addr, "c") - }, - }, }, { name: "neighborCache", useNeighborCache: true, - proto: &fwdTestNetworkProtocol{ + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + proto := fwdTestNetworkProtocol{ addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + onLinkAddressResolved: func(neigh neighborTable, addr tcpip.Address, linkAddr tcpip.LinkAddress) { t.Helper() - if len(remoteLinkAddr) != 0 { - t.Fatalf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) + if len(linkAddr) != 0 { + t.Fatalf("got linkAddr=%q, want unspecified", linkAddr) } // Any packets will be resolved to the link address "c". - neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{ + neigh.handleConfirmation(addr, "c", ReachabilityConfirmationFlags{ Solicited: true, Override: false, IsRouter: false, }) }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache) + } + ep1, ep2 := fwdTestNetFactory(t, &proto, test.useNeighborCache) // Inject two inbound packets to address 3 on NIC 1. for i := 0; i < 2; i++ { @@ -769,43 +739,35 @@ func TestForwardingWithFakeResolverManyPackets(t *testing.T) { tests := []struct { name string useNeighborCache bool - proto *fwdTestNetworkProtocol }{ { name: "linkAddrCache", useNeighborCache: false, - proto: &fwdTestNetworkProtocol{ - addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) { - // Any packets will be resolved to the link address "c". - cache.AddLinkAddress(addr, "c") - }, - }, }, { name: "neighborCache", useNeighborCache: true, - proto: &fwdTestNetworkProtocol{ + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + proto := fwdTestNetworkProtocol{ addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + onLinkAddressResolved: func(neigh neighborTable, addr tcpip.Address, linkAddr tcpip.LinkAddress) { t.Helper() - if len(remoteLinkAddr) != 0 { - t.Fatalf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) + if len(linkAddr) != 0 { + t.Fatalf("got linkAddr=%q, want unspecified", linkAddr) } // Any packets will be resolved to the link address "c". - neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{ + neigh.handleConfirmation(addr, "c", ReachabilityConfirmationFlags{ Solicited: true, Override: false, IsRouter: false, }) }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache) + } + ep1, ep2 := fwdTestNetFactory(t, &proto, test.useNeighborCache) for i := 0; i < maxPendingPacketsPerResolution+5; i++ { // Inject inbound 'maxPendingPacketsPerResolution + 5' packets on NIC 1. @@ -864,38 +826,31 @@ func TestForwardingWithFakeResolverManyResolutions(t *testing.T) { { name: "linkAddrCache", useNeighborCache: false, - proto: &fwdTestNetworkProtocol{ - addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, _ tcpip.LinkAddress) { - // Any packets will be resolved to the link address "c". - cache.AddLinkAddress(addr, "c") - }, - }, }, { name: "neighborCache", useNeighborCache: true, - proto: &fwdTestNetworkProtocol{ + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + proto := fwdTestNetworkProtocol{ addrResolveDelay: 500 * time.Millisecond, - onLinkAddressResolved: func(cache *linkAddrCache, neigh *neighborCache, addr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + onLinkAddressResolved: func(neigh neighborTable, addr tcpip.Address, linkAddr tcpip.LinkAddress) { t.Helper() - if len(remoteLinkAddr) != 0 { - t.Fatalf("got remoteLinkAddr=%q, want unspecified", remoteLinkAddr) + if len(linkAddr) != 0 { + t.Fatalf("got linkAddr=%q, want unspecified", linkAddr) } // Any packets will be resolved to the link address "c". - neigh.HandleConfirmation(addr, "c", ReachabilityConfirmationFlags{ + neigh.handleConfirmation(addr, "c", ReachabilityConfirmationFlags{ Solicited: true, Override: false, IsRouter: false, }) }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ep1, ep2 := fwdTestNetFactory(t, test.proto, test.useNeighborCache) + } + ep1, ep2 := fwdTestNetFactory(t, &proto, test.useNeighborCache) for i := 0; i < maxPendingResolutions+5; i++ { // Inject inbound 'maxPendingResolutions + 5' packets on NIC 1. diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go index 04af933a6..63832c200 100644 --- a/pkg/tcpip/stack/iptables.go +++ b/pkg/tcpip/stack/iptables.go @@ -229,7 +229,7 @@ func (it *IPTables) GetTable(id TableID, ipv6 bool) Table { // ReplaceTable replaces or inserts table by name. It panics when an invalid id // is provided. -func (it *IPTables) ReplaceTable(id TableID, table Table, ipv6 bool) *tcpip.Error { +func (it *IPTables) ReplaceTable(id TableID, table Table, ipv6 bool) tcpip.Error { it.mu.Lock() defer it.mu.Unlock() // If iptables is being enabled, initialize the conntrack table and @@ -483,11 +483,11 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx // OriginalDst returns the original destination of redirected connections. It // returns an error if the connection doesn't exist or isn't redirected. -func (it *IPTables) OriginalDst(epID TransportEndpointID, netProto tcpip.NetworkProtocolNumber) (tcpip.Address, uint16, *tcpip.Error) { +func (it *IPTables) OriginalDst(epID TransportEndpointID, netProto tcpip.NetworkProtocolNumber) (tcpip.Address, uint16, tcpip.Error) { it.mu.RLock() defer it.mu.RUnlock() if !it.modified { - return "", 0, tcpip.ErrNotConnected + return "", 0, &tcpip.ErrNotConnected{} } return it.connections.originalDst(epID, netProto) } diff --git a/pkg/tcpip/stack/linkaddrcache.go b/pkg/tcpip/stack/linkaddrcache.go index ba6d56a7d..5b6b58b1d 100644 --- a/pkg/tcpip/stack/linkaddrcache.go +++ b/pkg/tcpip/stack/linkaddrcache.go @@ -24,8 +24,6 @@ import ( const linkAddrCacheSize = 512 // max cache entries -var _ LinkAddressCache = (*linkAddrCache)(nil) - // linkAddrCache is a fixed-sized cache mapping IP addresses to link addresses. // // The entries are stored in a ring buffer, oldest entry replaced first. @@ -34,6 +32,8 @@ var _ LinkAddressCache = (*linkAddrCache)(nil) type linkAddrCache struct { nic *NIC + linkRes LinkAddressResolver + // ageLimit is how long a cache entry is valid for. ageLimit time.Duration @@ -45,7 +45,7 @@ type linkAddrCache struct { // resolved before failing. resolutionAttempts int - cache struct { + mu struct { sync.Mutex table map[tcpip.Address]*linkAddrEntry lru linkAddrEntryList @@ -83,32 +83,32 @@ type linkAddrEntry struct { cache *linkAddrCache - // TODO(gvisor.dev/issue/5150): move these fields under mu. - // mu protects the fields below. - mu sync.RWMutex + mu struct { + sync.RWMutex - addr tcpip.Address - linkAddr tcpip.LinkAddress - expiration time.Time - s entryState + addr tcpip.Address + linkAddr tcpip.LinkAddress + expiration time.Time + s entryState - // done is closed when address resolution is complete. It is nil iff s is - // incomplete and resolution is not yet in progress. - done chan struct{} + // done is closed when address resolution is complete. It is nil iff s is + // incomplete and resolution is not yet in progress. + done chan struct{} - // onResolve is called with the result of address resolution. - onResolve []func(LinkResolutionResult) + // onResolve is called with the result of address resolution. + onResolve []func(LinkResolutionResult) + } } func (e *linkAddrEntry) notifyCompletionLocked(linkAddr tcpip.LinkAddress) { res := LinkResolutionResult{LinkAddress: linkAddr, Success: len(linkAddr) != 0} - for _, callback := range e.onResolve { + for _, callback := range e.mu.onResolve { callback(res) } - e.onResolve = nil - if ch := e.done; ch != nil { + e.mu.onResolve = nil + if ch := e.mu.done; ch != nil { close(ch) - e.done = nil + e.mu.done = nil // Dequeue the pending packets in a new goroutine to not hold up the current // goroutine as writing packets may be a costly operation. // @@ -129,30 +129,30 @@ func (e *linkAddrEntry) notifyCompletionLocked(linkAddr tcpip.LinkAddress) { // // Precondition: e.mu must be locked func (e *linkAddrEntry) changeStateLocked(ns entryState, expiration time.Time) { - if e.s == incomplete && ns == ready { - e.notifyCompletionLocked(e.linkAddr) + if e.mu.s == incomplete && ns == ready { + e.notifyCompletionLocked(e.mu.linkAddr) } - if expiration.IsZero() || expiration.After(e.expiration) { - e.expiration = expiration + if expiration.IsZero() || expiration.After(e.mu.expiration) { + e.mu.expiration = expiration } - e.s = ns + e.mu.s = ns } // add adds a k -> v mapping to the cache. -func (c *linkAddrCache) AddLinkAddress(k tcpip.Address, v tcpip.LinkAddress) { +func (c *linkAddrCache) add(k tcpip.Address, v tcpip.LinkAddress) { // Calculate expiration time before acquiring the lock, since expiration is // relative to the time when information was learned, rather than when it // happened to be inserted into the cache. expiration := time.Now().Add(c.ageLimit) - c.cache.Lock() + c.mu.Lock() entry := c.getOrCreateEntryLocked(k) - c.cache.Unlock() - entry.mu.Lock() defer entry.mu.Unlock() - entry.linkAddr = v + c.mu.Unlock() + + entry.mu.linkAddr = v entry.changeStateLocked(ready, expiration) } @@ -166,18 +166,18 @@ func (c *linkAddrCache) AddLinkAddress(k tcpip.Address, v tcpip.LinkAddress) { // cache is not full, a new entry with state incomplete is allocated and // returned. func (c *linkAddrCache) getOrCreateEntryLocked(k tcpip.Address) *linkAddrEntry { - if entry, ok := c.cache.table[k]; ok { - c.cache.lru.Remove(entry) - c.cache.lru.PushFront(entry) + if entry, ok := c.mu.table[k]; ok { + c.mu.lru.Remove(entry) + c.mu.lru.PushFront(entry) return entry } var entry *linkAddrEntry - if len(c.cache.table) == linkAddrCacheSize { - entry = c.cache.lru.Back() + if len(c.mu.table) == linkAddrCacheSize { + entry = c.mu.lru.Back() entry.mu.Lock() - delete(c.cache.table, entry.addr) - c.cache.lru.Remove(entry) + delete(c.mu.table, entry.mu.addr) + c.mu.lru.Remove(entry) // Wake waiters and mark the soon-to-be-reused entry as expired. entry.notifyCompletionLocked("" /* linkAddr */) @@ -188,53 +188,55 @@ func (c *linkAddrCache) getOrCreateEntryLocked(k tcpip.Address) *linkAddrEntry { *entry = linkAddrEntry{ cache: c, - addr: k, - s: incomplete, } - c.cache.table[k] = entry - c.cache.lru.PushFront(entry) + entry.mu.Lock() + entry.mu.addr = k + entry.mu.s = incomplete + entry.mu.Unlock() + c.mu.table[k] = entry + c.mu.lru.PushFront(entry) return entry } -// get reports any known link address for k. -func (c *linkAddrCache) get(k tcpip.Address, linkRes LinkAddressResolver, localAddr tcpip.Address, nic NetworkInterface, onResolve func(LinkResolutionResult)) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) { - c.cache.Lock() - defer c.cache.Unlock() - entry := c.getOrCreateEntryLocked(k) +// get reports any known link address for addr. +func (c *linkAddrCache) get(addr, localAddr tcpip.Address, onResolve func(LinkResolutionResult)) (tcpip.LinkAddress, <-chan struct{}, tcpip.Error) { + c.mu.Lock() + entry := c.getOrCreateEntryLocked(addr) entry.mu.Lock() defer entry.mu.Unlock() + c.mu.Unlock() - switch s := entry.s; s { + switch s := entry.mu.s; s { case ready: - if !time.Now().After(entry.expiration) { + if !time.Now().After(entry.mu.expiration) { // Not expired. if onResolve != nil { - onResolve(LinkResolutionResult{LinkAddress: entry.linkAddr, Success: true}) + onResolve(LinkResolutionResult{LinkAddress: entry.mu.linkAddr, Success: true}) } - return entry.linkAddr, nil, nil + return entry.mu.linkAddr, nil, nil } entry.changeStateLocked(incomplete, time.Time{}) fallthrough case incomplete: if onResolve != nil { - entry.onResolve = append(entry.onResolve, onResolve) + entry.mu.onResolve = append(entry.mu.onResolve, onResolve) } - if entry.done == nil { - entry.done = make(chan struct{}) - go c.startAddressResolution(k, linkRes, localAddr, nic, entry.done) // S/R-SAFE: link non-savable; wakers dropped synchronously. + if entry.mu.done == nil { + entry.mu.done = make(chan struct{}) + go c.startAddressResolution(addr, localAddr, entry.mu.done) // S/R-SAFE: link non-savable; wakers dropped synchronously. } - return entry.linkAddr, entry.done, tcpip.ErrWouldBlock + return entry.mu.linkAddr, entry.mu.done, &tcpip.ErrWouldBlock{} default: panic(fmt.Sprintf("invalid cache entry state: %s", s)) } } -func (c *linkAddrCache) startAddressResolution(k tcpip.Address, linkRes LinkAddressResolver, localAddr tcpip.Address, nic NetworkInterface, done <-chan struct{}) { +func (c *linkAddrCache) startAddressResolution(k tcpip.Address, localAddr tcpip.Address, done <-chan struct{}) { for i := 0; ; i++ { // Send link request, then wait for the timeout limit and check // whether the request succeeded. - linkRes.LinkAddressRequest(k, localAddr, "" /* linkAddr */, nic) + c.linkRes.LinkAddressRequest(k, localAddr, "" /* linkAddr */) select { case now := <-time.After(c.resolutionTimeout): @@ -251,9 +253,9 @@ func (c *linkAddrCache) startAddressResolution(k tcpip.Address, linkRes LinkAddr // succeeded and mark the entry accordingly. Returns true if request can stop, // false if another request should be sent. func (c *linkAddrCache) checkLinkRequest(now time.Time, k tcpip.Address, attempt int) bool { - c.cache.Lock() - defer c.cache.Unlock() - entry, ok := c.cache.table[k] + c.mu.Lock() + defer c.mu.Unlock() + entry, ok := c.mu.table[k] if !ok { // Entry was evicted from the cache. return true @@ -261,7 +263,7 @@ func (c *linkAddrCache) checkLinkRequest(now time.Time, k tcpip.Address, attempt entry.mu.Lock() defer entry.mu.Unlock() - switch s := entry.s; s { + switch s := entry.mu.s; s { case ready: // Entry was made ready by resolver. case incomplete: @@ -271,20 +273,87 @@ func (c *linkAddrCache) checkLinkRequest(now time.Time, k tcpip.Address, attempt } // Max number of retries reached, delete entry. entry.notifyCompletionLocked("" /* linkAddr */) - delete(c.cache.table, k) + delete(c.mu.table, k) default: panic(fmt.Sprintf("invalid cache entry state: %s", s)) } return true } -func newLinkAddrCache(nic *NIC, ageLimit, resolutionTimeout time.Duration, resolutionAttempts int) *linkAddrCache { - c := &linkAddrCache{ +func (c *linkAddrCache) init(nic *NIC, ageLimit, resolutionTimeout time.Duration, resolutionAttempts int, linkRes LinkAddressResolver) { + *c = linkAddrCache{ nic: nic, + linkRes: linkRes, ageLimit: ageLimit, resolutionTimeout: resolutionTimeout, resolutionAttempts: resolutionAttempts, } - c.cache.table = make(map[tcpip.Address]*linkAddrEntry, linkAddrCacheSize) - return c + + c.mu.Lock() + c.mu.table = make(map[tcpip.Address]*linkAddrEntry, linkAddrCacheSize) + c.mu.Unlock() +} + +var _ neighborTable = (*linkAddrCache)(nil) + +func (*linkAddrCache) neighbors() ([]NeighborEntry, tcpip.Error) { + return nil, &tcpip.ErrNotSupported{} +} + +func (c *linkAddrCache) addStaticEntry(addr tcpip.Address, linkAddr tcpip.LinkAddress) { + c.add(addr, linkAddr) +} + +func (*linkAddrCache) remove(addr tcpip.Address) tcpip.Error { + return &tcpip.ErrNotSupported{} +} + +func (*linkAddrCache) removeAll() tcpip.Error { + return &tcpip.ErrNotSupported{} +} + +func (c *linkAddrCache) handleProbe(addr tcpip.Address, linkAddr tcpip.LinkAddress) { + if len(linkAddr) != 0 { + // NUD allows probes without a link address but linkAddrCache + // is a simple neighbor table which does not implement NUD. + // + // As per RFC 4861 section 4.3, + // + // Source link-layer address + // The link-layer address for the sender. MUST NOT be + // included when the source IP address is the + // unspecified address. Otherwise, on link layers + // that have addresses this option MUST be included in + // multicast solicitations and SHOULD be included in + // unicast solicitations. + c.add(addr, linkAddr) + } +} + +func (c *linkAddrCache) handleConfirmation(addr tcpip.Address, linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) { + if len(linkAddr) != 0 { + // NUD allows confirmations without a link address but linkAddrCache + // is a simple neighbor table which does not implement NUD. + // + // As per RFC 4861 section 4.4, + // + // Target link-layer address + // The link-layer address for the target, i.e., the + // sender of the advertisement. This option MUST be + // included on link layers that have addresses when + // responding to multicast solicitations. When + // responding to a unicast Neighbor Solicitation this + // option SHOULD be included. + c.add(addr, linkAddr) + } +} + +func (c *linkAddrCache) handleUpperLevelConfirmation(tcpip.Address) {} + +func (*linkAddrCache) nudConfig() (NUDConfigurations, tcpip.Error) { + return NUDConfigurations{}, &tcpip.ErrNotSupported{} +} + +func (*linkAddrCache) setNUDConfig(NUDConfigurations) tcpip.Error { + return &tcpip.ErrNotSupported{} } diff --git a/pkg/tcpip/stack/linkaddrcache_test.go b/pkg/tcpip/stack/linkaddrcache_test.go index 88fbbf3fe..9e7f331c9 100644 --- a/pkg/tcpip/stack/linkaddrcache_test.go +++ b/pkg/tcpip/stack/linkaddrcache_test.go @@ -48,7 +48,7 @@ type testLinkAddressResolver struct { onLinkAddressRequest func() } -func (r *testLinkAddressResolver) LinkAddressRequest(targetAddr, _ tcpip.Address, _ tcpip.LinkAddress, _ NetworkInterface) *tcpip.Error { +func (r *testLinkAddressResolver) LinkAddressRequest(targetAddr, _ tcpip.Address, _ tcpip.LinkAddress) tcpip.Error { // TODO(gvisor.dev/issue/5141): Use a fake clock. time.AfterFunc(r.delay, func() { r.fakeRequest(targetAddr) }) if f := r.onLinkAddressRequest; f != nil { @@ -60,7 +60,7 @@ func (r *testLinkAddressResolver) LinkAddressRequest(targetAddr, _ tcpip.Address func (r *testLinkAddressResolver) fakeRequest(addr tcpip.Address) { for _, ta := range testAddrs { if ta.addr == addr { - r.cache.AddLinkAddress(ta.addr, ta.linkAddr) + r.cache.add(ta.addr, ta.linkAddr) break } } @@ -77,13 +77,13 @@ func (*testLinkAddressResolver) LinkAddressProtocol() tcpip.NetworkProtocolNumbe return 1 } -func getBlocking(c *linkAddrCache, addr tcpip.Address, linkRes LinkAddressResolver) (tcpip.LinkAddress, *tcpip.Error) { +func getBlocking(c *linkAddrCache, addr tcpip.Address) (tcpip.LinkAddress, tcpip.Error) { var attemptedResolution bool for { - got, ch, err := c.get(addr, linkRes, "", nil, nil) - if err == tcpip.ErrWouldBlock { + got, ch, err := c.get(addr, "", nil) + if _, ok := err.(*tcpip.ErrWouldBlock); ok { if attemptedResolution { - return got, tcpip.ErrTimeout + return got, &tcpip.ErrTimeout{} } attemptedResolution = true <-ch @@ -100,50 +100,52 @@ func newEmptyNIC() *NIC { } func TestCacheOverflow(t *testing.T) { - c := newLinkAddrCache(newEmptyNIC(), 1<<63-1, 1*time.Second, 3) + var c linkAddrCache + c.init(newEmptyNIC(), 1<<63-1, 1*time.Second, 3, nil) for i := len(testAddrs) - 1; i >= 0; i-- { e := testAddrs[i] - c.AddLinkAddress(e.addr, e.linkAddr) - got, _, err := c.get(e.addr, nil, "", nil, nil) + c.add(e.addr, e.linkAddr) + got, _, err := c.get(e.addr, "", nil) if err != nil { - t.Errorf("insert %d, c.get(%s, nil, '', nil, nil): %s", i, e.addr, err) + t.Errorf("insert %d, c.get(%s, '', nil): %s", i, e.addr, err) } if got != e.linkAddr { - t.Errorf("insert %d, got c.get(%s, nil, '', nil, nil) = %s, want = %s", i, e.addr, got, e.linkAddr) + t.Errorf("insert %d, got c.get(%s, '', nil) = %s, want = %s", i, e.addr, got, e.linkAddr) } } // Expect to find at least half of the most recent entries. for i := 0; i < linkAddrCacheSize/2; i++ { e := testAddrs[i] - got, _, err := c.get(e.addr, nil, "", nil, nil) + got, _, err := c.get(e.addr, "", nil) if err != nil { - t.Errorf("check %d, c.get(%s, nil, '', nil, nil): %s", i, e.addr, err) + t.Errorf("check %d, c.get(%s, '', nil): %s", i, e.addr, err) } if got != e.linkAddr { - t.Errorf("check %d, got c.get(%s, nil, '', nil, nil) = %s, want = %s", i, e.addr, got, e.linkAddr) + t.Errorf("check %d, got c.get(%s, '', nil) = %s, want = %s", i, e.addr, got, e.linkAddr) } } // The earliest entries should no longer be in the cache. - c.cache.Lock() - defer c.cache.Unlock() + c.mu.Lock() + defer c.mu.Unlock() for i := len(testAddrs) - 1; i >= len(testAddrs)-linkAddrCacheSize; i-- { e := testAddrs[i] - if entry, ok := c.cache.table[e.addr]; ok { - t.Errorf("unexpected entry at c.cache.table[%s]: %#v", e.addr, entry) + if entry, ok := c.mu.table[e.addr]; ok { + t.Errorf("unexpected entry at c.mu.table[%s]: %#v", e.addr, entry) } } } func TestCacheConcurrent(t *testing.T) { - c := newLinkAddrCache(newEmptyNIC(), 1<<63-1, 1*time.Second, 3) - linkRes := &testLinkAddressResolver{cache: c} + var c linkAddrCache + linkRes := &testLinkAddressResolver{cache: &c} + c.init(newEmptyNIC(), 1<<63-1, 1*time.Second, 3, linkRes) var wg sync.WaitGroup for r := 0; r < 16; r++ { wg.Add(1) go func() { for _, e := range testAddrs { - c.AddLinkAddress(e.addr, e.linkAddr) + c.add(e.addr, e.linkAddr) } wg.Done() }() @@ -154,54 +156,57 @@ func TestCacheConcurrent(t *testing.T) { // can fit in the cache, so our eviction strategy requires that // the last entry be present and the first be missing. e := testAddrs[len(testAddrs)-1] - got, _, err := c.get(e.addr, linkRes, "", nil, nil) + got, _, err := c.get(e.addr, "", nil) if err != nil { - t.Errorf("c.get(%s, _, '', nil, nil): %s", e.addr, err) + t.Errorf("c.get(%s, '', nil): %s", e.addr, err) } if got != e.linkAddr { - t.Errorf("got c.get(%s, _, '', nil, nil) = %s, want = %s", e.addr, got, e.linkAddr) + t.Errorf("got c.get(%s, '', nil) = %s, want = %s", e.addr, got, e.linkAddr) } e = testAddrs[0] - c.cache.Lock() - defer c.cache.Unlock() - if entry, ok := c.cache.table[e.addr]; ok { - t.Errorf("unexpected entry at c.cache.table[%s]: %#v", e.addr, entry) + c.mu.Lock() + defer c.mu.Unlock() + if entry, ok := c.mu.table[e.addr]; ok { + t.Errorf("unexpected entry at c.mu.table[%s]: %#v", e.addr, entry) } } func TestCacheAgeLimit(t *testing.T) { - c := newLinkAddrCache(newEmptyNIC(), 1*time.Millisecond, 1*time.Second, 3) - linkRes := &testLinkAddressResolver{cache: c} + var c linkAddrCache + linkRes := &testLinkAddressResolver{cache: &c} + c.init(newEmptyNIC(), 1*time.Millisecond, 1*time.Second, 3, linkRes) e := testAddrs[0] - c.AddLinkAddress(e.addr, e.linkAddr) + c.add(e.addr, e.linkAddr) time.Sleep(50 * time.Millisecond) - if _, _, err := c.get(e.addr, linkRes, "", nil, nil); err != tcpip.ErrWouldBlock { - t.Errorf("got c.get(%s, _, '', nil, nil) = %s, want = ErrWouldBlock", e.addr, err) + _, _, err := c.get(e.addr, "", nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got c.get(%s, '', nil) = %s, want = ErrWouldBlock", e.addr, err) } } func TestCacheReplace(t *testing.T) { - c := newLinkAddrCache(newEmptyNIC(), 1<<63-1, 1*time.Second, 3) + var c linkAddrCache + c.init(newEmptyNIC(), 1<<63-1, 1*time.Second, 3, nil) e := testAddrs[0] l2 := e.linkAddr + "2" - c.AddLinkAddress(e.addr, e.linkAddr) - got, _, err := c.get(e.addr, nil, "", nil, nil) + c.add(e.addr, e.linkAddr) + got, _, err := c.get(e.addr, "", nil) if err != nil { - t.Errorf("c.get(%s, nil, '', nil, nil): %s", e.addr, err) + t.Errorf("c.get(%s, '', nil): %s", e.addr, err) } if got != e.linkAddr { - t.Errorf("got c.get(%s, nil, '', nil, nil) = %s, want = %s", e.addr, got, e.linkAddr) + t.Errorf("got c.get(%s, '', nil) = %s, want = %s", e.addr, got, e.linkAddr) } - c.AddLinkAddress(e.addr, l2) - got, _, err = c.get(e.addr, nil, "", nil, nil) + c.add(e.addr, l2) + got, _, err = c.get(e.addr, "", nil) if err != nil { - t.Errorf("c.get(%s, nil, '', nil, nil): %s", e.addr, err) + t.Errorf("c.get(%s, '', nil): %s", e.addr, err) } if got != l2 { - t.Errorf("got c.get(%s, nil, '', nil, nil) = %s, want = %s", e.addr, got, l2) + t.Errorf("got c.get(%s, '', nil) = %s, want = %s", e.addr, got, l2) } } @@ -212,34 +217,36 @@ func TestCacheResolution(t *testing.T) { // // Using a large resolution timeout decreases the probability of experiencing // this race condition and does not affect how long this test takes to run. - c := newLinkAddrCache(newEmptyNIC(), 1<<63-1, math.MaxInt64, 1) - linkRes := &testLinkAddressResolver{cache: c} + var c linkAddrCache + linkRes := &testLinkAddressResolver{cache: &c} + c.init(newEmptyNIC(), 1<<63-1, math.MaxInt64, 1, linkRes) for i, ta := range testAddrs { - got, err := getBlocking(c, ta.addr, linkRes) + got, err := getBlocking(&c, ta.addr) if err != nil { - t.Errorf("check %d, getBlocking(_, %s, _): %s", i, ta.addr, err) + t.Errorf("check %d, getBlocking(_, %s): %s", i, ta.addr, err) } if got != ta.linkAddr { - t.Errorf("check %d, got getBlocking(_, %s, _) = %s, want = %s", i, ta.addr, got, ta.linkAddr) + t.Errorf("check %d, got getBlocking(_, %s) = %s, want = %s", i, ta.addr, got, ta.linkAddr) } } // Check that after resolved, address stays in the cache and never returns WouldBlock. for i := 0; i < 10; i++ { e := testAddrs[len(testAddrs)-1] - got, _, err := c.get(e.addr, linkRes, "", nil, nil) + got, _, err := c.get(e.addr, "", nil) if err != nil { - t.Errorf("c.get(%s, _, '', nil, nil): %s", e.addr, err) + t.Errorf("c.get(%s, '', nil): %s", e.addr, err) } if got != e.linkAddr { - t.Errorf("got c.get(%s, _, '', nil, nil) = %s, want = %s", e.addr, got, e.linkAddr) + t.Errorf("got c.get(%s, '', nil) = %s, want = %s", e.addr, got, e.linkAddr) } } } func TestCacheResolutionFailed(t *testing.T) { - c := newLinkAddrCache(newEmptyNIC(), 1<<63-1, 10*time.Millisecond, 5) - linkRes := &testLinkAddressResolver{cache: c} + var c linkAddrCache + linkRes := &testLinkAddressResolver{cache: &c} + c.init(newEmptyNIC(), 1<<63-1, 10*time.Millisecond, 5, linkRes) var requestCount uint32 linkRes.onLinkAddressRequest = func() { @@ -248,19 +255,20 @@ func TestCacheResolutionFailed(t *testing.T) { // First, sanity check that resolution is working... e := testAddrs[0] - got, err := getBlocking(c, e.addr, linkRes) + got, err := getBlocking(&c, e.addr) if err != nil { - t.Errorf("getBlocking(_, %s, _): %s", e.addr, err) + t.Errorf("getBlocking(_, %s): %s", e.addr, err) } if got != e.linkAddr { - t.Errorf("got getBlocking(_, %s, _) = %s, want = %s", e.addr, got, e.linkAddr) + t.Errorf("got getBlocking(_, %s) = %s, want = %s", e.addr, got, e.linkAddr) } before := atomic.LoadUint32(&requestCount) e.addr += "2" - if a, err := getBlocking(c, e.addr, linkRes); err != tcpip.ErrTimeout { - t.Errorf("got getBlocking(_, %s, _) = (%s, %s), want = (_, %s)", e.addr, a, err, tcpip.ErrTimeout) + a, err := getBlocking(&c, e.addr) + if _, ok := err.(*tcpip.ErrTimeout); !ok { + t.Errorf("got getBlocking(_, %s) = (%s, %s), want = (_, %s)", e.addr, a, err, &tcpip.ErrTimeout{}) } if got, want := int(atomic.LoadUint32(&requestCount)-before), c.resolutionAttempts; got != want { @@ -271,11 +279,13 @@ func TestCacheResolutionFailed(t *testing.T) { func TestCacheResolutionTimeout(t *testing.T) { resolverDelay := 500 * time.Millisecond expiration := resolverDelay / 10 - c := newLinkAddrCache(newEmptyNIC(), expiration, 1*time.Millisecond, 3) - linkRes := &testLinkAddressResolver{cache: c, delay: resolverDelay} + var c linkAddrCache + linkRes := &testLinkAddressResolver{cache: &c, delay: resolverDelay} + c.init(newEmptyNIC(), expiration, 1*time.Millisecond, 3, linkRes) e := testAddrs[0] - if a, err := getBlocking(c, e.addr, linkRes); err != tcpip.ErrTimeout { - t.Errorf("got getBlocking(_, %s, _) = (%s, %s), want = (_, %s)", e.addr, a, err, tcpip.ErrTimeout) + a, err := getBlocking(&c, e.addr) + if _, ok := err.(*tcpip.ErrTimeout); !ok { + t.Errorf("got getBlocking(_, %s) = (%s, %s), want = (_, %s)", e.addr, a, err, &tcpip.ErrTimeout{}) } } diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index d7bbb25ea..0238605af 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -104,7 +104,7 @@ type ndpDADEvent struct { nicID tcpip.NICID addr tcpip.Address resolved bool - err *tcpip.Error + err tcpip.Error } type ndpRouterEvent struct { @@ -174,7 +174,7 @@ type ndpDispatcher struct { } // Implements ipv6.NDPDispatcher.OnDuplicateAddressDetectionStatus. -func (n *ndpDispatcher) OnDuplicateAddressDetectionStatus(nicID tcpip.NICID, addr tcpip.Address, resolved bool, err *tcpip.Error) { +func (n *ndpDispatcher) OnDuplicateAddressDetectionStatus(nicID tcpip.NICID, addr tcpip.Address, resolved bool, err tcpip.Error) { if n.dadC != nil { n.dadC <- ndpDADEvent{ nicID, @@ -311,7 +311,7 @@ func (l *channelLinkWithHeaderLength) MaxHeaderLength() uint16 { // Check e to make sure that the event is for addr on nic with ID 1, and the // resolved flag set to resolved with the specified err. -func checkDADEvent(e ndpDADEvent, nicID tcpip.NICID, addr tcpip.Address, resolved bool, err *tcpip.Error) string { +func checkDADEvent(e ndpDADEvent, nicID tcpip.NICID, addr tcpip.Address, resolved bool, err tcpip.Error) string { return cmp.Diff(ndpDADEvent{nicID: nicID, addr: addr, resolved: resolved, err: err}, e, cmp.AllowUnexported(e)) } @@ -465,8 +465,8 @@ func TestDADResolve(t *testing.T) { // tentative address. { r, err := s.FindRoute(nicID, "", addr2, header.IPv6ProtocolNumber, false) - if err != tcpip.ErrNoRoute { - t.Errorf("got FindRoute(%d, '', %s, %d, false) = (%+v, %v), want = (_, %s)", nicID, addr2, header.IPv6ProtocolNumber, r, err, tcpip.ErrNoRoute) + if _, ok := err.(*tcpip.ErrNoRoute); !ok { + t.Errorf("got FindRoute(%d, '', %s, %d, false) = (%+v, %v), want = (_, %s)", nicID, addr2, header.IPv6ProtocolNumber, r, err, &tcpip.ErrNoRoute{}) } if r != nil { r.Release() @@ -474,8 +474,8 @@ func TestDADResolve(t *testing.T) { } { r, err := s.FindRoute(nicID, addr1, addr2, header.IPv6ProtocolNumber, false) - if err != tcpip.ErrNoRoute { - t.Errorf("got FindRoute(%d, %s, %s, %d, false) = (%+v, %v), want = (_, %s)", nicID, addr1, addr2, header.IPv6ProtocolNumber, r, err, tcpip.ErrNoRoute) + if _, ok := err.(*tcpip.ErrNoRoute); !ok { + t.Errorf("got FindRoute(%d, %s, %s, %d, false) = (%+v, %v), want = (_, %s)", nicID, addr1, addr2, header.IPv6ProtocolNumber, r, err, &tcpip.ErrNoRoute{}) } if r != nil { r.Release() @@ -2796,14 +2796,8 @@ func stackAndNdpDispatcherWithDefaultRoute(t *testing.T, nicID tcpip.NICID, useN NIC: nicID, }}) - if useNeighborCache { - if err := s.AddStaticNeighbor(nicID, llAddr3, linkAddr3); err != nil { - t.Fatalf("s.AddStaticNeighbor(%d, %s, %s): %s", nicID, llAddr3, linkAddr3, err) - } - } else { - if err := s.AddLinkAddress(nicID, llAddr3, linkAddr3); err != nil { - t.Fatalf("s.AddLinkAddress(%d, %s, %s): %s", nicID, llAddr3, linkAddr3, err) - } + if err := s.AddStaticNeighbor(nicID, ipv6.ProtocolNumber, llAddr3, linkAddr3); err != nil { + t.Fatalf("s.AddStaticNeighbor(%d, %d, %s, %s): %s", nicID, ipv6.ProtocolNumber, llAddr3, linkAddr3, err) } return ndpDisp, e, s } @@ -3222,8 +3216,11 @@ func TestAutoGenAddrJobDeprecation(t *testing.T) { defer ep.Close() ep.SocketOptions().SetV6Only(true) - if err := ep.Connect(dstAddr); err != tcpip.ErrNoRoute { - t.Errorf("got ep.Connect(%+v) = %s, want = %s", dstAddr, err, tcpip.ErrNoRoute) + { + err := ep.Connect(dstAddr) + if _, ok := err.(*tcpip.ErrNoRoute); !ok { + t.Errorf("got ep.Connect(%+v) = %s, want = %s", dstAddr, err, &tcpip.ErrNoRoute{}) + } } }) } diff --git a/pkg/tcpip/stack/neighbor_cache.go b/pkg/tcpip/stack/neighbor_cache.go index 204196d00..7e3132058 100644 --- a/pkg/tcpip/stack/neighbor_cache.go +++ b/pkg/tcpip/stack/neighbor_cache.go @@ -42,11 +42,10 @@ type NeighborStats struct { // 2. Static entries are explicitly added by a user and have no expiration. // Their state is always Static. The amount of static entries stored in the // cache is unbounded. -// -// neighborCache implements NUDHandler. type neighborCache struct { - nic *NIC - state *NUDState + nic *NIC + state *NUDState + linkRes LinkAddressResolver // mu protects the fields below. mu sync.RWMutex @@ -62,8 +61,6 @@ type neighborCache struct { } } -var _ NUDHandler = (*neighborCache)(nil) - // getOrCreateEntry retrieves a cache entry associated with addr. The // returned entry is always refreshed in the cache (it is reachable via the // map, and its place is bumped in LRU). @@ -73,7 +70,7 @@ var _ NUDHandler = (*neighborCache)(nil) // reset to state incomplete, and returned. If no matching entry exists and the // cache is not full, a new entry with state incomplete is allocated and // returned. -func (n *neighborCache) getOrCreateEntry(remoteAddr tcpip.Address, linkRes LinkAddressResolver) *neighborEntry { +func (n *neighborCache) getOrCreateEntry(remoteAddr tcpip.Address) *neighborEntry { n.mu.Lock() defer n.mu.Unlock() @@ -89,7 +86,7 @@ func (n *neighborCache) getOrCreateEntry(remoteAddr tcpip.Address, linkRes LinkA // The entry that needs to be created must be dynamic since all static // entries are directly added to the cache via addStaticEntry. - entry := newNeighborEntry(n.nic, remoteAddr, n.state, linkRes) + entry := newNeighborEntry(n, remoteAddr, n.state) if n.dynamic.count == neighborCacheSize { e := n.dynamic.lru.Back() e.mu.Lock() @@ -126,8 +123,8 @@ func (n *neighborCache) getOrCreateEntry(remoteAddr tcpip.Address, linkRes LinkA // packet prompting NUD/link address resolution. // // TODO(gvisor.dev/issue/5151): Don't return the neighbor entry. -func (n *neighborCache) entry(remoteAddr, localAddr tcpip.Address, linkRes LinkAddressResolver, onResolve func(LinkResolutionResult)) (NeighborEntry, <-chan struct{}, *tcpip.Error) { - entry := n.getOrCreateEntry(remoteAddr, linkRes) +func (n *neighborCache) entry(remoteAddr, localAddr tcpip.Address, onResolve func(LinkResolutionResult)) (NeighborEntry, <-chan struct{}, tcpip.Error) { + entry := n.getOrCreateEntry(remoteAddr) entry.mu.Lock() defer entry.mu.Unlock() @@ -154,7 +151,7 @@ func (n *neighborCache) entry(remoteAddr, localAddr tcpip.Address, linkRes LinkA entry.done = make(chan struct{}) } entry.handlePacketQueuedLocked(localAddr) - return entry.neigh, entry.done, tcpip.ErrWouldBlock + return entry.neigh, entry.done, &tcpip.ErrWouldBlock{} default: panic(fmt.Sprintf("Invalid cache entry state: %s", s)) } @@ -206,7 +203,7 @@ func (n *neighborCache) addStaticEntry(addr tcpip.Address, linkAddr tcpip.LinkAd entry.mu.Unlock() } - n.cache[addr] = newStaticNeighborEntry(n.nic, addr, linkAddr, n.state) + n.cache[addr] = newStaticNeighborEntry(n, addr, linkAddr, n.state) } // removeEntry removes a dynamic or static entry by address from the neighbor @@ -263,27 +260,45 @@ func (n *neighborCache) setConfig(config NUDConfigurations) { n.state.SetConfig(config) } -// HandleProbe implements NUDHandler.HandleProbe by following the logic defined -// in RFC 4861 section 7.2.3. Validation of the probe is expected to be handled -// by the caller. -func (n *neighborCache) HandleProbe(remoteAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, linkRes LinkAddressResolver) { - entry := n.getOrCreateEntry(remoteAddr, linkRes) +var _ neighborTable = (*neighborCache)(nil) + +func (n *neighborCache) neighbors() ([]NeighborEntry, tcpip.Error) { + return n.entries(), nil +} + +func (n *neighborCache) get(addr, localAddr tcpip.Address, onResolve func(LinkResolutionResult)) (tcpip.LinkAddress, <-chan struct{}, tcpip.Error) { + entry, ch, err := n.entry(addr, localAddr, onResolve) + return entry.LinkAddr, ch, err +} + +func (n *neighborCache) remove(addr tcpip.Address) tcpip.Error { + if !n.removeEntry(addr) { + return &tcpip.ErrBadAddress{} + } + + return nil +} + +func (n *neighborCache) removeAll() tcpip.Error { + n.clear() + return nil +} + +// handleProbe handles a neighbor probe as defined by RFC 4861 section 7.2.3. +// +// Validation of the probe is expected to be handled by the caller. +func (n *neighborCache) handleProbe(remoteAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) { + entry := n.getOrCreateEntry(remoteAddr) entry.mu.Lock() entry.handleProbeLocked(remoteLinkAddr) entry.mu.Unlock() } -// HandleConfirmation implements NUDHandler.HandleConfirmation by following the -// logic defined in RFC 4861 section 7.2.5. +// handleConfirmation handles a neighbor confirmation as defined by +// RFC 4861 section 7.2.5. // -// TODO(gvisor.dev/issue/2277): To protect against ARP poisoning and other -// attacks against NDP functions, Secure Neighbor Discovery (SEND) Protocol -// should be deployed where preventing access to the broadcast segment might -// not be possible. SEND uses RSA key pairs to produce cryptographically -// generated addresses, as defined in RFC 3972, Cryptographically Generated -// Addresses (CGA). This ensures that the claimed source of an NDP message is -// the owner of the claimed address. -func (n *neighborCache) HandleConfirmation(addr tcpip.Address, linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) { +// Validation of the confirmation is expected to be handled by the caller. +func (n *neighborCache) handleConfirmation(addr tcpip.Address, linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) { n.mu.RLock() entry, ok := n.cache[addr] n.mu.RUnlock() @@ -297,10 +312,9 @@ func (n *neighborCache) HandleConfirmation(addr tcpip.Address, linkAddr tcpip.Li // no matching entry for the remote address. } -// HandleUpperLevelConfirmation implements -// NUDHandler.HandleUpperLevelConfirmation by following the logic defined in -// RFC 4861 section 7.3.1. -func (n *neighborCache) HandleUpperLevelConfirmation(addr tcpip.Address) { +// handleUpperLevelConfirmation processes a confirmation of reachablity from +// some protocol that operates at a layer above the IP/link layer. +func (n *neighborCache) handleUpperLevelConfirmation(addr tcpip.Address) { n.mu.RLock() entry, ok := n.cache[addr] n.mu.RUnlock() @@ -310,3 +324,12 @@ func (n *neighborCache) HandleUpperLevelConfirmation(addr tcpip.Address) { entry.mu.Unlock() } } + +func (n *neighborCache) nudConfig() (NUDConfigurations, tcpip.Error) { + return n.config(), nil +} + +func (n *neighborCache) setNUDConfig(c NUDConfigurations) tcpip.Error { + n.setConfig(c) + return nil +} diff --git a/pkg/tcpip/stack/neighbor_cache_test.go b/pkg/tcpip/stack/neighbor_cache_test.go index 6723aef9b..b489b5e08 100644 --- a/pkg/tcpip/stack/neighbor_cache_test.go +++ b/pkg/tcpip/stack/neighbor_cache_test.go @@ -76,10 +76,15 @@ func entryDiffOptsWithSort() []cmp.Option { })) } -func newTestNeighborCache(nudDisp NUDDispatcher, config NUDConfigurations, clock tcpip.Clock) *neighborCache { +func newTestNeighborResolver(nudDisp NUDDispatcher, config NUDConfigurations, clock tcpip.Clock) *testNeighborResolver { config.resetInvalidFields() rng := rand.New(rand.NewSource(time.Now().UnixNano())) - neigh := &neighborCache{ + linkRes := &testNeighborResolver{ + clock: clock, + entries: newTestEntryStore(), + delay: typicalLatency, + } + linkRes.neigh = &neighborCache{ nic: &NIC{ stack: &Stack{ clock: clock, @@ -88,11 +93,11 @@ func newTestNeighborCache(nudDisp NUDDispatcher, config NUDConfigurations, clock id: 1, stats: makeNICStats(), }, - state: NewNUDState(config, rng), - cache: make(map[tcpip.Address]*neighborEntry, neighborCacheSize), + state: NewNUDState(config, rng), + linkRes: linkRes, + cache: make(map[tcpip.Address]*neighborEntry, neighborCacheSize), } - neigh.nic.neigh = neigh - return neigh + return linkRes } // testEntryStore contains a set of IP to NeighborEntry mappings. @@ -194,7 +199,7 @@ type testNeighborResolver struct { var _ LinkAddressResolver = (*testNeighborResolver)(nil) -func (r *testNeighborResolver) LinkAddressRequest(targetAddr, _ tcpip.Address, _ tcpip.LinkAddress, _ NetworkInterface) *tcpip.Error { +func (r *testNeighborResolver) LinkAddressRequest(targetAddr, _ tcpip.Address, _ tcpip.LinkAddress) tcpip.Error { if !r.dropReplies { // Delay handling the request to emulate network latency. r.clock.AfterFunc(r.delay, func() { @@ -212,7 +217,7 @@ func (r *testNeighborResolver) LinkAddressRequest(targetAddr, _ tcpip.Address, _ // fakeRequest emulates handling a response for a link address request. func (r *testNeighborResolver) fakeRequest(addr tcpip.Address) { if entry, ok := r.entries.entryByAddr(addr); ok { - r.neigh.HandleConfirmation(addr, entry.LinkAddr, ReachabilityConfirmationFlags{ + r.neigh.handleConfirmation(addr, entry.LinkAddr, ReachabilityConfirmationFlags{ Solicited: true, Override: false, IsRouter: false, @@ -242,17 +247,17 @@ func TestNeighborCacheGetConfig(t *testing.T) { nudDisp := testNUDDispatcher{} c := DefaultNUDConfigurations() clock := faketime.NewManualClock() - neigh := newTestNeighborCache(&nudDisp, c, clock) + linkRes := newTestNeighborResolver(&nudDisp, c, clock) - if got, want := neigh.config(), c; got != want { - t.Errorf("got neigh.config() = %+v, want = %+v", got, want) + if got, want := linkRes.neigh.config(), c; got != want { + t.Errorf("got linkRes.neigh.config() = %+v, want = %+v", got, want) } // No events should have been dispatched. nudDisp.mu.Lock() defer nudDisp.mu.Unlock() - if diff := cmp.Diff(nudDisp.events, []testEntryEventInfo(nil)); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff([]testEntryEventInfo(nil), nudDisp.events); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -260,21 +265,21 @@ func TestNeighborCacheSetConfig(t *testing.T) { nudDisp := testNUDDispatcher{} c := DefaultNUDConfigurations() clock := faketime.NewManualClock() - neigh := newTestNeighborCache(&nudDisp, c, clock) + linkRes := newTestNeighborResolver(&nudDisp, c, clock) c.MinRandomFactor = 1 c.MaxRandomFactor = 1 - neigh.setConfig(c) + linkRes.neigh.setConfig(c) - if got, want := neigh.config(), c; got != want { - t.Errorf("got neigh.config() = %+v, want = %+v", got, want) + if got, want := linkRes.neigh.config(), c; got != want { + t.Errorf("got linkRes.neigh.config() = %+v, want = %+v", got, want) } // No events should have been dispatched. nudDisp.mu.Lock() defer nudDisp.mu.Unlock() - if diff := cmp.Diff(nudDisp.events, []testEntryEventInfo(nil)); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff([]testEntryEventInfo(nil), nudDisp.events); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -282,21 +287,15 @@ func TestNeighborCacheEntry(t *testing.T) { c := DefaultNUDConfigurations() nudDisp := testNUDDispatcher{} clock := faketime.NewManualClock() - neigh := newTestNeighborCache(&nudDisp, c, clock) - store := newTestEntryStore() - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: typicalLatency, - } + linkRes := newTestNeighborResolver(&nudDisp, c, clock) - entry, ok := store.entry(0) + entry, ok := linkRes.entries.entry(0) if !ok { - t.Fatal("store.entry(0) not found") + t.Fatal("linkRes.entries.entry(0) not found") } - if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock { - t.Errorf("got neigh.entry(%s, '', _, nil, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + _, _, err := linkRes.neigh.entry(entry.Addr, "", nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got linkRes.neigh.entry(%s, '', nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } clock.Advance(typicalLatency) @@ -321,22 +320,22 @@ func TestNeighborCacheEntry(t *testing.T) { }, } nudDisp.mu.Lock() - diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...) nudDisp.events = nil nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } - if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != nil { - t.Fatalf("unexpected error from neigh.entry(%s, '', _, nil, nil): %s", entry.Addr, err) + if _, _, err := linkRes.neigh.entry(entry.Addr, "", nil); err != nil { + t.Fatalf("unexpected error from linkRes.neigh.entry(%s, '', nil): %s", entry.Addr, err) } // No more events should have been dispatched. nudDisp.mu.Lock() defer nudDisp.mu.Unlock() - if diff := cmp.Diff(nudDisp.events, []testEntryEventInfo(nil)); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff([]testEntryEventInfo(nil), nudDisp.events); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -345,22 +344,16 @@ func TestNeighborCacheRemoveEntry(t *testing.T) { nudDisp := testNUDDispatcher{} clock := faketime.NewManualClock() - neigh := newTestNeighborCache(&nudDisp, config, clock) - store := newTestEntryStore() - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: typicalLatency, - } + linkRes := newTestNeighborResolver(&nudDisp, config, clock) - entry, ok := store.entry(0) + entry, ok := linkRes.entries.entry(0) if !ok { - t.Fatal("store.entry(0) not found") + t.Fatal("linkRes.entries.entry(0) not found") } - if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock { - t.Errorf("got neigh.entry(%s, '', _, nil, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + _, _, err := linkRes.neigh.entry(entry.Addr, "", nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got linkRes.neigh.entry(%s, '', nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } clock.Advance(typicalLatency) @@ -385,14 +378,14 @@ func TestNeighborCacheRemoveEntry(t *testing.T) { }, } nudDisp.mu.Lock() - diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...) nudDisp.events = nil nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } - neigh.removeEntry(entry.Addr) + linkRes.neigh.removeEntry(entry.Addr) { wantEvents := []testEntryEventInfo{ @@ -407,22 +400,23 @@ func TestNeighborCacheRemoveEntry(t *testing.T) { }, } nudDisp.mu.Lock() - diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...) nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } - if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock { - t.Errorf("got neigh.entry(%s, '', _, nil, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + { + _, _, err := linkRes.neigh.entry(entry.Addr, "", nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got linkRes.neigh.entry(%s, '', nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) + } } } type testContext struct { clock *faketime.ManualClock - neigh *neighborCache - store *testEntryStore linkRes *testNeighborResolver nudDisp *testNUDDispatcher } @@ -430,19 +424,10 @@ type testContext struct { func newTestContext(c NUDConfigurations) testContext { nudDisp := &testNUDDispatcher{} clock := faketime.NewManualClock() - neigh := newTestNeighborCache(nudDisp, c, clock) - store := newTestEntryStore() - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: typicalLatency, - } + linkRes := newTestNeighborResolver(nudDisp, c, clock) return testContext{ clock: clock, - neigh: neigh, - store: store, linkRes: linkRes, nudDisp: nudDisp, } @@ -456,16 +441,17 @@ type overflowOptions struct { func (c *testContext) overflowCache(opts overflowOptions) error { // Fill the neighbor cache to capacity to verify the LRU eviction strategy is // working properly after the entry removal. - for i := opts.startAtEntryIndex; i < c.store.size(); i++ { + for i := opts.startAtEntryIndex; i < c.linkRes.entries.size(); i++ { // Add a new entry - entry, ok := c.store.entry(i) + entry, ok := c.linkRes.entries.entry(i) if !ok { - return fmt.Errorf("c.store.entry(%d) not found", i) + return fmt.Errorf("c.linkRes.entries.entry(%d) not found", i) } - if _, _, err := c.neigh.entry(entry.Addr, "", c.linkRes, nil); err != tcpip.ErrWouldBlock { - return fmt.Errorf("got c.neigh.entry(%s, '', _, nil, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + _, _, err := c.linkRes.neigh.entry(entry.Addr, "", nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + return fmt.Errorf("got c.linkRes.neigh.entry(%s, '', nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } - c.clock.Advance(c.neigh.config().RetransmitTimer) + c.clock.Advance(c.linkRes.neigh.config().RetransmitTimer) var wantEvents []testEntryEventInfo @@ -473,9 +459,9 @@ func (c *testContext) overflowCache(opts overflowOptions) error { // LRU eviction strategy. Note that the number of static entries should not // affect the total number of dynamic entries that can be added. if i >= neighborCacheSize+opts.startAtEntryIndex { - removedEntry, ok := c.store.entry(i - neighborCacheSize) + removedEntry, ok := c.linkRes.entries.entry(i - neighborCacheSize) if !ok { - return fmt.Errorf("store.entry(%d) not found", i-neighborCacheSize) + return fmt.Errorf("linkRes.entries.entry(%d) not found", i-neighborCacheSize) } wantEvents = append(wantEvents, testEntryEventInfo{ EventType: entryTestRemoved, @@ -506,11 +492,11 @@ func (c *testContext) overflowCache(opts overflowOptions) error { }) c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - return fmt.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + return fmt.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -518,10 +504,10 @@ func (c *testContext) overflowCache(opts overflowOptions) error { // by entries() is nondeterministic, so entries have to be sorted before // comparison. wantUnsortedEntries := opts.wantStaticEntries - for i := c.store.size() - neighborCacheSize; i < c.store.size(); i++ { - entry, ok := c.store.entry(i) + for i := c.linkRes.entries.size() - neighborCacheSize; i < c.linkRes.entries.size(); i++ { + entry, ok := c.linkRes.entries.entry(i) if !ok { - return fmt.Errorf("c.store.entry(%d) not found", i) + return fmt.Errorf("c.linkRes.entries.entry(%d) not found", i) } wantEntry := NeighborEntry{ Addr: entry.Addr, @@ -531,15 +517,15 @@ func (c *testContext) overflowCache(opts overflowOptions) error { wantUnsortedEntries = append(wantUnsortedEntries, wantEntry) } - if diff := cmp.Diff(c.neigh.entries(), wantUnsortedEntries, entryDiffOptsWithSort()...); diff != "" { - return fmt.Errorf("neighbor entries mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantUnsortedEntries, c.linkRes.neigh.entries(), entryDiffOptsWithSort()...); diff != "" { + return fmt.Errorf("neighbor entries mismatch (-want, +got):\n%s", diff) } // No more events should have been dispatched. c.nudDisp.mu.Lock() defer c.nudDisp.mu.Unlock() - if diff := cmp.Diff(c.nudDisp.events, []testEntryEventInfo(nil)); diff != "" { - return fmt.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff([]testEntryEventInfo(nil), c.nudDisp.events); diff != "" { + return fmt.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } return nil @@ -575,14 +561,15 @@ func TestNeighborCacheRemoveEntryThenOverflow(t *testing.T) { c := newTestContext(config) // Add a dynamic entry - entry, ok := c.store.entry(0) + entry, ok := c.linkRes.entries.entry(0) if !ok { - t.Fatal("c.store.entry(0) not found") + t.Fatal("c.linkRes.entries.entry(0) not found") } - if _, _, err := c.neigh.entry(entry.Addr, "", c.linkRes, nil); err != tcpip.ErrWouldBlock { - t.Errorf("got c.neigh.entry(%s, '', _, nil, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + _, _, err := c.linkRes.neigh.entry(entry.Addr, "", nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got c.linkRes.neigh.entry(%s, '', nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } - c.clock.Advance(c.neigh.config().RetransmitTimer) + c.clock.Advance(c.linkRes.neigh.config().RetransmitTimer) wantEvents := []testEntryEventInfo{ { EventType: entryTestAdded, @@ -603,15 +590,15 @@ func TestNeighborCacheRemoveEntryThenOverflow(t *testing.T) { }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } // Remove the entry - c.neigh.removeEntry(entry.Addr) + c.linkRes.neigh.removeEntry(entry.Addr) { wantEvents := []testEntryEventInfo{ @@ -626,11 +613,11 @@ func TestNeighborCacheRemoveEntryThenOverflow(t *testing.T) { }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -650,12 +637,12 @@ func TestNeighborCacheDuplicateStaticEntryWithSameLinkAddress(t *testing.T) { c := newTestContext(config) // Add a static entry - entry, ok := c.store.entry(0) + entry, ok := c.linkRes.entries.entry(0) if !ok { - t.Fatal("c.store.entry(0) not found") + t.Fatal("c.linkRes.entries.entry(0) not found") } staticLinkAddr := entry.LinkAddr + "static" - c.neigh.addStaticEntry(entry.Addr, staticLinkAddr) + c.linkRes.neigh.addStaticEntry(entry.Addr, staticLinkAddr) wantEvents := []testEntryEventInfo{ { EventType: entryTestAdded, @@ -668,21 +655,21 @@ func TestNeighborCacheDuplicateStaticEntryWithSameLinkAddress(t *testing.T) { }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } // Remove the static entry that was just added - c.neigh.addStaticEntry(entry.Addr, staticLinkAddr) + c.linkRes.neigh.addStaticEntry(entry.Addr, staticLinkAddr) // No more events should have been dispatched. c.nudDisp.mu.Lock() defer c.nudDisp.mu.Unlock() - if diff := cmp.Diff(c.nudDisp.events, []testEntryEventInfo(nil)); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff([]testEntryEventInfo(nil), c.nudDisp.events); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -694,12 +681,12 @@ func TestNeighborCacheDuplicateStaticEntryWithDifferentLinkAddress(t *testing.T) c := newTestContext(config) // Add a static entry - entry, ok := c.store.entry(0) + entry, ok := c.linkRes.entries.entry(0) if !ok { - t.Fatal("c.store.entry(0) not found") + t.Fatal("c.linkRes.entries.entry(0) not found") } staticLinkAddr := entry.LinkAddr + "static" - c.neigh.addStaticEntry(entry.Addr, staticLinkAddr) + c.linkRes.neigh.addStaticEntry(entry.Addr, staticLinkAddr) wantEvents := []testEntryEventInfo{ { EventType: entryTestAdded, @@ -712,16 +699,16 @@ func TestNeighborCacheDuplicateStaticEntryWithDifferentLinkAddress(t *testing.T) }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } // Add a duplicate entry with a different link address staticLinkAddr += "duplicate" - c.neigh.addStaticEntry(entry.Addr, staticLinkAddr) + c.linkRes.neigh.addStaticEntry(entry.Addr, staticLinkAddr) { wantEvents := []testEntryEventInfo{ { @@ -736,8 +723,8 @@ func TestNeighborCacheDuplicateStaticEntryWithDifferentLinkAddress(t *testing.T) } c.nudDisp.mu.Lock() defer c.nudDisp.mu.Unlock() - if diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } } @@ -756,12 +743,12 @@ func TestNeighborCacheRemoveStaticEntryThenOverflow(t *testing.T) { c := newTestContext(config) // Add a static entry - entry, ok := c.store.entry(0) + entry, ok := c.linkRes.entries.entry(0) if !ok { - t.Fatal("c.store.entry(0) not found") + t.Fatal("c.linkRes.entries.entry(0) not found") } staticLinkAddr := entry.LinkAddr + "static" - c.neigh.addStaticEntry(entry.Addr, staticLinkAddr) + c.linkRes.neigh.addStaticEntry(entry.Addr, staticLinkAddr) wantEvents := []testEntryEventInfo{ { EventType: entryTestAdded, @@ -774,15 +761,15 @@ func TestNeighborCacheRemoveStaticEntryThenOverflow(t *testing.T) { }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } // Remove the static entry that was just added - c.neigh.removeEntry(entry.Addr) + c.linkRes.neigh.removeEntry(entry.Addr) { wantEvents := []testEntryEventInfo{ { @@ -796,11 +783,11 @@ func TestNeighborCacheRemoveStaticEntryThenOverflow(t *testing.T) { }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -826,12 +813,13 @@ func TestNeighborCacheOverwriteWithStaticEntryThenOverflow(t *testing.T) { c := newTestContext(config) // Add a dynamic entry - entry, ok := c.store.entry(0) + entry, ok := c.linkRes.entries.entry(0) if !ok { - t.Fatal("c.store.entry(0) not found") + t.Fatal("c.linkRes.entries.entry(0) not found") } - if _, _, err := c.neigh.entry(entry.Addr, "", c.linkRes, nil); err != tcpip.ErrWouldBlock { - t.Errorf("got c.neigh.entry(%s, '', _, nil, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + _, _, err := c.linkRes.neigh.entry(entry.Addr, "", nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got c.linkRes.neigh.entry(%s, '', nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } c.clock.Advance(typicalLatency) wantEvents := []testEntryEventInfo{ @@ -854,16 +842,16 @@ func TestNeighborCacheOverwriteWithStaticEntryThenOverflow(t *testing.T) { }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } // Override the entry with a static one using the same address staticLinkAddr := entry.LinkAddr + "static" - c.neigh.addStaticEntry(entry.Addr, staticLinkAddr) + c.linkRes.neigh.addStaticEntry(entry.Addr, staticLinkAddr) { wantEvents := []testEntryEventInfo{ { @@ -886,11 +874,11 @@ func TestNeighborCacheOverwriteWithStaticEntryThenOverflow(t *testing.T) { }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -918,22 +906,22 @@ func TestNeighborCacheAddStaticEntryThenOverflow(t *testing.T) { c := newTestContext(config) - entry, ok := c.store.entry(0) + entry, ok := c.linkRes.entries.entry(0) if !ok { - t.Fatal("c.store.entry(0) not found") + t.Fatal("c.linkRes.entries.entry(0) not found") } - c.neigh.addStaticEntry(entry.Addr, entry.LinkAddr) - e, _, err := c.neigh.entry(entry.Addr, "", c.linkRes, nil) + c.linkRes.neigh.addStaticEntry(entry.Addr, entry.LinkAddr) + e, _, err := c.linkRes.neigh.entry(entry.Addr, "", nil) if err != nil { - t.Errorf("unexpected error from c.neigh.entry(%s, \"\", _, nil, nil): %s", entry.Addr, err) + t.Errorf("unexpected error from c.linkRes.neigh.entry(%s, \"\", nil): %s", entry.Addr, err) } want := NeighborEntry{ Addr: entry.Addr, LinkAddr: entry.LinkAddr, State: Static, } - if diff := cmp.Diff(e, want, entryDiffOpts()...); diff != "" { - t.Errorf("c.neigh.entry(%s, \"\", _, nil, nil) mismatch (-got, +want):\n%s", entry.Addr, diff) + if diff := cmp.Diff(want, e, entryDiffOpts()...); diff != "" { + t.Errorf("c.linkRes.neigh.entry(%s, \"\", nil) mismatch (-want, +got):\n%s", entry.Addr, diff) } wantEvents := []testEntryEventInfo{ @@ -948,11 +936,11 @@ func TestNeighborCacheAddStaticEntryThenOverflow(t *testing.T) { }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } opts := overflowOptions{ @@ -975,22 +963,16 @@ func TestNeighborCacheClear(t *testing.T) { nudDisp := testNUDDispatcher{} clock := faketime.NewManualClock() - neigh := newTestNeighborCache(&nudDisp, config, clock) - store := newTestEntryStore() - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: typicalLatency, - } + linkRes := newTestNeighborResolver(&nudDisp, config, clock) // Add a dynamic entry. - entry, ok := store.entry(0) + entry, ok := linkRes.entries.entry(0) if !ok { - t.Fatal("store.entry(0) not found") + t.Fatal("linkRes.entries.entry(0) not found") } - if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock { - t.Errorf("got neigh.entry(%s, '', _, nil, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + _, _, err := linkRes.neigh.entry(entry.Addr, "", nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got linkRes.neigh.entry(%s, '', nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } clock.Advance(typicalLatency) @@ -1014,15 +996,15 @@ func TestNeighborCacheClear(t *testing.T) { }, } nudDisp.mu.Lock() - diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...) nudDisp.events = nil nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } // Add a static entry. - neigh.addStaticEntry(entryTestAddr1, entryTestLinkAddr1) + linkRes.neigh.addStaticEntry(entryTestAddr1, entryTestLinkAddr1) { wantEvents := []testEntryEventInfo{ @@ -1037,16 +1019,16 @@ func TestNeighborCacheClear(t *testing.T) { }, } nudDisp.mu.Lock() - diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...) nudDisp.events = nil nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } // Clear should remove both dynamic and static entries. - neigh.clear() + linkRes.neigh.clear() // Remove events dispatched from clear() have no deterministic order so they // need to be sorted beforehand. @@ -1072,8 +1054,8 @@ func TestNeighborCacheClear(t *testing.T) { } nudDisp.mu.Lock() defer nudDisp.mu.Unlock() - if diff := cmp.Diff(nudDisp.events, wantUnsortedEvents, eventDiffOptsWithSort()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantUnsortedEvents, nudDisp.events, eventDiffOptsWithSort()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -1090,12 +1072,13 @@ func TestNeighborCacheClearThenOverflow(t *testing.T) { c := newTestContext(config) // Add a dynamic entry - entry, ok := c.store.entry(0) + entry, ok := c.linkRes.entries.entry(0) if !ok { - t.Fatal("c.store.entry(0) not found") + t.Fatal("c.linkRes.entries.entry(0) not found") } - if _, _, err := c.neigh.entry(entry.Addr, "", c.linkRes, nil); err != tcpip.ErrWouldBlock { - t.Errorf("got c.neigh.entry(%s, '', _, nil, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + _, _, err := c.linkRes.neigh.entry(entry.Addr, "", nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got c.linkRes.neigh.entry(%s, '', nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } c.clock.Advance(typicalLatency) wantEvents := []testEntryEventInfo{ @@ -1118,15 +1101,15 @@ func TestNeighborCacheClearThenOverflow(t *testing.T) { }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } // Clear the cache. - c.neigh.clear() + c.linkRes.neigh.clear() { wantEvents := []testEntryEventInfo{ { @@ -1140,11 +1123,11 @@ func TestNeighborCacheClearThenOverflow(t *testing.T) { }, } c.nudDisp.mu.Lock() - diff := cmp.Diff(c.nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, c.nudDisp.events, eventDiffOpts()...) c.nudDisp.events = nil c.nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -1165,18 +1148,11 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) { nudDisp := testNUDDispatcher{} clock := faketime.NewManualClock() - neigh := newTestNeighborCache(&nudDisp, config, clock) - store := newTestEntryStore() - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: typicalLatency, - } + linkRes := newTestNeighborResolver(&nudDisp, config, clock) - frequentlyUsedEntry, ok := store.entry(0) + frequentlyUsedEntry, ok := linkRes.entries.entry(0) if !ok { - t.Fatal("store.entry(0) not found") + t.Fatal("linkRes.entries.entry(0) not found") } // The following logic is very similar to overflowCache, but @@ -1184,23 +1160,23 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) { // Fill the neighbor cache to capacity for i := 0; i < neighborCacheSize; i++ { - entry, ok := store.entry(i) + entry, ok := linkRes.entries.entry(i) if !ok { - t.Fatalf("store.entry(%d) not found", i) + t.Fatalf("linkRes.entries.entry(%d) not found", i) } - _, ch, err := neigh.entry(entry.Addr, "", linkRes, func(r LinkResolutionResult) { + _, ch, err := linkRes.neigh.entry(entry.Addr, "", func(r LinkResolutionResult) { if diff := cmp.Diff(LinkResolutionResult{LinkAddress: entry.LinkAddr, Success: true}, r); diff != "" { t.Fatalf("got link resolution result mismatch (-want +got):\n%s", diff) } }) - if err != tcpip.ErrWouldBlock { - t.Errorf("got neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got linkRes.neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } clock.Advance(typicalLatency) select { case <-ch: default: - t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, _, nil)", entry.Addr) + t.Fatalf("expected notification from done channel returned by linkRes.neigh.entry(%s, '', _, _, nil)", entry.Addr) } wantEvents := []testEntryEventInfo{ { @@ -1222,47 +1198,47 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) { }, } nudDisp.mu.Lock() - diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...) nudDisp.events = nil nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } // Keep adding more entries - for i := neighborCacheSize; i < store.size(); i++ { + for i := neighborCacheSize; i < linkRes.entries.size(); i++ { // Periodically refresh the frequently used entry if i%(neighborCacheSize/2) == 0 { - if _, _, err := neigh.entry(frequentlyUsedEntry.Addr, "", linkRes, nil); err != nil { - t.Errorf("unexpected error from neigh.entry(%s, '', _, nil, nil): %s", frequentlyUsedEntry.Addr, err) + if _, _, err := linkRes.neigh.entry(frequentlyUsedEntry.Addr, "", nil); err != nil { + t.Errorf("unexpected error from linkRes.neigh.entry(%s, '', nil): %s", frequentlyUsedEntry.Addr, err) } } - entry, ok := store.entry(i) + entry, ok := linkRes.entries.entry(i) if !ok { - t.Fatalf("store.entry(%d) not found", i) + t.Fatalf("linkRes.entries.entry(%d) not found", i) } - _, ch, err := neigh.entry(entry.Addr, "", linkRes, func(r LinkResolutionResult) { + _, ch, err := linkRes.neigh.entry(entry.Addr, "", func(r LinkResolutionResult) { if diff := cmp.Diff(LinkResolutionResult{LinkAddress: entry.LinkAddr, Success: true}, r); diff != "" { t.Fatalf("got link resolution result mismatch (-want +got):\n%s", diff) } }) - if err != tcpip.ErrWouldBlock { - t.Errorf("got neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got linkRes.neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } clock.Advance(typicalLatency) select { case <-ch: default: - t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, _, nil)", entry.Addr) + t.Fatalf("expected notification from done channel returned by linkRes.neigh.entry(%s, '', _, _, nil)", entry.Addr) } // An entry should have been removed, as per the LRU eviction strategy - removedEntry, ok := store.entry(i - neighborCacheSize + 1) + removedEntry, ok := linkRes.entries.entry(i - neighborCacheSize + 1) if !ok { - t.Fatalf("store.entry(%d) not found", i-neighborCacheSize+1) + t.Fatalf("linkRes.entries.entry(%d) not found", i-neighborCacheSize+1) } wantEvents := []testEntryEventInfo{ { @@ -1293,11 +1269,11 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) { }, } nudDisp.mu.Lock() - diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...) nudDisp.events = nil nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -1312,10 +1288,10 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) { }, } - for i := store.size() - neighborCacheSize + 1; i < store.size(); i++ { - entry, ok := store.entry(i) + for i := linkRes.entries.size() - neighborCacheSize + 1; i < linkRes.entries.size(); i++ { + entry, ok := linkRes.entries.entry(i) if !ok { - t.Fatalf("store.entry(%d) not found", i) + t.Fatalf("linkRes.entries.entry(%d) not found", i) } wantEntry := NeighborEntry{ Addr: entry.Addr, @@ -1325,15 +1301,15 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) { wantUnsortedEntries = append(wantUnsortedEntries, wantEntry) } - if diff := cmp.Diff(neigh.entries(), wantUnsortedEntries, entryDiffOptsWithSort()...); diff != "" { - t.Errorf("neighbor entries mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantUnsortedEntries, linkRes.neigh.entries(), entryDiffOptsWithSort()...); diff != "" { + t.Errorf("neighbor entries mismatch (-want, +got):\n%s", diff) } // No more events should have been dispatched. nudDisp.mu.Lock() defer nudDisp.mu.Unlock() - if diff := cmp.Diff(nudDisp.events, []testEntryEventInfo(nil)); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff([]testEntryEventInfo(nil), nudDisp.events); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } @@ -1344,24 +1320,19 @@ func TestNeighborCacheConcurrent(t *testing.T) { nudDisp := testNUDDispatcher{} clock := faketime.NewManualClock() - neigh := newTestNeighborCache(&nudDisp, config, clock) - store := newTestEntryStore() - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: typicalLatency, - } + linkRes := newTestNeighborResolver(&nudDisp, config, clock) - storeEntries := store.entries() + storeEntries := linkRes.entries.entries() for _, entry := range storeEntries { var wg sync.WaitGroup for r := 0; r < concurrentProcesses; r++ { wg.Add(1) go func(entry NeighborEntry) { defer wg.Done() - if e, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != nil && err != tcpip.ErrWouldBlock { - t.Errorf("got neigh.entry(%s, '', _, nil, nil) = (%+v, _, %s), want (_, _, nil) or (_, _, %s)", entry.Addr, e, err, tcpip.ErrWouldBlock) + switch e, _, err := linkRes.neigh.entry(entry.Addr, "", nil); err.(type) { + case nil, *tcpip.ErrWouldBlock: + default: + t.Errorf("got linkRes.neigh.entry(%s, '', nil) = (%+v, _, %s), want (_, _, nil) or (_, _, %s)", entry.Addr, e, err, &tcpip.ErrWouldBlock{}) } }(entry) } @@ -1379,10 +1350,10 @@ func TestNeighborCacheConcurrent(t *testing.T) { // The order of entries reported by entries() is nondeterministic, so entries // have to be sorted before comparison. var wantUnsortedEntries []NeighborEntry - for i := store.size() - neighborCacheSize; i < store.size(); i++ { - entry, ok := store.entry(i) + for i := linkRes.entries.size() - neighborCacheSize; i < linkRes.entries.size(); i++ { + entry, ok := linkRes.entries.entry(i) if !ok { - t.Errorf("store.entry(%d) not found", i) + t.Errorf("linkRes.entries.entry(%d) not found", i) } wantEntry := NeighborEntry{ Addr: entry.Addr, @@ -1392,8 +1363,8 @@ func TestNeighborCacheConcurrent(t *testing.T) { wantUnsortedEntries = append(wantUnsortedEntries, wantEntry) } - if diff := cmp.Diff(neigh.entries(), wantUnsortedEntries, entryDiffOptsWithSort()...); diff != "" { - t.Errorf("neighbor entries mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantUnsortedEntries, linkRes.neigh.entries(), entryDiffOptsWithSort()...); diff != "" { + t.Errorf("neighbor entries mismatch (-want, +got):\n%s", diff) } } @@ -1402,41 +1373,34 @@ func TestNeighborCacheReplace(t *testing.T) { nudDisp := testNUDDispatcher{} clock := faketime.NewManualClock() - neigh := newTestNeighborCache(&nudDisp, config, clock) - store := newTestEntryStore() - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: typicalLatency, - } + linkRes := newTestNeighborResolver(&nudDisp, config, clock) // Add an entry - entry, ok := store.entry(0) + entry, ok := linkRes.entries.entry(0) if !ok { - t.Fatal("store.entry(0) not found") + t.Fatal("linkRes.entries.entry(0) not found") } - _, ch, err := neigh.entry(entry.Addr, "", linkRes, func(r LinkResolutionResult) { + _, ch, err := linkRes.neigh.entry(entry.Addr, "", func(r LinkResolutionResult) { if diff := cmp.Diff(LinkResolutionResult{LinkAddress: entry.LinkAddr, Success: true}, r); diff != "" { t.Fatalf("got link resolution result mismatch (-want +got):\n%s", diff) } }) - if err != tcpip.ErrWouldBlock { - t.Fatalf("got neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got linkRes.neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } clock.Advance(typicalLatency) select { case <-ch: default: - t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, _, nil)", entry.Addr) + t.Fatalf("expected notification from done channel returned by linkRes.neigh.entry(%s, '', _, _, nil)", entry.Addr) } // Verify the entry exists { - e, _, err := neigh.entry(entry.Addr, "", linkRes, nil) + e, _, err := linkRes.neigh.entry(entry.Addr, "", nil) if err != nil { - t.Errorf("unexpected error from neigh.entry(%s, '', _, _, nil): %s", entry.Addr, err) + t.Errorf("unexpected error from linkRes.neigh.entry(%s, '', _, _, nil): %s", entry.Addr, err) } if t.Failed() { t.FailNow() @@ -1446,22 +1410,22 @@ func TestNeighborCacheReplace(t *testing.T) { LinkAddr: entry.LinkAddr, State: Reachable, } - if diff := cmp.Diff(e, want, entryDiffOpts()...); diff != "" { - t.Errorf("neigh.entry(%s, '', _, _, nil) mismatch (-got, +want):\n%s", entry.Addr, diff) + if diff := cmp.Diff(want, e, entryDiffOpts()...); diff != "" { + t.Errorf("linkRes.neigh.entry(%s, '', _, _, nil) mismatch (-want, +got):\n%s", entry.Addr, diff) } } // Notify of a link address change var updatedLinkAddr tcpip.LinkAddress { - entry, ok := store.entry(1) + entry, ok := linkRes.entries.entry(1) if !ok { - t.Fatal("store.entry(1) not found") + t.Fatal("linkRes.entries.entry(1) not found") } updatedLinkAddr = entry.LinkAddr } - store.set(0, updatedLinkAddr) - neigh.HandleConfirmation(entry.Addr, updatedLinkAddr, ReachabilityConfirmationFlags{ + linkRes.entries.set(0, updatedLinkAddr) + linkRes.neigh.handleConfirmation(entry.Addr, updatedLinkAddr, ReachabilityConfirmationFlags{ Solicited: false, Override: true, IsRouter: false, @@ -1471,35 +1435,35 @@ func TestNeighborCacheReplace(t *testing.T) { // // Verify the entry's new link address and the new state. { - e, _, err := neigh.entry(entry.Addr, "", linkRes, nil) + e, _, err := linkRes.neigh.entry(entry.Addr, "", nil) if err != nil { - t.Fatalf("neigh.entry(%s, '', _, nil, nil): %s", entry.Addr, err) + t.Fatalf("linkRes.neigh.entry(%s, '', nil): %s", entry.Addr, err) } want := NeighborEntry{ Addr: entry.Addr, LinkAddr: updatedLinkAddr, State: Delay, } - if diff := cmp.Diff(e, want, entryDiffOpts()...); diff != "" { - t.Errorf("neigh.entry(%s, '', _, nil, nil) mismatch (-got, +want):\n%s", entry.Addr, diff) + if diff := cmp.Diff(want, e, entryDiffOpts()...); diff != "" { + t.Errorf("linkRes.neigh.entry(%s, '', nil) mismatch (-want, +got):\n%s", entry.Addr, diff) } clock.Advance(config.DelayFirstProbeTime + typicalLatency) } // Verify that the neighbor is now reachable. { - e, _, err := neigh.entry(entry.Addr, "", linkRes, nil) + e, _, err := linkRes.neigh.entry(entry.Addr, "", nil) clock.Advance(typicalLatency) if err != nil { - t.Errorf("unexpected error from neigh.entry(%s, '', _, nil, nil): %s", entry.Addr, err) + t.Errorf("unexpected error from linkRes.neigh.entry(%s, '', nil): %s", entry.Addr, err) } want := NeighborEntry{ Addr: entry.Addr, LinkAddr: updatedLinkAddr, State: Reachable, } - if diff := cmp.Diff(e, want, entryDiffOpts()...); diff != "" { - t.Errorf("neigh.entry(%s, '', _, nil, nil) mismatch (-got, +want):\n%s", entry.Addr, diff) + if diff := cmp.Diff(want, e, entryDiffOpts()...); diff != "" { + t.Errorf("linkRes.neigh.entry(%s, '', nil) mismatch (-want, +got):\n%s", entry.Addr, diff) } } } @@ -1509,54 +1473,47 @@ func TestNeighborCacheResolutionFailed(t *testing.T) { nudDisp := testNUDDispatcher{} clock := faketime.NewManualClock() - neigh := newTestNeighborCache(&nudDisp, config, clock) - store := newTestEntryStore() + linkRes := newTestNeighborResolver(&nudDisp, config, clock) var requestCount uint32 - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: typicalLatency, - onLinkAddressRequest: func() { - atomic.AddUint32(&requestCount, 1) - }, + linkRes.onLinkAddressRequest = func() { + atomic.AddUint32(&requestCount, 1) } - entry, ok := store.entry(0) + entry, ok := linkRes.entries.entry(0) if !ok { - t.Fatal("store.entry(0) not found") + t.Fatal("linkRes.entries.entry(0) not found") } // First, sanity check that resolution is working { - _, ch, err := neigh.entry(entry.Addr, "", linkRes, func(r LinkResolutionResult) { + _, ch, err := linkRes.neigh.entry(entry.Addr, "", func(r LinkResolutionResult) { if diff := cmp.Diff(LinkResolutionResult{LinkAddress: entry.LinkAddr, Success: true}, r); diff != "" { t.Fatalf("got link resolution result mismatch (-want +got):\n%s", diff) } }) - if err != tcpip.ErrWouldBlock { - t.Fatalf("got neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got linkRes.neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } clock.Advance(typicalLatency) select { case <-ch: default: - t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, _, nil)", entry.Addr) + t.Fatalf("expected notification from done channel returned by linkRes.neigh.entry(%s, '', _, _, nil)", entry.Addr) } } - got, _, err := neigh.entry(entry.Addr, "", linkRes, nil) + got, _, err := linkRes.neigh.entry(entry.Addr, "", nil) if err != nil { - t.Fatalf("unexpected error from neigh.entry(%s, '', _, nil, nil): %s", entry.Addr, err) + t.Fatalf("unexpected error from linkRes.neigh.entry(%s, '', nil): %s", entry.Addr, err) } want := NeighborEntry{ Addr: entry.Addr, LinkAddr: entry.LinkAddr, State: Reachable, } - if diff := cmp.Diff(got, want, entryDiffOpts()...); diff != "" { - t.Errorf("neigh.entry(%s, '', _, nil, nil) mismatch (-got, +want):\n%s", entry.Addr, diff) + if diff := cmp.Diff(want, got, entryDiffOpts()...); diff != "" { + t.Errorf("linkRes.neigh.entry(%s, '', nil) mismatch (-want, +got):\n%s", entry.Addr, diff) } // Verify address resolution fails for an unknown address. @@ -1564,24 +1521,24 @@ func TestNeighborCacheResolutionFailed(t *testing.T) { entry.Addr += "2" { - _, ch, err := neigh.entry(entry.Addr, "", linkRes, func(r LinkResolutionResult) { + _, ch, err := linkRes.neigh.entry(entry.Addr, "", func(r LinkResolutionResult) { if diff := cmp.Diff(LinkResolutionResult{Success: false}, r); diff != "" { t.Fatalf("got link resolution result mismatch (-want +got):\n%s", diff) } }) - if err != tcpip.ErrWouldBlock { - t.Fatalf("got neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got linkRes.neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } waitFor := config.DelayFirstProbeTime + typicalLatency*time.Duration(config.MaxMulticastProbes) clock.Advance(waitFor) select { case <-ch: default: - t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, _, nil)", entry.Addr) + t.Fatalf("expected notification from done channel returned by linkRes.neigh.entry(%s, '', _, _, nil)", entry.Addr) } } - maxAttempts := neigh.config().MaxUnicastProbes + maxAttempts := linkRes.neigh.config().MaxUnicastProbes if got, want := atomic.LoadUint32(&requestCount)-before, maxAttempts; got != want { t.Errorf("got link address request count = %d, want = %d", got, want) } @@ -1595,27 +1552,22 @@ func TestNeighborCacheResolutionTimeout(t *testing.T) { config.RetransmitTimer = time.Millisecond // small enough to cause timeout clock := faketime.NewManualClock() - neigh := newTestNeighborCache(nil, config, clock) - store := newTestEntryStore() - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: time.Minute, // large enough to cause timeout - } + linkRes := newTestNeighborResolver(nil, config, clock) + // large enough to cause timeout + linkRes.delay = time.Minute - entry, ok := store.entry(0) + entry, ok := linkRes.entries.entry(0) if !ok { - t.Fatal("store.entry(0) not found") + t.Fatal("linkRes.entries.entry(0) not found") } - _, ch, err := neigh.entry(entry.Addr, "", linkRes, func(r LinkResolutionResult) { + _, ch, err := linkRes.neigh.entry(entry.Addr, "", func(r LinkResolutionResult) { if diff := cmp.Diff(LinkResolutionResult{Success: false}, r); diff != "" { t.Fatalf("got link resolution result mismatch (-want +got):\n%s", diff) } }) - if err != tcpip.ErrWouldBlock { - t.Fatalf("got neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got linkRes.neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } waitFor := config.RetransmitTimer * time.Duration(config.MaxMulticastProbes) clock.Advance(waitFor) @@ -1623,7 +1575,7 @@ func TestNeighborCacheResolutionTimeout(t *testing.T) { select { case <-ch: default: - t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, _, nil)", entry.Addr) + t.Fatalf("expected notification from done channel returned by linkRes.neigh.entry(%s, '', _, _, nil)", entry.Addr) } } @@ -1632,31 +1584,24 @@ func TestNeighborCacheResolutionTimeout(t *testing.T) { func TestNeighborCacheRetryResolution(t *testing.T) { config := DefaultNUDConfigurations() clock := faketime.NewManualClock() - neigh := newTestNeighborCache(nil, config, clock) - store := newTestEntryStore() - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: typicalLatency, - // Simulate a faulty link. - dropReplies: true, - } + linkRes := newTestNeighborResolver(nil, config, clock) + // Simulate a faulty link. + linkRes.dropReplies = true - entry, ok := store.entry(0) + entry, ok := linkRes.entries.entry(0) if !ok { - t.Fatal("store.entry(0) not found") + t.Fatal("linkRes.entries.entry(0) not found") } // Perform address resolution with a faulty link, which will fail. { - _, ch, err := neigh.entry(entry.Addr, "", linkRes, func(r LinkResolutionResult) { + _, ch, err := linkRes.neigh.entry(entry.Addr, "", func(r LinkResolutionResult) { if diff := cmp.Diff(LinkResolutionResult{Success: false}, r); diff != "" { t.Fatalf("got link resolution result mismatch (-want +got):\n%s", diff) } }) - if err != tcpip.ErrWouldBlock { - t.Fatalf("got neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got linkRes.neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } waitFor := config.RetransmitTimer * time.Duration(config.MaxMulticastProbes) clock.Advance(waitFor) @@ -1664,7 +1609,7 @@ func TestNeighborCacheRetryResolution(t *testing.T) { select { case <-ch: default: - t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, _, nil)", entry.Addr) + t.Fatalf("expected notification from done channel returned by linkRes.neigh.entry(%s, '', _, _, nil)", entry.Addr) } } @@ -1676,20 +1621,20 @@ func TestNeighborCacheRetryResolution(t *testing.T) { State: Failed, }, } - if diff := cmp.Diff(neigh.entries(), wantEntries, entryDiffOptsWithSort()...); diff != "" { + if diff := cmp.Diff(linkRes.neigh.entries(), wantEntries, entryDiffOptsWithSort()...); diff != "" { t.Fatalf("neighbor entries mismatch (-got, +want):\n%s", diff) } // Retry address resolution with a working link. linkRes.dropReplies = false { - incompleteEntry, ch, err := neigh.entry(entry.Addr, "", linkRes, func(r LinkResolutionResult) { + incompleteEntry, ch, err := linkRes.neigh.entry(entry.Addr, "", func(r LinkResolutionResult) { if diff := cmp.Diff(LinkResolutionResult{LinkAddress: entry.LinkAddr, Success: true}, r); diff != "" { t.Fatalf("got link resolution result mismatch (-want +got):\n%s", diff) } }) - if err != tcpip.ErrWouldBlock { - t.Fatalf("got neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got linkRes.neigh.entry(%s, '', _) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } if incompleteEntry.State != Incomplete { t.Fatalf("got entry.State = %s, want = %s", incompleteEntry.State, Incomplete) @@ -1701,9 +1646,9 @@ func TestNeighborCacheRetryResolution(t *testing.T) { if !ok { t.Fatal("expected successful address resolution") } - reachableEntry, _, err := neigh.entry(entry.Addr, "", linkRes, nil) + reachableEntry, _, err := linkRes.neigh.entry(entry.Addr, "", nil) if err != nil { - t.Fatalf("neigh.entry(%s, '', _, _, nil): %v", entry.Addr, err) + t.Fatalf("linkRes.neigh.entry(%s, '', _, _, nil): %v", entry.Addr, err) } if reachableEntry.Addr != entry.Addr { t.Fatalf("got entry.Addr = %s, want = %s", reachableEntry.Addr, entry.Addr) @@ -1715,7 +1660,7 @@ func TestNeighborCacheRetryResolution(t *testing.T) { t.Fatalf("got entry.State = %s, want = %s", reachableEntry.State.String(), Reachable.String()) } default: - t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, _, nil)", entry.Addr) + t.Fatalf("expected notification from done channel returned by linkRes.neigh.entry(%s, '', _, _, nil)", entry.Addr) } } } @@ -1724,42 +1669,36 @@ func BenchmarkCacheClear(b *testing.B) { b.StopTimer() config := DefaultNUDConfigurations() clock := &tcpip.StdClock{} - neigh := newTestNeighborCache(nil, config, clock) - store := newTestEntryStore() - linkRes := &testNeighborResolver{ - clock: clock, - neigh: neigh, - entries: store, - delay: 0, - } + linkRes := newTestNeighborResolver(nil, config, clock) + linkRes.delay = 0 // Clear for every possible size of the cache for cacheSize := 0; cacheSize < neighborCacheSize; cacheSize++ { // Fill the neighbor cache to capacity. for i := 0; i < cacheSize; i++ { - entry, ok := store.entry(i) + entry, ok := linkRes.entries.entry(i) if !ok { - b.Fatalf("store.entry(%d) not found", i) + b.Fatalf("linkRes.entries.entry(%d) not found", i) } - _, ch, err := neigh.entry(entry.Addr, "", linkRes, func(r LinkResolutionResult) { + _, ch, err := linkRes.neigh.entry(entry.Addr, "", func(r LinkResolutionResult) { if diff := cmp.Diff(LinkResolutionResult{LinkAddress: entry.LinkAddr, Success: true}, r); diff != "" { b.Fatalf("got link resolution result mismatch (-want +got):\n%s", diff) } }) - if err != tcpip.ErrWouldBlock { - b.Fatalf("got neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + b.Fatalf("got linkRes.neigh.entry(%s, '', _, _, nil) = %v, want = %s", entry.Addr, err, &tcpip.ErrWouldBlock{}) } select { case <-ch: default: - b.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, _, nil)", entry.Addr) + b.Fatalf("expected notification from done channel returned by linkRes.neigh.entry(%s, '', _, _, nil)", entry.Addr) } } b.StartTimer() - neigh.clear() + linkRes.neigh.clear() b.StopTimer() } } diff --git a/pkg/tcpip/stack/neighbor_entry.go b/pkg/tcpip/stack/neighbor_entry.go index 53ac9bb6e..b05f96d4f 100644 --- a/pkg/tcpip/stack/neighbor_entry.go +++ b/pkg/tcpip/stack/neighbor_entry.go @@ -77,11 +77,7 @@ const ( type neighborEntry struct { neighborEntryEntry - nic *NIC - - // linkRes provides the functionality to send reachability probes, used in - // Neighbor Unreachability Detection. - linkRes LinkAddressResolver + cache *neighborCache // nudState points to the Neighbor Unreachability Detection configuration. nudState *NUDState @@ -106,10 +102,9 @@ type neighborEntry struct { // state, Unknown. Transition out of Unknown by calling either // `handlePacketQueuedLocked` or `handleProbeLocked` on the newly created // neighborEntry. -func newNeighborEntry(nic *NIC, remoteAddr tcpip.Address, nudState *NUDState, linkRes LinkAddressResolver) *neighborEntry { +func newNeighborEntry(cache *neighborCache, remoteAddr tcpip.Address, nudState *NUDState) *neighborEntry { return &neighborEntry{ - nic: nic, - linkRes: linkRes, + cache: cache, nudState: nudState, neigh: NeighborEntry{ Addr: remoteAddr, @@ -121,18 +116,18 @@ func newNeighborEntry(nic *NIC, remoteAddr tcpip.Address, nudState *NUDState, li // newStaticNeighborEntry creates a neighbor cache entry starting at the // Static state. The entry can only transition out of Static by directly // calling `setStateLocked`. -func newStaticNeighborEntry(nic *NIC, addr tcpip.Address, linkAddr tcpip.LinkAddress, state *NUDState) *neighborEntry { +func newStaticNeighborEntry(cache *neighborCache, addr tcpip.Address, linkAddr tcpip.LinkAddress, state *NUDState) *neighborEntry { entry := NeighborEntry{ Addr: addr, LinkAddr: linkAddr, State: Static, - UpdatedAtNanos: nic.stack.clock.NowNanoseconds(), + UpdatedAtNanos: cache.nic.stack.clock.NowNanoseconds(), } - if nic.stack.nudDisp != nil { - nic.stack.nudDisp.OnNeighborAdded(nic.id, entry) + if nudDisp := cache.nic.stack.nudDisp; nudDisp != nil { + nudDisp.OnNeighborAdded(cache.nic.id, entry) } return &neighborEntry{ - nic: nic, + cache: cache, nudState: state, neigh: entry, } @@ -158,7 +153,7 @@ func (e *neighborEntry) notifyCompletionLocked(succeeded bool) { // is resolved (which ends up obtaining the entry's lock) while holding the // link resolution queue's lock. Dequeuing packets in a new goroutine avoids // a lock ordering violation. - go e.nic.linkResQueue.dequeue(ch, e.neigh.LinkAddr, succeeded) + go e.cache.nic.linkResQueue.dequeue(ch, e.neigh.LinkAddr, succeeded) } } @@ -167,8 +162,8 @@ func (e *neighborEntry) notifyCompletionLocked(succeeded bool) { // // Precondition: e.mu MUST be locked. func (e *neighborEntry) dispatchAddEventLocked() { - if nudDisp := e.nic.stack.nudDisp; nudDisp != nil { - nudDisp.OnNeighborAdded(e.nic.id, e.neigh) + if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil { + nudDisp.OnNeighborAdded(e.cache.nic.id, e.neigh) } } @@ -177,8 +172,8 @@ func (e *neighborEntry) dispatchAddEventLocked() { // // Precondition: e.mu MUST be locked. func (e *neighborEntry) dispatchChangeEventLocked() { - if nudDisp := e.nic.stack.nudDisp; nudDisp != nil { - nudDisp.OnNeighborChanged(e.nic.id, e.neigh) + if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil { + nudDisp.OnNeighborChanged(e.cache.nic.id, e.neigh) } } @@ -187,8 +182,8 @@ func (e *neighborEntry) dispatchChangeEventLocked() { // // Precondition: e.mu MUST be locked. func (e *neighborEntry) dispatchRemoveEventLocked() { - if nudDisp := e.nic.stack.nudDisp; nudDisp != nil { - nudDisp.OnNeighborRemoved(e.nic.id, e.neigh) + if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil { + nudDisp.OnNeighborRemoved(e.cache.nic.id, e.neigh) } } @@ -206,7 +201,7 @@ func (e *neighborEntry) cancelJobLocked() { // // Precondition: e.mu MUST be locked. func (e *neighborEntry) removeLocked() { - e.neigh.UpdatedAtNanos = e.nic.stack.clock.NowNanoseconds() + e.neigh.UpdatedAtNanos = e.cache.nic.stack.clock.NowNanoseconds() e.dispatchRemoveEventLocked() e.cancelJobLocked() e.notifyCompletionLocked(false /* succeeded */) @@ -222,7 +217,7 @@ func (e *neighborEntry) setStateLocked(next NeighborState) { prev := e.neigh.State e.neigh.State = next - e.neigh.UpdatedAtNanos = e.nic.stack.clock.NowNanoseconds() + e.neigh.UpdatedAtNanos = e.cache.nic.stack.clock.NowNanoseconds() config := e.nudState.Config() switch next { @@ -230,14 +225,14 @@ func (e *neighborEntry) setStateLocked(next NeighborState) { panic(fmt.Sprintf("should never transition to Incomplete with setStateLocked; neigh = %#v, prev state = %s", e.neigh, prev)) case Reachable: - e.job = e.nic.stack.newJob(&e.mu, func() { + e.job = e.cache.nic.stack.newJob(&e.mu, func() { e.setStateLocked(Stale) e.dispatchChangeEventLocked() }) e.job.Schedule(e.nudState.ReachableTime()) case Delay: - e.job = e.nic.stack.newJob(&e.mu, func() { + e.job = e.cache.nic.stack.newJob(&e.mu, func() { e.setStateLocked(Probe) e.dispatchChangeEventLocked() }) @@ -254,14 +249,14 @@ func (e *neighborEntry) setStateLocked(next NeighborState) { return } - if err := e.linkRes.LinkAddressRequest(e.neigh.Addr, "" /* localAddr */, e.neigh.LinkAddr, e.nic); err != nil { + if err := e.cache.linkRes.LinkAddressRequest(e.neigh.Addr, "" /* localAddr */, e.neigh.LinkAddr); err != nil { e.dispatchRemoveEventLocked() e.setStateLocked(Failed) return } retryCounter++ - e.job = e.nic.stack.newJob(&e.mu, sendUnicastProbe) + e.job = e.cache.nic.stack.newJob(&e.mu, sendUnicastProbe) e.job.Schedule(config.RetransmitTimer) } @@ -269,7 +264,7 @@ func (e *neighborEntry) setStateLocked(next NeighborState) { // for finishing the state transition. This is necessary to avoid // deadlock where sending and processing probes are done synchronously, // such as loopback and integration tests. - e.job = e.nic.stack.newJob(&e.mu, sendUnicastProbe) + e.job = e.cache.nic.stack.newJob(&e.mu, sendUnicastProbe) e.job.Schedule(immediateDuration) case Failed: @@ -292,12 +287,12 @@ func (e *neighborEntry) setStateLocked(next NeighborState) { func (e *neighborEntry) handlePacketQueuedLocked(localAddr tcpip.Address) { switch e.neigh.State { case Failed: - e.nic.stats.Neighbor.FailedEntryLookups.Increment() + e.cache.nic.stats.Neighbor.FailedEntryLookups.Increment() fallthrough case Unknown: e.neigh.State = Incomplete - e.neigh.UpdatedAtNanos = e.nic.stack.clock.NowNanoseconds() + e.neigh.UpdatedAtNanos = e.cache.nic.stack.clock.NowNanoseconds() e.dispatchAddEventLocked() @@ -340,7 +335,7 @@ func (e *neighborEntry) handlePacketQueuedLocked(localAddr tcpip.Address) { // address SHOULD be placed in the IP Source Address of the outgoing // solicitation. // - if err := e.linkRes.LinkAddressRequest(e.neigh.Addr, localAddr, "", e.nic); err != nil { + if err := e.cache.linkRes.LinkAddressRequest(e.neigh.Addr, localAddr, ""); err != nil { // There is no need to log the error here; the NUD implementation may // assume a working link. A valid link should be the responsibility of // the NIC/stack.LinkEndpoint. @@ -350,7 +345,7 @@ func (e *neighborEntry) handlePacketQueuedLocked(localAddr tcpip.Address) { } retryCounter++ - e.job = e.nic.stack.newJob(&e.mu, sendMulticastProbe) + e.job = e.cache.nic.stack.newJob(&e.mu, sendMulticastProbe) e.job.Schedule(config.RetransmitTimer) } @@ -358,7 +353,7 @@ func (e *neighborEntry) handlePacketQueuedLocked(localAddr tcpip.Address) { // for finishing the state transition. This is necessary to avoid // deadlock where sending and processing probes are done synchronously, // such as loopback and integration tests. - e.job = e.nic.stack.newJob(&e.mu, sendMulticastProbe) + e.job = e.cache.nic.stack.newJob(&e.mu, sendMulticastProbe) e.job.Schedule(immediateDuration) case Stale: @@ -504,7 +499,7 @@ func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, fla // // TODO(gvisor.dev/issue/4085): Remove the special casing we do for IPv6 // here. - ep, ok := e.nic.networkEndpoints[header.IPv6ProtocolNumber] + ep, ok := e.cache.nic.networkEndpoints[header.IPv6ProtocolNumber] if !ok { panic(fmt.Sprintf("have a neighbor entry for an IPv6 router but no IPv6 network endpoint")) } diff --git a/pkg/tcpip/stack/neighbor_entry_test.go b/pkg/tcpip/stack/neighbor_entry_test.go index ec34ffa5a..57cfbdb8b 100644 --- a/pkg/tcpip/stack/neighbor_entry_test.go +++ b/pkg/tcpip/stack/neighbor_entry_test.go @@ -193,7 +193,7 @@ func (p entryTestProbeInfo) String() string { // LinkAddressRequest sends a request for the LinkAddress of addr. Broadcasts // to the local network if linkAddr is the zero value. -func (r *entryTestLinkResolver) LinkAddressRequest(targetAddr, localAddr tcpip.Address, linkAddr tcpip.LinkAddress, _ NetworkInterface) *tcpip.Error { +func (r *entryTestLinkResolver) LinkAddressRequest(targetAddr, localAddr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error { p := entryTestProbeInfo{ RemoteAddress: targetAddr, RemoteLinkAddress: linkAddr, @@ -230,22 +230,30 @@ func entryTestSetup(c NUDConfigurations) (*neighborEntry, *testNUDDispatcher, *e }, stats: makeNICStats(), } + netEP := (&testIPv6Protocol{}).NewEndpoint(&nic, nil) nic.networkEndpoints = map[tcpip.NetworkProtocolNumber]NetworkEndpoint{ - header.IPv6ProtocolNumber: (&testIPv6Protocol{}).NewEndpoint(&nic, nil, nil, nil), + header.IPv6ProtocolNumber: netEP, } rng := rand.New(rand.NewSource(time.Now().UnixNano())) nudState := NewNUDState(c, rng) - linkRes := entryTestLinkResolver{} - entry := newNeighborEntry(&nic, entryTestAddr1 /* remoteAddr */, nudState, &linkRes) - + var linkRes entryTestLinkResolver // Stub out the neighbor cache to verify deletion from the cache. - nic.neigh = &neighborCache{ - nic: &nic, - state: nudState, - cache: make(map[tcpip.Address]*neighborEntry, neighborCacheSize), + neigh := &neighborCache{ + nic: &nic, + state: nudState, + linkRes: &linkRes, + cache: make(map[tcpip.Address]*neighborEntry, neighborCacheSize), + } + l := linkResolver{ + resolver: &linkRes, + neighborTable: neigh, + } + entry := newNeighborEntry(neigh, entryTestAddr1 /* remoteAddr */, nudState) + neigh.cache[entryTestAddr1] = entry + nic.linkAddrResolvers = map[tcpip.NetworkProtocolNumber]linkResolver{ + header.IPv6ProtocolNumber: l, } - nic.neigh.cache[entryTestAddr1] = entry return entry, &disp, &linkRes, clock } @@ -266,16 +274,16 @@ func TestEntryInitiallyUnknown(t *testing.T) { // No probes should have been sent. linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, []entryTestProbeInfo(nil)) + diff := cmp.Diff([]entryTestProbeInfo(nil), linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } // No events should have been dispatched. nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, []testEntryEventInfo(nil)); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff([]testEntryEventInfo(nil), nudDisp.events); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -299,16 +307,16 @@ func TestEntryUnknownToUnknownWhenConfirmationWithUnknownAddress(t *testing.T) { // No probes should have been sent. linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, []entryTestProbeInfo(nil)) + diff := cmp.Diff([]entryTestProbeInfo(nil), linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } // No events should have been dispatched. nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, []testEntryEventInfo(nil)); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff([]testEntryEventInfo(nil), nudDisp.events); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -333,10 +341,10 @@ func TestEntryUnknownToIncomplete(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } wantEvents := []testEntryEventInfo{ @@ -352,10 +360,10 @@ func TestEntryUnknownToIncomplete(t *testing.T) { } { nudDisp.mu.Lock() - diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...) + diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...) nudDisp.mu.Unlock() if diff != "" { - t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + t.Fatalf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } } } @@ -374,10 +382,10 @@ func TestEntryUnknownToStale(t *testing.T) { // No probes should have been sent. runImmediatelyScheduledJobs(clock) linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, []entryTestProbeInfo(nil)) + diff := cmp.Diff([]entryTestProbeInfo(nil), linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } wantEvents := []testEntryEventInfo{ @@ -392,8 +400,8 @@ func TestEntryUnknownToStale(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -427,11 +435,11 @@ func TestEntryIncompleteToIncompleteDoesNotChangeUpdatedAt(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.probes = nil linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -453,10 +461,10 @@ func TestEntryIncompleteToIncompleteDoesNotChangeUpdatedAt(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -483,8 +491,8 @@ func TestEntryIncompleteToIncompleteDoesNotChangeUpdatedAt(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() @@ -515,10 +523,10 @@ func TestEntryIncompleteToReachable(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -553,8 +561,8 @@ func TestEntryIncompleteToReachable(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -579,10 +587,10 @@ func TestEntryIncompleteToReachableWithRouterFlag(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -620,8 +628,8 @@ func TestEntryIncompleteToReachableWithRouterFlag(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -646,10 +654,10 @@ func TestEntryIncompleteToStaleWhenUnsolicitedConfirmation(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -684,8 +692,8 @@ func TestEntryIncompleteToStaleWhenUnsolicitedConfirmation(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -710,10 +718,10 @@ func TestEntryIncompleteToStaleWhenProbe(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -744,8 +752,8 @@ func TestEntryIncompleteToStaleWhenProbe(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -785,10 +793,10 @@ func TestEntryIncompleteToFailed(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } wantEvents := []testEntryEventInfo{ @@ -812,8 +820,8 @@ func TestEntryIncompleteToFailed(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() @@ -835,7 +843,7 @@ func TestEntryStaysReachableWhenConfirmationWithRouterFlag(t *testing.T) { c := DefaultNUDConfigurations() e, nudDisp, linkRes, clock := entryTestSetup(c) - ipv6EP := e.nic.networkEndpoints[header.IPv6ProtocolNumber].(*testIPv6Endpoint) + ipv6EP := e.cache.nic.networkEndpoints[header.IPv6ProtocolNumber].(*testIPv6Endpoint) e.mu.Lock() e.handlePacketQueuedLocked(entryTestAddr2) @@ -850,10 +858,10 @@ func TestEntryStaysReachableWhenConfirmationWithRouterFlag(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -903,8 +911,8 @@ func TestEntryStaysReachableWhenConfirmationWithRouterFlag(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() @@ -932,10 +940,10 @@ func TestEntryStaysReachableWhenProbeWithSameAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -977,8 +985,8 @@ func TestEntryStaysReachableWhenProbeWithSameAddress(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1005,10 +1013,10 @@ func TestEntryReachableToStaleWhenTimeout(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1054,8 +1062,8 @@ func TestEntryReachableToStaleWhenTimeout(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() @@ -1083,10 +1091,10 @@ func TestEntryReachableToStaleWhenProbeWithDifferentAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1134,8 +1142,8 @@ func TestEntryReachableToStaleWhenProbeWithDifferentAddress(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1157,10 +1165,10 @@ func TestEntryReachableToStaleWhenConfirmationWithDifferentAddress(t *testing.T) }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1212,8 +1220,8 @@ func TestEntryReachableToStaleWhenConfirmationWithDifferentAddress(t *testing.T) }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1235,10 +1243,10 @@ func TestEntryReachableToStaleWhenConfirmationWithDifferentAddressAndOverride(t }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1290,8 +1298,8 @@ func TestEntryReachableToStaleWhenConfirmationWithDifferentAddressAndOverride(t }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1313,10 +1321,10 @@ func TestEntryStaysStaleWhenProbeWithSameAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1358,8 +1366,8 @@ func TestEntryStaysStaleWhenProbeWithSameAddress(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1381,10 +1389,10 @@ func TestEntryStaleToReachableWhenSolicitedOverrideConfirmation(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1439,8 +1447,8 @@ func TestEntryStaleToReachableWhenSolicitedOverrideConfirmation(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1462,10 +1470,10 @@ func TestEntryStaleToReachableWhenSolicitedConfirmationWithoutAddress(t *testing }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1520,8 +1528,8 @@ func TestEntryStaleToReachableWhenSolicitedConfirmationWithoutAddress(t *testing }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1543,10 +1551,10 @@ func TestEntryStaleToStaleWhenOverrideConfirmation(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1601,8 +1609,8 @@ func TestEntryStaleToStaleWhenOverrideConfirmation(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1624,10 +1632,10 @@ func TestEntryStaleToStaleWhenProbeUpdateAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1678,8 +1686,8 @@ func TestEntryStaleToStaleWhenProbeUpdateAddress(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1701,10 +1709,10 @@ func TestEntryStaleToDelay(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1752,8 +1760,8 @@ func TestEntryStaleToDelay(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1780,10 +1788,10 @@ func TestEntryDelayToReachableWhenUpperLevelConfirmation(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1851,8 +1859,8 @@ func TestEntryDelayToReachableWhenUpperLevelConfirmation(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1880,10 +1888,10 @@ func TestEntryDelayToReachableWhenSolicitedOverrideConfirmation(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -1958,8 +1966,8 @@ func TestEntryDelayToReachableWhenSolicitedOverrideConfirmation(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -1987,10 +1995,10 @@ func TestEntryDelayToReachableWhenSolicitedConfirmationWithoutAddress(t *testing }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -2065,8 +2073,8 @@ func TestEntryDelayToReachableWhenSolicitedConfirmationWithoutAddress(t *testing }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -2088,10 +2096,10 @@ func TestEntryStaysDelayWhenOverrideConfirmationWithSameAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -2147,8 +2155,8 @@ func TestEntryStaysDelayWhenOverrideConfirmationWithSameAddress(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -2170,10 +2178,10 @@ func TestEntryDelayToStaleWhenProbeWithDifferentAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -2231,8 +2239,8 @@ func TestEntryDelayToStaleWhenProbeWithDifferentAddress(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -2254,10 +2262,10 @@ func TestEntryDelayToStaleWhenConfirmationWithDifferentAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -2319,8 +2327,8 @@ func TestEntryDelayToStaleWhenConfirmationWithDifferentAddress(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -2343,11 +2351,11 @@ func TestEntryDelayToProbe(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.probes = nil linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -2372,10 +2380,10 @@ func TestEntryDelayToProbe(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -2418,8 +2426,8 @@ func TestEntryDelayToProbe(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() @@ -2448,11 +2456,11 @@ func TestEntryProbeToStaleWhenProbeWithDifferentAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.probes = nil linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -2474,10 +2482,10 @@ func TestEntryProbeToStaleWhenProbeWithDifferentAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -2539,8 +2547,8 @@ func TestEntryProbeToStaleWhenProbeWithDifferentAddress(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -2563,11 +2571,11 @@ func TestEntryProbeToStaleWhenConfirmationWithDifferentAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.probes = nil linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -2589,10 +2597,10 @@ func TestEntryProbeToStaleWhenConfirmationWithDifferentAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -2658,8 +2666,8 @@ func TestEntryProbeToStaleWhenConfirmationWithDifferentAddress(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -2682,11 +2690,11 @@ func TestEntryStaysProbeWhenOverrideConfirmationWithSameAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.probes = nil linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -2709,10 +2717,10 @@ func TestEntryStaysProbeWhenOverrideConfirmationWithSameAddress(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -2772,8 +2780,8 @@ func TestEntryStaysProbeWhenOverrideConfirmationWithSameAddress(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -2806,10 +2814,10 @@ func TestEntryUnknownToStaleToProbeToReachable(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -2878,8 +2886,8 @@ func TestEntryUnknownToStaleToProbeToReachable(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -2907,11 +2915,11 @@ func TestEntryProbeToReachableWhenSolicitedOverrideConfirmation(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.probes = nil linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -2933,10 +2941,10 @@ func TestEntryProbeToReachableWhenSolicitedOverrideConfirmation(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -3015,8 +3023,8 @@ func TestEntryProbeToReachableWhenSolicitedOverrideConfirmation(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -3044,11 +3052,11 @@ func TestEntryProbeToReachableWhenSolicitedConfirmationWithSameAddress(t *testin }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.probes = nil linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -3070,10 +3078,10 @@ func TestEntryProbeToReachableWhenSolicitedConfirmationWithSameAddress(t *testin }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -3149,8 +3157,8 @@ func TestEntryProbeToReachableWhenSolicitedConfirmationWithSameAddress(t *testin }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -3178,11 +3186,11 @@ func TestEntryProbeToReachableWhenSolicitedConfirmationWithoutAddress(t *testing }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.probes = nil linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -3204,10 +3212,10 @@ func TestEntryProbeToReachableWhenSolicitedConfirmationWithoutAddress(t *testing }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -3283,8 +3291,8 @@ func TestEntryProbeToReachableWhenSolicitedConfirmationWithoutAddress(t *testing }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -3309,11 +3317,11 @@ func TestEntryProbeToFailed(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.probes = nil linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } } @@ -3336,11 +3344,11 @@ func TestEntryProbeToFailed(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.probes = nil linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probe #%d mismatch (-got, +want):\n%s", i+1, diff) + t.Fatalf("link address resolver probe #%d mismatch (-want, +got):\n%s", i+1, diff) } e.mu.Lock() @@ -3406,8 +3414,8 @@ func TestEntryProbeToFailed(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } @@ -3449,10 +3457,10 @@ func TestEntryFailedToIncomplete(t *testing.T) { }, } linkRes.mu.Lock() - diff := cmp.Diff(linkRes.probes, wantProbes) + diff := cmp.Diff(wantProbes, linkRes.probes) linkRes.mu.Unlock() if diff != "" { - t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff) + t.Fatalf("link address resolver probes mismatch (-want, +got):\n%s", diff) } e.mu.Lock() @@ -3498,8 +3506,8 @@ func TestEntryFailedToIncomplete(t *testing.T) { }, } nudDisp.mu.Lock() - if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" { - t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff) + if diff := cmp.Diff(wantEvents, nudDisp.events, eventDiffOpts()...); diff != "" { + t.Errorf("nud dispatcher events mismatch (-want, +got):\n%s", diff) } nudDisp.mu.Unlock() } diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 1bbfe6213..41a489047 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -16,7 +16,6 @@ package stack import ( "fmt" - "math/rand" "reflect" "sync/atomic" @@ -25,8 +24,37 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/header" ) +type neighborTable interface { + neighbors() ([]NeighborEntry, tcpip.Error) + addStaticEntry(tcpip.Address, tcpip.LinkAddress) + get(addr, localAddr tcpip.Address, onResolve func(LinkResolutionResult)) (tcpip.LinkAddress, <-chan struct{}, tcpip.Error) + remove(tcpip.Address) tcpip.Error + removeAll() tcpip.Error + + handleProbe(tcpip.Address, tcpip.LinkAddress) + handleConfirmation(tcpip.Address, tcpip.LinkAddress, ReachabilityConfirmationFlags) + handleUpperLevelConfirmation(tcpip.Address) + + nudConfig() (NUDConfigurations, tcpip.Error) + setNUDConfig(NUDConfigurations) tcpip.Error +} + var _ NetworkInterface = (*NIC)(nil) +type linkResolver struct { + resolver LinkAddressResolver + + neighborTable neighborTable +} + +func (l *linkResolver) getNeighborLinkAddress(addr, localAddr tcpip.Address, onResolve func(LinkResolutionResult)) (tcpip.LinkAddress, <-chan struct{}, tcpip.Error) { + return l.neighborTable.get(addr, localAddr, onResolve) +} + +func (l *linkResolver) confirmReachable(addr tcpip.Address) { + l.neighborTable.handleUpperLevelConfirmation(addr) +} + // NIC represents a "network interface card" to which the networking stack is // attached. type NIC struct { @@ -38,11 +66,11 @@ type NIC struct { context NICContext stats NICStats - neigh *neighborCache // The network endpoints themselves may be modified by calling the interface's // methods, but the map reference and entries must be constant. - networkEndpoints map[tcpip.NetworkProtocolNumber]NetworkEndpoint + networkEndpoints map[tcpip.NetworkProtocolNumber]NetworkEndpoint + linkAddrResolvers map[tcpip.NetworkProtocolNumber]linkResolver // enabled is set to 1 when the NIC is enabled and 0 when it is disabled. // @@ -53,8 +81,6 @@ type NIC struct { // complete. linkResQueue packetsPendingLinkResolution - linkAddrCache *linkAddrCache - mu struct { sync.RWMutex spoofing bool @@ -133,35 +159,18 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC nic := &NIC{ LinkEndpoint: ep, - stack: stack, - id: id, - name: name, - context: ctx, - stats: makeNICStats(), - networkEndpoints: make(map[tcpip.NetworkProtocolNumber]NetworkEndpoint), + stack: stack, + id: id, + name: name, + context: ctx, + stats: makeNICStats(), + networkEndpoints: make(map[tcpip.NetworkProtocolNumber]NetworkEndpoint), + linkAddrResolvers: make(map[tcpip.NetworkProtocolNumber]linkResolver), } nic.linkResQueue.init(nic) - nic.linkAddrCache = newLinkAddrCache(nic, ageLimit, resolutionTimeout, resolutionAttempts) nic.mu.packetEPs = make(map[tcpip.NetworkProtocolNumber]*packetEndpointList) - // Check for Neighbor Unreachability Detection support. - var nud NUDHandler - if ep.Capabilities()&CapabilityResolutionRequired != 0 && len(stack.linkAddrResolvers) != 0 && stack.useNeighborCache { - rng := rand.New(rand.NewSource(stack.clock.NowNanoseconds())) - nic.neigh = &neighborCache{ - nic: nic, - state: NewNUDState(stack.nudConfigs, rng), - cache: make(map[tcpip.Address]*neighborEntry, neighborCacheSize), - } - - // An interface value that holds a nil pointer but non-nil type is not the - // same as the nil interface. Because of this, nud must only be assignd if - // nic.neigh is non-nil since a nil reference to a neighborCache is not - // valid. - // - // See https://golang.org/doc/faq#nil_error for more information. - nud = nic.neigh - } + resolutionRequired := ep.Capabilities()&CapabilityResolutionRequired != 0 // Register supported packet and network endpoint protocols. for _, netProto := range header.Ethertypes { @@ -170,7 +179,32 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC for _, netProto := range stack.networkProtocols { netNum := netProto.Number() nic.mu.packetEPs[netNum] = new(packetEndpointList) - nic.networkEndpoints[netNum] = netProto.NewEndpoint(nic, nic.linkAddrCache, nud, nic) + + netEP := netProto.NewEndpoint(nic, nic) + nic.networkEndpoints[netNum] = netEP + + if resolutionRequired { + if r, ok := netEP.(LinkAddressResolver); ok { + l := linkResolver{ + resolver: r, + } + + if stack.useNeighborCache { + l.neighborTable = &neighborCache{ + nic: nic, + state: NewNUDState(stack.nudConfigs, stack.randomGenerator), + linkRes: r, + + cache: make(map[tcpip.Address]*neighborEntry, neighborCacheSize), + } + } else { + cache := new(linkAddrCache) + cache.init(nic, ageLimit, resolutionTimeout, resolutionAttempts, r) + l.neighborTable = cache + } + nic.linkAddrResolvers[r.LinkAddressProtocol()] = l + } + } } nic.LinkEndpoint.Attach(nic) @@ -223,16 +257,19 @@ func (n *NIC) disableLocked() { for _, ep := range n.networkEndpoints { ep.Disable() - } - // Clear the neighbour table (including static entries) as we cannot guarantee - // that the current neighbour table will be valid when the NIC is enabled - // again. - // - // This matches linux's behaviour at the time of writing: - // https://github.com/torvalds/linux/blob/71c061d2443814de15e177489d5cc00a4a253ef3/net/core/neighbour.c#L371 - if err := n.clearNeighbors(); err != nil && err != tcpip.ErrNotSupported { - panic(fmt.Sprintf("n.clearNeighbors(): %s", err)) + // Clear the neighbour table (including static entries) as we cannot + // guarantee that the current neighbour table will be valid when the NIC is + // enabled again. + // + // This matches linux's behaviour at the time of writing: + // https://github.com/torvalds/linux/blob/71c061d2443814de15e177489d5cc00a4a253ef3/net/core/neighbour.c#L371 + netProto := ep.NetworkProtocolNumber() + switch err := n.clearNeighbors(netProto); err.(type) { + case nil, *tcpip.ErrNotSupported: + default: + panic(fmt.Sprintf("n.clearNeighbors(%d): %s", netProto, err)) + } } if !n.setEnabled(false) { @@ -246,7 +283,7 @@ func (n *NIC) disableLocked() { // address (ff02::1), start DAD for permanent addresses, and start soliciting // routers if the stack is not operating as a router. If the stack is also // configured to auto-generate a link-local address, one will be generated. -func (n *NIC) enable() *tcpip.Error { +func (n *NIC) enable() tcpip.Error { n.mu.Lock() defer n.mu.Unlock() @@ -266,7 +303,7 @@ func (n *NIC) enable() *tcpip.Error { // remove detaches NIC from the link endpoint and releases network endpoint // resources. This guarantees no packets between this NIC and the network // stack. -func (n *NIC) remove() *tcpip.Error { +func (n *NIC) remove() tcpip.Error { n.mu.Lock() defer n.mu.Unlock() @@ -302,40 +339,63 @@ func (n *NIC) IsLoopback() bool { } // WritePacket implements NetworkLinkEndpoint. -func (n *NIC) WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) *tcpip.Error { +func (n *NIC) WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) tcpip.Error { _, err := n.enqueuePacketBuffer(r, gso, protocol, pkt) return err } -func (n *NIC) enqueuePacketBuffer(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt pendingPacketBuffer) (int, *tcpip.Error) { - // As per relevant RFCs, we should queue packets while we wait for link - // resolution to complete. - // - // RFC 1122 section 2.3.2.2 (for IPv4): - // The link layer SHOULD save (rather than discard) at least - // one (the latest) packet of each set of packets destined to - // the same unresolved IP address, and transmit the saved - // packet when the address has been resolved. - // - // RFC 4861 section 7.2.2 (for IPv6): - // While waiting for address resolution to complete, the sender MUST, for - // each neighbor, retain a small queue of packets waiting for address - // resolution to complete. The queue MUST hold at least one packet, and MAY - // contain more. However, the number of queued packets per neighbor SHOULD - // be limited to some small value. When a queue overflows, the new arrival - // SHOULD replace the oldest entry. Once address resolution completes, the - // node transmits any queued packets. - return n.linkResQueue.enqueue(r, gso, protocol, pkt) + +func (n *NIC) writePacketBuffer(r RouteInfo, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt pendingPacketBuffer) (int, tcpip.Error) { + switch pkt := pkt.(type) { + case *PacketBuffer: + if err := n.writePacket(r, gso, protocol, pkt); err != nil { + return 0, err + } + return 1, nil + case *PacketBufferList: + return n.writePackets(r, gso, protocol, *pkt) + default: + panic(fmt.Sprintf("unrecognized pending packet buffer type = %T", pkt)) + } +} + +func (n *NIC) enqueuePacketBuffer(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt pendingPacketBuffer) (int, tcpip.Error) { + routeInfo, _, err := r.resolvedFields(nil) + switch err.(type) { + case nil: + return n.writePacketBuffer(routeInfo, gso, protocol, pkt) + case *tcpip.ErrWouldBlock: + // As per relevant RFCs, we should queue packets while we wait for link + // resolution to complete. + // + // RFC 1122 section 2.3.2.2 (for IPv4): + // The link layer SHOULD save (rather than discard) at least + // one (the latest) packet of each set of packets destined to + // the same unresolved IP address, and transmit the saved + // packet when the address has been resolved. + // + // RFC 4861 section 7.2.2 (for IPv6): + // While waiting for address resolution to complete, the sender MUST, for + // each neighbor, retain a small queue of packets waiting for address + // resolution to complete. The queue MUST hold at least one packet, and + // MAY contain more. However, the number of queued packets per neighbor + // SHOULD be limited to some small value. When a queue overflows, the new + // arrival SHOULD replace the oldest entry. Once address resolution + // completes, the node transmits any queued packets. + return n.linkResQueue.enqueue(r, gso, protocol, pkt) + default: + return 0, err + } } // WritePacketToRemote implements NetworkInterface. -func (n *NIC) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) *tcpip.Error { +func (n *NIC) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) tcpip.Error { var r RouteInfo r.NetProto = protocol r.RemoteLinkAddress = remoteLinkAddr return n.writePacket(r, gso, protocol, pkt) } -func (n *NIC) writePacket(r RouteInfo, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) *tcpip.Error { +func (n *NIC) writePacket(r RouteInfo, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) tcpip.Error { // WritePacket takes ownership of pkt, calculate numBytes first. numBytes := pkt.Size() @@ -352,11 +412,11 @@ func (n *NIC) writePacket(r RouteInfo, gso *GSO, protocol tcpip.NetworkProtocolN } // WritePackets implements NetworkLinkEndpoint. -func (n *NIC) WritePackets(r *Route, gso *GSO, pkts PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (n *NIC) WritePackets(r *Route, gso *GSO, pkts PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, tcpip.Error) { return n.enqueuePacketBuffer(r, gso, protocol, &pkts) } -func (n *NIC) writePackets(r RouteInfo, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkts PacketBufferList) (int, *tcpip.Error) { +func (n *NIC) writePackets(r RouteInfo, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkts PacketBufferList) (int, tcpip.Error) { for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { pkt.EgressRoute = r pkt.GSOOptions = gso @@ -472,15 +532,15 @@ func (n *NIC) getAddressOrCreateTempInner(protocol tcpip.NetworkProtocolNumber, // addAddress adds a new address to n, so that it starts accepting packets // targeted at the given address (and network protocol). -func (n *NIC) addAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) *tcpip.Error { +func (n *NIC) addAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) tcpip.Error { ep, ok := n.networkEndpoints[protocolAddress.Protocol] if !ok { - return tcpip.ErrUnknownProtocol + return &tcpip.ErrUnknownProtocol{} } addressableEndpoint, ok := ep.(AddressableEndpoint) if !ok { - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } addressEndpoint, err := addressableEndpoint.AddAndAcquirePermanentAddress(protocolAddress.AddressWithPrefix, peb, AddressConfigStatic, false /* deprecated */) @@ -544,72 +604,75 @@ func (n *NIC) primaryAddress(proto tcpip.NetworkProtocolNumber) tcpip.AddressWit } // removeAddress removes an address from n. -func (n *NIC) removeAddress(addr tcpip.Address) *tcpip.Error { +func (n *NIC) removeAddress(addr tcpip.Address) tcpip.Error { for _, ep := range n.networkEndpoints { addressableEndpoint, ok := ep.(AddressableEndpoint) if !ok { continue } - if err := addressableEndpoint.RemovePermanentAddress(addr); err == tcpip.ErrBadLocalAddress { + switch err := addressableEndpoint.RemovePermanentAddress(addr); err.(type) { + case *tcpip.ErrBadLocalAddress: continue - } else { + default: return err } } - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } -func (n *NIC) getNeighborLinkAddress(addr, localAddr tcpip.Address, linkRes LinkAddressResolver, onResolve func(LinkResolutionResult)) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) { - if n.neigh != nil { - entry, ch, err := n.neigh.entry(addr, localAddr, linkRes, onResolve) - return entry.LinkAddr, ch, err +func (n *NIC) getLinkAddress(addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error { + linkRes, ok := n.linkAddrResolvers[protocol] + if !ok { + return &tcpip.ErrNotSupported{} } - return n.linkAddrCache.get(addr, linkRes, localAddr, n, onResolve) + if linkAddr, ok := linkRes.resolver.ResolveStaticAddress(addr); ok { + onResolve(LinkResolutionResult{LinkAddress: linkAddr, Success: true}) + return nil + } + + _, _, err := linkRes.getNeighborLinkAddress(addr, localAddr, onResolve) + return err } -func (n *NIC) neighbors() ([]NeighborEntry, *tcpip.Error) { - if n.neigh == nil { - return nil, tcpip.ErrNotSupported +func (n *NIC) neighbors(protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) { + if linkRes, ok := n.linkAddrResolvers[protocol]; ok { + return linkRes.neighborTable.neighbors() } - return n.neigh.entries(), nil + return nil, &tcpip.ErrNotSupported{} } -func (n *NIC) addStaticNeighbor(addr tcpip.Address, linkAddress tcpip.LinkAddress) *tcpip.Error { - if n.neigh == nil { - return tcpip.ErrNotSupported +func (n *NIC) addStaticNeighbor(addr tcpip.Address, protocol tcpip.NetworkProtocolNumber, linkAddress tcpip.LinkAddress) tcpip.Error { + if linkRes, ok := n.linkAddrResolvers[protocol]; ok { + linkRes.neighborTable.addStaticEntry(addr, linkAddress) + return nil } - n.neigh.addStaticEntry(addr, linkAddress) - return nil + return &tcpip.ErrNotSupported{} } -func (n *NIC) removeNeighbor(addr tcpip.Address) *tcpip.Error { - if n.neigh == nil { - return tcpip.ErrNotSupported +func (n *NIC) removeNeighbor(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error { + if linkRes, ok := n.linkAddrResolvers[protocol]; ok { + return linkRes.neighborTable.remove(addr) } - if !n.neigh.removeEntry(addr) { - return tcpip.ErrBadAddress - } - return nil + return &tcpip.ErrNotSupported{} } -func (n *NIC) clearNeighbors() *tcpip.Error { - if n.neigh == nil { - return tcpip.ErrNotSupported +func (n *NIC) clearNeighbors(protocol tcpip.NetworkProtocolNumber) tcpip.Error { + if linkRes, ok := n.linkAddrResolvers[protocol]; ok { + return linkRes.neighborTable.removeAll() } - n.neigh.clear() - return nil + return &tcpip.ErrNotSupported{} } // joinGroup adds a new endpoint for the given multicast address, if none // exists yet. Otherwise it just increments its count. -func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error { +func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error { // TODO(b/143102137): When implementing MLD, make sure MLD packets are // not sent unless a valid link-local address is available for use on n // as an MLD packet's source address must be a link-local address as @@ -617,12 +680,12 @@ func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address ep, ok := n.networkEndpoints[protocol] if !ok { - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } gep, ok := ep.(GroupAddressableEndpoint) if !ok { - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } return gep.JoinGroup(addr) @@ -630,15 +693,15 @@ func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address // leaveGroup decrements the count for the given multicast address, and when it // reaches zero removes the endpoint for this address. -func (n *NIC) leaveGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error { +func (n *NIC) leaveGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error { ep, ok := n.networkEndpoints[protocol] if !ok { - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } gep, ok := ep.(GroupAddressableEndpoint) if !ok { - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } return gep.LeaveGroup(addr) @@ -848,9 +911,8 @@ func (n *NIC) DeliverTransportPacket(protocol tcpip.TransportProtocolNumber, pkt } } -// DeliverTransportControlPacket delivers control packets to the appropriate -// transport protocol endpoint. -func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer) { +// DeliverTransportError implements TransportDispatcher. +func (n *NIC) DeliverTransportError(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, transErr TransportError, pkt *PacketBuffer) { state, ok := n.stack.transportProtocols[trans] if !ok { return @@ -872,7 +934,7 @@ func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcp } id := TransportEndpointID{srcPort, local, dstPort, remote} - if n.stack.demux.deliverControlPacket(n, net, trans, typ, extra, pkt, id) { + if n.stack.demux.deliverError(n, net, trans, transErr, pkt, id) { return } } @@ -888,33 +950,34 @@ func (n *NIC) Name() string { } // nudConfigs gets the NUD configurations for n. -func (n *NIC) nudConfigs() (NUDConfigurations, *tcpip.Error) { - if n.neigh == nil { - return NUDConfigurations{}, tcpip.ErrNotSupported +func (n *NIC) nudConfigs(protocol tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) { + if linkRes, ok := n.linkAddrResolvers[protocol]; ok { + return linkRes.neighborTable.nudConfig() } - return n.neigh.config(), nil + + return NUDConfigurations{}, &tcpip.ErrNotSupported{} } // setNUDConfigs sets the NUD configurations for n. // // Note, if c contains invalid NUD configuration values, it will be fixed to // use default values for the erroneous values. -func (n *NIC) setNUDConfigs(c NUDConfigurations) *tcpip.Error { - if n.neigh == nil { - return tcpip.ErrNotSupported +func (n *NIC) setNUDConfigs(protocol tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error { + if linkRes, ok := n.linkAddrResolvers[protocol]; ok { + c.resetInvalidFields() + return linkRes.neighborTable.setNUDConfig(c) } - c.resetInvalidFields() - n.neigh.setConfig(c) - return nil + + return &tcpip.ErrNotSupported{} } -func (n *NIC) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) *tcpip.Error { +func (n *NIC) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) tcpip.Error { n.mu.Lock() defer n.mu.Unlock() eps, ok := n.mu.packetEPs[netProto] if !ok { - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } eps.add(ep) @@ -941,3 +1004,23 @@ func (n *NIC) isValidForOutgoing(ep AssignableAddressEndpoint) bool { n.mu.RUnlock() return n.Enabled() && ep.IsAssigned(spoofing) } + +// HandleNeighborProbe implements NetworkInterface. +func (n *NIC) HandleNeighborProbe(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error { + if l, ok := n.linkAddrResolvers[protocol]; ok { + l.neighborTable.handleProbe(addr, linkAddr) + return nil + } + + return &tcpip.ErrNotSupported{} +} + +// HandleNeighborConfirmation implements NetworkInterface. +func (n *NIC) HandleNeighborConfirmation(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) tcpip.Error { + if l, ok := n.linkAddrResolvers[protocol]; ok { + l.neighborTable.handleConfirmation(addr, linkAddr, flags) + return nil + } + + return &tcpip.ErrNotSupported{} +} diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go index be5df7b01..9992d6eb4 100644 --- a/pkg/tcpip/stack/nic_test.go +++ b/pkg/tcpip/stack/nic_test.go @@ -39,7 +39,7 @@ type testIPv6Endpoint struct { invalidatedRtr tcpip.Address } -func (*testIPv6Endpoint) Enable() *tcpip.Error { +func (*testIPv6Endpoint) Enable() tcpip.Error { return nil } @@ -65,21 +65,21 @@ func (e *testIPv6Endpoint) MaxHeaderLength() uint16 { } // WritePacket implements NetworkEndpoint.WritePacket. -func (*testIPv6Endpoint) WritePacket(*Route, *GSO, NetworkHeaderParams, *PacketBuffer) *tcpip.Error { +func (*testIPv6Endpoint) WritePacket(*Route, *GSO, NetworkHeaderParams, *PacketBuffer) tcpip.Error { return nil } // WritePackets implements NetworkEndpoint.WritePackets. -func (*testIPv6Endpoint) WritePackets(*Route, *GSO, PacketBufferList, NetworkHeaderParams) (int, *tcpip.Error) { +func (*testIPv6Endpoint) WritePackets(*Route, *GSO, PacketBufferList, NetworkHeaderParams) (int, tcpip.Error) { // Our tests don't use this so we don't support it. - return 0, tcpip.ErrNotSupported + return 0, &tcpip.ErrNotSupported{} } // WriteHeaderIncludedPacket implements // NetworkEndpoint.WriteHeaderIncludedPacket. -func (*testIPv6Endpoint) WriteHeaderIncludedPacket(*Route, *PacketBuffer) *tcpip.Error { +func (*testIPv6Endpoint) WriteHeaderIncludedPacket(*Route, *PacketBuffer) tcpip.Error { // Our tests don't use this so we don't support it. - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } // HandlePacket implements NetworkEndpoint.HandlePacket. @@ -111,8 +111,6 @@ type testIPv6EndpointStats struct{} // IsNetworkEndpointStats implements stack.NetworkEndpointStats. func (*testIPv6EndpointStats) IsNetworkEndpointStats() {} -var _ LinkAddressResolver = (*testIPv6Protocol)(nil) - // We use this instead of ipv6.protocol because the ipv6 package depends on // the stack package which this test lives in, causing a cyclic dependency. type testIPv6Protocol struct{} @@ -139,7 +137,7 @@ func (*testIPv6Protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) } // NewEndpoint implements NetworkProtocol.NewEndpoint. -func (p *testIPv6Protocol) NewEndpoint(nic NetworkInterface, _ LinkAddressCache, _ NUDHandler, _ TransportDispatcher) NetworkEndpoint { +func (p *testIPv6Protocol) NewEndpoint(nic NetworkInterface, _ TransportDispatcher) NetworkEndpoint { e := &testIPv6Endpoint{ nic: nic, protocol: p, @@ -149,12 +147,12 @@ func (p *testIPv6Protocol) NewEndpoint(nic NetworkInterface, _ LinkAddressCache, } // SetOption implements NetworkProtocol.SetOption. -func (*testIPv6Protocol) SetOption(tcpip.SettableNetworkProtocolOption) *tcpip.Error { +func (*testIPv6Protocol) SetOption(tcpip.SettableNetworkProtocolOption) tcpip.Error { return nil } // Option implements NetworkProtocol.Option. -func (*testIPv6Protocol) Option(tcpip.GettableNetworkProtocolOption) *tcpip.Error { +func (*testIPv6Protocol) Option(tcpip.GettableNetworkProtocolOption) tcpip.Error { return nil } @@ -169,24 +167,6 @@ func (*testIPv6Protocol) Parse(*PacketBuffer) (tcpip.TransportProtocolNumber, bo return 0, false, false } -// LinkAddressProtocol implements LinkAddressResolver. -func (*testIPv6Protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber { - return header.IPv6ProtocolNumber -} - -// LinkAddressRequest implements LinkAddressResolver. -func (*testIPv6Protocol) LinkAddressRequest(_, _ tcpip.Address, _ tcpip.LinkAddress, _ NetworkInterface) *tcpip.Error { - return nil -} - -// ResolveStaticAddress implements LinkAddressResolver. -func (*testIPv6Protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) { - if header.IsV6MulticastAddress(addr) { - return header.EthernetAddressFromMulticastIPv6Address(addr), true - } - return "", false -} - func TestDisabledRxStatsWhenNICDisabled(t *testing.T) { // When the NIC is disabled, the only field that matters is the stats field. // This test is limited to stats counter checks. diff --git a/pkg/tcpip/stack/nud.go b/pkg/tcpip/stack/nud.go index 12d67409a..5a94e9ac6 100644 --- a/pkg/tcpip/stack/nud.go +++ b/pkg/tcpip/stack/nud.go @@ -161,25 +161,6 @@ type ReachabilityConfirmationFlags struct { IsRouter bool } -// NUDHandler communicates external events to the Neighbor Unreachability -// Detection state machine, which is implemented per-interface. This is used by -// network endpoints to inform the Neighbor Cache of probes and confirmations. -type NUDHandler interface { - // HandleProbe processes an incoming neighbor probe (e.g. ARP request or - // Neighbor Solicitation for ARP or NDP, respectively). Validation of the - // probe needs to be performed before calling this function since the - // Neighbor Cache doesn't have access to view the NIC's assigned addresses. - HandleProbe(remoteAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, linkRes LinkAddressResolver) - - // HandleConfirmation processes an incoming neighbor confirmation (e.g. ARP - // reply or Neighbor Advertisement for ARP or NDP, respectively). - HandleConfirmation(addr tcpip.Address, linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) - - // HandleUpperLevelConfirmation processes an incoming upper-level protocol - // (e.g. TCP acknowledgements) reachability confirmation. - HandleUpperLevelConfirmation(addr tcpip.Address) -} - // NUDConfigurations is the NUD configurations for the netstack. This is used // by the neighbor cache to operate the NUD state machine on each device in the // local network. diff --git a/pkg/tcpip/stack/nud_test.go b/pkg/tcpip/stack/nud_test.go index 7bca1373e..e9acef6a2 100644 --- a/pkg/tcpip/stack/nud_test.go +++ b/pkg/tcpip/stack/nud_test.go @@ -19,7 +19,9 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/faketime" "gvisor.dev/gvisor/pkg/tcpip/link/channel" "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" "gvisor.dev/gvisor/pkg/tcpip/stack" @@ -52,80 +54,146 @@ func (f *fakeRand) Float32() float32 { return f.num } -// TestSetNUDConfigurationFailsForBadNICID tests to make sure we get an error if -// we attempt to update NUD configurations using an invalid NICID. -func TestSetNUDConfigurationFailsForBadNICID(t *testing.T) { - s := stack.New(stack.Options{ - // A neighbor cache is required to store NUDConfigurations. The networking - // stack will only allocate neighbor caches if a protocol providing link - // address resolution is specified (e.g. ARP or IPv6). - NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocol}, - UseNeighborCache: true, - }) +func TestNUDFunctions(t *testing.T) { + const nicID = 1 - // No NIC with ID 1 yet. - config := stack.NUDConfigurations{} - if err := s.SetNUDConfigurations(1, config); err != tcpip.ErrUnknownNICID { - t.Fatalf("got s.SetNDPConfigurations(1, %+v) = %v, want = %s", config, err, tcpip.ErrUnknownNICID) + tests := []struct { + name string + nicID tcpip.NICID + netProtoFactory []stack.NetworkProtocolFactory + extraLinkCapabilities stack.LinkEndpointCapabilities + expectedErr tcpip.Error + }{ + { + name: "Invalid NICID", + nicID: nicID + 1, + netProtoFactory: []stack.NetworkProtocolFactory{ipv6.NewProtocol}, + extraLinkCapabilities: stack.CapabilityResolutionRequired, + expectedErr: &tcpip.ErrUnknownNICID{}, + }, + { + name: "No network protocol", + nicID: nicID, + expectedErr: &tcpip.ErrNotSupported{}, + }, + { + name: "With IPv6", + nicID: nicID, + netProtoFactory: []stack.NetworkProtocolFactory{ipv6.NewProtocol}, + expectedErr: &tcpip.ErrNotSupported{}, + }, + { + name: "With resolution capability", + nicID: nicID, + extraLinkCapabilities: stack.CapabilityResolutionRequired, + expectedErr: &tcpip.ErrNotSupported{}, + }, + { + name: "With IPv6 and resolution capability", + nicID: nicID, + netProtoFactory: []stack.NetworkProtocolFactory{ipv6.NewProtocol}, + extraLinkCapabilities: stack.CapabilityResolutionRequired, + }, } -} -// TestNUDConfigurationFailsForNotSupported tests to make sure we get a -// NotSupported error if we attempt to retrieve NUD configurations when the -// stack doesn't support NUD. -// -// The stack will report to not support NUD if a neighbor cache for a given NIC -// is not allocated. The networking stack will only allocate neighbor caches if -// a protocol providing link address resolution is specified (e.g. ARP, IPv6). -func TestNUDConfigurationFailsForNotSupported(t *testing.T) { - const nicID = 1 + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + clock := faketime.NewManualClock() + s := stack.New(stack.Options{ + NUDConfigs: stack.DefaultNUDConfigurations(), + UseNeighborCache: true, + NetworkProtocols: test.netProtoFactory, + Clock: clock, + }) - e := channel.New(0, 1280, linkAddr1) - e.LinkEPCapabilities |= stack.CapabilityResolutionRequired + e := channel.New(0, 0, linkAddr1) + e.LinkEPCapabilities &^= stack.CapabilityResolutionRequired + e.LinkEPCapabilities |= test.extraLinkCapabilities - s := stack.New(stack.Options{ - NUDConfigs: stack.DefaultNUDConfigurations(), - UseNeighborCache: true, - }) - if err := s.CreateNIC(nicID, e); err != nil { - t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) - } - if _, err := s.NUDConfigurations(nicID); err != tcpip.ErrNotSupported { - t.Fatalf("got s.NDPConfigurations(%d) = %v, want = %s", nicID, err, tcpip.ErrNotSupported) - } -} + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) + } -// TestNUDConfigurationFailsForNotSupported tests to make sure we get a -// NotSupported error if we attempt to set NUD configurations when the stack -// doesn't support NUD. -// -// The stack will report to not support NUD if a neighbor cache for a given NIC -// is not allocated. The networking stack will only allocate neighbor caches if -// a protocol providing link address resolution is specified (e.g. ARP, IPv6). -func TestSetNUDConfigurationFailsForNotSupported(t *testing.T) { - const nicID = 1 + configs := stack.DefaultNUDConfigurations() + configs.BaseReachableTime = time.Hour - e := channel.New(0, 1280, linkAddr1) - e.LinkEPCapabilities |= stack.CapabilityResolutionRequired + { + err := s.SetNUDConfigurations(test.nicID, ipv6.ProtocolNumber, configs) + if diff := cmp.Diff(test.expectedErr, err); diff != "" { + t.Errorf("s.SetNUDConfigurations(%d, %d, _) error mismatch (-want +got):\n%s", test.nicID, ipv6.ProtocolNumber, diff) + } + } - s := stack.New(stack.Options{ - NUDConfigs: stack.DefaultNUDConfigurations(), - UseNeighborCache: true, - }) - if err := s.CreateNIC(nicID, e); err != nil { - t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) - } + { + gotConfigs, err := s.NUDConfigurations(test.nicID, ipv6.ProtocolNumber) + if diff := cmp.Diff(test.expectedErr, err); diff != "" { + t.Errorf("s.NUDConfigurations(%d, %d) error mismatch (-want +got):\n%s", test.nicID, ipv6.ProtocolNumber, diff) + } else if test.expectedErr == nil { + if diff := cmp.Diff(configs, gotConfigs); diff != "" { + t.Errorf("got configs mismatch (-want +got):\n%s", diff) + } + } + } - config := stack.NUDConfigurations{} - if err := s.SetNUDConfigurations(nicID, config); err != tcpip.ErrNotSupported { - t.Fatalf("got s.SetNDPConfigurations(%d, %+v) = %v, want = %s", nicID, config, err, tcpip.ErrNotSupported) + for _, addr := range []tcpip.Address{llAddr1, llAddr2} { + { + err := s.AddStaticNeighbor(test.nicID, ipv6.ProtocolNumber, addr, linkAddr1) + if diff := cmp.Diff(test.expectedErr, err); diff != "" { + t.Errorf("s.AddStaticNeighbor(%d, %d, %s, %s) error mismatch (-want +got):\n%s", test.nicID, ipv6.ProtocolNumber, addr, linkAddr1, diff) + } + } + } + + { + wantErr := test.expectedErr + for i := 0; i < 2; i++ { + { + err := s.RemoveNeighbor(test.nicID, ipv6.ProtocolNumber, llAddr1) + if diff := cmp.Diff(wantErr, err); diff != "" { + t.Errorf("s.RemoveNeighbor(%d, %d, '') error mismatch (-want +got):\n%s", test.nicID, ipv6.ProtocolNumber, diff) + } + } + + if test.expectedErr != nil { + break + } + + // Removing a neighbor that does not exist should give us a bad address + // error. + wantErr = &tcpip.ErrBadAddress{} + } + } + + { + neighbors, err := s.Neighbors(test.nicID, ipv6.ProtocolNumber) + if diff := cmp.Diff(test.expectedErr, err); diff != "" { + t.Errorf("s.Neigbors(%d, %d) error mismatch (-want +got):\n%s", test.nicID, ipv6.ProtocolNumber, diff) + } else if test.expectedErr == nil { + if diff := cmp.Diff( + []stack.NeighborEntry{{Addr: llAddr2, LinkAddr: linkAddr1, State: stack.Static, UpdatedAtNanos: clock.NowNanoseconds()}}, + neighbors, + ); diff != "" { + t.Errorf("neighbors mismatch (-want +got):\n%s", diff) + } + } + } + + { + err := s.ClearNeighbors(test.nicID, ipv6.ProtocolNumber) + if diff := cmp.Diff(test.expectedErr, err); diff != "" { + t.Errorf("s.ClearNeigbors(%d, %d) error mismatch (-want +got):\n%s", test.nicID, ipv6.ProtocolNumber, diff) + } else if test.expectedErr == nil { + if neighbors, err := s.Neighbors(test.nicID, ipv6.ProtocolNumber); err != nil { + t.Errorf("s.Neighbors(%d, %d): %s", test.nicID, ipv6.ProtocolNumber, err) + } else if len(neighbors) != 0 { + t.Errorf("got len(neighbors) = %d, want = 0; neighbors = %#v", len(neighbors), neighbors) + } + } + } + }) } } -// TestDefaultNUDConfigurationIsValid verifies that calling -// resetInvalidFields() on the result of DefaultNUDConfigurations() does not -// change anything. DefaultNUDConfigurations() should return a valid -// NUDConfigurations. func TestDefaultNUDConfigurations(t *testing.T) { const nicID = 1 @@ -143,12 +211,12 @@ func TestDefaultNUDConfigurations(t *testing.T) { if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - c, err := s.NUDConfigurations(nicID) + c, err := s.NUDConfigurations(nicID, ipv6.ProtocolNumber) if err != nil { - t.Fatalf("got stack.NUDConfigurations(%d) = %s", nicID, err) + t.Fatalf("got stack.NUDConfigurations(%d, %d) = %s", nicID, ipv6.ProtocolNumber, err) } if got, want := c, stack.DefaultNUDConfigurations(); got != want { - t.Errorf("got stack.NUDConfigurations(%d) = %+v, want = %+v", nicID, got, want) + t.Errorf("got stack.NUDConfigurations(%d, %d) = %+v, want = %+v", nicID, ipv6.ProtocolNumber, got, want) } } @@ -198,9 +266,9 @@ func TestNUDConfigurationsBaseReachableTime(t *testing.T) { if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - sc, err := s.NUDConfigurations(nicID) + sc, err := s.NUDConfigurations(nicID, ipv6.ProtocolNumber) if err != nil { - t.Fatalf("got stack.NUDConfigurations(%d) = %s", nicID, err) + t.Fatalf("got stack.NUDConfigurations(%d, %d) = %s", nicID, ipv6.ProtocolNumber, err) } if got := sc.BaseReachableTime; got != test.want { t.Errorf("got BaseReachableTime = %q, want = %q", got, test.want) @@ -255,9 +323,9 @@ func TestNUDConfigurationsMinRandomFactor(t *testing.T) { if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - sc, err := s.NUDConfigurations(nicID) + sc, err := s.NUDConfigurations(nicID, ipv6.ProtocolNumber) if err != nil { - t.Fatalf("got stack.NUDConfigurations(%d) = %s", nicID, err) + t.Fatalf("got stack.NUDConfigurations(%d, %d) = %s", nicID, ipv6.ProtocolNumber, err) } if got := sc.MinRandomFactor; got != test.want { t.Errorf("got MinRandomFactor = %f, want = %f", got, test.want) @@ -335,9 +403,9 @@ func TestNUDConfigurationsMaxRandomFactor(t *testing.T) { if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - sc, err := s.NUDConfigurations(nicID) + sc, err := s.NUDConfigurations(nicID, ipv6.ProtocolNumber) if err != nil { - t.Fatalf("got stack.NUDConfigurations(%d) = %s", nicID, err) + t.Fatalf("got stack.NUDConfigurations(%d, %d) = %s", nicID, ipv6.ProtocolNumber, err) } if got := sc.MaxRandomFactor; got != test.want { t.Errorf("got MaxRandomFactor = %f, want = %f", got, test.want) @@ -397,9 +465,9 @@ func TestNUDConfigurationsRetransmitTimer(t *testing.T) { if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - sc, err := s.NUDConfigurations(nicID) + sc, err := s.NUDConfigurations(nicID, ipv6.ProtocolNumber) if err != nil { - t.Fatalf("got stack.NUDConfigurations(%d) = %s", nicID, err) + t.Fatalf("got stack.NUDConfigurations(%d, %d) = %s", nicID, ipv6.ProtocolNumber, err) } if got := sc.RetransmitTimer; got != test.want { t.Errorf("got RetransmitTimer = %q, want = %q", got, test.want) @@ -449,9 +517,9 @@ func TestNUDConfigurationsDelayFirstProbeTime(t *testing.T) { if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - sc, err := s.NUDConfigurations(nicID) + sc, err := s.NUDConfigurations(nicID, ipv6.ProtocolNumber) if err != nil { - t.Fatalf("got stack.NUDConfigurations(%d) = %s", nicID, err) + t.Fatalf("got stack.NUDConfigurations(%d, %d) = %s", nicID, ipv6.ProtocolNumber, err) } if got := sc.DelayFirstProbeTime; got != test.want { t.Errorf("got DelayFirstProbeTime = %q, want = %q", got, test.want) @@ -501,9 +569,9 @@ func TestNUDConfigurationsMaxMulticastProbes(t *testing.T) { if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - sc, err := s.NUDConfigurations(nicID) + sc, err := s.NUDConfigurations(nicID, ipv6.ProtocolNumber) if err != nil { - t.Fatalf("got stack.NUDConfigurations(%d) = %s", nicID, err) + t.Fatalf("got stack.NUDConfigurations(%d, %d) = %s", nicID, ipv6.ProtocolNumber, err) } if got := sc.MaxMulticastProbes; got != test.want { t.Errorf("got MaxMulticastProbes = %q, want = %q", got, test.want) @@ -553,9 +621,9 @@ func TestNUDConfigurationsMaxUnicastProbes(t *testing.T) { if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - sc, err := s.NUDConfigurations(nicID) + sc, err := s.NUDConfigurations(nicID, ipv6.ProtocolNumber) if err != nil { - t.Fatalf("got stack.NUDConfigurations(%d) = %s", nicID, err) + t.Fatalf("got stack.NUDConfigurations(%d, %d) = %s", nicID, ipv6.ProtocolNumber, err) } if got := sc.MaxUnicastProbes; got != test.want { t.Errorf("got MaxUnicastProbes = %q, want = %q", got, test.want) diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go index 9d4fc3e48..4f013b212 100644 --- a/pkg/tcpip/stack/packet_buffer.go +++ b/pkg/tcpip/stack/packet_buffer.go @@ -187,6 +187,12 @@ func (pk *PacketBuffer) Size() int { return pk.HeaderSize() + pk.Data.Size() } +// MemSize returns the estimation size of the pk in memory, including backing +// buffer data. +func (pk *PacketBuffer) MemSize() int { + return pk.HeaderSize() + pk.Data.MemSize() + packetBufferStructSize +} + // Views returns the underlying storage of the whole packet. func (pk *PacketBuffer) Views() []buffer.View { // Optimization for outbound packets that headers are in pk.header. diff --git a/pkg/tcpip/stack/packet_buffer_unsafe.go b/pkg/tcpip/stack/packet_buffer_unsafe.go new file mode 100644 index 000000000..ee3d47270 --- /dev/null +++ b/pkg/tcpip/stack/packet_buffer_unsafe.go @@ -0,0 +1,19 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stack + +import "unsafe" + +const packetBufferStructSize = int(unsafe.Sizeof(PacketBuffer{})) diff --git a/pkg/tcpip/stack/pending_packets.go b/pkg/tcpip/stack/pending_packets.go index c4769b17e..1c651e216 100644 --- a/pkg/tcpip/stack/pending_packets.go +++ b/pkg/tcpip/stack/pending_packets.go @@ -114,26 +114,12 @@ func (f *packetsPendingLinkResolution) dequeue(ch <-chan struct{}, linkAddr tcpi } } -func (f *packetsPendingLinkResolution) writePacketBuffer(r RouteInfo, gso *GSO, proto tcpip.NetworkProtocolNumber, pkt pendingPacketBuffer) (int, *tcpip.Error) { - switch pkt := pkt.(type) { - case *PacketBuffer: - if err := f.nic.writePacket(r, gso, proto, pkt); err != nil { - return 0, err - } - return 1, nil - case *PacketBufferList: - return f.nic.writePackets(r, gso, proto, *pkt) - default: - panic(fmt.Sprintf("unrecognized pending packet buffer type = %T", pkt)) - } -} - // enqueue a packet to be sent once link resolution completes. // // If the maximum number of pending resolutions is reached, the packets // associated with the oldest link resolution will be dequeued as if they failed // link resolution. -func (f *packetsPendingLinkResolution) enqueue(r *Route, gso *GSO, proto tcpip.NetworkProtocolNumber, pkt pendingPacketBuffer) (int, *tcpip.Error) { +func (f *packetsPendingLinkResolution) enqueue(r *Route, gso *GSO, proto tcpip.NetworkProtocolNumber, pkt pendingPacketBuffer) (int, tcpip.Error) { f.mu.Lock() // Make sure we attempt resolution while holding f's lock so that we avoid // a race where link resolution completes before we enqueue the packets. @@ -146,13 +132,13 @@ func (f *packetsPendingLinkResolution) enqueue(r *Route, gso *GSO, proto tcpip.N // To make sure B does not interleave with A and C, we make sure A and C are // done while holding the lock. routeInfo, ch, err := r.resolvedFields(nil) - switch err { + switch err.(type) { case nil: // The route resolved immediately, so we don't need to wait for link // resolution to send the packet. f.mu.Unlock() - return f.writePacketBuffer(routeInfo, gso, proto, pkt) - case tcpip.ErrWouldBlock: + return f.nic.writePacketBuffer(routeInfo, gso, proto, pkt) + case *tcpip.ErrWouldBlock: // We need to wait for link resolution to complete. default: f.mu.Unlock() @@ -225,7 +211,7 @@ func (f *packetsPendingLinkResolution) dequeuePackets(packets []pendingPacket, l for _, p := range packets { if success { p.routeInfo.RemoteLinkAddress = linkAddr - _, _ = f.writePacketBuffer(p.routeInfo, p.gso, p.proto, p.pkt) + _, _ = f.nic.writePacketBuffer(p.routeInfo, p.gso, p.proto, p.pkt) } else { f.incrementOutgoingPacketErrors(p.proto, p.pkt) diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index 33df192aa..d589f798d 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -49,31 +49,6 @@ type TransportEndpointID struct { RemoteAddress tcpip.Address } -// ControlType is the type of network control message. -type ControlType int - -// The following are the allowed values for ControlType values. -// TODO(http://gvisor.dev/issue/3210): Support time exceeded messages. -const ( - // ControlAddressUnreachable indicates that an IPv6 packet did not reach its - // destination as the destination address was unreachable. - // - // This maps to the ICMPv6 Destination Ureachable Code 3 error; see - // RFC 4443 section 3.1 for more details. - ControlAddressUnreachable ControlType = iota - ControlNetworkUnreachable - // ControlNoRoute indicates that an IPv4 packet did not reach its destination - // because the destination host was unreachable. - // - // This maps to the ICMPv4 Destination Ureachable Code 1 error; see - // RFC 791's Destination Unreachable Message section (page 4) for more - // details. - ControlNoRoute - ControlPacketTooBig - ControlPortUnreachable - ControlUnknown -) - // NetworkPacketInfo holds information about a network layer packet. type NetworkPacketInfo struct { // LocalAddressBroadcast is true if the packet's local address is a broadcast @@ -81,6 +56,39 @@ type NetworkPacketInfo struct { LocalAddressBroadcast bool } +// TransportErrorKind enumerates error types that are handled by the transport +// layer. +type TransportErrorKind int + +const ( + // PacketTooBigTransportError indicates that a packet did not reach its + // destination because a link on the path to the destination had an MTU that + // was too small to carry the packet. + PacketTooBigTransportError TransportErrorKind = iota + + // DestinationHostUnreachableTransportError indicates that the destination + // host was unreachable. + DestinationHostUnreachableTransportError + + // DestinationPortUnreachableTransportError indicates that a packet reached + // the destination host, but the transport protocol was not active on the + // destination port. + DestinationPortUnreachableTransportError + + // DestinationNetworkUnreachableTransportError indicates that the destination + // network was unreachable. + DestinationNetworkUnreachableTransportError +) + +// TransportError is a marker interface for errors that may be handled by the +// transport layer. +type TransportError interface { + tcpip.SockErrorCause + + // Kind returns the type of the transport error. + Kind() TransportErrorKind +} + // TransportEndpoint is the interface that needs to be implemented by transport // protocol (e.g., tcp, udp) endpoints that can handle packets. type TransportEndpoint interface { @@ -93,10 +101,10 @@ type TransportEndpoint interface { // HandlePacket takes ownership of the packet. HandlePacket(TransportEndpointID, *PacketBuffer) - // HandleControlPacket is called by the stack when new control (e.g. - // ICMP) packets arrive to this transport endpoint. - // HandleControlPacket takes ownership of pkt. - HandleControlPacket(typ ControlType, extra uint32, pkt *PacketBuffer) + // HandleError is called when the transport endpoint receives an error. + // + // HandleError takes ownership of the packet buffer. + HandleError(TransportError, *PacketBuffer) // Abort initiates an expedited endpoint teardown. It puts the endpoint // in a closed state and frees all resources associated with it. This @@ -172,10 +180,10 @@ type TransportProtocol interface { Number() tcpip.TransportProtocolNumber // NewEndpoint creates a new endpoint of the transport protocol. - NewEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) + NewEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) // NewRawEndpoint creates a new raw endpoint of the transport protocol. - NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) + NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) // MinimumPacketSize returns the minimum valid packet size of this // transport protocol. The stack automatically drops any packets smaller @@ -184,7 +192,7 @@ type TransportProtocol interface { // ParsePorts returns the source and destination ports stored in a // packet of this protocol. - ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) + ParsePorts(v buffer.View) (src, dst uint16, err tcpip.Error) // HandleUnknownDestinationPacket handles packets targeted at this // protocol that don't match any existing endpoint. For example, @@ -197,12 +205,12 @@ type TransportProtocol interface { // SetOption allows enabling/disabling protocol specific features. // SetOption returns an error if the option is not supported or the // provided option value is invalid. - SetOption(option tcpip.SettableTransportProtocolOption) *tcpip.Error + SetOption(option tcpip.SettableTransportProtocolOption) tcpip.Error // Option allows retrieving protocol specific option values. // Option returns an error if the option is not supported or the // provided option value is invalid. - Option(option tcpip.GettableTransportProtocolOption) *tcpip.Error + Option(option tcpip.GettableTransportProtocolOption) tcpip.Error // Close requests that any worker goroutines owned by the protocol // stop. @@ -248,14 +256,11 @@ type TransportDispatcher interface { // DeliverTransportPacket takes ownership of the packet. DeliverTransportPacket(tcpip.TransportProtocolNumber, *PacketBuffer) TransportPacketDisposition - // DeliverTransportControlPacket delivers control packets to the - // appropriate transport protocol endpoint. - // - // pkt.NetworkHeader must be set before calling - // DeliverTransportControlPacket. + // DeliverTransportError delivers an error to the appropriate transport + // endpoint. // - // DeliverTransportControlPacket takes ownership of pkt. - DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer) + // DeliverTransportError takes ownership of the packet buffer. + DeliverTransportError(local, remote tcpip.Address, _ tcpip.NetworkProtocolNumber, _ tcpip.TransportProtocolNumber, _ TransportError, _ *PacketBuffer) } // PacketLooping specifies where an outbound packet should be sent. @@ -289,10 +294,10 @@ type NetworkHeaderParams struct { // endpoints may associate themselves with the same identifier (group address). type GroupAddressableEndpoint interface { // JoinGroup joins the specified group. - JoinGroup(group tcpip.Address) *tcpip.Error + JoinGroup(group tcpip.Address) tcpip.Error // LeaveGroup attempts to leave the specified group. - LeaveGroup(group tcpip.Address) *tcpip.Error + LeaveGroup(group tcpip.Address) tcpip.Error // IsInGroup returns true if the endpoint is a member of the specified group. IsInGroup(group tcpip.Address) bool @@ -440,17 +445,17 @@ func (k AddressKind) IsPermanent() bool { type AddressableEndpoint interface { // AddAndAcquirePermanentAddress adds the passed permanent address. // - // Returns tcpip.ErrDuplicateAddress if the address exists. + // Returns *tcpip.ErrDuplicateAddress if the address exists. // // Acquires and returns the AddressEndpoint for the added address. - AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated bool) (AddressEndpoint, *tcpip.Error) + AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated bool) (AddressEndpoint, tcpip.Error) // RemovePermanentAddress removes the passed address if it is a permanent // address. // - // Returns tcpip.ErrBadLocalAddress if the endpoint does not have the passed + // Returns *tcpip.ErrBadLocalAddress if the endpoint does not have the passed // permanent address. - RemovePermanentAddress(addr tcpip.Address) *tcpip.Error + RemovePermanentAddress(addr tcpip.Address) tcpip.Error // MainAddress returns the endpoint's primary permanent address. MainAddress() tcpip.AddressWithPrefix @@ -512,14 +517,14 @@ type NetworkInterface interface { Promiscuous() bool // WritePacketToRemote writes the packet to the given remote link address. - WritePacketToRemote(tcpip.LinkAddress, *GSO, tcpip.NetworkProtocolNumber, *PacketBuffer) *tcpip.Error + WritePacketToRemote(tcpip.LinkAddress, *GSO, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error // WritePacket writes a packet with the given protocol through the given // route. // // WritePacket takes ownership of the packet buffer. The packet buffer's // network and transport header must be set. - WritePacket(*Route, *GSO, tcpip.NetworkProtocolNumber, *PacketBuffer) *tcpip.Error + WritePacket(*Route, *GSO, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error // WritePackets writes packets with the given protocol through the given // route. Must not be called with an empty list of packet buffers. @@ -529,7 +534,18 @@ type NetworkInterface interface { // Right now, WritePackets is used only when the software segmentation // offload is enabled. If it will be used for something else, syscall filters // may need to be updated. - WritePackets(*Route, *GSO, PacketBufferList, tcpip.NetworkProtocolNumber) (int, *tcpip.Error) + WritePackets(*Route, *GSO, PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error) + + // HandleNeighborProbe processes an incoming neighbor probe (e.g. ARP + // request or NDP Neighbor Solicitation). + // + // HandleNeighborProbe assumes that the probe is valid for the network + // interface the probe was received on. + HandleNeighborProbe(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress) tcpip.Error + + // HandleNeighborConfirmation processes an incoming neighbor confirmation + // (e.g. ARP reply or NDP Neighbor Advertisement). + HandleNeighborConfirmation(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress, ReachabilityConfirmationFlags) tcpip.Error } // LinkResolvableNetworkEndpoint handles link resolution events. @@ -547,8 +563,8 @@ type NetworkEndpoint interface { // Must only be called when the stack is in a state that allows the endpoint // to send and receive packets. // - // Returns tcpip.ErrNotPermitted if the endpoint cannot be enabled. - Enable() *tcpip.Error + // Returns *tcpip.ErrNotPermitted if the endpoint cannot be enabled. + Enable() tcpip.Error // Enabled returns true if the endpoint is enabled. Enabled() bool @@ -574,16 +590,16 @@ type NetworkEndpoint interface { // WritePacket writes a packet to the given destination address and // protocol. It takes ownership of pkt. pkt.TransportHeader must have // already been set. - WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) *tcpip.Error + WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) tcpip.Error // WritePackets writes packets to the given destination address and // protocol. pkts must not be zero length. It takes ownership of pkts and // underlying packets. - WritePackets(r *Route, gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, *tcpip.Error) + WritePackets(r *Route, gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, tcpip.Error) // WriteHeaderIncludedPacket writes a packet that includes a network // header to the given destination address. It takes ownership of pkt. - WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) *tcpip.Error + WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) tcpip.Error // HandlePacket is called by the link layer when new packets arrive to // this network endpoint. It sets pkt.NetworkHeader. @@ -649,17 +665,17 @@ type NetworkProtocol interface { ParseAddresses(v buffer.View) (src, dst tcpip.Address) // NewEndpoint creates a new endpoint of this protocol. - NewEndpoint(nic NetworkInterface, linkAddrCache LinkAddressCache, nud NUDHandler, dispatcher TransportDispatcher) NetworkEndpoint + NewEndpoint(nic NetworkInterface, dispatcher TransportDispatcher) NetworkEndpoint // SetOption allows enabling/disabling protocol specific features. // SetOption returns an error if the option is not supported or the // provided option value is invalid. - SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error + SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error // Option allows retrieving protocol specific option values. // Option returns an error if the option is not supported or the // provided option value is invalid. - Option(option tcpip.GettableNetworkProtocolOption) *tcpip.Error + Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error // Close requests that any worker goroutines owned by the protocol // stop. @@ -796,7 +812,7 @@ type LinkEndpoint interface { // To participate in transparent bridging, a LinkEndpoint implementation // should call eth.Encode with header.EthernetFields.SrcAddr set to // r.LocalLinkAddress if it is provided. - WritePacket(RouteInfo, *GSO, tcpip.NetworkProtocolNumber, *PacketBuffer) *tcpip.Error + WritePacket(RouteInfo, *GSO, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error // WritePackets writes packets with the given protocol and route. Must not be // called with an empty list of packet buffers. @@ -806,7 +822,7 @@ type LinkEndpoint interface { // Right now, WritePackets is used only when the software segmentation // offload is enabled. If it will be used for something else, syscall filters // may need to be updated. - WritePackets(RouteInfo, *GSO, PacketBufferList, tcpip.NetworkProtocolNumber) (int, *tcpip.Error) + WritePackets(RouteInfo, *GSO, PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error) } // InjectableLinkEndpoint is a LinkEndpoint where inbound packets are @@ -821,19 +837,15 @@ type InjectableLinkEndpoint interface { // link. // // dest is used by endpoints with multiple raw destinations. - InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error + InjectOutbound(dest tcpip.Address, packet []byte) tcpip.Error } -// A LinkAddressResolver is an extension to a NetworkProtocol that -// can resolve link addresses. +// A LinkAddressResolver handles link address resolution for a network protocol. type LinkAddressResolver interface { // LinkAddressRequest sends a request for the link address of the target // address. The request is broadcasted on the local network if a remote link // address is not provided. - // - // The request is sent from the passed network interface. If the interface - // local address is unspecified, any interface local address may be used. - LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, nic NetworkInterface) *tcpip.Error + LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) tcpip.Error // ResolveStaticAddress attempts to resolve address without sending // requests. It either resolves the name immediately or returns the @@ -847,22 +859,16 @@ type LinkAddressResolver interface { LinkAddressProtocol() tcpip.NetworkProtocolNumber } -// A LinkAddressCache caches link addresses. -type LinkAddressCache interface { - // AddLinkAddress adds a link address to the cache. - AddLinkAddress(addr tcpip.Address, linkAddr tcpip.LinkAddress) -} - // RawFactory produces endpoints for writing various types of raw packets. type RawFactory interface { // NewUnassociatedEndpoint produces endpoints for writing packets not // associated with a particular transport protocol. Such endpoints can // be used to write arbitrary packets that include the network header. - NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) + NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) // NewPacketEndpoint produces endpoints for reading and writing packets // that include network and (when cooked is false) link layer headers. - NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) + NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) } // GSOType is the type of GSO segments. diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go index d9a8554e2..bab55ce49 100644 --- a/pkg/tcpip/stack/route.go +++ b/pkg/tcpip/stack/route.go @@ -53,7 +53,7 @@ type Route struct { // linkRes is set if link address resolution is enabled for this protocol on // the route's NIC. - linkRes LinkAddressResolver + linkRes linkResolver } type routeInfo struct { @@ -174,7 +174,7 @@ func makeRoute(netProto tcpip.NetworkProtocolNumber, gateway, localAddr, remoteA } if r.outgoingNIC.LinkEndpoint.Capabilities()&CapabilityResolutionRequired != 0 { - if linkRes, ok := r.outgoingNIC.stack.linkAddrResolvers[r.NetProto]; ok { + if linkRes, ok := r.outgoingNIC.linkAddrResolvers[r.NetProto]; ok { r.linkRes = linkRes } } @@ -184,11 +184,11 @@ func makeRoute(netProto tcpip.NetworkProtocolNumber, gateway, localAddr, remoteA return r } - if r.linkRes == nil { + if r.linkRes.resolver == nil { return r } - if linkAddr, ok := r.linkRes.ResolveStaticAddress(r.RemoteAddress); ok { + if linkAddr, ok := r.linkRes.resolver.ResolveStaticAddress(r.RemoteAddress); ok { r.ResolveWith(linkAddr) return r } @@ -331,7 +331,7 @@ type ResolvedFieldsResult struct { // // Note, the route will not cache the remote link address when address // resolution completes. -func (r *Route) ResolvedFields(afterResolve func(ResolvedFieldsResult)) *tcpip.Error { +func (r *Route) ResolvedFields(afterResolve func(ResolvedFieldsResult)) tcpip.Error { _, _, err := r.resolvedFields(afterResolve) return err } @@ -342,7 +342,7 @@ func (r *Route) ResolvedFields(afterResolve func(ResolvedFieldsResult)) *tcpip.E // // The route's fields will also be returned, regardless of whether address // resolution is required or not. -func (r *Route) resolvedFields(afterResolve func(ResolvedFieldsResult)) (RouteInfo, <-chan struct{}, *tcpip.Error) { +func (r *Route) resolvedFields(afterResolve func(ResolvedFieldsResult)) (RouteInfo, <-chan struct{}, tcpip.Error) { r.mu.RLock() fields := r.fieldsLocked() resolutionRequired := r.isResolutionRequiredRLocked() @@ -354,11 +354,6 @@ func (r *Route) resolvedFields(afterResolve func(ResolvedFieldsResult)) (RouteIn return fields, nil, nil } - nextAddr := r.NextHop - if nextAddr == "" { - nextAddr = r.RemoteAddress - } - // If specified, the local address used for link address resolution must be an // address on the outgoing interface. var linkAddressResolutionRequestLocalAddr tcpip.Address @@ -367,7 +362,7 @@ func (r *Route) resolvedFields(afterResolve func(ResolvedFieldsResult)) (RouteIn } afterResolveFields := fields - linkAddr, ch, err := r.outgoingNIC.getNeighborLinkAddress(nextAddr, linkAddressResolutionRequestLocalAddr, r.linkRes, func(r LinkResolutionResult) { + linkAddr, ch, err := r.linkRes.getNeighborLinkAddress(r.nextHop(), linkAddressResolutionRequestLocalAddr, func(r LinkResolutionResult) { if afterResolve != nil { if r.Success { afterResolveFields.RemoteLinkAddress = r.LinkAddress @@ -382,6 +377,13 @@ func (r *Route) resolvedFields(afterResolve func(ResolvedFieldsResult)) (RouteIn return fields, ch, err } +func (r *Route) nextHop() tcpip.Address { + if len(r.NextHop) == 0 { + return r.RemoteAddress + } + return r.NextHop +} + // local returns true if the route is a local route. func (r *Route) local() bool { return r.Loop == PacketLoop || r.outgoingNIC.IsLoopback() @@ -398,7 +400,7 @@ func (r *Route) IsResolutionRequired() bool { } func (r *Route) isResolutionRequiredRLocked() bool { - return len(r.mu.remoteLinkAddress) == 0 && r.linkRes != nil && r.isValidForOutgoingRLocked() && !r.local() + return len(r.mu.remoteLinkAddress) == 0 && r.linkRes.resolver != nil && r.isValidForOutgoingRLocked() && !r.local() } func (r *Route) isValidForOutgoing() bool { @@ -427,9 +429,9 @@ func (r *Route) isValidForOutgoingRLocked() bool { } // WritePacket writes the packet through the given route. -func (r *Route) WritePacket(gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) *tcpip.Error { +func (r *Route) WritePacket(gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) tcpip.Error { if !r.isValidForOutgoing() { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } return r.outgoingNIC.getNetworkEndpoint(r.NetProto).WritePacket(r, gso, params, pkt) @@ -437,9 +439,9 @@ func (r *Route) WritePacket(gso *GSO, params NetworkHeaderParams, pkt *PacketBuf // WritePackets writes a list of n packets through the given route and returns // the number of packets written. -func (r *Route) WritePackets(gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, *tcpip.Error) { +func (r *Route) WritePackets(gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, tcpip.Error) { if !r.isValidForOutgoing() { - return 0, tcpip.ErrInvalidEndpointState + return 0, &tcpip.ErrInvalidEndpointState{} } return r.outgoingNIC.getNetworkEndpoint(r.NetProto).WritePackets(r, gso, pkts, params) @@ -447,9 +449,9 @@ func (r *Route) WritePackets(gso *GSO, pkts PacketBufferList, params NetworkHead // WriteHeaderIncludedPacket writes a packet already containing a network // header through the given route. -func (r *Route) WriteHeaderIncludedPacket(pkt *PacketBuffer) *tcpip.Error { +func (r *Route) WriteHeaderIncludedPacket(pkt *PacketBuffer) tcpip.Error { if !r.isValidForOutgoing() { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } return r.outgoingNIC.getNetworkEndpoint(r.NetProto).WriteHeaderIncludedPacket(r, pkt) @@ -519,3 +521,14 @@ func (r *Route) IsOutboundBroadcast() bool { // Only IPv4 has a notion of broadcast. return r.isV4Broadcast(r.RemoteAddress) } + +// ConfirmReachable informs the network/link layer that the neighbour used for +// the route is reachable. +// +// "Reachable" is defined as having full-duplex communication between the +// local and remote ends of the route. +func (r *Route) ConfirmReachable() { + if r.linkRes.resolver != nil { + r.linkRes.confirmReachable(r.nextHop()) + } +} diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index e9c5db4c3..a51d758d0 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -76,12 +76,16 @@ type TCPCubicState struct { // TCPRACKState is used to hold a copy of the internal RACK state when the // TCPProbeFunc is invoked. type TCPRACKState struct { - XmitTime time.Time - EndSequence seqnum.Value - FACK seqnum.Value - RTT time.Duration - Reord bool - DSACKSeen bool + XmitTime time.Time + EndSequence seqnum.Value + FACK seqnum.Value + RTT time.Duration + Reord bool + DSACKSeen bool + ReoWnd time.Duration + ReoWndIncr uint8 + ReoWndPersist int8 + RTTSeq seqnum.Value } // TCPEndpointID is the unique 4 tuple that identifies a given endpoint. @@ -372,7 +376,6 @@ func (u *uniqueIDGenerator) UniqueID() uint64 { type Stack struct { transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol - linkAddrResolvers map[tcpip.NetworkProtocolNumber]LinkAddressResolver // rawFactory creates raw endpoints. If nil, raw endpoints are // disabled. It is set during Stack creation and is immutable. @@ -382,6 +385,15 @@ type Stack struct { stats tcpip.Stats + // LOCK ORDERING: mu > route.mu. + route struct { + mu struct { + sync.RWMutex + + table []tcpip.Route + } + } + mu sync.RWMutex nics map[tcpip.NICID]*NIC @@ -389,11 +401,6 @@ type Stack struct { cleanupEndpointsMu sync.Mutex cleanupEndpoints map[TransportEndpoint]struct{} - // route is the route table passed in by the user via SetRouteTable(), - // it is used by FindRoute() to build a route for a specific - // destination. - routeTable []tcpip.Route - *ports.PortManager // If not nil, then any new endpoints will have this probe function @@ -429,6 +436,8 @@ type Stack struct { // useNeighborCache indicates whether ARP and NDP packets should be handled // by the NIC's neighborCache instead of linkAddrCache. + // + // TODO(gvisor.dev/issue/4658): Remove this field. useNeighborCache bool // nudDisp is the NUD event dispatcher that is used to send the netstack @@ -449,6 +458,18 @@ type Stack struct { // receiveBufferSize holds the min/default/max receive buffer sizes for // endpoints other than TCP. receiveBufferSize ReceiveBufferSizeOption + + // tcpInvalidRateLimit is the maximal rate for sending duplicate + // acknowledgements in response to incoming TCP packets that are for an existing + // connection but that are invalid due to any of the following reasons: + // + // a) out-of-window sequence number. + // b) out-of-window acknowledgement number. + // c) PAWS check failure (when implemented). + // + // This is required to prevent potential ACK loops. + // Setting this to 0 will disable all rate limiting. + tcpInvalidRateLimit time.Duration } // UniqueID is an abstract generator of unique identifiers. @@ -495,13 +516,17 @@ type Options struct { // NUDConfigs is the default NUD configurations used by interfaces. NUDConfigs NUDConfigurations - // UseNeighborCache indicates whether ARP and NDP packets should be handled - // by the Neighbor Unreachability Detection (NUD) state machine. This flag - // also enables the APIs for inspecting and modifying the neighbor table via - // NUDDispatcher and the following Stack methods: Neighbors, RemoveNeighbor, - // and ClearNeighbors. + // UseNeighborCache is unused. + // + // TODO(gvisor.dev/issue/4658): Remove this field. UseNeighborCache bool + // UseLinkAddrCache indicates that the legacy link address cache should be + // used for link resolution. + // + // TODO(gvisor.dev/issue/4658): Remove this field. + UseLinkAddrCache bool + // NUDDisp is the NUD event dispatcher that an integrator can provide to // receive NUD related events. NUDDisp NUDDispatcher @@ -552,7 +577,7 @@ type TransportEndpointInfo struct { // incompatible with the receiver. // // Preconditon: the parent endpoint mu must be held while calling this method. -func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6only bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, *tcpip.Error) { +func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6only bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) { netProto := t.NetProto switch len(addr.Addr) { case header.IPv4AddressSize: @@ -570,11 +595,11 @@ func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6onl switch len(t.ID.LocalAddress) { case header.IPv4AddressSize: if len(addr.Addr) == header.IPv6AddressSize { - return tcpip.FullAddress{}, 0, tcpip.ErrInvalidEndpointState + return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{} } case header.IPv6AddressSize: if len(addr.Addr) == header.IPv4AddressSize { - return tcpip.FullAddress{}, 0, tcpip.ErrNetworkUnreachable + return tcpip.FullAddress{}, 0, &tcpip.ErrNetworkUnreachable{} } } @@ -582,10 +607,10 @@ func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6onl case netProto == t.NetProto: case netProto == header.IPv4ProtocolNumber && t.NetProto == header.IPv6ProtocolNumber: if v6only { - return tcpip.FullAddress{}, 0, tcpip.ErrNoRoute + return tcpip.FullAddress{}, 0, &tcpip.ErrNoRoute{} } default: - return tcpip.FullAddress{}, 0, tcpip.ErrInvalidEndpointState + return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{} } return addr, netProto, nil @@ -631,7 +656,6 @@ func New(opts Options) *Stack { s := &Stack{ transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState), networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol), - linkAddrResolvers: make(map[tcpip.NetworkProtocolNumber]LinkAddressResolver), nics: make(map[tcpip.NICID]*NIC), cleanupEndpoints: make(map[TransportEndpoint]struct{}), PortManager: ports.NewPortManager(), @@ -642,7 +666,7 @@ func New(opts Options) *Stack { icmpRateLimiter: NewICMPRateLimiter(), seed: generateRandUint32(), nudConfigs: opts.NUDConfigs, - useNeighborCache: opts.UseNeighborCache, + useNeighborCache: !opts.UseLinkAddrCache, uniqueIDGenerator: opts.UniqueID, nudDisp: opts.NUDDisp, randomGenerator: mathrand.New(randSrc), @@ -656,15 +680,13 @@ func New(opts Options) *Stack { Default: DefaultBufferSize, Max: DefaultMaxBufferSize, }, + tcpInvalidRateLimit: defaultTCPInvalidRateLimit, } // Add specified network protocols. for _, netProtoFactory := range opts.NetworkProtocols { netProto := netProtoFactory(s) s.networkProtocols[netProto.Number()] = netProto - if r, ok := netProto.(LinkAddressResolver); ok { - s.linkAddrResolvers[r.LinkAddressProtocol()] = r - } } // Add specified transport protocols. @@ -698,10 +720,10 @@ func (s *Stack) UniqueID() uint64 { // options. This method returns an error if the protocol is not supported or // option is not supported by the protocol implementation or the provided value // is incorrect. -func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) *tcpip.Error { +func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) tcpip.Error { netProto, ok := s.networkProtocols[network] if !ok { - return tcpip.ErrUnknownProtocol + return &tcpip.ErrUnknownProtocol{} } return netProto.SetOption(option) } @@ -715,10 +737,10 @@ func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, op // if err != nil { // ... // } -func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) *tcpip.Error { +func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) tcpip.Error { netProto, ok := s.networkProtocols[network] if !ok { - return tcpip.ErrUnknownProtocol + return &tcpip.ErrUnknownProtocol{} } return netProto.Option(option) } @@ -727,10 +749,10 @@ func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, optio // options. This method returns an error if the protocol is not supported or // option is not supported by the protocol implementation or the provided value // is incorrect. -func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) *tcpip.Error { +func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) tcpip.Error { transProtoState, ok := s.transportProtocols[transport] if !ok { - return tcpip.ErrUnknownProtocol + return &tcpip.ErrUnknownProtocol{} } return transProtoState.proto.SetOption(option) } @@ -742,10 +764,10 @@ func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumb // if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil { // ... // } -func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) *tcpip.Error { +func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) tcpip.Error { transProtoState, ok := s.transportProtocols[transport] if !ok { - return tcpip.ErrUnknownProtocol + return &tcpip.ErrUnknownProtocol{} } return transProtoState.proto.Option(option) } @@ -778,15 +800,15 @@ func (s *Stack) Stats() tcpip.Stats { // SetForwarding enables or disables packet forwarding between NICs for the // passed protocol. -func (s *Stack) SetForwarding(protocolNum tcpip.NetworkProtocolNumber, enable bool) *tcpip.Error { +func (s *Stack) SetForwarding(protocolNum tcpip.NetworkProtocolNumber, enable bool) tcpip.Error { protocol, ok := s.networkProtocols[protocolNum] if !ok { - return tcpip.ErrUnknownProtocol + return &tcpip.ErrUnknownProtocol{} } forwardingProtocol, ok := protocol.(ForwardingNetworkProtocol) if !ok { - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } forwardingProtocol.SetForwarding(enable) @@ -814,45 +836,44 @@ func (s *Stack) Forwarding(protocolNum tcpip.NetworkProtocolNumber) bool { // // This method takes ownership of the table. func (s *Stack) SetRouteTable(table []tcpip.Route) { - s.mu.Lock() - defer s.mu.Unlock() - - s.routeTable = table + s.route.mu.Lock() + defer s.route.mu.Unlock() + s.route.mu.table = table } // GetRouteTable returns the route table which is currently in use. func (s *Stack) GetRouteTable() []tcpip.Route { - s.mu.Lock() - defer s.mu.Unlock() - return append([]tcpip.Route(nil), s.routeTable...) + s.route.mu.RLock() + defer s.route.mu.RUnlock() + return append([]tcpip.Route(nil), s.route.mu.table...) } // AddRoute appends a route to the route table. func (s *Stack) AddRoute(route tcpip.Route) { - s.mu.Lock() - defer s.mu.Unlock() - s.routeTable = append(s.routeTable, route) + s.route.mu.Lock() + defer s.route.mu.Unlock() + s.route.mu.table = append(s.route.mu.table, route) } // RemoveRoutes removes matching routes from the route table. func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) { - s.mu.Lock() - defer s.mu.Unlock() + s.route.mu.Lock() + defer s.route.mu.Unlock() var filteredRoutes []tcpip.Route - for _, route := range s.routeTable { + for _, route := range s.route.mu.table { if !match(route) { filteredRoutes = append(filteredRoutes, route) } } - s.routeTable = filteredRoutes + s.route.mu.table = filteredRoutes } // NewEndpoint creates a new transport layer endpoint of the given protocol. -func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { t, ok := s.transportProtocols[transport] if !ok { - return nil, tcpip.ErrUnknownProtocol + return nil, &tcpip.ErrUnknownProtocol{} } return t.proto.NewEndpoint(network, waiterQueue) @@ -861,9 +882,9 @@ func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcp // NewRawEndpoint creates a new raw transport layer endpoint of the given // protocol. Raw endpoints receive all traffic for a given protocol regardless // of address. -func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) { +func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, tcpip.Error) { if s.rawFactory == nil { - return nil, tcpip.ErrNotPermitted + return nil, &tcpip.ErrNotPermitted{} } if !associated { @@ -872,7 +893,7 @@ func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network t, ok := s.transportProtocols[transport] if !ok { - return nil, tcpip.ErrUnknownProtocol + return nil, &tcpip.ErrUnknownProtocol{} } return t.proto.NewRawEndpoint(network, waiterQueue) @@ -880,9 +901,9 @@ func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network // NewPacketEndpoint creates a new packet endpoint listening for the given // netProto. -func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { if s.rawFactory == nil { - return nil, tcpip.ErrNotPermitted + return nil, &tcpip.ErrNotPermitted{} } return s.rawFactory.NewPacketEndpoint(s, cooked, netProto, waiterQueue) @@ -913,20 +934,20 @@ type NICOptions struct { // NICs can be configured. // // LinkEndpoint.Attach will be called to bind ep with a NetworkDispatcher. -func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOptions) *tcpip.Error { +func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOptions) tcpip.Error { s.mu.Lock() defer s.mu.Unlock() // Make sure id is unique. if _, ok := s.nics[id]; ok { - return tcpip.ErrDuplicateNICID + return &tcpip.ErrDuplicateNICID{} } // Make sure name is unique, unless unnamed. if opts.Name != "" { for _, n := range s.nics { if n.Name() == opts.Name { - return tcpip.ErrDuplicateNICID + return &tcpip.ErrDuplicateNICID{} } } } @@ -942,7 +963,7 @@ func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOp // CreateNIC creates a NIC with the provided id and LinkEndpoint and calls // LinkEndpoint.Attach to bind ep with a NetworkDispatcher. -func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) *tcpip.Error { +func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) tcpip.Error { return s.CreateNICWithOptions(id, ep, NICOptions{}) } @@ -960,26 +981,26 @@ func (s *Stack) GetLinkEndpointByName(name string) LinkEndpoint { // EnableNIC enables the given NIC so that the link-layer endpoint can start // delivering packets to it. -func (s *Stack) EnableNIC(id tcpip.NICID) *tcpip.Error { +func (s *Stack) EnableNIC(id tcpip.NICID) tcpip.Error { s.mu.RLock() defer s.mu.RUnlock() nic, ok := s.nics[id] if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } return nic.enable() } // DisableNIC disables the given NIC. -func (s *Stack) DisableNIC(id tcpip.NICID) *tcpip.Error { +func (s *Stack) DisableNIC(id tcpip.NICID) tcpip.Error { s.mu.RLock() defer s.mu.RUnlock() nic, ok := s.nics[id] if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } nic.disable() @@ -1000,7 +1021,7 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool { } // RemoveNIC removes NIC and all related routes from the network stack. -func (s *Stack) RemoveNIC(id tcpip.NICID) *tcpip.Error { +func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error { s.mu.Lock() defer s.mu.Unlock() @@ -1010,25 +1031,26 @@ func (s *Stack) RemoveNIC(id tcpip.NICID) *tcpip.Error { // removeNICLocked removes NIC and all related routes from the network stack. // // s.mu must be locked. -func (s *Stack) removeNICLocked(id tcpip.NICID) *tcpip.Error { +func (s *Stack) removeNICLocked(id tcpip.NICID) tcpip.Error { nic, ok := s.nics[id] if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } delete(s.nics, id) // Remove routes in-place. n tracks the number of routes written. + s.route.mu.Lock() n := 0 - for i, r := range s.routeTable { - s.routeTable[i] = tcpip.Route{} + for i, r := range s.route.mu.table { + s.route.mu.table[i] = tcpip.Route{} if r.NIC != id { // Keep this route. - s.routeTable[n] = r + s.route.mu.table[n] = r n++ } } - - s.routeTable = s.routeTable[:n] + s.route.mu.table = s.route.mu.table[:n] + s.route.mu.Unlock() return nic.remove() } @@ -1118,13 +1140,13 @@ type NICStateFlags struct { } // AddAddress adds a new network-layer address to the specified NIC. -func (s *Stack) AddAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error { +func (s *Stack) AddAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error { return s.AddAddressWithOptions(id, protocol, addr, CanBePrimaryEndpoint) } // AddAddressWithPrefix is the same as AddAddress, but allows you to specify // the address prefix. -func (s *Stack) AddAddressWithPrefix(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.AddressWithPrefix) *tcpip.Error { +func (s *Stack) AddAddressWithPrefix(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.AddressWithPrefix) tcpip.Error { ap := tcpip.ProtocolAddress{ Protocol: protocol, AddressWithPrefix: addr, @@ -1134,16 +1156,16 @@ func (s *Stack) AddAddressWithPrefix(id tcpip.NICID, protocol tcpip.NetworkProto // AddProtocolAddress adds a new network-layer protocol address to the // specified NIC. -func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress) *tcpip.Error { +func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress) tcpip.Error { return s.AddProtocolAddressWithOptions(id, protocolAddress, CanBePrimaryEndpoint) } // AddAddressWithOptions is the same as AddAddress, but allows you to specify // whether the new endpoint can be primary or not. -func (s *Stack) AddAddressWithOptions(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, peb PrimaryEndpointBehavior) *tcpip.Error { +func (s *Stack) AddAddressWithOptions(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, peb PrimaryEndpointBehavior) tcpip.Error { netProto, ok := s.networkProtocols[protocol] if !ok { - return tcpip.ErrUnknownProtocol + return &tcpip.ErrUnknownProtocol{} } return s.AddProtocolAddressWithOptions(id, tcpip.ProtocolAddress{ Protocol: protocol, @@ -1156,13 +1178,13 @@ func (s *Stack) AddAddressWithOptions(id tcpip.NICID, protocol tcpip.NetworkProt // AddProtocolAddressWithOptions is the same as AddProtocolAddress, but allows // you to specify whether the new endpoint can be primary or not. -func (s *Stack) AddProtocolAddressWithOptions(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) *tcpip.Error { +func (s *Stack) AddProtocolAddressWithOptions(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) tcpip.Error { s.mu.RLock() defer s.mu.RUnlock() nic, ok := s.nics[id] if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } return nic.addAddress(protocolAddress, peb) @@ -1170,7 +1192,7 @@ func (s *Stack) AddProtocolAddressWithOptions(id tcpip.NICID, protocolAddress tc // RemoveAddress removes an existing network-layer address from the specified // NIC. -func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) *tcpip.Error { +func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) tcpip.Error { s.mu.RLock() defer s.mu.RUnlock() @@ -1178,7 +1200,7 @@ func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) *tcpip.Error { return nic.removeAddress(addr) } - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } // AllAddresses returns a map of NICIDs to their protocol addresses (primary @@ -1308,7 +1330,7 @@ func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, // If no local address is provided, the stack will select a local address. If no // remote address is provided, the stack wil use a remote address equal to the // local address. -func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, *tcpip.Error) { +func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, tcpip.Error) { s.mu.RLock() defer s.mu.RUnlock() @@ -1344,48 +1366,58 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n } if isLoopback { - return nil, tcpip.ErrBadLocalAddress + return nil, &tcpip.ErrBadLocalAddress{} } - return nil, tcpip.ErrNetworkUnreachable + return nil, &tcpip.ErrNetworkUnreachable{} } canForward := s.Forwarding(netProto) && !header.IsV6LinkLocalAddress(localAddr) && !isLinkLocal // Find a route to the remote with the route table. var chosenRoute tcpip.Route - for _, route := range s.routeTable { - if len(remoteAddr) != 0 && !route.Destination.Contains(remoteAddr) { - continue - } + if r := func() *Route { + s.route.mu.RLock() + defer s.route.mu.RUnlock() - nic, ok := s.nics[route.NIC] - if !ok || !nic.Enabled() { - continue - } + for _, route := range s.route.mu.table { + if len(remoteAddr) != 0 && !route.Destination.Contains(remoteAddr) { + continue + } - if id == 0 || id == route.NIC { - if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil { - var gateway tcpip.Address - if needRoute { - gateway = route.Gateway - } - r := constructAndValidateRoute(netProto, addressEndpoint, nic /* outgoingNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop) - if r == nil { - panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr)) + nic, ok := s.nics[route.NIC] + if !ok || !nic.Enabled() { + continue + } + + if id == 0 || id == route.NIC { + if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil { + var gateway tcpip.Address + if needRoute { + gateway = route.Gateway + } + r := constructAndValidateRoute(netProto, addressEndpoint, nic /* outgoingNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop) + if r == nil { + panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr)) + } + return r } - return r, nil } - } - // If the stack has forwarding enabled and we haven't found a valid route to - // the remote address yet, keep track of the first valid route. We keep - // iterating because we prefer routes that let us use a local address that - // is assigned to the outgoing interface. There is no requirement to do this - // from any RFC but simply a choice made to better follow a strong host - // model which the netstack follows at the time of writing. - if canForward && chosenRoute == (tcpip.Route{}) { - chosenRoute = route + // If the stack has forwarding enabled and we haven't found a valid route + // to the remote address yet, keep track of the first valid route. We + // keep iterating because we prefer routes that let us use a local + // address that is assigned to the outgoing interface. There is no + // requirement to do this from any RFC but simply a choice made to better + // follow a strong host model which the netstack follows at the time of + // writing. + if canForward && chosenRoute == (tcpip.Route{}) { + chosenRoute = route + } } + + return nil + }(); r != nil { + return r, nil } if chosenRoute != (tcpip.Route{}) { @@ -1412,7 +1444,7 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n } } - return nil, tcpip.ErrNoRoute + return nil, &tcpip.ErrNoRoute{} } if id == 0 { @@ -1432,12 +1464,12 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n } if needRoute { - return nil, tcpip.ErrNoRoute + return nil, &tcpip.ErrNoRoute{} } if header.IsV6LoopbackAddress(remoteAddr) { - return nil, tcpip.ErrBadLocalAddress + return nil, &tcpip.ErrBadLocalAddress{} } - return nil, tcpip.ErrNetworkUnreachable + return nil, &tcpip.ErrNetworkUnreachable{} } // CheckNetworkProtocol checks if a given network protocol is enabled in the @@ -1483,13 +1515,13 @@ func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProto } // SetPromiscuousMode enables or disables promiscuous mode in the given NIC. -func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) *tcpip.Error { +func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) tcpip.Error { s.mu.RLock() defer s.mu.RUnlock() nic, ok := s.nics[nicID] if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } nic.setPromiscuousMode(enable) @@ -1499,13 +1531,13 @@ func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) *tcpip.Error // SetSpoofing enables or disables address spoofing in the given NIC, allowing // endpoints to bind to any address in the NIC. -func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) *tcpip.Error { +func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) tcpip.Error { s.mu.RLock() defer s.mu.RUnlock() nic, ok := s.nics[nicID] if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } nic.setSpoofing(enable) @@ -1513,20 +1545,6 @@ func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) *tcpip.Error { return nil } -// AddLinkAddress adds a link address for the neighbor on the specified NIC. -func (s *Stack) AddLinkAddress(nicID tcpip.NICID, neighbor tcpip.Address, linkAddr tcpip.LinkAddress) *tcpip.Error { - s.mu.RLock() - defer s.mu.RUnlock() - - nic, ok := s.nics[nicID] - if !ok { - return tcpip.ErrUnknownNICID - } - - nic.linkAddrCache.AddLinkAddress(neighbor, linkAddr) - return nil -} - // LinkResolutionResult is the result of a link address resolution attempt. type LinkResolutionResult struct { LinkAddress tcpip.LinkAddress @@ -1549,93 +1567,82 @@ type LinkResolutionResult struct { // If specified, the local address must be an address local to the interface // the neighbor cache belongs to. The local address is the source address of // a packet prompting NUD/link address resolution. -func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) *tcpip.Error { +func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error { s.mu.RLock() nic, ok := s.nics[nicID] s.mu.RUnlock() if !ok { - return tcpip.ErrUnknownNICID - } - - linkRes, ok := s.linkAddrResolvers[protocol] - if !ok { - return tcpip.ErrNotSupported - } - - if linkAddr, ok := linkRes.ResolveStaticAddress(addr); ok { - onResolve(LinkResolutionResult{LinkAddress: linkAddr, Success: true}) - return nil + return &tcpip.ErrUnknownNICID{} } - _, _, err := nic.getNeighborLinkAddress(addr, localAddr, linkRes, onResolve) - return err + return nic.getLinkAddress(addr, localAddr, protocol, onResolve) } // Neighbors returns all IP to MAC address associations. -func (s *Stack) Neighbors(nicID tcpip.NICID) ([]NeighborEntry, *tcpip.Error) { +func (s *Stack) Neighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) { s.mu.RLock() nic, ok := s.nics[nicID] s.mu.RUnlock() if !ok { - return nil, tcpip.ErrUnknownNICID + return nil, &tcpip.ErrUnknownNICID{} } - return nic.neighbors() + return nic.neighbors(protocol) } // AddStaticNeighbor statically associates an IP address to a MAC address. -func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress) *tcpip.Error { +func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error { s.mu.RLock() nic, ok := s.nics[nicID] s.mu.RUnlock() if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } - return nic.addStaticNeighbor(addr, linkAddr) + return nic.addStaticNeighbor(addr, protocol, linkAddr) } // RemoveNeighbor removes an IP to MAC address association previously created // either automically or by AddStaticNeighbor. Returns ErrBadAddress if there // is no association with the provided address. -func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, addr tcpip.Address) *tcpip.Error { +func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error { s.mu.RLock() nic, ok := s.nics[nicID] s.mu.RUnlock() if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } - return nic.removeNeighbor(addr) + return nic.removeNeighbor(protocol, addr) } // ClearNeighbors removes all IP to MAC address associations. -func (s *Stack) ClearNeighbors(nicID tcpip.NICID) *tcpip.Error { +func (s *Stack) ClearNeighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) tcpip.Error { s.mu.RLock() nic, ok := s.nics[nicID] s.mu.RUnlock() if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } - return nic.clearNeighbors() + return nic.clearNeighbors(protocol) } // RegisterTransportEndpoint registers the given endpoint with the stack // transport dispatcher. Received packets that match the provided id will be // delivered to the given endpoint; specifying a nic is optional, but // nic-specific IDs have precedence over global ones. -func (s *Stack) RegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { +func (s *Stack) RegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) } // CheckRegisterTransportEndpoint checks if an endpoint can be registered with // the stack transport dispatcher. -func (s *Stack) CheckRegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { +func (s *Stack) CheckRegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { return s.demux.checkEndpoint(netProtos, protocol, id, flags, bindToDevice) } @@ -1672,7 +1679,7 @@ func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, tran // RegisterRawTransportEndpoint registers the given endpoint with the stack // transport dispatcher. Received packets that match the provided transport // protocol will be delivered to the given endpoint. -func (s *Stack) RegisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) *tcpip.Error { +func (s *Stack) RegisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) tcpip.Error { return s.demux.registerRawEndpoint(netProto, transProto, ep) } @@ -1782,7 +1789,7 @@ func (s *Stack) Resume() { // RegisterPacketEndpoint registers ep with the stack, causing it to receive // all traffic of the specified netProto on the given NIC. If nicID is 0, it // receives traffic from every NIC. -func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) *tcpip.Error { +func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) tcpip.Error { s.mu.Lock() defer s.mu.Unlock() @@ -1801,7 +1808,7 @@ func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.Network // Capture on a specific device. nic, ok := s.nics[nicID] if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } if err := nic.registerPacketEndpoint(netProto, ep); err != nil { return err @@ -1839,12 +1846,12 @@ func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip // WritePacketToRemote writes a payload on the specified NIC using the provided // network protocol and remote link address. -func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.VectorisedView) *tcpip.Error { +func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.VectorisedView) tcpip.Error { s.mu.Lock() nic, ok := s.nics[nicID] s.mu.Unlock() if !ok { - return tcpip.ErrUnknownDevice + return &tcpip.ErrUnknownDevice{} } pkt := NewPacketBuffer(PacketBufferOptions{ ReserveHeaderBytes: int(nic.MaxHeaderLength()), @@ -1909,37 +1916,37 @@ func (s *Stack) RemoveTCPProbe() { } // JoinGroup joins the given multicast group on the given NIC. -func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) *tcpip.Error { +func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { s.mu.RLock() defer s.mu.RUnlock() if nic, ok := s.nics[nicID]; ok { return nic.joinGroup(protocol, multicastAddr) } - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } // LeaveGroup leaves the given multicast group on the given NIC. -func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) *tcpip.Error { +func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { s.mu.RLock() defer s.mu.RUnlock() if nic, ok := s.nics[nicID]; ok { return nic.leaveGroup(protocol, multicastAddr) } - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } // IsInGroup returns true if the NIC with ID nicID has joined the multicast // group multicastAddr. -func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool, *tcpip.Error) { +func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool, tcpip.Error) { s.mu.RLock() defer s.mu.RUnlock() if nic, ok := s.nics[nicID]; ok { return nic.isInGroup(multicastAddr), nil } - return false, tcpip.ErrUnknownNICID + return false, &tcpip.ErrUnknownNICID{} } // IPTables returns the stack's iptables. @@ -1979,45 +1986,45 @@ func (s *Stack) AllowICMPMessage() bool { // GetNetworkEndpoint returns the NetworkEndpoint with the specified protocol // number installed on the specified NIC. -func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, *tcpip.Error) { +func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, tcpip.Error) { s.mu.Lock() defer s.mu.Unlock() nic, ok := s.nics[nicID] if !ok { - return nil, tcpip.ErrUnknownNICID + return nil, &tcpip.ErrUnknownNICID{} } return nic.getNetworkEndpoint(proto), nil } // NUDConfigurations gets the per-interface NUD configurations. -func (s *Stack) NUDConfigurations(id tcpip.NICID) (NUDConfigurations, *tcpip.Error) { +func (s *Stack) NUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) { s.mu.RLock() nic, ok := s.nics[id] s.mu.RUnlock() if !ok { - return NUDConfigurations{}, tcpip.ErrUnknownNICID + return NUDConfigurations{}, &tcpip.ErrUnknownNICID{} } - return nic.nudConfigs() + return nic.nudConfigs(proto) } // SetNUDConfigurations sets the per-interface NUD configurations. // // Note, if c contains invalid NUD configuration values, it will be fixed to // use default values for the erroneous values. -func (s *Stack) SetNUDConfigurations(id tcpip.NICID, c NUDConfigurations) *tcpip.Error { +func (s *Stack) SetNUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error { s.mu.RLock() nic, ok := s.nics[id] s.mu.RUnlock() if !ok { - return tcpip.ErrUnknownNICID + return &tcpip.ErrUnknownNICID{} } - return nic.setNUDConfigs(c) + return nic.setNUDConfigs(proto, c) } // Seed returns a 32 bit value that can be used as a seed value for port @@ -2056,7 +2063,7 @@ func generateRandInt64() int64 { } // FindNetworkEndpoint returns the network endpoint for the given address. -func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, address tcpip.Address) (NetworkEndpoint, *tcpip.Error) { +func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, address tcpip.Address) (NetworkEndpoint, tcpip.Error) { s.mu.RLock() defer s.mu.RUnlock() @@ -2068,7 +2075,7 @@ func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, addres addressEndpoint.DecRef() return nic.getNetworkEndpoint(netProto), nil } - return nil, tcpip.ErrBadAddress + return nil, &tcpip.ErrBadAddress{} } // FindNICNameFromID returns the name of the NIC for the given NICID. diff --git a/pkg/tcpip/stack/stack_options.go b/pkg/tcpip/stack/stack_options.go index 92e70f94e..3066f4ffd 100644 --- a/pkg/tcpip/stack/stack_options.go +++ b/pkg/tcpip/stack/stack_options.go @@ -15,6 +15,8 @@ package stack import ( + "time" + "gvisor.dev/gvisor/pkg/tcpip" ) @@ -29,6 +31,10 @@ const ( // DefaultMaxBufferSize is the default maximum permitted size of a // send/receive buffer. DefaultMaxBufferSize = 4 << 20 // 4 MiB + + // defaultTCPInvalidRateLimit is the default value for + // stack.TCPInvalidRateLimit. + defaultTCPInvalidRateLimit = 500 * time.Millisecond ) // ReceiveBufferSizeOption is used by stack.(Stack*).Option/SetOption to @@ -39,18 +45,22 @@ type ReceiveBufferSizeOption struct { Max int } +// TCPInvalidRateLimitOption is used by stack.(Stack*).Option/SetOption to get/set +// stack.tcpInvalidRateLimit. +type TCPInvalidRateLimitOption time.Duration + // SetOption allows setting stack wide options. -func (s *Stack) SetOption(option interface{}) *tcpip.Error { +func (s *Stack) SetOption(option interface{}) tcpip.Error { switch v := option.(type) { case tcpip.SendBufferSizeOption: // Make sure we don't allow lowering the buffer below minimum // required for stack to work. if v.Min < MinBufferSize { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } if v.Default < v.Min || v.Default > v.Max { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } s.mu.Lock() @@ -62,11 +72,11 @@ func (s *Stack) SetOption(option interface{}) *tcpip.Error { // Make sure we don't allow lowering the buffer below minimum // required for stack to work. if v.Min < MinBufferSize { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } if v.Default < v.Min || v.Default > v.Max { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } s.mu.Lock() @@ -74,13 +84,22 @@ func (s *Stack) SetOption(option interface{}) *tcpip.Error { s.mu.Unlock() return nil + case TCPInvalidRateLimitOption: + if v < 0 { + return &tcpip.ErrInvalidOptionValue{} + } + s.mu.Lock() + s.tcpInvalidRateLimit = time.Duration(v) + s.mu.Unlock() + return nil + default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } // Option allows retrieving stack wide options. -func (s *Stack) Option(option interface{}) *tcpip.Error { +func (s *Stack) Option(option interface{}) tcpip.Error { switch v := option.(type) { case *tcpip.SendBufferSizeOption: s.mu.RLock() @@ -94,7 +113,13 @@ func (s *Stack) Option(option interface{}) *tcpip.Error { s.mu.RUnlock() return nil + case *TCPInvalidRateLimitOption: + s.mu.RLock() + *v = TCPInvalidRateLimitOption(s.tcpInvalidRateLimit) + s.mu.RUnlock() + return nil + default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 0f02f1d53..b641a4aaa 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -31,6 +31,7 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/faketime" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/link/channel" "gvisor.dev/gvisor/pkg/tcpip/link/loopback" @@ -90,7 +91,7 @@ type fakeNetworkEndpoint struct { dispatcher stack.TransportDispatcher } -func (f *fakeNetworkEndpoint) Enable() *tcpip.Error { +func (f *fakeNetworkEndpoint) Enable() tcpip.Error { f.mu.Lock() defer f.mu.Unlock() f.mu.enabled = true @@ -137,12 +138,15 @@ func (f *fakeNetworkEndpoint) HandlePacket(pkt *stack.PacketBuffer) { return } pkt.Data.TrimFront(fakeNetHeaderLen) - f.dispatcher.DeliverTransportControlPacket( + f.dispatcher.DeliverTransportError( tcpip.Address(nb[srcAddrOffset:srcAddrOffset+1]), tcpip.Address(nb[dstAddrOffset:dstAddrOffset+1]), fakeNetNumber, tcpip.TransportProtocolNumber(nb[protocolNumberOffset]), - stack.ControlPortUnreachable, 0, pkt) + // Nothing checks the error. + nil, /* transport error */ + pkt, + ) return } @@ -162,7 +166,7 @@ func (f *fakeNetworkEndpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumbe return f.proto.Number() } -func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error { +func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error { // Increment the sent packet count in the protocol descriptor. f.proto.sendPacketCount[int(r.RemoteAddress[0])%len(f.proto.sendPacketCount)]++ @@ -185,12 +189,12 @@ func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, params } // WritePackets implements stack.LinkEndpoint.WritePackets. -func (*fakeNetworkEndpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, *tcpip.Error) { +func (*fakeNetworkEndpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, tcpip.Error) { panic("not implemented") } -func (*fakeNetworkEndpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error { - return tcpip.ErrNotSupported +func (*fakeNetworkEndpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { + return &tcpip.ErrNotSupported{} } func (f *fakeNetworkEndpoint) Close() { @@ -243,7 +247,7 @@ func (*fakeNetworkProtocol) ParseAddresses(v buffer.View) (src, dst tcpip.Addres return tcpip.Address(v[srcAddrOffset : srcAddrOffset+1]), tcpip.Address(v[dstAddrOffset : dstAddrOffset+1]) } -func (f *fakeNetworkProtocol) NewEndpoint(nic stack.NetworkInterface, _ stack.LinkAddressCache, _ stack.NUDHandler, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { +func (f *fakeNetworkProtocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { e := &fakeNetworkEndpoint{ nic: nic, proto: f, @@ -253,23 +257,23 @@ func (f *fakeNetworkProtocol) NewEndpoint(nic stack.NetworkInterface, _ stack.Li return e } -func (f *fakeNetworkProtocol) SetOption(option tcpip.SettableNetworkProtocolOption) *tcpip.Error { +func (f *fakeNetworkProtocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error { switch v := option.(type) { case *tcpip.DefaultTTLOption: f.defaultTTL = uint8(*v) return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } -func (f *fakeNetworkProtocol) Option(option tcpip.GettableNetworkProtocolOption) *tcpip.Error { +func (f *fakeNetworkProtocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error { switch v := option.(type) { case *tcpip.DefaultTTLOption: *v = tcpip.DefaultTTLOption(f.defaultTTL) return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } @@ -418,7 +422,7 @@ func TestNetworkReceive(t *testing.T) { } } -func sendTo(s *stack.Stack, addr tcpip.Address, payload buffer.View) *tcpip.Error { +func sendTo(s *stack.Stack, addr tcpip.Address, payload buffer.View) tcpip.Error { r, err := s.FindRoute(0, "", addr, fakeNetNumber, false /* multicastLoop */) if err != nil { return err @@ -427,7 +431,7 @@ func sendTo(s *stack.Stack, addr tcpip.Address, payload buffer.View) *tcpip.Erro return send(r, payload) } -func send(r *stack.Route, payload buffer.View) *tcpip.Error { +func send(r *stack.Route, payload buffer.View) tcpip.Error { return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: fakeTransNumber, TTL: 123, TOS: stack.DefaultTOS}, stack.NewPacketBuffer(stack.PacketBufferOptions{ ReserveHeaderBytes: int(r.MaxHeaderLength()), Data: payload.ToVectorisedView(), @@ -456,14 +460,14 @@ func testSend(t *testing.T, r *stack.Route, ep *channel.Endpoint, payload buffer } } -func testFailingSend(t *testing.T, r *stack.Route, ep *channel.Endpoint, payload buffer.View, wantErr *tcpip.Error) { +func testFailingSend(t *testing.T, r *stack.Route, ep *channel.Endpoint, payload buffer.View, wantErr tcpip.Error) { t.Helper() if gotErr := send(r, payload); gotErr != wantErr { t.Errorf("send failed: got = %s, want = %s ", gotErr, wantErr) } } -func testFailingSendTo(t *testing.T, s *stack.Stack, addr tcpip.Address, ep *channel.Endpoint, payload buffer.View, wantErr *tcpip.Error) { +func testFailingSendTo(t *testing.T, s *stack.Stack, addr tcpip.Address, ep *channel.Endpoint, payload buffer.View, wantErr tcpip.Error) { t.Helper() if gotErr := sendTo(s, addr, payload); gotErr != wantErr { t.Errorf("sendto failed: got = %s, want = %s ", gotErr, wantErr) @@ -600,8 +604,8 @@ func testRoute(t *testing.T, s *stack.Stack, nic tcpip.NICID, srcAddr, dstAddr, func testNoRoute(t *testing.T, s *stack.Stack, nic tcpip.NICID, srcAddr, dstAddr tcpip.Address) { _, err := s.FindRoute(nic, srcAddr, dstAddr, fakeNetNumber, false /* multicastLoop */) - if err != tcpip.ErrNoRoute { - t.Fatalf("FindRoute returned unexpected error, got = %v, want = %s", err, tcpip.ErrNoRoute) + if _, ok := err.(*tcpip.ErrNoRoute); !ok { + t.Fatalf("FindRoute returned unexpected error, got = %v, want = %s", err, &tcpip.ErrNoRoute{}) } } @@ -649,8 +653,9 @@ func TestDisableUnknownNIC(t *testing.T) { NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory}, }) - if err := s.DisableNIC(1); err != tcpip.ErrUnknownNICID { - t.Fatalf("got s.DisableNIC(1) = %v, want = %s", err, tcpip.ErrUnknownNICID) + err := s.DisableNIC(1) + if _, ok := err.(*tcpip.ErrUnknownNICID); !ok { + t.Fatalf("got s.DisableNIC(1) = %v, want = %s", err, &tcpip.ErrUnknownNICID{}) } } @@ -708,8 +713,9 @@ func TestRemoveUnknownNIC(t *testing.T) { NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory}, }) - if err := s.RemoveNIC(1); err != tcpip.ErrUnknownNICID { - t.Fatalf("got s.RemoveNIC(1) = %v, want = %s", err, tcpip.ErrUnknownNICID) + err := s.RemoveNIC(1) + if _, ok := err.(*tcpip.ErrUnknownNICID); !ok { + t.Fatalf("got s.RemoveNIC(1) = %v, want = %s", err, &tcpip.ErrUnknownNICID{}) } } @@ -752,8 +758,8 @@ func TestRemoveNIC(t *testing.T) { func TestRouteWithDownNIC(t *testing.T) { tests := []struct { name string - downFn func(s *stack.Stack, nicID tcpip.NICID) *tcpip.Error - upFn func(s *stack.Stack, nicID tcpip.NICID) *tcpip.Error + downFn func(s *stack.Stack, nicID tcpip.NICID) tcpip.Error + upFn func(s *stack.Stack, nicID tcpip.NICID) tcpip.Error }{ { name: "Disabled NIC", @@ -911,15 +917,15 @@ func TestRouteWithDownNIC(t *testing.T) { if err := test.downFn(s, nicID1); err != nil { t.Fatalf("test.downFn(_, %d): %s", nicID1, err) } - testFailingSend(t, r1, ep1, buf, tcpip.ErrInvalidEndpointState) + testFailingSend(t, r1, ep1, buf, &tcpip.ErrInvalidEndpointState{}) testSend(t, r2, ep2, buf) // Writes with Routes that use NIC2 after being brought down should fail. if err := test.downFn(s, nicID2); err != nil { t.Fatalf("test.downFn(_, %d): %s", nicID2, err) } - testFailingSend(t, r1, ep1, buf, tcpip.ErrInvalidEndpointState) - testFailingSend(t, r2, ep2, buf, tcpip.ErrInvalidEndpointState) + testFailingSend(t, r1, ep1, buf, &tcpip.ErrInvalidEndpointState{}) + testFailingSend(t, r2, ep2, buf, &tcpip.ErrInvalidEndpointState{}) if upFn := test.upFn; upFn != nil { // Writes with Routes that use NIC1 after being brought up should @@ -932,7 +938,7 @@ func TestRouteWithDownNIC(t *testing.T) { t.Fatalf("test.upFn(_, %d): %s", nicID1, err) } testSend(t, r1, ep1, buf) - testFailingSend(t, r2, ep2, buf, tcpip.ErrInvalidEndpointState) + testFailingSend(t, r2, ep2, buf, &tcpip.ErrInvalidEndpointState{}) } }) } @@ -1057,11 +1063,12 @@ func TestAddressRemoval(t *testing.T) { t.Fatal("RemoveAddress failed:", err) } testFailingRecv(t, fakeNet, localAddrByte, ep, buf) - testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute) + testFailingSendTo(t, s, remoteAddr, ep, nil, &tcpip.ErrNoRoute{}) // Check that removing the same address fails. - if err := s.RemoveAddress(1, localAddr); err != tcpip.ErrBadLocalAddress { - t.Fatalf("RemoveAddress returned unexpected error, got = %v, want = %s", err, tcpip.ErrBadLocalAddress) + err := s.RemoveAddress(1, localAddr) + if _, ok := err.(*tcpip.ErrBadLocalAddress); !ok { + t.Fatalf("RemoveAddress returned unexpected error, got = %v, want = %s", err, &tcpip.ErrBadLocalAddress{}) } } @@ -1108,12 +1115,15 @@ func TestAddressRemovalWithRouteHeld(t *testing.T) { t.Fatal("RemoveAddress failed:", err) } testFailingRecv(t, fakeNet, localAddrByte, ep, buf) - testFailingSend(t, r, ep, nil, tcpip.ErrInvalidEndpointState) - testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute) + testFailingSend(t, r, ep, nil, &tcpip.ErrInvalidEndpointState{}) + testFailingSendTo(t, s, remoteAddr, ep, nil, &tcpip.ErrNoRoute{}) // Check that removing the same address fails. - if err := s.RemoveAddress(1, localAddr); err != tcpip.ErrBadLocalAddress { - t.Fatalf("RemoveAddress returned unexpected error, got = %v, want = %s", err, tcpip.ErrBadLocalAddress) + { + err := s.RemoveAddress(1, localAddr) + if _, ok := err.(*tcpip.ErrBadLocalAddress); !ok { + t.Fatalf("RemoveAddress returned unexpected error, got = %v, want = %s", err, &tcpip.ErrBadLocalAddress{}) + } } } @@ -1207,7 +1217,7 @@ func TestEndpointExpiration(t *testing.T) { // FIXME(b/139841518):Spoofing doesn't work if there is no primary address. // testSendTo(t, s, remoteAddr, ep, nil) } else { - testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute) + testFailingSendTo(t, s, remoteAddr, ep, nil, &tcpip.ErrNoRoute{}) } // 2. Add Address, everything should work. @@ -1235,7 +1245,7 @@ func TestEndpointExpiration(t *testing.T) { // FIXME(b/139841518):Spoofing doesn't work if there is no primary address. // testSendTo(t, s, remoteAddr, ep, nil) } else { - testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute) + testFailingSendTo(t, s, remoteAddr, ep, nil, &tcpip.ErrNoRoute{}) } // 4. Add Address back, everything should work again. @@ -1274,8 +1284,8 @@ func TestEndpointExpiration(t *testing.T) { testSend(t, r, ep, nil) testSendTo(t, s, remoteAddr, ep, nil) } else { - testFailingSend(t, r, ep, nil, tcpip.ErrInvalidEndpointState) - testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute) + testFailingSend(t, r, ep, nil, &tcpip.ErrInvalidEndpointState{}) + testFailingSendTo(t, s, remoteAddr, ep, nil, &tcpip.ErrNoRoute{}) } // 7. Add Address back, everything should work again. @@ -1311,7 +1321,7 @@ func TestEndpointExpiration(t *testing.T) { // FIXME(b/139841518):Spoofing doesn't work if there is no primary address. // testSendTo(t, s, remoteAddr, ep, nil) } else { - testFailingSendTo(t, s, remoteAddr, ep, nil, tcpip.ErrNoRoute) + testFailingSendTo(t, s, remoteAddr, ep, nil, &tcpip.ErrNoRoute{}) } }) } @@ -1354,8 +1364,8 @@ func TestPromiscuousMode(t *testing.T) { // Check that we can't get a route as there is no local address. _, err := s.FindRoute(0, "", "\x02", fakeNetNumber, false /* multicastLoop */) - if err != tcpip.ErrNoRoute { - t.Fatalf("FindRoute returned unexpected error: got = %v, want = %s", err, tcpip.ErrNoRoute) + if _, ok := err.(*tcpip.ErrNoRoute); !ok { + t.Fatalf("FindRoute returned unexpected error: got = %v, want = %s", err, &tcpip.ErrNoRoute{}) } // Set promiscuous mode to false, then check that packet can't be @@ -1561,7 +1571,7 @@ func TestSpoofingNoAddress(t *testing.T) { t.Errorf("FindRoute succeeded with route %+v when it should have failed", r) } // Sending a packet fails. - testFailingSendTo(t, s, dstAddr, ep, nil, tcpip.ErrNoRoute) + testFailingSendTo(t, s, dstAddr, ep, nil, &tcpip.ErrNoRoute{}) // With address spoofing enabled, FindRoute permits any address to be used // as the source. @@ -1611,8 +1621,11 @@ func TestOutgoingBroadcastWithEmptyRouteTable(t *testing.T) { s.SetRouteTable([]tcpip.Route{}) // If there is no endpoint, it won't work. - if _, err := s.FindRoute(1, header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */); err != tcpip.ErrNetworkUnreachable { - t.Fatalf("got FindRoute(1, %s, %s, %d) = %s, want = %s", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err, tcpip.ErrNetworkUnreachable) + { + _, err := s.FindRoute(1, header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */) + if _, ok := err.(*tcpip.ErrNetworkUnreachable); !ok { + t.Fatalf("got FindRoute(1, %s, %s, %d) = %s, want = %s", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err, &tcpip.ErrNetworkUnreachable{}) + } } protoAddr := tcpip.ProtocolAddress{Protocol: fakeNetNumber, AddressWithPrefix: tcpip.AddressWithPrefix{header.IPv4Any, 0}} @@ -1631,8 +1644,11 @@ func TestOutgoingBroadcastWithEmptyRouteTable(t *testing.T) { } // If the NIC doesn't exist, it won't work. - if _, err := s.FindRoute(2, header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */); err != tcpip.ErrNetworkUnreachable { - t.Fatalf("got FindRoute(2, %v, %v, %d) = %v want = %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err, tcpip.ErrNetworkUnreachable) + { + _, err := s.FindRoute(2, header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */) + if _, ok := err.(*tcpip.ErrNetworkUnreachable); !ok { + t.Fatalf("got FindRoute(2, %v, %v, %d) = %v want = %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err, &tcpip.ErrNetworkUnreachable{}) + } } } @@ -1774,9 +1790,9 @@ func TestMulticastOrIPv6LinkLocalNeedsNoRoute(t *testing.T) { anyAddr = header.IPv6Any } - want := tcpip.ErrNetworkUnreachable + var want tcpip.Error = &tcpip.ErrNetworkUnreachable{} if tc.routeNeeded { - want = tcpip.ErrNoRoute + want = &tcpip.ErrNoRoute{} } // If there is no endpoint, it won't work. @@ -1790,8 +1806,8 @@ func TestMulticastOrIPv6LinkLocalNeedsNoRoute(t *testing.T) { if r, err := s.FindRoute(1, anyAddr, tc.address, fakeNetNumber, false /* multicastLoop */); tc.routeNeeded { // Route table is empty but we need a route, this should cause an error. - if err != tcpip.ErrNoRoute { - t.Fatalf("got FindRoute(1, %v, %v, %v) = %v, want = %v", anyAddr, tc.address, fakeNetNumber, err, tcpip.ErrNoRoute) + if _, ok := err.(*tcpip.ErrNoRoute); !ok { + t.Fatalf("got FindRoute(1, %v, %v, %v) = %v, want = %v", anyAddr, tc.address, fakeNetNumber, err, &tcpip.ErrNoRoute{}) } } else { if err != nil { @@ -2115,7 +2131,7 @@ func TestCreateNICWithOptions(t *testing.T) { type callArgsAndExpect struct { nicID tcpip.NICID opts stack.NICOptions - err *tcpip.Error + err tcpip.Error } tests := []struct { @@ -2133,7 +2149,7 @@ func TestCreateNICWithOptions(t *testing.T) { { nicID: tcpip.NICID(1), opts: stack.NICOptions{Name: "eth2"}, - err: tcpip.ErrDuplicateNICID, + err: &tcpip.ErrDuplicateNICID{}, }, }, }, @@ -2148,7 +2164,7 @@ func TestCreateNICWithOptions(t *testing.T) { { nicID: tcpip.NICID(2), opts: stack.NICOptions{Name: "lo"}, - err: tcpip.ErrDuplicateNICID, + err: &tcpip.ErrDuplicateNICID{}, }, }, }, @@ -2178,7 +2194,7 @@ func TestCreateNICWithOptions(t *testing.T) { { nicID: tcpip.NICID(1), opts: stack.NICOptions{}, - err: tcpip.ErrDuplicateNICID, + err: &tcpip.ErrDuplicateNICID{}, }, }, }, @@ -3297,14 +3313,14 @@ func TestStackReceiveBufferSizeOption(t *testing.T) { testCases := []struct { name string rs stack.ReceiveBufferSizeOption - err *tcpip.Error + err tcpip.Error }{ // Invalid configurations. - {"min_below_zero", stack.ReceiveBufferSizeOption{Min: -1, Default: sMin, Max: sMin}, tcpip.ErrInvalidOptionValue}, - {"min_zero", stack.ReceiveBufferSizeOption{Min: 0, Default: sMin, Max: sMin}, tcpip.ErrInvalidOptionValue}, - {"default_below_min", stack.ReceiveBufferSizeOption{Min: sMin, Default: sMin - 1, Max: sMin - 1}, tcpip.ErrInvalidOptionValue}, - {"default_above_max", stack.ReceiveBufferSizeOption{Min: sMin, Default: sMin + 1, Max: sMin}, tcpip.ErrInvalidOptionValue}, - {"max_below_min", stack.ReceiveBufferSizeOption{Min: sMin, Default: sMin + 1, Max: sMin - 1}, tcpip.ErrInvalidOptionValue}, + {"min_below_zero", stack.ReceiveBufferSizeOption{Min: -1, Default: sMin, Max: sMin}, &tcpip.ErrInvalidOptionValue{}}, + {"min_zero", stack.ReceiveBufferSizeOption{Min: 0, Default: sMin, Max: sMin}, &tcpip.ErrInvalidOptionValue{}}, + {"default_below_min", stack.ReceiveBufferSizeOption{Min: sMin, Default: sMin - 1, Max: sMin - 1}, &tcpip.ErrInvalidOptionValue{}}, + {"default_above_max", stack.ReceiveBufferSizeOption{Min: sMin, Default: sMin + 1, Max: sMin}, &tcpip.ErrInvalidOptionValue{}}, + {"max_below_min", stack.ReceiveBufferSizeOption{Min: sMin, Default: sMin + 1, Max: sMin - 1}, &tcpip.ErrInvalidOptionValue{}}, // Valid Configurations {"in_ascending_order", stack.ReceiveBufferSizeOption{Min: sMin, Default: sMin + 1, Max: sMin + 2}, nil}, @@ -3337,14 +3353,14 @@ func TestStackSendBufferSizeOption(t *testing.T) { testCases := []struct { name string ss tcpip.SendBufferSizeOption - err *tcpip.Error + err tcpip.Error }{ // Invalid configurations. - {"min_below_zero", tcpip.SendBufferSizeOption{Min: -1, Default: sMin, Max: sMin}, tcpip.ErrInvalidOptionValue}, - {"min_zero", tcpip.SendBufferSizeOption{Min: 0, Default: sMin, Max: sMin}, tcpip.ErrInvalidOptionValue}, - {"default_below_min", tcpip.SendBufferSizeOption{Min: 0, Default: sMin - 1, Max: sMin - 1}, tcpip.ErrInvalidOptionValue}, - {"default_above_max", tcpip.SendBufferSizeOption{Min: 0, Default: sMin + 1, Max: sMin}, tcpip.ErrInvalidOptionValue}, - {"max_below_min", tcpip.SendBufferSizeOption{Min: sMin, Default: sMin + 1, Max: sMin - 1}, tcpip.ErrInvalidOptionValue}, + {"min_below_zero", tcpip.SendBufferSizeOption{Min: -1, Default: sMin, Max: sMin}, &tcpip.ErrInvalidOptionValue{}}, + {"min_zero", tcpip.SendBufferSizeOption{Min: 0, Default: sMin, Max: sMin}, &tcpip.ErrInvalidOptionValue{}}, + {"default_below_min", tcpip.SendBufferSizeOption{Min: 0, Default: sMin - 1, Max: sMin - 1}, &tcpip.ErrInvalidOptionValue{}}, + {"default_above_max", tcpip.SendBufferSizeOption{Min: 0, Default: sMin + 1, Max: sMin}, &tcpip.ErrInvalidOptionValue{}}, + {"max_below_min", tcpip.SendBufferSizeOption{Min: sMin, Default: sMin + 1, Max: sMin - 1}, &tcpip.ErrInvalidOptionValue{}}, // Valid Configurations {"in_ascending_order", tcpip.SendBufferSizeOption{Min: sMin, Default: sMin + 1, Max: sMin + 2}, nil}, @@ -3356,11 +3372,12 @@ func TestStackSendBufferSizeOption(t *testing.T) { t.Run(tc.name, func(t *testing.T) { s := stack.New(stack.Options{}) defer s.Close() - if err := s.SetOption(tc.ss); err != tc.err { - t.Fatalf("s.SetOption(%+v) = %v, want: %v", tc.ss, err, tc.err) + err := s.SetOption(tc.ss) + if diff := cmp.Diff(tc.err, err); diff != "" { + t.Fatalf("unexpected error from s.SetOption(%+v), (-want, +got):\n%s", tc.ss, diff) } - var ss tcpip.SendBufferSizeOption if tc.err == nil { + var ss tcpip.SendBufferSizeOption if err := s.Option(&ss); err != nil { t.Fatalf("s.Option(%+v) = %v, want: nil", ss, err) } @@ -3919,7 +3936,7 @@ func TestFindRouteWithForwarding(t *testing.T) { addrNIC tcpip.NICID localAddr tcpip.Address - findRouteErr *tcpip.Error + findRouteErr tcpip.Error dependentOnForwarding bool }{ { @@ -3928,7 +3945,7 @@ func TestFindRouteWithForwarding(t *testing.T) { forwardingEnabled: false, addrNIC: nicID1, localAddr: fakeNetCfg.nic2Addr, - findRouteErr: tcpip.ErrNoRoute, + findRouteErr: &tcpip.ErrNoRoute{}, dependentOnForwarding: false, }, { @@ -3937,7 +3954,7 @@ func TestFindRouteWithForwarding(t *testing.T) { forwardingEnabled: true, addrNIC: nicID1, localAddr: fakeNetCfg.nic2Addr, - findRouteErr: tcpip.ErrNoRoute, + findRouteErr: &tcpip.ErrNoRoute{}, dependentOnForwarding: false, }, { @@ -3946,7 +3963,7 @@ func TestFindRouteWithForwarding(t *testing.T) { forwardingEnabled: false, addrNIC: nicID1, localAddr: fakeNetCfg.nic1Addr, - findRouteErr: tcpip.ErrNoRoute, + findRouteErr: &tcpip.ErrNoRoute{}, dependentOnForwarding: false, }, { @@ -3982,7 +3999,7 @@ func TestFindRouteWithForwarding(t *testing.T) { forwardingEnabled: false, addrNIC: nicID2, localAddr: fakeNetCfg.nic1Addr, - findRouteErr: tcpip.ErrNoRoute, + findRouteErr: &tcpip.ErrNoRoute{}, dependentOnForwarding: false, }, { @@ -3991,7 +4008,7 @@ func TestFindRouteWithForwarding(t *testing.T) { forwardingEnabled: true, addrNIC: nicID2, localAddr: fakeNetCfg.nic1Addr, - findRouteErr: tcpip.ErrNoRoute, + findRouteErr: &tcpip.ErrNoRoute{}, dependentOnForwarding: false, }, { @@ -4015,7 +4032,7 @@ func TestFindRouteWithForwarding(t *testing.T) { netCfg: fakeNetCfg, forwardingEnabled: false, localAddr: fakeNetCfg.nic1Addr, - findRouteErr: tcpip.ErrNoRoute, + findRouteErr: &tcpip.ErrNoRoute{}, dependentOnForwarding: false, }, { @@ -4031,7 +4048,7 @@ func TestFindRouteWithForwarding(t *testing.T) { netCfg: ipv6LinkLocalNIC1WithGlobalRemote, forwardingEnabled: false, addrNIC: nicID1, - findRouteErr: tcpip.ErrNoRoute, + findRouteErr: &tcpip.ErrNoRoute{}, dependentOnForwarding: false, }, { @@ -4039,7 +4056,7 @@ func TestFindRouteWithForwarding(t *testing.T) { netCfg: ipv6LinkLocalNIC1WithGlobalRemote, forwardingEnabled: true, addrNIC: nicID1, - findRouteErr: tcpip.ErrNoRoute, + findRouteErr: &tcpip.ErrNoRoute{}, dependentOnForwarding: false, }, { @@ -4047,7 +4064,7 @@ func TestFindRouteWithForwarding(t *testing.T) { netCfg: ipv6LinkLocalNIC1WithGlobalRemote, forwardingEnabled: false, localAddr: ipv6LinkLocalNIC1WithGlobalRemote.nic1Addr, - findRouteErr: tcpip.ErrNoRoute, + findRouteErr: &tcpip.ErrNoRoute{}, dependentOnForwarding: false, }, { @@ -4055,7 +4072,7 @@ func TestFindRouteWithForwarding(t *testing.T) { netCfg: ipv6LinkLocalNIC1WithGlobalRemote, forwardingEnabled: true, localAddr: ipv6LinkLocalNIC1WithGlobalRemote.nic1Addr, - findRouteErr: tcpip.ErrNoRoute, + findRouteErr: &tcpip.ErrNoRoute{}, dependentOnForwarding: false, }, { @@ -4087,7 +4104,7 @@ func TestFindRouteWithForwarding(t *testing.T) { netCfg: ipv6GlobalNIC1WithLinkLocalRemote, forwardingEnabled: false, localAddr: ipv6GlobalNIC1WithLinkLocalRemote.nic1Addr, - findRouteErr: tcpip.ErrNetworkUnreachable, + findRouteErr: &tcpip.ErrNetworkUnreachable{}, dependentOnForwarding: false, }, { @@ -4095,7 +4112,7 @@ func TestFindRouteWithForwarding(t *testing.T) { netCfg: ipv6GlobalNIC1WithLinkLocalRemote, forwardingEnabled: true, localAddr: ipv6GlobalNIC1WithLinkLocalRemote.nic1Addr, - findRouteErr: tcpip.ErrNetworkUnreachable, + findRouteErr: &tcpip.ErrNetworkUnreachable{}, dependentOnForwarding: false, }, { @@ -4103,7 +4120,7 @@ func TestFindRouteWithForwarding(t *testing.T) { netCfg: ipv6GlobalNIC1WithLinkLocalMulticastRemote, forwardingEnabled: false, localAddr: ipv6GlobalNIC1WithLinkLocalMulticastRemote.nic1Addr, - findRouteErr: tcpip.ErrNetworkUnreachable, + findRouteErr: &tcpip.ErrNetworkUnreachable{}, dependentOnForwarding: false, }, { @@ -4111,7 +4128,7 @@ func TestFindRouteWithForwarding(t *testing.T) { netCfg: ipv6GlobalNIC1WithLinkLocalMulticastRemote, forwardingEnabled: true, localAddr: ipv6GlobalNIC1WithLinkLocalMulticastRemote.nic1Addr, - findRouteErr: tcpip.ErrNetworkUnreachable, + findRouteErr: &tcpip.ErrNetworkUnreachable{}, dependentOnForwarding: false, }, { @@ -4166,8 +4183,8 @@ func TestFindRouteWithForwarding(t *testing.T) { if r != nil { defer r.Release() } - if err != test.findRouteErr { - t.Fatalf("FindRoute(%d, %s, %s, %d, false) = %s, want = %s", test.addrNIC, test.localAddr, test.netCfg.remoteAddr, test.netCfg.proto, err, test.findRouteErr) + if diff := cmp.Diff(test.findRouteErr, err); diff != "" { + t.Fatalf("unexpected error from FindRoute(%d, %s, %s, %d, false), (-want, +got):\n%s", test.addrNIC, test.localAddr, test.netCfg.remoteAddr, test.netCfg.proto, diff) } if test.findRouteErr != nil { @@ -4214,8 +4231,11 @@ func TestFindRouteWithForwarding(t *testing.T) { if err := s.SetForwarding(test.netCfg.proto, false); err != nil { t.Fatalf("SetForwarding(%d, false): %s", test.netCfg.proto, err) } - if err := send(r, data); err != tcpip.ErrInvalidEndpointState { - t.Fatalf("got send(_, _) = %s, want = %s", err, tcpip.ErrInvalidEndpointState) + { + err := send(r, data) + if _, ok := err.(*tcpip.ErrInvalidEndpointState); !ok { + t.Fatalf("got send(_, _) = %s, want = %s", err, &tcpip.ErrInvalidEndpointState{}) + } } if n := ep1.Drain(); n != 0 { t.Errorf("got %d unexpected packets from ep1", n) @@ -4277,8 +4297,9 @@ func TestWritePacketToRemote(t *testing.T) { } t.Run("InvalidNICID", func(t *testing.T) { - if got, want := s.WritePacketToRemote(234, linkAddr2, header.IPv4ProtocolNumber, buffer.View([]byte{1}).ToVectorisedView()), tcpip.ErrUnknownDevice; got != want { - t.Fatalf("s.WritePacketToRemote(_, _, _, _) = %s, want = %s", got, want) + err := s.WritePacketToRemote(234, linkAddr2, header.IPv4ProtocolNumber, buffer.View([]byte{1}).ToVectorisedView()) + if _, ok := err.(*tcpip.ErrUnknownDevice); !ok { + t.Fatalf("s.WritePacketToRemote(_, _, _, _) = %s, want = %s", err, &tcpip.ErrUnknownDevice{}) } pkt, ok := e.Read() if got, want := ok, false; got != want { @@ -4296,9 +4317,11 @@ func TestClearNeighborCacheOnNICDisable(t *testing.T) { linkAddr = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06") ) + clock := faketime.NewManualClock() s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol, ipv6.NewProtocol}, UseNeighborCache: true, + Clock: clock, }) e := channel.New(0, 0, "") e.LinkEPCapabilities |= stack.CapabilityResolutionRequired @@ -4306,36 +4329,56 @@ func TestClearNeighborCacheOnNICDisable(t *testing.T) { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - if err := s.AddStaticNeighbor(nicID, ipv4Addr, linkAddr); err != nil { - t.Fatalf("s.AddStaticNeighbor(%d, %s, %s): %s", nicID, ipv4Addr, linkAddr, err) - } - if err := s.AddStaticNeighbor(nicID, ipv6Addr, linkAddr); err != nil { - t.Fatalf("s.AddStaticNeighbor(%d, %s, %s): %s", nicID, ipv6Addr, linkAddr, err) + addrs := []struct { + proto tcpip.NetworkProtocolNumber + addr tcpip.Address + }{ + { + proto: ipv4.ProtocolNumber, + addr: ipv4Addr, + }, + { + proto: ipv6.ProtocolNumber, + addr: ipv6Addr, + }, } - if neighbors, err := s.Neighbors(nicID); err != nil { - t.Fatalf("s.Neighbors(%d): %s", nicID, err) - } else if len(neighbors) != 2 { - t.Fatalf("got len(neighbors) = %d, want = 2; neighbors = %#v", len(neighbors), neighbors) + for _, addr := range addrs { + if err := s.AddStaticNeighbor(nicID, addr.proto, addr.addr, linkAddr); err != nil { + t.Fatalf("s.AddStaticNeighbor(%d, %d, %s, %s): %s", nicID, addr.proto, addr.addr, linkAddr, err) + } + + if neighbors, err := s.Neighbors(nicID, addr.proto); err != nil { + t.Fatalf("s.Neighbors(%d, %d): %s", nicID, addr.proto, err) + } else if diff := cmp.Diff( + []stack.NeighborEntry{{Addr: addr.addr, LinkAddr: linkAddr, State: stack.Static, UpdatedAtNanos: clock.NowNanoseconds()}}, + neighbors, + ); diff != "" { + t.Fatalf("proto=%d neighbors mismatch (-want +got):\n%s", addr.proto, diff) + } } // Disabling the NIC should clear the neighbor table. if err := s.DisableNIC(nicID); err != nil { t.Fatalf("s.DisableNIC(%d): %s", nicID, err) } - if neighbors, err := s.Neighbors(nicID); err != nil { - t.Fatalf("s.Neighbors(%d): %s", nicID, err) - } else if len(neighbors) != 0 { - t.Fatalf("got len(neighbors) = %d, want = 0; neighbors = %#v", len(neighbors), neighbors) + for _, addr := range addrs { + if neighbors, err := s.Neighbors(nicID, addr.proto); err != nil { + t.Fatalf("s.Neighbors(%d, %d): %s", nicID, addr.proto, err) + } else if len(neighbors) != 0 { + t.Fatalf("got proto=%d len(neighbors) = %d, want = 0; neighbors = %#v", addr.proto, len(neighbors), neighbors) + } } // Enabling the NIC should have an empty neighbor table. if err := s.EnableNIC(nicID); err != nil { t.Fatalf("s.EnableNIC(%d): %s", nicID, err) } - if neighbors, err := s.Neighbors(nicID); err != nil { - t.Fatalf("s.Neighbors(%d): %s", nicID, err) - } else if len(neighbors) != 0 { - t.Fatalf("got len(neighbors) = %d, want = 0; neighbors = %#v", len(neighbors), neighbors) + for _, addr := range addrs { + if neighbors, err := s.Neighbors(nicID, addr.proto); err != nil { + t.Fatalf("s.Neighbors(%d, %d): %s", nicID, addr.proto, err) + } else if len(neighbors) != 0 { + t.Fatalf("got proto=%d len(neighbors) = %d, want = 0; neighbors = %#v", addr.proto, len(neighbors), neighbors) + } } } @@ -4352,11 +4395,17 @@ func TestGetLinkAddressErrors(t *testing.T) { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } - if err := s.GetLinkAddress(unknownNICID, "", "", ipv4.ProtocolNumber, nil); err != tcpip.ErrUnknownNICID { - t.Errorf("got s.GetLinkAddress(%d, '', '', %d, nil) = %s, want = %s", unknownNICID, ipv4.ProtocolNumber, err, tcpip.ErrUnknownNICID) + { + err := s.GetLinkAddress(unknownNICID, "", "", ipv4.ProtocolNumber, nil) + if _, ok := err.(*tcpip.ErrUnknownNICID); !ok { + t.Errorf("got s.GetLinkAddress(%d, '', '', %d, nil) = %s, want = %s", unknownNICID, ipv4.ProtocolNumber, err, &tcpip.ErrUnknownNICID{}) + } } - if err := s.GetLinkAddress(nicID, "", "", ipv4.ProtocolNumber, nil); err != tcpip.ErrNotSupported { - t.Errorf("got s.GetLinkAddress(%d, '', '', %d, nil) = %s, want = %s", unknownNICID, ipv4.ProtocolNumber, err, tcpip.ErrNotSupported) + { + err := s.GetLinkAddress(nicID, "", "", ipv4.ProtocolNumber, nil) + if _, ok := err.(*tcpip.ErrNotSupported); !ok { + t.Errorf("got s.GetLinkAddress(%d, '', '', %d, nil) = %s, want = %s", unknownNICID, ipv4.ProtocolNumber, err, &tcpip.ErrNotSupported{}) + } } } @@ -4368,7 +4417,9 @@ func TestStaticGetLinkAddress(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol, ipv6.NewProtocol}, }) - if err := s.CreateNIC(nicID, channel.New(0, 0, "")); err != nil { + e := channel.New(0, 0, "") + e.LinkEPCapabilities |= stack.CapabilityResolutionRequired + if err := s.CreateNIC(nicID, e); err != nil { t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) } diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go index 07b2818d2..7d8d0851e 100644 --- a/pkg/tcpip/stack/transport_demuxer.go +++ b/pkg/tcpip/stack/transport_demuxer.go @@ -182,9 +182,8 @@ func (epsByNIC *endpointsByNIC) handlePacket(id TransportEndpointID, pkt *Packet epsByNIC.mu.RUnlock() // Don't use defer for performance reasons. } -// handleControlPacket delivers a control packet to the transport endpoint -// identified by id. -func (epsByNIC *endpointsByNIC) handleControlPacket(n *NIC, id TransportEndpointID, typ ControlType, extra uint32, pkt *PacketBuffer) { +// handleError delivers an error to the transport endpoint identified by id. +func (epsByNIC *endpointsByNIC) handleError(n *NIC, id TransportEndpointID, transErr TransportError, pkt *PacketBuffer) { epsByNIC.mu.RLock() defer epsByNIC.mu.RUnlock() @@ -200,12 +199,12 @@ func (epsByNIC *endpointsByNIC) handleControlPacket(n *NIC, id TransportEndpoint // broadcast like we are doing with handlePacket above? // multiPortEndpoints are guaranteed to have at least one element. - selectEndpoint(id, mpep, epsByNIC.seed).HandleControlPacket(typ, extra, pkt) + selectEndpoint(id, mpep, epsByNIC.seed).HandleError(transErr, pkt) } // registerEndpoint returns true if it succeeds. It fails and returns // false if ep already has an element with the same key. -func (epsByNIC *endpointsByNIC) registerEndpoint(d *transportDemuxer, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, t TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { +func (epsByNIC *endpointsByNIC) registerEndpoint(d *transportDemuxer, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, t TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { epsByNIC.mu.Lock() defer epsByNIC.mu.Unlock() @@ -222,7 +221,7 @@ func (epsByNIC *endpointsByNIC) registerEndpoint(d *transportDemuxer, netProto t return multiPortEp.singleRegisterEndpoint(t, flags) } -func (epsByNIC *endpointsByNIC) checkEndpoint(d *transportDemuxer, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { +func (epsByNIC *endpointsByNIC) checkEndpoint(d *transportDemuxer, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { epsByNIC.mu.RLock() defer epsByNIC.mu.RUnlock() @@ -294,7 +293,7 @@ func newTransportDemuxer(stack *Stack) *transportDemuxer { // registerEndpoint registers the given endpoint with the dispatcher such that // packets that match the endpoint ID are delivered to it. -func (d *transportDemuxer) registerEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { +func (d *transportDemuxer) registerEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { for i, n := range netProtos { if err := d.singleRegisterEndpoint(n, protocol, id, ep, flags, bindToDevice); err != nil { d.unregisterEndpoint(netProtos[:i], protocol, id, ep, flags, bindToDevice) @@ -306,7 +305,7 @@ func (d *transportDemuxer) registerEndpoint(netProtos []tcpip.NetworkProtocolNum } // checkEndpoint checks if an endpoint can be registered with the dispatcher. -func (d *transportDemuxer) checkEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { +func (d *transportDemuxer) checkEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { for _, n := range netProtos { if err := d.singleCheckEndpoint(n, protocol, id, flags, bindToDevice); err != nil { return err @@ -403,7 +402,7 @@ func (ep *multiPortEndpoint) handlePacketAll(id TransportEndpointID, pkt *Packet // singleRegisterEndpoint tries to add an endpoint to the multiPortEndpoint // list. The list might be empty already. -func (ep *multiPortEndpoint) singleRegisterEndpoint(t TransportEndpoint, flags ports.Flags) *tcpip.Error { +func (ep *multiPortEndpoint) singleRegisterEndpoint(t TransportEndpoint, flags ports.Flags) tcpip.Error { ep.mu.Lock() defer ep.mu.Unlock() @@ -412,7 +411,7 @@ func (ep *multiPortEndpoint) singleRegisterEndpoint(t TransportEndpoint, flags p if len(ep.endpoints) != 0 { // If it was previously bound, we need to check if we can bind again. if ep.flags.TotalRefs() > 0 && bits&ep.flags.IntersectionRefs() == 0 { - return tcpip.ErrPortInUse + return &tcpip.ErrPortInUse{} } } @@ -422,7 +421,7 @@ func (ep *multiPortEndpoint) singleRegisterEndpoint(t TransportEndpoint, flags p return nil } -func (ep *multiPortEndpoint) singleCheckEndpoint(flags ports.Flags) *tcpip.Error { +func (ep *multiPortEndpoint) singleCheckEndpoint(flags ports.Flags) tcpip.Error { ep.mu.RLock() defer ep.mu.RUnlock() @@ -431,7 +430,7 @@ func (ep *multiPortEndpoint) singleCheckEndpoint(flags ports.Flags) *tcpip.Error if len(ep.endpoints) != 0 { // If it was previously bound, we need to check if we can bind again. if ep.flags.TotalRefs() > 0 && bits&ep.flags.IntersectionRefs() == 0 { - return tcpip.ErrPortInUse + return &tcpip.ErrPortInUse{} } } @@ -456,7 +455,7 @@ func (ep *multiPortEndpoint) unregisterEndpoint(t TransportEndpoint, flags ports return len(ep.endpoints) == 0 } -func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { +func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { if id.RemotePort != 0 { // SO_REUSEPORT only applies to bound/listening endpoints. flags.LoadBalanced = false @@ -464,7 +463,7 @@ func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocol eps, ok := d.protocol[protocolIDs{netProto, protocol}] if !ok { - return tcpip.ErrUnknownProtocol + return &tcpip.ErrUnknownProtocol{} } eps.mu.Lock() @@ -482,7 +481,7 @@ func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocol return epsByNIC.registerEndpoint(d, netProto, protocol, ep, flags, bindToDevice) } -func (d *transportDemuxer) singleCheckEndpoint(netProto tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { +func (d *transportDemuxer) singleCheckEndpoint(netProto tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { if id.RemotePort != 0 { // SO_REUSEPORT only applies to bound/listening endpoints. flags.LoadBalanced = false @@ -490,7 +489,7 @@ func (d *transportDemuxer) singleCheckEndpoint(netProto tcpip.NetworkProtocolNum eps, ok := d.protocol[protocolIDs{netProto, protocol}] if !ok { - return tcpip.ErrUnknownProtocol + return &tcpip.ErrUnknownProtocol{} } eps.mu.RLock() @@ -596,9 +595,11 @@ func (d *transportDemuxer) deliverRawPacket(protocol tcpip.TransportProtocolNumb return foundRaw } -// deliverControlPacket attempts to deliver the given control packet. Returns -// true if it found an endpoint, false otherwise. -func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer, id TransportEndpointID) bool { +// deliverError attempts to deliver the given error to the appropriate transport +// endpoint. +// +// Returns true if the error was delivered. +func (d *transportDemuxer) deliverError(n *NIC, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, transErr TransportError, pkt *PacketBuffer, id TransportEndpointID) bool { eps, ok := d.protocol[protocolIDs{net, trans}] if !ok { return false @@ -611,7 +612,7 @@ func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtoco return false } - ep.handleControlPacket(n, id, typ, extra, pkt) + ep.handleError(n, id, transErr, pkt) return true } @@ -649,10 +650,10 @@ func (d *transportDemuxer) findTransportEndpoint(netProto tcpip.NetworkProtocolN // that packets of the appropriate protocol are delivered to it. A single // packet can be sent to one or more raw endpoints along with a non-raw // endpoint. -func (d *transportDemuxer) registerRawEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) *tcpip.Error { +func (d *transportDemuxer) registerRawEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) tcpip.Error { eps, ok := d.protocol[protocolIDs{netProto, transProto}] if !ok { - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } eps.mu.Lock() diff --git a/pkg/tcpip/stack/transport_demuxer_test.go b/pkg/tcpip/stack/transport_demuxer_test.go index de4b5fbdc..10cbbe589 100644 --- a/pkg/tcpip/stack/transport_demuxer_test.go +++ b/pkg/tcpip/stack/transport_demuxer_test.go @@ -175,9 +175,9 @@ func TestTransportDemuxerRegister(t *testing.T) { for _, test := range []struct { name string proto tcpip.NetworkProtocolNumber - want *tcpip.Error + want tcpip.Error }{ - {"failure", ipv6.ProtocolNumber, tcpip.ErrUnknownProtocol}, + {"failure", ipv6.ProtocolNumber, &tcpip.ErrUnknownProtocol{}}, {"success", ipv4.ProtocolNumber, nil}, } { t.Run(test.name, func(t *testing.T) { @@ -294,7 +294,7 @@ func TestBindToDeviceDistribution(t *testing.T) { defer wq.EventUnregister(&we) defer close(ch) - var err *tcpip.Error + var err tcpip.Error ep, err := c.s.NewEndpoint(udp.ProtocolNumber, netProtoNum, &wq) if err != nil { t.Fatalf("NewEndpoint failed: %s", err) diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go index c49427c4c..bebf4e6b5 100644 --- a/pkg/tcpip/stack/transport_test.go +++ b/pkg/tcpip/stack/transport_test.go @@ -87,18 +87,18 @@ func (*fakeTransportEndpoint) Readiness(mask waiter.EventMask) waiter.EventMask return mask } -func (*fakeTransportEndpoint) Read(io.Writer, tcpip.ReadOptions) (tcpip.ReadResult, *tcpip.Error) { +func (*fakeTransportEndpoint) Read(io.Writer, tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) { return tcpip.ReadResult{}, nil } -func (f *fakeTransportEndpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tcpip.Error) { +func (f *fakeTransportEndpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { if len(f.route.RemoteAddress) == 0 { - return 0, tcpip.ErrNoRoute + return 0, &tcpip.ErrNoRoute{} } v := make([]byte, p.Len()) if _, err := io.ReadFull(p, v); err != nil { - return 0, tcpip.ErrBadBuffer + return 0, &tcpip.ErrBadBuffer{} } pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ @@ -114,37 +114,37 @@ func (f *fakeTransportEndpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions } // SetSockOpt sets a socket option. Currently not supported. -func (*fakeTransportEndpoint) SetSockOpt(tcpip.SettableSocketOption) *tcpip.Error { - return tcpip.ErrInvalidEndpointState +func (*fakeTransportEndpoint) SetSockOpt(tcpip.SettableSocketOption) tcpip.Error { + return &tcpip.ErrInvalidEndpointState{} } // SetSockOptInt sets a socket option. Currently not supported. -func (*fakeTransportEndpoint) SetSockOptInt(tcpip.SockOptInt, int) *tcpip.Error { - return tcpip.ErrInvalidEndpointState +func (*fakeTransportEndpoint) SetSockOptInt(tcpip.SockOptInt, int) tcpip.Error { + return &tcpip.ErrInvalidEndpointState{} } // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. -func (*fakeTransportEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { - return -1, tcpip.ErrUnknownProtocolOption +func (*fakeTransportEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { + return -1, &tcpip.ErrUnknownProtocolOption{} } // GetSockOpt implements tcpip.Endpoint.GetSockOpt. -func (*fakeTransportEndpoint) GetSockOpt(tcpip.GettableSocketOption) *tcpip.Error { - return tcpip.ErrInvalidEndpointState +func (*fakeTransportEndpoint) GetSockOpt(tcpip.GettableSocketOption) tcpip.Error { + return &tcpip.ErrInvalidEndpointState{} } // Disconnect implements tcpip.Endpoint.Disconnect. -func (*fakeTransportEndpoint) Disconnect() *tcpip.Error { - return tcpip.ErrNotSupported +func (*fakeTransportEndpoint) Disconnect() tcpip.Error { + return &tcpip.ErrNotSupported{} } -func (f *fakeTransportEndpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { +func (f *fakeTransportEndpoint) Connect(addr tcpip.FullAddress) tcpip.Error { f.peerAddr = addr.Addr // Find the route. r, err := f.proto.stack.FindRoute(addr.NIC, "", addr.Addr, fakeNetNumber, false /* multicastLoop */) if err != nil { - return tcpip.ErrNoRoute + return &tcpip.ErrNoRoute{} } // Try to register so that we can start receiving packets. @@ -164,22 +164,22 @@ func (f *fakeTransportEndpoint) UniqueID() uint64 { return f.uniqueID } -func (*fakeTransportEndpoint) ConnectEndpoint(e tcpip.Endpoint) *tcpip.Error { +func (*fakeTransportEndpoint) ConnectEndpoint(e tcpip.Endpoint) tcpip.Error { return nil } -func (*fakeTransportEndpoint) Shutdown(tcpip.ShutdownFlags) *tcpip.Error { +func (*fakeTransportEndpoint) Shutdown(tcpip.ShutdownFlags) tcpip.Error { return nil } func (*fakeTransportEndpoint) Reset() { } -func (*fakeTransportEndpoint) Listen(int) *tcpip.Error { +func (*fakeTransportEndpoint) Listen(int) tcpip.Error { return nil } -func (f *fakeTransportEndpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { +func (f *fakeTransportEndpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) { if len(f.acceptQueue) == 0 { return nil, nil, nil } @@ -188,7 +188,7 @@ func (f *fakeTransportEndpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *wai return a, nil, nil } -func (f *fakeTransportEndpoint) Bind(a tcpip.FullAddress) *tcpip.Error { +func (f *fakeTransportEndpoint) Bind(a tcpip.FullAddress) tcpip.Error { if err := f.proto.stack.RegisterTransportEndpoint( []tcpip.NetworkProtocolNumber{fakeNetNumber}, fakeTransNumber, @@ -203,11 +203,11 @@ func (f *fakeTransportEndpoint) Bind(a tcpip.FullAddress) *tcpip.Error { return nil } -func (*fakeTransportEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { +func (*fakeTransportEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { return tcpip.FullAddress{}, nil } -func (*fakeTransportEndpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { +func (*fakeTransportEndpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) { return tcpip.FullAddress{}, nil } @@ -237,7 +237,7 @@ func (f *fakeTransportEndpoint) HandlePacket(id stack.TransportEndpointID, pkt * f.acceptQueue = append(f.acceptQueue, ep) } -func (f *fakeTransportEndpoint) HandleControlPacket(stack.ControlType, uint32, *stack.PacketBuffer) { +func (f *fakeTransportEndpoint) HandleError(stack.TransportError, *stack.PacketBuffer) { // Increment the number of received control packets. f.proto.controlCount++ } @@ -252,7 +252,7 @@ func (*fakeTransportEndpoint) Resume(*stack.Stack) {} func (*fakeTransportEndpoint) Wait() {} -func (*fakeTransportEndpoint) LastError() *tcpip.Error { +func (*fakeTransportEndpoint) LastError() tcpip.Error { return nil } @@ -280,19 +280,19 @@ func (*fakeTransportProtocol) Number() tcpip.TransportProtocolNumber { return fakeTransNumber } -func (f *fakeTransportProtocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, _ *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (f *fakeTransportProtocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, _ *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { return newFakeTransportEndpoint(f, netProto, f.stack), nil } -func (*fakeTransportProtocol) NewRawEndpoint(tcpip.NetworkProtocolNumber, *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { - return nil, tcpip.ErrUnknownProtocol +func (*fakeTransportProtocol) NewRawEndpoint(tcpip.NetworkProtocolNumber, *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { + return nil, &tcpip.ErrUnknownProtocol{} } func (*fakeTransportProtocol) MinimumPacketSize() int { return fakeTransHeaderLen } -func (*fakeTransportProtocol) ParsePorts(buffer.View) (src, dst uint16, err *tcpip.Error) { +func (*fakeTransportProtocol) ParsePorts(buffer.View) (src, dst uint16, err tcpip.Error) { return 0, 0, nil } @@ -300,23 +300,23 @@ func (*fakeTransportProtocol) HandleUnknownDestinationPacket(stack.TransportEndp return stack.UnknownDestinationPacketHandled } -func (f *fakeTransportProtocol) SetOption(option tcpip.SettableTransportProtocolOption) *tcpip.Error { +func (f *fakeTransportProtocol) SetOption(option tcpip.SettableTransportProtocolOption) tcpip.Error { switch v := option.(type) { case *tcpip.TCPModerateReceiveBufferOption: f.opts.good = bool(*v) return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } -func (f *fakeTransportProtocol) Option(option tcpip.GettableTransportProtocolOption) *tcpip.Error { +func (f *fakeTransportProtocol) Option(option tcpip.GettableTransportProtocolOption) tcpip.Error { switch v := option.(type) { case *tcpip.TCPModerateReceiveBufferOption: *v = tcpip.TCPModerateReceiveBufferOption(f.opts.good) return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 812ee36ed..c500a0d1c 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -47,141 +47,6 @@ import ( // Using header.IPv4AddressSize would cause an import cycle. const ipv4AddressSize = 4 -// Error represents an error in the netstack error space. Using a special type -// ensures that errors outside of this space are not accidentally introduced. -// -// All errors must have unique msg strings. -// -// +stateify savable -type Error struct { - msg string - - ignoreStats bool -} - -// String implements fmt.Stringer.String. -func (e *Error) String() string { - if e == nil { - return "<nil>" - } - return e.msg -} - -// IgnoreStats indicates whether this error type should be included in failure -// counts in tcpip.Stats structs. -func (e *Error) IgnoreStats() bool { - return e.ignoreStats -} - -// Errors that can be returned by the network stack. -var ( - ErrUnknownProtocol = &Error{msg: "unknown protocol"} - ErrUnknownNICID = &Error{msg: "unknown nic id"} - ErrUnknownDevice = &Error{msg: "unknown device"} - ErrUnknownProtocolOption = &Error{msg: "unknown option for protocol"} - ErrDuplicateNICID = &Error{msg: "duplicate nic id"} - ErrDuplicateAddress = &Error{msg: "duplicate address"} - ErrNoRoute = &Error{msg: "no route"} - ErrBadLinkEndpoint = &Error{msg: "bad link layer endpoint"} - ErrAlreadyBound = &Error{msg: "endpoint already bound", ignoreStats: true} - ErrInvalidEndpointState = &Error{msg: "endpoint is in invalid state"} - ErrAlreadyConnecting = &Error{msg: "endpoint is already connecting", ignoreStats: true} - ErrAlreadyConnected = &Error{msg: "endpoint is already connected", ignoreStats: true} - ErrNoPortAvailable = &Error{msg: "no ports are available"} - ErrPortInUse = &Error{msg: "port is in use"} - ErrBadLocalAddress = &Error{msg: "bad local address"} - ErrClosedForSend = &Error{msg: "endpoint is closed for send"} - ErrClosedForReceive = &Error{msg: "endpoint is closed for receive"} - ErrWouldBlock = &Error{msg: "operation would block", ignoreStats: true} - ErrConnectionRefused = &Error{msg: "connection was refused"} - ErrTimeout = &Error{msg: "operation timed out"} - ErrAborted = &Error{msg: "operation aborted"} - ErrConnectStarted = &Error{msg: "connection attempt started", ignoreStats: true} - ErrDestinationRequired = &Error{msg: "destination address is required"} - ErrNotSupported = &Error{msg: "operation not supported"} - ErrQueueSizeNotSupported = &Error{msg: "queue size querying not supported"} - ErrNotConnected = &Error{msg: "endpoint not connected"} - ErrConnectionReset = &Error{msg: "connection reset by peer"} - ErrConnectionAborted = &Error{msg: "connection aborted"} - ErrNoSuchFile = &Error{msg: "no such file"} - ErrInvalidOptionValue = &Error{msg: "invalid option value specified"} - ErrBadAddress = &Error{msg: "bad address"} - ErrNetworkUnreachable = &Error{msg: "network is unreachable"} - ErrMessageTooLong = &Error{msg: "message too long"} - ErrNoBufferSpace = &Error{msg: "no buffer space available"} - ErrBroadcastDisabled = &Error{msg: "broadcast socket option disabled"} - ErrNotPermitted = &Error{msg: "operation not permitted"} - ErrAddressFamilyNotSupported = &Error{msg: "address family not supported by protocol"} - ErrMalformedHeader = &Error{msg: "header is malformed"} - ErrBadBuffer = &Error{msg: "bad buffer"} -) - -var messageToError map[string]*Error - -var populate sync.Once - -// StringToError converts an error message to the error. -func StringToError(s string) *Error { - populate.Do(func() { - var errors = []*Error{ - ErrUnknownProtocol, - ErrUnknownNICID, - ErrUnknownDevice, - ErrUnknownProtocolOption, - ErrDuplicateNICID, - ErrDuplicateAddress, - ErrNoRoute, - ErrBadLinkEndpoint, - ErrAlreadyBound, - ErrInvalidEndpointState, - ErrAlreadyConnecting, - ErrAlreadyConnected, - ErrNoPortAvailable, - ErrPortInUse, - ErrBadLocalAddress, - ErrClosedForSend, - ErrClosedForReceive, - ErrWouldBlock, - ErrConnectionRefused, - ErrTimeout, - ErrAborted, - ErrConnectStarted, - ErrDestinationRequired, - ErrNotSupported, - ErrQueueSizeNotSupported, - ErrNotConnected, - ErrConnectionReset, - ErrConnectionAborted, - ErrNoSuchFile, - ErrInvalidOptionValue, - ErrBadAddress, - ErrNetworkUnreachable, - ErrMessageTooLong, - ErrNoBufferSpace, - ErrBroadcastDisabled, - ErrNotPermitted, - ErrAddressFamilyNotSupported, - ErrMalformedHeader, - ErrBadBuffer, - } - - messageToError = make(map[string]*Error) - for _, e := range errors { - if messageToError[e.String()] != nil { - panic("tcpip errors with duplicated message: " + e.String()) - } - messageToError[e.String()] = e - } - }) - - e, ok := messageToError[s] - if !ok { - panic("unknown error message: " + s) - } - - return e -} - // Errors related to Subnet var ( errSubnetLengthMismatch = errors.New("subnet length of address and mask differ") @@ -633,7 +498,7 @@ type Endpoint interface { // If non-zero number of bytes are successfully read and written to dst, err // must be nil. Otherwise, if dst failed to write anything, ErrBadBuffer // should be returned. - Read(dst io.Writer, opts ReadOptions) (res ReadResult, err *Error) + Read(io.Writer, ReadOptions) (ReadResult, Error) // Write writes data to the endpoint's peer. This method does not block if // the data cannot be written. @@ -648,7 +513,7 @@ type Endpoint interface { // stream (TCP) Endpoints may return partial writes, and even then only // in the case where writing additional data would block. Other Endpoints // will either write the entire message or return an error. - Write(Payloader, WriteOptions) (int64, *Error) + Write(Payloader, WriteOptions) (int64, Error) // Connect connects the endpoint to its peer. Specifying a NIC is // optional. @@ -662,21 +527,21 @@ type Endpoint interface { // connected returns nil. Calling connect again results in ErrAlreadyConnected. // Anything else -- the attempt to connect failed. // - // If address.Addr is empty, this means that Enpoint has to be + // If address.Addr is empty, this means that Endpoint has to be // disconnected if this is supported, otherwise // ErrAddressFamilyNotSupported must be returned. - Connect(address FullAddress) *Error + Connect(address FullAddress) Error // Disconnect disconnects the endpoint from its peer. - Disconnect() *Error + Disconnect() Error // Shutdown closes the read and/or write end of the endpoint connection // to its peer. - Shutdown(flags ShutdownFlags) *Error + Shutdown(flags ShutdownFlags) Error // Listen puts the endpoint in "listen" mode, which allows it to accept // new connections. - Listen(backlog int) *Error + Listen(backlog int) Error // Accept returns a new endpoint if a peer has established a connection // to an endpoint previously set to listen mode. This method does not @@ -686,36 +551,36 @@ type Endpoint interface { // // If peerAddr is not nil then it is populated with the peer address of the // returned endpoint. - Accept(peerAddr *FullAddress) (Endpoint, *waiter.Queue, *Error) + Accept(peerAddr *FullAddress) (Endpoint, *waiter.Queue, Error) // Bind binds the endpoint to a specific local address and port. // Specifying a NIC is optional. - Bind(address FullAddress) *Error + Bind(address FullAddress) Error // GetLocalAddress returns the address to which the endpoint is bound. - GetLocalAddress() (FullAddress, *Error) + GetLocalAddress() (FullAddress, Error) // GetRemoteAddress returns the address to which the endpoint is // connected. - GetRemoteAddress() (FullAddress, *Error) + GetRemoteAddress() (FullAddress, Error) // Readiness returns the current readiness of the endpoint. For example, // if waiter.EventIn is set, the endpoint is immediately readable. Readiness(mask waiter.EventMask) waiter.EventMask // SetSockOpt sets a socket option. - SetSockOpt(opt SettableSocketOption) *Error + SetSockOpt(opt SettableSocketOption) Error // SetSockOptInt sets a socket option, for simple cases where a value // has the int type. - SetSockOptInt(opt SockOptInt, v int) *Error + SetSockOptInt(opt SockOptInt, v int) Error // GetSockOpt gets a socket option. - GetSockOpt(opt GettableSocketOption) *Error + GetSockOpt(opt GettableSocketOption) Error // GetSockOptInt gets a socket option for simple cases where a return // value has the int type. - GetSockOptInt(SockOptInt) (int, *Error) + GetSockOptInt(SockOptInt) (int, Error) // State returns a socket's lifecycle state. The returned value is // protocol-specific and is primarily used for diagnostics. @@ -738,7 +603,7 @@ type Endpoint interface { SetOwner(owner PacketOwner) // LastError clears and returns the last error reported by the endpoint. - LastError() *Error + LastError() Error // SocketOptions returns the structure which contains all the socket // level options. @@ -993,12 +858,54 @@ type SettableSocketOption interface { isSettableSocketOption() } +// CongestionControlState indicates the current congestion control state for +// TCP sender. +type CongestionControlState int + +const ( + // Open indicates that the sender is receiving acks in order and + // no loss or dupACK's etc have been detected. + Open CongestionControlState = iota + // RTORecovery indicates that an RTO has occurred and the sender + // has entered an RTO based recovery phase. + RTORecovery + // FastRecovery indicates that the sender has entered FastRecovery + // based on receiving nDupAck's. This state is entered only when + // SACK is not in use. + FastRecovery + // SACKRecovery indicates that the sender has entered SACK based + // recovery. + SACKRecovery + // Disorder indicates the sender either received some SACK blocks + // or dupACK's. + Disorder +) + // TCPInfoOption is used by GetSockOpt to expose TCP statistics. // // TODO(b/64800844): Add and populate stat fields. type TCPInfoOption struct { - RTT time.Duration + // RTT is the smoothed round trip time. + RTT time.Duration + + // RTTVar is the round trip time variation. RTTVar time.Duration + + // RTO is the retransmission timeout for the endpoint. + RTO time.Duration + + // CcState is the congestion control state. + CcState CongestionControlState + + // SndCwnd is the congestion window, in packets. + SndCwnd uint32 + + // SndSsthresh is the threshold between slow start and congestion + // avoidance. + SndSsthresh uint32 + + // ReorderSeen indicates if reordering is seen in the endpoint. + ReorderSeen bool } func (*TCPInfoOption) isGettableSocketOption() {} diff --git a/pkg/tcpip/tests/integration/BUILD b/pkg/tcpip/tests/integration/BUILD index 218b218e7..71695b630 100644 --- a/pkg/tcpip/tests/integration/BUILD +++ b/pkg/tcpip/tests/integration/BUILD @@ -17,6 +17,7 @@ go_test( "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/checker", + "//pkg/tcpip/faketime", "//pkg/tcpip/header", "//pkg/tcpip/link/channel", "//pkg/tcpip/link/ethernet", diff --git a/pkg/tcpip/tests/integration/forward_test.go b/pkg/tcpip/tests/integration/forward_test.go index aedf1845e..38e1881c7 100644 --- a/pkg/tcpip/tests/integration/forward_test.go +++ b/pkg/tcpip/tests/integration/forward_test.go @@ -38,96 +38,207 @@ import ( var _ stack.NetworkDispatcher = (*endpointWithDestinationCheck)(nil) var _ stack.LinkEndpoint = (*endpointWithDestinationCheck)(nil) -// newEthernetEndpoint returns an ethernet link endpoint that wraps an inner -// link endpoint and checks the destination link address before delivering -// network packets to the network dispatcher. -// -// See ethernet.Endpoint for more details. -func newEthernetEndpoint(ep stack.LinkEndpoint) *endpointWithDestinationCheck { - var e endpointWithDestinationCheck - e.Endpoint.Init(ethernet.New(ep), &e) - return &e -} - -// endpointWithDestinationCheck is a link endpoint that checks the destination -// link address before delivering network packets to the network dispatcher. -type endpointWithDestinationCheck struct { - nested.Endpoint -} - -// DeliverNetworkPacket implements stack.NetworkDispatcher. -func (e *endpointWithDestinationCheck) DeliverNetworkPacket(src, dst tcpip.LinkAddress, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { - if dst == e.Endpoint.LinkAddress() || dst == header.EthernetBroadcastAddress || header.IsMulticastEthernetAddress(dst) { - e.Endpoint.DeliverNetworkPacket(src, dst, proto, pkt) - } -} - -func TestForwarding(t *testing.T) { - const ( - host1NICID = 1 - routerNICID1 = 2 - routerNICID2 = 3 - host2NICID = 4 - - listenPort = 8080 - ) +const ( + host1NICID = 1 + routerNICID1 = 2 + routerNICID2 = 3 + host2NICID = 4 +) - host1IPv4Addr := tcpip.ProtocolAddress{ +var ( + host1IPv4Addr = tcpip.ProtocolAddress{ Protocol: ipv4.ProtocolNumber, AddressWithPrefix: tcpip.AddressWithPrefix{ Address: tcpip.Address(net.ParseIP("192.168.0.2").To4()), PrefixLen: 24, }, } - routerNIC1IPv4Addr := tcpip.ProtocolAddress{ + routerNIC1IPv4Addr = tcpip.ProtocolAddress{ Protocol: ipv4.ProtocolNumber, AddressWithPrefix: tcpip.AddressWithPrefix{ Address: tcpip.Address(net.ParseIP("192.168.0.1").To4()), PrefixLen: 24, }, } - routerNIC2IPv4Addr := tcpip.ProtocolAddress{ + routerNIC2IPv4Addr = tcpip.ProtocolAddress{ Protocol: ipv4.ProtocolNumber, AddressWithPrefix: tcpip.AddressWithPrefix{ Address: tcpip.Address(net.ParseIP("10.0.0.1").To4()), PrefixLen: 8, }, } - host2IPv4Addr := tcpip.ProtocolAddress{ + host2IPv4Addr = tcpip.ProtocolAddress{ Protocol: ipv4.ProtocolNumber, AddressWithPrefix: tcpip.AddressWithPrefix{ Address: tcpip.Address(net.ParseIP("10.0.0.2").To4()), PrefixLen: 8, }, } - host1IPv6Addr := tcpip.ProtocolAddress{ + host1IPv6Addr = tcpip.ProtocolAddress{ Protocol: ipv6.ProtocolNumber, AddressWithPrefix: tcpip.AddressWithPrefix{ Address: tcpip.Address(net.ParseIP("a::2").To16()), PrefixLen: 64, }, } - routerNIC1IPv6Addr := tcpip.ProtocolAddress{ + routerNIC1IPv6Addr = tcpip.ProtocolAddress{ Protocol: ipv6.ProtocolNumber, AddressWithPrefix: tcpip.AddressWithPrefix{ Address: tcpip.Address(net.ParseIP("a::1").To16()), PrefixLen: 64, }, } - routerNIC2IPv6Addr := tcpip.ProtocolAddress{ + routerNIC2IPv6Addr = tcpip.ProtocolAddress{ Protocol: ipv6.ProtocolNumber, AddressWithPrefix: tcpip.AddressWithPrefix{ Address: tcpip.Address(net.ParseIP("b::1").To16()), PrefixLen: 64, }, } - host2IPv6Addr := tcpip.ProtocolAddress{ + host2IPv6Addr = tcpip.ProtocolAddress{ Protocol: ipv6.ProtocolNumber, AddressWithPrefix: tcpip.AddressWithPrefix{ Address: tcpip.Address(net.ParseIP("b::2").To16()), PrefixLen: 64, }, } +) + +func setupRoutedStacks(t *testing.T, host1Stack, routerStack, host2Stack *stack.Stack) { + host1NIC, routerNIC1 := pipe.New(linkAddr1, linkAddr2) + routerNIC2, host2NIC := pipe.New(linkAddr3, linkAddr4) + + if err := host1Stack.CreateNIC(host1NICID, newEthernetEndpoint(host1NIC)); err != nil { + t.Fatalf("host1Stack.CreateNIC(%d, _): %s", host1NICID, err) + } + if err := routerStack.CreateNIC(routerNICID1, newEthernetEndpoint(routerNIC1)); err != nil { + t.Fatalf("routerStack.CreateNIC(%d, _): %s", routerNICID1, err) + } + if err := routerStack.CreateNIC(routerNICID2, newEthernetEndpoint(routerNIC2)); err != nil { + t.Fatalf("routerStack.CreateNIC(%d, _): %s", routerNICID2, err) + } + if err := host2Stack.CreateNIC(host2NICID, newEthernetEndpoint(host2NIC)); err != nil { + t.Fatalf("host2Stack.CreateNIC(%d, _): %s", host2NICID, err) + } + + if err := routerStack.SetForwarding(ipv4.ProtocolNumber, true); err != nil { + t.Fatalf("routerStack.SetForwarding(%d): %s", ipv4.ProtocolNumber, err) + } + if err := routerStack.SetForwarding(ipv6.ProtocolNumber, true); err != nil { + t.Fatalf("routerStack.SetForwarding(%d): %s", ipv6.ProtocolNumber, err) + } + + if err := host1Stack.AddProtocolAddress(host1NICID, host1IPv4Addr); err != nil { + t.Fatalf("host1Stack.AddProtocolAddress(%d, %#v): %s", host1NICID, host1IPv4Addr, err) + } + if err := routerStack.AddProtocolAddress(routerNICID1, routerNIC1IPv4Addr); err != nil { + t.Fatalf("routerStack.AddProtocolAddress(%d, %#v): %s", routerNICID1, routerNIC1IPv4Addr, err) + } + if err := routerStack.AddProtocolAddress(routerNICID2, routerNIC2IPv4Addr); err != nil { + t.Fatalf("routerStack.AddProtocolAddress(%d, %#v): %s", routerNICID2, routerNIC2IPv4Addr, err) + } + if err := host2Stack.AddProtocolAddress(host2NICID, host2IPv4Addr); err != nil { + t.Fatalf("host2Stack.AddProtocolAddress(%d, %#v): %s", host2NICID, host2IPv4Addr, err) + } + if err := host1Stack.AddProtocolAddress(host1NICID, host1IPv6Addr); err != nil { + t.Fatalf("host1Stack.AddProtocolAddress(%d, %#v): %s", host1NICID, host1IPv6Addr, err) + } + if err := routerStack.AddProtocolAddress(routerNICID1, routerNIC1IPv6Addr); err != nil { + t.Fatalf("routerStack.AddProtocolAddress(%d, %#v): %s", routerNICID1, routerNIC1IPv6Addr, err) + } + if err := routerStack.AddProtocolAddress(routerNICID2, routerNIC2IPv6Addr); err != nil { + t.Fatalf("routerStack.AddProtocolAddress(%d, %#v): %s", routerNICID2, routerNIC2IPv6Addr, err) + } + if err := host2Stack.AddProtocolAddress(host2NICID, host2IPv6Addr); err != nil { + t.Fatalf("host2Stack.AddProtocolAddress(%d, %#v): %s", host2NICID, host2IPv6Addr, err) + } + + host1Stack.SetRouteTable([]tcpip.Route{ + { + Destination: host1IPv4Addr.AddressWithPrefix.Subnet(), + NIC: host1NICID, + }, + { + Destination: host1IPv6Addr.AddressWithPrefix.Subnet(), + NIC: host1NICID, + }, + { + Destination: host2IPv4Addr.AddressWithPrefix.Subnet(), + Gateway: routerNIC1IPv4Addr.AddressWithPrefix.Address, + NIC: host1NICID, + }, + { + Destination: host2IPv6Addr.AddressWithPrefix.Subnet(), + Gateway: routerNIC1IPv6Addr.AddressWithPrefix.Address, + NIC: host1NICID, + }, + }) + routerStack.SetRouteTable([]tcpip.Route{ + { + Destination: routerNIC1IPv4Addr.AddressWithPrefix.Subnet(), + NIC: routerNICID1, + }, + { + Destination: routerNIC1IPv6Addr.AddressWithPrefix.Subnet(), + NIC: routerNICID1, + }, + { + Destination: routerNIC2IPv4Addr.AddressWithPrefix.Subnet(), + NIC: routerNICID2, + }, + { + Destination: routerNIC2IPv6Addr.AddressWithPrefix.Subnet(), + NIC: routerNICID2, + }, + }) + host2Stack.SetRouteTable([]tcpip.Route{ + { + Destination: host2IPv4Addr.AddressWithPrefix.Subnet(), + NIC: host2NICID, + }, + { + Destination: host2IPv6Addr.AddressWithPrefix.Subnet(), + NIC: host2NICID, + }, + { + Destination: host1IPv4Addr.AddressWithPrefix.Subnet(), + Gateway: routerNIC2IPv4Addr.AddressWithPrefix.Address, + NIC: host2NICID, + }, + { + Destination: host1IPv6Addr.AddressWithPrefix.Subnet(), + Gateway: routerNIC2IPv6Addr.AddressWithPrefix.Address, + NIC: host2NICID, + }, + }) +} + +// newEthernetEndpoint returns an ethernet link endpoint that wraps an inner +// link endpoint and checks the destination link address before delivering +// network packets to the network dispatcher. +// +// See ethernet.Endpoint for more details. +func newEthernetEndpoint(ep stack.LinkEndpoint) *endpointWithDestinationCheck { + var e endpointWithDestinationCheck + e.Endpoint.Init(ethernet.New(ep), &e) + return &e +} + +// endpointWithDestinationCheck is a link endpoint that checks the destination +// link address before delivering network packets to the network dispatcher. +type endpointWithDestinationCheck struct { + nested.Endpoint +} + +// DeliverNetworkPacket implements stack.NetworkDispatcher. +func (e *endpointWithDestinationCheck) DeliverNetworkPacket(src, dst tcpip.LinkAddress, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + if dst == e.Endpoint.LinkAddress() || dst == header.EthernetBroadcastAddress || header.IsMulticastEthernetAddress(dst) { + e.Endpoint.DeliverNetworkPacket(src, dst, proto, pkt) + } +} + +func TestForwarding(t *testing.T) { + const listenPort = 8080 type endpointAndAddresses struct { serverEP tcpip.Endpoint @@ -229,7 +340,7 @@ func TestForwarding(t *testing.T) { subTests := []struct { name string proto tcpip.TransportProtocolNumber - expectedConnectErr *tcpip.Error + expectedConnectErr tcpip.Error setupServerSide func(t *testing.T, ep tcpip.Endpoint, ch <-chan struct{}, clientAddr tcpip.FullAddress) (tcpip.Endpoint, chan struct{}) needRemoteAddr bool }{ @@ -250,7 +361,7 @@ func TestForwarding(t *testing.T) { { name: "TCP", proto: tcp.ProtocolNumber, - expectedConnectErr: tcpip.ErrConnectStarted, + expectedConnectErr: &tcpip.ErrConnectStarted{}, setupServerSide: func(t *testing.T, ep tcpip.Endpoint, ch <-chan struct{}, clientAddr tcpip.FullAddress) (tcpip.Endpoint, chan struct{}) { t.Helper() @@ -260,7 +371,7 @@ func TestForwarding(t *testing.T) { var addr tcpip.FullAddress for { newEP, wq, err := ep.Accept(&addr) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { <-ch continue } @@ -294,113 +405,7 @@ func TestForwarding(t *testing.T) { host1Stack := stack.New(stackOpts) routerStack := stack.New(stackOpts) host2Stack := stack.New(stackOpts) - - host1NIC, routerNIC1 := pipe.New(linkAddr1, linkAddr2) - routerNIC2, host2NIC := pipe.New(linkAddr3, linkAddr4) - - if err := host1Stack.CreateNIC(host1NICID, newEthernetEndpoint(host1NIC)); err != nil { - t.Fatalf("host1Stack.CreateNIC(%d, _): %s", host1NICID, err) - } - if err := routerStack.CreateNIC(routerNICID1, newEthernetEndpoint(routerNIC1)); err != nil { - t.Fatalf("routerStack.CreateNIC(%d, _): %s", routerNICID1, err) - } - if err := routerStack.CreateNIC(routerNICID2, newEthernetEndpoint(routerNIC2)); err != nil { - t.Fatalf("routerStack.CreateNIC(%d, _): %s", routerNICID2, err) - } - if err := host2Stack.CreateNIC(host2NICID, newEthernetEndpoint(host2NIC)); err != nil { - t.Fatalf("host2Stack.CreateNIC(%d, _): %s", host2NICID, err) - } - - if err := routerStack.SetForwarding(ipv4.ProtocolNumber, true); err != nil { - t.Fatalf("routerStack.SetForwarding(%d): %s", ipv4.ProtocolNumber, err) - } - if err := routerStack.SetForwarding(ipv6.ProtocolNumber, true); err != nil { - t.Fatalf("routerStack.SetForwarding(%d): %s", ipv6.ProtocolNumber, err) - } - - if err := host1Stack.AddProtocolAddress(host1NICID, host1IPv4Addr); err != nil { - t.Fatalf("host1Stack.AddProtocolAddress(%d, %#v): %s", host1NICID, host1IPv4Addr, err) - } - if err := routerStack.AddProtocolAddress(routerNICID1, routerNIC1IPv4Addr); err != nil { - t.Fatalf("routerStack.AddProtocolAddress(%d, %#v): %s", routerNICID1, routerNIC1IPv4Addr, err) - } - if err := routerStack.AddProtocolAddress(routerNICID2, routerNIC2IPv4Addr); err != nil { - t.Fatalf("routerStack.AddProtocolAddress(%d, %#v): %s", routerNICID2, routerNIC2IPv4Addr, err) - } - if err := host2Stack.AddProtocolAddress(host2NICID, host2IPv4Addr); err != nil { - t.Fatalf("host2Stack.AddProtocolAddress(%d, %#v): %s", host2NICID, host2IPv4Addr, err) - } - if err := host1Stack.AddProtocolAddress(host1NICID, host1IPv6Addr); err != nil { - t.Fatalf("host1Stack.AddProtocolAddress(%d, %#v): %s", host1NICID, host1IPv6Addr, err) - } - if err := routerStack.AddProtocolAddress(routerNICID1, routerNIC1IPv6Addr); err != nil { - t.Fatalf("routerStack.AddProtocolAddress(%d, %#v): %s", routerNICID1, routerNIC1IPv6Addr, err) - } - if err := routerStack.AddProtocolAddress(routerNICID2, routerNIC2IPv6Addr); err != nil { - t.Fatalf("routerStack.AddProtocolAddress(%d, %#v): %s", routerNICID2, routerNIC2IPv6Addr, err) - } - if err := host2Stack.AddProtocolAddress(host2NICID, host2IPv6Addr); err != nil { - t.Fatalf("host2Stack.AddProtocolAddress(%d, %#v): %s", host2NICID, host2IPv6Addr, err) - } - - host1Stack.SetRouteTable([]tcpip.Route{ - { - Destination: host1IPv4Addr.AddressWithPrefix.Subnet(), - NIC: host1NICID, - }, - { - Destination: host1IPv6Addr.AddressWithPrefix.Subnet(), - NIC: host1NICID, - }, - { - Destination: host2IPv4Addr.AddressWithPrefix.Subnet(), - Gateway: routerNIC1IPv4Addr.AddressWithPrefix.Address, - NIC: host1NICID, - }, - { - Destination: host2IPv6Addr.AddressWithPrefix.Subnet(), - Gateway: routerNIC1IPv6Addr.AddressWithPrefix.Address, - NIC: host1NICID, - }, - }) - routerStack.SetRouteTable([]tcpip.Route{ - { - Destination: routerNIC1IPv4Addr.AddressWithPrefix.Subnet(), - NIC: routerNICID1, - }, - { - Destination: routerNIC1IPv6Addr.AddressWithPrefix.Subnet(), - NIC: routerNICID1, - }, - { - Destination: routerNIC2IPv4Addr.AddressWithPrefix.Subnet(), - NIC: routerNICID2, - }, - { - Destination: routerNIC2IPv6Addr.AddressWithPrefix.Subnet(), - NIC: routerNICID2, - }, - }) - host2Stack.SetRouteTable([]tcpip.Route{ - { - Destination: host2IPv4Addr.AddressWithPrefix.Subnet(), - NIC: host2NICID, - }, - { - Destination: host2IPv6Addr.AddressWithPrefix.Subnet(), - NIC: host2NICID, - }, - { - Destination: host1IPv4Addr.AddressWithPrefix.Subnet(), - Gateway: routerNIC2IPv4Addr.AddressWithPrefix.Address, - NIC: host2NICID, - }, - { - Destination: host1IPv6Addr.AddressWithPrefix.Subnet(), - Gateway: routerNIC2IPv6Addr.AddressWithPrefix.Address, - NIC: host2NICID, - }, - }) + setupRoutedStacks(t, host1Stack, routerStack, host2Stack) epsAndAddrs := test.epAndAddrs(t, host1Stack, routerStack, host2Stack, subTest.proto) defer epsAndAddrs.serverEP.Close() @@ -415,8 +420,11 @@ func TestForwarding(t *testing.T) { t.Fatalf("epsAndAddrs.clientEP.Bind(%#v): %s", clientAddr, err) } - if err := epsAndAddrs.clientEP.Connect(serverAddr); err != subTest.expectedConnectErr { - t.Fatalf("got epsAndAddrs.clientEP.Connect(%#v) = %s, want = %s", serverAddr, err, subTest.expectedConnectErr) + { + err := epsAndAddrs.clientEP.Connect(serverAddr) + if diff := cmp.Diff(subTest.expectedConnectErr, err); diff != "" { + t.Fatalf("unexpected error from epsAndAddrs.clientEP.Connect(%#v), (-want, +got):\n%s", serverAddr, diff) + } } if addr, err := epsAndAddrs.clientEP.GetLocalAddress(); err != nil { t.Fatalf("epsAndAddrs.clientEP.GetLocalAddress(): %s", err) diff --git a/pkg/tcpip/tests/integration/link_resolution_test.go b/pkg/tcpip/tests/integration/link_resolution_test.go index f85164c5b..f2301a9e6 100644 --- a/pkg/tcpip/tests/integration/link_resolution_test.go +++ b/pkg/tcpip/tests/integration/link_resolution_test.go @@ -19,12 +19,14 @@ import ( "fmt" "net" "testing" + "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/checker" + "gvisor.dev/gvisor/pkg/tcpip/faketime" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/link/pipe" "gvisor.dev/gvisor/pkg/tcpip/network/arp" @@ -245,6 +247,14 @@ func TestPing(t *testing.T) { } } +type transportError struct { + origin tcpip.SockErrOrigin + typ uint8 + code uint8 + info uint32 + kind stack.TransportErrorKind +} + func TestTCPLinkResolutionFailure(t *testing.T) { const ( host1NICID = 1 @@ -255,8 +265,9 @@ func TestTCPLinkResolutionFailure(t *testing.T) { name string netProto tcpip.NetworkProtocolNumber remoteAddr tcpip.Address - expectedWriteErr *tcpip.Error + expectedWriteErr tcpip.Error sockError tcpip.SockError + transErr transportError }{ { name: "IPv4 with resolvable remote", @@ -274,12 +285,9 @@ func TestTCPLinkResolutionFailure(t *testing.T) { name: "IPv4 without resolvable remote", netProto: ipv4.ProtocolNumber, remoteAddr: ipv4Addr3.AddressWithPrefix.Address, - expectedWriteErr: tcpip.ErrNoRoute, + expectedWriteErr: &tcpip.ErrNoRoute{}, sockError: tcpip.SockError{ - Err: tcpip.ErrNoRoute, - ErrType: byte(header.ICMPv4DstUnreachable), - ErrCode: byte(header.ICMPv4HostUnreachable), - ErrOrigin: tcpip.SockExtErrorOriginICMP, + Err: &tcpip.ErrNoRoute{}, Dst: tcpip.FullAddress{ NIC: host1NICID, Addr: ipv4Addr3.AddressWithPrefix.Address, @@ -291,17 +299,20 @@ func TestTCPLinkResolutionFailure(t *testing.T) { }, NetProto: ipv4.ProtocolNumber, }, + transErr: transportError{ + origin: tcpip.SockExtErrorOriginICMP, + typ: uint8(header.ICMPv4DstUnreachable), + code: uint8(header.ICMPv4HostUnreachable), + kind: stack.DestinationHostUnreachableTransportError, + }, }, { name: "IPv6 without resolvable remote", netProto: ipv6.ProtocolNumber, remoteAddr: ipv6Addr3.AddressWithPrefix.Address, - expectedWriteErr: tcpip.ErrNoRoute, + expectedWriteErr: &tcpip.ErrNoRoute{}, sockError: tcpip.SockError{ - Err: tcpip.ErrNoRoute, - ErrType: byte(header.ICMPv6DstUnreachable), - ErrCode: byte(header.ICMPv6AddressUnreachable), - ErrOrigin: tcpip.SockExtErrorOriginICMP6, + Err: &tcpip.ErrNoRoute{}, Dst: tcpip.FullAddress{ NIC: host1NICID, Addr: ipv6Addr3.AddressWithPrefix.Address, @@ -313,6 +324,12 @@ func TestTCPLinkResolutionFailure(t *testing.T) { }, NetProto: ipv6.ProtocolNumber, }, + transErr: transportError{ + origin: tcpip.SockExtErrorOriginICMP6, + typ: uint8(header.ICMPv6DstUnreachable), + code: uint8(header.ICMPv6AddressUnreachable), + kind: stack.DestinationHostUnreachableTransportError, + }, }, } @@ -355,18 +372,24 @@ func TestTCPLinkResolutionFailure(t *testing.T) { remoteAddr := listenerAddr remoteAddr.Addr = test.remoteAddr - if err := clientEP.Connect(remoteAddr); err != tcpip.ErrConnectStarted { - t.Fatalf("got clientEP.Connect(%#v) = %s, want = %s", remoteAddr, err, tcpip.ErrConnectStarted) + { + err := clientEP.Connect(remoteAddr) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { + t.Fatalf("got clientEP.Connect(%#v) = %s, want = %s", remoteAddr, err, &tcpip.ErrConnectStarted{}) + } } // Wait for an error due to link resolution failing, or the endpoint to be // writable. <-ch - var r bytes.Reader - r.Reset([]byte{0}) - var wOpts tcpip.WriteOptions - if n, err := clientEP.Write(&r, wOpts); err != test.expectedWriteErr { - t.Errorf("got clientEP.Write(_, %#v) = (%d, %s), want = (_, %s)", wOpts, n, err, test.expectedWriteErr) + { + var r bytes.Reader + r.Reset([]byte{0}) + var wOpts tcpip.WriteOptions + _, err := clientEP.Write(&r, wOpts) + if diff := cmp.Diff(test.expectedWriteErr, err); diff != "" { + t.Errorf("unexpected error from clientEP.Write(_, %#v), (-want, +got):\n%s", wOpts, diff) + } } if test.expectedWriteErr == nil { @@ -380,14 +403,17 @@ func TestTCPLinkResolutionFailure(t *testing.T) { sockErrCmpOpts := []cmp.Option{ cmpopts.IgnoreUnexported(tcpip.SockError{}), - cmp.Comparer(func(a, b *tcpip.Error) bool { + cmp.Comparer(func(a, b tcpip.Error) bool { // tcpip.Error holds an unexported field but the errors netstack uses // are pre defined so we can simply compare pointers. return a == b }), - // Ignore the payload since we do not know the TCP seq/ack numbers. checker.IgnoreCmpPath( + // Ignore the payload since we do not know the TCP seq/ack numbers. "Payload", + // Ignore the cause since we will compare its properties separately + // since the concrete type of the cause is unknown. + "Cause", ), } @@ -399,6 +425,24 @@ func TestTCPLinkResolutionFailure(t *testing.T) { if diff := cmp.Diff(&test.sockError, sockErr, sockErrCmpOpts...); diff != "" { t.Errorf("socket error mismatch (-want +got):\n%s", diff) } + + transErr, ok := sockErr.Cause.(stack.TransportError) + if !ok { + t.Fatalf("socket error cause is not a transport error; cause = %#v", sockErr.Cause) + } + if diff := cmp.Diff( + test.transErr, + transportError{ + origin: transErr.Origin(), + typ: transErr.Type(), + code: transErr.Code(), + info: transErr.Info(), + kind: transErr.Kind(), + }, + cmp.AllowUnexported(transportError{}), + ); diff != "" { + t.Errorf("socket error mismatch (-want +got):\n%s", diff) + } }) } } @@ -453,10 +497,11 @@ func TestGetLinkAddress(t *testing.T) { host1Stack, _ := setupStack(t, stackOpts, host1NICID, host2NICID) ch := make(chan stack.LinkResolutionResult, 1) - if err := host1Stack.GetLinkAddress(host1NICID, test.remoteAddr, "", test.netProto, func(r stack.LinkResolutionResult) { + err := host1Stack.GetLinkAddress(host1NICID, test.remoteAddr, "", test.netProto, func(r stack.LinkResolutionResult) { ch <- r - }); err != tcpip.ErrWouldBlock { - t.Fatalf("got host1Stack.GetLinkAddress(%d, %s, '', %d, _) = %s, want = %s", host1NICID, test.remoteAddr, test.netProto, err, tcpip.ErrWouldBlock) + }) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got host1Stack.GetLinkAddress(%d, %s, '', %d, _) = %s, want = %s", host1NICID, test.remoteAddr, test.netProto, err, &tcpip.ErrWouldBlock{}) } wantRes := stack.LinkResolutionResult{Success: test.expectedOk} if test.expectedOk { @@ -570,10 +615,11 @@ func TestRouteResolvedFields(t *testing.T) { wantUnresolvedRouteInfo := wantRouteInfo wantUnresolvedRouteInfo.RemoteLinkAddress = "" - if err := r.ResolvedFields(func(r stack.ResolvedFieldsResult) { + err := r.ResolvedFields(func(r stack.ResolvedFieldsResult) { ch <- r - }); err != tcpip.ErrWouldBlock { - t.Errorf("got r.ResolvedFields(_) = %s, want = %s", err, tcpip.ErrWouldBlock) + }) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Errorf("got r.ResolvedFields(_) = %s, want = %s", err, &tcpip.ErrWouldBlock{}) } if diff := cmp.Diff(stack.ResolvedFieldsResult{RouteInfo: wantRouteInfo, Success: test.expectedSuccess}, <-ch, cmp.AllowUnexported(stack.RouteInfo{})); diff != "" { t.Errorf("route resolve result mismatch (-want +got):\n%s", diff) @@ -616,7 +662,7 @@ func TestWritePacketsLinkResolution(t *testing.T) { name string netProto tcpip.NetworkProtocolNumber remoteAddr tcpip.Address - expectedWriteErr *tcpip.Error + expectedWriteErr tcpip.Error }{ { name: "IPv4", @@ -703,7 +749,7 @@ func TestWritePacketsLinkResolution(t *testing.T) { var rOpts tcpip.ReadOptions res, err := serverEP.Read(&writer, rOpts) if err != nil { - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Should not have anymore bytes to read after we read the sent // number of bytes. if count == len(data) { @@ -728,3 +774,439 @@ func TestWritePacketsLinkResolution(t *testing.T) { }) } } + +type eventType int + +const ( + entryAdded eventType = iota + entryChanged + entryRemoved +) + +func (t eventType) String() string { + switch t { + case entryAdded: + return "add" + case entryChanged: + return "change" + case entryRemoved: + return "remove" + default: + return fmt.Sprintf("unknown (%d)", t) + } +} + +type eventInfo struct { + eventType eventType + nicID tcpip.NICID + entry stack.NeighborEntry +} + +func (e eventInfo) String() string { + return fmt.Sprintf("%s event for NIC #%d, %#v", e.eventType, e.nicID, e.entry) +} + +var _ stack.NUDDispatcher = (*nudDispatcher)(nil) + +type nudDispatcher struct { + c chan eventInfo +} + +func (d *nudDispatcher) OnNeighborAdded(nicID tcpip.NICID, entry stack.NeighborEntry) { + e := eventInfo{ + eventType: entryAdded, + nicID: nicID, + entry: entry, + } + d.c <- e +} + +func (d *nudDispatcher) OnNeighborChanged(nicID tcpip.NICID, entry stack.NeighborEntry) { + e := eventInfo{ + eventType: entryChanged, + nicID: nicID, + entry: entry, + } + d.c <- e +} + +func (d *nudDispatcher) OnNeighborRemoved(nicID tcpip.NICID, entry stack.NeighborEntry) { + e := eventInfo{ + eventType: entryRemoved, + nicID: nicID, + entry: entry, + } + d.c <- e +} + +func (d *nudDispatcher) waitForEvent(want eventInfo) error { + if diff := cmp.Diff(want, <-d.c, cmp.AllowUnexported(eventInfo{}), cmpopts.IgnoreFields(stack.NeighborEntry{}, "UpdatedAtNanos")); diff != "" { + return fmt.Errorf("got invalid event (-want +got):\n%s", diff) + } + return nil +} + +// TestTCPConfirmNeighborReachability tests that TCP informs layers beneath it +// that the neighbor used for a route is reachable. +func TestTCPConfirmNeighborReachability(t *testing.T) { + tests := []struct { + name string + netProto tcpip.NetworkProtocolNumber + remoteAddr tcpip.Address + neighborAddr tcpip.Address + getEndpoints func(*testing.T, *stack.Stack, *stack.Stack, *stack.Stack) (tcpip.Endpoint, tcpip.Endpoint, <-chan struct{}) + isHost1Listener bool + }{ + { + name: "IPv4 active connection through neighbor", + netProto: ipv4.ProtocolNumber, + remoteAddr: host2IPv4Addr.AddressWithPrefix.Address, + neighborAddr: routerNIC1IPv4Addr.AddressWithPrefix.Address, + getEndpoints: func(t *testing.T, host1Stack, _, host2Stack *stack.Stack) (tcpip.Endpoint, tcpip.Endpoint, <-chan struct{}) { + var listenerWQ waiter.Queue + listenerEP, err := host2Stack.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &listenerWQ) + if err != nil { + t.Fatalf("host2Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv4.ProtocolNumber, err) + } + + var clientWQ waiter.Queue + clientWE, clientCH := waiter.NewChannelEntry(nil) + clientWQ.EventRegister(&clientWE, waiter.EventOut) + clientEP, err := host1Stack.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &clientWQ) + if err != nil { + listenerEP.Close() + t.Fatalf("host1Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv4.ProtocolNumber, err) + } + + return listenerEP, clientEP, clientCH + }, + }, + { + name: "IPv6 active connection through neighbor", + netProto: ipv6.ProtocolNumber, + remoteAddr: host2IPv6Addr.AddressWithPrefix.Address, + neighborAddr: routerNIC1IPv6Addr.AddressWithPrefix.Address, + getEndpoints: func(t *testing.T, host1Stack, _, host2Stack *stack.Stack) (tcpip.Endpoint, tcpip.Endpoint, <-chan struct{}) { + var listenerWQ waiter.Queue + listenerEP, err := host2Stack.NewEndpoint(tcp.ProtocolNumber, ipv6.ProtocolNumber, &listenerWQ) + if err != nil { + t.Fatalf("host2Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv6.ProtocolNumber, err) + } + + var clientWQ waiter.Queue + clientWE, clientCH := waiter.NewChannelEntry(nil) + clientWQ.EventRegister(&clientWE, waiter.EventOut) + clientEP, err := host1Stack.NewEndpoint(tcp.ProtocolNumber, ipv6.ProtocolNumber, &clientWQ) + if err != nil { + listenerEP.Close() + t.Fatalf("host1Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv6.ProtocolNumber, err) + } + + return listenerEP, clientEP, clientCH + }, + }, + { + name: "IPv4 active connection to neighbor", + netProto: ipv4.ProtocolNumber, + remoteAddr: routerNIC1IPv4Addr.AddressWithPrefix.Address, + neighborAddr: routerNIC1IPv4Addr.AddressWithPrefix.Address, + getEndpoints: func(t *testing.T, host1Stack, routerStack, _ *stack.Stack) (tcpip.Endpoint, tcpip.Endpoint, <-chan struct{}) { + var listenerWQ waiter.Queue + listenerEP, err := routerStack.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &listenerWQ) + if err != nil { + t.Fatalf("routerStack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv4.ProtocolNumber, err) + } + + var clientWQ waiter.Queue + clientWE, clientCH := waiter.NewChannelEntry(nil) + clientWQ.EventRegister(&clientWE, waiter.EventOut) + clientEP, err := host1Stack.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &clientWQ) + if err != nil { + listenerEP.Close() + t.Fatalf("host1Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv4.ProtocolNumber, err) + } + + return listenerEP, clientEP, clientCH + }, + }, + { + name: "IPv6 active connection to neighbor", + netProto: ipv6.ProtocolNumber, + remoteAddr: routerNIC1IPv6Addr.AddressWithPrefix.Address, + neighborAddr: routerNIC1IPv6Addr.AddressWithPrefix.Address, + getEndpoints: func(t *testing.T, host1Stack, routerStack, _ *stack.Stack) (tcpip.Endpoint, tcpip.Endpoint, <-chan struct{}) { + var listenerWQ waiter.Queue + listenerEP, err := routerStack.NewEndpoint(tcp.ProtocolNumber, ipv6.ProtocolNumber, &listenerWQ) + if err != nil { + t.Fatalf("routerStack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv6.ProtocolNumber, err) + } + + var clientWQ waiter.Queue + clientWE, clientCH := waiter.NewChannelEntry(nil) + clientWQ.EventRegister(&clientWE, waiter.EventOut) + clientEP, err := host1Stack.NewEndpoint(tcp.ProtocolNumber, ipv6.ProtocolNumber, &clientWQ) + if err != nil { + listenerEP.Close() + t.Fatalf("host1Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv6.ProtocolNumber, err) + } + + return listenerEP, clientEP, clientCH + }, + }, + { + name: "IPv4 passive connection to neighbor", + netProto: ipv4.ProtocolNumber, + remoteAddr: host1IPv4Addr.AddressWithPrefix.Address, + neighborAddr: routerNIC1IPv4Addr.AddressWithPrefix.Address, + getEndpoints: func(t *testing.T, host1Stack, routerStack, _ *stack.Stack) (tcpip.Endpoint, tcpip.Endpoint, <-chan struct{}) { + var listenerWQ waiter.Queue + listenerEP, err := host1Stack.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &listenerWQ) + if err != nil { + t.Fatalf("host1Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv4.ProtocolNumber, err) + } + + var clientWQ waiter.Queue + clientWE, clientCH := waiter.NewChannelEntry(nil) + clientWQ.EventRegister(&clientWE, waiter.EventOut) + clientEP, err := routerStack.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &clientWQ) + if err != nil { + listenerEP.Close() + t.Fatalf("routerStack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv4.ProtocolNumber, err) + } + + return listenerEP, clientEP, clientCH + }, + isHost1Listener: true, + }, + { + name: "IPv6 passive connection to neighbor", + netProto: ipv6.ProtocolNumber, + remoteAddr: host1IPv6Addr.AddressWithPrefix.Address, + neighborAddr: routerNIC1IPv6Addr.AddressWithPrefix.Address, + getEndpoints: func(t *testing.T, host1Stack, routerStack, _ *stack.Stack) (tcpip.Endpoint, tcpip.Endpoint, <-chan struct{}) { + var listenerWQ waiter.Queue + listenerEP, err := host1Stack.NewEndpoint(tcp.ProtocolNumber, ipv6.ProtocolNumber, &listenerWQ) + if err != nil { + t.Fatalf("host1Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv6.ProtocolNumber, err) + } + + var clientWQ waiter.Queue + clientWE, clientCH := waiter.NewChannelEntry(nil) + clientWQ.EventRegister(&clientWE, waiter.EventOut) + clientEP, err := routerStack.NewEndpoint(tcp.ProtocolNumber, ipv6.ProtocolNumber, &clientWQ) + if err != nil { + listenerEP.Close() + t.Fatalf("routerStack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv6.ProtocolNumber, err) + } + + return listenerEP, clientEP, clientCH + }, + isHost1Listener: true, + }, + { + name: "IPv4 passive connection through neighbor", + netProto: ipv4.ProtocolNumber, + remoteAddr: host1IPv4Addr.AddressWithPrefix.Address, + neighborAddr: routerNIC1IPv4Addr.AddressWithPrefix.Address, + getEndpoints: func(t *testing.T, host1Stack, _, host2Stack *stack.Stack) (tcpip.Endpoint, tcpip.Endpoint, <-chan struct{}) { + var listenerWQ waiter.Queue + listenerEP, err := host1Stack.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &listenerWQ) + if err != nil { + t.Fatalf("host1Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv4.ProtocolNumber, err) + } + + var clientWQ waiter.Queue + clientWE, clientCH := waiter.NewChannelEntry(nil) + clientWQ.EventRegister(&clientWE, waiter.EventOut) + clientEP, err := host2Stack.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &clientWQ) + if err != nil { + listenerEP.Close() + t.Fatalf("host2Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv4.ProtocolNumber, err) + } + + return listenerEP, clientEP, clientCH + }, + isHost1Listener: true, + }, + { + name: "IPv6 passive connection through neighbor", + netProto: ipv6.ProtocolNumber, + remoteAddr: host1IPv6Addr.AddressWithPrefix.Address, + neighborAddr: routerNIC1IPv6Addr.AddressWithPrefix.Address, + getEndpoints: func(t *testing.T, host1Stack, _, host2Stack *stack.Stack) (tcpip.Endpoint, tcpip.Endpoint, <-chan struct{}) { + var listenerWQ waiter.Queue + listenerEP, err := host1Stack.NewEndpoint(tcp.ProtocolNumber, ipv6.ProtocolNumber, &listenerWQ) + if err != nil { + t.Fatalf("host1Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv6.ProtocolNumber, err) + } + + var clientWQ waiter.Queue + clientWE, clientCH := waiter.NewChannelEntry(nil) + clientWQ.EventRegister(&clientWE, waiter.EventOut) + clientEP, err := host2Stack.NewEndpoint(tcp.ProtocolNumber, ipv6.ProtocolNumber, &clientWQ) + if err != nil { + listenerEP.Close() + t.Fatalf("host2Stack.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, ipv6.ProtocolNumber, err) + } + + return listenerEP, clientEP, clientCH + }, + isHost1Listener: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + clock := faketime.NewManualClock() + nudDisp := nudDispatcher{ + c: make(chan eventInfo, 3), + } + stackOpts := stack.Options{ + NetworkProtocols: []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol, ipv6.NewProtocol}, + TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol}, + Clock: clock, + UseNeighborCache: true, + } + host1StackOpts := stackOpts + host1StackOpts.NUDDisp = &nudDisp + + host1Stack := stack.New(host1StackOpts) + routerStack := stack.New(stackOpts) + host2Stack := stack.New(stackOpts) + setupRoutedStacks(t, host1Stack, routerStack, host2Stack) + + // Add a reachable dynamic entry to our neighbor table for the remote. + { + ch := make(chan stack.LinkResolutionResult, 1) + err := host1Stack.GetLinkAddress(host1NICID, test.neighborAddr, "", test.netProto, func(r stack.LinkResolutionResult) { + ch <- r + }) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got host1Stack.GetLinkAddress(%d, %s, '', %d, _) = %s, want = %s", host1NICID, test.neighborAddr, test.netProto, err, &tcpip.ErrWouldBlock{}) + } + if diff := cmp.Diff(stack.LinkResolutionResult{LinkAddress: linkAddr2, Success: true}, <-ch); diff != "" { + t.Fatalf("link resolution mismatch (-want +got):\n%s", diff) + } + } + if err := nudDisp.waitForEvent(eventInfo{ + eventType: entryAdded, + nicID: host1NICID, + entry: stack.NeighborEntry{State: stack.Incomplete, Addr: test.neighborAddr}, + }); err != nil { + t.Fatalf("error waiting for initial NUD event: %s", err) + } + if err := nudDisp.waitForEvent(eventInfo{ + eventType: entryChanged, + nicID: host1NICID, + entry: stack.NeighborEntry{State: stack.Reachable, Addr: test.neighborAddr, LinkAddr: linkAddr2}, + }); err != nil { + t.Fatalf("error waiting for reachable NUD event: %s", err) + } + + // Wait for the remote's neighbor entry to be stale before creating a + // TCP connection from host1 to some remote. + nudConfigs, err := host1Stack.NUDConfigurations(host1NICID, test.netProto) + if err != nil { + t.Fatalf("host1Stack.NUDConfigurations(%d, %d): %s", host1NICID, test.netProto, err) + } + // The maximum reachable time for a neighbor is some maximum random factor + // applied to the base reachable time. + // + // See NUDConfigurations.BaseReachableTime for more information. + maxReachableTime := time.Duration(float32(nudConfigs.BaseReachableTime) * nudConfigs.MaxRandomFactor) + clock.Advance(maxReachableTime) + if err := nudDisp.waitForEvent(eventInfo{ + eventType: entryChanged, + nicID: host1NICID, + entry: stack.NeighborEntry{State: stack.Stale, Addr: test.neighborAddr, LinkAddr: linkAddr2}, + }); err != nil { + t.Fatalf("error waiting for stale NUD event: %s", err) + } + + listenerEP, clientEP, clientCH := test.getEndpoints(t, host1Stack, routerStack, host2Stack) + defer listenerEP.Close() + defer clientEP.Close() + listenerAddr := tcpip.FullAddress{Addr: test.remoteAddr, Port: 1234} + if err := listenerEP.Bind(listenerAddr); err != nil { + t.Fatalf("listenerEP.Bind(%#v): %s", listenerAddr, err) + } + if err := listenerEP.Listen(1); err != nil { + t.Fatalf("listenerEP.Listen(1): %s", err) + } + { + err := clientEP.Connect(listenerAddr) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { + t.Fatalf("got clientEP.Connect(%#v) = %s, want = %s", listenerAddr, err, &tcpip.ErrConnectStarted{}) + } + } + + // Wait for the TCP handshake to complete then make sure the neighbor is + // reachable without entering the probe state as TCP should provide NUD + // with confirmation that the neighbor is reachable (indicated by a + // successful 3-way handshake). + <-clientCH + if err := nudDisp.waitForEvent(eventInfo{ + eventType: entryChanged, + nicID: host1NICID, + entry: stack.NeighborEntry{State: stack.Delay, Addr: test.neighborAddr, LinkAddr: linkAddr2}, + }); err != nil { + t.Fatalf("error waiting for delay NUD event: %s", err) + } + if err := nudDisp.waitForEvent(eventInfo{ + eventType: entryChanged, + nicID: host1NICID, + entry: stack.NeighborEntry{State: stack.Reachable, Addr: test.neighborAddr, LinkAddr: linkAddr2}, + }); err != nil { + t.Fatalf("error waiting for reachable NUD event: %s", err) + } + + // Wait for the neighbor to be stale again then send data to the remote. + // + // On successful transmission, the neighbor should become reachable + // without probing the neighbor as a TCP ACK would be received which is an + // indication of the neighbor being reachable. + clock.Advance(maxReachableTime) + if err := nudDisp.waitForEvent(eventInfo{ + eventType: entryChanged, + nicID: host1NICID, + entry: stack.NeighborEntry{State: stack.Stale, Addr: test.neighborAddr, LinkAddr: linkAddr2}, + }); err != nil { + t.Fatalf("error waiting for stale NUD event: %s", err) + } + var r bytes.Reader + r.Reset([]byte{0}) + var wOpts tcpip.WriteOptions + if _, err := clientEP.Write(&r, wOpts); err != nil { + t.Errorf("clientEP.Write(_, %#v): %s", wOpts, err) + } + if err := nudDisp.waitForEvent(eventInfo{ + eventType: entryChanged, + nicID: host1NICID, + entry: stack.NeighborEntry{State: stack.Delay, Addr: test.neighborAddr, LinkAddr: linkAddr2}, + }); err != nil { + t.Fatalf("error waiting for delay NUD event: %s", err) + } + if test.isHost1Listener { + // If host1 is not the client, host1 does not send any data so TCP + // has no way to know it is making forward progress. Because of this, + // TCP should not mark the route reachable and NUD should go through the + // probe state. + clock.Advance(nudConfigs.DelayFirstProbeTime) + if err := nudDisp.waitForEvent(eventInfo{ + eventType: entryChanged, + nicID: host1NICID, + entry: stack.NeighborEntry{State: stack.Probe, Addr: test.neighborAddr, LinkAddr: linkAddr2}, + }); err != nil { + t.Fatalf("error waiting for probe NUD event: %s", err) + } + } + if err := nudDisp.waitForEvent(eventInfo{ + eventType: entryChanged, + nicID: host1NICID, + entry: stack.NeighborEntry{State: stack.Reachable, Addr: test.neighborAddr, LinkAddr: linkAddr2}, + }); err != nil { + t.Fatalf("error waiting for reachable NUD event: %s", err) + } + }) + } +} diff --git a/pkg/tcpip/tests/integration/loopback_test.go b/pkg/tcpip/tests/integration/loopback_test.go index 761283b66..ab67762ef 100644 --- a/pkg/tcpip/tests/integration/loopback_test.go +++ b/pkg/tcpip/tests/integration/loopback_test.go @@ -37,7 +37,7 @@ var _ ipv6.NDPDispatcher = (*ndpDispatcher)(nil) type ndpDispatcher struct{} -func (*ndpDispatcher) OnDuplicateAddressDetectionStatus(tcpip.NICID, tcpip.Address, bool, *tcpip.Error) { +func (*ndpDispatcher) OnDuplicateAddressDetectionStatus(tcpip.NICID, tcpip.Address, bool, tcpip.Error) { } func (*ndpDispatcher) OnDefaultRouterDiscovered(tcpip.NICID, tcpip.Address) bool { @@ -262,8 +262,8 @@ func TestLoopbackAcceptAllInSubnetUDP(t *testing.T) { if diff := cmp.Diff(data, buf.Bytes()); diff != "" { t.Errorf("got UDP payload mismatch (-want +got):\n%s", diff) } - } else if err != tcpip.ErrWouldBlock { - t.Fatalf("got rep.Read = (%v, %s) [with data %x], want = (_, %s)", res, err, buf.Bytes(), tcpip.ErrWouldBlock) + } else if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got rep.Read = (%v, %s) [with data %x], want = (_, %s)", res, err, buf.Bytes(), &tcpip.ErrWouldBlock{}) } }) } @@ -322,11 +322,14 @@ func TestLoopbackSubnetLifetimeBoundToAddr(t *testing.T) { if err := s.RemoveAddress(nicID, protoAddr.AddressWithPrefix.Address); err != nil { t.Fatalf("s.RemoveAddress(%d, %s): %s", nicID, protoAddr.AddressWithPrefix.Address, err) } - if err := r.WritePacket(nil /* gso */, params, stack.NewPacketBuffer(stack.PacketBufferOptions{ - ReserveHeaderBytes: int(r.MaxHeaderLength()), - Data: data.ToVectorisedView(), - })); err != tcpip.ErrInvalidEndpointState { - t.Fatalf("got r.WritePacket(nil, %#v, _) = %s, want = %s", params, err, tcpip.ErrInvalidEndpointState) + { + err := r.WritePacket(nil /* gso */, params, stack.NewPacketBuffer(stack.PacketBufferOptions{ + ReserveHeaderBytes: int(r.MaxHeaderLength()), + Data: data.ToVectorisedView(), + })) + if _, ok := err.(*tcpip.ErrInvalidEndpointState); !ok { + t.Fatalf("got r.WritePacket(nil, %#v, _) = %s, want = %s", params, err, &tcpip.ErrInvalidEndpointState{}) + } } } @@ -470,13 +473,17 @@ func TestLoopbackAcceptAllInSubnetTCP(t *testing.T) { Addr: test.dstAddr, Port: localPort, } - if err := connectingEndpoint.Connect(connectAddr); err != tcpip.ErrConnectStarted { - t.Fatalf("connectingEndpoint.Connect(%#v): %s", connectAddr, err) + { + err := connectingEndpoint.Connect(connectAddr) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { + t.Fatalf("connectingEndpoint.Connect(%#v): %s", connectAddr, err) + } } if !test.expectAccept { - if _, _, err := listeningEndpoint.Accept(nil); err != tcpip.ErrWouldBlock { - t.Fatalf("got listeningEndpoint.Accept(nil) = %s, want = %s", err, tcpip.ErrWouldBlock) + _, _, err := listeningEndpoint.Accept(nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got listeningEndpoint.Accept(nil) = %s, want = %s", err, &tcpip.ErrWouldBlock{}) } return } diff --git a/pkg/tcpip/tests/integration/multicast_broadcast_test.go b/pkg/tcpip/tests/integration/multicast_broadcast_test.go index 9cc12fa58..d685fdd36 100644 --- a/pkg/tcpip/tests/integration/multicast_broadcast_test.go +++ b/pkg/tcpip/tests/integration/multicast_broadcast_test.go @@ -479,8 +479,8 @@ func TestIncomingMulticastAndBroadcast(t *testing.T) { if diff := cmp.Diff(data, buf.Bytes()); diff != "" { t.Errorf("got UDP payload mismatch (-want +got):\n%s", diff) } - } else if err != tcpip.ErrWouldBlock { - t.Fatalf("got Read = (%v, %s) [with data %x], want = (_, %s)", res, err, buf.Bytes(), tcpip.ErrWouldBlock) + } else if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got Read = (%v, %s) [with data %x], want = (_, %s)", res, err, buf.Bytes(), &tcpip.ErrWouldBlock{}) } }) } @@ -761,8 +761,11 @@ func TestUDPAddRemoveMembershipSocketOption(t *testing.T) { if err := ep.SetSockOpt(&removeOpt); err != nil { t.Fatalf("ep.SetSockOpt(&%#v): %s", removeOpt, err) } - if _, err := ep.Read(&buf, tcpip.ReadOptions{}); err != tcpip.ErrWouldBlock { - t.Fatalf("got ep.Read = (_, %s), want = (_, %s)", err, tcpip.ErrWouldBlock) + { + _, err := ep.Read(&buf, tcpip.ReadOptions{}) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got ep.Read = (_, %s), want = (_, %s)", err, &tcpip.ErrWouldBlock{}) + } } }) } diff --git a/pkg/tcpip/tests/integration/route_test.go b/pkg/tcpip/tests/integration/route_test.go index 35ee7437a..9654c9527 100644 --- a/pkg/tcpip/tests/integration/route_test.go +++ b/pkg/tcpip/tests/integration/route_test.go @@ -81,7 +81,7 @@ func TestLocalPing(t *testing.T) { linkEndpoint func() stack.LinkEndpoint localAddr tcpip.Address icmpBuf func(*testing.T) buffer.View - expectedConnectErr *tcpip.Error + expectedConnectErr tcpip.Error checkLinkEndpoint func(t *testing.T, e stack.LinkEndpoint) }{ { @@ -126,7 +126,7 @@ func TestLocalPing(t *testing.T) { netProto: ipv4.ProtocolNumber, linkEndpoint: loopback.New, icmpBuf: ipv4ICMPBuf, - expectedConnectErr: tcpip.ErrNoRoute, + expectedConnectErr: &tcpip.ErrNoRoute{}, checkLinkEndpoint: func(*testing.T, stack.LinkEndpoint) {}, }, { @@ -135,7 +135,7 @@ func TestLocalPing(t *testing.T) { netProto: ipv6.ProtocolNumber, linkEndpoint: loopback.New, icmpBuf: ipv6ICMPBuf, - expectedConnectErr: tcpip.ErrNoRoute, + expectedConnectErr: &tcpip.ErrNoRoute{}, checkLinkEndpoint: func(*testing.T, stack.LinkEndpoint) {}, }, { @@ -144,7 +144,7 @@ func TestLocalPing(t *testing.T) { netProto: ipv4.ProtocolNumber, linkEndpoint: channelEP, icmpBuf: ipv4ICMPBuf, - expectedConnectErr: tcpip.ErrNoRoute, + expectedConnectErr: &tcpip.ErrNoRoute{}, checkLinkEndpoint: channelEPCheck, }, { @@ -153,7 +153,7 @@ func TestLocalPing(t *testing.T) { netProto: ipv6.ProtocolNumber, linkEndpoint: channelEP, icmpBuf: ipv6ICMPBuf, - expectedConnectErr: tcpip.ErrNoRoute, + expectedConnectErr: &tcpip.ErrNoRoute{}, checkLinkEndpoint: channelEPCheck, }, } @@ -186,8 +186,11 @@ func TestLocalPing(t *testing.T) { defer ep.Close() connAddr := tcpip.FullAddress{Addr: test.localAddr} - if err := ep.Connect(connAddr); err != test.expectedConnectErr { - t.Fatalf("got ep.Connect(%#v) = %s, want = %s", connAddr, err, test.expectedConnectErr) + { + err := ep.Connect(connAddr) + if diff := cmp.Diff(test.expectedConnectErr, err); diff != "" { + t.Fatalf("unexpected error from ep.Connect(%#v), (-want, +got):\n%s", connAddr, diff) + } } if test.expectedConnectErr != nil { @@ -263,12 +266,12 @@ func TestLocalUDP(t *testing.T) { subTests := []struct { name string addAddress bool - expectedWriteErr *tcpip.Error + expectedWriteErr tcpip.Error }{ { name: "Unassigned local address", addAddress: false, - expectedWriteErr: tcpip.ErrNoRoute, + expectedWriteErr: &tcpip.ErrNoRoute{}, }, { name: "Assigned local address", diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index e4bcd3120..f5e1a6e45 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -84,7 +84,7 @@ type endpoint struct { ops tcpip.SocketOptions } -func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { ep := &endpoint{ stack: s, TransportEndpointInfo: stack.TransportEndpointInfo{ @@ -159,14 +159,14 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { } // Read implements tcpip.Endpoint.Read. -func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, *tcpip.Error) { +func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) { e.rcvMu.Lock() if e.rcvList.Empty() { - err := tcpip.ErrWouldBlock + var err tcpip.Error = &tcpip.ErrWouldBlock{} if e.rcvClosed { e.stats.ReadErrors.ReadClosed.Increment() - err = tcpip.ErrClosedForReceive + err = &tcpip.ErrClosedForReceive{} } e.rcvMu.Unlock() return tcpip.ReadResult{}, err @@ -193,7 +193,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult n, err := p.data.ReadTo(dst, opts.Peek) if n == 0 && err != nil { - return res, tcpip.ErrBadBuffer + return res, &tcpip.ErrBadBuffer{} } res.Count = n return res, nil @@ -204,7 +204,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult // reacquire the mutex in exclusive mode. // // Returns true for retry if preparation should be retried. -func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpip.Error) { +func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip.Error) { switch e.state { case stateInitial: case stateConnected: @@ -212,11 +212,11 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpi case stateBound: if to == nil { - return false, tcpip.ErrDestinationRequired + return false, &tcpip.ErrDestinationRequired{} } return false, nil default: - return false, tcpip.ErrInvalidEndpointState + return false, &tcpip.ErrInvalidEndpointState{} } e.mu.RUnlock() @@ -241,18 +241,18 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpi // Write writes data to the endpoint's peer. This method does not block // if the data cannot be written. -func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tcpip.Error) { +func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { n, err := e.write(p, opts) - switch err { + switch err.(type) { case nil: e.stats.PacketsSent.Increment() - case tcpip.ErrMessageTooLong, tcpip.ErrInvalidOptionValue: + case *tcpip.ErrMessageTooLong, *tcpip.ErrInvalidOptionValue: e.stats.WriteErrors.InvalidArgs.Increment() - case tcpip.ErrClosedForSend: + case *tcpip.ErrClosedForSend: e.stats.WriteErrors.WriteClosed.Increment() - case tcpip.ErrInvalidEndpointState: + case *tcpip.ErrInvalidEndpointState: e.stats.WriteErrors.InvalidEndpointState.Increment() - case tcpip.ErrNoRoute, tcpip.ErrBroadcastDisabled, tcpip.ErrNetworkUnreachable: + case *tcpip.ErrNoRoute, *tcpip.ErrBroadcastDisabled, *tcpip.ErrNetworkUnreachable: // Errors indicating any problem with IP routing of the packet. e.stats.SendErrors.NoRoute.Increment() default: @@ -262,10 +262,10 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc return n, err } -func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tcpip.Error) { +func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { // MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.) if opts.More { - return 0, tcpip.ErrInvalidOptionValue + return 0, &tcpip.ErrInvalidOptionValue{} } to := opts.To @@ -275,7 +275,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc // If we've shutdown with SHUT_WR we are in an invalid state for sending. if e.shutdownFlags&tcpip.ShutdownWrite != 0 { - return 0, tcpip.ErrClosedForSend + return 0, &tcpip.ErrClosedForSend{} } // Prepare for write. @@ -297,7 +297,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc nicID := to.NIC if e.BindNICID != 0 { if nicID != 0 && nicID != e.BindNICID { - return 0, tcpip.ErrNoRoute + return 0, &tcpip.ErrNoRoute{} } nicID = e.BindNICID @@ -320,10 +320,10 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc v := make([]byte, p.Len()) if _, err := io.ReadFull(p, v); err != nil { - return 0, tcpip.ErrBadBuffer + return 0, &tcpip.ErrBadBuffer{} } - var err *tcpip.Error + var err tcpip.Error switch e.NetProto { case header.IPv4ProtocolNumber: err = send4(route, e.ID.LocalPort, v, e.ttl, e.owner) @@ -340,12 +340,12 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc } // SetSockOpt sets a socket option. -func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { +func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { return nil } // SetSockOptInt sets a socket option. Currently not supported. -func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { +func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { switch opt { case tcpip.TTLOption: e.mu.Lock() @@ -357,7 +357,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { } // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. -func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { +func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { switch opt { case tcpip.ReceiveQueueSizeOption: v := 0 @@ -382,18 +382,18 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { return v, nil default: - return -1, tcpip.ErrUnknownProtocolOption + return -1, &tcpip.ErrUnknownProtocolOption{} } } // GetSockOpt implements tcpip.Endpoint.GetSockOpt. -func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error { + return &tcpip.ErrUnknownProtocolOption{} } -func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8, owner tcpip.PacketOwner) *tcpip.Error { +func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8, owner tcpip.PacketOwner) tcpip.Error { if len(data) < header.ICMPv4MinimumSize { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ @@ -411,7 +411,7 @@ func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8, owner tcpi // Linux performs these basic checks. if icmpv4.Type() != header.ICMPv4Echo || icmpv4.Code() != 0 { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } icmpv4.SetChecksum(0) @@ -425,9 +425,9 @@ func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8, owner tcpi return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv4ProtocolNumber, TTL: ttl, TOS: stack.DefaultTOS}, pkt) } -func send6(r *stack.Route, ident uint16, data buffer.View, ttl uint8) *tcpip.Error { +func send6(r *stack.Route, ident uint16, data buffer.View, ttl uint8) tcpip.Error { if len(data) < header.ICMPv6EchoMinimumSize { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ @@ -442,7 +442,7 @@ func send6(r *stack.Route, ident uint16, data buffer.View, ttl uint8) *tcpip.Err data = data[header.ICMPv6MinimumSize:] if icmpv6.Type() != header.ICMPv6EchoRequest || icmpv6.Code() != 0 { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } dataVV := data.ToVectorisedView() @@ -457,7 +457,7 @@ func send6(r *stack.Route, ident uint16, data buffer.View, ttl uint8) *tcpip.Err // checkV4MappedLocked determines the effective network protocol and converts // addr to its canonical form. -func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, *tcpip.Error) { +func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) { unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProtoLocked(addr, false /* v6only */) if err != nil { return tcpip.FullAddress{}, 0, err @@ -466,12 +466,12 @@ func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddres } // Disconnect implements tcpip.Endpoint.Disconnect. -func (*endpoint) Disconnect() *tcpip.Error { - return tcpip.ErrNotSupported +func (*endpoint) Disconnect() tcpip.Error { + return &tcpip.ErrNotSupported{} } // Connect connects the endpoint to its peer. Specifying a NIC is optional. -func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { +func (e *endpoint) Connect(addr tcpip.FullAddress) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() @@ -486,12 +486,12 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { } if nicID != 0 && nicID != e.BindNICID { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } nicID = e.BindNICID default: - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } addr, netProto, err := e.checkV4MappedLocked(addr) @@ -536,19 +536,19 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { } // ConnectEndpoint is not supported. -func (*endpoint) ConnectEndpoint(tcpip.Endpoint) *tcpip.Error { - return tcpip.ErrInvalidEndpointState +func (*endpoint) ConnectEndpoint(tcpip.Endpoint) tcpip.Error { + return &tcpip.ErrInvalidEndpointState{} } // Shutdown closes the read and/or write end of the endpoint connection // to its peer. -func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { +func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() e.shutdownFlags |= flags if e.state != stateConnected { - return tcpip.ErrNotConnected + return &tcpip.ErrNotConnected{} } if flags&tcpip.ShutdownRead != 0 { @@ -566,16 +566,16 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { } // Listen is not supported by UDP, it just fails. -func (*endpoint) Listen(int) *tcpip.Error { - return tcpip.ErrNotSupported +func (*endpoint) Listen(int) tcpip.Error { + return &tcpip.ErrNotSupported{} } // Accept is not supported by UDP, it just fails. -func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { - return nil, nil, tcpip.ErrNotSupported +func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) { + return nil, nil, &tcpip.ErrNotSupported{} } -func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, *tcpip.Error) { +func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, tcpip.Error) { if id.LocalPort != 0 { // The endpoint already has a local port, just attempt to // register it. @@ -584,13 +584,13 @@ func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.Networ } // We need to find a port for the endpoint. - _, err := e.stack.PickEphemeralPort(func(p uint16) (bool, *tcpip.Error) { + _, err := e.stack.PickEphemeralPort(func(p uint16) (bool, tcpip.Error) { id.LocalPort = p err := e.stack.RegisterTransportEndpoint(netProtos, e.TransProto, id, e, ports.Flags{}, 0 /* bindtodevice */) - switch err { + switch err.(type) { case nil: return true, nil - case tcpip.ErrPortInUse: + case *tcpip.ErrPortInUse: return false, nil default: return false, err @@ -600,11 +600,11 @@ func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.Networ return id, err } -func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error { +func (e *endpoint) bindLocked(addr tcpip.FullAddress) tcpip.Error { // Don't allow binding once endpoint is not in the initial state // anymore. if e.state != stateInitial { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } addr, netProto, err := e.checkV4MappedLocked(addr) @@ -620,7 +620,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error { if len(addr.Addr) != 0 { // A local address was specified, verify that it's valid. if e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr) == 0 { - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } } @@ -648,7 +648,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error { // Bind binds the endpoint to a specific local address and port. // Specifying a NIC is optional. -func (e *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { +func (e *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() @@ -664,7 +664,7 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { } // GetLocalAddress returns the address to which the endpoint is bound. -func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { +func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { e.mu.RLock() defer e.mu.RUnlock() @@ -676,12 +676,12 @@ func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { } // GetRemoteAddress returns the address to which the endpoint is connected. -func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { +func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) { e.mu.RLock() defer e.mu.RUnlock() if e.state != stateConnected { - return tcpip.FullAddress{}, tcpip.ErrNotConnected + return tcpip.FullAddress{}, &tcpip.ErrNotConnected{} } return tcpip.FullAddress{ @@ -778,9 +778,8 @@ func (e *endpoint) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketB } } -// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. -func (e *endpoint) HandleControlPacket(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) { -} +// HandleError implements stack.TransportEndpoint. +func (*endpoint) HandleError(stack.TransportError, *stack.PacketBuffer) {} // State implements tcpip.Endpoint.State. The ICMP endpoint currently doesn't // expose internal socket state. @@ -806,7 +805,7 @@ func (e *endpoint) Stats() tcpip.EndpointStats { func (*endpoint) Wait() {} // LastError implements tcpip.Endpoint.LastError. -func (*endpoint) LastError() *tcpip.Error { +func (*endpoint) LastError() tcpip.Error { return nil } diff --git a/pkg/tcpip/transport/icmp/endpoint_state.go b/pkg/tcpip/transport/icmp/endpoint_state.go index afe96998a..c9fa9974a 100644 --- a/pkg/tcpip/transport/icmp/endpoint_state.go +++ b/pkg/tcpip/transport/icmp/endpoint_state.go @@ -75,7 +75,7 @@ func (e *endpoint) Resume(s *stack.Stack) { return } - var err *tcpip.Error + var err tcpip.Error if e.state == stateConnected { e.route, err = e.stack.FindRoute(e.RegisterNICID, e.BindAddr, e.ID.RemoteAddress, e.NetProto, false /* multicastLoop */) if err != nil { @@ -85,7 +85,7 @@ func (e *endpoint) Resume(s *stack.Stack) { e.ID.LocalAddress = e.route.LocalAddress } else if len(e.ID.LocalAddress) != 0 { // stateBound if e.stack.CheckLocalAddress(e.RegisterNICID, e.NetProto, e.ID.LocalAddress) == 0 { - panic(tcpip.ErrBadLocalAddress) + panic(&tcpip.ErrBadLocalAddress{}) } } diff --git a/pkg/tcpip/transport/icmp/protocol.go b/pkg/tcpip/transport/icmp/protocol.go index 3820e5dc7..47f7dd1cb 100644 --- a/pkg/tcpip/transport/icmp/protocol.go +++ b/pkg/tcpip/transport/icmp/protocol.go @@ -59,18 +59,18 @@ func (p *protocol) netProto() tcpip.NetworkProtocolNumber { // NewEndpoint creates a new icmp endpoint. It implements // stack.TransportProtocol.NewEndpoint. -func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { if netProto != p.netProto() { - return nil, tcpip.ErrUnknownProtocol + return nil, &tcpip.ErrUnknownProtocol{} } return newEndpoint(p.stack, netProto, p.number, waiterQueue) } // NewRawEndpoint creates a new raw icmp endpoint. It implements // stack.TransportProtocol.NewRawEndpoint. -func (p *protocol) NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (p *protocol) NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { if netProto != p.netProto() { - return nil, tcpip.ErrUnknownProtocol + return nil, &tcpip.ErrUnknownProtocol{} } return raw.NewEndpoint(p.stack, netProto, p.number, waiterQueue) } @@ -87,7 +87,7 @@ func (p *protocol) MinimumPacketSize() int { } // ParsePorts in case of ICMP sets src to 0, dst to ICMP ID, and err to nil. -func (p *protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { +func (p *protocol) ParsePorts(v buffer.View) (src, dst uint16, err tcpip.Error) { switch p.number { case ProtocolNumber4: hdr := header.ICMPv4(v) @@ -106,13 +106,13 @@ func (*protocol) HandleUnknownDestinationPacket(stack.TransportEndpointID, *stac } // SetOption implements stack.TransportProtocol.SetOption. -func (*protocol) SetOption(tcpip.SettableTransportProtocolOption) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (*protocol) SetOption(tcpip.SettableTransportProtocolOption) tcpip.Error { + return &tcpip.ErrUnknownProtocolOption{} } // Option implements stack.TransportProtocol.Option. -func (*protocol) Option(tcpip.GettableTransportProtocolOption) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (*protocol) Option(tcpip.GettableTransportProtocolOption) tcpip.Error { + return &tcpip.ErrUnknownProtocolOption{} } // Close implements stack.TransportProtocol.Close. diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index d48877677..73bb66830 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -86,15 +86,15 @@ type endpoint struct { boundNIC tcpip.NICID // lastErrorMu protects lastError. - lastErrorMu sync.Mutex `state:"nosave"` - lastError *tcpip.Error `state:".(string)"` + lastErrorMu sync.Mutex `state:"nosave"` + lastError tcpip.Error // ops is used to get socket level options. ops tcpip.SocketOptions } // NewEndpoint returns a new packet endpoint. -func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { ep := &endpoint{ stack: s, TransportEndpointInfo: stack.TransportEndpointInfo{ @@ -159,16 +159,16 @@ func (ep *endpoint) Close() { func (ep *endpoint) ModerateRecvBuf(copied int) {} // Read implements tcpip.Endpoint.Read. -func (ep *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, *tcpip.Error) { +func (ep *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) { ep.rcvMu.Lock() // If there's no data to read, return that read would block or that the // endpoint is closed. if ep.rcvList.Empty() { - err := tcpip.ErrWouldBlock + var err tcpip.Error = &tcpip.ErrWouldBlock{} if ep.rcvClosed { ep.stats.ReadErrors.ReadClosed.Increment() - err = tcpip.ErrClosedForReceive + err = &tcpip.ErrClosedForReceive{} } ep.rcvMu.Unlock() return tcpip.ReadResult{}, err @@ -198,49 +198,49 @@ func (ep *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResul n, err := packet.data.ReadTo(dst, opts.Peek) if n == 0 && err != nil { - return res, tcpip.ErrBadBuffer + return res, &tcpip.ErrBadBuffer{} } res.Count = n return res, nil } -func (*endpoint) Write(tcpip.Payloader, tcpip.WriteOptions) (int64, *tcpip.Error) { +func (*endpoint) Write(tcpip.Payloader, tcpip.WriteOptions) (int64, tcpip.Error) { // TODO(gvisor.dev/issue/173): Implement. - return 0, tcpip.ErrInvalidOptionValue + return 0, &tcpip.ErrInvalidOptionValue{} } // Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be // disconnected, and this function always returns tpcip.ErrNotSupported. -func (*endpoint) Disconnect() *tcpip.Error { - return tcpip.ErrNotSupported +func (*endpoint) Disconnect() tcpip.Error { + return &tcpip.ErrNotSupported{} } // Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be -// connected, and this function always returnes tcpip.ErrNotSupported. -func (*endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { - return tcpip.ErrNotSupported +// connected, and this function always returnes *tcpip.ErrNotSupported. +func (*endpoint) Connect(addr tcpip.FullAddress) tcpip.Error { + return &tcpip.ErrNotSupported{} } // Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used -// with Shutdown, and this function always returns tcpip.ErrNotSupported. -func (*endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { - return tcpip.ErrNotSupported +// with Shutdown, and this function always returns *tcpip.ErrNotSupported. +func (*endpoint) Shutdown(flags tcpip.ShutdownFlags) tcpip.Error { + return &tcpip.ErrNotSupported{} } // Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with -// Listen, and this function always returns tcpip.ErrNotSupported. -func (*endpoint) Listen(backlog int) *tcpip.Error { - return tcpip.ErrNotSupported +// Listen, and this function always returns *tcpip.ErrNotSupported. +func (*endpoint) Listen(backlog int) tcpip.Error { + return &tcpip.ErrNotSupported{} } // Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with -// Accept, and this function always returns tcpip.ErrNotSupported. -func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { - return nil, nil, tcpip.ErrNotSupported +// Accept, and this function always returns *tcpip.ErrNotSupported. +func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) { + return nil, nil, &tcpip.ErrNotSupported{} } // Bind implements tcpip.Endpoint.Bind. -func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { +func (ep *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error { // TODO(gvisor.dev/issue/173): Add Bind support. // "By default, all packets of the specified protocol type are passed @@ -274,14 +274,14 @@ func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { } // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. -func (*endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { - return tcpip.FullAddress{}, tcpip.ErrNotSupported +func (*endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { + return tcpip.FullAddress{}, &tcpip.ErrNotSupported{} } // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. -func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { +func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) { // Even a connected socket doesn't return a remote address. - return tcpip.FullAddress{}, tcpip.ErrNotConnected + return tcpip.FullAddress{}, &tcpip.ErrNotConnected{} } // Readiness implements tcpip.Endpoint.Readiness. @@ -303,19 +303,19 @@ func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { // SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be // used with SetSockOpt, and this function always returns -// tcpip.ErrNotSupported. -func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { +// *tcpip.ErrNotSupported. +func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { switch opt.(type) { case *tcpip.SocketDetachFilterOption: return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. -func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { +func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { switch opt { case tcpip.ReceiveBufferSizeOption: // Make sure the receive buffer size is within the min and max @@ -336,11 +336,11 @@ func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } -func (ep *endpoint) LastError() *tcpip.Error { +func (ep *endpoint) LastError() tcpip.Error { ep.lastErrorMu.Lock() defer ep.lastErrorMu.Unlock() @@ -350,19 +350,19 @@ func (ep *endpoint) LastError() *tcpip.Error { } // UpdateLastError implements tcpip.SocketOptionsHandler.UpdateLastError. -func (ep *endpoint) UpdateLastError(err *tcpip.Error) { +func (ep *endpoint) UpdateLastError(err tcpip.Error) { ep.lastErrorMu.Lock() ep.lastError = err ep.lastErrorMu.Unlock() } // GetSockOpt implements tcpip.Endpoint.GetSockOpt. -func (ep *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error { - return tcpip.ErrNotSupported +func (ep *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error { + return &tcpip.ErrNotSupported{} } // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. -func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { +func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { switch opt { case tcpip.ReceiveQueueSizeOption: v := 0 @@ -381,7 +381,7 @@ func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { return v, nil default: - return -1, tcpip.ErrUnknownProtocolOption + return -1, &tcpip.ErrUnknownProtocolOption{} } } diff --git a/pkg/tcpip/transport/packet/endpoint_state.go b/pkg/tcpip/transport/packet/endpoint_state.go index 4d98fb051..ece662c0d 100644 --- a/pkg/tcpip/transport/packet/endpoint_state.go +++ b/pkg/tcpip/transport/packet/endpoint_state.go @@ -68,24 +68,6 @@ func (ep *endpoint) afterLoad() { // TODO(gvisor.dev/173): Once bind is supported, choose the right NIC. if err := ep.stack.RegisterPacketEndpoint(0, ep.netProto, ep); err != nil { - panic(*err) + panic(err) } } - -// saveLastError is invoked by stateify. -func (ep *endpoint) saveLastError() string { - if ep.lastError == nil { - return "" - } - - return ep.lastError.String() -} - -// loadLastError is invoked by stateify. -func (ep *endpoint) loadLastError(s string) { - if s == "" { - return - } - - ep.lastError = tcpip.StringToError(s) -} diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index 6c6d45188..9c9ccc0ff 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -93,13 +93,13 @@ type endpoint struct { } // NewEndpoint returns a raw endpoint for the given protocols. -func NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { return newEndpoint(stack, netProto, transProto, waiterQueue, true /* associated */) } -func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) { +func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, tcpip.Error) { if netProto != header.IPv4ProtocolNumber && netProto != header.IPv6ProtocolNumber { - return nil, tcpip.ErrUnknownProtocol + return nil, &tcpip.ErrUnknownProtocol{} } e := &endpoint{ @@ -189,16 +189,16 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { } // Read implements tcpip.Endpoint.Read. -func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, *tcpip.Error) { +func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) { e.rcvMu.Lock() // If there's no data to read, return that read would block or that the // endpoint is closed. if e.rcvList.Empty() { - err := tcpip.ErrWouldBlock + var err tcpip.Error = &tcpip.ErrWouldBlock{} if e.rcvClosed { e.stats.ReadErrors.ReadClosed.Increment() - err = tcpip.ErrClosedForReceive + err = &tcpip.ErrClosedForReceive{} } e.rcvMu.Unlock() return tcpip.ReadResult{}, err @@ -225,37 +225,37 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult n, err := pkt.data.ReadTo(dst, opts.Peek) if n == 0 && err != nil { - return res, tcpip.ErrBadBuffer + return res, &tcpip.ErrBadBuffer{} } res.Count = n return res, nil } // Write implements tcpip.Endpoint.Write. -func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tcpip.Error) { +func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { // We can create, but not write to, unassociated IPv6 endpoints. if !e.associated && e.TransportEndpointInfo.NetProto == header.IPv6ProtocolNumber { - return 0, tcpip.ErrInvalidOptionValue + return 0, &tcpip.ErrInvalidOptionValue{} } if opts.To != nil { // Raw sockets do not support sending to a IPv4 address on a IPv6 endpoint. if e.TransportEndpointInfo.NetProto == header.IPv6ProtocolNumber && len(opts.To.Addr) != header.IPv6AddressSize { - return 0, tcpip.ErrInvalidOptionValue + return 0, &tcpip.ErrInvalidOptionValue{} } } n, err := e.write(p, opts) - switch err { + switch err.(type) { case nil: e.stats.PacketsSent.Increment() - case tcpip.ErrMessageTooLong, tcpip.ErrInvalidOptionValue: + case *tcpip.ErrMessageTooLong, *tcpip.ErrInvalidOptionValue: e.stats.WriteErrors.InvalidArgs.Increment() - case tcpip.ErrClosedForSend: + case *tcpip.ErrClosedForSend: e.stats.WriteErrors.WriteClosed.Increment() - case tcpip.ErrInvalidEndpointState: + case *tcpip.ErrInvalidEndpointState: e.stats.WriteErrors.InvalidEndpointState.Increment() - case tcpip.ErrNoRoute, tcpip.ErrBroadcastDisabled, tcpip.ErrNetworkUnreachable: + case *tcpip.ErrNoRoute, *tcpip.ErrBroadcastDisabled, *tcpip.ErrNetworkUnreachable: // Errors indicating any problem with IP routing of the packet. e.stats.SendErrors.NoRoute.Increment() default: @@ -265,22 +265,22 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc return n, err } -func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tcpip.Error) { +func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { // MSG_MORE is unimplemented. This also means that MSG_EOR is a no-op. if opts.More { - return 0, tcpip.ErrInvalidOptionValue + return 0, &tcpip.ErrInvalidOptionValue{} } e.mu.RLock() defer e.mu.RUnlock() if e.closed { - return 0, tcpip.ErrInvalidEndpointState + return 0, &tcpip.ErrInvalidEndpointState{} } payloadBytes := make([]byte, p.Len()) if _, err := io.ReadFull(p, payloadBytes); err != nil { - return 0, tcpip.ErrBadBuffer + return 0, &tcpip.ErrBadBuffer{} } // If this is an unassociated socket and callee provided a nonzero @@ -288,7 +288,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc if e.ops.GetHeaderIncluded() { ip := header.IPv4(payloadBytes) if !ip.IsValid(len(payloadBytes)) { - return 0, tcpip.ErrInvalidOptionValue + return 0, &tcpip.ErrInvalidOptionValue{} } dstAddr := ip.DestinationAddress() // Update dstAddr with the address in the IP header, unless @@ -309,7 +309,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc // If the user doesn't specify a destination, they should have // connected to another address. if !e.connected { - return 0, tcpip.ErrDestinationRequired + return 0, &tcpip.ErrDestinationRequired{} } return e.finishWrite(payloadBytes, e.route) @@ -319,7 +319,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc // goes through a different NIC than the endpoint was bound to. nic := opts.To.NIC if e.bound && nic != 0 && nic != e.BindNICID { - return 0, tcpip.ErrNoRoute + return 0, &tcpip.ErrNoRoute{} } // Find the route to the destination. If BindAddress is 0, @@ -336,7 +336,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc // finishWrite writes the payload to a route. It resolves the route if // necessary. It's really just a helper to make defer unnecessary in Write. -func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64, *tcpip.Error) { +func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64, tcpip.Error) { if e.ops.GetHeaderIncluded() { pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ Data: buffer.View(payloadBytes).ToVectorisedView(), @@ -363,22 +363,22 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64, } // Disconnect implements tcpip.Endpoint.Disconnect. -func (*endpoint) Disconnect() *tcpip.Error { - return tcpip.ErrNotSupported +func (*endpoint) Disconnect() tcpip.Error { + return &tcpip.ErrNotSupported{} } // Connect implements tcpip.Endpoint.Connect. -func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { +func (e *endpoint) Connect(addr tcpip.FullAddress) tcpip.Error { // Raw sockets do not support connecting to a IPv4 address on a IPv6 endpoint. if e.TransportEndpointInfo.NetProto == header.IPv6ProtocolNumber && len(addr.Addr) != header.IPv6AddressSize { - return tcpip.ErrAddressFamilyNotSupported + return &tcpip.ErrAddressFamilyNotSupported{} } e.mu.Lock() defer e.mu.Unlock() if e.closed { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } nic := addr.NIC @@ -393,7 +393,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { } else if addr.NIC != e.BindNICID { // We're bound and addr specifies a NIC. They must be // the same. - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } } @@ -424,34 +424,34 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { } // Shutdown implements tcpip.Endpoint.Shutdown. It's a noop for raw sockets. -func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { +func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() if !e.connected { - return tcpip.ErrNotConnected + return &tcpip.ErrNotConnected{} } return nil } // Listen implements tcpip.Endpoint.Listen. -func (*endpoint) Listen(backlog int) *tcpip.Error { - return tcpip.ErrNotSupported +func (*endpoint) Listen(backlog int) tcpip.Error { + return &tcpip.ErrNotSupported{} } // Accept implements tcpip.Endpoint.Accept. -func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { - return nil, nil, tcpip.ErrNotSupported +func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) { + return nil, nil, &tcpip.ErrNotSupported{} } // Bind implements tcpip.Endpoint.Bind. -func (e *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { +func (e *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() // If a local address was specified, verify that it's valid. if len(addr.Addr) != 0 && e.stack.CheckLocalAddress(e.RegisterNICID, e.NetProto, addr.Addr) == 0 { - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } if e.associated { @@ -471,14 +471,14 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { } // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. -func (*endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { - return tcpip.FullAddress{}, tcpip.ErrNotSupported +func (*endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { + return tcpip.FullAddress{}, &tcpip.ErrNotSupported{} } // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. -func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { +func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) { // Even a connected socket doesn't return a remote address. - return tcpip.FullAddress{}, tcpip.ErrNotConnected + return tcpip.FullAddress{}, &tcpip.ErrNotConnected{} } // Readiness implements tcpip.Endpoint.Readiness. @@ -499,18 +499,18 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { } // SetSockOpt implements tcpip.Endpoint.SetSockOpt. -func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { +func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { switch opt.(type) { case *tcpip.SocketDetachFilterOption: return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. -func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { +func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { switch opt { case tcpip.ReceiveBufferSizeOption: // Make sure the receive buffer size is within the min and max @@ -531,17 +531,17 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } // GetSockOpt implements tcpip.Endpoint.GetSockOpt. -func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error { + return &tcpip.ErrUnknownProtocolOption{} } // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. -func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { +func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { switch opt { case tcpip.ReceiveQueueSizeOption: v := 0 @@ -560,7 +560,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { return v, nil default: - return -1, tcpip.ErrUnknownProtocolOption + return -1, &tcpip.ErrUnknownProtocolOption{} } } @@ -680,7 +680,7 @@ func (e *endpoint) Stats() tcpip.EndpointStats { func (*endpoint) Wait() {} // LastError implements tcpip.Endpoint.LastError. -func (*endpoint) LastError() *tcpip.Error { +func (*endpoint) LastError() tcpip.Error { return nil } diff --git a/pkg/tcpip/transport/raw/endpoint_state.go b/pkg/tcpip/transport/raw/endpoint_state.go index 65c64d99f..263ec5146 100644 --- a/pkg/tcpip/transport/raw/endpoint_state.go +++ b/pkg/tcpip/transport/raw/endpoint_state.go @@ -73,7 +73,7 @@ func (e *endpoint) Resume(s *stack.Stack) { // If the endpoint is connected, re-connect. if e.connected { - var err *tcpip.Error + var err tcpip.Error // TODO(gvisor.dev/issue/4906): Properly restore the route with the right // remote address. We used to pass e.remote.RemoteAddress which was // effectively the empty address but since moving e.route to hold a pointer @@ -89,7 +89,7 @@ func (e *endpoint) Resume(s *stack.Stack) { // If the endpoint is bound, re-bind. if e.bound { if e.stack.CheckLocalAddress(e.RegisterNICID, e.NetProto, e.BindAddr) == 0 { - panic(tcpip.ErrBadLocalAddress) + panic(&tcpip.ErrBadLocalAddress{}) } } diff --git a/pkg/tcpip/transport/raw/protocol.go b/pkg/tcpip/transport/raw/protocol.go index f30aa2a4a..e393b993d 100644 --- a/pkg/tcpip/transport/raw/protocol.go +++ b/pkg/tcpip/transport/raw/protocol.go @@ -25,11 +25,11 @@ import ( type EndpointFactory struct{} // NewUnassociatedEndpoint implements stack.RawFactory.NewUnassociatedEndpoint. -func (EndpointFactory) NewUnassociatedEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (EndpointFactory) NewUnassociatedEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { return newEndpoint(stack, netProto, transProto, waiterQueue, false /* associated */) } // NewPacketEndpoint implements stack.RawFactory.NewPacketEndpoint. -func (EndpointFactory) NewPacketEndpoint(stack *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (EndpointFactory) NewPacketEndpoint(stack *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { return packet.NewEndpoint(stack, cooked, netProto, waiterQueue) } diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index e475c36f3..842c1622b 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -199,7 +199,7 @@ func (l *listenContext) isCookieValid(id stack.TransportEndpointID, cookie seqnu // createConnectingEndpoint creates a new endpoint in a connecting state, with // the connection parameters given by the arguments. -func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, irs seqnum.Value, rcvdSynOpts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, *tcpip.Error) { +func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, irs seqnum.Value, rcvdSynOpts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, tcpip.Error) { // Create a new endpoint. netProto := l.netProto if netProto == 0 { @@ -242,7 +242,7 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i // On success, a handshake h is returned with h.ep.mu held. // // Precondition: if l.listenEP != nil, l.listenEP.mu must be locked. -func (l *listenContext) startHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*handshake, *tcpip.Error) { +func (l *listenContext) startHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*handshake, tcpip.Error) { // Create new endpoint. irs := s.sequenceNumber isn := generateSecureISN(s.id, l.stack.Seed()) @@ -267,7 +267,7 @@ func (l *listenContext) startHandshake(s *segment, opts *header.TCPSynOptions, q ep.mu.Unlock() ep.Close() - return nil, tcpip.ErrConnectionAborted + return nil, &tcpip.ErrConnectionAborted{} } l.addPendingEndpoint(ep) @@ -281,7 +281,7 @@ func (l *listenContext) startHandshake(s *segment, opts *header.TCPSynOptions, q l.removePendingEndpoint(ep) - return nil, tcpip.ErrConnectionAborted + return nil, &tcpip.ErrConnectionAborted{} } deferAccept = l.listenEP.deferAccept @@ -313,7 +313,7 @@ func (l *listenContext) startHandshake(s *segment, opts *header.TCPSynOptions, q // established endpoint is returned with e.mu held. // // Precondition: if l.listenEP != nil, l.listenEP.mu must be locked. -func (l *listenContext) performHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*endpoint, *tcpip.Error) { +func (l *listenContext) performHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*endpoint, tcpip.Error) { h, err := l.startHandshake(s, opts, queue, owner) if err != nil { return nil, err @@ -467,7 +467,7 @@ func (e *endpoint) notifyAborted() { // cookies to accept connections. // // Precondition: if ctx.listenEP != nil, ctx.listenEP.mu must be locked. -func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header.TCPSynOptions) *tcpip.Error { +func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header.TCPSynOptions) tcpip.Error { defer s.decRef() h, err := ctx.startHandshake(s, opts, &waiter.Queue{}, e.owner) @@ -522,7 +522,7 @@ func (e *endpoint) acceptQueueIsFull() bool { // and needs to handle it. // // Precondition: if ctx.listenEP != nil, ctx.listenEP.mu must be locked. -func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) *tcpip.Error { +func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Error { e.rcvListMu.Lock() rcvClosed := e.rcvClosed e.rcvListMu.Unlock() diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 62954d7e4..34a631b53 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -226,7 +226,7 @@ func (h *handshake) checkAck(s *segment) bool { // synSentState handles a segment received when the TCP 3-way handshake is in // the SYN-SENT state. -func (h *handshake) synSentState(s *segment) *tcpip.Error { +func (h *handshake) synSentState(s *segment) tcpip.Error { // RFC 793, page 37, states that in the SYN-SENT state, a reset is // acceptable if the ack field acknowledges the SYN. if s.flagIsSet(header.TCPFlagRst) { @@ -237,7 +237,7 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error { h.ep.workerCleanup = true // Although the RFC above calls out ECONNRESET, Linux actually returns // ECONNREFUSED here so we do as well. - return tcpip.ErrConnectionRefused + return &tcpip.ErrConnectionRefused{} } return nil } @@ -314,12 +314,12 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error { // synRcvdState handles a segment received when the TCP 3-way handshake is in // the SYN-RCVD state. -func (h *handshake) synRcvdState(s *segment) *tcpip.Error { +func (h *handshake) synRcvdState(s *segment) tcpip.Error { if s.flagIsSet(header.TCPFlagRst) { // RFC 793, page 37, states that in the SYN-RCVD state, a reset // is acceptable if the sequence number is in the window. if s.sequenceNumber.InWindow(h.ackNum, h.rcvWnd) { - return tcpip.ErrConnectionRefused + return &tcpip.ErrConnectionRefused{} } return nil } @@ -333,7 +333,9 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { // number and "After sending the acknowledgment, drop the unacceptable // segment and return." if !s.sequenceNumber.InWindow(h.ackNum, h.rcvWnd) { - h.ep.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck, h.iss+1, h.ackNum, h.rcvWnd) + if h.ep.allowOutOfWindowAck() { + h.ep.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck, h.iss+1, h.ackNum, h.rcvWnd) + } return nil } @@ -349,7 +351,7 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { h.ep.sendRaw(buffer.VectorisedView{}, header.TCPFlagRst|header.TCPFlagAck, seq, ack, 0) if !h.active { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } h.resetState() @@ -412,7 +414,7 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { return nil } -func (h *handshake) handleSegment(s *segment) *tcpip.Error { +func (h *handshake) handleSegment(s *segment) tcpip.Error { h.sndWnd = s.window if !s.flagIsSet(header.TCPFlagSyn) && h.sndWndScale > 0 { h.sndWnd <<= uint8(h.sndWndScale) @@ -429,7 +431,7 @@ func (h *handshake) handleSegment(s *segment) *tcpip.Error { // processSegments goes through the segment queue and processes up to // maxSegmentsPerWake (if they're available). -func (h *handshake) processSegments() *tcpip.Error { +func (h *handshake) processSegments() tcpip.Error { for i := 0; i < maxSegmentsPerWake; i++ { s := h.ep.segmentQueue.dequeue() if s == nil { @@ -505,7 +507,7 @@ func (h *handshake) start() { } // complete completes the TCP 3-way handshake initiated by h.start(). -func (h *handshake) complete() *tcpip.Error { +func (h *handshake) complete() tcpip.Error { // Set up the wakers. var s sleep.Sleeper resendWaker := sleep.Waker{} @@ -555,7 +557,7 @@ func (h *handshake) complete() *tcpip.Error { case wakerForNotification: n := h.ep.fetchNotifications() if (n¬ifyClose)|(n¬ifyAbort) != 0 { - return tcpip.ErrAborted + return &tcpip.ErrAborted{} } if n¬ifyDrain != 0 { for !h.ep.segmentQueue.empty() { @@ -593,19 +595,19 @@ type backoffTimer struct { t *time.Timer } -func newBackoffTimer(timeout, maxTimeout time.Duration, f func()) (*backoffTimer, *tcpip.Error) { +func newBackoffTimer(timeout, maxTimeout time.Duration, f func()) (*backoffTimer, tcpip.Error) { if timeout > maxTimeout { - return nil, tcpip.ErrTimeout + return nil, &tcpip.ErrTimeout{} } bt := &backoffTimer{timeout: timeout, maxTimeout: maxTimeout} bt.t = time.AfterFunc(timeout, f) return bt, nil } -func (bt *backoffTimer) reset() *tcpip.Error { +func (bt *backoffTimer) reset() tcpip.Error { bt.timeout *= 2 if bt.timeout > MaxRTO { - return tcpip.ErrTimeout + return &tcpip.ErrTimeout{} } bt.t.Reset(bt.timeout) return nil @@ -706,7 +708,7 @@ type tcpFields struct { txHash uint32 } -func (e *endpoint) sendSynTCP(r *stack.Route, tf tcpFields, opts header.TCPSynOptions) *tcpip.Error { +func (e *endpoint) sendSynTCP(r *stack.Route, tf tcpFields, opts header.TCPSynOptions) tcpip.Error { tf.opts = makeSynOptions(opts) // We ignore SYN send errors and let the callers re-attempt send. if err := e.sendTCP(r, tf, buffer.VectorisedView{}, nil); err != nil { @@ -716,7 +718,7 @@ func (e *endpoint) sendSynTCP(r *stack.Route, tf tcpFields, opts header.TCPSynOp return nil } -func (e *endpoint) sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO) *tcpip.Error { +func (e *endpoint) sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO) tcpip.Error { tf.txHash = e.txHash if err := sendTCP(r, tf, data, gso, e.owner); err != nil { e.stats.SendErrors.SegmentSendToNetworkFailed.Increment() @@ -755,7 +757,7 @@ func buildTCPHdr(r *stack.Route, tf tcpFields, pkt *stack.PacketBuffer, gso *sta } } -func sendTCPBatch(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO, owner tcpip.PacketOwner) *tcpip.Error { +func sendTCPBatch(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO, owner tcpip.PacketOwner) tcpip.Error { // We need to shallow clone the VectorisedView here as ReadToView will // split the VectorisedView and Trim underlying views as it splits. Not // doing the clone here will cause the underlying views of data itself @@ -803,7 +805,7 @@ func sendTCPBatch(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso // sendTCP sends a TCP segment with the provided options via the provided // network endpoint and under the provided identity. -func sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO, owner tcpip.PacketOwner) *tcpip.Error { +func sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO, owner tcpip.PacketOwner) tcpip.Error { optLen := len(tf.opts) if tf.rcvWnd > math.MaxUint16 { tf.rcvWnd = math.MaxUint16 @@ -875,7 +877,7 @@ func (e *endpoint) makeOptions(sackBlocks []header.SACKBlock) []byte { } // sendRaw sends a TCP segment to the endpoint's peer. -func (e *endpoint) sendRaw(data buffer.VectorisedView, flags byte, seq, ack seqnum.Value, rcvWnd seqnum.Size) *tcpip.Error { +func (e *endpoint) sendRaw(data buffer.VectorisedView, flags byte, seq, ack seqnum.Value, rcvWnd seqnum.Size) tcpip.Error { var sackBlocks []header.SACKBlock if e.EndpointState() == StateEstablished && e.rcv.pendingRcvdSegments.Len() > 0 && (flags&header.TCPFlagAck != 0) { sackBlocks = e.sack.Blocks[:e.sack.NumBlocks] @@ -941,12 +943,14 @@ func (e *endpoint) handleClose() { // error code and sends a RST if and only if the error is not ErrConnectionReset // indicating that the connection is being reset due to receiving a RST. This // method must only be called from the protocol goroutine. -func (e *endpoint) resetConnectionLocked(err *tcpip.Error) { +func (e *endpoint) resetConnectionLocked(err tcpip.Error) { // Only send a reset if the connection is being aborted for a reason // other than receiving a reset. e.setEndpointState(StateError) e.hardError = err - if err != tcpip.ErrConnectionReset && err != tcpip.ErrTimeout { + switch err.(type) { + case *tcpip.ErrConnectionReset, *tcpip.ErrTimeout: + default: // The exact sequence number to be used for the RST is the same as the // one used by Linux. We need to handle the case of window being shrunk // which can cause sndNxt to be outside the acceptable window on the @@ -1056,7 +1060,7 @@ func (e *endpoint) drainClosingSegmentQueue() { } } -func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) { +func (e *endpoint) handleReset(s *segment) (ok bool, err tcpip.Error) { if e.rcv.acceptable(s.sequenceNumber, 0) { // RFC 793, page 37 states that "in all states // except SYN-SENT, all reset (RST) segments are @@ -1084,7 +1088,7 @@ func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) { // delete the TCB, and return. case StateCloseWait: e.transitionToStateCloseLocked() - e.hardError = tcpip.ErrAborted + e.hardError = &tcpip.ErrAborted{} e.notifyProtocolGoroutine(notifyTickleWorker) return false, nil default: @@ -1097,14 +1101,14 @@ func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) { // handleSegment is invoked from the processor goroutine // rather than the worker goroutine. e.notifyProtocolGoroutine(notifyResetByPeer) - return false, tcpip.ErrConnectionReset + return false, &tcpip.ErrConnectionReset{} } } return true, nil } // handleSegments processes all inbound segments. -func (e *endpoint) handleSegments(fastPath bool) *tcpip.Error { +func (e *endpoint) handleSegments(fastPath bool) tcpip.Error { checkRequeue := true for i := 0; i < maxSegmentsPerWake; i++ { if e.EndpointState().closed() { @@ -1151,7 +1155,7 @@ func (e *endpoint) probeSegment() { // handleSegment handles a given segment and notifies the worker goroutine if // if the connection should be terminated. -func (e *endpoint) handleSegment(s *segment) (cont bool, err *tcpip.Error) { +func (e *endpoint) handleSegment(s *segment) (cont bool, err tcpip.Error) { // Invoke the tcp probe if installed. The tcp probe function will update // the TCPEndpointState after the segment is processed. defer e.probeSegment() @@ -1183,8 +1187,7 @@ func (e *endpoint) handleSegment(s *segment) (cont bool, err *tcpip.Error) { // endpoint MUST terminate its connection. The local TCP endpoint // should then rely on SYN retransmission from the remote end to // re-establish the connection. - - e.snd.sendAck() + e.snd.maybeSendOutOfWindowAck(s) } else if s.flagIsSet(header.TCPFlagAck) { // Patch the window size in the segment according to the // send window scale. @@ -1225,7 +1228,7 @@ func (e *endpoint) handleSegment(s *segment) (cont bool, err *tcpip.Error) { // keepaliveTimerExpired is called when the keepaliveTimer fires. We send TCP // keepalive packets periodically when the connection is idle. If we don't hear // from the other side after a number of tries, we terminate the connection. -func (e *endpoint) keepaliveTimerExpired() *tcpip.Error { +func (e *endpoint) keepaliveTimerExpired() tcpip.Error { userTimeout := e.userTimeout e.keepalive.Lock() @@ -1239,13 +1242,13 @@ func (e *endpoint) keepaliveTimerExpired() *tcpip.Error { if userTimeout != 0 && time.Since(e.rcv.lastRcvdAckTime) >= userTimeout && e.keepalive.unacked > 0 { e.keepalive.Unlock() e.stack.Stats().TCP.EstablishedTimedout.Increment() - return tcpip.ErrTimeout + return &tcpip.ErrTimeout{} } if e.keepalive.unacked >= e.keepalive.count { e.keepalive.Unlock() e.stack.Stats().TCP.EstablishedTimedout.Increment() - return tcpip.ErrTimeout + return &tcpip.ErrTimeout{} } // RFC1122 4.2.3.6: TCP keepalive is a dataless ACK with @@ -1289,7 +1292,7 @@ func (e *endpoint) disableKeepaliveTimer() { // protocolMainLoop is the main loop of the TCP protocol. It runs in its own // goroutine and is responsible for sending segments and handling received // segments. -func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{}) *tcpip.Error { +func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{}) tcpip.Error { e.mu.Lock() var closeTimer *time.Timer var closeWaker sleep.Waker @@ -1335,6 +1338,14 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ } } + // Reaching this point means that we successfully completed the 3-way + // handshake with our peer. + // + // Completing the 3-way handshake is an indication that the route is valid + // and the remote is reachable as the only way we can complete a handshake + // is if our SYN reached the remote and their ACK reached us. + e.route.ConfirmReachable() + drained := e.drainDone != nil if drained { close(e.drainDone) @@ -1347,25 +1358,25 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ // wakes up. funcs := []struct { w *sleep.Waker - f func() *tcpip.Error + f func() tcpip.Error }{ { w: &e.sndWaker, - f: func() *tcpip.Error { + f: func() tcpip.Error { e.handleWrite() return nil }, }, { w: &e.sndCloseWaker, - f: func() *tcpip.Error { + f: func() tcpip.Error { e.handleClose() return nil }, }, { w: &closeWaker, - f: func() *tcpip.Error { + f: func() tcpip.Error { // This means the socket is being closed due // to the TCP-FIN-WAIT2 timeout was hit. Just // mark the socket as closed. @@ -1376,10 +1387,10 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ }, { w: &e.snd.resendWaker, - f: func() *tcpip.Error { + f: func() tcpip.Error { if !e.snd.retransmitTimerExpired() { e.stack.Stats().TCP.EstablishedTimedout.Increment() - return tcpip.ErrTimeout + return &tcpip.ErrTimeout{} } return nil }, @@ -1390,7 +1401,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ }, { w: &e.newSegmentWaker, - f: func() *tcpip.Error { + f: func() tcpip.Error { return e.handleSegments(false /* fastPath */) }, }, @@ -1400,7 +1411,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ }, { w: &e.notificationWaker, - f: func() *tcpip.Error { + f: func() tcpip.Error { n := e.fetchNotifications() if n¬ifyNonZeroReceiveWindow != 0 { e.rcv.nonZeroWindow() @@ -1417,11 +1428,11 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ } if n¬ifyReset != 0 || n¬ifyAbort != 0 { - return tcpip.ErrConnectionAborted + return &tcpip.ErrConnectionAborted{} } if n¬ifyResetByPeer != 0 { - return tcpip.ErrConnectionReset + return &tcpip.ErrConnectionReset{} } if n¬ifyClose != 0 && closeTimer == nil { @@ -1500,7 +1511,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ // Main loop. Handle segments until both send and receive ends of the // connection have completed. - cleanupOnError := func(err *tcpip.Error) { + cleanupOnError := func(err tcpip.Error) { e.stack.Stats().TCP.CurrentConnected.Decrement() e.workerCleanup = true if err != nil { diff --git a/pkg/tcpip/transport/tcp/cubic.go b/pkg/tcpip/transport/tcp/cubic.go index 7b1f5e763..1975f1a44 100644 --- a/pkg/tcpip/transport/tcp/cubic.go +++ b/pkg/tcpip/transport/tcp/cubic.go @@ -178,8 +178,8 @@ func (c *cubicState) getCwnd(packetsAcked, sndCwnd int, srtt time.Duration) int return int(cwnd) } -// HandleNDupAcks implements congestionControl.HandleNDupAcks. -func (c *cubicState) HandleNDupAcks() { +// HandleLossDetected implements congestionControl.HandleLossDetected. +func (c *cubicState) HandleLossDetected() { // See: https://tools.ietf.org/html/rfc8312#section-4.5 c.numCongestionEvents++ c.t = time.Now() diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go index 809c88732..2d90246e4 100644 --- a/pkg/tcpip/transport/tcp/dual_stack_test.go +++ b/pkg/tcpip/transport/tcp/dual_stack_test.go @@ -37,7 +37,7 @@ func TestV4MappedConnectOnV6Only(t *testing.T) { // Start connection attempt, it must fail. err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestV4MappedAddr, Port: context.TestPort}) - if err != tcpip.ErrNoRoute { + if _, ok := err.(*tcpip.ErrNoRoute); !ok { t.Fatalf("Unexpected return value from Connect: %v", err) } } @@ -49,7 +49,7 @@ func testV4Connect(t *testing.T, c *context.Context, checkers ...checker.Network defer c.WQ.EventUnregister(&we) err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestV4MappedAddr, Port: context.TestPort}) - if err != tcpip.ErrConnectStarted { + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { t.Fatalf("Unexpected return value from Connect: %v", err) } @@ -156,7 +156,7 @@ func testV6Connect(t *testing.T, c *context.Context, checkers ...checker.Network defer c.WQ.EventUnregister(&we) err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestV6Addr, Port: context.TestPort}) - if err != tcpip.ErrConnectStarted { + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { t.Fatalf("Unexpected return value from Connect: %v", err) } @@ -391,7 +391,7 @@ func testV4Accept(t *testing.T, c *context.Context) { defer c.WQ.EventUnregister(&we) nep, _, err := c.EP.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -525,7 +525,7 @@ func TestV6AcceptOnV6(t *testing.T) { defer c.WQ.EventUnregister(&we) var addr tcpip.FullAddress _, _, err := c.EP.Accept(&addr) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -549,7 +549,7 @@ func TestV4AcceptOnV4(t *testing.T) { defer c.Cleanup() // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { t.Fatalf("NewEndpoint failed: %v", err) @@ -613,7 +613,7 @@ func testV4ListenClose(t *testing.T, c *context.Context) { c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) nep, _, err := c.EP.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -635,7 +635,7 @@ func TestV4ListenCloseOnV4(t *testing.T) { defer c.Cleanup() // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { t.Fatalf("NewEndpoint failed: %v", err) diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index b6bd6d455..4e5a6089f 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -386,12 +386,12 @@ type endpoint struct { // hardError is meaningful only when state is stateError. It stores the // error to be returned when read/write syscalls are called and the // endpoint is in this state. hardError is protected by endpoint mu. - hardError *tcpip.Error `state:".(string)"` + hardError tcpip.Error // lastError represents the last error that the endpoint reported; // access to it is protected by the following mutex. - lastErrorMu sync.Mutex `state:"nosave"` - lastError *tcpip.Error `state:".(string)"` + lastErrorMu sync.Mutex `state:"nosave"` + lastError tcpip.Error // rcvReadMu synchronizes calls to Read. // @@ -688,6 +688,10 @@ type endpoint struct { // ops is used to get socket level options. ops tcpip.SocketOptions + + // lastOutOfWindowAckTime is the time at which the an ACK was sent in response + // to an out of window segment being received by this endpoint. + lastOutOfWindowAckTime time.Time `state:".(unixTime)"` } // UniqueID implements stack.TransportEndpoint.UniqueID. @@ -1059,7 +1063,7 @@ func (e *endpoint) Close() { if isResetState { // Close the endpoint without doing full shutdown and // send a RST. - e.resetConnectionLocked(tcpip.ErrConnectionAborted) + e.resetConnectionLocked(&tcpip.ErrConnectionAborted{}) e.closeNoShutdownLocked() // Wake up worker to close the endpoint. @@ -1293,14 +1297,14 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { } // Preconditions: e.mu must be held to call this function. -func (e *endpoint) hardErrorLocked() *tcpip.Error { +func (e *endpoint) hardErrorLocked() tcpip.Error { err := e.hardError e.hardError = nil return err } // Preconditions: e.mu must be held to call this function. -func (e *endpoint) lastErrorLocked() *tcpip.Error { +func (e *endpoint) lastErrorLocked() tcpip.Error { e.lastErrorMu.Lock() defer e.lastErrorMu.Unlock() err := e.lastError @@ -1309,7 +1313,7 @@ func (e *endpoint) lastErrorLocked() *tcpip.Error { } // LastError implements tcpip.Endpoint.LastError. -func (e *endpoint) LastError() *tcpip.Error { +func (e *endpoint) LastError() tcpip.Error { e.LockUser() defer e.UnlockUser() if err := e.hardErrorLocked(); err != nil { @@ -1319,7 +1323,7 @@ func (e *endpoint) LastError() *tcpip.Error { } // UpdateLastError implements tcpip.SocketOptionsHandler.UpdateLastError. -func (e *endpoint) UpdateLastError(err *tcpip.Error) { +func (e *endpoint) UpdateLastError(err tcpip.Error) { e.LockUser() e.lastErrorMu.Lock() e.lastError = err @@ -1328,7 +1332,7 @@ func (e *endpoint) UpdateLastError(err *tcpip.Error) { } // Read implements tcpip.Endpoint.Read. -func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, *tcpip.Error) { +func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) { e.rcvReadMu.Lock() defer e.rcvReadMu.Unlock() @@ -1337,7 +1341,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult // can remove segments from the list through commitRead(). first, last, serr := e.startRead() if serr != nil { - if serr == tcpip.ErrClosedForReceive { + if _, ok := serr.(*tcpip.ErrClosedForReceive); ok { e.stats.ReadErrors.ReadClosed.Increment() } return tcpip.ReadResult{}, serr @@ -1377,7 +1381,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult // If something is read, we must report it. Report error when nothing is read. if done == 0 && err != nil { - return tcpip.ReadResult{}, tcpip.ErrBadBuffer + return tcpip.ReadResult{}, &tcpip.ErrBadBuffer{} } return tcpip.ReadResult{ Count: done, @@ -1389,7 +1393,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult // inclusive range of segments that can be read. // // Precondition: e.rcvReadMu must be held. -func (e *endpoint) startRead() (first, last *segment, err *tcpip.Error) { +func (e *endpoint) startRead() (first, last *segment, err tcpip.Error) { e.LockUser() defer e.UnlockUser() @@ -1398,7 +1402,7 @@ func (e *endpoint) startRead() (first, last *segment, err *tcpip.Error) { // on a receive. It can expect to read any data after the handshake // is complete. RFC793, section 3.9, p58. if e.EndpointState() == StateSynSent { - return nil, nil, tcpip.ErrWouldBlock + return nil, nil, &tcpip.ErrWouldBlock{} } // The endpoint can be read if it's connected, or if it's already closed @@ -1414,17 +1418,17 @@ func (e *endpoint) startRead() (first, last *segment, err *tcpip.Error) { if err := e.hardErrorLocked(); err != nil { return nil, nil, err } - return nil, nil, tcpip.ErrClosedForReceive + return nil, nil, &tcpip.ErrClosedForReceive{} } e.stats.ReadErrors.NotConnected.Increment() - return nil, nil, tcpip.ErrNotConnected + return nil, nil, &tcpip.ErrNotConnected{} } if e.rcvBufUsed == 0 { if e.rcvClosed || !e.EndpointState().connected() { - return nil, nil, tcpip.ErrClosedForReceive + return nil, nil, &tcpip.ErrClosedForReceive{} } - return nil, nil, tcpip.ErrWouldBlock + return nil, nil, &tcpip.ErrWouldBlock{} } return e.rcvList.Front(), e.rcvList.Back(), nil @@ -1476,39 +1480,39 @@ func (e *endpoint) commitRead(done int) *segment { // moment. If the endpoint is not writable then it returns an error // indicating the reason why it's not writable. // Caller must hold e.mu and e.sndBufMu -func (e *endpoint) isEndpointWritableLocked() (int, *tcpip.Error) { +func (e *endpoint) isEndpointWritableLocked() (int, tcpip.Error) { // The endpoint cannot be written to if it's not connected. switch s := e.EndpointState(); { case s == StateError: if err := e.hardErrorLocked(); err != nil { return 0, err } - return 0, tcpip.ErrClosedForSend + return 0, &tcpip.ErrClosedForSend{} case !s.connecting() && !s.connected(): - return 0, tcpip.ErrClosedForSend + return 0, &tcpip.ErrClosedForSend{} case s.connecting(): // As per RFC793, page 56, a send request arriving when in connecting // state, can be queued to be completed after the state becomes // connected. Return an error code for the caller of endpoint Write to // try again, until the connection handshake is complete. - return 0, tcpip.ErrWouldBlock + return 0, &tcpip.ErrWouldBlock{} } // Check if the connection has already been closed for sends. if e.sndClosed { - return 0, tcpip.ErrClosedForSend + return 0, &tcpip.ErrClosedForSend{} } sndBufSize := e.getSendBufferSize() avail := sndBufSize - e.sndBufUsed if avail <= 0 { - return 0, tcpip.ErrWouldBlock + return 0, &tcpip.ErrWouldBlock{} } return avail, nil } // Write writes data to the endpoint's peer. -func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tcpip.Error) { +func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { // Linux completely ignores any address passed to sendto(2) for TCP sockets // (without the MSG_FASTOPEN flag). Corking is unimplemented, so opts.More // and opts.EndOfRecord are also ignored. @@ -1516,7 +1520,7 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc e.LockUser() defer e.UnlockUser() - nextSeg, n, err := func() (*segment, int, *tcpip.Error) { + nextSeg, n, err := func() (*segment, int, tcpip.Error) { e.sndBufMu.Lock() defer e.sndBufMu.Unlock() @@ -1526,7 +1530,7 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc return nil, 0, err } - v, err := func() ([]byte, *tcpip.Error) { + v, err := func() ([]byte, tcpip.Error) { // We can release locks while copying data. // // This is not possible if atomic is set, because we can't allow the @@ -1549,7 +1553,7 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc } v := make([]byte, avail) if _, err := io.ReadFull(p, v); err != nil { - return nil, tcpip.ErrBadBuffer + return nil, &tcpip.ErrBadBuffer{} } return v, nil }() @@ -1702,7 +1706,7 @@ func (e *endpoint) getSendBufferSize() int { } // SetSockOptInt sets a socket option. -func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { +func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { // Lower 2 bits represents ECN bits. RFC 3168, section 23.1 const inetECNMask = 3 @@ -1730,7 +1734,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { case tcpip.MaxSegOption: userMSS := v if userMSS < header.TCPMinimumMSS || userMSS > header.TCPMaximumMSS { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } e.LockUser() e.userMSS = uint16(userMSS) @@ -1741,7 +1745,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { // Return not supported if attempting to set this option to // anything other than path MTU discovery disabled. if v != tcpip.PMTUDiscoveryDont { - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } case tcpip.ReceiveBufferSizeOption: @@ -1801,7 +1805,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { case tcpip.TCPSynCountOption: if v < 1 || v > 255 { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } e.LockUser() e.maxSynRetries = uint8(v) @@ -1817,7 +1821,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { return nil default: e.UnlockUser() - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } } var rs tcpip.TCPReceiveBufferSizeRangeOption @@ -1838,7 +1842,7 @@ func (e *endpoint) HasNIC(id int32) bool { } // SetSockOpt sets a socket option. -func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { +func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { switch v := opt.(type) { case *tcpip.KeepaliveIdleOption: e.keepalive.Lock() @@ -1884,7 +1888,7 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { // Linux returns ENOENT when an invalid congestion // control algorithm is specified. - return tcpip.ErrNoSuchFile + return &tcpip.ErrNoSuchFile{} case *tcpip.TCPLingerTimeoutOption: e.LockUser() @@ -1927,13 +1931,13 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { } // readyReceiveSize returns the number of bytes ready to be received. -func (e *endpoint) readyReceiveSize() (int, *tcpip.Error) { +func (e *endpoint) readyReceiveSize() (int, tcpip.Error) { e.LockUser() defer e.UnlockUser() // The endpoint cannot be in listen state. if e.EndpointState() == StateListen { - return 0, tcpip.ErrInvalidEndpointState + return 0, &tcpip.ErrInvalidEndpointState{} } e.rcvListMu.Lock() @@ -1943,7 +1947,7 @@ func (e *endpoint) readyReceiveSize() (int, *tcpip.Error) { } // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. -func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { +func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { switch opt { case tcpip.KeepaliveCountOption: e.keepalive.Lock() @@ -2007,24 +2011,38 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { return 1, nil default: - return -1, tcpip.ErrUnknownProtocolOption + return -1, &tcpip.ErrUnknownProtocolOption{} } } +func (e *endpoint) getTCPInfo() tcpip.TCPInfoOption { + info := tcpip.TCPInfoOption{} + e.LockUser() + snd := e.snd + if snd != nil { + // We do not calculate RTT before sending the data packets. If + // the connection did not send and receive data, then RTT will + // be zero. + snd.rtt.Lock() + info.RTT = snd.rtt.srtt + info.RTTVar = snd.rtt.rttvar + snd.rtt.Unlock() + + info.RTO = snd.rto + info.CcState = snd.state + info.SndSsthresh = uint32(snd.sndSsthresh) + info.SndCwnd = uint32(snd.sndCwnd) + info.ReorderSeen = snd.rc.reorderSeen + } + e.UnlockUser() + return info +} + // GetSockOpt implements tcpip.Endpoint.GetSockOpt. -func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error { +func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error { switch o := opt.(type) { case *tcpip.TCPInfoOption: - *o = tcpip.TCPInfoOption{} - e.LockUser() - snd := e.snd - e.UnlockUser() - if snd != nil { - snd.rtt.Lock() - o.RTT = snd.rtt.srtt - o.RTTVar = snd.rtt.rttvar - snd.rtt.Unlock() - } + *o = e.getTCPInfo() case *tcpip.KeepaliveIdleOption: e.keepalive.Lock() @@ -2070,14 +2088,14 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error { } default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } return nil } // checkV4MappedLocked determines the effective network protocol and converts // addr to its canonical form. -func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, *tcpip.Error) { +func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) { unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProtoLocked(addr, e.ops.GetV6Only()) if err != nil { return tcpip.FullAddress{}, 0, err @@ -2086,18 +2104,20 @@ func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddres } // Disconnect implements tcpip.Endpoint.Disconnect. -func (*endpoint) Disconnect() *tcpip.Error { - return tcpip.ErrNotSupported +func (*endpoint) Disconnect() tcpip.Error { + return &tcpip.ErrNotSupported{} } // Connect connects the endpoint to its peer. -func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { +func (e *endpoint) Connect(addr tcpip.FullAddress) tcpip.Error { err := e.connect(addr, true, true) - if err != nil && !err.IgnoreStats() { - // Connect failed. Let's wake up any waiters. - e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) - e.stack.Stats().TCP.FailedConnectionAttempts.Increment() - e.stats.FailedConnectionAttempts.Increment() + if err != nil { + if !err.IgnoreStats() { + // Connect failed. Let's wake up any waiters. + e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) + e.stack.Stats().TCP.FailedConnectionAttempts.Increment() + e.stats.FailedConnectionAttempts.Increment() + } } return err } @@ -2108,7 +2128,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { // created (so no new handshaking is done); for stack-accepted connections not // yet accepted by the app, they are restored without running the main goroutine // here. -func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tcpip.Error { +func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) tcpip.Error { e.LockUser() defer e.UnlockUser() @@ -2127,7 +2147,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc return nil } // Otherwise return that it's already connected. - return tcpip.ErrAlreadyConnected + return &tcpip.ErrAlreadyConnected{} } nicID := addr.NIC @@ -2140,7 +2160,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc } if nicID != 0 && nicID != e.boundNICID { - return tcpip.ErrNoRoute + return &tcpip.ErrNoRoute{} } nicID = e.boundNICID @@ -2152,16 +2172,16 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc case StateConnecting, StateSynSent, StateSynRecv: // A connection request has already been issued but hasn't completed // yet. - return tcpip.ErrAlreadyConnecting + return &tcpip.ErrAlreadyConnecting{} case StateError: if err := e.hardErrorLocked(); err != nil { return err } - return tcpip.ErrConnectionAborted + return &tcpip.ErrConnectionAborted{} default: - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } // Find a route to the desired destination. @@ -2217,12 +2237,12 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc } bindToDevice := tcpip.NICID(e.ops.GetBindToDevice()) - if _, err := e.stack.PickEphemeralPortStable(portOffset, func(p uint16) (bool, *tcpip.Error) { + if _, err := e.stack.PickEphemeralPortStable(portOffset, func(p uint16) (bool, tcpip.Error) { if sameAddr && p == e.ID.RemotePort { return false, nil } if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, bindToDevice, addr, nil /* testPort */); err != nil { - if err != tcpip.ErrPortInUse || !reuse { + if _, ok := err.(*tcpip.ErrPortInUse); !ok || !reuse { return false, nil } transEPID := e.ID @@ -2268,7 +2288,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc id.LocalPort = p if err := e.stack.RegisterTransportEndpoint(netProtos, ProtocolNumber, id, e, e.portFlags, bindToDevice); err != nil { e.stack.ReleasePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, bindToDevice, addr) - if err == tcpip.ErrPortInUse { + if _, ok := err.(*tcpip.ErrPortInUse); ok { return false, nil } return false, err @@ -2323,23 +2343,23 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc go e.protocolMainLoop(handshake, nil) // S/R-SAFE: will be drained before save. } - return tcpip.ErrConnectStarted + return &tcpip.ErrConnectStarted{} } // ConnectEndpoint is not supported. -func (*endpoint) ConnectEndpoint(tcpip.Endpoint) *tcpip.Error { - return tcpip.ErrInvalidEndpointState +func (*endpoint) ConnectEndpoint(tcpip.Endpoint) tcpip.Error { + return &tcpip.ErrInvalidEndpointState{} } // Shutdown closes the read and/or write end of the endpoint connection to its // peer. -func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { +func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) tcpip.Error { e.LockUser() defer e.UnlockUser() return e.shutdownLocked(flags) } -func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) *tcpip.Error { +func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) tcpip.Error { e.shutdownFlags |= flags switch { case e.EndpointState().connected(): @@ -2354,7 +2374,7 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) *tcpip.Error { // If we're fully closed and we have unread data we need to abort // the connection with a RST. if e.shutdownFlags&tcpip.ShutdownWrite != 0 && rcvBufUsed > 0 { - e.resetConnectionLocked(tcpip.ErrConnectionAborted) + e.resetConnectionLocked(&tcpip.ErrConnectionAborted{}) // Wake up worker to terminate loop. e.notifyProtocolGoroutine(notifyTickleWorker) return nil @@ -2368,7 +2388,7 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) *tcpip.Error { // Already closed. e.sndBufMu.Unlock() if e.EndpointState() == StateTimeWait { - return tcpip.ErrNotConnected + return &tcpip.ErrNotConnected{} } return nil } @@ -2401,22 +2421,24 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) *tcpip.Error { } return nil default: - return tcpip.ErrNotConnected + return &tcpip.ErrNotConnected{} } } // Listen puts the endpoint in "listen" mode, which allows it to accept // new connections. -func (e *endpoint) Listen(backlog int) *tcpip.Error { +func (e *endpoint) Listen(backlog int) tcpip.Error { err := e.listen(backlog) - if err != nil && !err.IgnoreStats() { - e.stack.Stats().TCP.FailedConnectionAttempts.Increment() - e.stats.FailedConnectionAttempts.Increment() + if err != nil { + if !err.IgnoreStats() { + e.stack.Stats().TCP.FailedConnectionAttempts.Increment() + e.stats.FailedConnectionAttempts.Increment() + } } return err } -func (e *endpoint) listen(backlog int) *tcpip.Error { +func (e *endpoint) listen(backlog int) tcpip.Error { e.LockUser() defer e.UnlockUser() @@ -2434,7 +2456,7 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { // Adjust the size of the channel iff we can fix // existing pending connections into the new one. if len(e.acceptedChan) > backlog { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } if cap(e.acceptedChan) == backlog { return nil @@ -2466,7 +2488,7 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { // Endpoint must be bound before it can transition to listen mode. if e.EndpointState() != StateBound { e.stats.ReadErrors.InvalidEndpointState.Increment() - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } // Register the endpoint. @@ -2506,7 +2528,7 @@ func (e *endpoint) startAcceptedLoop() { // to an endpoint previously set to listen mode. // // addr if not-nil will contain the peer address of the returned endpoint. -func (e *endpoint) Accept(peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { +func (e *endpoint) Accept(peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) { e.LockUser() defer e.UnlockUser() @@ -2515,7 +2537,7 @@ func (e *endpoint) Accept(peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter. e.rcvListMu.Unlock() // Endpoint must be in listen state before it can accept connections. if rcvClosed || e.EndpointState() != StateListen { - return nil, nil, tcpip.ErrInvalidEndpointState + return nil, nil, &tcpip.ErrInvalidEndpointState{} } // Get the new accepted endpoint. @@ -2526,7 +2548,7 @@ func (e *endpoint) Accept(peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter. case n = <-e.acceptedChan: e.acceptCond.Signal() default: - return nil, nil, tcpip.ErrWouldBlock + return nil, nil, &tcpip.ErrWouldBlock{} } if peerAddr != nil { *peerAddr = n.getRemoteAddress() @@ -2535,19 +2557,19 @@ func (e *endpoint) Accept(peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter. } // Bind binds the endpoint to a specific local port and optionally address. -func (e *endpoint) Bind(addr tcpip.FullAddress) (err *tcpip.Error) { +func (e *endpoint) Bind(addr tcpip.FullAddress) (err tcpip.Error) { e.LockUser() defer e.UnlockUser() return e.bindLocked(addr) } -func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) { +func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err tcpip.Error) { // Don't allow binding once endpoint is not in the initial state // anymore. This is because once the endpoint goes into a connected or // listen state, it is already bound. if e.EndpointState() != StateInitial { - return tcpip.ErrAlreadyBound + return &tcpip.ErrAlreadyBound{} } e.BindAddr = addr.Addr @@ -2575,7 +2597,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) { if len(addr.Addr) != 0 { nic = e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr) if nic == 0 { - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } e.ID.LocalAddress = addr.Addr } @@ -2616,7 +2638,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) { } // GetLocalAddress returns the address to which the endpoint is bound. -func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { +func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { e.LockUser() defer e.UnlockUser() @@ -2628,12 +2650,12 @@ func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { } // GetRemoteAddress returns the address to which the endpoint is connected. -func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { +func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) { e.LockUser() defer e.UnlockUser() if !e.EndpointState().connected() { - return tcpip.FullAddress{}, tcpip.ErrNotConnected + return tcpip.FullAddress{}, &tcpip.ErrNotConnected{} } return e.getRemoteAddress(), nil @@ -2665,7 +2687,7 @@ func (e *endpoint) enqueueSegment(s *segment) bool { return true } -func (e *endpoint) onICMPError(err *tcpip.Error, errType byte, errCode byte, extra uint32, pkt *stack.PacketBuffer) { +func (e *endpoint) onICMPError(err tcpip.Error, transErr stack.TransportError, pkt *stack.PacketBuffer) { // Update last error first. e.lastErrorMu.Lock() e.lastError = err @@ -2674,11 +2696,8 @@ func (e *endpoint) onICMPError(err *tcpip.Error, errType byte, errCode byte, ext // Update the error queue if IP_RECVERR is enabled. if e.SocketOptions().GetRecvError() { e.SocketOptions().QueueErr(&tcpip.SockError{ - Err: err, - ErrOrigin: header.ICMPOriginFromNetProto(pkt.NetworkProtocolNumber), - ErrType: errType, - ErrCode: errCode, - ErrInfo: extra, + Err: err, + Cause: transErr, // Linux passes the payload with the TCP header. We don't know if the TCP // header even exists, it may not for fragmented packets. Payload: pkt.Data.ToView(), @@ -2700,27 +2719,26 @@ func (e *endpoint) onICMPError(err *tcpip.Error, errType byte, errCode byte, ext e.notifyProtocolGoroutine(notifyError) } -// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. -func (e *endpoint) HandleControlPacket(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) { - switch typ { - case stack.ControlPacketTooBig: +// HandleError implements stack.TransportEndpoint. +func (e *endpoint) HandleError(transErr stack.TransportError, pkt *stack.PacketBuffer) { + handlePacketTooBig := func(mtu uint32) { e.sndBufMu.Lock() e.packetTooBigCount++ - if v := int(extra); v < e.sndMTU { + if v := int(mtu); v < e.sndMTU { e.sndMTU = v } e.sndBufMu.Unlock() - e.notifyProtocolGoroutine(notifyMTUChanged) + } - case stack.ControlNoRoute: - e.onICMPError(tcpip.ErrNoRoute, byte(header.ICMPv4DstUnreachable), byte(header.ICMPv4HostUnreachable), extra, pkt) - - case stack.ControlAddressUnreachable: - e.onICMPError(tcpip.ErrNoRoute, byte(header.ICMPv6DstUnreachable), byte(header.ICMPv6AddressUnreachable), extra, pkt) - - case stack.ControlNetworkUnreachable: - e.onICMPError(tcpip.ErrNetworkUnreachable, byte(header.ICMPv6DstUnreachable), byte(header.ICMPv6NetworkUnreachable), extra, pkt) + // TODO(gvisor.dev/issues/5270): Handle all transport errors. + switch transErr.Kind() { + case stack.PacketTooBigTransportError: + handlePacketTooBig(transErr.Info()) + case stack.DestinationHostUnreachableTransportError: + e.onICMPError(&tcpip.ErrNoRoute{}, transErr, pkt) + case stack.DestinationNetworkUnreachableTransportError: + e.onICMPError(&tcpip.ErrNetworkUnreachable{}, transErr, pkt) } } @@ -3013,12 +3031,16 @@ func (e *endpoint) completeState() stack.TCPEndpointState { rc := &e.snd.rc s.Sender.RACKState = stack.TCPRACKState{ - XmitTime: rc.xmitTime, - EndSequence: rc.endSequence, - FACK: rc.fack, - RTT: rc.rtt, - Reord: rc.reorderSeen, - DSACKSeen: rc.dsackSeen, + XmitTime: rc.xmitTime, + EndSequence: rc.endSequence, + FACK: rc.fack, + RTT: rc.rtt, + Reord: rc.reorderSeen, + DSACKSeen: rc.dsackSeen, + ReoWnd: rc.reoWnd, + ReoWndIncr: rc.reoWndIncr, + ReoWndPersist: rc.reoWndPersist, + RTTSeq: rc.rttSeq, } return s } @@ -3107,3 +3129,19 @@ func GetTCPSendBufferLimits(s tcpip.StackHandler) tcpip.SendBufferSizeOption { Max: ss.Max, } } + +// allowOutOfWindowAck returns true if an out-of-window ACK can be sent now. +func (e *endpoint) allowOutOfWindowAck() bool { + var limit stack.TCPInvalidRateLimitOption + if err := e.stack.Option(&limit); err != nil { + panic(fmt.Sprintf("e.stack.Option(%+v) failed with error: %s", limit, err)) + } + + now := time.Now() + if now.Sub(e.lastOutOfWindowAckTime) < time.Duration(limit) { + return false + } + + e.lastOutOfWindowAckTime = now + return true +} diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index 4a01c81b4..e4368026f 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -59,7 +59,7 @@ func (e *endpoint) beforeSave() { Err: fmt.Errorf("endpoint cannot be saved in connected state: local %s:%d, remote %s:%d", e.ID.LocalAddress, e.ID.LocalPort, e.ID.RemoteAddress, e.ID.RemotePort), }) } - e.resetConnectionLocked(tcpip.ErrConnectionAborted) + e.resetConnectionLocked(&tcpip.ErrConnectionAborted{}) e.mu.Unlock() e.Close() e.mu.Lock() @@ -232,7 +232,8 @@ func (e *endpoint) Resume(s *stack.Stack) { // Reset the scoreboard to reinitialize the sack information as // we do not restore SACK information. e.scoreboard.Reset() - if err := e.connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.ID.RemotePort}, false, e.workerRunning); err != tcpip.ErrConnectStarted { + err := e.connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.ID.RemotePort}, false, e.workerRunning) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { panic("endpoint connecting failed: " + err.String()) } e.mu.Lock() @@ -269,7 +270,8 @@ func (e *endpoint) Resume(s *stack.Stack) { connectedLoading.Wait() listenLoading.Wait() bind() - if err := e.Connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.ID.RemotePort}); err != tcpip.ErrConnectStarted { + err := e.Connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.ID.RemotePort}) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { panic("endpoint connecting failed: " + err.String()) } connectingLoading.Done() @@ -296,24 +298,6 @@ func (e *endpoint) Resume(s *stack.Stack) { } } -// saveLastError is invoked by stateify. -func (e *endpoint) saveLastError() string { - if e.lastError == nil { - return "" - } - - return e.lastError.String() -} - -// loadLastError is invoked by stateify. -func (e *endpoint) loadLastError(s string) { - if s == "" { - return - } - - e.lastError = tcpip.StringToError(s) -} - // saveRecentTSTime is invoked by stateify. func (e *endpoint) saveRecentTSTime() unixTime { return unixTime{e.recentTSTime.Unix(), e.recentTSTime.UnixNano()} @@ -324,22 +308,14 @@ func (e *endpoint) loadRecentTSTime(unix unixTime) { e.recentTSTime = time.Unix(unix.second, unix.nano) } -// saveHardError is invoked by stateify. -func (e *endpoint) saveHardError() string { - if e.hardError == nil { - return "" - } - - return e.hardError.String() +// saveLastOutOfWindowAckTime is invoked by stateify. +func (e *endpoint) saveLastOutOfWindowAckTime() unixTime { + return unixTime{e.lastOutOfWindowAckTime.Unix(), e.lastOutOfWindowAckTime.UnixNano()} } -// loadHardError is invoked by stateify. -func (e *endpoint) loadHardError(s string) { - if s == "" { - return - } - - e.hardError = tcpip.StringToError(s) +// loadLastOutOfWindowAckTime is invoked by stateify. +func (e *endpoint) loadLastOutOfWindowAckTime(unix unixTime) { + e.lastOutOfWindowAckTime = time.Unix(unix.second, unix.nano) } // saveMeasureTime is invoked by stateify. diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index 596178625..2f9fe7ee0 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -143,12 +143,12 @@ func (r *ForwarderRequest) Complete(sendReset bool) { // CreateEndpoint creates a TCP endpoint for the connection request, performing // the 3-way handshake in the process. -func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { r.mu.Lock() defer r.mu.Unlock() if r.segment == nil { - return nil, tcpip.ErrInvalidEndpointState + return nil, &tcpip.ErrInvalidEndpointState{} } f := r.forwarder diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 1720370c9..04012cd40 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -161,13 +161,13 @@ func (*protocol) Number() tcpip.TransportProtocolNumber { } // NewEndpoint creates a new tcp endpoint. -func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { return newEndpoint(p.stack, netProto, waiterQueue), nil } // NewRawEndpoint creates a new raw TCP endpoint. Raw TCP sockets are currently // unsupported. It implements stack.TransportProtocol.NewRawEndpoint. -func (p *protocol) NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (p *protocol) NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { return raw.NewEndpoint(p.stack, netProto, header.TCPProtocolNumber, waiterQueue) } @@ -178,7 +178,7 @@ func (*protocol) MinimumPacketSize() int { // ParsePorts returns the source and destination ports stored in the given tcp // packet. -func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { +func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err tcpip.Error) { h := header.TCP(v) return h.SourcePort(), h.DestinationPort(), nil } @@ -216,7 +216,7 @@ func (p *protocol) HandleUnknownDestinationPacket(id stack.TransportEndpointID, // replyWithReset replies to the given segment with a reset segment. // // If the passed TTL is 0, then the route's default TTL will be used. -func replyWithReset(stack *stack.Stack, s *segment, tos, ttl uint8) *tcpip.Error { +func replyWithReset(stack *stack.Stack, s *segment, tos, ttl uint8) tcpip.Error { route, err := stack.FindRoute(s.nicID, s.dstAddr, s.srcAddr, s.netProto, false /* multicastLoop */) if err != nil { return err @@ -261,7 +261,7 @@ func replyWithReset(stack *stack.Stack, s *segment, tos, ttl uint8) *tcpip.Error } // SetOption implements stack.TransportProtocol.SetOption. -func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) *tcpip.Error { +func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) tcpip.Error { switch v := option.(type) { case *tcpip.TCPSACKEnabled: p.mu.Lock() @@ -283,7 +283,7 @@ func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) *tcpi case *tcpip.TCPSendBufferSizeRangeOption: if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } p.mu.Lock() p.sendBufferSize = *v @@ -292,7 +292,7 @@ func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) *tcpi case *tcpip.TCPReceiveBufferSizeRangeOption: if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } p.mu.Lock() p.recvBufferSize = *v @@ -310,7 +310,7 @@ func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) *tcpi } // linux returns ENOENT when an invalid congestion control // is specified. - return tcpip.ErrNoSuchFile + return &tcpip.ErrNoSuchFile{} case *tcpip.TCPModerateReceiveBufferOption: p.mu.Lock() @@ -340,7 +340,7 @@ func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) *tcpi case *tcpip.TCPTimeWaitReuseOption: if *v < tcpip.TCPTimeWaitReuseDisabled || *v > tcpip.TCPTimeWaitReuseLoopbackOnly { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } p.mu.Lock() p.timeWaitReuse = *v @@ -381,7 +381,7 @@ func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) *tcpi case *tcpip.TCPSynRetriesOption: if *v < 1 || *v > 255 { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } p.mu.Lock() p.synRetries = uint8(*v) @@ -389,12 +389,12 @@ func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) *tcpi return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } // Option implements stack.TransportProtocol.Option. -func (p *protocol) Option(option tcpip.GettableTransportProtocolOption) *tcpip.Error { +func (p *protocol) Option(option tcpip.GettableTransportProtocolOption) tcpip.Error { switch v := option.(type) { case *tcpip.TCPSACKEnabled: p.mu.RLock() @@ -493,7 +493,7 @@ func (p *protocol) Option(option tcpip.GettableTransportProtocolOption) *tcpip.E return nil default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } } diff --git a/pkg/tcpip/transport/tcp/rack.go b/pkg/tcpip/transport/tcp/rack.go index 307bacca5..e862f159e 100644 --- a/pkg/tcpip/transport/tcp/rack.go +++ b/pkg/tcpip/transport/tcp/rack.go @@ -22,12 +22,21 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) -// wcDelayedACKTimeout is the recommended maximum delayed ACK timer value as -// defined in https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.5. -// It stands for worst case delayed ACK timer (WCDelAckT). When FlightSize is -// 1, PTO is inflated by WCDelAckT time to compensate for a potential long -// delayed ACK timer at the receiver. -const wcDelayedACKTimeout = 200 * time.Millisecond +const ( + // wcDelayedACKTimeout is the recommended maximum delayed ACK timer + // value as defined in the RFC. It stands for worst case delayed ACK + // timer (WCDelAckT). When FlightSize is 1, PTO is inflated by + // WCDelAckT time to compensate for a potential long delayed ACK timer + // at the receiver. + // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.5. + wcDelayedACKTimeout = 200 * time.Millisecond + + // tcpRACKRecoveryThreshold is the number of loss recoveries for which + // the reorder window is inflated and after that the reorder window is + // reset to its initial value of minRTT/4. + // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2. + tcpRACKRecoveryThreshold = 16 +) // RACK is a loss detection algorithm used in TCP to detect packet loss and // reordering using transmission timestamp of the packets instead of packet or @@ -44,6 +53,11 @@ type rackControl struct { // endSequence is the ending TCP sequence number of rackControl.seg. endSequence seqnum.Value + // exitedRecovery indicates if the connection is exiting loss recovery. + // This flag is set if the sender is leaving the recovery after + // receiving an ACK and is reset during updating of reorder window. + exitedRecovery bool + // fack is the highest selectively or cumulatively acknowledged // sequence. fack seqnum.Value @@ -51,15 +65,30 @@ type rackControl struct { // minRTT is the estimated minimum RTT of the connection. minRTT time.Duration + // reorderSeen indicates if reordering has been detected on this + // connection. + reorderSeen bool + + // reoWnd is the reordering window time used for recording packet + // transmission times. It is used to defer the moment at which RACK + // marks a packet lost. + reoWnd time.Duration + + // reoWndIncr is the multiplier applied to adjust reorder window. + reoWndIncr uint8 + + // reoWndPersist is the number of loss recoveries before resetting + // reorder window. + reoWndPersist int8 + // rtt is the RTT of the most recently delivered packet on the // connection (either cumulatively acknowledged or selectively // acknowledged) that was not marked invalid as a possible spurious // retransmission. rtt time.Duration - // reorderSeen indicates if reordering has been detected on this - // connection. - reorderSeen bool + // rttSeq is the SND.NXT when rtt is updated. + rttSeq seqnum.Value // xmitTime is the latest transmission timestamp of rackControl.seg. xmitTime time.Time `state:".(unixTime)"` @@ -75,29 +104,36 @@ type rackControl struct { // tlpHighRxt the value of sender.sndNxt at the time of sending // a TLP retransmission. tlpHighRxt seqnum.Value + + // snd is a reference to the sender. + snd *sender } // init initializes RACK specific fields. -func (rc *rackControl) init() { +func (rc *rackControl) init(snd *sender, iss seqnum.Value) { + rc.fack = iss + rc.reoWndIncr = 1 + rc.snd = snd rc.probeTimer.init(&rc.probeWaker) } // update will update the RACK related fields when an ACK has been received. -// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2 -func (rc *rackControl) update(seg *segment, ackSeg *segment, offset uint32) { +// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-09#section-6.2 +func (rc *rackControl) update(seg *segment, ackSeg *segment) { rtt := time.Now().Sub(seg.xmitTime) + tsOffset := rc.snd.ep.tsOffset // If the ACK is for a retransmitted packet, do not update if it is a // spurious inference which is determined by below checks: - // 1. When Timestamping option is available, if the TSVal is less than the - // transmit time of the most recent retransmitted packet. + // 1. When Timestamping option is available, if the TSVal is less than + // the transmit time of the most recent retransmitted packet. // 2. When RTT calculated for the packet is less than the smoothed RTT // for the connection. // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2 // step 2 if seg.xmitCount > 1 { if ackSeg.parsedOptions.TS && ackSeg.parsedOptions.TSEcr != 0 { - if ackSeg.parsedOptions.TSEcr < tcpTimeStamp(seg.xmitTime, offset) { + if ackSeg.parsedOptions.TSEcr < tcpTimeStamp(seg.xmitTime, tsOffset) { return } } @@ -149,9 +185,8 @@ func (rc *rackControl) detectReorder(seg *segment) { } } -// setDSACKSeen updates rack control if duplicate SACK is seen by the connection. -func (rc *rackControl) setDSACKSeen() { - rc.dsackSeen = true +func (rc *rackControl) setDSACKSeen(dsackSeen bool) { + rc.dsackSeen = dsackSeen } // shouldSchedulePTO dictates whether we should schedule a PTO or not. @@ -162,7 +197,7 @@ func (s *sender) shouldSchedulePTO() bool { // The connection supports SACK. s.ep.sackPermitted && // The connection is not in loss recovery. - (s.state != RTORecovery && s.state != SACKRecovery) && + (s.state != tcpip.RTORecovery && s.state != tcpip.SACKRecovery) && // The connection has no SACKed sequences in the SACK scoreboard. s.ep.scoreboard.Sacked() == 0 } @@ -193,7 +228,7 @@ func (s *sender) schedulePTO() { // probeTimerExpired is the same as TLP_send_probe() as defined in // https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.5.2. -func (s *sender) probeTimerExpired() *tcpip.Error { +func (s *sender) probeTimerExpired() tcpip.Error { if !s.rc.probeTimer.checkExpiration() { return nil } @@ -266,9 +301,88 @@ func (s *sender) detectTLPRecovery(ack seqnum.Value, rcvdSeg *segment) { // Step 2. Either the original packet or the retransmission (in the // form of a probe) was lost. Invoke a congestion control response // equivalent to fast recovery. - s.cc.HandleNDupAcks() + s.cc.HandleLossDetected() s.enterRecovery() s.leaveRecovery() } } } + +// updateRACKReorderWindow updates the reorder window. +// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2 +// * Step 4: Update RACK reordering window +// To handle the prevalent small degree of reordering, RACK.reo_wnd serves as +// an allowance for settling time before marking a packet lost. RACK starts +// initially with a conservative window of min_RTT/4. If no reordering has +// been observed RACK uses reo_wnd of zero during loss recovery, in order to +// retransmit quickly, or when the number of DUPACKs exceeds the classic +// DUPACKthreshold. +func (rc *rackControl) updateRACKReorderWindow(ackSeg *segment) { + dsackSeen := rc.dsackSeen + snd := rc.snd + + // React to DSACK once per round trip. + // If SND.UNA < RACK.rtt_seq: + // RACK.dsack = false + if snd.sndUna.LessThan(rc.rttSeq) { + dsackSeen = false + } + + // If RACK.dsack: + // RACK.reo_wnd_incr += 1 + // RACK.dsack = false + // RACK.rtt_seq = SND.NXT + // RACK.reo_wnd_persist = 16 + if dsackSeen { + rc.reoWndIncr++ + dsackSeen = false + rc.rttSeq = snd.sndNxt + rc.reoWndPersist = tcpRACKRecoveryThreshold + } else if rc.exitedRecovery { + // Else if exiting loss recovery: + // RACK.reo_wnd_persist -= 1 + // If RACK.reo_wnd_persist <= 0: + // RACK.reo_wnd_incr = 1 + rc.reoWndPersist-- + if rc.reoWndPersist <= 0 { + rc.reoWndIncr = 1 + } + rc.exitedRecovery = false + } + + // Reorder window is zero during loss recovery, or when the number of + // DUPACKs exceeds the classic DUPACKthreshold. + // If RACK.reord is FALSE: + // If in loss recovery: (If in fast or timeout recovery) + // RACK.reo_wnd = 0 + // Return + // Else if RACK.pkts_sacked >= RACK.dupthresh: + // RACK.reo_wnd = 0 + // return + if !rc.reorderSeen { + if snd.state == tcpip.RTORecovery || snd.state == tcpip.SACKRecovery { + rc.reoWnd = 0 + return + } + + if snd.sackedOut >= nDupAckThreshold { + rc.reoWnd = 0 + return + } + } + + // Calculate reorder window. + // RACK.reo_wnd = RACK.min_RTT / 4 * RACK.reo_wnd_incr + // RACK.reo_wnd = min(RACK.reo_wnd, SRTT) + snd.rtt.Lock() + srtt := snd.rtt.srtt + snd.rtt.Unlock() + rc.reoWnd = time.Duration((int64(rc.minRTT) / 4) * int64(rc.reoWndIncr)) + if srtt < rc.reoWnd { + rc.reoWnd = srtt + } +} + +func (rc *rackControl) exitRecovery() { + rc.exitedRecovery = true +} diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index 405a6dce7..a5c82b8fa 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -347,7 +347,7 @@ func (r *receiver) updateRTT() { r.ep.rcvListMu.Unlock() } -func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, closed bool) (drop bool, err *tcpip.Error) { +func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, closed bool) (drop bool, err tcpip.Error) { r.ep.rcvListMu.Lock() rcvClosed := r.ep.rcvClosed || r.closed r.ep.rcvListMu.Unlock() @@ -385,7 +385,7 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo // fails, we ignore the packet: // https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L5591 if r.ep.snd.sndNxt.LessThan(s.ackNumber) { - r.ep.snd.sendAck() + r.ep.snd.maybeSendOutOfWindowAck(s) return true, nil } @@ -395,7 +395,7 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo // trigger a RST. endDataSeq := s.sequenceNumber.Add(seqnum.Size(s.data.Size())) if state != StateCloseWait && rcvClosed && r.rcvNxt.LessThan(endDataSeq) { - return true, tcpip.ErrConnectionAborted + return true, &tcpip.ErrConnectionAborted{} } if state == StateFinWait1 { break @@ -424,7 +424,7 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo // the last actual data octet in a segment in // which it occurs. if closed && (!s.flagIsSet(header.TCPFlagFin) || s.sequenceNumber.Add(s.logicalLen()) != r.rcvNxt+1) { - return true, tcpip.ErrConnectionAborted + return true, &tcpip.ErrConnectionAborted{} } } @@ -443,7 +443,7 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo // handleRcvdSegment handles TCP segments directed at the connection managed by // r as they arrive. It is called by the protocol main loop. -func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) { +func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err tcpip.Error) { state := r.ep.EndpointState() closed := r.ep.closed @@ -454,7 +454,7 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) { // send an ACK and stop further processing of the segment. // This is according to RFC 793, page 68. if !r.acceptable(segSeq, segLen) { - r.ep.snd.sendAck() + r.ep.snd.maybeSendOutOfWindowAck(s) return true, nil } diff --git a/pkg/tcpip/transport/tcp/reno.go b/pkg/tcpip/transport/tcp/reno.go index f83ebc717..ff39780a5 100644 --- a/pkg/tcpip/transport/tcp/reno.go +++ b/pkg/tcpip/transport/tcp/reno.go @@ -79,10 +79,10 @@ func (r *renoState) Update(packetsAcked int) { r.updateCongestionAvoidance(packetsAcked) } -// HandleNDupAcks implements congestionControl.HandleNDupAcks. -func (r *renoState) HandleNDupAcks() { - // A retransmit was triggered due to nDupAckThreshold - // being hit. Reduce our slow start threshold. +// HandleLossDetected implements congestionControl.HandleLossDetected. +func (r *renoState) HandleLossDetected() { + // A retransmit was triggered due to nDupAckThreshold or when RACK + // detected loss. Reduce our slow start threshold. r.reduceSlowStartThreshold() } diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 079d90848..463a259b7 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -48,34 +48,13 @@ const ( MaxRetries = 15 ) -// ccState indicates the current congestion control state for this sender. -type ccState int - -const ( - // Open indicates that the sender is receiving acks in order and - // no loss or dupACK's etc have been detected. - Open ccState = iota - // RTORecovery indicates that an RTO has occurred and the sender - // has entered an RTO based recovery phase. - RTORecovery - // FastRecovery indicates that the sender has entered FastRecovery - // based on receiving nDupAck's. This state is entered only when - // SACK is not in use. - FastRecovery - // SACKRecovery indicates that the sender has entered SACK based - // recovery. - SACKRecovery - // Disorder indicates the sender either received some SACK blocks - // or dupACK's. - Disorder -) - // congestionControl is an interface that must be implemented by any supported // congestion control algorithm. type congestionControl interface { - // HandleNDupAcks is invoked when sender.dupAckCount >= nDupAckThreshold - // just before entering fast retransmit. - HandleNDupAcks() + // HandleLossDetected is invoked when the loss is detected by RACK or + // sender.dupAckCount >= nDupAckThreshold just before entering fast + // retransmit. + HandleLossDetected() // HandleRTOExpired is invoked when the retransmit timer expires. HandleRTOExpired() @@ -204,7 +183,7 @@ type sender struct { maxSentAck seqnum.Value // state is the current state of congestion control for this endpoint. - state ccState + state tcpip.CongestionControlState // cc is the congestion control algorithm in use for this sender. cc congestionControl @@ -280,14 +259,9 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint highRxt: iss, rescueRxt: iss, }, - rc: rackControl{ - fack: iss, - }, gso: ep.gso != nil, } - s.rc.init() - if s.gso { s.ep.gso.MSS = uint16(maxPayloadSize) } @@ -295,6 +269,7 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint s.cc = s.initCongestionControl(ep.cc) s.lr = s.initLossRecovery() + s.rc.init(s, iss) // A negative sndWndScale means that no scaling is in use, otherwise we // store the scaling value. @@ -593,7 +568,7 @@ func (s *sender) retransmitTimerExpired() bool { s.leaveRecovery() } - s.state = RTORecovery + s.state = tcpip.RTORecovery s.cc.HandleRTOExpired() // Mark the next segment to be sent as the first unacknowledged one and @@ -1018,7 +993,7 @@ func (s *sender) sendData() { // "A TCP SHOULD set cwnd to no more than RW before beginning // transmission if the TCP has not sent data in the interval exceeding // the retrasmission timeout." - if !s.fr.active && s.state != RTORecovery && time.Now().Sub(s.lastSendTime) > s.rto { + if !s.fr.active && s.state != tcpip.RTORecovery && time.Now().Sub(s.lastSendTime) > s.rto { if s.sndCwnd > InitialCwnd { s.sndCwnd = InitialCwnd } @@ -1062,14 +1037,14 @@ func (s *sender) enterRecovery() { s.fr.highRxt = s.sndUna s.fr.rescueRxt = s.sndUna if s.ep.sackPermitted { - s.state = SACKRecovery + s.state = tcpip.SACKRecovery s.ep.stack.Stats().TCP.SACKRecovery.Increment() // Set TLPRxtOut to false according to // https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.1. s.rc.tlpRxtOut = false return } - s.state = FastRecovery + s.state = tcpip.FastRecovery s.ep.stack.Stats().TCP.FastRecovery.Increment() } @@ -1080,7 +1055,6 @@ func (s *sender) leaveRecovery() { // Deflate cwnd. It had been artificially inflated when new dups arrived. s.sndCwnd = s.sndSsthresh - s.cc.PostRecovery() } @@ -1166,7 +1140,7 @@ func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) { s.fr.highRxt = s.sndUna - 1 // Do run SetPipe() to calculate the outstanding segments. s.SetPipe() - s.state = Disorder + s.state = tcpip.Disorder return false } @@ -1179,7 +1153,7 @@ func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) { s.dupAckCount = 0 return false } - s.cc.HandleNDupAcks() + s.cc.HandleLossDetected() s.enterRecovery() s.dupAckCount = 0 return true @@ -1217,11 +1191,13 @@ func (s *sender) isDupAck(seg *segment) bool { // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2 // steps 2 and 3. func (s *sender) walkSACK(rcvdSeg *segment) { + s.rc.setDSACKSeen(false) + // Look for DSACK block. idx := 0 n := len(rcvdSeg.parsedOptions.SACKBlocks) if checkDSACK(rcvdSeg) { - s.rc.setDSACKSeen() + s.rc.setDSACKSeen(true) idx = 1 n-- } @@ -1242,7 +1218,7 @@ func (s *sender) walkSACK(rcvdSeg *segment) { for _, sb := range sackBlocks { for seg != nil && seg.sequenceNumber.LessThan(sb.End) && seg.xmitCount != 0 { if sb.Start.LessThanEq(seg.sequenceNumber) && !seg.acked { - s.rc.update(seg, rcvdSeg, s.ep.tsOffset) + s.rc.update(seg, rcvdSeg) s.rc.detectReorder(seg) seg.acked = true s.sackedOut += s.pCount(seg, s.maxPayloadSize) @@ -1412,6 +1388,17 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { acked := s.sndUna.Size(ack) s.sndUna = ack + // The remote ACK-ing at least 1 byte is an indication that we have a + // full-duplex connection to the remote as the only way we will receive an + // ACK is if the remote received data that we previously sent. + // + // As of writing, linux seems to only confirm a route as reachable when + // forward progress is made which is indicated by an ACK that removes data + // from the retransmit queue. + if acked > 0 { + s.ep.route.ConfirmReachable() + } + ackLeft := acked originalOutstanding := s.outstanding for ackLeft > 0 { @@ -1435,7 +1422,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { // Update the RACK fields if SACK is enabled. if s.ep.sackPermitted && !seg.acked { - s.rc.update(seg, rcvdSeg, s.ep.tsOffset) + s.rc.update(seg, rcvdSeg) s.rc.detectReorder(seg) } @@ -1464,7 +1451,11 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { if !s.fr.active { s.cc.Update(originalOutstanding - s.outstanding) if s.fr.last.LessThan(s.sndUna) { - s.state = Open + s.state = tcpip.Open + // Update RACK when we are exiting fast or RTO + // recovery as described in the RFC + // draft-ietf-tcpm-rack-08 Section-7.2 Step 4. + s.rc.exitRecovery() } } @@ -1488,6 +1479,12 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { } } + // Update RACK reorder window. + // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2 + // * Upon receiving an ACK: + // * Step 4: Update RACK reordering window + s.rc.updateRACKReorderWindow(rcvdSeg) + // Now that we've popped all acknowledged data from the retransmit // queue, retransmit if needed. if s.fr.active { @@ -1508,7 +1505,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { } // sendSegment sends the specified segment. -func (s *sender) sendSegment(seg *segment) *tcpip.Error { +func (s *sender) sendSegment(seg *segment) tcpip.Error { if seg.xmitCount > 0 { s.ep.stack.Stats().TCP.Retransmits.Increment() s.ep.stats.SendErrors.Retransmits.Increment() @@ -1539,7 +1536,7 @@ func (s *sender) sendSegment(seg *segment) *tcpip.Error { // sendSegmentFromView sends a new segment containing the given payload, flags // and sequence number. -func (s *sender) sendSegmentFromView(data buffer.VectorisedView, flags byte, seq seqnum.Value) *tcpip.Error { +func (s *sender) sendSegmentFromView(data buffer.VectorisedView, flags byte, seq seqnum.Value) tcpip.Error { s.lastSendTime = time.Now() if seq == s.rttMeasureSeqNum { s.rttMeasureTime = s.lastSendTime @@ -1552,3 +1549,13 @@ func (s *sender) sendSegmentFromView(data buffer.VectorisedView, flags byte, seq return s.ep.sendRaw(data, flags, seq, rcvNxt, rcvWnd) } + +// maybeSendOutOfWindowAck sends an ACK if we are not being rate limited +// currently. +func (s *sender) maybeSendOutOfWindowAck(seg *segment) { + // Data packets are unlikely to be part of an ACK loop. So always send + // an ACK for a packet w/ data. + if seg.payloadSize() > 0 || s.ep.allowOutOfWindowAck() { + s.sendAck() + } +} diff --git a/pkg/tcpip/transport/tcp/tcp_rack_test.go b/pkg/tcpip/transport/tcp/tcp_rack_test.go index af915203b..a6a26b705 100644 --- a/pkg/tcpip/transport/tcp/tcp_rack_test.go +++ b/pkg/tcpip/transport/tcp/tcp_rack_test.go @@ -16,6 +16,7 @@ package tcp_test import ( "bytes" + "fmt" "testing" "time" @@ -534,3 +535,64 @@ func TestRACKWithInvalidDSACKBlock(t *testing.T) { // ACK before the test completes. <-probeDone } + +func addReorderWindowCheckerProbe(c *context.Context, numACK int, probeDone chan error) { + var n int + c.Stack().AddTCPProbe(func(state stack.TCPEndpointState) { + // Validate that RACK detects DSACK. + n++ + if n < numACK { + return + } + + if state.Sender.RACKState.ReoWnd == 0 || state.Sender.RACKState.ReoWnd > state.Sender.SRTT { + probeDone <- fmt.Errorf("got RACKState.ReoWnd: %v, expected it to be greater than 0 and less than %v", state.Sender.RACKState.ReoWnd, state.Sender.SRTT) + return + } + + if state.Sender.RACKState.ReoWndIncr != 1 { + probeDone <- fmt.Errorf("got RACKState.ReoWndIncr: %v, want: 1", state.Sender.RACKState.ReoWndIncr) + return + } + + if state.Sender.RACKState.ReoWndPersist > 0 { + probeDone <- fmt.Errorf("got RACKState.ReoWndPersist: %v, want: greater than 0", state.Sender.RACKState.ReoWndPersist) + return + } + probeDone <- nil + }) +} + +func TestRACKCheckReorderWindow(t *testing.T) { + c := context.New(t, uint32(mtu)) + defer c.Cleanup() + + probeDone := make(chan error) + const ackNumToVerify = 3 + addReorderWindowCheckerProbe(c, ackNumToVerify, probeDone) + + const numPackets = 7 + sendAndReceive(t, c, numPackets) + + // Send ACK for #1 packet. + bytesRead := maxPayload + seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1) + c.SendAck(seq, bytesRead) + + // Missing [2-6] packets and SACK #7 packet. + seq = seqnum.Value(context.TestInitialSequenceNumber).Add(1) + start := c.IRS.Add(1 + seqnum.Size(6*maxPayload)) + end := start.Add(seqnum.Size(maxPayload)) + c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start, end}}) + + // Received delayed packets [2-6] which indicates there is reordering + // in the connection. + bytesRead += 6 * maxPayload + c.SendAck(seq, bytesRead) + + // Wait for the probe function to finish processing the ACK before the + // test completes. + if err := <-probeDone; err != nil { + t.Fatalf("unexpected values for RACK variables: %v", err) + } +} diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 87ff2b909..cd3c4a027 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -48,7 +48,7 @@ type endpointTester struct { } // CheckReadError issues a read to the endpoint and checking for an error. -func (e *endpointTester) CheckReadError(t *testing.T, want *tcpip.Error) { +func (e *endpointTester) CheckReadError(t *testing.T, want tcpip.Error) { t.Helper() res, got := e.ep.Read(ioutil.Discard, tcpip.ReadOptions{}) if got != want { @@ -87,7 +87,7 @@ func (e *endpointTester) CheckReadFull(t *testing.T, count int, notifyRead <-cha } for w.N != 0 { _, err := e.ep.Read(&w, tcpip.ReadOptions{}) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for receive to be notified. select { case <-notifyRead: @@ -128,8 +128,11 @@ func TestGiveUpConnect(t *testing.T) { wq.EventRegister(&waitEntry, waiter.EventHUp) defer wq.EventUnregister(&waitEntry) - if err := ep.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted { - t.Fatalf("got ep.Connect(...) = %s, want = %s", err, tcpip.ErrConnectStarted) + { + err := ep.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { + t.Fatalf("got ep.Connect(...) = %v, want = %s", err, &tcpip.ErrConnectStarted{}) + } } // Close the connection, wait for completion. @@ -140,8 +143,11 @@ func TestGiveUpConnect(t *testing.T) { // Call Connect again to retreive the handshake failure status // and stats updates. - if err := ep.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrAborted { - t.Fatalf("got ep.Connect(...) = %s, want = %s", err, tcpip.ErrAborted) + { + err := ep.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}) + if _, ok := err.(*tcpip.ErrAborted); !ok { + t.Fatalf("got ep.Connect(...) = %v, want = %s", err, &tcpip.ErrAborted{}) + } } if got := c.Stack().Stats().TCP.FailedConnectionAttempts.Value(); got != 1 { @@ -194,8 +200,11 @@ func TestActiveFailedConnectionAttemptIncrement(t *testing.T) { c.EP = ep want := stats.TCP.FailedConnectionAttempts.Value() + 1 - if err := c.EP.Connect(tcpip.FullAddress{NIC: 2, Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrNoRoute { - t.Errorf("got c.EP.Connect(...) = %s, want = %s", err, tcpip.ErrNoRoute) + { + err := c.EP.Connect(tcpip.FullAddress{NIC: 2, Addr: context.TestAddr, Port: context.TestPort}) + if _, ok := err.(*tcpip.ErrNoRoute); !ok { + t.Errorf("got c.EP.Connect(...) = %v, want = %s", err, &tcpip.ErrNoRoute{}) + } } if got := stats.TCP.FailedConnectionAttempts.Value(); got != want { @@ -211,7 +220,7 @@ func TestCloseWithoutConnect(t *testing.T) { defer c.Cleanup() // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { t.Fatalf("NewEndpoint failed: %s", err) @@ -384,7 +393,7 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) { defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -925,8 +934,11 @@ func TestUserSuppliedMSSOnConnect(t *testing.T) { ws := tcp.FindWndScale(seqnum.Size(rcvBufSize)) connectAddr := tcpip.FullAddress{Addr: ip.connectAddr, Port: context.TestPort} - if err := c.EP.Connect(connectAddr); err != tcpip.ErrConnectStarted { - t.Fatalf("Connect(%+v): %s", connectAddr, err) + { + err := c.EP.Connect(connectAddr) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { + t.Fatalf("Connect(%+v): %s", connectAddr, err) + } } // Receive SYN packet with our user supplied MSS. @@ -1442,7 +1454,8 @@ func TestConnectBindToDevice(t *testing.T) { c.WQ.EventRegister(&waitEntry, waiter.EventOut) defer c.WQ.EventUnregister(&waitEntry) - if err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted { + err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { t.Fatalf("unexpected return value from Connect: %s", err) } @@ -1502,8 +1515,9 @@ func TestSynSent(t *testing.T) { defer c.WQ.EventUnregister(&waitEntry) addr := tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort} - if err := c.EP.Connect(addr); err != tcpip.ErrConnectStarted { - t.Fatalf("got Connect(%+v) = %s, want %s", addr, err, tcpip.ErrConnectStarted) + err := c.EP.Connect(addr) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { + t.Fatalf("got Connect(%+v) = %v, want %s", addr, err, &tcpip.ErrConnectStarted{}) } // Receive SYN packet. @@ -1548,9 +1562,9 @@ func TestSynSent(t *testing.T) { ept := endpointTester{c.EP} if test.reset { - ept.CheckReadError(t, tcpip.ErrConnectionRefused) + ept.CheckReadError(t, &tcpip.ErrConnectionRefused{}) } else { - ept.CheckReadError(t, tcpip.ErrAborted) + ept.CheckReadError(t, &tcpip.ErrAborted{}) } if got := c.Stack().Stats().TCP.CurrentConnected.Value(); got != 0 { @@ -1576,7 +1590,7 @@ func TestOutOfOrderReceive(t *testing.T) { defer c.WQ.EventUnregister(&we) ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) // Send second half of data first, with seqnum 3 ahead of expected. data := []byte{1, 2, 3, 4, 5, 6} @@ -1601,7 +1615,7 @@ func TestOutOfOrderReceive(t *testing.T) { // Wait 200ms and check that no data has been received. time.Sleep(200 * time.Millisecond) - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) // Send the first 3 bytes now. c.SendPacket(data[:3], &context.Headers{ @@ -1640,7 +1654,7 @@ func TestOutOfOrderFlood(t *testing.T) { c.CreateConnected(789, 30000, rcvBufSz) ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) // Send 100 packets before the actual one that is expected. data := []byte{1, 2, 3, 4, 5, 6} @@ -1716,7 +1730,7 @@ func TestRstOnCloseWithUnreadData(t *testing.T) { defer c.WQ.EventUnregister(&we) ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) data := []byte{1, 2, 3} c.SendPacket(data, &context.Headers{ @@ -1784,7 +1798,7 @@ func TestRstOnCloseWithUnreadDataFinConvertRst(t *testing.T) { defer c.WQ.EventUnregister(&we) ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) data := []byte{1, 2, 3} c.SendPacket(data, &context.Headers{ @@ -1866,13 +1880,13 @@ func TestShutdownRead(t *testing.T) { c.CreateConnected(789, 30000, -1 /* epRcvBuf */) ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) if err := c.EP.Shutdown(tcpip.ShutdownRead); err != nil { t.Fatalf("Shutdown failed: %s", err) } - ept.CheckReadError(t, tcpip.ErrClosedForReceive) + ept.CheckReadError(t, &tcpip.ErrClosedForReceive{}) var want uint64 = 1 if got := c.EP.Stats().(*tcp.Stats).ReadErrors.ReadClosed.Value(); got != want { t.Fatalf("got EP stats Stats.ReadErrors.ReadClosed got %d want %d", got, want) @@ -1891,7 +1905,7 @@ func TestFullWindowReceive(t *testing.T) { defer c.WQ.EventUnregister(&we) ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) // Fill up the window w/ tcp.SegOverheadFactor*rcvBufSz as netstack multiplies // the provided buffer value by tcp.SegOverheadFactor to calculate the actual @@ -2052,7 +2066,7 @@ func TestNoWindowShrinking(t *testing.T) { defer c.WQ.EventUnregister(&we) ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) // Send a 1 byte payload so that we can record the current receive window. // Send a payload of half the size of rcvBufSize. @@ -2370,7 +2384,7 @@ func TestScaledWindowAccept(t *testing.T) { defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -2443,7 +2457,7 @@ func TestNonScaledWindowAccept(t *testing.T) { defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -2958,7 +2972,7 @@ func TestSetTTL(t *testing.T) { c := context.New(t, 65535) defer c.Cleanup() - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{}) if err != nil { t.Fatalf("NewEndpoint failed: %s", err) @@ -2968,8 +2982,11 @@ func TestSetTTL(t *testing.T) { t.Fatalf("SetSockOptInt(TTLOption, %d) failed: %s", wantTTL, err) } - if err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted { - t.Fatalf("unexpected return value from Connect: %s", err) + { + err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { + t.Fatalf("unexpected return value from Connect: %s", err) + } } // Receive SYN packet. @@ -3029,7 +3046,7 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) { defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -3085,7 +3102,7 @@ func TestSynCookiePassiveSendMSSLessThanMTU(t *testing.T) { defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -3110,9 +3127,9 @@ func TestForwarderSendMSSLessThanMTU(t *testing.T) { defer c.Cleanup() s := c.Stack() - ch := make(chan *tcpip.Error, 1) + ch := make(chan tcpip.Error, 1) f := tcp.NewForwarder(s, 65536, 10, func(r *tcp.ForwarderRequest) { - var err *tcpip.Error + var err tcpip.Error c.EP, err = r.CreateEndpoint(&c.WQ) ch <- err }) @@ -3141,7 +3158,7 @@ func TestSynOptionsOnActiveConnect(t *testing.T) { defer c.Cleanup() // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { t.Fatalf("NewEndpoint failed: %s", err) @@ -3160,8 +3177,11 @@ func TestSynOptionsOnActiveConnect(t *testing.T) { c.WQ.EventRegister(&we, waiter.EventOut) defer c.WQ.EventUnregister(&we) - if err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted { - t.Fatalf("got c.EP.Connect(...) = %s, want = %s", err, tcpip.ErrConnectStarted) + { + err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { + t.Fatalf("got c.EP.Connect(...) = %v, want = %s", err, &tcpip.ErrConnectStarted{}) + } } // Receive SYN packet. @@ -3271,22 +3291,23 @@ func TestReceiveOnResetConnection(t *testing.T) { loop: for { - switch _, err := c.EP.Read(ioutil.Discard, tcpip.ReadOptions{}); err { - case tcpip.ErrWouldBlock: + switch _, err := c.EP.Read(ioutil.Discard, tcpip.ReadOptions{}); err.(type) { + case *tcpip.ErrWouldBlock: select { case <-ch: // Expect the state to be StateError and subsequent Reads to fail with HardError. - if _, err := c.EP.Read(ioutil.Discard, tcpip.ReadOptions{}); err != tcpip.ErrConnectionReset { - t.Fatalf("got c.EP.Read() = %s, want = %s", err, tcpip.ErrConnectionReset) + _, err := c.EP.Read(ioutil.Discard, tcpip.ReadOptions{}) + if _, ok := err.(*tcpip.ErrConnectionReset); !ok { + t.Fatalf("got c.EP.Read() = %v, want = %s", err, &tcpip.ErrConnectionReset{}) } break loop case <-time.After(1 * time.Second): t.Fatalf("Timed out waiting for reset to arrive") } - case tcpip.ErrConnectionReset: + case *tcpip.ErrConnectionReset: break loop default: - t.Fatalf("got c.EP.Read(nil) = %s, want = %s", err, tcpip.ErrConnectionReset) + t.Fatalf("got c.EP.Read(nil) = %v, want = %s", err, &tcpip.ErrConnectionReset{}) } } @@ -3325,8 +3346,9 @@ func TestSendOnResetConnection(t *testing.T) { // Try to write. var r bytes.Reader r.Reset(make([]byte, 10)) - if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != tcpip.ErrConnectionReset { - t.Fatalf("got c.EP.Write(...) = %s, want = %s", err, tcpip.ErrConnectionReset) + _, err := c.EP.Write(&r, tcpip.WriteOptions{}) + if _, ok := err.(*tcpip.ErrConnectionReset); !ok { + t.Fatalf("got c.EP.Write(...) = %v, want = %s", err, &tcpip.ErrConnectionReset{}) } } @@ -4184,7 +4206,7 @@ func TestReadAfterClosedState(t *testing.T) { defer c.WQ.EventUnregister(&we) ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) // Shutdown immediately for write, check that we get a FIN. if err := c.EP.Shutdown(tcpip.ShutdownWrite); err != nil { @@ -4263,10 +4285,13 @@ func TestReadAfterClosedState(t *testing.T) { // Now that we drained the queue, check that functions fail with the // right error code. - ept.CheckReadError(t, tcpip.ErrClosedForReceive) + ept.CheckReadError(t, &tcpip.ErrClosedForReceive{}) var buf bytes.Buffer - if _, err := c.EP.Read(&buf, tcpip.ReadOptions{Peek: true}); err != tcpip.ErrClosedForReceive { - t.Fatalf("c.EP.Read(_, {Peek: true}) = %v, %s; want _, %s", res, err, tcpip.ErrClosedForReceive) + { + _, err := c.EP.Read(&buf, tcpip.ReadOptions{Peek: true}) + if _, ok := err.(*tcpip.ErrClosedForReceive); !ok { + t.Fatalf("c.EP.Read(_, {Peek: true}) = %v, %s; want _, %s", res, err, &tcpip.ErrClosedForReceive{}) + } } } @@ -4277,7 +4302,7 @@ func TestReusePort(t *testing.T) { defer c.Cleanup() // First case, just an endpoint that was bound. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{}) if err != nil { t.Fatalf("NewEndpoint failed; %s", err) @@ -4307,8 +4332,11 @@ func TestReusePort(t *testing.T) { if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) } - if err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted { - t.Fatalf("got c.EP.Connect(...) = %s, want = %s", err, tcpip.ErrConnectStarted) + { + err := c.EP.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { + t.Fatalf("got c.EP.Connect(...) = %v, want = %s", err, &tcpip.ErrConnectStarted{}) + } } c.EP.Close() @@ -4515,11 +4543,11 @@ func TestBindToDeviceOption(t *testing.T) { testActions := []struct { name string setBindToDevice *tcpip.NICID - setBindToDeviceError *tcpip.Error + setBindToDeviceError tcpip.Error getBindToDevice int32 }{ {"GetDefaultValue", nil, nil, 0}, - {"BindToNonExistent", nicIDPtr(999), tcpip.ErrUnknownDevice, 0}, + {"BindToNonExistent", nicIDPtr(999), &tcpip.ErrUnknownDevice{}, 0}, {"BindToExistent", nicIDPtr(321), nil, 321}, {"UnbindToDevice", nicIDPtr(0), nil, 0}, } @@ -4539,7 +4567,7 @@ func TestBindToDeviceOption(t *testing.T) { } } -func makeStack() (*stack.Stack, *tcpip.Error) { +func makeStack() (*stack.Stack, tcpip.Error) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{ ipv4.NewProtocol, @@ -4609,8 +4637,11 @@ func TestSelfConnect(t *testing.T) { wq.EventRegister(&waitEntry, waiter.EventOut) defer wq.EventUnregister(&waitEntry) - if err := ep.Connect(tcpip.FullAddress{Addr: context.StackAddr, Port: context.StackPort}); err != tcpip.ErrConnectStarted { - t.Fatalf("got ep.Connect(...) = %s, want = %s", err, tcpip.ErrConnectStarted) + { + err := ep.Connect(tcpip.FullAddress{Addr: context.StackAddr, Port: context.StackPort}) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { + t.Fatalf("got ep.Connect(...) = %v, want = %s", err, &tcpip.ErrConnectStarted{}) + } } <-notifyCh @@ -4762,9 +4793,9 @@ func TestConnectAvoidsBoundPorts(t *testing.T) { t.Fatalf("Bind(%d) failed: %s", i, err) } } - want := tcpip.ErrConnectStarted + var want tcpip.Error = &tcpip.ErrConnectStarted{} if collides { - want = tcpip.ErrNoPortAvailable + want = &tcpip.ErrNoPortAvailable{} } if err := makeEP(candidateNetwork).Connect(tcpip.FullAddress{Addr: address(t, candidateAddressType, false), Port: 31337}); err != want { t.Fatalf("got ep.Connect(..) = %s, want = %s", err, want) @@ -4889,11 +4920,11 @@ func TestTCPEndpointProbe(t *testing.T) { func TestStackSetCongestionControl(t *testing.T) { testCases := []struct { cc tcpip.CongestionControlOption - err *tcpip.Error + err tcpip.Error }{ {"reno", nil}, {"cubic", nil}, - {"blahblah", tcpip.ErrNoSuchFile}, + {"blahblah", &tcpip.ErrNoSuchFile{}}, } for _, tc := range testCases { @@ -4975,11 +5006,11 @@ func TestStackSetAvailableCongestionControl(t *testing.T) { func TestEndpointSetCongestionControl(t *testing.T) { testCases := []struct { cc tcpip.CongestionControlOption - err *tcpip.Error + err tcpip.Error }{ {"reno", nil}, {"cubic", nil}, - {"blahblah", tcpip.ErrNoSuchFile}, + {"blahblah", &tcpip.ErrNoSuchFile{}}, } for _, connected := range []bool{false, true} { @@ -4989,7 +5020,7 @@ func TestEndpointSetCongestionControl(t *testing.T) { defer c.Cleanup() // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { t.Fatalf("NewEndpoint failed: %s", err) @@ -5085,7 +5116,7 @@ func TestKeepalive(t *testing.T) { // Check that the connection is still alive. ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) // Send some data and wait before ACKing it. Keepalives should be disabled // during this period. @@ -5176,7 +5207,7 @@ func TestKeepalive(t *testing.T) { t.Errorf("got c.Stack().Stats().TCP.EstablishedTimedout.Value() = %d, want = 1", got) } - ept.CheckReadError(t, tcpip.ErrTimeout) + ept.CheckReadError(t, &tcpip.ErrTimeout{}) if got := c.Stack().Stats().TCP.CurrentEstablished.Value(); got != 0 { t.Errorf("got stats.TCP.CurrentEstablished.Value() = %d, want = 0", got) @@ -5283,7 +5314,7 @@ func TestListenBacklogFull(t *testing.T) { defer c.Cleanup() // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { t.Fatalf("NewEndpoint failed: %s", err) @@ -5326,7 +5357,7 @@ func TestListenBacklogFull(t *testing.T) { for i := 0; i < listenBacklog; i++ { _, _, err = c.EP.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -5343,7 +5374,7 @@ func TestListenBacklogFull(t *testing.T) { // Now verify that there are no more connections that can be accepted. _, _, err = c.EP.Accept(nil) - if err != tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { select { case <-ch: t.Fatalf("unexpected endpoint delivered on Accept: %+v", c.EP) @@ -5355,7 +5386,7 @@ func TestListenBacklogFull(t *testing.T) { executeHandshake(t, c, context.TestPort+lastPortOffset, false /*synCookieInUse */) newEP, _, err := c.EP.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -5598,7 +5629,7 @@ func TestListenSynRcvdQueueFull(t *testing.T) { defer c.Cleanup() // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { t.Fatalf("NewEndpoint failed: %s", err) @@ -5673,7 +5704,7 @@ func TestListenSynRcvdQueueFull(t *testing.T) { defer c.WQ.EventUnregister(&we) newEP, _, err := c.EP.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -5709,7 +5740,7 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { } // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { t.Fatalf("NewEndpoint failed: %s", err) @@ -5750,7 +5781,7 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { defer c.WQ.EventUnregister(&we) _, _, err = c.EP.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -5766,7 +5797,7 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { // Now verify that there are no more connections that can be accepted. _, _, err = c.EP.Accept(nil) - if err != tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { select { case <-ch: t.Fatalf("unexpected endpoint delivered on Accept: %+v", c.EP) @@ -5780,7 +5811,7 @@ func TestSYNRetransmit(t *testing.T) { defer c.Cleanup() // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { t.Fatalf("NewEndpoint failed: %s", err) @@ -5824,7 +5855,7 @@ func TestSynRcvdBadSeqNumber(t *testing.T) { defer c.Cleanup() // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { t.Fatalf("NewEndpoint failed: %s", err) @@ -5899,12 +5930,13 @@ func TestSynRcvdBadSeqNumber(t *testing.T) { }) newEP, _, err := c.EP.Accept(nil) - - if err != nil && err != tcpip.ErrWouldBlock { + switch err.(type) { + case nil, *tcpip.ErrWouldBlock: + default: t.Fatalf("Accept failed: %s", err) } - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Try to accept the connections in the backlog. we, ch := waiter.NewChannelEntry(nil) c.WQ.EventRegister(&we, waiter.EventIn) @@ -5972,7 +6004,7 @@ func TestPassiveConnectionAttemptIncrement(t *testing.T) { // Verify that there is only one acceptable connection at this point. _, _, err = c.EP.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -6042,7 +6074,7 @@ func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) { // Now check that there is one acceptable connections. _, _, err = c.EP.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -6074,7 +6106,7 @@ func TestEndpointBindListenAcceptState(t *testing.T) { } ept := endpointTester{ep} - ept.CheckReadError(t, tcpip.ErrNotConnected) + ept.CheckReadError(t, &tcpip.ErrNotConnected{}) if got := ep.Stats().(*tcp.Stats).ReadErrors.NotConnected.Value(); got != 1 { t.Errorf("got EP stats Stats.ReadErrors.NotConnected got %d want %d", got, 1) } @@ -6094,7 +6126,7 @@ func TestEndpointBindListenAcceptState(t *testing.T) { defer wq.EventUnregister(&we) aep, _, err := ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -6110,8 +6142,11 @@ func TestEndpointBindListenAcceptState(t *testing.T) { if got, want := tcp.EndpointState(aep.State()), tcp.StateEstablished; got != want { t.Errorf("unexpected endpoint state: want %s, got %s", want, got) } - if err := aep.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrAlreadyConnected { - t.Errorf("unexpected error attempting to call connect on an established endpoint, got: %s, want: %s", err, tcpip.ErrAlreadyConnected) + { + err := aep.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}) + if _, ok := err.(*tcpip.ErrAlreadyConnected); !ok { + t.Errorf("unexpected error attempting to call connect on an established endpoint, got: %v, want: %s", err, &tcpip.ErrAlreadyConnected{}) + } } // Listening endpoint remains in listen state. if got, want := tcp.EndpointState(ep.State()), tcp.StateListen; got != want { @@ -6230,7 +6265,7 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) { // window increases to the full available buffer size. for { _, err := c.EP.Read(ioutil.Discard, tcpip.ReadOptions{}) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { break } } @@ -6267,6 +6302,13 @@ func TestReceiveBufferAutoTuning(t *testing.T) { // Enable Auto-tuning. stk := c.Stack() + // Disable out of window rate limiting for this test by setting it to 0 as we + // use out of window ACKs to measure the advertised window. + var tcpInvalidRateLimit stack.TCPInvalidRateLimitOption + if err := stk.SetOption(tcpInvalidRateLimit); err != nil { + t.Fatalf("e.stack.SetOption(%#v) = %s", tcpInvalidRateLimit, err) + } + const receiveBufferSize = 80 << 10 // 80KB. const maxReceiveBufferSize = receiveBufferSize * 10 { @@ -6354,7 +6396,7 @@ func TestReceiveBufferAutoTuning(t *testing.T) { totalCopied := 0 for { res, err := c.EP.Read(ioutil.Discard, tcpip.ReadOptions{}) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { break } totalCopied += res.Count @@ -6546,7 +6588,7 @@ func TestTCPTimeWaitRSTIgnored(t *testing.T) { defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -6665,7 +6707,7 @@ func TestTCPTimeWaitOutOfOrder(t *testing.T) { defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -6772,7 +6814,7 @@ func TestTCPTimeWaitNewSyn(t *testing.T) { defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -6862,7 +6904,7 @@ func TestTCPTimeWaitNewSyn(t *testing.T) { // Try to accept the connection. c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -6936,7 +6978,7 @@ func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) { defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -7086,7 +7128,7 @@ func TestTCPCloseWithData(t *testing.T) { defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: @@ -7277,7 +7319,7 @@ func TestTCPUserTimeout(t *testing.T) { ) ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrTimeout) + ept.CheckReadError(t, &tcpip.ErrTimeout{}) if got, want := c.Stack().Stats().TCP.EstablishedTimedout.Value(), origEstablishedTimedout+1; got != want { t.Errorf("got c.Stack().Stats().TCP.EstablishedTimedout = %d, want = %d", got, want) @@ -7321,7 +7363,7 @@ func TestKeepaliveWithUserTimeout(t *testing.T) { // Check that the connection is still alive. ept := endpointTester{c.EP} - ept.CheckReadError(t, tcpip.ErrWouldBlock) + ept.CheckReadError(t, &tcpip.ErrWouldBlock{}) // Now receive 1 keepalives, but don't ACK it. b := c.GetPacket() @@ -7360,7 +7402,7 @@ func TestKeepaliveWithUserTimeout(t *testing.T) { ), ) - ept.CheckReadError(t, tcpip.ErrTimeout) + ept.CheckReadError(t, &tcpip.ErrTimeout{}) if got, want := c.Stack().Stats().TCP.EstablishedTimedout.Value(), origEstablishedTimedout+1; got != want { t.Errorf("got c.Stack().Stats().TCP.EstablishedTimedout = %d, want = %d", got, want) } @@ -7515,8 +7557,9 @@ func TestTCPDeferAccept(t *testing.T) { irs, iss := executeHandshake(t, c, context.TestPort, false /* synCookiesInUse */) - if _, _, err := c.EP.Accept(nil); err != tcpip.ErrWouldBlock { - t.Fatalf("got c.EP.Accept(nil) = %s, want: %s", err, tcpip.ErrWouldBlock) + _, _, err := c.EP.Accept(nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got c.EP.Accept(nil) = %v, want: %s", err, &tcpip.ErrWouldBlock{}) } // Send data. This should result in an acceptable endpoint. @@ -7573,8 +7616,9 @@ func TestTCPDeferAcceptTimeout(t *testing.T) { irs, iss := executeHandshake(t, c, context.TestPort, false /* synCookiesInUse */) - if _, _, err := c.EP.Accept(nil); err != tcpip.ErrWouldBlock { - t.Fatalf("got c.EP.Accept(nil) = %s, want: %s", err, tcpip.ErrWouldBlock) + _, _, err := c.EP.Accept(nil) + if _, ok := err.(*tcpip.ErrWouldBlock); !ok { + t.Fatalf("got c.EP.Accept(nil) = %v, want: %s", err, &tcpip.ErrWouldBlock{}) } // Sleep for a little of the tcpDeferAccept timeout. @@ -7696,13 +7740,13 @@ func TestSetStackTimeWaitReuse(t *testing.T) { s := c.Stack() testCases := []struct { v int - err *tcpip.Error + err tcpip.Error }{ {int(tcpip.TCPTimeWaitReuseDisabled), nil}, {int(tcpip.TCPTimeWaitReuseGlobal), nil}, {int(tcpip.TCPTimeWaitReuseLoopbackOnly), nil}, - {int(tcpip.TCPTimeWaitReuseLoopbackOnly) + 1, tcpip.ErrInvalidOptionValue}, - {int(tcpip.TCPTimeWaitReuseDisabled) - 1, tcpip.ErrInvalidOptionValue}, + {int(tcpip.TCPTimeWaitReuseLoopbackOnly) + 1, &tcpip.ErrInvalidOptionValue{}}, + {int(tcpip.TCPTimeWaitReuseDisabled) - 1, &tcpip.ErrInvalidOptionValue{}}, } for _, tc := range testCases { diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index ee55f030c..b1cb9a324 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -586,7 +586,7 @@ func (c *Context) ReceiveNonBlockingAndCheckPacket(data []byte, offset, size int // is true then it sets the IP_V6ONLY option on the socket to make it a IPv6 // only endpoint instead of a default dual stack socket. func (c *Context) CreateV6Endpoint(v6only bool) { - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.s.NewEndpoint(tcp.ProtocolNumber, ipv6.ProtocolNumber, &c.WQ) if err != nil { c.t.Fatalf("NewEndpoint failed: %v", err) @@ -689,7 +689,8 @@ func (c *Context) Connect(iss seqnum.Value, rcvWnd seqnum.Size, options []byte) c.WQ.EventRegister(&waitEntry, waiter.EventOut) defer c.WQ.EventUnregister(&waitEntry) - if err := c.EP.Connect(tcpip.FullAddress{Addr: TestAddr, Port: TestPort}); err != tcpip.ErrConnectStarted { + err := c.EP.Connect(tcpip.FullAddress{Addr: TestAddr, Port: TestPort}) + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { c.t.Fatalf("Unexpected return value from Connect: %v", err) } @@ -749,7 +750,7 @@ func (c *Context) Connect(iss seqnum.Value, rcvWnd seqnum.Size, options []byte) // Create creates a TCP endpoint. func (c *Context) Create(epRcvBuf int) { // Create TCP endpoint. - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { c.t.Fatalf("NewEndpoint failed: %v", err) @@ -887,7 +888,7 @@ func (r *RawEndpoint) VerifyACKHasSACK(sackBlocks []header.SACKBlock) { // It also verifies where required(eg.Timestamp) that the ACK to the SYN-ACK // does not carry an option that was not requested. func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *RawEndpoint { - var err *tcpip.Error + var err tcpip.Error c.EP, err = c.s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) if err != nil { c.t.Fatalf("c.s.NewEndpoint(tcp, ipv4...) = %v", err) @@ -903,7 +904,7 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) * testFullAddr := tcpip.FullAddress{Addr: TestAddr, Port: TestPort} err = c.EP.Connect(testFullAddr) - if err != tcpip.ErrConnectStarted { + if _, ok := err.(*tcpip.ErrConnectStarted); !ok { c.t.Fatalf("c.ep.Connect(%v) = %v", testFullAddr, err) } // Receive SYN packet. @@ -1054,7 +1055,7 @@ func (c *Context) AcceptWithOptions(wndScale int, synOptions header.TCPSynOption defer wq.EventUnregister(&we) c.EP, _, err = ep.Accept(nil) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for connection to be established. select { case <-ch: diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 4988ba29b..afd8f4d39 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -109,8 +109,8 @@ type endpoint struct { multicastNICID tcpip.NICID portFlags ports.Flags - lastErrorMu sync.Mutex `state:"nosave"` - lastError *tcpip.Error `state:".(string)"` + lastErrorMu sync.Mutex `state:"nosave"` + lastError tcpip.Error // Values used to reserve a port or register a transport endpoint. // (which ever happens first). @@ -215,7 +215,7 @@ func (e *endpoint) UniqueID() uint64 { return e.uniqueID } -func (e *endpoint) LastError() *tcpip.Error { +func (e *endpoint) LastError() tcpip.Error { e.lastErrorMu.Lock() defer e.lastErrorMu.Unlock() @@ -225,7 +225,7 @@ func (e *endpoint) LastError() *tcpip.Error { } // UpdateLastError implements tcpip.SocketOptionsHandler.UpdateLastError. -func (e *endpoint) UpdateLastError(err *tcpip.Error) { +func (e *endpoint) UpdateLastError(err tcpip.Error) { e.lastErrorMu.Lock() e.lastError = err e.lastErrorMu.Unlock() @@ -282,7 +282,7 @@ func (e *endpoint) Close() { func (e *endpoint) ModerateRecvBuf(copied int) {} // Read implements tcpip.Endpoint.Read. -func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, *tcpip.Error) { +func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) { if err := e.LastError(); err != nil { return tcpip.ReadResult{}, err } @@ -290,10 +290,10 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult e.rcvMu.Lock() if e.rcvList.Empty() { - err := tcpip.ErrWouldBlock + var err tcpip.Error = &tcpip.ErrWouldBlock{} if e.rcvClosed { e.stats.ReadErrors.ReadClosed.Increment() - err = tcpip.ErrClosedForReceive + err = &tcpip.ErrClosedForReceive{} } e.rcvMu.Unlock() return tcpip.ReadResult{}, err @@ -340,7 +340,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult n, err := p.data.ReadTo(dst, opts.Peek) if n == 0 && err != nil { - return res, tcpip.ErrBadBuffer + return res, &tcpip.ErrBadBuffer{} } res.Count = n return res, nil @@ -351,7 +351,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult // reacquire the mutex in exclusive mode. // // Returns true for retry if preparation should be retried. -func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpip.Error) { +func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip.Error) { switch e.EndpointState() { case StateInitial: case StateConnected: @@ -359,11 +359,11 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpi case StateBound: if to == nil { - return false, tcpip.ErrDestinationRequired + return false, &tcpip.ErrDestinationRequired{} } return false, nil default: - return false, tcpip.ErrInvalidEndpointState + return false, &tcpip.ErrInvalidEndpointState{} } e.mu.RUnlock() @@ -389,7 +389,7 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpi // connectRoute establishes a route to the specified interface or the // configured multicast interface if no interface is specified and the // specified address is a multicast address. -func (e *endpoint) connectRoute(nicID tcpip.NICID, addr tcpip.FullAddress, netProto tcpip.NetworkProtocolNumber) (*stack.Route, tcpip.NICID, *tcpip.Error) { +func (e *endpoint) connectRoute(nicID tcpip.NICID, addr tcpip.FullAddress, netProto tcpip.NetworkProtocolNumber) (*stack.Route, tcpip.NICID, tcpip.Error) { localAddr := e.ID.LocalAddress if e.isBroadcastOrMulticast(nicID, netProto, localAddr) { // A packet can only originate from a unicast address (i.e., an interface). @@ -415,18 +415,18 @@ func (e *endpoint) connectRoute(nicID tcpip.NICID, addr tcpip.FullAddress, netPr // Write writes data to the endpoint's peer. This method does not block // if the data cannot be written. -func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tcpip.Error) { +func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { n, err := e.write(p, opts) - switch err { + switch err.(type) { case nil: e.stats.PacketsSent.Increment() - case tcpip.ErrMessageTooLong, tcpip.ErrInvalidOptionValue: + case *tcpip.ErrMessageTooLong, *tcpip.ErrInvalidOptionValue: e.stats.WriteErrors.InvalidArgs.Increment() - case tcpip.ErrClosedForSend: + case *tcpip.ErrClosedForSend: e.stats.WriteErrors.WriteClosed.Increment() - case tcpip.ErrInvalidEndpointState: + case *tcpip.ErrInvalidEndpointState: e.stats.WriteErrors.InvalidEndpointState.Increment() - case tcpip.ErrNoRoute, tcpip.ErrBroadcastDisabled, tcpip.ErrNetworkUnreachable: + case *tcpip.ErrNoRoute, *tcpip.ErrBroadcastDisabled, *tcpip.ErrNetworkUnreachable: // Errors indicating any problem with IP routing of the packet. e.stats.SendErrors.NoRoute.Increment() default: @@ -436,14 +436,14 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc return n, err } -func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tcpip.Error) { +func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { if err := e.LastError(); err != nil { return 0, err } // MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.) if opts.More { - return 0, tcpip.ErrInvalidOptionValue + return 0, &tcpip.ErrInvalidOptionValue{} } to := opts.To @@ -459,7 +459,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc // If we've shutdown with SHUT_WR we are in an invalid state for sending. if e.shutdownFlags&tcpip.ShutdownWrite != 0 { - return 0, tcpip.ErrClosedForSend + return 0, &tcpip.ErrClosedForSend{} } // Prepare for write. @@ -480,9 +480,12 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc // Reject destination address if it goes through a different // NIC than the endpoint was bound to. nicID := to.NIC + if nicID == 0 { + nicID = tcpip.NICID(e.ops.GetBindToDevice()) + } if e.BindNICID != 0 { if nicID != 0 && nicID != e.BindNICID { - return 0, tcpip.ErrNoRoute + return 0, &tcpip.ErrNoRoute{} } nicID = e.BindNICID @@ -490,7 +493,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc if to.Port == 0 { // Port 0 is an invalid port to send to. - return 0, tcpip.ErrInvalidEndpointState + return 0, &tcpip.ErrInvalidEndpointState{} } dst, netProto, err := e.checkV4MappedLocked(*to) @@ -509,19 +512,19 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc } if !e.ops.GetBroadcast() && route.IsOutboundBroadcast() { - return 0, tcpip.ErrBroadcastDisabled + return 0, &tcpip.ErrBroadcastDisabled{} } v := make([]byte, p.Len()) if _, err := io.ReadFull(p, v); err != nil { - return 0, tcpip.ErrBadBuffer + return 0, &tcpip.ErrBadBuffer{} } if len(v) > header.UDPMaximumPacketSize { // Payload can't possibly fit in a packet. so := e.SocketOptions() if so.GetRecvError() { so.QueueLocalErr( - tcpip.ErrMessageTooLong, + &tcpip.ErrMessageTooLong{}, route.NetProto, header.UDPMaximumPacketSize, tcpip.FullAddress{ @@ -532,7 +535,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, *tc v, ) } - return 0, tcpip.ErrMessageTooLong + return 0, &tcpip.ErrMessageTooLong{} } ttl := e.ttl @@ -582,13 +585,13 @@ func (e *endpoint) OnReusePortSet(v bool) { } // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. -func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { +func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error { switch opt { case tcpip.MTUDiscoverOption: // Return not supported if the value is not disabling path // MTU discovery. if v != tcpip.PMTUDiscoveryDont { - return tcpip.ErrNotSupported + return &tcpip.ErrNotSupported{} } case tcpip.MulticastTTLOption: @@ -640,7 +643,7 @@ func (e *endpoint) HasNIC(id int32) bool { } // SetSockOpt implements tcpip.Endpoint.SetSockOpt. -func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { +func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { switch v := opt.(type) { case *tcpip.MulticastInterfaceOption: e.mu.Lock() @@ -662,17 +665,17 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { if nic != 0 { if !e.stack.CheckNIC(nic) { - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } } else { nic = e.stack.CheckLocalAddress(0, netProto, addr) if nic == 0 { - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } } if e.BindNICID != 0 && e.BindNICID != nic { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } e.multicastNICID = nic @@ -680,7 +683,7 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { case *tcpip.AddMembershipOption: if !header.IsV4MulticastAddress(v.MulticastAddr) && !header.IsV6MulticastAddress(v.MulticastAddr) { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } nicID := v.NIC @@ -696,7 +699,7 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { nicID = e.stack.CheckLocalAddress(nicID, e.NetProto, v.InterfaceAddr) } if nicID == 0 { - return tcpip.ErrUnknownDevice + return &tcpip.ErrUnknownDevice{} } memToInsert := multicastMembership{nicID: nicID, multicastAddr: v.MulticastAddr} @@ -705,7 +708,7 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { defer e.mu.Unlock() if _, ok := e.multicastMemberships[memToInsert]; ok { - return tcpip.ErrPortInUse + return &tcpip.ErrPortInUse{} } if err := e.stack.JoinGroup(e.NetProto, nicID, v.MulticastAddr); err != nil { @@ -716,7 +719,7 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { case *tcpip.RemoveMembershipOption: if !header.IsV4MulticastAddress(v.MulticastAddr) && !header.IsV6MulticastAddress(v.MulticastAddr) { - return tcpip.ErrInvalidOptionValue + return &tcpip.ErrInvalidOptionValue{} } nicID := v.NIC @@ -731,7 +734,7 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { nicID = e.stack.CheckLocalAddress(nicID, e.NetProto, v.InterfaceAddr) } if nicID == 0 { - return tcpip.ErrUnknownDevice + return &tcpip.ErrUnknownDevice{} } memToRemove := multicastMembership{nicID: nicID, multicastAddr: v.MulticastAddr} @@ -740,7 +743,7 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { defer e.mu.Unlock() if _, ok := e.multicastMemberships[memToRemove]; !ok { - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } if err := e.stack.LeaveGroup(e.NetProto, nicID, v.MulticastAddr); err != nil { @@ -756,7 +759,7 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { } // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. -func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { +func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { switch opt { case tcpip.IPv4TOSOption: e.mu.RLock() @@ -803,12 +806,12 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { return v, nil default: - return -1, tcpip.ErrUnknownProtocolOption + return -1, &tcpip.ErrUnknownProtocolOption{} } } // GetSockOpt implements tcpip.Endpoint.GetSockOpt. -func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error { +func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error { switch o := opt.(type) { case *tcpip.MulticastInterfaceOption: e.mu.Lock() @@ -819,14 +822,14 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error { e.mu.Unlock() default: - return tcpip.ErrUnknownProtocolOption + return &tcpip.ErrUnknownProtocolOption{} } return nil } // sendUDP sends a UDP segment via the provided network endpoint and under the // provided identity. -func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner, noChecksum bool) *tcpip.Error { +func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner, noChecksum bool) tcpip.Error { pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ ReserveHeaderBytes: header.UDPMinimumSize + int(r.MaxHeaderLength()), Data: data, @@ -876,7 +879,7 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u // checkV4MappedLocked determines the effective network protocol and converts // addr to its canonical form. -func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, *tcpip.Error) { +func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) { unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProtoLocked(addr, e.ops.GetV6Only()) if err != nil { return tcpip.FullAddress{}, 0, err @@ -885,7 +888,7 @@ func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddres } // Disconnect implements tcpip.Endpoint.Disconnect. -func (e *endpoint) Disconnect() *tcpip.Error { +func (e *endpoint) Disconnect() tcpip.Error { e.mu.Lock() defer e.mu.Unlock() @@ -903,7 +906,7 @@ func (e *endpoint) Disconnect() *tcpip.Error { // Exclude ephemerally bound endpoints. if e.BindNICID != 0 || e.ID.LocalAddress == "" { - var err *tcpip.Error + var err tcpip.Error id = stack.TransportEndpointID{ LocalPort: e.ID.LocalPort, LocalAddress: e.ID.LocalAddress, @@ -934,10 +937,10 @@ func (e *endpoint) Disconnect() *tcpip.Error { } // Connect connects the endpoint to its peer. Specifying a NIC is optional. -func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { +func (e *endpoint) Connect(addr tcpip.FullAddress) tcpip.Error { if addr.Port == 0 { // We don't support connecting to port zero. - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } e.mu.Lock() @@ -954,12 +957,12 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { } if nicID != 0 && nicID != e.BindNICID { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } nicID = e.BindNICID default: - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } addr, netProto, err := e.checkV4MappedLocked(addr) @@ -1029,20 +1032,20 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { } // ConnectEndpoint is not supported. -func (*endpoint) ConnectEndpoint(tcpip.Endpoint) *tcpip.Error { - return tcpip.ErrInvalidEndpointState +func (*endpoint) ConnectEndpoint(tcpip.Endpoint) tcpip.Error { + return &tcpip.ErrInvalidEndpointState{} } // Shutdown closes the read and/or write end of the endpoint connection // to its peer. -func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { +func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() // A socket in the bound state can still receive multicast messages, // so we need to notify waiters on shutdown. if state := e.EndpointState(); state != StateBound && state != StateConnected { - return tcpip.ErrNotConnected + return &tcpip.ErrNotConnected{} } e.shutdownFlags |= flags @@ -1062,16 +1065,16 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { } // Listen is not supported by UDP, it just fails. -func (*endpoint) Listen(int) *tcpip.Error { - return tcpip.ErrNotSupported +func (*endpoint) Listen(int) tcpip.Error { + return &tcpip.ErrNotSupported{} } // Accept is not supported by UDP, it just fails. -func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { - return nil, nil, tcpip.ErrNotSupported +func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) { + return nil, nil, &tcpip.ErrNotSupported{} } -func (e *endpoint) registerWithStack(netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, tcpip.NICID, *tcpip.Error) { +func (e *endpoint) registerWithStack(netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, tcpip.NICID, tcpip.Error) { bindToDevice := tcpip.NICID(e.ops.GetBindToDevice()) if e.ID.LocalPort == 0 { port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.portFlags, bindToDevice, tcpip.FullAddress{}, nil /* testPort */) @@ -1090,11 +1093,11 @@ func (e *endpoint) registerWithStack(netProtos []tcpip.NetworkProtocolNumber, id return id, bindToDevice, err } -func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error { +func (e *endpoint) bindLocked(addr tcpip.FullAddress) tcpip.Error { // Don't allow binding once endpoint is not in the initial state // anymore. if e.EndpointState() != StateInitial { - return tcpip.ErrInvalidEndpointState + return &tcpip.ErrInvalidEndpointState{} } addr, netProto, err := e.checkV4MappedLocked(addr) @@ -1118,7 +1121,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error { // A local unicast address was specified, verify that it's valid. nicID = e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr) if nicID == 0 { - return tcpip.ErrBadLocalAddress + return &tcpip.ErrBadLocalAddress{} } } @@ -1148,7 +1151,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error { // Bind binds the endpoint to a specific local address and port. // Specifying a NIC is optional. -func (e *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { +func (e *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error { e.mu.Lock() defer e.mu.Unlock() @@ -1164,7 +1167,7 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { } // GetLocalAddress returns the address to which the endpoint is bound. -func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { +func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { e.mu.RLock() defer e.mu.RUnlock() @@ -1181,12 +1184,12 @@ func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { } // GetRemoteAddress returns the address to which the endpoint is connected. -func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { +func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) { e.mu.RLock() defer e.mu.RUnlock() if e.EndpointState() != StateConnected { - return tcpip.FullAddress{}, tcpip.ErrNotConnected + return tcpip.FullAddress{}, &tcpip.ErrNotConnected{} } return tcpip.FullAddress{ @@ -1319,7 +1322,7 @@ func (e *endpoint) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketB } } -func (e *endpoint) onICMPError(err *tcpip.Error, errType byte, errCode byte, extra uint32, pkt *stack.PacketBuffer) { +func (e *endpoint) onICMPError(err tcpip.Error, transErr stack.TransportError, pkt *stack.PacketBuffer) { // Update last error first. e.lastErrorMu.Lock() e.lastError = err @@ -1335,12 +1338,9 @@ func (e *endpoint) onICMPError(err *tcpip.Error, errType byte, errCode byte, ext } e.SocketOptions().QueueErr(&tcpip.SockError{ - Err: err, - ErrOrigin: header.ICMPOriginFromNetProto(pkt.NetworkProtocolNumber), - ErrType: errType, - ErrCode: errCode, - ErrInfo: extra, - Payload: payload, + Err: err, + Cause: transErr, + Payload: payload, Dst: tcpip.FullAddress{ NIC: pkt.NICID, Addr: e.ID.RemoteAddress, @@ -1359,24 +1359,13 @@ func (e *endpoint) onICMPError(err *tcpip.Error, errType byte, errCode byte, ext e.waiterQueue.Notify(waiter.EventErr) } -// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. -func (e *endpoint) HandleControlPacket(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) { - if typ == stack.ControlPortUnreachable { +// HandleError implements stack.TransportEndpoint. +func (e *endpoint) HandleError(transErr stack.TransportError, pkt *stack.PacketBuffer) { + // TODO(gvisor.dev/issues/5270): Handle all transport errors. + switch transErr.Kind() { + case stack.DestinationPortUnreachableTransportError: if e.EndpointState() == StateConnected { - var errType byte - var errCode byte - switch pkt.NetworkProtocolNumber { - case header.IPv4ProtocolNumber: - errType = byte(header.ICMPv4DstUnreachable) - errCode = byte(header.ICMPv4PortUnreachable) - case header.IPv6ProtocolNumber: - errType = byte(header.ICMPv6DstUnreachable) - errCode = byte(header.ICMPv6PortUnreachable) - default: - panic(fmt.Sprintf("unsupported net proto for infering ICMP type and code: %d", pkt.NetworkProtocolNumber)) - } - e.onICMPError(tcpip.ErrConnectionRefused, errType, errCode, extra, pkt) - return + e.onICMPError(&tcpip.ErrConnectionRefused{}, transErr, pkt) } } } diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go index feb53b553..21a6aa460 100644 --- a/pkg/tcpip/transport/udp/endpoint_state.go +++ b/pkg/tcpip/transport/udp/endpoint_state.go @@ -37,24 +37,6 @@ func (u *udpPacket) loadData(data buffer.VectorisedView) { u.data = data } -// saveLastError is invoked by stateify. -func (e *endpoint) saveLastError() string { - if e.lastError == nil { - return "" - } - - return e.lastError.String() -} - -// loadLastError is invoked by stateify. -func (e *endpoint) loadLastError(s string) { - if s == "" { - return - } - - e.lastError = tcpip.StringToError(s) -} - // beforeSave is invoked by stateify. func (e *endpoint) beforeSave() { // Stop incoming packets from being handled (and mutate endpoint state). @@ -114,7 +96,7 @@ func (e *endpoint) Resume(s *stack.Stack) { netProto = header.IPv6ProtocolNumber } - var err *tcpip.Error + var err tcpip.Error if state == StateConnected { e.route, err = e.stack.FindRoute(e.RegisterNICID, e.ID.LocalAddress, e.ID.RemoteAddress, netProto, e.ops.GetMulticastLoop()) if err != nil { @@ -123,7 +105,7 @@ func (e *endpoint) Resume(s *stack.Stack) { } else if len(e.ID.LocalAddress) != 0 && !e.isBroadcastOrMulticast(e.RegisterNICID, netProto, e.ID.LocalAddress) { // stateBound // A local unicast address is specified, verify that it's valid. if e.stack.CheckLocalAddress(e.RegisterNICID, netProto, e.ID.LocalAddress) == 0 { - panic(tcpip.ErrBadLocalAddress) + panic(&tcpip.ErrBadLocalAddress{}) } } diff --git a/pkg/tcpip/transport/udp/forwarder.go b/pkg/tcpip/transport/udp/forwarder.go index aae794506..705ad1f64 100644 --- a/pkg/tcpip/transport/udp/forwarder.go +++ b/pkg/tcpip/transport/udp/forwarder.go @@ -69,7 +69,7 @@ func (r *ForwarderRequest) ID() stack.TransportEndpointID { } // CreateEndpoint creates a connected UDP endpoint for the session request. -func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { netHdr := r.pkt.Network() route, err := r.stack.FindRoute(r.pkt.NICID, netHdr.DestinationAddress(), netHdr.SourceAddress(), r.pkt.NetworkProtocolNumber, false /* multicastLoop */) if err != nil { diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go index 91420edd3..427fdd0c9 100644 --- a/pkg/tcpip/transport/udp/protocol.go +++ b/pkg/tcpip/transport/udp/protocol.go @@ -54,13 +54,13 @@ func (*protocol) Number() tcpip.TransportProtocolNumber { } // NewEndpoint creates a new udp endpoint. -func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (p *protocol) NewEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { return newEndpoint(p.stack, netProto, waiterQueue), nil } // NewRawEndpoint creates a new raw UDP endpoint. It implements // stack.TransportProtocol.NewRawEndpoint. -func (p *protocol) NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { +func (p *protocol) NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { return raw.NewEndpoint(p.stack, netProto, header.UDPProtocolNumber, waiterQueue) } @@ -71,7 +71,7 @@ func (*protocol) MinimumPacketSize() int { // ParsePorts returns the source and destination ports stored in the given udp // packet. -func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { +func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err tcpip.Error) { h := header.UDP(v) return h.SourcePort(), h.DestinationPort(), nil } @@ -94,13 +94,13 @@ func (p *protocol) HandleUnknownDestinationPacket(id stack.TransportEndpointID, } // SetOption implements stack.TransportProtocol.SetOption. -func (*protocol) SetOption(tcpip.SettableTransportProtocolOption) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (*protocol) SetOption(tcpip.SettableTransportProtocolOption) tcpip.Error { + return &tcpip.ErrUnknownProtocolOption{} } // Option implements stack.TransportProtocol.Option. -func (*protocol) Option(tcpip.GettableTransportProtocolOption) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (*protocol) Option(tcpip.GettableTransportProtocolOption) tcpip.Error { + return &tcpip.ErrUnknownProtocolOption{} } // Close implements stack.TransportProtocol.Close. diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index c4794e876..5d81dbb94 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -353,7 +353,7 @@ func (c *testContext) cleanup() { func (c *testContext) createEndpoint(proto tcpip.NetworkProtocolNumber) { c.t.Helper() - var err *tcpip.Error + var err tcpip.Error c.ep, err = c.s.NewEndpoint(udp.ProtocolNumber, proto, &c.wq) if err != nil { c.t.Fatal("NewEndpoint failed: ", err) @@ -555,11 +555,11 @@ func TestBindToDeviceOption(t *testing.T) { testActions := []struct { name string setBindToDevice *tcpip.NICID - setBindToDeviceError *tcpip.Error + setBindToDeviceError tcpip.Error getBindToDevice int32 }{ {"GetDefaultValue", nil, nil, 0}, - {"BindToNonExistent", nicIDPtr(999), tcpip.ErrUnknownDevice, 0}, + {"BindToNonExistent", nicIDPtr(999), &tcpip.ErrUnknownDevice{}, 0}, {"BindToExistent", nicIDPtr(321), nil, 321}, {"UnbindToDevice", nicIDPtr(0), nil, 0}, } @@ -599,7 +599,7 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe var buf bytes.Buffer res, err := c.ep.Read(&buf, tcpip.ReadOptions{NeedRemoteAddr: true}) - if err == tcpip.ErrWouldBlock { + if _, ok := err.(*tcpip.ErrWouldBlock); ok { // Wait for data to become available. select { case <-ch: @@ -703,8 +703,11 @@ func TestBindReservedPort(t *testing.T) { t.Fatalf("NewEndpoint failed: %s", err) } defer ep.Close() - if got, want := ep.Bind(addr), tcpip.ErrPortInUse; got != want { - t.Fatalf("got ep.Bind(...) = %s, want = %s", got, want) + { + err := ep.Bind(addr) + if _, ok := err.(*tcpip.ErrPortInUse); !ok { + t.Fatalf("got ep.Bind(...) = %s, want = %s", err, &tcpip.ErrPortInUse{}) + } } } @@ -716,8 +719,11 @@ func TestBindReservedPort(t *testing.T) { defer ep.Close() // We can't bind ipv4-any on the port reserved by the connected endpoint // above, since the endpoint is dual-stack. - if got, want := ep.Bind(tcpip.FullAddress{Port: addr.Port}), tcpip.ErrPortInUse; got != want { - t.Fatalf("got ep.Bind(...) = %s, want = %s", got, want) + { + err := ep.Bind(tcpip.FullAddress{Port: addr.Port}) + if _, ok := err.(*tcpip.ErrPortInUse); !ok { + t.Fatalf("got ep.Bind(...) = %s, want = %s", err, &tcpip.ErrPortInUse{}) + } } // We can bind an ipv4 address on this port, though. if err := ep.Bind(tcpip.FullAddress{Addr: stackAddr, Port: addr.Port}); err != nil { @@ -806,11 +812,11 @@ func TestV4ReadSelfSource(t *testing.T) { for _, tt := range []struct { name string handleLocal bool - wantErr *tcpip.Error + wantErr tcpip.Error wantInvalidSource uint64 }{ {"HandleLocal", false, nil, 0}, - {"NoHandleLocal", true, tcpip.ErrWouldBlock, 1}, + {"NoHandleLocal", true, &tcpip.ErrWouldBlock{}, 1}, } { t.Run(tt.name, func(t *testing.T) { c := newDualTestContextWithOptions(t, defaultMTU, stack.Options{ @@ -959,7 +965,7 @@ func TestV4ReadBroadcastOnBoundToWildcard(t *testing.T) { // testFailingWrite sends a packet of the given test flow into the UDP endpoint // and verifies it fails with the provided error code. -func testFailingWrite(c *testContext, flow testFlow, wantErr *tcpip.Error) { +func testFailingWrite(c *testContext, flow testFlow, wantErr tcpip.Error) { c.t.Helper() // Take a snapshot of the stats to validate them at the end of the test. epstats := c.ep.Stats().(*tcpip.TransportEndpointStats).Clone() @@ -1092,7 +1098,7 @@ func TestDualWriteConnectedToV6(t *testing.T) { testWrite(c, unicastV6) // Write to V4 mapped address. - testFailingWrite(c, unicastV4in6, tcpip.ErrNetworkUnreachable) + testFailingWrite(c, unicastV4in6, &tcpip.ErrNetworkUnreachable{}) const want = 1 if got := c.ep.Stats().(*tcpip.TransportEndpointStats).SendErrors.NoRoute.Value(); got != want { c.t.Fatalf("Endpoint stat not updated. got %d want %d", got, want) @@ -1113,7 +1119,7 @@ func TestDualWriteConnectedToV4Mapped(t *testing.T) { testWrite(c, unicastV4in6) // Write to v6 address. - testFailingWrite(c, unicastV6, tcpip.ErrInvalidEndpointState) + testFailingWrite(c, unicastV6, &tcpip.ErrInvalidEndpointState{}) } func TestV4WriteOnV6Only(t *testing.T) { @@ -1123,7 +1129,7 @@ func TestV4WriteOnV6Only(t *testing.T) { c.createEndpointForFlow(unicastV6Only) // Write to V4 mapped address. - testFailingWrite(c, unicastV4in6, tcpip.ErrNoRoute) + testFailingWrite(c, unicastV4in6, &tcpip.ErrNoRoute{}) } func TestV6WriteOnBoundToV4Mapped(t *testing.T) { @@ -1138,7 +1144,7 @@ func TestV6WriteOnBoundToV4Mapped(t *testing.T) { } // Write to v6 address. - testFailingWrite(c, unicastV6, tcpip.ErrInvalidEndpointState) + testFailingWrite(c, unicastV6, &tcpip.ErrInvalidEndpointState{}) } func TestV6WriteOnConnected(t *testing.T) { @@ -1197,8 +1203,11 @@ func TestWriteOnConnectedInvalidPort(t *testing.T) { c.t.Fatalf("c.ep.Write(...) wrote %d bytes, want %d bytes", got, want) } - if err := c.ep.LastError(); err != tcpip.ErrConnectionRefused { - c.t.Fatalf("expected c.ep.LastError() == ErrConnectionRefused, got: %+v", err) + { + err := c.ep.LastError() + if _, ok := err.(*tcpip.ErrConnectionRefused); !ok { + c.t.Fatalf("expected c.ep.LastError() == ErrConnectionRefused, got: %+v", err) + } } }) } @@ -1605,7 +1614,7 @@ func TestTTL(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{p}, }) - ep := s.NetworkProtocolInstance(n).NewEndpoint(&testInterface{}, nil, nil, nil) + ep := s.NetworkProtocolInstance(n).NewEndpoint(&testInterface{}, nil) wantTTL = ep.DefaultTTL() ep.Close() } @@ -2308,21 +2317,21 @@ func TestShutdownWrite(t *testing.T) { t.Fatalf("Shutdown failed: %s", err) } - testFailingWrite(c, unicastV6, tcpip.ErrClosedForSend) + testFailingWrite(c, unicastV6, &tcpip.ErrClosedForSend{}) } -func (c *testContext) checkEndpointWriteStats(incr uint64, want tcpip.TransportEndpointStats, err *tcpip.Error) { +func (c *testContext) checkEndpointWriteStats(incr uint64, want tcpip.TransportEndpointStats, err tcpip.Error) { got := c.ep.Stats().(*tcpip.TransportEndpointStats).Clone() - switch err { + switch err.(type) { case nil: want.PacketsSent.IncrementBy(incr) - case tcpip.ErrMessageTooLong, tcpip.ErrInvalidOptionValue: + case *tcpip.ErrMessageTooLong, *tcpip.ErrInvalidOptionValue: want.WriteErrors.InvalidArgs.IncrementBy(incr) - case tcpip.ErrClosedForSend: + case *tcpip.ErrClosedForSend: want.WriteErrors.WriteClosed.IncrementBy(incr) - case tcpip.ErrInvalidEndpointState: + case *tcpip.ErrInvalidEndpointState: want.WriteErrors.InvalidEndpointState.IncrementBy(incr) - case tcpip.ErrNoRoute, tcpip.ErrBroadcastDisabled, tcpip.ErrNetworkUnreachable: + case *tcpip.ErrNoRoute, *tcpip.ErrBroadcastDisabled, *tcpip.ErrNetworkUnreachable: want.SendErrors.NoRoute.IncrementBy(incr) default: want.SendErrors.SendToNetworkFailed.IncrementBy(incr) @@ -2332,11 +2341,11 @@ func (c *testContext) checkEndpointWriteStats(incr uint64, want tcpip.TransportE } } -func (c *testContext) checkEndpointReadStats(incr uint64, want tcpip.TransportEndpointStats, err *tcpip.Error) { +func (c *testContext) checkEndpointReadStats(incr uint64, want tcpip.TransportEndpointStats, err tcpip.Error) { got := c.ep.Stats().(*tcpip.TransportEndpointStats).Clone() - switch err { - case nil, tcpip.ErrWouldBlock: - case tcpip.ErrClosedForReceive: + switch err.(type) { + case nil, *tcpip.ErrWouldBlock: + case *tcpip.ErrClosedForReceive: want.ReadErrors.ReadClosed.IncrementBy(incr) default: c.t.Errorf("Endpoint error missing stats update err %v", err) @@ -2509,14 +2518,26 @@ func TestOutgoingSubnetBroadcast(t *testing.T) { Port: 80, } opts := tcpip.WriteOptions{To: &to} - expectedErrWithoutBcastOpt := tcpip.ErrBroadcastDisabled + expectedErrWithoutBcastOpt := func(err tcpip.Error) tcpip.Error { + if _, ok := err.(*tcpip.ErrBroadcastDisabled); ok { + return nil + } + return &tcpip.ErrBroadcastDisabled{} + } if !test.requiresBroadcastOpt { expectedErrWithoutBcastOpt = nil } r.Reset(data) - if n, err := ep.Write(&r, opts); err != expectedErrWithoutBcastOpt { - t.Fatalf("got ep.Write(_, %#v) = (%d, %s), want = (_, %s)", opts, n, err, expectedErrWithoutBcastOpt) + { + n, err := ep.Write(&r, opts) + if expectedErrWithoutBcastOpt != nil { + if want := expectedErrWithoutBcastOpt(err); want != nil { + t.Fatalf("got ep.Write(_, %#v) = (%d, %s), want = (_, %s)", opts, n, err, want) + } + } else if err != nil { + t.Fatalf("got ep.Write(_, %#v) = (%d, %s), want = (_, nil)", opts, n, err) + } } ep.SocketOptions().SetBroadcast(true) @@ -2529,8 +2550,15 @@ func TestOutgoingSubnetBroadcast(t *testing.T) { ep.SocketOptions().SetBroadcast(false) r.Reset(data) - if n, err := ep.Write(&r, opts); err != expectedErrWithoutBcastOpt { - t.Fatalf("got ep.Write(_, %#v) = (%d, %s), want = (_, %s)", opts, n, err, expectedErrWithoutBcastOpt) + { + n, err := ep.Write(&r, opts) + if expectedErrWithoutBcastOpt != nil { + if want := expectedErrWithoutBcastOpt(err); want != nil { + t.Fatalf("got ep.Write(_, %#v) = (%d, %s), want = (_, %s)", opts, n, err, want) + } + } else if err != nil { + t.Fatalf("got ep.Write(_, %#v) = (%d, %s), want = (_, nil)", opts, n, err) + } } }) } |