summary | refs | log | tree | commit | diff | homepage
path: root/pkg/tcpip
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip')
-rw-r--r-- pkg/tcpip/header/parse/parse.go 3
-rw-r--r-- pkg/tcpip/network/fragmentation/fragmentation.go 71
-rw-r--r-- pkg/tcpip/network/fragmentation/reassembler.go 50
-rw-r--r-- pkg/tcpip/network/ipv4/icmp.go 15
-rw-r--r-- pkg/tcpip/network/ipv4/ipv4.go 41
-rw-r--r-- pkg/tcpip/network/ipv6/icmp.go 13
-rw-r--r-- pkg/tcpip/network/ipv6/ipv6.go 27
7 files changed, 100 insertions, 120 deletions
diff --git a/pkg/tcpip/header/parse/parse.go b/pkg/tcpip/header/parse/parse.go
index 5ca75c834..2042f214a 100644
--- a/pkg/tcpip/header/parse/parse.go
+++ b/pkg/tcpip/header/parse/parse.go
@@ -109,6 +109,9 @@ traverseExtensions:
fragOffset = extHdr.FragmentOffset()
fragMore = extHdr.More()
}
+ rawPayload := it.AsRawHeader(true /* consume */)
+ extensionsSize = dataClone.Size() - rawPayload.Buf.Size()
+ break traverseExtensions
case header.IPv6RawPayloadHeader:
// We've found the payload after any extensions.
diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go
index 936601287..c75ca7d71 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation.go
@@ -71,16 +71,25 @@ type FragmentID struct {
// Fragmentation is the main structure that other modules
// of the stack should use to implement IP Fragmentation.
type Fragmentation struct {
- mu sync.Mutex
- highLimit int
- lowLimit int
- reassemblers map[FragmentID]*reassembler
- rList reassemblerList
- size int
- timeout time.Duration
- blockSize uint16
- clock tcpip.Clock
- releaseJob *tcpip.Job
+ mu sync.Mutex
+ highLimit int
+ lowLimit int
+ reassemblers map[FragmentID]*reassembler
+ rList reassemblerList
+ size int
+ timeout time.Duration
+ blockSize uint16
+ clock tcpip.Clock
+ releaseJob *tcpip.Job
+ timeoutHandler TimeoutHandler
+}
+
+// TimeoutHandler is consulted if a packet reassembly has timed out.
+type TimeoutHandler interface {
+ // OnReassemblyTimeout will be called with the first fragment (or nil, if the
+ // first fragment has not been received) of a packet whose reassembly has
+ // timed out.
+ OnReassemblyTimeout(pkt *stack.PacketBuffer)
}
// NewFragmentation creates a new Fragmentation.
@@ -97,7 +106,7 @@ type Fragmentation struct {
// reassemblingTimeout specifies the maximum time allowed to reassemble a packet.
// Fragments are lazily evicted only when a new a packet with an
// already existing fragmentation-id arrives after the timeout.
-func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration, clock tcpip.Clock) *Fragmentation {
+func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration, clock tcpip.Clock, timeoutHandler TimeoutHandler) *Fragmentation {
if lowMemoryLimit >= highMemoryLimit {
lowMemoryLimit = highMemoryLimit
}
@@ -111,12 +120,13 @@ func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, rea
}
f := &Fragmentation{
- reassemblers: make(map[FragmentID]*reassembler),
- highLimit: highMemoryLimit,
- lowLimit: lowMemoryLimit,
- timeout: reassemblingTimeout,
- blockSize: blockSize,
- clock: clock,
+ reassemblers: make(map[FragmentID]*reassembler),
+ highLimit: highMemoryLimit,
+ lowLimit: lowMemoryLimit,
+ timeout: reassemblingTimeout,
+ blockSize: blockSize,
+ clock: clock,
+ timeoutHandler: timeoutHandler,
}
f.releaseJob = tcpip.NewJob(f.clock, &f.mu, f.releaseReassemblersLocked)
@@ -136,16 +146,8 @@ func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, rea
// proto is the protocol number marked in the fragment being processed. It has
// to be given here outside of the FragmentID struct because IPv6 should not use
// the protocol to identify a fragment.
-//
-// releaseCB is a callback that will run when the fragment reassembly of a
-// packet is complete or cancelled. releaseCB take a a boolean argument which is
-// true iff the reassembly is cancelled due to timeout. releaseCB should be
-// passed only with the first fragment of a packet. If more than one releaseCB
-// are passed for the same packet, only the first releaseCB will be saved for
-// the packet and the succeeding ones will be dropped by running them
-// immediately with a false argument.
func (f *Fragmentation) Process(
- id FragmentID, first, last uint16, more bool, proto uint8, vv buffer.VectorisedView, releaseCB func(bool)) (
+ id FragmentID, first, last uint16, more bool, proto uint8, pkt *stack.PacketBuffer) (
buffer.VectorisedView, uint8, bool, error) {
if first > last {
return buffer.VectorisedView{}, 0, false, fmt.Errorf("first=%d is greater than last=%d: %w", first, last, ErrInvalidArgs)
@@ -160,10 +162,9 @@ func (f *Fragmentation) Process(
return buffer.VectorisedView{}, 0, false, fmt.Errorf("fragment size=%d bytes is not a multiple of block size=%d on non-final fragment: %w", fragmentSize, f.blockSize, ErrInvalidArgs)
}
- if l := vv.Size(); l < int(fragmentSize) {
- return buffer.VectorisedView{}, 0, false, fmt.Errorf("got fragment size=%d bytes less than the expected fragment size=%d bytes (first=%d last=%d): %w", l, fragmentSize, first, last, ErrInvalidArgs)
+ if l := pkt.Data.Size(); l != int(fragmentSize) {
+ return buffer.VectorisedView{}, 0, false, fmt.Errorf("got fragment size=%d bytes not equal to the expected fragment size=%d bytes (first=%d last=%d): %w", l, fragmentSize, first, last, ErrInvalidArgs)
}
- vv.CapLength(int(fragmentSize))
f.mu.Lock()
r, ok := f.reassemblers[id]
@@ -179,15 +180,9 @@ func (f *Fragmentation) Process(
f.releaseReassemblersLocked()
}
}
- if releaseCB != nil {
- if !r.setCallback(releaseCB) {
- // We got a duplicate callback. Release it immediately.
- releaseCB(false /* timedOut */)
- }
- }
f.mu.Unlock()
- res, firstFragmentProto, done, consumed, err := r.process(first, last, more, proto, vv)
+ res, firstFragmentProto, done, consumed, err := r.process(first, last, more, proto, pkt)
if err != nil {
// We probably got an invalid sequence of fragments. Just
// discard the reassembler and move on.
@@ -231,7 +226,9 @@ func (f *Fragmentation) release(r *reassembler, timedOut bool) {
f.size = 0
}
- r.release(timedOut) // releaseCB may run.
+ if h := f.timeoutHandler; timedOut && h != nil {
+ h.OnReassemblyTimeout(r.pkt)
+ }
}
// releaseReassemblersLocked releases already-expired reassemblers, then
diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go
index c0cc0bde0..19f4920b3 100644
--- a/pkg/tcpip/network/fragmentation/reassembler.go
+++ b/pkg/tcpip/network/fragmentation/reassembler.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
type hole struct {
@@ -41,7 +42,7 @@ type reassembler struct {
heap fragHeap
done bool
creationTime int64
- callback func(bool)
+ pkt *stack.PacketBuffer
}
func newReassembler(id FragmentID, clock tcpip.Clock) *reassembler {
@@ -79,7 +80,7 @@ func (r *reassembler) updateHoles(first, last uint16, more bool) bool {
return used
}
-func (r *reassembler) process(first, last uint16, more bool, proto uint8, vv buffer.VectorisedView) (buffer.VectorisedView, uint8, bool, int, error) {
+func (r *reassembler) process(first, last uint16, more bool, proto uint8, pkt *stack.PacketBuffer) (buffer.VectorisedView, uint8, bool, int, error) {
r.mu.Lock()
defer r.mu.Unlock()
consumed := 0
@@ -89,18 +90,20 @@ func (r *reassembler) process(first, last uint16, more bool, proto uint8, vv buf
// was waiting on the mutex. We don't have to do anything in this case.
return buffer.VectorisedView{}, 0, false, consumed, nil
}
- // For IPv6, it is possible to have different Protocol values between
- // fragments of a packet (because, unlike IPv4, the Protocol is not used to
- // identify a fragment). In this case, only the Protocol of the first
- // fragment must be used as per RFC 8200 Section 4.5.
- //
- // TODO(gvisor.dev/issue/3648): The entire first IP header should be recorded
- // here (instead of just the protocol) because most IP options should be
- // derived from the first fragment.
- if first == 0 {
- r.proto = proto
- }
if r.updateHoles(first, last, more) {
+ // For IPv6, it is possible to have different Protocol values between
+ // fragments of a packet (because, unlike IPv4, the Protocol is not used to
+ // identify a fragment). In this case, only the Protocol of the first
+ // fragment must be used as per RFC 8200 Section 4.5.
+ //
+ // TODO(gvisor.dev/issue/3648): During reassembly of an IPv6 packet, IP
+ // options received in the first fragment should be used - and they should
+ // override options from following fragments.
+ if first == 0 {
+ r.pkt = pkt
+ r.proto = proto
+ }
+ vv := pkt.Data
// We store the incoming packet only if it filled some holes.
heap.Push(&r.heap, fragment{offset: first, vv: vv.Clone(nil)})
consumed = vv.Size()
@@ -124,24 +127,3 @@ func (r *reassembler) checkDoneOrMark() bool {
r.mu.Unlock()
return prev
}
-
-func (r *reassembler) setCallback(c func(bool)) bool {
- r.mu.Lock()
- defer r.mu.Unlock()
- if r.callback != nil {
- return false
- }
- r.callback = c
- return true
-}
-
-func (r *reassembler) release(timedOut bool) {
- r.mu.Lock()
- callback := r.callback
- r.callback = nil
- r.mu.Unlock()
-
- if callback != nil {
- callback(timedOut)
- }
-}
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 204b182e6..488945226 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -514,3 +514,18 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) *tcpi
counter.Increment()
return nil
}
+
+// OnReassemblyTimeout implements fragmentation.TimeoutHandler.
+func (p *protocol) OnReassemblyTimeout(pkt *stack.PacketBuffer) {
+ // OnReassemblyTimeout sends a Time Exceeded Message, as per RFC 792:
+ //
+ // If a host reassembling a fragmented datagram cannot complete the
+ // reassembly due to missing fragments within its time limit it discards the
+ // datagram, and it may send a time exceeded message.
+ //
+ // If fragment zero is not available then no time exceeded need be sent at
+ // all.
+ if pkt != nil {
+ p.returnError(&icmpReasonReassemblyTimeout{}, pkt)
+ }
+}
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index a9a38b851..1efe6297a 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -650,29 +650,8 @@ func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) {
return
}
- // Set up a callback in case we need to send a Time Exceeded Message, as per
- // RFC 792:
- //
- // If a host reassembling a fragmented datagram cannot complete the
- // reassembly due to missing fragments within its time limit it discards
- // the datagram, and it may send a time exceeded message.
- //
- // If fragment zero is not available then no time exceeded need be sent at
- // all.
- var releaseCB func(bool)
- if start == 0 {
- pkt := pkt.Clone()
- releaseCB = func(timedOut bool) {
- if timedOut {
- _ = e.protocol.returnError(&icmpReasonReassemblyTimeout{}, pkt)
- }
- }
- }
-
- var ready bool
- var err error
proto := h.Protocol()
- pkt.Data, _, ready, err = e.protocol.fragmentation.Process(
+ data, _, ready, err := e.protocol.fragmentation.Process(
// As per RFC 791 section 2.3, the identification value is unique
// for a source-destination pair and protocol.
fragmentation.FragmentID{
@@ -685,8 +664,7 @@ func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) {
start+uint16(pkt.Data.Size())-1,
h.More(),
proto,
- pkt.Data,
- releaseCB,
+ pkt,
)
if err != nil {
stats.IP.MalformedPacketsReceived.Increment()
@@ -696,6 +674,7 @@ func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) {
if !ready {
return
}
+ pkt.Data = data
// The reassembler doesn't take care of fixing up the header, so we need
// to do it here.
@@ -863,6 +842,7 @@ func (e *endpoint) IsInGroup(addr tcpip.Address) bool {
var _ stack.ForwardingNetworkProtocol = (*protocol)(nil)
var _ stack.NetworkProtocol = (*protocol)(nil)
+var _ fragmentation.TimeoutHandler = (*protocol)(nil)
type protocol struct {
stack *stack.Stack
@@ -1027,13 +1007,14 @@ func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
}
hashIV := r[buckets]
- return &protocol{
- stack: s,
- ids: ids,
- hashIV: hashIV,
- defaultTTL: DefaultTTL,
- fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock()),
+ p := &protocol{
+ stack: s,
+ ids: ids,
+ hashIV: hashIV,
+ defaultTTL: DefaultTTL,
}
+ p.fragmentation = fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p)
+ return p
}
func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeader header.IPv4) (*stack.PacketBuffer, bool) {
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index 8d788af80..beb8f562e 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -922,3 +922,16 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) *tcpi
counter.Increment()
return nil
}
+
+// OnReassemblyTimeout implements fragmentation.TimeoutHandler.
+func (p *protocol) OnReassemblyTimeout(pkt *stack.PacketBuffer) {
+ // OnReassemblyTimeout sends a Time Exceeded Message as per RFC 2460 Section
+ // 4.5:
+ //
+ // If the first fragment (i.e., the one with a Fragment Offset of zero) has
+ // been received, an ICMP Time Exceeded -- Fragment Reassembly Time Exceeded
+ // message should be sent to the source of that fragment.
+ if pkt != nil {
+ p.returnError(&icmpReasonReassemblyTimeout{}, pkt)
+ }
+}
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 7697ff987..7a00f6314 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -967,18 +967,6 @@ func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) {
return
}
- // Set up a callback in case we need to send a Time Exceeded Message as
- // per RFC 2460 Section 4.5.
- var releaseCB func(bool)
- if start == 0 {
- pkt := pkt.Clone()
- releaseCB = func(timedOut bool) {
- if timedOut {
- _ = e.protocol.returnError(&icmpReasonReassemblyTimeout{}, pkt)
- }
- }
- }
-
// Note that pkt doesn't have its transport header set after reassembly,
// and won't until DeliverNetworkPacket sets it.
data, proto, ready, err := e.protocol.fragmentation.Process(
@@ -993,17 +981,17 @@ func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) {
start+uint16(fragmentPayloadLen)-1,
extHdr.More(),
uint8(rawPayload.Identifier),
- rawPayload.Buf,
- releaseCB,
+ pkt,
)
if err != nil {
stats.IP.MalformedPacketsReceived.Increment()
stats.IP.MalformedFragmentsReceived.Increment()
return
}
- pkt.Data = data
if ready {
+ pkt.Data = data
+
// We create a new iterator with the reassembled packet because we could
// have more extension headers in the reassembled payload, as per RFC
// 8200 section 4.5. We also use the NextHeader value from the first
@@ -1414,6 +1402,7 @@ func (e *endpoint) IsInGroup(addr tcpip.Address) bool {
var _ stack.ForwardingNetworkProtocol = (*protocol)(nil)
var _ stack.NetworkProtocol = (*protocol)(nil)
+var _ fragmentation.TimeoutHandler = (*protocol)(nil)
type protocol struct {
stack *stack.Stack
@@ -1669,10 +1658,9 @@ func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory {
return func(s *stack.Stack) stack.NetworkProtocol {
p := &protocol{
- stack: s,
- fragmentation: fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock()),
- ids: ids,
- hashIV: hashIV,
+ stack: s,
+ ids: ids,
+ hashIV: hashIV,
ndpDisp: opts.NDPDisp,
ndpConfigs: opts.NDPConfigs,
@@ -1680,6 +1668,7 @@ func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory {
tempIIDSeed: opts.TempIIDSeed,
autoGenIPv6LinkLocal: opts.AutoGenIPv6LinkLocal,
}
+ p.fragmentation = fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p)
p.mu.eps = make(map[*endpoint]struct{})
p.SetDefaultTTL(DefaultTTL)
return p