diff options
Diffstat (limited to 'pkg/tcpip/network')
-rw-r--r-- | pkg/tcpip/network/fragmentation/fragmentation.go | 94 | ||||
-rw-r--r-- | pkg/tcpip/network/fragmentation/reassembler.go | 4 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv4/ipv4.go | 19 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv6/ipv6.go | 17 |
4 files changed, 111 insertions, 23 deletions
diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go index 2982450f8..1827666c5 100644 --- a/pkg/tcpip/network/fragmentation/fragmentation.go +++ b/pkg/tcpip/network/fragmentation/fragmentation.go @@ -17,28 +17,58 @@ package fragmentation import ( + "errors" "fmt" "log" "time" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" ) -// DefaultReassembleTimeout is based on the linux stack: net.ipv4.ipfrag_time. -const DefaultReassembleTimeout = 30 * time.Second +const ( + // DefaultReassembleTimeout is based on the linux stack: net.ipv4.ipfrag_time. + DefaultReassembleTimeout = 30 * time.Second -// HighFragThreshold is the threshold at which we start trimming old -// fragmented packets. Linux uses a default value of 4 MB. See -// net.ipv4.ipfrag_high_thresh for more information. -const HighFragThreshold = 4 << 20 // 4MB + // HighFragThreshold is the threshold at which we start trimming old + // fragmented packets. Linux uses a default value of 4 MB. See + // net.ipv4.ipfrag_high_thresh for more information. + HighFragThreshold = 4 << 20 // 4MB -// LowFragThreshold is the threshold we reach to when we start dropping -// older fragmented packets. It's important that we keep enough room for newer -// packets to be re-assembled. Hence, this needs to be lower than -// HighFragThreshold enough. Linux uses a default value of 3 MB. See -// net.ipv4.ipfrag_low_thresh for more information. -const LowFragThreshold = 3 << 20 // 3MB + // LowFragThreshold is the threshold we reach to when we start dropping + // older fragmented packets. It's important that we keep enough room for newer + // packets to be re-assembled. Hence, this needs to be lower than + // HighFragThreshold enough. Linux uses a default value of 3 MB. See + // net.ipv4.ipfrag_low_thresh for more information. + LowFragThreshold = 3 << 20 // 3MB + + // minBlockSize is the minimum block size for fragments. + minBlockSize = 1 +) + +var ( + // ErrInvalidArgs indicates to the caller that that an invalid argument was + // provided. + ErrInvalidArgs = errors.New("invalid args") +) + +// FragmentID is the identifier for a fragment. +type FragmentID struct { + // Source is the source address of the fragment. + Source tcpip.Address + + // Destination is the destination address of the fragment. + Destination tcpip.Address + + // ID is the identification value of the fragment. + // + // This is a uint32 because IPv6 uses a 32-bit identification value. + ID uint32 + + // The protocol for the packet. + Protocol uint8 +} // Fragmentation is the main structure that other modules // of the stack should use to implement IP Fragmentation. @@ -46,14 +76,17 @@ type Fragmentation struct { mu sync.Mutex highLimit int lowLimit int - reassemblers map[uint32]*reassembler + reassemblers map[FragmentID]*reassembler rList reassemblerList size int timeout time.Duration + blockSize uint16 } // NewFragmentation creates a new Fragmentation. // +// blockSize specifies the fragment block size, in bytes. +// // highMemoryLimit specifies the limit on the memory consumed // by the fragments stored by Fragmentation (overhead of internal data-structures // is not accounted). Fragments are dropped when the limit is reached. @@ -64,7 +97,7 @@ type Fragmentation struct { // reassemblingTimeout specifies the maximum time allowed to reassemble a packet. // Fragments are lazily evicted only when a new a packet with an // already existing fragmentation-id arrives after the timeout. -func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration) *Fragmentation { +func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration) *Fragmentation { if lowMemoryLimit >= highMemoryLimit { lowMemoryLimit = highMemoryLimit } @@ -73,17 +106,46 @@ func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout t lowMemoryLimit = 0 } + if blockSize < minBlockSize { + blockSize = minBlockSize + } + return &Fragmentation{ - reassemblers: make(map[uint32]*reassembler), + reassemblers: make(map[FragmentID]*reassembler), highLimit: highMemoryLimit, lowLimit: lowMemoryLimit, timeout: reassemblingTimeout, + blockSize: blockSize, } } // Process processes an incoming fragment belonging to an ID and returns a // complete packet when all the packets belonging to that ID have been received. -func (f *Fragmentation) Process(id uint32, first, last uint16, more bool, vv buffer.VectorisedView) (buffer.VectorisedView, bool, error) { +// +// [first, last] is the range of the fragment bytes. +// +// first must be a multiple of the block size f is configured with. The size +// of the fragment data must be a multiple of the block size, unless there are +// no fragments following this fragment (more set to false). +func (f *Fragmentation) Process(id FragmentID, first, last uint16, more bool, vv buffer.VectorisedView) (buffer.VectorisedView, bool, error) { + if first > last { + return buffer.VectorisedView{}, false, fmt.Errorf("first=%d is greater than last=%d: %w", first, last, ErrInvalidArgs) + } + + if first%f.blockSize != 0 { + return buffer.VectorisedView{}, false, fmt.Errorf("first=%d is not a multiple of block size=%d: %w", first, f.blockSize, ErrInvalidArgs) + } + + fragmentSize := last - first + 1 + if more && fragmentSize%f.blockSize != 0 { + return buffer.VectorisedView{}, false, fmt.Errorf("fragment size=%d bytes is not a multiple of block size=%d on non-final fragment: %w", fragmentSize, f.blockSize, ErrInvalidArgs) + } + + if l := vv.Size(); l < int(fragmentSize) { + return buffer.VectorisedView{}, false, fmt.Errorf("got fragment size=%d bytes less than the expected fragment size=%d bytes (first=%d last=%d): %w", l, fragmentSize, first, last, ErrInvalidArgs) + } + vv.CapLength(int(fragmentSize)) + f.mu.Lock() r, ok := f.reassemblers[id] if ok && r.tooOld(f.timeout) { diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go index 0a83d81f2..50d30bbf0 100644 --- a/pkg/tcpip/network/fragmentation/reassembler.go +++ b/pkg/tcpip/network/fragmentation/reassembler.go @@ -32,7 +32,7 @@ type hole struct { type reassembler struct { reassemblerEntry - id uint32 + id FragmentID size int mu sync.Mutex holes []hole @@ -42,7 +42,7 @@ type reassembler struct { creationTime time.Time } -func newReassembler(id uint32) *reassembler { +func newReassembler(id FragmentID) *reassembler { r := &reassembler{ id: id, holes: make([]hole, 0, 16), diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index b1776e5ee..d5f5d38f7 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -45,6 +45,10 @@ const ( // buckets is the number of identifier buckets. buckets = 2048 + + // The size of a fragment block, in bytes, as per RFC 791 section 3.1, + // page 14. + fragmentblockSize = 8 ) type endpoint struct { @@ -66,7 +70,7 @@ func (p *protocol) NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWi prefixLen: addrWithPrefix.PrefixLen, linkEP: linkEP, dispatcher: dispatcher, - fragmentation: fragmentation.NewFragmentation(fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout), + fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout), protocol: p, stack: st, } @@ -438,7 +442,18 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) { } var ready bool var err error - pkt.Data, ready, err = e.fragmentation.Process(hash.IPv4FragmentHash(h), h.FragmentOffset(), last, h.More(), pkt.Data) + pkt.Data, ready, err = e.fragmentation.Process( + fragmentation.FragmentID{ + Source: h.SourceAddress(), + Destination: h.DestinationAddress(), + ID: uint32(h.ID()), + Protocol: h.Protocol(), + }, + h.FragmentOffset(), + last, + h.More(), + pkt.Data, + ) if err != nil { r.Stats().IP.MalformedPacketsReceived.Increment() r.Stats().IP.MalformedFragmentsReceived.Increment() diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index 95fbcf2d1..a0a5c9c01 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/network/fragmentation" - "gvisor.dev/gvisor/pkg/tcpip/network/hash" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -343,7 +342,19 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) { var ready bool // Note that pkt doesn't have its transport header set after reassembly, // and won't until DeliverNetworkPacket sets it. - pkt.Data, ready, err = e.fragmentation.Process(hash.IPv6FragmentHash(h, extHdr.ID()), start, last, extHdr.More(), rawPayload.Buf) + pkt.Data, ready, err = e.fragmentation.Process( + // IPv6 ignores the Protocol field since the ID only needs to be unique + // across source-destination pairs, as per RFC 8200 section 4.5. + fragmentation.FragmentID{ + Source: h.SourceAddress(), + Destination: h.DestinationAddress(), + ID: extHdr.ID(), + }, + start, + last, + extHdr.More(), + rawPayload.Buf, + ) if err != nil { r.Stats().IP.MalformedPacketsReceived.Increment() r.Stats().IP.MalformedFragmentsReceived.Increment() @@ -467,7 +478,7 @@ func (p *protocol) NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWi linkEP: linkEP, linkAddrCache: linkAddrCache, dispatcher: dispatcher, - fragmentation: fragmentation.NewFragmentation(fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout), + fragmentation: fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout), protocol: p, }, nil } |