diff options
author | Bhasker Hariharan <bhaskerh@google.com> | 2018-11-09 14:37:42 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-11-09 14:38:46 -0800 |
commit | 33089561b1d53dada959a312ab69574cd6635b4b (patch) | |
tree | 7b497da71c6c238eafe4faf49bb2a9bbb0b4cddf | |
parent | 93e88760b0d0c9c6656f7773f68540b1853d169b (diff) |
Add an implementation of a SACK scoreboard as per RFC6675.
PiperOrigin-RevId: 220866996
Change-Id: I89d48215df57c00d6a6ec512fc18712a2ea9080b
-rw-r--r-- | WORKSPACE | 6 | ||||
-rw-r--r-- | pkg/tcpip/header/BUILD | 1 | ||||
-rw-r--r-- | pkg/tcpip/header/tcp.go | 11 | ||||
-rw-r--r-- | pkg/tcpip/stack/stack.go | 12 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/BUILD | 19 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/sack_scoreboard.go | 227 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/sack_scoreboard_test.go | 150 |
7 files changed, 416 insertions, 10 deletions
@@ -131,3 +131,9 @@ http_archive( "https://github.com/google/glog/archive/028d37889a1e80e8a07da1b8945ac706259e5fd8.tar.gz", ], ) + +go_repository( + name = "com_github_google_btree", + importpath = "github.com/google/btree", + commit = "4030bb1f1f0c35b30ca7009e9ebd06849dd45306", +) diff --git a/pkg/tcpip/header/BUILD b/pkg/tcpip/header/BUILD index 66b37720c..8e455fe1e 100644 --- a/pkg/tcpip/header/BUILD +++ b/pkg/tcpip/header/BUILD @@ -24,6 +24,7 @@ go_library( "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/seqnum", + "@com_github_google_btree//:go_default_library", ], ) diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go index 567a21167..207046b36 100644 --- a/pkg/tcpip/header/tcp.go +++ b/pkg/tcpip/header/tcp.go @@ -17,6 +17,7 @@ package header import ( "encoding/binary" + "github.com/google/btree" "gvisor.googlesource.com/gvisor/pkg/tcpip" "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" ) @@ -131,6 +132,16 @@ type SACKBlock struct { End seqnum.Value } +// Less returns true if r.Start < b.Start. +func (r SACKBlock) Less(b btree.Item) bool { + return r.Start.LessThan(b.(SACKBlock).Start) +} + +// Contains returns true if b is completely contained in r. +func (r SACKBlock) Contains(b SACKBlock) bool { + return r.Start.LessThanEq(b.Start) && b.End.LessThanEq(r.End) +} + // TCPOptions are used to parse and cache the TCP segment options for a non // syn/syn-ack segment. // diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index d4da980a9..7af343fba 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -200,9 +200,17 @@ type TCPSenderState struct { // TCPSACKInfo holds TCP SACK related information for a given TCP endpoint. type TCPSACKInfo struct { - // Blocks is the list of SACK block currently received by the - // TCP endpoint. + // Blocks is the list of SACK Blocks that identify the out of order segments + // held by a given TCP endpoint. Blocks []header.SACKBlock + + // ReceivedBlocks are the SACK blocks received by this endpoint + // from the peer endpoint. + ReceivedBlocks []header.SACKBlock + + // MaxSACKED is the highest sequence number that has been SACKED + // by the peer. + MaxSACKED seqnum.Value } // TCPEndpointState is a copy of the internal state of a TCP endpoint. diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index 5a77ee232..18e6a5dc6 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -28,6 +28,7 @@ go_library( "rcv.go", "reno.go", "sack.go", + "sack_scoreboard.go", "segment.go", "segment_heap.go", "segment_queue.go", @@ -50,14 +51,24 @@ go_library( "//pkg/tcpip/stack", "//pkg/tmutex", "//pkg/waiter", + "@com_github_google_btree//:go_default_library", ], ) +filegroup( + name = "autogen", + srcs = [ + "tcp_segment_list.go", + ], + visibility = ["//:sandbox"], +) + go_test( name = "tcp_test", size = "small", srcs = [ "dual_stack_test.go", + "sack_scoreboard_test.go", "tcp_sack_test.go", "tcp_test.go", "tcp_timestamp_test.go", @@ -79,11 +90,3 @@ go_test( "//pkg/waiter", ], ) - -filegroup( - name = "autogen", - srcs = [ - "tcp_segment_list.go", - ], - visibility = ["//:sandbox"], -) diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard.go b/pkg/tcpip/transport/tcp/sack_scoreboard.go new file mode 100644 index 000000000..d251d32db --- /dev/null +++ b/pkg/tcpip/transport/tcp/sack_scoreboard.go @@ -0,0 +1,227 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp + +import ( + "fmt" + "strings" + + "github.com/google/btree" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" +) + +// maxSACKBlocks is the maximum number of distinct SACKBlocks the scoreboard +// will track. Once there are 100 distinct blocks, new insertions will fail. +const maxSACKBlocks = 100 + +// SACKScoreboard stores a set of disjoint SACK ranges. +type SACKScoreboard struct { + smss uint16 + maxSACKED seqnum.Value + sacked seqnum.Size + ranges *btree.BTree +} + +// NewSACKScoreboard returns a new SACK Scoreboard. +func NewSACKScoreboard(smss uint16, iss seqnum.Value) *SACKScoreboard { + return &SACKScoreboard{ + smss: smss, + ranges: btree.New(2), + maxSACKED: iss, + } +} + +// Insert inserts/merges the provided SACKBlock into the scoreboard. +func (s *SACKScoreboard) Insert(r header.SACKBlock) { + if s.ranges.Len() >= maxSACKBlocks { + return + } + + // Check if we can merge the new range with a range before or after it. + var toDelete []btree.Item + if s.maxSACKED.LessThan(r.End - 1) { + s.maxSACKED = r.End - 1 + } + s.ranges.AscendGreaterOrEqual(r, func(i btree.Item) bool { + if i == r { + return true + } + sacked := i.(header.SACKBlock) + // There is a hole between these two SACK blocks, so we can't + // merge anymore. + if r.End.LessThan(r.Start) { + return false + } + // There is some overlap at this point, merge the blocks and + // delete the other one. + // + // ----sS--------sE + // r.S---------------rE + // -------sE + if sacked.End.LessThan(r.End) { + // sacked is contained in the newly inserted range. + // Delete this block. + toDelete = append(toDelete, i) + return true + } + // sacked covers a range past end of the newly inserted + // block. + r.End = sacked.End + toDelete = append(toDelete, i) + return true + }) + + s.ranges.DescendLessOrEqual(r, func(i btree.Item) bool { + if i == r { + return true + } + sacked := i.(header.SACKBlock) + // sA------sE + // rA----rE + if sacked.End.LessThan(r.Start) { + return false + } + // The previous range extends into the current block. Merge it + // into the newly inserted range and delete the other one. + // + // <-rA---rE----<---rE---> + // sA--------------sE + r.Start = sacked.Start + // Extend r to cover sacked if sacked extends past r. + if r.End.LessThan(sacked.End) { + r.End = sacked.End + } + toDelete = append(toDelete, i) + return true + }) + for _, i := range toDelete { + if sb := s.ranges.Delete(i); sb != nil { + sb := i.(header.SACKBlock) + s.sacked -= sb.Start.Size(sb.End) + } + } + + replaced := s.ranges.ReplaceOrInsert(r) + if replaced == nil { + s.sacked += r.Start.Size(r.End) + } +} + +// IsSACKED returns true if the a given range of sequence numbers denoted by r +// are already covered by SACK information in the scoreboard. +func (s *SACKScoreboard) IsSACKED(r header.SACKBlock) bool { + found := false + s.ranges.DescendLessOrEqual(r, func(i btree.Item) bool { + sacked := i.(header.SACKBlock) + if sacked.End.LessThan(r.Start) { + return false + } + if sacked.Contains(r) { + found = true + return false + } + return true + }) + return found +} + +// Dump prints the state of the scoreboard structure. +func (s *SACKScoreboard) String() string { + var str strings.Builder + str.WriteString("SACKScoreboard: {") + s.ranges.Ascend(func(i btree.Item) bool { + str.WriteString(fmt.Sprintf("%v,", i)) + return true + }) + str.WriteString("}\n") + return str.String() +} + +// Delete removes all SACK information prior to seq. +func (s *SACKScoreboard) Delete(seq seqnum.Value) { + toDelete := []btree.Item{} + r := header.SACKBlock{seq, seq.Add(1)} + s.ranges.DescendLessOrEqual(r, func(i btree.Item) bool { + if i == r { + return true + } + sb := i.(header.SACKBlock) + toDelete = append(toDelete, i) + if sb.End.LessThanEq(seq) { + s.sacked -= sb.Start.Size(sb.End) + } else { + newSB := header.SACKBlock{seq, sb.End} + s.ranges.ReplaceOrInsert(newSB) + s.sacked -= sb.Start.Size(seq) + } + return true + }) + for _, i := range toDelete { + s.ranges.Delete(i) + } +} + +// Copy provides a copy of the SACK scoreboard. +func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, maxSACKED seqnum.Value) { + s.ranges.Ascend(func(i btree.Item) bool { + sackBlocks = append(sackBlocks, i.(header.SACKBlock)) + return true + }) + return sackBlocks, s.maxSACKED +} + +// IsLost implements the IsLost(SeqNum) operation defined in RFC 3517 section 4. +// +// This routine returns whether the given sequence number is considered to be +// lost. The routine returns true when either nDupAckThreshold discontiguous +// SACKed sequences have arrived above 'SeqNum' or (nDupAckThreshold * SMSS) +// bytes with sequence numbers greater than 'SeqNum' have been SACKed. +// Otherwise, the routine returns false. +func (s *SACKScoreboard) IsLost(r header.SACKBlock) bool { + nDupSACK := 0 + nDupSACKBytes := seqnum.Size(0) + isLost := false + s.ranges.AscendGreaterOrEqual(r, func(i btree.Item) bool { + sacked := i.(header.SACKBlock) + if sacked.Contains(r) { + return false + } + nDupSACKBytes += sacked.Start.Size(sacked.End) + nDupSACK++ + if nDupSACK >= nDupAckThreshold || nDupSACKBytes >= seqnum.Size(nDupAckThreshold*s.smss) { + isLost = true + return false + } + return true + }) + return isLost +} + +// Empty returns true if the SACK scoreboard has no entries, false otherwise. +func (s *SACKScoreboard) Empty() bool { + return s.ranges.Len() == 0 +} + +// Sacked returns the current number of bytes held in the SACK scoreboard. +func (s *SACKScoreboard) Sacked() seqnum.Size { + return s.sacked +} + +// MaxSACKED returns the highest sequence number ever inserted in the SACK +// scoreboard. +func (s *SACKScoreboard) MaxSACKED() seqnum.Value { + return s.maxSACKED +} diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard_test.go b/pkg/tcpip/transport/tcp/sack_scoreboard_test.go new file mode 100644 index 000000000..db4b52aca --- /dev/null +++ b/pkg/tcpip/transport/tcp/sack_scoreboard_test.go @@ -0,0 +1,150 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_test + +import ( + "testing" + + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" + "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp" +) + +const smss = 1500 + +func initScoreboard(blocks []header.SACKBlock, iss seqnum.Value) *tcp.SACKScoreboard { + s := tcp.NewSACKScoreboard(smss, iss) + for _, blk := range blocks { + s.Insert(blk) + } + return s +} + +func TestSACKScoreboardIsSACKED(t *testing.T) { + type blockTest struct { + block header.SACKBlock + sacked bool + } + testCases := []struct { + comment string + scoreboardBlocks []header.SACKBlock + blockTests []blockTest + iss seqnum.Value + }{ + { + "Test holes and unsacked SACK blocks in SACKed ranges and insertion of overlapping SACK blocks", + []header.SACKBlock{{10, 20}, {10, 30}, {30, 40}, {41, 50}, {5, 10}, {1, 50}, {111, 120}, {101, 110}, {52, 120}}, + []blockTest{ + {header.SACKBlock{15, 21}, true}, + {header.SACKBlock{200, 201}, false}, + {header.SACKBlock{50, 51}, false}, + {header.SACKBlock{53, 120}, true}, + }, + 0, + }, + { + "Test disjoint SACKBlocks", + []header.SACKBlock{{2288624809, 2288810057}, {2288811477, 2288838565}}, + []blockTest{ + {header.SACKBlock{2288624809, 2288810057}, true}, + {header.SACKBlock{2288811477, 2288838565}, true}, + {header.SACKBlock{2288810057, 2288811477}, false}, + }, + 2288624809, + }, + { + "Test sequence number wrap around", + []header.SACKBlock{{4294254144, 225652}, {5340409, 5350509}}, + []blockTest{ + {header.SACKBlock{4294254144, 4294254145}, true}, + {header.SACKBlock{4294254143, 4294254144}, false}, + {header.SACKBlock{4294254144, 1}, true}, + {header.SACKBlock{225652, 5350509}, false}, + {header.SACKBlock{5340409, 5350509}, true}, + {header.SACKBlock{5350509, 5350609}, false}, + }, + 4294254144, + }, + } + for _, tc := range testCases { + sb := initScoreboard(tc.scoreboardBlocks, tc.iss) + for _, blkTest := range tc.blockTests { + if want, got := blkTest.sacked, sb.IsSACKED(blkTest.block); got != want { + t.Errorf("%s: s.IsSACKED(%v) = %v, want %v", tc.comment, blkTest.block, got, want) + } + } + } +} + +func TestSACKScoreboardIsLost(t *testing.T) { + s := tcp.NewSACKScoreboard(10, 0) + s.Insert(header.SACKBlock{1, 50}) + s.Insert(header.SACKBlock{51, 100}) + s.Insert(header.SACKBlock{111, 120}) + s.Insert(header.SACKBlock{101, 110}) + s.Insert(header.SACKBlock{121, 151}) + testCases := []struct { + block header.SACKBlock + lost bool + }{ + {header.SACKBlock{0, 1}, true}, + {header.SACKBlock{1, 2}, false}, + {header.SACKBlock{1, 45}, false}, + {header.SACKBlock{50, 51}, true}, + // This one should return true because there are > 3* 10 (smss) + // bytes that have been sacked above this sequence number. + {header.SACKBlock{119, 120}, true}, + {header.SACKBlock{120, 121}, true}, + {header.SACKBlock{125, 126}, false}, + } + for _, tc := range testCases { + if want, got := tc.lost, s.IsLost(tc.block); got != want { + t.Errorf("s.IsLost(%v) = %v, want %v", tc.block, got, want) + } + } +} + +func TestSACKScoreboardDelete(t *testing.T) { + blocks := []header.SACKBlock{{4294254144, 225652}, {5340409, 5350509}} + s := initScoreboard(blocks, 4294254143) + s.Delete(5340408) + if s.Empty() { + t.Fatalf("s.Empty() = true, want false") + } + if got, want := s.Sacked(), blocks[1].Start.Size(blocks[1].End); got != want { + t.Fatalf("incorrect sacked bytes in scoreboard got: %v, want: %v", got, want) + } + s.Delete(5340410) + if s.Empty() { + t.Fatal("s.Empty() = true, want false") + } + newSB := header.SACKBlock{5340410, 5350509} + if !s.IsSACKED(newSB) { + t.Fatalf("s.IsSACKED(%v) = false, want true, scoreboard: %v", newSB, s) + } + s.Delete(5350509) + lastOctet := header.SACKBlock{5350508, 5350509} + if s.IsSACKED(lastOctet) { + t.Fatalf("s.IsSACKED(%v) = false, want true", lastOctet) + } + + s.Delete(5350510) + if !s.Empty() { + t.Fatal("s.Empty() = false, want true") + } + if got, want := s.Sacked(), seqnum.Size(0); got != want { + t.Fatalf("incorrect sacked bytes in scoreboard got: %v, want: %v", got, want) + } +} |