From 68514d4ba3f7c06a89a8d0cd79327ede62dae65b Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Sun, 26 Jan 2020 18:32:52 -0800 Subject: Unroll checksum computation loop. Checksum computation is one of the most expensive bits of packet processing. Manual unrolling of the loop provides significant improvement in checksum speed. Updates #1656 BenchmarkChecksum/checksum_64-12 49834124 23.6 ns/op BenchmarkChecksum/checksum_128-12 27111997 44.1 ns/op BenchmarkChecksum/checksum_256-12 11416683 91.5 ns/op BenchmarkChecksum/checksum_512-12 6375298 174 ns/op BenchmarkChecksum/checksum_1024-12 3403852 338 ns/op BenchmarkChecksum/checksum_1500-12 2343576 493 ns/op BenchmarkChecksum/checksum_2048-12 1730521 656 ns/op BenchmarkChecksum/checksum_4096-12 920469 1327 ns/op BenchmarkChecksum/checksum_8192-12 445885 2637 ns/op BenchmarkChecksum/checksum_16384-12 226342 5268 ns/op BenchmarkChecksum/checksum_32767-12 114210 10503 ns/op BenchmarkChecksum/checksum_32768-12 99138 10610 ns/op BenchmarkChecksum/checksum_65535-12 53438 21158 ns/op BenchmarkChecksum/checksum_65536-12 52993 21067 ns/op BenchmarkUnrolledChecksum/checksum_64-12 61035639 19.1 ns/op BenchmarkUnrolledChecksum/checksum_128-12 36067015 33.6 ns/op BenchmarkUnrolledChecksum/checksum_256-12 19731220 60.4 ns/op BenchmarkUnrolledChecksum/checksum_512-12 9091291 116 ns/op BenchmarkUnrolledChecksum/checksum_1024-12 4976406 226 ns/op BenchmarkUnrolledChecksum/checksum_1500-12 3685224 328 ns/op BenchmarkUnrolledChecksum/checksum_2048-12 2579108 447 ns/op BenchmarkUnrolledChecksum/checksum_4096-12 1350475 887 ns/op BenchmarkUnrolledChecksum/checksum_8192-12 658248 1780 ns/op BenchmarkUnrolledChecksum/checksum_16384-12 335869 3534 ns/op BenchmarkUnrolledChecksum/checksum_32767-12 168650 7095 ns/op BenchmarkUnrolledChecksum/checksum_32768-12 168075 7098 ns/op BenchmarkUnrolledChecksum/checksum_65535-12 75085 14277 ns/op BenchmarkUnrolledChecksum/checksum_65536-12 75921 14127 ns/op PiperOrigin-RevId: 291643290 --- pkg/tcpip/header/checksum_test.go | 62 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'pkg/tcpip/header/checksum_test.go') diff --git a/pkg/tcpip/header/checksum_test.go b/pkg/tcpip/header/checksum_test.go index 86b466c1c..2fbd16a65 100644 --- a/pkg/tcpip/header/checksum_test.go +++ b/pkg/tcpip/header/checksum_test.go @@ -17,6 +17,8 @@ package header_test import ( + "fmt" + "math/rand" "testing" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -107,3 +109,63 @@ func TestChecksumVVWithOffset(t *testing.T) { }) } } + +func TestChecksum(t *testing.T) { + var bufSizes = []int{0, 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128, 255, 256, 257, 1023, 1024} + type testCase struct { + buf []byte + initial uint16 + csumOrig uint16 + csumNew uint16 + } + testCases := make([]testCase, 100000) + // Ensure same buffer generation for test consistency. + rnd := rand.New(rand.NewSource(42)) + for i := range testCases { + testCases[i].buf = make([]byte, bufSizes[i%len(bufSizes)]) + testCases[i].initial = uint16(rnd.Intn(65536)) + rnd.Read(testCases[i].buf) + } + + for i := range testCases { + testCases[i].csumOrig = header.Checksum(testCases[i].buf, testCases[i].initial) + testCases[i].csumNew = header.UnrolledChecksum(testCases[i].buf, testCases[i].initial) + if got, want := testCases[i].csumNew, testCases[i].csumOrig; got != want { + t.Fatalf("new checksum for (buf = %x, initial = %d) does not match old got: %d, want: %d", testCases[i].buf, testCases[i].initial, got, want) + } + } +} + +func BenchmarkChecksum(b *testing.B) { + var bufSizes = []int{64, 128, 256, 512, 1024, 1500, 2048, 4096, 8192, 16384, 32767, 32768, 65535, 65536} + + checkSumImpls := []struct { + fn func([]byte, uint16) uint16 + name string + }{ + {header.Checksum, fmt.Sprintf("checksum")}, + {header.UnrolledChecksum, fmt.Sprintf("unrolled_checksum")}, + } + + for _, csumImpl := range checkSumImpls { + // Ensure same buffer generation for test consistency. + rnd := rand.New(rand.NewSource(42)) + for _, bufSz := range bufSizes { + b.Run(fmt.Sprintf("%s_%d", csumImpl.name, bufSz), func(b *testing.B) { + tc := struct { + buf []byte + initial uint16 + csum uint16 + }{ + buf: make([]byte, bufSz), + initial: uint16(rnd.Intn(65536)), + } + rnd.Read(tc.buf) + b.ResetTimer() + for i := 0; i < b.N; i++ { + tc.csum = csumImpl.fn(tc.buf, tc.initial) + } + }) + } + } +} -- cgit v1.2.3 From 6b43cf791a74a746443f70f98d859c1246f87e2a Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Mon, 27 Jan 2020 05:33:03 -0800 Subject: Replace calculateChecksum w/ the unrolled version. Fixes #1656 PiperOrigin-RevId: 291703760 --- pkg/tcpip/header/checksum.go | 15 +++++++++------ pkg/tcpip/header/checksum_test.go | 6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'pkg/tcpip/header/checksum_test.go') diff --git a/pkg/tcpip/header/checksum.go b/pkg/tcpip/header/checksum.go index ce57b581a..204285576 100644 --- a/pkg/tcpip/header/checksum.go +++ b/pkg/tcpip/header/checksum.go @@ -160,20 +160,23 @@ func unrolledCalculateChecksum(buf []byte, odd bool, initial uint32) (uint16, bo return ChecksumCombine(uint16(v), uint16(v>>16)), odd } -// Checksum calculates the checksum (as defined in RFC 1071) of the bytes in the -// given byte array. +// ChecksumOld calculates the checksum (as defined in RFC 1071) of the bytes in +// the given byte array. This function uses a non-optimized implementation. Its +// only retained for reference and to use as a benchmark/test. Most code should +// use the header.Checksum function. // // The initial checksum must have been computed on an even number of bytes. -func Checksum(buf []byte, initial uint16) uint16 { +func ChecksumOld(buf []byte, initial uint16) uint16 { s, _ := calculateChecksum(buf, false, uint32(initial)) return s } -// UnrolledChecksum calculates the checksum (as defined in RFC 1071) of the -// bytes in the given byte array. +// Checksum calculates the checksum (as defined in RFC 1071) of the bytes in the +// given byte array. This function uses an optimized unrolled version of the +// checksum algorithm. // // The initial checksum must have been computed on an even number of bytes. -func UnrolledChecksum(buf []byte, initial uint16) uint16 { +func Checksum(buf []byte, initial uint16) uint16 { s, _ := unrolledCalculateChecksum(buf, false, uint32(initial)) return s } diff --git a/pkg/tcpip/header/checksum_test.go b/pkg/tcpip/header/checksum_test.go index 2fbd16a65..309403482 100644 --- a/pkg/tcpip/header/checksum_test.go +++ b/pkg/tcpip/header/checksum_test.go @@ -128,8 +128,8 @@ func TestChecksum(t *testing.T) { } for i := range testCases { - testCases[i].csumOrig = header.Checksum(testCases[i].buf, testCases[i].initial) - testCases[i].csumNew = header.UnrolledChecksum(testCases[i].buf, testCases[i].initial) + testCases[i].csumOrig = header.ChecksumOld(testCases[i].buf, testCases[i].initial) + testCases[i].csumNew = header.Checksum(testCases[i].buf, testCases[i].initial) if got, want := testCases[i].csumNew, testCases[i].csumOrig; got != want { t.Fatalf("new checksum for (buf = %x, initial = %d) does not match old got: %d, want: %d", testCases[i].buf, testCases[i].initial, got, want) } @@ -143,8 +143,8 @@ func BenchmarkChecksum(b *testing.B) { fn func([]byte, uint16) uint16 name string }{ + {header.ChecksumOld, fmt.Sprintf("checksum_old")}, {header.Checksum, fmt.Sprintf("checksum")}, - {header.UnrolledChecksum, fmt.Sprintf("unrolled_checksum")}, } for _, csumImpl := range checkSumImpls { -- cgit v1.2.3