diff options
author | Bhasker Hariharan <bhaskerh@google.com> | 2020-01-26 18:32:52 -0800 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2020-01-26 18:35:01 -0800 |
commit | 68514d4ba3f7c06a89a8d0cd79327ede62dae65b (patch) | |
tree | 947cb7427ce775b85e9e3be396eb52c6426b919c /pkg/tcpip/header/checksum.go | |
parent | 18a7e1309decb9bc09879e337adbc00f81d420c5 (diff) |
Unroll checksum computation loop.
Checksum computation is one of the most expensive parts of
packet processing. Manually unrolling the loop provides a
significant improvement in checksum speed.
Updates #1656
BenchmarkChecksum/checksum_64-12 49834124 23.6 ns/op
BenchmarkChecksum/checksum_128-12 27111997 44.1 ns/op
BenchmarkChecksum/checksum_256-12 11416683 91.5 ns/op
BenchmarkChecksum/checksum_512-12 6375298 174 ns/op
BenchmarkChecksum/checksum_1024-12 3403852 338 ns/op
BenchmarkChecksum/checksum_1500-12 2343576 493 ns/op
BenchmarkChecksum/checksum_2048-12 1730521 656 ns/op
BenchmarkChecksum/checksum_4096-12 920469 1327 ns/op
BenchmarkChecksum/checksum_8192-12 445885 2637 ns/op
BenchmarkChecksum/checksum_16384-12 226342 5268 ns/op
BenchmarkChecksum/checksum_32767-12 114210 10503 ns/op
BenchmarkChecksum/checksum_32768-12 99138 10610 ns/op
BenchmarkChecksum/checksum_65535-12 53438 21158 ns/op
BenchmarkChecksum/checksum_65536-12 52993 21067 ns/op
BenchmarkUnrolledChecksum/checksum_64-12 61035639 19.1 ns/op
BenchmarkUnrolledChecksum/checksum_128-12 36067015 33.6 ns/op
BenchmarkUnrolledChecksum/checksum_256-12 19731220 60.4 ns/op
BenchmarkUnrolledChecksum/checksum_512-12 9091291 116 ns/op
BenchmarkUnrolledChecksum/checksum_1024-12 4976406 226 ns/op
BenchmarkUnrolledChecksum/checksum_1500-12 3685224 328 ns/op
BenchmarkUnrolledChecksum/checksum_2048-12 2579108 447 ns/op
BenchmarkUnrolledChecksum/checksum_4096-12 1350475 887 ns/op
BenchmarkUnrolledChecksum/checksum_8192-12 658248 1780 ns/op
BenchmarkUnrolledChecksum/checksum_16384-12 335869 3534 ns/op
BenchmarkUnrolledChecksum/checksum_32767-12 168650 7095 ns/op
BenchmarkUnrolledChecksum/checksum_32768-12 168075 7098 ns/op
BenchmarkUnrolledChecksum/checksum_65535-12 75085 14277 ns/op
BenchmarkUnrolledChecksum/checksum_65536-12 75921 14127 ns/op
PiperOrigin-RevId: 291643290
Diffstat (limited to 'pkg/tcpip/header/checksum.go')
-rw-r--r-- | pkg/tcpip/header/checksum.go | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/pkg/tcpip/header/checksum.go b/pkg/tcpip/header/checksum.go index 9749c7f4d..ce57b581a 100644 --- a/pkg/tcpip/header/checksum.go +++ b/pkg/tcpip/header/checksum.go @@ -45,6 +45,121 @@ func calculateChecksum(buf []byte, odd bool, initial uint32) (uint16, bool) { return ChecksumCombine(uint16(v), uint16(v>>16)), odd } +func unrolledCalculateChecksum(buf []byte, odd bool, initial uint32) (uint16, bool) { + v := initial + + if odd { + v += uint32(buf[0]) + buf = buf[1:] + } + + l := len(buf) + odd = l&1 != 0 + if odd { + l-- + v += uint32(buf[l]) << 8 + } + for (l - 64) >= 0 { + i := 0 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + i += 16 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + i += 16 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + i += 16 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += 
(uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + buf = buf[64:] + l = l - 64 + } + if (l - 32) >= 0 { + i := 0 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + i += 16 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + buf = buf[32:] + l = l - 32 + } + if (l - 16) >= 0 { + i := 0 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + v += (uint32(buf[i+8]) << 8) + uint32(buf[i+9]) + v += (uint32(buf[i+10]) << 8) + uint32(buf[i+11]) + v += (uint32(buf[i+12]) << 8) + uint32(buf[i+13]) + v += (uint32(buf[i+14]) << 8) + uint32(buf[i+15]) + buf = buf[16:] + l = l - 16 + } + if (l - 8) >= 0 { + i := 0 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) + uint32(buf[i+3]) + v += (uint32(buf[i+4]) << 8) + uint32(buf[i+5]) + v += (uint32(buf[i+6]) << 8) + uint32(buf[i+7]) + buf = buf[8:] + l = l - 8 + } + if (l - 4) >= 0 { + i := 0 + v += (uint32(buf[i]) << 8) + uint32(buf[i+1]) + v += (uint32(buf[i+2]) << 8) 
+ uint32(buf[i+3]) + buf = buf[4:] + l = l - 4 + } + + // At this point since l was even before we started unrolling + // there can be only two bytes left to add. + if l != 0 { + v += (uint32(buf[0]) << 8) + uint32(buf[1]) + } + + return ChecksumCombine(uint16(v), uint16(v>>16)), odd +} + // Checksum calculates the checksum (as defined in RFC 1071) of the bytes in the // given byte array. // @@ -54,6 +169,15 @@ func Checksum(buf []byte, initial uint16) uint16 { return s } +// UnrolledChecksum calculates the checksum (as defined in RFC 1071) of the +// bytes in the given byte array. +// +// The initial checksum must have been computed on an even number of bytes. +func UnrolledChecksum(buf []byte, initial uint16) uint16 { + s, _ := unrolledCalculateChecksum(buf, false, uint32(initial)) + return s +} + // ChecksumVV calculates the checksum (as defined in RFC 1071) of the bytes in // the given VectorizedView. // |