summaryrefslogtreecommitdiffhomepage
path: root/benchmarks/tcp/tcp_benchmark.sh
blob: 69344c9c3c636f0aa0649722dc6c99bf288ec10e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
#!/bin/bash

# Copyright 2018 The gVisor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TCP benchmark; see README.md for documentation.

# Fixed parameters.
#
# Ports: high-numbered, so not likely to be privileged.
proxy_port="44000" # Port the proxies listen on.
iperf_port="45201" # Port the iperf server listens on.

# Addresses. The plain addresses are assigned to the veth endpoints in the
# client/server namespaces; the *_proxy addresses belong to the proxies.
# Every address is configured with a /${mask} prefix.
mask="8"
client_addr="10.0.0.1"
client_proxy_addr="10.0.0.2"
server_proxy_addr="10.0.0.3"
server_addr="10.0.0.4"

# Defaults; this provides a reasonable approximation of a decent internet link.
# Parameters can be varied independently from this set to see response to
# various changes in the kind of link available.
client=false            # true: the client-side proxy runs on netstack.
server=false            # true: the server-side proxy runs on netstack.
verbose=false           # true: trace commands with `set -x`.
gso=0                   # Forwarded to the netstack proxy as -gso=<value>.
swgso=false             # Forwarded to the netstack proxy as -swgso=<value>.
mtu=1280                # 1280 is a reasonable lowest-common-denominator.
latency=10              # 10ms approximates a fast, dedicated connection.
latency_variation=1     # +/- 1ms is a relatively low amount of jitter.
loss=0.1                # 0.1% loss is non-zero, but not extremely high.
duplicate=0.1           # 0.1% means duplicates are 1/10x as frequent as losses.
duration=30             # 30s is enough time to get consistent results (experimentally).
# Directory holding the tcp_proxy and nsjoin helpers; defaults to the directory
# containing this script. "$0" is quoted so that a script path containing
# whitespace is not split into several arguments.
helper_dir=$(dirname -- "$0")
netstack_opts=          # Extra flags accumulated for the netstack proxy.

# Check for netem support.
#
# Looks for "sch_netem" in the output of lsmod and prints a warning on stderr
# when it is not found (or when lsmod itself cannot be run). This is advisory
# only: the script continues either way.
check_netem() {
  # Test the pipeline directly; its status is that of grep, and -q keeps the
  # matching line off stdout (stdout is reserved for the benchmark result).
  if ! lsmod | grep -q sch_netem; then
    echo "warning: sch_netem may not be installed." >&2
  fi
}

check_netem

# Print the option summary on stdout.
usage() {
  echo "usage: $0 [options]"
  echo "options:"
  echo " --help                show this message"
  echo " --verbose             verbose output"
  echo " --client              use netstack as the client"
  echo " --ideal               reset all network emulation"
  echo " --server              use netstack as the server"
  echo " --mtu                 set the mtu (bytes)"
  echo " --sack                enable SACK support"
  echo " --cubic               enable CUBIC congestion control for Netstack"
  echo " --duration            set the test duration (s)"
  echo " --latency             set the latency (ms)"
  echo " --latency-variation   set the latency variation"
  echo " --loss                set the loss probability (%)"
  echo " --duplicate           set the duplicate probability (%)"
  echo " --helpers             set the helper directory"
  echo " --gso                 set the -gso value passed to the netstack proxy"
  echo " --swgso               pass -swgso=true to the netstack proxy"
  echo " --cpuprofile          set the -cpuprofile value passed to the netstack proxy"
  echo " --memprofile          set the -memprofile value passed to the netstack proxy"
  echo " --client_tcp_probe_file  set the -client_tcp_probe_file value passed to the netstack proxy"
  echo " --server_tcp_probe_file  set the -server_tcp_probe_file value passed to the netstack proxy"
  echo ""
  echo "The output of the script will be:"
  echo "  <throughput> <client-cpu-usage> <server-cpu-usage>"
}

# Abort with "no <description> provided" when an option is missing its value.
#   $1: number of arguments left after the option itself has been shifted off
#   $2: description of the expected value, used in the error message
# The message goes to stderr because stdout carries the benchmark result.
require_value() {
  if [ "$1" -le 0 ]; then
    echo "no $2 provided" >&2
    exit 1
  fi
}

# Parse the command line, overriding the default settings assigned earlier in
# this script. Flags destined for the netstack proxy are appended to
# netstack_opts. Every option that takes a value verifies that one was
# supplied. Anything unrecognised (including --help) prints the usage text and
# exits with status 1.
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --client)
        client=true
        ;;
      --client_tcp_probe_file)
        shift
        require_value "$#" "client tcp probe file"
        netstack_opts="${netstack_opts} -client_tcp_probe_file=$1"
        ;;
      --server)
        server=true
        ;;
      --verbose)
        verbose=true
        ;;
      --gso)
        shift
        require_value "$#" "gso value"
        gso=$1
        ;;
      --swgso)
        swgso=true
        ;;
      --server_tcp_probe_file)
        shift
        require_value "$#" "server tcp probe file"
        netstack_opts="${netstack_opts} -server_tcp_probe_file=$1"
        ;;
      --ideal)
        mtu=1500            # Standard ethernet.
        latency=0           # No latency.
        latency_variation=0 # No jitter.
        loss=0              # No loss.
        duplicate=0         # No duplicates.
        ;;
      --mtu)
        shift
        require_value "$#" "mtu"
        mtu=$1
        ;;
      --sack)
        netstack_opts="${netstack_opts} -sack"
        ;;
      --cubic)
        netstack_opts="${netstack_opts} -cubic"
        ;;
      --duration)
        shift
        require_value "$#" "duration"
        duration=$1
        ;;
      --latency)
        shift
        require_value "$#" "latency"
        latency=$1
        ;;
      --latency-variation)
        shift
        require_value "$#" "latency variation"
        latency_variation=$1
        ;;
      --loss)
        shift
        require_value "$#" "loss probability"
        loss=$1
        ;;
      --duplicate)
        shift
        require_value "$#" "duplicate"
        duplicate=$1
        ;;
      --cpuprofile)
        shift
        require_value "$#" "cpuprofile"
        netstack_opts="${netstack_opts} -cpuprofile=$1"
        ;;
      --memprofile)
        shift
        require_value "$#" "memprofile"
        netstack_opts="${netstack_opts} -memprofile=$1"
        ;;
      --helpers)
        shift
        require_value "$#" "helper dir"
        helper_dir=$1
        ;;
      *)
        usage
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"

# When verbose output was requested, trace every command run by this script.
case "${verbose}" in
  true) set -x ;;
esac

# Derive the per-device emulation values and the helper binary paths.
#
# Latency, loss and duplication are each halved because the emulation is
# installed on both wan devices (see the tc loop in the namespace script).
# bc performs the division; awk fixes the number of decimals (2 for latency,
# 6 for the probabilities).
#
# helper_dir is made relative to the current directory when it lives below it.
# The working directory is quoted inside the expansion so it is matched
# literally; unquoted, characters such as '[' or '*' in the path would be
# treated as a glob pattern and the prefix would not be stripped.
derive_link_settings() {
  half_latency=$(echo "${latency}/2" | bc -l | awk '{printf "%1.2f", $0}')
  half_loss=$(echo "${loss}/2" | bc -l | awk '{printf "%1.6f", $0}')
  half_duplicate=$(echo "${duplicate}/2" | bc -l | awk '{printf "%1.6f", $0}')
  helper_dir=${helper_dir#"$(pwd)"/} # Use relative paths.
  proxy_binary=${helper_dir}/tcp_proxy
  nsjoin_binary=${helper_dir}/nsjoin
}

derive_link_settings

# Both helper binaries must exist before anything is set up; stop with an
# explanatory message as soon as one of them is missing. The proxy binary is
# checked first, then nsjoin.
for binary_var in proxy_binary nsjoin_binary; do
  # ${!binary_var} is the value of the variable whose name is in binary_var.
  if [ ! -e ${!binary_var} ]; then
    echo "Could not locate ${!binary_var}, please make sure you've built the binary"
    exit 1
  fi
done

# Choose the netem delay distribution from the requested jitter, rounded to
# two decimals.
#
# Any non-zero jitter selects the paretonormal distribution: it preserves the
# minimum RTT while adding a realistic amount of jitter (and therefore
# re-ordering) to the connection. The regular pareto distribution appears to
# add an unreasonable level of delay (we want only small spikes). With no
# jitter, no distribution is specified at all.
rounded_jitter=$(echo ${latency_variation} | awk '{printf "%1.2f", $0}')
case "${rounded_jitter}" in
  0.00) distribution="" ;;
  *) distribution="distribution paretonormal" ;;
esac

# Print the tcp_proxy command line for a proxy that runs on netstack.
#   $1: role flag without the dash (client or server)
#   $2: interface handed to the proxy
#   $3: address the proxy uses on that interface
#   $4: host:port the proxy forwards to
# The value deliberately contains backslash-newline pairs: it is later pasted
# into the generated namespace script, where they act as line continuations.
netstack_proxy_args() {
  local role=$1 iface=$2 addr=$3 forward=$4
  printf '%s' "${proxy_binary} ${netstack_opts} -port ${proxy_port} -${role} \\
      -mtu ${mtu} -iface ${iface} -addr ${addr} -mask ${mask} \\
      -forward ${forward} -gso=${gso} -swgso=${swgso}"
}

# Client-side proxy: listens on the proxy port (the client's iperf target) and
# forwards to the server-side proxy, via netstack when requested and via the
# host networking stack otherwise.
if ${client}; then
  client_args=$(netstack_proxy_args client client.0 \
    "${client_proxy_addr}" "${server_proxy_addr}:${proxy_port}")
else
  client_args="${proxy_binary} -port ${proxy_port} -forward ${server_proxy_addr}:${proxy_port}"
fi

# Server-side proxy: listens on the proxy port and forwards to the iperf
# server, via netstack when requested and via the host networking stack
# otherwise.
if ${server}; then
  server_args=$(netstack_proxy_args server server.0 \
    "${server_proxy_addr}" "${server_addr}:${iperf_port}")
else
  server_args="${proxy_binary} -port ${proxy_port} -forward ${server_addr}:${iperf_port}"
fi

# netem loss/duplicate clauses. Each one stays empty unless bc evaluates the
# corresponding halved probability to something other than "0", so a zero
# probability adds no clause to the tc command line.
loss_opt=""
[ "$(echo $half_loss | bc -q)" = "0" ] || loss_opt="loss random ${half_loss}%"

duplicate_opt=""
[ "$(echo $half_duplicate | bc -q)" = "0" ] || duplicate_opt="duplicate ${half_duplicate}%"

# Run the benchmark inside freshly created namespaces.
#
# This process is replaced (exec) by unshare(1), which starts /bin/bash in new
# user, mount, network and PID namespaces (-U -m -n -p), maps the caller to
# root (-r), forks before running bash (-f) and mounts a private /proc
# (--mount-proc). The script that shell executes is the here-document below.
#
# Quoting inside the here-document (its delimiter is unquoted):
#   - ${name} is expanded by THIS shell while the here-document is built, so
#     the settings computed above are baked into the generated script.
#   - \$ produces a literal $, deferring the expansion to the inner shell
#     (PIDs, the results file, awk field references, loop variables).
#   - \\ produces a single backslash; at the end of a line it becomes a line
#     continuation in the generated script.
#
# What the generated script does, in order:
#   1. Mounts a tmpfs on /tmp and re-exports this shell's PATH.
#   2. Creates the veth pairs client.0/client.1, server.0/server.1 and
#      wan.0/wan.1; enrolls client.1 and wan.0 on bridge br0, server.1 and
#      wan.1 on bridge br1; applies the MTU.
#   3. Installs a netem qdisc (delay, jitter, optional distribution, loss and
#      duplication) on both wan.0 and wan.1.
#   4. Creates two network namespaces bind-mounted at /tmp/client.netns and
#      /tmp/server.netns, moves client.0 / server.0 into them and assigns the
#      addresses. The proxy address is added to the interface only when that
#      side does not use netstack. Commands aimed at those namespaces are run
#      through the nsjoin helper (its implementation is not part of this file).
#   5. Starts the client proxy, the server proxy and the iperf server in the
#      background; when neither side uses netstack, also runs a short ping.
#   6. Runs the iperf client against the client proxy, retrying every 0.1s for
#      as long as its output contains "connect failed".
#   7. Detaches the devices from the bridges, prints the raw iperf output on
#      stderr and finally prints on stdout:
#        <Mbits/sec> <client proxy CPU load> <server proxy CPU load>
#      where a CPU load is the sum of fields 14 and 15 of /proc/<pid>/stat
#      (utime and stime per proc(5)) divided by CLK_TCK and by the duration.
#
# On exit the inner shell's cleanup trap removes the results file, sends TERM
# to both proxies and waits for them, then kills the iperf server.
exec unshare -U -m -n -r -f -p --mount-proc /bin/bash << EOF
set -e -m

if [ ${verbose} == "true" ]; then
  set -x
fi

mount -t tmpfs netstack-bench /tmp

# We may have reset the path in the unshare if the shell loaded some public
# profiles. Ensure that tools are discoverable via the parent's PATH.
export PATH=${PATH}

# Add client, server interfaces.
ip link add client.0 type veth peer name client.1
ip link add server.0 type veth peer name server.1

# Add network emulation devices.
ip link add wan.0 type veth peer name wan.1
ip link set wan.0 up
ip link set wan.1 up

# Enroll on the bridge.
ip link add name br0 type bridge
ip link add name br1 type bridge
ip link set client.1 master br0
ip link set server.1 master br1
ip link set wan.0 master br0
ip link set wan.1 master br1
ip link set br0 up
ip link set br1 up

# Set the MTU appropriately.
ip link set client.0 mtu ${mtu}
ip link set server.0 mtu ${mtu}
ip link set wan.0 mtu ${mtu}
ip link set wan.1 mtu ${mtu}

# Add appropriate latency, loss and duplication.
#
# This is added in at the point of bridge connection.
for device in wan.0 wan.1; do
  # NOTE: We don't support a loss correlation as testing has shown that it
  # actually doesn't work. The man page actually has a small comment about this
  # "It is also possible to add a correlation, but this option is now deprecated
  # due to the noticed bad behavior." For more information see netem(8).
  tc qdisc add dev \$device root netem \\
    delay ${half_latency}ms ${latency_variation}ms ${distribution} \\
    ${loss_opt} ${duplicate_opt}
done

# Start a client proxy.
touch /tmp/client.netns
unshare -n mount --bind /proc/self/ns/net /tmp/client.netns

# Move the endpoint into the namespace.
while ip link | grep client.0 > /dev/null; do
  ip link set dev client.0 netns /tmp/client.netns
done

if ! ${client}; then
  # Only add the address to NIC if netstack is not in use. Otherwise the host
  # will also process the inbound SYN and send a RST back.
  ${nsjoin_binary} /tmp/client.netns ip addr add ${client_proxy_addr}/${mask} dev client.0
fi

# Start a server proxy.
touch /tmp/server.netns
unshare -n mount --bind /proc/self/ns/net /tmp/server.netns
# Move the endpoint into the namespace.
while ip link | grep server.0 > /dev/null; do
  ip link set dev server.0 netns /tmp/server.netns
done
if ! ${server}; then
  # Only add the address to NIC if netstack is not in use. Otherwise the host
  # will also process the inbound SYN and send a RST back.
  ${nsjoin_binary} /tmp/server.netns ip addr add ${server_proxy_addr}/${mask} dev server.0
fi

# Add client and server addresses, and bring everything up.
${nsjoin_binary} /tmp/client.netns ip addr add ${client_addr}/${mask} dev client.0
${nsjoin_binary} /tmp/server.netns ip addr add ${server_addr}/${mask} dev server.0
${nsjoin_binary} /tmp/client.netns ip link set client.0 up
${nsjoin_binary} /tmp/client.netns ip link set lo up
${nsjoin_binary} /tmp/server.netns ip link set server.0 up
${nsjoin_binary} /tmp/server.netns ip link set lo up
ip link set dev client.1 up
ip link set dev server.1 up

${nsjoin_binary} /tmp/client.netns ${client_args} &
client_pid=\$!
${nsjoin_binary} /tmp/server.netns ${server_args} &
server_pid=\$!

# Start the iperf server.
${nsjoin_binary} /tmp/server.netns iperf -p ${iperf_port} -s >&2 &
iperf_pid=\$!

# Show traffic information.
if ! ${client} && ! ${server}; then
  ${nsjoin_binary} /tmp/client.netns ping -c 100 -i 0.001 -W 1 ${server_addr} >&2 || true
fi

results_file=\$(mktemp)
function cleanup {
  rm -f \$results_file
  kill -TERM \$client_pid
  kill -TERM \$server_pid
  wait \$client_pid
  wait \$server_pid
  kill -9 \$iperf_pid 2>/dev/null
}

# Allow failure from this point.
set +e
trap cleanup EXIT

# Run the benchmark, recording the results file.
while ${nsjoin_binary} /tmp/client.netns iperf \\
    -p ${proxy_port} -c ${client_addr} -t ${duration} -f m 2>&1 \\
    | tee \$results_file \\
    | grep "connect failed" >/dev/null; do
  sleep 0.1 # Wait for all services.
done

# Unlink all relevant devices from the bridge. This is because when the bridge
# is deleted, the kernel may hang. It appears that this problem is fixed in
# upstream commit 1ce5cce895309862d2c35d922816adebe094fe4a.
ip link set client.1 nomaster
ip link set server.1 nomaster
ip link set wan.0 nomaster
ip link set wan.1 nomaster

# Emit raw results.
cat \$results_file >&2

# Emit a useful result (final throughput).
mbits=\$(grep Mbits/sec \$results_file \\
  | sed -n -e 's/^.*[[:space:]]\\([[:digit:]]\\+\\(\\.[[:digit:]]\\+\\)\\?\\)[[:space:]]*Mbits\\/sec.*/\\1/p')
client_cpu_ticks=\$(cat /proc/\$client_pid/stat \\
  | awk '{print (\$14+\$15);}')
server_cpu_ticks=\$(cat /proc/\$server_pid/stat \\
  | awk '{print (\$14+\$15);}')
ticks_per_sec=\$(getconf CLK_TCK)
client_cpu_load=\$(bc -l <<< \$client_cpu_ticks/\$ticks_per_sec/${duration})
server_cpu_load=\$(bc -l <<< \$server_cpu_ticks/\$ticks_per_sec/${duration})
echo \$mbits \$client_cpu_load \$server_cpu_load
EOF