xref: /linux/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh (revision e5a52fd2b8cdb700b3c07b030e050a49ef3156b9)
1# SPDX-License-Identifier: GPL-2.0
2
3# This test sends a >1Gbps stream of traffic from H1, to the switch, which
4# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the
5# switch and forwarded to the port under test $swp3, which is also 1Gbps.
6#
7# This way, $swp3 should be 100% filled with traffic without any of it spilling
8# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That
9# is what H2 is used for--it sends the extra traffic to create backlog.
10#
11# A RED Qdisc is installed on $swp3. The configuration is such that the minimum
12# and maximum size are 1 byte apart, so there is a very clear border under which
13# no marking or dropping takes place, and above which everything is marked or
14# dropped.
15#
16# The test uses the buffer build-up behavior to test the installed RED.
17#
18# In order to test WRED, $swp3 actually contains RED under PRIO, with two
19# different configurations. Traffic is prioritized using 802.1p and relies on
20# the implicit mlxsw configuration, where packet priority is taken 1:1 from the
21# 802.1p marking.
22#
23# +--------------------------+                     +--------------------------+
24# | H1                       |                     | H2                       |
25# |     + $h1.10             |                     |     + $h2.10             |
26# |     | 192.0.2.1/28       |                     |     | 192.0.2.2/28       |
27# |     |                    |                     |     |                    |
28# |     |         $h1.11 +   |                     |     |         $h2.11 +   |
29# |     |  192.0.2.17/28 |   |                     |     |  192.0.2.18/28 |   |
30# |     |                |   |                     |     |                |   |
31# |     \______    ______/   |                     |     \______    ______/   |
32# |            \ /           |                     |            \ /           |
33# |             + $h1        |                     |             + $h2        |
34# +-------------|------------+                     +-------------|------------+
35#               | >1Gbps                                         |
36# +-------------|------------------------------------------------|------------+
37# | SW          + $swp1                                          + $swp2      |
38# |     _______/ \___________                        ___________/ \_______    |
39# |    /                     \                      /                     \   |
40# |  +-|-----------------+   |                    +-|-----------------+   |   |
41# |  | + $swp1.10        |   |                    | + $swp2.10        |   |   |
42# |  |                   |   |        .-------------+ $swp5.10        |   |   |
43# |  |     BR1_10        |   |        |           |                   |   |   |
44# |  |                   |   |        |           |     BR2_10        |   |   |
45# |  | + $swp2.10        |   |        |           |                   |   |   |
46# |  +-|-----------------+   |        |           | + $swp3.10        |   |   |
47# |    |                     |        |           +-|-----------------+   |   |
48# |    |   +-----------------|-+      |             |   +-----------------|-+ |
49# |    |   |        $swp1.11 + |      |             |   |        $swp2.11 + | |
50# |    |   |                   |      | .-----------------+ $swp5.11        | |
51# |    |   |      BR1_11       |      | |           |   |                   | |
52# |    |   |                   |      | |           |   |      BR2_11       | |
53# |    |   |        $swp2.11 + |      | |           |   |                   | |
54# |    |   +-----------------|-+      | |           |   |        $swp3.11 + | |
55# |    |                     |        | |           |   +-----------------|-+ |
56# |    \_______   ___________/        | |           \___________   _______/   |
57# |            \ /                    \ /                       \ /           |
58# |             + $swp4                + $swp5                   + $swp3      |
59# +-------------|----------------------|-------------------------|------------+
60#               |                      |                         | 1Gbps
61#               \________1Gbps_________/                         |
62#                                   +----------------------------|------------+
63#                                   | H3                         + $h3        |
64#                                   |      _____________________/ \_______    |
65#                                   |     /                               \   |
66#                                   |     |                               |   |
67#                                   |     + $h3.10                 $h3.11 +   |
68#                                   |       192.0.2.3/28    192.0.2.19/28     |
69#                                   +-----------------------------------------+
70
# The test topology uses eight interfaces; setup_prepare() reads them from
# NETIFS[p1] through NETIFS[p8].
NUM_NETIFS=8
# Consumed by the sourced forwarding library -- presumably makes it verify
# that a suitable tc is installed (TODO confirm against lib.sh).
CHECK_TC="yes"
# Quote the script path so that a checkout living in a directory whose name
# contains whitespace can still locate and source the shared libraries.
lib_dir=$(dirname "$0")/../../../net/forwarding
source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"
source qos_lib.sh
77
# Print the IPv4 address used by host number $1 on VLAN $2. Every VLAN gets its
# own block of 16 addresses inside 192.0.2.0/24: VLAN 10 starts at .0, VLAN 11
# at .16, and the host number is the offset within that block.
ipaddr()
{
	local host=$1; shift
	local vlan=$1; shift
	local block_base=$((16 * (vlan - 10)))

	echo 192.0.2.$((block_base + host))
}
85
# Configure host interface $1 as host number $2: initialize the interface,
# raise its MTU to 10000 and create VLAN uppers 10 and 11 on it, each with the
# matching ipaddr() address as a /28. Egress priority 0 is mapped to 802.1p
# PCP 0 on VLAN 10 and to PCP 1 on VLAN 11.
host_create()
{
	local dev=$1; shift
	local host=$1; shift
	local vid

	simple_if_init $dev
	mtu_set $dev 10000

	for vid in 10 11; do
		vlan_create $dev $vid v$dev $(ipaddr $host $vid)/28
		# PCP for this VLAN is simply the VLAN ID minus 10.
		ip link set dev $dev.$vid type vlan egress 0:$((vid - 10))
	done
}
100
# Undo host_create() for interface $1: remove the VLAN uppers (11 first, then
# 10), restore the MTU and tear the interface down.
host_destroy()
{
	local dev=$1; shift
	local vid

	for vid in 11 10; do
		vlan_destroy $dev $vid
	done

	mtu_restore $dev
	simple_if_fini $dev
}
110
# Set up $h1 as host number 1.
h1_create()
{
	local host_id=1

	host_create $h1 $host_id
}
115
# Tear down everything h1_create() configured on $h1.
h1_destroy()
{
	local dev=$h1

	host_destroy $dev
}
120
# Set up $h2 as host number 2 and pin its link speed to 1Gbps.
h2_create()
{
	local host_id=2
	local speed=1000

	host_create $h2 $host_id

	# Why the speed is forced: some tests in this suite send multicast
	# traffic. Once such traffic enters BR2_10 resp. BR2_11 it is flooded to
	# every other bridge port, so a stream ingressing through $swp2 reaches
	# not only $swp3 (the intended destination), but also $swp5 (which is
	# meant to be the ingress of a different stream).
	#
	# That is normally harmless. However, should $swp5 be slower than
	# $swp2, a queue would build up at $swp5, and shared buffer quotas might
	# then keep packets from queueing at $swp3, failing the test
	# spuriously.
	#
	# Capping $h2 at 1Gbps avoids that.

	ethtool -s $h2 speed $speed autoneg off
}
140
# Undo h2_create(): re-enable autonegotiation on $h2 first, then remove the
# host configuration.
h2_destroy()
{
	local dev=$h2

	ethtool -s $dev autoneg on
	host_destroy $dev
}
146
# Set up $h3 as host number 3 and force its link to 1Gbps with
# autonegotiation off.
h3_create()
{
	local host_id=3
	local speed=1000

	host_create $h3 $host_id
	ethtool -s $h3 speed $speed autoneg off
}
152
# Undo h3_create(): re-enable autonegotiation on $h3 first, then remove the
# host configuration.
h3_destroy()
{
	local dev=$h3

	ethtool -s $dev autoneg on
	host_destroy $dev
}
158
# Build the switch side of the topology: four per-VLAN bridges, VLAN uppers on
# all five switch ports, the 802.1p priority mappings, 1Gbps links on
# $swp2..$swp5, and a port-pool threshold on $swp3.
switch_create()
{
	local br
	local netdev
	local vid

	for br in br1_10 br1_11 br2_10 br2_11; do
		ip link add dev $br type bridge
	done

	for netdev in $swp1 $swp2 $swp3 $swp4 $swp5; do
		ip link set dev $netdev up
		mtu_set $netdev 10000
	done

	# $swp1 and $swp4 are bridged together in BR1_<vid>.
	for netdev in $swp1 $swp4; do
		for vid in 10 11; do
			vlan_create $netdev $vid
			ip link set dev $netdev.$vid master br1_$vid
			ip link set dev $netdev.$vid up
		done
	done

	# $swp2, $swp3 and $swp5 are bridged together in BR2_<vid>.
	for netdev in $swp2 $swp3 $swp5; do
		for vid in 10 11; do
			vlan_create $netdev $vid
			ip link set dev $netdev.$vid master br2_$vid
			ip link set dev $netdev.$vid up
		done
	done

	# Traffic looped out of $swp4 carries PCP 0 on VLAN 10 and PCP 1 on
	# VLAN 11; on the ingress ports PCP is mapped 1:1 to priority.
	for vid in 10 11; do
		ip link set dev $swp4.$vid type vlan egress 0:$((vid - 10))
	done
	for netdev in $swp1 $swp2 $swp5; do
		for vid in 10 11; do
			ip link set dev $netdev.$vid type vlan ingress 0:0 1:1
		done
	done

	for netdev in $swp2 $swp3 $swp4 $swp5; do
		ethtool -s $netdev speed 1000 autoneg off
	done

	for br in br1_10 br1_11 br2_10 br2_11; do
		ip link set dev $br up
	done

	# Use the first field reported for pool 0 as the threshold of pool 8 on
	# $swp3. NOTE(review): presumably this lets $swp3 use a whole pool's
	# worth of buffer -- confirm against devlink_lib.sh.
	local pool_size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
	devlink_port_pool_th_set $swp3 8 $pool_size
}
211
# Undo switch_create(), in roughly the reverse order: restore the $swp3 pool
# threshold, remove any root qdisc from $swp3, bring the bridges down, turn
# autonegotiation back on, remove the VLAN uppers, and delete the bridges.
switch_destroy()
{
	local br
	local netdev
	local vid

	devlink_port_pool_th_restore $swp3 8

	# Best effort: there may be no root qdisc installed on $swp3, so the
	# error output is discarded.
	tc qdisc del dev $swp3 root 2>/dev/null

	for br in br2_11 br2_10 br1_11 br1_10; do
		ip link set dev $br down
	done

	for netdev in $swp5 $swp4 $swp3 $swp2; do
		ethtool -s $netdev autoneg on
	done

	for netdev in $swp5 $swp3 $swp2 $swp4 $swp1; do
		for vid in 11 10; do
			ip link set dev $netdev.$vid down
			ip link set dev $netdev.$vid nomaster
			vlan_destroy $netdev $vid
		done

		mtu_restore $netdev
		ip link set dev $netdev down
	done

	for br in br2_11 br2_10 br1_11 br1_10; do
		ip link del dev $br
	done
}
246
# Resolve the eight test interfaces from NETIFS into the global names used
# throughout this file, remember the MAC address of $h3, and then bring up the
# VRF infrastructure, the three hosts and the switch.
setup_prepare()
{
	local step

	# Interfaces are assigned in pairs: p1-p2, p3-p4, p5-p6 and p7-p8.
	h1=${NETIFS[p1]};   swp1=${NETIFS[p2]}
	swp2=${NETIFS[p3]}; h2=${NETIFS[p4]}
	swp3=${NETIFS[p5]}; h3=${NETIFS[p6]}
	swp4=${NETIFS[p7]}; swp5=${NETIFS[p8]}

	h3_mac=$(mac_get $h3)

	vrf_prepare

	for step in h1_create h2_create h3_create switch_create; do
		$step
	done
}
270
# Tear the topology down in the reverse order of setup_prepare(): switch
# first, then H3, H2 and H1, and finally the VRF infrastructure.
cleanup()
{
	local teardown

	pre_cleanup

	for teardown in switch_destroy h3_destroy h2_destroy h1_destroy; do
		$teardown
	done

	vrf_cleanup
}
282
# Verify connectivity towards H3: ping H3's address on each VLAN from the
# matching VLAN upper of $h1 and then of $h2.
ping_ipv4()
{
	local hostnum
	local vid
	local dev_ref

	for hostnum in 1 2; do
		# Name of the global holding this host's interface (h1 or h2).
		dev_ref=h$hostnum
		for vid in 10 11; do
			ping_test ${!dev_ref}.$vid $(ipaddr 3 $vid) \
				  " from host $hostnum, vlan $vid"
		done
	done
}
290
# Print the traffic class that corresponds to VLAN $1: VLAN 10 maps to TC 0,
# VLAN 11 to TC 1.
get_tc()
{
	local vlan=$1; shift
	local tc=$((vlan - 10))

	echo $tc
}
297
# Print the handle of the child qdisc that serves the traffic class of VLAN $1.
get_qdisc_handle()
{
	local vlan=$1; shift
	local tc
	local band

	tc=$(get_tc $vlan)
	# Bands are numbered in the opposite direction to traffic classes:
	# TC0 is band 8, TC1 is band 7.
	band=$((8 - tc))

	# The resulting handle is 108: for TC0 and 107: for TC1.
	echo "10$band:"
}
308
# Print the .backlog statistic of the $swp3 qdisc that serves VLAN $1.
get_qdisc_backlog()
{
	local vlan=$1; shift
	local handle=$(get_qdisc_handle $vlan)

	qdisc_stats_get $swp3 $handle .backlog
}
315
# Print the tc_transmit_queue_tc_<N> ethtool counter of $swp3, where N is the
# traffic class of VLAN $1 plus 8. do_mc_backlog_test() reads this as the
# backlog of the BUM (multicast) counterpart of that traffic class.
get_mc_transmit_queue()
{
	local vlan=$1; shift
	local uc_tc=$(get_tc $vlan)
	local mc_tc=$((uc_tc + 8))

	ethtool_stats_get $swp3 tc_transmit_queue_tc_$mc_tc
}
323
# Print the ecn_marked ethtool counter of $swp3. The VLAN argument is consumed
# but does not influence which counter is read.
get_nmarked()
{
	local vlan=$1; shift
	local counter=ecn_marked

	ethtool_stats_get $swp3 $counter
}
330
# Print the .packets statistic of the $swp3 qdisc that serves VLAN $1. The
# read goes through busywait_for_counter with arguments 1100 and +1.
# NOTE(review): presumably that waits up to 1100ms for the counter to advance
# by at least one packet -- confirm against lib.sh.
get_qdisc_npackets()
{
	local vlan=$1; shift
	local handle=$(get_qdisc_handle $vlan)

	busywait_for_counter 1100 +1 \
		qdisc_stats_get $swp3 $handle .packets
}
338
# Send traffic from $h2 to H3 in an attempt to build a backlog of at least
# $size bytes on the $swp3 qdisc that serves $vlan.
#
# Arguments:
#   $1 - VLAN (10 or 11) selecting both the $h2 upper and the qdisc to watch
#   $2 - requested backlog size in bytes
#   $3 - protocol passed to $MZ through -t (e.g. udp, tcp)
#   remaining arguments are handed to $MZ verbatim
#
# The most recently observed backlog is always printed to stdout. Returns 0
# as soon as the backlog reaches $size. If the backlog is still short after
# 11 bursts have been sent, gives up and returns 1.
build_backlog()
{
	local vlan=$1; shift
	local size=$1; shift
	local proto=$1; shift

	local cur=-1
	local i=0
	local diff
	local pkts

	while :; do
		# Read the backlog through busywait, asking for a value larger
		# than the previous reading. NOTE(review): presumably this
		# polls for up to 1100ms to let the last burst land -- confirm
		# against lib.sh.
		cur=$(busywait 1100 until_counter_is "> $cur" \
				    get_qdisc_backlog $vlan)
		diff=$((size - cur))
		# Number of 8000-byte units needed to cover the shortfall,
		# rounded up.
		pkts=$(((diff + 7999) / 8000))

		if ((cur >= size)); then
			echo $cur
			return 0
		elif ((i++ > 10)); then
			echo $cur
			return 1
		fi

		$MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
		    -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
		    -t $proto -q -c $pkts "$@"
	done
}
372
# Measure which share of packets got ECN-marked over a five-second window.
# Prints the percentage (integer division) of newly marked packets relative to
# packets newly counted by the qdisc of VLAN $1, and returns the truth value
# of "<percentage> $2", where $2 is an arithmetic condition such as "== 0" or
# ">= 95".
check_marking()
{
	local vlan=$1; shift
	local cond=$1; shift

	local pkts_before=$(get_qdisc_npackets $vlan)
	local marked_before=$(get_nmarked $vlan)
	sleep 5
	local pkts_after=$(get_qdisc_npackets $vlan)
	local marked_after=$(get_nmarked $vlan)

	local pct=$((100 * (marked_after - marked_before) /
		     (pkts_after - pkts_before)))

	echo $pct
	((pct $cond))
}
391
# The part shared by the ECN tests. $1 is the test name used in the log lines,
# $2 the VLAN under test and $3 the configured queue limit. Two results are
# logged: no marking with the backlog at 2/3 of the limit, and at least 95% of
# packets marked with the backlog at 3/2 of the limit.
ecn_test_common()
{
	local name=$1; shift
	local vlan=$1; shift
	local limit=$1; shift
	local tc_id=$((vlan - 10))
	local below=$((2 * limit / 3))
	local above=$((3 * limit / 2))
	local qlen
	local marked_pct

	# The main stream is TCP. The below-the-limit backlog is nevertheless
	# built with UDP: TCP would do just as well, but this way we also get
	# proof that UDP is accepted while the queue is shorter than the limit.
	# Were the limit misconfigured, the main stream would show up as ECN
	# marked here.
	RET=0
	qlen=$(build_backlog $vlan $below udp)
	check_err $? "Could not build the requested backlog"
	marked_pct=$(check_marking $vlan "== 0")
	check_err $? "backlog $qlen / $limit Got $marked_pct% marked packets, expected == 0."
	log_test "TC $tc_id: $name backlog < limit"

	# Switch to TCP for the second phase. Once the backlog crosses the
	# limit, non-TCP traffic would be early-dropped, and we need the
	# backlog to actually get above the limit.
	RET=0
	qlen=$(build_backlog $vlan $above tcp tos=0x01)
	check_err $? "Could not build the requested backlog"
	marked_pct=$(check_marking $vlan ">= 95")
	check_err $? "backlog $qlen / $limit Got $marked_pct% marked packets, expected >= 95."
	log_test "TC $tc_id: $name backlog > limit"
}
421
# ECN test for VLAN $1 with queue limit $2: run the common ECN checks under a
# TCP stream from $h1 to H3 sent with tos=0x01, then verify that UDP is
# early-dropped once the backlog is above the limit.
do_ecn_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	local name=ECN
	local tc_id=$((vlan - 10))
	local src=$(ipaddr 1 $vlan)
	local dst=$(ipaddr 3 $vlan)

	start_tcp_traffic $h1.$vlan $src $dst $h3_mac tos=0x01
	sleep 1

	ecn_test_common "$name" $vlan $limit

	# The common part showed that UDP is accepted while the backlog is
	# below the limit. With the backlog now above it, all UDP should be
	# dropped, so an attempt to build more backlog has to fail.
	RET=0
	build_backlog $vlan $((2 * limit)) udp >/dev/null
	check_fail $? "UDP traffic went into backlog instead of being early-dropped"
	log_test "TC $tc_id: $name backlog > limit: UDP early-dropped"

	stop_traffic
	sleep 1
}
445
# ECN nodrop test for VLAN $1 with queue limit $2: run the common ECN checks
# under a TCP stream from $h1 to H3 sent with tos=0x01, then verify that UDP
# still makes it into the backlog once the backlog is above the limit.
do_ecn_nodrop_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	local name="ECN nodrop"
	local tc_id=$((vlan - 10))
	local src=$(ipaddr 1 $vlan)
	local dst=$(ipaddr 3 $vlan)

	start_tcp_traffic $h1.$vlan $src $dst $h3_mac tos=0x01
	sleep 1

	ecn_test_common "$name" $vlan $limit

	# The common part showed that UDP is accepted while the backlog is
	# below the limit. In nodrop mode it has to keep being queued even
	# with the backlog above the limit.
	RET=0
	build_backlog $vlan $((2 * limit)) udp >/dev/null
	check_err $? "UDP traffic was early-dropped instead of getting into backlog"
	log_test "TC $tc_id: $name backlog > limit: UDP not dropped"

	stop_traffic
	sleep 1
}
469
# RED (no ECN) test for VLAN $1 with queue limit $2. Two results are logged:
# a backlog of 2/3 of the limit can be built without any marking, and an
# attempt to build 3/2 of the limit fails, leaves nothing marked, and ends up
# with a backlog between 0% and 5% below the limit.
do_red_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	local tc_id=$((vlan - 10))
	local src=$(ipaddr 1 $vlan)
	local dst=$(ipaddr 3 $vlan)
	local qlen
	local pct

	# The stream is ECN-capable TCP on purpose: it lets us check that
	# nothing gets marked even with the queue above the limit.
	start_tcp_traffic $h1.$vlan $src $dst $h3_mac tos=0x01

	# Below the limit, traffic is expected to queue up.
	RET=0
	qlen=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
	check_err $? "Could not build the requested backlog"
	pct=$(check_marking $vlan "== 0")
	check_err $? "backlog $qlen / $limit Got $pct% marked packets, expected == 0."
	log_test "TC $tc_id: RED backlog < limit"

	# Above the limit, it is not.
	RET=0
	qlen=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
	check_fail $? "Traffic went into backlog instead of being early-dropped"
	pct=$(check_marking $vlan "== 0")
	check_err $? "backlog $qlen / $limit Got $pct% marked packets, expected == 0."
	# How far below the limit the backlog stalled, in percent of the limit.
	pct=$((100 * (limit - qlen) / limit))
	((pct >= 0 && pct <= 5))
	check_err $? "backlog $qlen / $limit expected <= 5% distance"
	log_test "TC $tc_id: RED backlog > limit"

	stop_traffic
	sleep 1
}
505
# Verify that backlog built by BUM traffic is reported both by the qdisc and by
# the ethtool per-TC transmit queue counter of $swp3.
#
# Arguments:
#   $1 - VLAN under test (10 or 11)
#   $2 - queue limit; accepted for symmetry with the other do_*_test
#        functions, not used here
#
# Resets RET to 0, records failures through check_err and logs one test
# result.
do_mc_backlog_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	# Both readings are only needed so that $? reflects the busywait
	# result; keep them local so that they do not leak into the caller.
	local qbl
	local ebl

	RET=0

	# Two streams, from $h1 and from $h2, both started with "bc" in place
	# of a destination MAC. NOTE(review): presumably "bc" makes the helper
	# send to the broadcast address -- confirm against qos_lib.sh.
	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc
	start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc

	qbl=$(busywait 5000 until_counter_is ">= 500000" \
		       get_qdisc_backlog $vlan)
	check_err $? "Could not build MC backlog"

	# Verify that we actually see the backlog on BUM TC. Do a busywait as
	# well, performance blips might cause false fail.
	ebl=$(busywait 5000 until_counter_is ">= 500000" \
		       get_mc_transmit_queue $vlan)
	check_err $? "MC backlog reported by qdisc not visible in ethtool"

	# One stop_traffic per stream started above.
	stop_traffic
	stop_traffic

	log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
}
534