#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2019 SUSE Linux Products GmbH. All Rights Reserved.
#
# FSQA Test No. 187
#
# Stress send running in parallel with balance and deduplication against files
# that belong to the snapshots used by send. The goal is to verify that these
# operations running in parallel do not lead to send crashing (triggering
# assertion failures or BUG_ONs), or to send finding an inconsistent snapshot
# that leads to a failure (reported in dmesg/syslog). The test needs big trees
# (snapshots) with large differences between the parent and send snapshots in
# order to hit such issues with a good probability.
#
# Load the harness preamble and start the test.
# NOTE(review): the arguments to _begin_fstest are presumably the test groups
# this test belongs to - confirm against common/preamble.
. ./common/preamble
_begin_fstest auto send dedupe clone balance

# Import common functions.
. ./common/attr
. ./common/filter
. ./common/reflink

# real QA test starts here

# Prerequisites: the test is btrfs only, it dedupes files on the scratch
# filesystem and it sets extended attributes on them.
# NOTE(review): the _require_* helpers are defined outside this file and
# presumably skip the test when a prerequisite is missing - confirm.
_supported_fs btrfs
_require_scratch_dedupe
_require_attrs

# We at least need 8GB of free space on $SCRATCH_DEV
# NOTE(review): 8 * 1024 * 1024 only amounts to 8GB if the helper takes its
# argument in KB - confirm against the helper's definition.
_require_scratch_size $((8 * 1024 * 1024))

# Make the scratch filesystem (stdout and stderr of mkfs are appended to
# $seqres.full) and mount it.
_scratch_mkfs >>$seqres.full 2>&1
_scratch_mount

# Run one dedupe request between a random file of snap1 and a random file of
# snap2, picking at random which of the two is the source and which is the
# destination. Reads $SCRATCH_MNT and $XFS_IO_PROG.
dedupe_two_files()
{
	trap "wait; exit" SIGTERM

	# Draw one regular file at random from each snapshot.
	local from_snap1 from_snap2
	from_snap1=$(find $SCRATCH_MNT/snap1 -type f | shuf -n 1)
	from_snap2=$(find $SCRATCH_MNT/snap2 -type f | shuf -n 1)

	# Coin flip for the direction of the dedupe.
	local src dst
	if (( RANDOM % 2 )); then
		src=$from_snap2
		dst=$from_snap1
	else
		src=$from_snap1
		dst=$from_snap2
	fi

	# Failures of the dedupe itself are not interesting, only crashes and
	# deadlocks are, so all output is discarded.
	$XFS_IO_PROG -r -c "dedupe $src 0 0 64K" $dst &> /dev/null
}

# Keep deduplicating random file pairs, 5 requests at a time, until SIGTERM is
# received. Each batch of 5 background dedupe_two_files jobs is waited for
# before the next batch is started.
dedupe_files_loop()
{
	local stop=0
	# Keep the batch counter private so it cannot clobber a caller's 'i'.
	local i

	# Avoid executing 'wait' inside the trap, because when we receive
	# SIGTERM we might be already executing the wait command in the while
	# loop below. When that is the case, bash 5.0+ with debug enabled prints
	# a warning message that makes the test fail due to a mismatch with the
	# golden output. That warning message is the following:
	#
	# warning: wait_for: recursively setting old_sigint_handler to wait_sigint_handler: running_trap = 16
	#
	trap "stop=1" SIGTERM

	while [ "$stop" -eq 0 ]; do
		for ((i = 1; i <= 5; i++)); do
			dedupe_two_files &
		done
		# Returns once the whole batch finished, or early when SIGTERM
		# arrives; in both cases the loop condition is re-checked.
		wait
	done
}

# Run metadata balances on $SCRATCH_MNT back to back, with a short random
# pause in between. The loop has no exit condition of its own; it ends when
# SIGTERM is received.
balance_loop()
{
	trap "wait; exit" SIGTERM

	while :; do
		# Only metadata block groups are balanced, since this makes it
		# easier to hit problems (crashes and errors) in send.
		# Balance failures are ignored on purpose: only crashes and
		# deadlocks matter here.
		_run_btrfs_balance_start -f -m $SCRATCH_MNT &> /dev/null
		# Pause for 0, 1 or 2 seconds.
		sleep $((RANDOM % 3))
	done
}

# Run a full send of snap1 a fixed number of times, discarding the stream.
#
# Arguments: $1 - number of send operations to run
# Globals:   BTRFS_UTIL_PROG, SCRATCH_MNT (read)
full_send_loop()
{
	trap "wait; exit" SIGTERM

	local count=$1
	# Keep the loop counter private so it cannot clobber a caller's 'i'.
	local i

	for ((i = 1; i <= count; i++)); do
		# Ignore errors from send. We will check for errors later in
		# dmesg/syslog.
		# The snapshot path is quoted so that a mount point containing
		# whitespace is still passed as a single argument.
		$BTRFS_UTIL_PROG send -f /dev/null \
			"$SCRATCH_MNT/snap1" &> /dev/null
		# Pause for 0, 1 or 2 seconds between sends.
		sleep $((RANDOM % 3))
	done
}

# Run an incremental send of snap2, with snap1 as the parent, a fixed number
# of times, discarding the stream.
#
# Arguments: $1 - number of send operations to run
# Globals:   BTRFS_UTIL_PROG, SCRATCH_MNT (read)
inc_send_loop()
{
	trap "wait; exit" SIGTERM

	local count=$1
	# Keep the loop counter private so it cannot clobber a caller's 'i'.
	local i

	for ((i = 1; i <= count; i++)); do
		# Ignore errors from send. We will check for errors later in
		# dmesg/syslog.
		# Both snapshot paths are quoted so that a mount point
		# containing whitespace is still passed as single arguments.
		$BTRFS_UTIL_PROG send -f /dev/null \
			-p "$SCRATCH_MNT/snap1" "$SCRATCH_MNT/snap2" &> /dev/null
		# Pause for 0, 1 or 2 seconds between sends.
		sleep $((RANDOM % 3))
	done
}

# Create a run of consecutively numbered files, each holding 64K of 0xea
# bytes. The files are named $SCRATCH_MNT/file_N for N in
# offset + 1 .. offset + count.
#
# Arguments: $1 - number of files to create
#            $2 - offset added to the 1-based file index
# Globals:   XFS_IO_PROG, SCRATCH_MNT (read)
write_files_loop()
{
	local count=$1
	local offset=$2
	# Keep the loop counter private so it cannot clobber a caller's 'i'.
	local i

	for ((i = 1; i <= count; i++)); do
		# The path is quoted so that a mount point containing
		# whitespace is still passed as a single argument.
		$XFS_IO_PROG -f -c "pwrite -S 0xea 0 64K" \
			"$SCRATCH_MNT/file_$((i + offset))" >/dev/null
	done
}

# Set the xattr 'user.x1' to $xattr_value on a run of consecutively numbered
# files: $SCRATCH_MNT/file_N for N in offset + 1 .. offset + count.
#
# Arguments: $1 - number of files to process
#            $2 - offset added to the 1-based file index
# Globals:   SETFATTR_PROG, SCRATCH_MNT, xattr_value (read)
set_xattrs_loop()
{
	local count=$1
	local offset=$2
	# Keep the loop counter private so it cannot clobber a caller's 'i'.
	local i

	for ((i = 1; i <= count; i++)); do
		# The value and the path are quoted so that each one is always
		# passed as exactly one argument, even when it is empty or
		# contains whitespace.
		$SETFATTR_PROG -n 'user.x1' -v "$xattr_value" \
			"$SCRATCH_MNT/file_$((i + offset))"
	done
}

# Number of files created before first snapshot. Must be divisible by 4.
nr_initial_files=40000
# Number of files created after the first snapshot. Must be divisible by 4.
nr_more_files=40000

# Populate the filesystem with the initial files using 4 parallel writers,
# each one creating a contiguous quarter of the file number range.
step=$((nr_initial_files / 4))
n=0
while ((n < 4)); do
	offset=$((n * step))
	write_files_loop "$step" "$offset" &
	create_pids[$n]=$!
	n=$((n + 1))
done
# Do not go on before every writer is done.
wait "${create_pids[@]}"

# Take the first read-only snapshot. It is the source of the full sends and
# the parent of the incremental sends.
$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/snap1 |
	_filter_scratch

# Add some more files, so that there are substantial differences between the
# two test snapshots used for an incremental send later.

# Create more files, again with 4 parallel writers. Their file numbers start
# right after the ones of the initial files.
step=$((nr_more_files / 4))
n=0
while ((n < 4)); do
	offset=$((nr_initial_files + n * step))
	write_files_loop "$step" "$offset" &
	create_pids[$n]=$!
	n=$((n + 1))
done
# Do not go on before every writer is done.
wait "${create_pids[@]}"

# Add some xattrs to all files, so that every leaf and node of the fs tree is
# COWed. Adding more files only adds leaves and nodes to the tree's right
# side, since inode numbers are based on a counter and form the first part
# (objectid) of btree keys (we are only modifying the rightmost leaf of the
# tree). Use large values for the xattrs to quickly increase the height of the
# tree.
#
# The value is a string of 3800 'X' characters: pad an empty string to 3800
# blanks, then turn every blank into an 'X'.
printf -v xattr_value '%*s' 3800 ''
xattr_value=${xattr_value// /X}

# Split the work into 4 workers working on consecutive ranges to avoid
# contention on the same leaves as much as possible.
step=$(((nr_more_files + nr_initial_files) / 4))
n=0
while ((n < 4)); do
	offset=$((n * step))
	set_xattrs_loop "$step" "$offset" &
	setxattr_pids[$n]=$!
	n=$((n + 1))
done
# Do not go on before every worker is done.
wait "${setxattr_pids[@]}"

# Take the second read-only snapshot. It is the one sent incrementally against
# the first snapshot.
$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/snap2 |
	_filter_scratch

# Start the two send workers in the background: 5 full sends of snap1 and 10
# incremental sends of snap2 that use snap1 as the parent.
full_send_loop 5 &
full_send_pid=$!

inc_send_loop 10 &
inc_send_pid=$!

# Start the workers that disturb the sends. Neither of these loops ends on its
# own, both keep running until they receive SIGTERM.
dedupe_files_loop &
dedupe_pid=$!

balance_loop &
balance_pid=$!

# The send loops run a fixed number of iterations, so they are the ones that
# determine how long the stress phase lasts.
wait $full_send_pid
wait $inc_send_pid

# Stop the dedupe and balance workers. kill is used without a signal argument
# and both loops install a SIGTERM trap. Each worker is waited for before
# moving on, so nothing is still running when dmesg is checked.
kill $dedupe_pid
wait $dedupe_pid

kill $balance_pid
wait $balance_pid

# Check for error messages that happen due to an inconsistent snapshot caused
# by deduplication and balance running in parallel with send, causing btree
# nodes and leaves to disappear and get reused while send is using them.
#
# Example messages:
#
# BTRFS error (device sdc): did not find backref in send_root. inode=63292, \
#     offset=0, disk_byte=5228134400 found extent=5228134400
#
# BTRFS error (device sdc): parent transid verify failed on 32243712 wanted 24 \
#     found 27
#
# Every matching line is printed to stdout.
# NOTE(review): presumably the golden output contains no such lines, so any
# match makes the test fail - confirm against the .out file.
# NOTE(review): grep -E has no lazy quantifiers, so '.*?' presumably matches
# the same as '.*' here - confirm.
_dmesg_since_test_start | grep -E -e '\bBTRFS error \(device .*?\):'

# NOTE(review): status is presumably read by the harness exit handling set up
# in the preamble, with 0 meaning success - confirm.
status=0
exit
