test-sim-backwards: refactor, reduce node count to 9 in mixed mode

This catches the bug that hit prod where we missed that a migration
was necessary for a column, but only on uploads, and only for nodes
that had not checked in yet.

So, this does more uploads, and stops a node from starting so that
we trigger that specific scenario. Hopefully it will catch other
similar ones in the future.

I confirmed that this locally caught the bug when the release under
test was v0.34.10 and HEAD did not include the fix for it.

Change-Id: If7d41e8241d6a042fa524b4aff956b0264ecb128
This commit is contained in:
Jeff Wendling 2020-04-03 15:36:23 -06:00 committed by Egon Elbre
parent 5a85e8d749
commit bb28851964
2 changed files with 166 additions and 127 deletions

View File

@ -5,107 +5,88 @@ SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
: "${STORJ_NETWORK_DIR?Environment variable STORJ_NETWORK_DIR needs to be set}"
BUCKET=bucket-123
TEST_FILES_DIR="$STORJ_NETWORK_DIR/testfiles"
BRANCH_DST_DIR=${BRANCH_DIR:-$STORJ_NETWORK_DIR/branch}
RELEASE_DST_DIR=${RELEASE_DIR:-$STORJ_NETWORK_DIR/release}
while getopts "b:" o; do
case "${o}" in
b)
BUCKET="${OPTARG}"
;;
*)
;;
esac
done
shift $((OPTIND-1))
BUCKET="${BUCKET:-bucket-123}"
PRISTINE_FILES_DIR="$STORJ_NETWORK_DIR/pristine/$BUCKET"
DOWNLOAD_FILES_DIR="$STORJ_NETWORK_DIR/download/$BUCKET"
# override configured access with access where address is node ID + satellite address
STORJ_ACCESS=$(go run "$SCRIPTDIR"/update-access.go "$SATELLITE_0_DIR" "$GATEWAY_0_ACCESS")
export STORJ_ACCESS
set -x
if [[ "$1" == "upload" ]]; then
mkdir -p "$PRISTINE_FILES_DIR" "$DOWNLOAD_FILES_DIR"
setup(){
mkdir -p "$TEST_FILES_DIR" "$BRANCH_DST_DIR" "$RELEASE_DST_DIR"
random_bytes_file () {
size=$1
output=$2
head -c $size </dev/urandom > $output
head -c "$size" </dev/urandom > "$output"
}
random_bytes_file "2KiB" "$TEST_FILES_DIR/small-upload-testfile" # create 2kb file of random bytes (inline)
random_bytes_file "5MiB" "$TEST_FILES_DIR/big-upload-testfile" # create 5mb file of random bytes (remote)
random_bytes_file "64MiB" "$TEST_FILES_DIR/multisegment-upload-testfile" # create 65mb file of random bytes (remote)
random_bytes_file "2KiB" "$PRISTINE_FILES_DIR/small-upload-testfile" # create 2kb file of random bytes (inline)
random_bytes_file "5MiB" "$PRISTINE_FILES_DIR/big-upload-testfile" # create 5mb file of random bytes (remote)
random_bytes_file "65MiB" "$PRISTINE_FILES_DIR/multisegment-upload-testfile" # create 65mb file of random bytes (remote)
echo "setup test successfully"
}
# sometimes we overwrite files in the same bucket. allow the mb to fail because of an existing
# bucket. if it fails for any other reason, the following cp will get it anyway.
uplink --config-dir "$GATEWAY_0_DIR" mb "sj://$BUCKET/" || true
# override configured access with access where address is node ID + satellite address
export STORJ_ACCESS=$(go run "$SCRIPTDIR"/update-access.go $SATELLITE_0_DIR $GATEWAY_0_ACCESS)
if [[ "$1" == "upload" ]]; then
setup
uplink --config-dir "$GATEWAY_0_DIR" mb "sj://$BUCKET/"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "$TEST_FILES_DIR/small-upload-testfile" "sj://$BUCKET/"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "$TEST_FILES_DIR/big-upload-testfile" "sj://$BUCKET/"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "$TEST_FILES_DIR/multisegment-upload-testfile" "sj://$BUCKET/"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "sj://$BUCKET/small-upload-testfile" "$RELEASE_DST_DIR"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "sj://$BUCKET/big-upload-testfile" "$RELEASE_DST_DIR"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "sj://$BUCKET/multisegment-upload-testfile" "$RELEASE_DST_DIR"
if cmp "$TEST_FILES_DIR/small-upload-testfile" "$RELEASE_DST_DIR/small-upload-testfile"
then
echo "upload test on release tag: small upload testfile matches uploaded file"
else
echo "upload test on release tag: small upload testfile does not match uploaded file"
exit 1
fi
if cmp "$TEST_FILES_DIR/big-upload-testfile" "$RELEASE_DST_DIR/big-upload-testfile"
then
echo "upload test on release tag: big upload testfile matches uploaded file"
else
echo "upload test on release tag: big upload testfile does not match uploaded file"
exit 1
fi
if cmp "$TEST_FILES_DIR/multisegment-upload-testfile" "$RELEASE_DST_DIR/multisegment-upload-testfile"
then
echo "upload test on release tag: multisegment upload testfile matches uploaded file"
else
echo "upload test on release tag: multisegment upload testfile does not match uploaded file"
exit 1
fi
rm "$RELEASE_DST_DIR/small-upload-testfile"
rm "$RELEASE_DST_DIR/big-upload-testfile"
rm "$RELEASE_DST_DIR/multisegment-upload-testfile"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "$PRISTINE_FILES_DIR/small-upload-testfile" "sj://$BUCKET/"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "$PRISTINE_FILES_DIR/big-upload-testfile" "sj://$BUCKET/"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "$PRISTINE_FILES_DIR/multisegment-upload-testfile" "sj://$BUCKET/"
fi
if [[ "$1" == "download" ]]; then
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "sj://$BUCKET/small-upload-testfile" "$BRANCH_DST_DIR"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "sj://$BUCKET/big-upload-testfile" "$BRANCH_DST_DIR"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "sj://$BUCKET/multisegment-upload-testfile" "$BRANCH_DST_DIR"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "sj://$BUCKET/small-upload-testfile" "$DOWNLOAD_FILES_DIR"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "sj://$BUCKET/big-upload-testfile" "$DOWNLOAD_FILES_DIR"
uplink --config-dir "$GATEWAY_0_DIR" cp --progress=false "sj://$BUCKET/multisegment-upload-testfile" "$DOWNLOAD_FILES_DIR"
if cmp "$TEST_FILES_DIR/small-upload-testfile" "$BRANCH_DST_DIR/small-upload-testfile"
if cmp "$PRISTINE_FILES_DIR/small-upload-testfile" "$DOWNLOAD_FILES_DIR/small-upload-testfile"
then
echo "download test on current branch: small upload testfile matches uploaded file"
echo "download test: small upload testfile matches uploaded file"
else
echo "download test on current branch: small upload testfile does not match uploaded file"
echo "download test: small upload testfile does not match uploaded file"
exit 1
fi
if cmp "$TEST_FILES_DIR/big-upload-testfile" "$BRANCH_DST_DIR/big-upload-testfile"
if cmp "$PRISTINE_FILES_DIR/big-upload-testfile" "$DOWNLOAD_FILES_DIR/big-upload-testfile"
then
echo "download test on current branch: big upload testfile matches uploaded file"
echo "download test: big upload testfile matches uploaded file"
else
echo "download test on current branch: big upload testfile does not match uploaded file"
echo "download test: big upload testfile does not match uploaded file"
exit 1
fi
if cmp "$TEST_FILES_DIR/multisegment-upload-testfile" "$BRANCH_DST_DIR/multisegment-upload-testfile"
if cmp "$PRISTINE_FILES_DIR/multisegment-upload-testfile" "$DOWNLOAD_FILES_DIR/multisegment-upload-testfile"
then
echo "download test on current branch: multisegment upload testfile matches uploaded file"
echo "download test: multisegment upload testfile matches uploaded file"
else
echo "download test on current branch: multisegment upload testfile does not match uploaded file"
echo "download test: multisegment upload testfile does not match uploaded file"
exit 1
fi
rm "$BRANCH_DST_DIR/small-upload-testfile"
rm "$BRANCH_DST_DIR/big-upload-testfile"
rm "$BRANCH_DST_DIR/multisegment-upload-testfile"
rm "$DOWNLOAD_FILES_DIR/small-upload-testfile"
rm "$DOWNLOAD_FILES_DIR/big-upload-testfile"
rm "$DOWNLOAD_FILES_DIR/multisegment-upload-testfile"
fi
if [[ "$1" == "cleanup" ]]; then
uplink --config-dir "$GATEWAY_0_DIR" rm "sj://$BUCKET/small-upload-testfile"
uplink --config-dir "$GATEWAY_0_DIR" rm "sj://$BUCKET/big-upload-testfile"
uplink --config-dir "$GATEWAY_0_DIR" rm "sj://$BUCKET/multisegment-upload-testfile"
uplink --config-dir "$GATEWAY_0_DIR" rb "sj://$BUCKET"
for BUCKET_DIR in "$STORJ_NETWORK_DIR"/pristine/*; do
BUCKET="$(basename "$BUCKET_DIR")"
uplink --config-dir "$GATEWAY_0_DIR" rm "sj://$BUCKET/small-upload-testfile"
uplink --config-dir "$GATEWAY_0_DIR" rm "sj://$BUCKET/big-upload-testfile"
uplink --config-dir "$GATEWAY_0_DIR" rm "sj://$BUCKET/multisegment-upload-testfile"
uplink --config-dir "$GATEWAY_0_DIR" rb "sj://$BUCKET"
done
fi

View File

@ -1,20 +1,51 @@
#!/usr/bin/env bash
set -ueo pipefail
set +x
set -xueo pipefail
TMP=$(mktemp -d -t tmp.XXXXXXXXXX)
export STORJ_NETWORK_DIR=$TMP
cleanup(){
##
## Set up temporary directories, environment variables, and helper functions
##
STORJ_NUM_NODES=10
STORJ_NETWORK_HOST4=${STORJ_NETWORK_HOST4:-127.0.0.1}
STORJ_SIM_POSTGRES=${STORJ_SIM_POSTGRES:-""}
if [ -z "${STORJ_SIM_POSTGRES}" ]; then
echo "Postgres is required for the satellite DB. Exiting."
exit 1
fi
STORJ_NETWORK_DIR=$(mktemp -d -t tmp.XXXXXXXXXX)
export STORJ_NETWORK_DIR
# cleanup tears down what this script created. It is installed as the EXIT
# trap, so it runs whether the script succeeds or fails.
# Globals read:
#   RELEASE_DIR, BRANCH_DIR - git worktrees to unregister; either may be unset
#                             or never created if the script stopped early
#   STORJ_NETWORK_DIR       - temporary directory to delete
# Worktree removal is best-effort: errexit is active, so an unguarded failure
# here would end the trap before the temporary directory is deleted.
cleanup() {
    local worktree
    for worktree in "${RELEASE_DIR:-}" "${BRANCH_DIR:-}"; do
        # Skip paths that were never assigned.
        [ -n "$worktree" ] || continue
        git worktree remove -f "$worktree" ||
            echo "warning: could not remove git worktree $worktree" >&2
    done
    # :? aborts instead of expanding to an empty path.
    rm -rf -- "${STORJ_NETWORK_DIR:?}"
    echo "cleaned up test successfully"
}
trap cleanup EXIT
BRANCH_DIR="$STORJ_NETWORK_DIR/branch"
RELEASE_DIR="$STORJ_NETWORK_DIR/release"
# test runs test-backwards.sh through `storj-sim network test`, with the
# binaries of one build tree placed first on PATH.
# Arguments:
#   $1   - build directory; its bin/ is prepended to PATH for the storj-sim call
#   $2.. - arguments passed through unchanged to test-backwards.sh
# Globals read: STORJ_NUM_NODES, STORJ_NETWORK_HOST4, SCRIPTDIR
# NOTE: defining this function shadows the shell's `test` builtin for the rest
# of the script; write conditionals with `[ ... ]` or `[[ ... ]]`.
test() {
    # Local so the build directory does not leak into the global scope.
    local bin_root=$1
    shift
    PATH="$bin_root"/bin:"$PATH" storj-sim -x \
        --storage-nodes="$STORJ_NUM_NODES" \
        --host="$STORJ_NETWORK_HOST4" \
        network test -- bash "$SCRIPTDIR"/test-backwards.sh "$@"
}
# test_release forwards all of its arguments to `test`, selecting the release
# build tree ($RELEASE_DIR) as the source of binaries.
test_release() {
    test "${RELEASE_DIR}" "$@"
}
# test_branch forwards all of its arguments to `test`, selecting the branch
# build tree ($BRANCH_DIR) as the source of binaries.
test_branch() {
    test "${BRANCH_DIR}" "$@"
}
##
## Build the release and branch binaries and set up the network
##
# setup two different directories containing the code for the latest release tag
# and for the current branch code
git worktree add -f "$BRANCH_DIR" HEAD
@ -25,10 +56,10 @@ echo "Checking out latest release tag: $latestReleaseTag"
git worktree add -f "$RELEASE_DIR" "$latestReleaseCommit"
# delete this file that forces production config settings
rm -f "$RELEASE_DIR/internal/version/release.go"
rm -f "$RELEASE_DIR"/internal/version/release.go
# clear out release information
cat > $RELEASE_DIR/private/version/release.go <<EOF
cat > "$RELEASE_DIR"/private/version/release.go <<EOF
// Copyright (C) 2020 Storj Labs, Inc.
// See LICENSE for copying information.
@ -38,78 +69,105 @@ EOF
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# replace unstable git.apache.org package with github
(cd $RELEASE_DIR && go mod edit -replace git.apache.org/thrift.git=github.com/apache/thrift@v0.12.0)
(cd "$RELEASE_DIR" && go mod edit -replace git.apache.org/thrift.git=github.com/apache/thrift@v0.12.0)
GOBIN=$RELEASE_DIR/bin make -C "$RELEASE_DIR" install-sim
GOBIN=$BRANCH_DIR/bin make -C "$BRANCH_DIR" install-sim
GOBIN="$RELEASE_DIR"/bin make -C "$RELEASE_DIR" install-sim
GOBIN="$BRANCH_DIR"/bin make -C "$BRANCH_DIR" install-sim
STORJ_NETWORK_HOST4=${STORJ_NETWORK_HOST4:-127.0.0.1}
STORJ_SIM_POSTGRES=${STORJ_SIM_POSTGRES:-""}
# setup the network using the release
PATH="$RELEASE_DIR"/bin:"$PATH" storj-sim -x --host "$STORJ_NETWORK_HOST4" network --postgres="$STORJ_SIM_POSTGRES" setup
if [ -z ${STORJ_SIM_POSTGRES} ]; then
echo "Postgres is required for the satellite DB. Exiting."
exit 1
fi
##
## Run some basic tests on the release branch, creating data for later tests.
##
# setup the network
PATH=$RELEASE_DIR/bin:$PATH storj-sim -x --host $STORJ_NETWORK_HOST4 network --postgres=$STORJ_SIM_POSTGRES setup
# upload using everything release
test_release -b release-network-release-uplink upload
# run upload part of backward compatibility tests from the latest release branch
PATH=$RELEASE_DIR/bin:$PATH storj-sim -x --host $STORJ_NETWORK_HOST4 network test bash "$SCRIPTDIR"/test-backwards.sh upload
# check that it worked with everything release
test_release -b release-network-release-uplink download
SATELLITE_CONFIG=$(storj-sim network env SATELLITE_0_DIR)/config.yaml
##
## Change a bunch of settings to run on the current branch
##
SATELLITE_CONFIG="$(storj-sim network env SATELLITE_0_DIR)"/config.yaml
# this replaces anywhere that has "/release/" in the config file, which currently just renames the static dir paths
sed -i -e 's#/release/#/branch/#g' $SATELLITE_CONFIG
sed -i -e 's#/release/#/branch/#g' "$SATELLITE_CONFIG"
# replace any 140XX port with 100XX port to fix, satellite.API part removal from satellite.Core
sed -i -e "s#$STORJ_NETWORK_HOST4:140#$STORJ_NETWORK_HOST4:100#g" $SATELLITE_CONFIG
sed -i -e "s#$STORJ_NETWORK_HOST4:140#$STORJ_NETWORK_HOST4:100#g" "$SATELLITE_CONFIG"
# add new address for admin panel
if ! grep -q "admin.address" $SATELLITE_CONFIG; then
echo admin.address: $STORJ_NETWORK_HOST4:10005 >> $SATELLITE_CONFIG
if ! grep -q "admin.address" "$SATELLITE_CONFIG"; then
echo admin.address: "$STORJ_NETWORK_HOST4":10005 >> "$SATELLITE_CONFIG"
fi
# create redis config if it's missing
REDIS_CONFIG=$(storj-sim network env REDIS_0_DIR)/redis.conf
if [ ! -f "$REDIS_CONFIG" ] ; then
echo "daemonize no" >> $REDIS_CONFIG
echo "bind $STORJ_NETWORK_HOST4" >> $REDIS_CONFIG
echo "port 10004" >> $REDIS_CONFIG
echo "timeout 0" >> $REDIS_CONFIG
echo "databases 2" >> $REDIS_CONFIG
echo "dbfilename sim.rdb" >> $REDIS_CONFIG
echo "dir ./" >> $REDIS_CONFIG
{
echo "daemonize no"
echo "bind $STORJ_NETWORK_HOST4"
echo "port 10004"
echo "timeout 0"
echo "databases 2"
echo "dbfilename sim.rdb"
echo "dir ./"
} >> "$REDIS_CONFIG"
fi
## Ensure that partially upgraded network works
# keep half of the storage nodes on the old version
ln $RELEASE_DIR/bin/storagenode `storj-sim network env STORAGENODE_0_DIR`/storagenode
ln $RELEASE_DIR/bin/storagenode `storj-sim network env STORAGENODE_1_DIR`/storagenode
ln $RELEASE_DIR/bin/storagenode `storj-sim network env STORAGENODE_2_DIR`/storagenode
ln $RELEASE_DIR/bin/storagenode `storj-sim network env STORAGENODE_3_DIR`/storagenode
ln $RELEASE_DIR/bin/storagenode `storj-sim network env STORAGENODE_4_DIR`/storagenode
ln "$RELEASE_DIR"/bin/storagenode "$(storj-sim network env STORAGENODE_0_DIR)"/storagenode
ln "$RELEASE_DIR"/bin/storagenode "$(storj-sim network env STORAGENODE_1_DIR)"/storagenode
ln "$RELEASE_DIR"/bin/storagenode "$(storj-sim network env STORAGENODE_2_DIR)"/storagenode
ln "$RELEASE_DIR"/bin/storagenode "$(storj-sim network env STORAGENODE_3_DIR)"/storagenode
ln "$RELEASE_DIR"/bin/storagenode "$(storj-sim network env STORAGENODE_4_DIR)"/storagenode
# upgrade the trust configuration on the other half as the old configuration is
# most certainly not being used outside of test environments and is not
# backwards compatible (i.e. ignored)
sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" `storj-sim network env STORAGENODE_5_DIR`/config.yaml
sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" `storj-sim network env STORAGENODE_6_DIR`/config.yaml
sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" `storj-sim network env STORAGENODE_7_DIR`/config.yaml
sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" `storj-sim network env STORAGENODE_8_DIR`/config.yaml
sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" `storj-sim network env STORAGENODE_9_DIR`/config.yaml
sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" "$(storj-sim network env STORAGENODE_5_DIR)"/config.yaml
sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" "$(storj-sim network env STORAGENODE_6_DIR)"/config.yaml
sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" "$(storj-sim network env STORAGENODE_7_DIR)"/config.yaml
sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" "$(storj-sim network env STORAGENODE_8_DIR)"/config.yaml
sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" "$(storj-sim network env STORAGENODE_9_DIR)"/config.yaml
# run download part of backward compatibility tests from the current branch, using new uplink
PATH=$BRANCH_DIR/bin:$PATH storj-sim -x --host $STORJ_NETWORK_HOST4 network test bash "$SCRIPTDIR"/test-backwards.sh download
# Run with 9 nodes to exercise more code paths with one node being offline.
STORJ_NUM_NODES=9
## Ensure that old uplink works
##
## Run tests on the branch under test.
##
# check that branch uplink + branch network can read fully release data
test_branch -b release-network-release-uplink download
# check that branch uplink + branch network can upload
test_branch -b branch-network-branch-uplink upload
##
## Run even more tests with the old uplink binary.
##
# overwrite new uplink with release branch and test the download
cp $RELEASE_DIR/bin/uplink $BRANCH_DIR/bin/uplink
cp "$RELEASE_DIR"/bin/uplink "$BRANCH_DIR"/bin/uplink
# run download part of backward compatibility tests from the current branch
PATH=$BRANCH_DIR/bin:$PATH storj-sim -x --host $STORJ_NETWORK_HOST4 network test bash "$SCRIPTDIR"/test-backwards.sh download
# check that release uplink + branch network can read fully release data
test_branch -b release-network-release-uplink download
# run a delete in the network
PATH=$BRANCH_DIR/bin:$PATH storj-sim -x --host $STORJ_NETWORK_HOST4 network test bash "$SCRIPTDIR"/test-backwards.sh cleanup
# check that release uplink + branch network can read fully branch data
test_branch -b branch-network-branch-uplink download
# check that release uplink + branch network can upload
test_branch -b branch-network-release-uplink upload
# check that release uplink + branch network can read mixed data
test_branch -b branch-network-release-uplink download
##
## Perform cleanup, deleting all of the files/buckets.
##
test_branch cleanup