# Source: storj/scripts/tests/backwardcompatibility/test-sim-backwards.sh
# (Repository viewer listing: 218 lines, 7.9 KiB, Bash.)

#!/usr/bin/env bash
set -xueo pipefail

##
## Set up temporary directories, environment variables, and helper functions
##

# Number of storage nodes passed to storj-sim via --storage-nodes.
STORJ_NUM_NODES=10
# Host address passed to storj-sim; may be overridden from the environment.
STORJ_NETWORK_HOST4=${STORJ_NETWORK_HOST4:-127.0.0.1}
# Database URL for the satellite; it has no usable default and must be
# supplied by the caller.
STORJ_SIM_POSTGRES=${STORJ_SIM_POSTGRES:-""}

if [ -z "${STORJ_SIM_POSTGRES}" ]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "Postgres is required for the satellite DB. Exiting." >&2
    exit 1
fi

# Scratch directory that later holds both git worktrees; exported so that
# child processes can see it.
STORJ_NETWORK_DIR=$(mktemp -d -t tmp.XXXXXXXXXX)
export STORJ_NETWORK_DIR
# cleanup removes everything this script created: the release and branch git
# worktrees and the temporary network directory. It is installed as the EXIT
# trap of the script.
#
# Globals: RELEASE_DIR, BRANCH_DIR, STORJ_NETWORK_DIR (read)
#
# The script runs with `set -e`, which is also in effect while a trap handler
# runs. The worktree removals are therefore best-effort: if one fails (for
# example because the script aborted before the worktree was added) the
# temporary directory must still be deleted.
cleanup() {
    local worktree
    for worktree in "${RELEASE_DIR:-}" "${BRANCH_DIR:-}"; do
        # Skip worktree paths that were never assigned.
        [ -n "$worktree" ] || continue
        git worktree remove -f "$worktree" || true
    done
    # `:?` refuses to run `rm -rf` on an empty path.
    rm -rf -- "${STORJ_NETWORK_DIR:?}"
}
# Worktree locations inside the temporary network directory: one for the
# latest release and one for the code under test.
RELEASE_DIR="${STORJ_NETWORK_DIR}/release"
BRANCH_DIR="${STORJ_NETWORK_DIR}/branch"
# Run cleanup on every exit path of the script.
trap cleanup EXIT
# test runs test-backwards.sh inside a storj-sim network, resolving binaries
# from the given build directory first.
#
# Globals:   STORJ_NUM_NODES, STORJ_NETWORK_HOST4, SCRIPTDIR (read)
# Arguments: $1   - directory whose bin/ subdirectory is prepended to PATH
#            $2.. - arguments forwarded verbatim to test-backwards.sh
#
# NOTE: this function shadows the shell's `test` builtin for the rest of the
# script; conditionals must be written with `[ ... ]` or `[[ ... ]]`.
test() {
    # Keep the directory local so it does not leak into the global scope.
    local dir=$1
    shift
    PATH="$dir"/bin:"$PATH" storj-sim -x --storage-nodes="$STORJ_NUM_NODES" --host="$STORJ_NETWORK_HOST4" network test -- bash "$SCRIPTDIR"/test-backwards.sh "$@"
}
# test_release runs the compatibility test script against the binaries built
# from the latest release worktree. All arguments are forwarded unchanged.
test_release() {
    test "${RELEASE_DIR}" "$@"
}
# test_branch runs the compatibility test script against the binaries built
# from the branch worktree. All arguments are forwarded unchanged.
test_branch() {
    test "${BRANCH_DIR}" "$@"
}
# install_sim_noquic builds the storj-sim tool set with the race detector and
# the `noquic` build tag into the given directory, then installs the latest
# storj.io/gateway into the same directory.
#
# Arguments: $1 - output directory for the binaries (created if missing)
#
# NOTE(review): callers change into a source worktree before calling this;
# presumably the storj.io/storj packages are resolved from the current
# directory - confirm before calling it from elsewhere.
install_sim_noquic() {
    local bin_dir="$1"
    local cmd
    mkdir -p -- "${bin_dir}"
    for cmd in storagenode satellite storj-sim versioncontrol uplink identity certificates; do
        # Build output is discarded (as before); only the exit status matters.
        go build -race -tags noquic -v -o "${bin_dir}/${cmd}" "storj.io/storj/cmd/${cmd}" >/dev/null 2>&1
    done
    GOBIN="${bin_dir}" go install -race -tags noquic storj.io/gateway@latest
}
##
## Build the release and branch binaries and set up the network
##

# setup two different directories containing the code for the latest release tag
# and for the current branch code
git worktree add -f "$BRANCH_DIR" HEAD
# most recent commit reachable from a tag whose name does not match *rc*
latestReleaseCommit="$(git rev-list --exclude='*rc*' --tags --max-count=1)"
latestReleaseTag=$(git describe --tags "$latestReleaseCommit")
echo "Checking out latest release tag: $latestReleaseTag"
git worktree add -f "$RELEASE_DIR" "$latestReleaseCommit"

# delete this file that forces production config settings
rm -f "$RELEASE_DIR"/internal/version/release.go
# clear out release information
cat > "$RELEASE_DIR"/private/version/release.go <<EOF
// Copyright (C) 2020 Storj Labs, Inc.
// See LICENSE for copying information.
package version
EOF

# absolute directory of this script; test-backwards.sh is run from there
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# build the release binaries from inside the release worktree
# (directory arguments are quoted so paths with whitespace stay one word)
pushd "$RELEASE_DIR"
install_sim_noquic "$RELEASE_DIR"/bin
popd

# build the branch binaries through the branch's own Makefile
GOBIN="$BRANCH_DIR"/bin make -C "$BRANCH_DIR" install-sim

echo "Overriding default max segment size to 6MiB"
pushd "$RELEASE_DIR"
GOBIN="$RELEASE_DIR"/bin go install -tags noquic -v -ldflags "-X 'storj.io/uplink.maxSegmentSize=6MiB'" storj.io/storj/cmd/uplink
popd
pushd "$BRANCH_DIR"
GOBIN="$BRANCH_DIR"/bin go install -v -ldflags "-X 'storj.io/uplink.maxSegmentSize=6MiB'" storj.io/storj/cmd/uplink
popd

# setup the network using the release
PATH="$RELEASE_DIR"/bin:"$PATH" storj-sim -x --host "$STORJ_NETWORK_HOST4" network --postgres="$STORJ_SIM_POSTGRES" setup
##
## Run some basic tests on the release branch, creating data for later tests.
##

# With everything on the release build: upload first, then check that the
# uploaded data can be downloaded again.
for release_step in upload download; do
    test_release -b release-network-release-uplink "$release_step"
done
##
## Change a bunch of settings to run on the current branch
##

satellite_dir="$(storj-sim network env SATELLITE_0_DIR)"
SATELLITE_CONFIG="${satellite_dir}/config.yaml"

# Two in-place rewrites of the satellite config, applied in this order:
#  1. anywhere that has "/release/" becomes "/branch/", which currently just
#     renames the static dir paths
#  2. any 140XX port becomes a 100XX port to fix, satellite.API part removal
#     from satellite.Core
sed -i \
    -e 's#/release/#/branch/#g' \
    -e "s#$STORJ_NETWORK_HOST4:140#$STORJ_NETWORK_HOST4:100#g" \
    "$SATELLITE_CONFIG"

# add new address for admin panel, unless the config already mentions one
grep -q "admin.address" "$SATELLITE_CONFIG" \
    || printf 'admin.address: %s:10005\n' "$STORJ_NETWORK_HOST4" >> "$SATELLITE_CONFIG"
# Write a redis config when the network does not have one yet.
REDIS_CONFIG="$(storj-sim network env REDIS_0_DIR)/redis.conf"
if [ ! -f "$REDIS_CONFIG" ]; then
    # One config directive per argument, each written on its own line.
    printf '%s\n' \
        "daemonize no" \
        "bind $STORJ_NETWORK_HOST4" \
        "port 10004" \
        "timeout 0" \
        "databases 2" \
        "dbfilename sim.rdb" \
        "dir ./" \
        >> "$REDIS_CONFIG"
fi
# setup multinode if config is missing
# MULTINODE_DIR is the directory storj-sim reports for multinode instance 0.
MULTINODE_DIR=$(storj-sim network env MULTINODE_0_DIR)
if [ ! -f "$MULTINODE_DIR/config.yaml" ]; then
# The setup arguments reported by storj-sim are expanded unquoted, so the
# shell splits them into separate arguments for multinode. NOTE(review):
# presumably intentional - quoting the substitution would pass them as a
# single argument.
multinode $(storj-sim --host "$STORJ_NETWORK_HOST4" network env MULTINODE_0_SETUP_ARGS)
fi
# keep half of the storage nodes on the old version: nodes 0-4 get a hard
# link to the release storagenode binary inside their own directory
for old_node in 0 1 2 3 4; do
    ln "$RELEASE_DIR"/bin/storagenode "$(storj-sim network env "STORAGENODE_${old_node}_DIR")"/storagenode
done
# upgrade the trust configuration on the other half (nodes 5-9) as the old
# configuration is most certainly not being used outside of test environments
# and is not backwards compatible (i.e. ignored)
for new_node in 5 6 7 8 9; do
    sed -i -e "s#storage.whitelisted-satellites#storage2.trust.sources#g" "$(storj-sim network env "STORAGENODE_${new_node}_DIR")"/config.yaml
done
# Commit message of the changeset referenced below (2023-02-28 22:57:39 +00:00):
#
#   satellite/overlay: configurable meaning of last_net
#
#   Up to now, we have been implementing the DistinctIP preference with code
#   in two places:
#
#   1. On check-in, the last_net is determined by taking the /24 or /64 (in
#      ResolveIPAndNetwork()) and we store it with the node record.
#   2. On node selection, a preference parameter defines whether to return
#      results that are distinct on last_net.
#
#   It can be observed that we have never yet had the need to switch from
#   DistinctIP to !DistinctIP, or from !DistinctIP to DistinctIP, on the same
#   satellite, and we will probably never need to do so in an automated way.
#   It can also be observed that this arrangement makes tests more
#   complicated, because we often have to arrange for test nodes to have IP
#   addresses in different /24 networks (a particular pain on macOS).
#
#   Those two considerations, plus some pending work on the repair framework
#   that will make repair take last_net into consideration, motivate this
#   change.
#
#   With this change, in the #2 place, we will _always_ return results that
#   are distinct on last_net. We implement the DistinctIP preference, then, by
#   making the #1 place (ResolveIPAndNetwork()) more flexible. When DistinctIP
#   is enabled, last_net will be calculated as it was before. But when
#   DistinctIP is _off_, last_net can be the same as address (IP and port).
#   That will effectively implement !DistinctIP because every record will have
#   a distinct last_net already.
#
#   As a side effect, this flexibility will allow us to change the rules about
#   last_net construction arbitrarily. We can do tests where last_net is set
#   to the source IP, or to a /30 prefix, or a /16 prefix, etc., and be able
#   to exercise the production logic without requiring a virtual network
#   bridge.
#
#   This change should be safe to make without any migration code, because all
#   known production satellite deployments use DistinctIP, and the associated
#   last_net values will not change for them. They will only change for
#   satellites with !DistinctIP, which are mostly test deployments that can be
#   recreated trivially. For those satellites which are both permanent and
#   !DistinctIP, node selection will suddenly start acting as though
#   DistinctIP is enabled, until the operator runs a single SQL update
#   "UPDATE nodes SET last_net = last_ip_port". That can be done either before
#   or after deploying software with this change.
#
#   I also assert that this will not hurt performance for production
#   deployments. It's true that adding the distinct requirement to node
#   selection makes things a little slower, but the distinct requirement is
#   already present for all production deployments, and they will see no
#   change.
#
#   Refs: https://github.com/storj/storj/issues/5391
#   Change-Id: I0e7e92498c3da768df5b4d5fb213dcd2d4862924
# For cases where the release predates changeset I0e7e92498c3da768df5b4d5fb213dcd2d4862924,
# adjust all last_net values for future compatibility. This migration step is only necessary for
# satellites which existed before the aforementioned changeset and use dev defaults (to be specific,
# DistinctIP is off). This is a harmless change for any other satellites using dev defaults.
case "$STORJ_SIM_POSTGRES" in
    cockroach:*)
        # Cockroach URL: connect with the postgres: scheme and select the
        # satellite/0 database through the URL path, so no search_path
        # statement is needed.
        schema_set=
        pgurl="postgres:${STORJ_SIM_POSTGRES#cockroach:}"
        pgurl="${pgurl%?sslmode=disable}/satellite/0?sslmode=disable"
        ;;
    *)
        # Any other URL is used as-is; the satellite/0 schema is selected
        # with a search_path statement prepended to the update.
        schema_set='set search_path to "satellite/0"; '
        pgurl="$STORJ_SIM_POSTGRES"
        ;;
esac
psql "$pgurl" -c "${schema_set}update nodes set last_net = last_ip_port"
# Run with 9 nodes to exercise more code paths with one node being offline.
STORJ_NUM_NODES=9

##
## Run tests on the branch under test.
##

# Each entry is "<bucket> <action>", run in order:
#   1. check that branch uplink + branch network can read fully release data
#   2. check that branch uplink + branch network can upload
for branch_step in \
    "release-network-release-uplink download" \
    "branch-network-branch-uplink upload"; do
    test_branch -b "${branch_step% *}" "${branch_step#* }"
done
##
## Run even more tests with the old uplink binary.
##

# overwrite new uplink with release branch and test the download
cp "${RELEASE_DIR}/bin/uplink" "${BRANCH_DIR}/bin/uplink"

# Each entry is "<bucket> <action>", run in order with the release uplink
# against the branch network:
#   1. can read fully release data
#   2. can read fully branch data
#   3. can upload
#   4. can read mixed data
for uplink_step in \
    "release-network-release-uplink download" \
    "branch-network-branch-uplink download" \
    "branch-network-release-uplink upload" \
    "branch-network-release-uplink download"; do
    test_branch -b "${uplink_step% *}" "${uplink_step#* }"
done

##
## Perform cleanup, deleting all of the files/buckets.
##
test_branch cleanup