2019-01-24 16:26:36 +00:00
// Copyright (C) 2019 Storj Labs, Inc.
2018-12-10 14:50:12 +00:00
// See LICENSE for copying information.
package satellitedbtest
// This package should be referenced only in test files!
import (
2020-01-13 13:18:48 +00:00
"context"
2022-05-18 15:43:40 +01:00
"flag"
2020-03-27 16:18:19 +00:00
"fmt"
2022-05-18 15:43:40 +01:00
"os"
2022-01-10 08:53:18 +00:00
"regexp"
2019-10-18 20:03:10 +01:00
"strconv"
2019-02-04 20:37:46 +00:00
"strings"
2018-12-10 14:50:12 +00:00
"testing"
2023-01-24 10:15:38 +00:00
"github.com/google/go-cmp/cmp"
2019-12-04 03:36:21 +00:00
"github.com/zeebo/errs"
"go.uber.org/zap"
2019-02-14 21:55:21 +00:00
"go.uber.org/zap/zaptest"
2020-01-19 16:29:15 +00:00
2020-01-13 13:18:48 +00:00
"storj.io/common/testcontext"
2021-04-23 10:52:40 +01:00
"storj.io/private/dbutil"
"storj.io/private/dbutil/pgtest"
"storj.io/private/dbutil/pgutil"
"storj.io/private/dbutil/tempdb"
satellite/satellitedb: prepare to remove big.Float from db
Why: big.Float is not an ideal type for dealing with monetary amounts,
because no matter how high the precision, some non-integer decimal
values can not be represented exactly in base-2 floating point. Also,
storing gob-encoded big.Float values in the database makes it very hard
to use those values in meaningful queries, making it difficult to do
any sort of analysis on billing.
Now that we have amounts represented using monetary.Amount, we can
simply store them in the database using integers (as given by the
.BaseUnits() method on monetary.Amount).
We should move toward storing the currency along with any monetary
amount, wherever we are storing amounts, because satellites might want
to deal with currencies other than STORJ and USD. Even better, it
becomes much clearer what currency each monetary value is _supposed_ to
be in (I had to dig through code to find that out for our current
monetary columns).
Deployment
----------
Getting rid of the big.Float columns will take multiple deployment
steps. There does not seem to be any way to make the change in a way
that lets existing queries continue to work on CockroachDB (it could be
done with rules and triggers and a stored procedure that knows how to
gob-decode big.Float objects, but CockroachDB doesn't have rules _or_
triggers _or_ stored procedures). Instead, in this first step, we make
no changes to the database schema, but add code that knows how to deal
with the planned changes to the schema when they are made in a future
"step 2" deployment. All functions that deal with the
coinbase_transactions table have been taught to recognize the "undefined
column" error, and when it is seen, to call a separate "transition shim"
function to accomplish the task. Once all the services are running this
code, and the step 2 deployment makes breaking changes to the schema,
any services that are still running and connected to the database will
keep working correctly because of the fallback code included here. The
step 2 deployment can be made without these transition shims included,
because it will apply the database schema changes before any of its code
runs.
Step 1:
No schema changes; just include code that recognizes the
"undefined column" error when dealing with the
coinbase_transactions or stripecoinpayments_tx_conversion_rates
tables, and if found, assumes that the column changes from Step
2 have already been made.
Step 2:
In coinbase_transactions:
* change the names of the 'amount' and 'received' columns to
'amount_gob' and 'received_gob' respectively
* add new 'amount_numeric' and 'received_numeric' columns with
INT8 type.
In stripecoinpayments_tx_conversion_rates:
* change the name of the 'rate' column to 'rate_gob'
* add new 'rate_numeric' column with NUMERIC(8, 8) type
Code reading from either of these tables must query both the X_gob
and X_numeric columns. If X_numeric is not null, its value should
be used; otherwise, the gob-encoded big.Float in X_gob should be
used. A chore might be included in this step that transitions values
from X_gob to X_numeric a few rows at a time.
Step 3:
Once all prod satellites have no values left in the _gob columns, we
can drop those columns and add NOT NULL constraints to the _numeric
columns.
Change-Id: Id6db304b404e6fde44f5a8c23cdaeeaaa2324f20
2021-08-10 23:30:23 +01:00
"storj.io/private/tagsql"
2018-12-27 09:56:25 +00:00
"storj.io/storj/satellite"
2021-05-13 09:14:18 +01:00
"storj.io/storj/satellite/metabase"
2018-12-10 14:50:12 +00:00
"storj.io/storj/satellite/satellitedb"
)
2022-05-18 15:43:40 +01:00
// cockroachNoDrop, when set, makes the Create*DB helpers in this file replace
// the cleanup of cockroach temporary databases with a no-op.
//
// Cockroach DROP DATABASE takes a significant amount of time; however, leaving
// the databases behind has no importance in our tests. The default value is
// read from the STORJ_TEST_COCKROACH_NODROP environment variable.
var cockroachNoDrop = flag . Bool ( "cockroach-no-drop" , stringToBool ( os . Getenv ( "STORJ_TEST_COCKROACH_NODROP" ) ) , "Skip dropping cockroach databases to speed up tests" )
// stringToBool interprets v with strconv.ParseBool. Any value that cannot be
// parsed (including the empty string) is reported as false.
func stringToBool(v string) bool {
	if parsed, err := strconv.ParseBool(v); err == nil {
		return parsed
	}
	return false
}
2020-07-16 15:18:02 +01:00
// SatelliteDatabases maybe name can be better.
2019-10-04 15:12:21 +01:00
type SatelliteDatabases struct {
2020-10-29 16:54:35 +00:00
Name string
MasterDB Database
MetabaseDB Database
2019-10-04 15:12:21 +01:00
}
2020-07-16 15:18:02 +01:00
// Database describes a test database.
2019-02-04 20:37:46 +00:00
type Database struct {
	// Name is the human-readable backend name used in error and skip messages.
	Name string
	// URL is the connection string; an empty value means "not configured".
	URL string
	// Message is the hint appended to the error/skip text when URL is empty.
	Message string
}
2020-04-27 20:34:42 +01:00
// ignoreSkip provides a Skip method that does nothing. Databases passes it to
// the pgtest pickers so that a missing connection string does not skip
// anything at that point.
type ignoreSkip struct{}

// Skip discards its arguments and has no effect.
func (ignoreSkip) Skip(_ ...interface{}) {}
2019-02-04 20:37:46 +00:00
// Databases returns default databases.
2019-10-04 15:12:21 +01:00
func Databases ( ) [ ] SatelliteDatabases {
2021-05-11 11:08:06 +01:00
var dbs [ ] SatelliteDatabases
2020-04-27 20:34:42 +01:00
postgresConnStr := pgtest . PickPostgres ( ignoreSkip { } )
2021-05-11 11:08:06 +01:00
if ! strings . EqualFold ( postgresConnStr , "omit" ) {
dbs = append ( dbs , SatelliteDatabases {
2020-10-29 16:54:35 +00:00
Name : "Postgres" ,
MasterDB : Database { "Postgres" , postgresConnStr , "Postgres flag missing, example: -postgres-test-db=" + pgtest . DefaultPostgres + " or use STORJ_TEST_POSTGRES environment variable." } ,
MetabaseDB : Database { "Postgres" , postgresConnStr , "" } ,
2021-05-11 11:08:06 +01:00
} )
}
cockroachConnStr := pgtest . PickCockroach ( ignoreSkip { } )
if ! strings . EqualFold ( cockroachConnStr , "omit" ) {
dbs = append ( dbs , SatelliteDatabases {
2020-10-29 16:54:35 +00:00
Name : "Cockroach" ,
MasterDB : Database { "Cockroach" , cockroachConnStr , "Cockroach flag missing, example: -cockroach-test-db=" + pgtest . DefaultCockroach + " or use STORJ_TEST_COCKROACH environment variable." } ,
MetabaseDB : Database { "Cockroach" , cockroachConnStr , "" } ,
2021-05-11 11:08:06 +01:00
} )
2019-02-04 20:37:46 +00:00
}
2021-05-11 11:08:06 +01:00
return dbs
2019-02-04 20:37:46 +00:00
}
2019-10-18 20:03:10 +01:00
// SchemaSuffix returns a fresh suffix for schema names, obtained from
// pgutil.CreateRandomTestingSchemaName with an argument of 6.
func SchemaSuffix() string {
	const suffixLen = 6
	return pgutil.CreateRandomTestingSchemaName(suffixLen)
}
// schemaNameCleaner matches every character that is not an ASCII letter,
// digit or underscore. It is compiled once instead of on every SchemaName call.
var schemaNameCleaner = regexp.MustCompile(`[^\w]`)

// SchemaName returns a properly formatted, lower-cased schema string of the
// form "<testname>_<schemaSuffix>_<category><index>", or
// "<testname>_<category><index>" when schemaSuffix is empty.
//
// Every character outside [0-9A-Za-z_] in testname, category and schemaSuffix
// is replaced by "_". Only testname is shortened to keep the result within the
// 64-byte budget; if the other parts already exceed the budget, testname is
// dropped entirely and the result is longer than 64 bytes.
func SchemaName(testname, category string, index int, schemaSuffix string) string {
	// The database is very lenient on allowed characters
	// but the same cannot be said for all tools.
	testname = schemaNameCleaner.ReplaceAllString(testname, "_")
	category = schemaNameCleaner.ReplaceAllString(category, "_")
	schemaSuffix = schemaNameCleaner.ReplaceAllString(schemaSuffix, "_")

	// postgres has a maximum schema length of 64
	// we need additional 6 bytes for the random suffix
	// and 4 bytes for the satellite index "/S0/"
	indexStr := strconv.Itoa(index)
	maxTestNameLen := 64 - len(category) - len(indexStr) - len(schemaSuffix) - 2
	if maxTestNameLen < 0 {
		// Without this clamp the slice expression below would panic when
		// category, suffix and index alone exceed the budget.
		maxTestNameLen = 0
	}
	if len(testname) > maxTestNameLen {
		// After cleaning, testname is pure ASCII, so byte slicing is safe.
		testname = testname[:maxTestNameLen]
	}

	if schemaSuffix == "" {
		return strings.ToLower(testname + "_" + category + indexStr)
	}
	return strings.ToLower(testname + "_" + schemaSuffix + "_" + category + indexStr)
}
2019-12-04 03:36:21 +00:00
// tempMasterDB pairs a satellite.DB with the temporary database it was opened
// on, so that closing the wrapper also closes the temporary database.
type tempMasterDB struct {
	// DB is the embedded satellite database; its methods are promoted.
	satellite.DB
	// tempDB is the temporary database backing DB.
	tempDB *dbutil.TempDatabase
}
// Close closes the wrapped satellite database first and the temporary
// database second, returning both errors combined.
func (db *tempMasterDB) Close() error {
	masterErr := db.DB.Close()
	tempErr := db.tempDB.Close()
	return errs.Combine(masterErr, tempErr)
}
satellite/satellitedb: prepare to remove big.Float from db
Why: big.Float is not an ideal type for dealing with monetary amounts,
because no matter how high the precision, some non-integer decimal
values can not be represented exactly in base-2 floating point. Also,
storing gob-encoded big.Float values in the database makes it very hard
to use those values in meaningful queries, making it difficult to do
any sort of analysis on billing.
Now that we have amounts represented using monetary.Amount, we can
simply store them in the database using integers (as given by the
.BaseUnits() method on monetary.Amount).
We should move toward storing the currency along with any monetary
amount, wherever we are storing amounts, because satellites might want
to deal with currencies other than STORJ and USD. Even better, it
becomes much clearer what currency each monetary value is _supposed_ to
be in (I had to dig through code to find that out for our current
monetary columns).
Deployment
----------
Getting rid of the big.Float columns will take multiple deployment
steps. There does not seem to be any way to make the change in a way
that lets existing queries continue to work on CockroachDB (it could be
done with rules and triggers and a stored procedure that knows how to
gob-decode big.Float objects, but CockroachDB doesn't have rules _or_
triggers _or_ stored procedures). Instead, in this first step, we make
no changes to the database schema, but add code that knows how to deal
with the planned changes to the schema when they are made in a future
"step 2" deployment. All functions that deal with the
coinbase_transactions table have been taught to recognize the "undefined
column" error, and when it is seen, to call a separate "transition shim"
function to accomplish the task. Once all the services are running this
code, and the step 2 deployment makes breaking changes to the schema,
any services that are still running and connected to the database will
keep working correctly because of the fallback code included here. The
step 2 deployment can be made without these transition shims included,
because it will apply the database schema changes before any of its code
runs.
Step 1:
No schema changes; just include code that recognizes the
"undefined column" error when dealing with the
coinbase_transactions or stripecoinpayments_tx_conversion_rates
tables, and if found, assumes that the column changes from Step
2 have already been made.
Step 2:
In coinbase_transactions:
* change the names of the 'amount' and 'received' columns to
'amount_gob' and 'received_gob' respectively
* add new 'amount_numeric' and 'received_numeric' columns with
INT8 type.
In stripecoinpayments_tx_conversion_rates:
* change the name of the 'rate' column to 'rate_gob'
* add new 'rate_numeric' column with NUMERIC(8, 8) type
Code reading from either of these tables must query both the X_gob
and X_numeric columns. If X_numeric is not null, its value should
be used; otherwise, the gob-encoded big.Float in X_gob should be
used. A chore might be included in this step that transitions values
from X_gob to X_numeric a few rows at a time.
Step 3:
Once all prod satellites have no values left in the _gob columns, we
can drop those columns and add NOT NULL constraints to the _numeric
columns.
Change-Id: Id6db304b404e6fde44f5a8c23cdaeeaaa2324f20
2021-08-10 23:30:23 +01:00
// DebugGetDBHandle returns the raw database handle of the underlying
// temporary database. It exists for testing purposes only and is meant to be
// temporary.
func (db *tempMasterDB) DebugGetDBHandle() tagsql.DB {
	temp := db.tempDB
	return temp.DB
}
2020-07-16 15:18:02 +01:00
// CreateMasterDB creates a new satellite database for testing.
2023-01-24 10:15:38 +00:00
func CreateMasterDB ( ctx context . Context , log * zap . Logger , name string , category string , index int , dbInfo Database , applicationName string ) ( db satellite . DB , err error ) {
2019-11-26 16:39:57 +00:00
if dbInfo . URL == "" {
2020-03-27 16:18:19 +00:00
return nil , fmt . Errorf ( "Database %s connection string not provided. %s" , dbInfo . Name , dbInfo . Message )
2019-11-26 16:39:57 +00:00
}
schemaSuffix := SchemaSuffix ( )
2020-03-27 16:18:19 +00:00
log . Debug ( "creating" , zap . String ( "suffix" , schemaSuffix ) )
schema := SchemaName ( name , category , index , schemaSuffix )
2019-11-26 16:39:57 +00:00
2020-01-13 13:18:48 +00:00
tempDB , err := tempdb . OpenUnique ( ctx , dbInfo . URL , schema )
2019-12-04 03:36:21 +00:00
if err != nil {
return nil , err
2019-11-26 16:39:57 +00:00
}
2022-05-18 15:43:40 +01:00
if * cockroachNoDrop && tempDB . Driver == "cockroach" {
tempDB . Cleanup = func ( d tagsql . DB ) error { return nil }
}
2019-12-04 03:36:21 +00:00
2023-01-24 10:15:38 +00:00
return CreateMasterDBOnTopOf ( ctx , log , tempDB , applicationName )
2019-12-04 03:36:21 +00:00
}
// CreateMasterDBOnTopOf creates a new satellite database on top of an already existing
// temporary database.
2023-01-24 10:15:38 +00:00
func CreateMasterDBOnTopOf ( ctx context . Context , log * zap . Logger , tempDB * dbutil . TempDatabase , applicationName string ) ( db satellite . DB , err error ) {
masterDB , err := satellitedb . Open ( ctx , log . Named ( "db" ) , tempDB . ConnStr , satellitedb . Options { ApplicationName : applicationName } )
2019-12-04 03:36:21 +00:00
return & tempMasterDB { DB : masterDB , tempDB : tempDB } , err
}
2020-10-29 16:54:35 +00:00
// CreateMetabaseDB creates a new satellite metabase for testing.
2021-09-24 15:18:21 +01:00
func CreateMetabaseDB ( ctx context . Context , log * zap . Logger , name string , category string , index int , dbInfo Database , config metabase . Config ) ( db * metabase . DB , err error ) {
2020-10-29 16:54:35 +00:00
if dbInfo . URL == "" {
return nil , fmt . Errorf ( "Database %s connection string not provided. %s" , dbInfo . Name , dbInfo . Message )
}
schemaSuffix := SchemaSuffix ( )
log . Debug ( "creating" , zap . String ( "suffix" , schemaSuffix ) )
schema := SchemaName ( name , category , index , schemaSuffix )
tempDB , err := tempdb . OpenUnique ( ctx , dbInfo . URL , schema )
if err != nil {
return nil , err
}
2022-05-18 15:43:40 +01:00
if * cockroachNoDrop && tempDB . Driver == "cockroach" {
tempDB . Cleanup = func ( d tagsql . DB ) error { return nil }
}
2020-10-29 16:54:35 +00:00
2021-09-24 15:18:21 +01:00
return CreateMetabaseDBOnTopOf ( ctx , log , tempDB , config )
2020-10-29 16:54:35 +00:00
}
// CreateMetabaseDBOnTopOf creates a new metabase on top of an already existing
// temporary database.
2021-09-24 15:18:21 +01:00
func CreateMetabaseDBOnTopOf ( ctx context . Context , log * zap . Logger , tempDB * dbutil . TempDatabase , config metabase . Config ) ( * metabase . DB , error ) {
db , err := metabase . Open ( ctx , log . Named ( "metabase" ) , tempDB . ConnStr , config )
2020-10-29 16:54:35 +00:00
if err != nil {
return nil , err
}
2021-05-13 09:14:18 +01:00
db . TestingSetCleanup ( tempDB . Close )
return db , nil
2020-10-29 16:54:35 +00:00
}
2018-12-10 14:50:12 +00:00
// Run method will iterate over all supported databases. Will establish
// connection and will create tables for each DB.
2020-01-19 16:29:15 +00:00
func Run ( t * testing . T , test func ( ctx * testcontext . Context , t * testing . T , db satellite . DB ) ) {
2019-02-04 20:37:46 +00:00
for _ , dbInfo := range Databases ( ) {
2019-02-06 09:16:05 +00:00
dbInfo := dbInfo
2020-04-27 17:28:40 +01:00
t . Run ( dbInfo . Name , func ( t * testing . T ) {
2019-02-05 19:44:00 +00:00
t . Parallel ( )
2020-01-13 13:18:48 +00:00
ctx := testcontext . New ( t )
defer ctx . Cleanup ( )
2019-12-30 12:17:41 +00:00
if dbInfo . MasterDB . URL == "" {
t . Skipf ( "Database %s connection string not provided. %s" , dbInfo . MasterDB . Name , dbInfo . MasterDB . Message )
}
2023-01-24 10:15:38 +00:00
logger := zaptest . NewLogger ( t )
applicationName := "satellite-satellitedb-test-" + pgutil . CreateRandomTestingSchemaName ( 6 )
db , err := CreateMasterDB ( ctx , logger , t . Name ( ) , "T" , 0 , dbInfo . MasterDB , applicationName )
2018-12-10 14:50:12 +00:00
if err != nil {
t . Fatal ( err )
}
defer func ( ) {
2019-10-18 20:03:10 +01:00
err := db . Close ( )
2018-12-10 14:50:12 +00:00
if err != nil {
t . Fatal ( err )
}
} ( )
2020-04-30 07:36:59 +01:00
err = db . TestingMigrateToLatest ( ctx )
2018-12-10 14:50:12 +00:00
if err != nil {
t . Fatal ( err )
}
2023-01-24 10:15:38 +00:00
var fullScansBefore [ ] string
tempMasterDB , ok := db . ( * tempMasterDB )
if ok {
fullScansBefore , err = fullTableScanQueries ( ctx , tempMasterDB . tempDB . DB , tempMasterDB . tempDB . Implementation , applicationName )
if err != nil {
t . Fatal ( err )
}
}
2020-01-19 16:29:15 +00:00
test ( ctx , t , db )
2023-01-24 10:15:38 +00:00
if ok {
fullScansAfter , err := fullTableScanQueries ( ctx , tempMasterDB . tempDB . DB , tempMasterDB . tempDB . Implementation , applicationName )
if err != nil {
t . Fatal ( err )
}
diff := cmp . Diff ( fullScansBefore , fullScansAfter )
if diff != "" {
logger . Sugar ( ) . Warnf ( "FULL TABLE SCAN DETECTED\n%s" , diff )
}
}
2018-12-10 14:50:12 +00:00
} )
}
}
2019-05-19 16:10:46 +01:00
// Bench method will iterate over all supported databases. Will establish
// connection and will create tables for each DB.
func Bench ( b * testing . B , bench func ( b * testing . B , db satellite . DB ) ) {
for _ , dbInfo := range Databases ( ) {
dbInfo := dbInfo
2020-04-27 17:28:40 +01:00
b . Run ( dbInfo . Name , func ( b * testing . B ) {
2019-10-04 15:12:21 +01:00
if dbInfo . MasterDB . URL == "" {
b . Skipf ( "Database %s connection string not provided. %s" , dbInfo . MasterDB . Name , dbInfo . MasterDB . Message )
2019-05-19 16:10:46 +01:00
}
2020-01-13 13:18:48 +00:00
ctx := testcontext . New ( b )
defer ctx . Cleanup ( )
2023-01-24 10:15:38 +00:00
db , err := CreateMasterDB ( ctx , zap . NewNop ( ) , b . Name ( ) , "X" , 0 , dbInfo . MasterDB , "satellite-satellitedb-bench" )
2019-05-19 16:10:46 +01:00
if err != nil {
b . Fatal ( err )
}
defer func ( ) {
2019-10-18 20:03:10 +01:00
err := db . Close ( )
2019-05-19 16:10:46 +01:00
if err != nil {
b . Fatal ( err )
}
} ( )
2020-04-30 07:36:59 +01:00
err = db . MigrateToLatest ( ctx )
2019-05-19 16:10:46 +01:00
if err != nil {
b . Fatal ( err )
}
2020-01-13 13:18:48 +00:00
// TODO: pass the ctx down
2019-05-19 16:10:46 +01:00
bench ( b , db )
} )
}
}
2023-01-24 10:15:38 +00:00
// fullTableScanQueries lists the distinct statements that the database's
// statement statistics flag as full scans for the given applicationName.
//
// It returns nil, nil unless implementation.String() is "cockroach".
// Statements whose text does not contain "WHERE" (case-insensitive) are
// ignored. The result is returned in the order produced by the query, which
// sorts by statement key, so two calls observing the same set of statements
// yield equal slices and can be compared with an order-sensitive diff. The
// result is nil when no statement qualifies.
func fullTableScanQueries(ctx context.Context, db tagsql.DB, implementation dbutil.Implementation, applicationName string) (queries []string, err error) {
	if implementation.String() != "cockroach" {
		return nil, nil
	}

	rows, err := db.QueryContext(ctx,
		"SELECT key FROM crdb_internal.node_statement_statistics WHERE full_scan = TRUE AND application_name = $1 ORDER BY key",
		applicationName,
	)
	if err != nil {
		return nil, err
	}
	defer func() {
		err = errs.Combine(err, rows.Close())
	}()

	// seen only de-duplicates; the output order comes from the rows, never
	// from map iteration, which is randomized.
	seen := map[string]struct{}{}
	for rows.Next() {
		var query string
		if scanErr := rows.Scan(&query); scanErr != nil {
			return nil, scanErr
		}

		// TODO: find smarter way to ignore known full table scan queries
		if !strings.Contains(strings.ToUpper(query), "WHERE") {
			continue
		}

		if _, duplicate := seen[query]; duplicate {
			continue
		}
		seen[query] = struct{}{}
		queries = append(queries, query)
	}
	if rowsErr := rows.Err(); rowsErr != nil {
		return nil, rowsErr
	}

	return queries, nil
}