storj/storagenode/storagenodedb/infodb.go
paul cannon 17bdb5e9e5
move piece info into files (#2629)
Deprecate the pieceinfo database, and start storing piece info as a header to
piece files. Institute a "storage format version" concept allowing us to handle
pieces stored under multiple different types of storage. Add a piece_expirations
table which will still be used to track expiration times, so we can query it, but
which should be much smaller than the pieceinfo database would be for the
same number of pieces. (Only pieces with expiration times need to be stored in piece_expirations, and we don't need to store large byte blobs like the serialized
order limit, etc.) Use specialized names for accessing any functionality related
only to dealing with V0 pieces (e.g., `store.V0PieceInfo()`). Move SpaceUsed-
type functionality under the purview of the piece store. Add some generic
interfaces for traversing all blobs or all pieces. Add lots of tests.
2019-08-07 20:47:30 -05:00

429 lines
14 KiB
Go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package storagenodedb
import (
"context"
"database/sql"
"database/sql/driver"
"fmt"
"math/rand"
"os"
"path/filepath"
"time"
"github.com/zeebo/errs"
"go.uber.org/zap"
"gopkg.in/spacemonkeygo/monkit.v2"
"storj.io/storj/internal/dbutil"
"storj.io/storj/internal/dbutil/utccheck"
"storj.io/storj/internal/migrate"
)
// ErrInfo is the default error class for InfoDB
var ErrInfo = errs.Class("infodb")
// SQLDB defines interface that matches *sql.DB
// this is such that we can use utccheck.DB for the backend
//
// TODO: wrap the connector instead of *sql.DB
type SQLDB interface {
Begin() (*sql.Tx, error)
BeginTx(ctx context.Context, opts *sql.TxOptions) (*sql.Tx, error)
Close() error
Conn(ctx context.Context) (*sql.Conn, error)
Driver() driver.Driver
Exec(query string, args ...interface{}) (sql.Result, error)
ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error)
Ping() error
PingContext(ctx context.Context) error
Prepare(query string) (*sql.Stmt, error)
PrepareContext(ctx context.Context, query string) (*sql.Stmt, error)
Query(query string, args ...interface{}) (*sql.Rows, error)
QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error)
QueryRow(query string, args ...interface{}) *sql.Row
QueryRowContext(ctx context.Context, query string, args ...interface{}) *sql.Row
SetConnMaxLifetime(d time.Duration)
SetMaxIdleConns(n int)
SetMaxOpenConns(n int)
}
// InfoDB implements information database for piecestore.
type InfoDB struct {
db SQLDB
bandwidthdb bandwidthdb
v0PieceInfo v0PieceInfo
pieceExpirationDB pieceExpirationDB
location string
}
// newInfo creates or opens InfoDB at the specified path.
func newInfo(path string) (*InfoDB, error) {
if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
return nil, err
}
db, err := sql.Open("sqlite3", "file:"+path+"?_journal=WAL&_busy_timeout=10000")
if err != nil {
return nil, ErrInfo.Wrap(err)
}
dbutil.Configure(db, mon)
infoDb := &InfoDB{db: db}
infoDb.v0PieceInfo = v0PieceInfo{InfoDB: infoDb}
infoDb.bandwidthdb = bandwidthdb{InfoDB: infoDb}
infoDb.pieceExpirationDB = pieceExpirationDB{InfoDB: infoDb}
infoDb.location = path
return infoDb, nil
}
// NewInfoTest creates a new inmemory InfoDB.
func NewInfoTest() (*InfoDB, error) {
// create memory DB with a shared cache and a unique name to avoid collisions
db, err := sql.Open("sqlite3", fmt.Sprintf("file:memdb%d?mode=memory&cache=shared", rand.Int63()))
if err != nil {
return nil, ErrInfo.Wrap(err)
}
// Set max idle and max open to 1 to control concurrent access to the memory DB
// Setting max open higher than 1 results in table locked errors
db.SetMaxIdleConns(1)
db.SetMaxOpenConns(1)
db.SetConnMaxLifetime(-1)
mon.Chain("db_stats", monkit.StatSourceFunc(
func(cb func(name string, val float64)) {
monkit.StatSourceFromStruct(db.Stats()).Stats(cb)
}))
infoDb := &InfoDB{db: utccheck.New(db)}
infoDb.v0PieceInfo = v0PieceInfo{InfoDB: infoDb}
infoDb.bandwidthdb = bandwidthdb{InfoDB: infoDb}
infoDb.pieceExpirationDB = pieceExpirationDB{InfoDB: infoDb}
return infoDb, nil
}
// Close closes any resources.
func (db *InfoDB) Close() error {
return db.db.Close()
}
// CreateTables creates any necessary tables.
func (db *InfoDB) CreateTables(log *zap.Logger) error {
migration := db.Migration()
return migration.Run(log.Named("migration"), db)
}
// RawDB returns access to the raw database, only for migration tests.
func (db *InfoDB) RawDB() SQLDB { return db.db }
// Begin begins transaction
func (db *InfoDB) Begin() (*sql.Tx, error) { return db.db.Begin() }
// Rebind rebind parameters
func (db *InfoDB) Rebind(s string) string { return s }
// Schema returns schema
func (db *InfoDB) Schema() string { return "" }
// Migration returns table migrations.
func (db *InfoDB) Migration() *migrate.Migration {
return &migrate.Migration{
Table: "versions",
Steps: []*migrate.Step{
{
Description: "Initial setup",
Version: 0,
Action: migrate.SQL{
// table for keeping serials that need to be verified against
`CREATE TABLE used_serial (
satellite_id BLOB NOT NULL,
serial_number BLOB NOT NULL,
expiration TIMESTAMP NOT NULL
)`,
// primary key on satellite id and serial number
`CREATE UNIQUE INDEX pk_used_serial ON used_serial(satellite_id, serial_number)`,
// expiration index to allow fast deletion
`CREATE INDEX idx_used_serial ON used_serial(expiration)`,
// certificate table for storing uplink/satellite certificates
`CREATE TABLE certificate (
cert_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
node_id BLOB NOT NULL, -- same NodeID can have multiple valid leaf certificates
peer_identity BLOB UNIQUE NOT NULL -- PEM encoded
)`,
// table for storing piece meta info
`CREATE TABLE pieceinfo (
satellite_id BLOB NOT NULL,
piece_id BLOB NOT NULL,
piece_size BIGINT NOT NULL,
piece_expiration TIMESTAMP, -- date when it can be deleted
uplink_piece_hash BLOB NOT NULL, -- serialized pb.PieceHash signed by uplink
uplink_cert_id INTEGER NOT NULL,
FOREIGN KEY(uplink_cert_id) REFERENCES certificate(cert_id)
)`,
// primary key by satellite id and piece id
`CREATE UNIQUE INDEX pk_pieceinfo ON pieceinfo(satellite_id, piece_id)`,
// table for storing bandwidth usage
`CREATE TABLE bandwidth_usage (
satellite_id BLOB NOT NULL,
action INTEGER NOT NULL,
amount BIGINT NOT NULL,
created_at TIMESTAMP NOT NULL
)`,
`CREATE INDEX idx_bandwidth_usage_satellite ON bandwidth_usage(satellite_id)`,
`CREATE INDEX idx_bandwidth_usage_created ON bandwidth_usage(created_at)`,
// table for storing all unsent orders
`CREATE TABLE unsent_order (
satellite_id BLOB NOT NULL,
serial_number BLOB NOT NULL,
order_limit_serialized BLOB NOT NULL, -- serialized pb.OrderLimit
order_serialized BLOB NOT NULL, -- serialized pb.Order
order_limit_expiration TIMESTAMP NOT NULL, -- when is the deadline for sending it
uplink_cert_id INTEGER NOT NULL,
FOREIGN KEY(uplink_cert_id) REFERENCES certificate(cert_id)
)`,
`CREATE UNIQUE INDEX idx_orders ON unsent_order(satellite_id, serial_number)`,
// table for storing all sent orders
`CREATE TABLE order_archive (
satellite_id BLOB NOT NULL,
serial_number BLOB NOT NULL,
order_limit_serialized BLOB NOT NULL, -- serialized pb.OrderLimit
order_serialized BLOB NOT NULL, -- serialized pb.Order
uplink_cert_id INTEGER NOT NULL,
status INTEGER NOT NULL, -- accepted, rejected, confirmed
archived_at TIMESTAMP NOT NULL, -- when was it rejected
FOREIGN KEY(uplink_cert_id) REFERENCES certificate(cert_id)
)`,
`CREATE INDEX idx_order_archive_satellite ON order_archive(satellite_id)`,
`CREATE INDEX idx_order_archive_status ON order_archive(status)`,
},
},
{
Description: "Network Wipe #2",
Version: 1,
Action: migrate.SQL{
`UPDATE pieceinfo SET piece_expiration = '2019-05-09 00:00:00.000000+00:00'`,
},
},
{
Description: "Add tracking of deletion failures.",
Version: 2,
Action: migrate.SQL{
`ALTER TABLE pieceinfo ADD COLUMN deletion_failed_at TIMESTAMP`,
},
},
{
Description: "Add vouchersDB for storing and retrieving vouchers.",
Version: 3,
Action: migrate.SQL{
`CREATE TABLE vouchers (
satellite_id BLOB PRIMARY KEY NOT NULL,
voucher_serialized BLOB NOT NULL,
expiration TIMESTAMP NOT NULL
)`,
},
},
{
Description: "Add index on pieceinfo expireation",
Version: 4,
Action: migrate.SQL{
`CREATE INDEX idx_pieceinfo_expiration ON pieceinfo(piece_expiration)`,
`CREATE INDEX idx_pieceinfo_deletion_failed ON pieceinfo(deletion_failed_at)`,
},
},
{
Description: "Partial Network Wipe - Tardigrade Satellites",
Version: 5,
Action: migrate.SQL{
`UPDATE pieceinfo SET piece_expiration = '2019-06-25 00:00:00.000000+00:00' WHERE satellite_id
IN (x'84A74C2CD43C5BA76535E1F42F5DF7C287ED68D33522782F4AFABFDB40000000',
x'A28B4F04E10BAE85D67F4C6CB82BF8D4C0F0F47A8EA72627524DEB6EC0000000',
x'AF2C42003EFC826AB4361F73F9D890942146FE0EBE806786F8E7190800000000'
)`,
},
},
{
Description: "Add creation date.",
Version: 6,
Action: migrate.SQL{
`ALTER TABLE pieceinfo ADD COLUMN piece_creation TIMESTAMP NOT NULL DEFAULT 'epoch'`,
},
},
{
Description: "Drop certificate table.",
Version: 7,
Action: migrate.SQL{
`DROP TABLE certificate`,
`CREATE TABLE certificate (cert_id INTEGER)`,
},
},
{
Description: "Drop old used serials and remove pieceinfo_deletion_failed index.",
Version: 8,
Action: migrate.SQL{
`DELETE FROM used_serial`,
`DROP INDEX idx_pieceinfo_deletion_failed`,
},
},
{
Description: "Add order limit table.",
Version: 9,
Action: migrate.SQL{
`ALTER TABLE pieceinfo ADD COLUMN order_limit BLOB NOT NULL DEFAULT X''`,
},
},
{
Description: "Optimize index usage.",
Version: 10,
Action: migrate.SQL{
`DROP INDEX idx_pieceinfo_expiration`,
`DROP INDEX idx_order_archive_satellite`,
`DROP INDEX idx_order_archive_status`,
`CREATE INDEX idx_pieceinfo_expiration ON pieceinfo(piece_expiration) WHERE piece_expiration IS NOT NULL`,
},
},
{
Description: "Create bandwidth_usage_rollup table.",
Version: 11,
Action: migrate.SQL{
`CREATE TABLE bandwidth_usage_rollups (
interval_start TIMESTAMP NOT NULL,
satellite_id BLOB NOT NULL,
action INTEGER NOT NULL,
amount BIGINT NOT NULL,
PRIMARY KEY ( interval_start, satellite_id, action )
)`,
},
},
{
Description: "Clear Tables from Alpha data",
Version: 12,
Action: migrate.SQL{
`DROP TABLE pieceinfo`,
`DROP TABLE used_serial`,
`DROP TABLE order_archive`,
`CREATE TABLE pieceinfo_ (
satellite_id BLOB NOT NULL,
piece_id BLOB NOT NULL,
piece_size BIGINT NOT NULL,
piece_expiration TIMESTAMP,
order_limit BLOB NOT NULL,
uplink_piece_hash BLOB NOT NULL,
uplink_cert_id INTEGER NOT NULL,
deletion_failed_at TIMESTAMP,
piece_creation TIMESTAMP NOT NULL,
FOREIGN KEY(uplink_cert_id) REFERENCES certificate(cert_id)
)`,
`CREATE UNIQUE INDEX pk_pieceinfo_ ON pieceinfo_(satellite_id, piece_id)`,
`CREATE INDEX idx_pieceinfo__expiration ON pieceinfo_(piece_expiration) WHERE piece_expiration IS NOT NULL`,
`CREATE TABLE used_serial_ (
satellite_id BLOB NOT NULL,
serial_number BLOB NOT NULL,
expiration TIMESTAMP NOT NULL
)`,
`CREATE UNIQUE INDEX pk_used_serial_ ON used_serial_(satellite_id, serial_number)`,
`CREATE INDEX idx_used_serial_ ON used_serial_(expiration)`,
`CREATE TABLE order_archive_ (
satellite_id BLOB NOT NULL,
serial_number BLOB NOT NULL,
order_limit_serialized BLOB NOT NULL,
order_serialized BLOB NOT NULL,
uplink_cert_id INTEGER NOT NULL,
status INTEGER NOT NULL,
archived_at TIMESTAMP NOT NULL,
FOREIGN KEY(uplink_cert_id) REFERENCES certificate(cert_id)
)`,
},
},
{
Description: "Free Storagenodes from trash data",
Version: 13,
Action: migrate.Func(func(log *zap.Logger, mgdb migrate.DB, tx *sql.Tx) error {
// When using inmemory DB, skip deletion process
if db.location == "" {
return nil
}
err := os.RemoveAll(filepath.Join(filepath.Dir(db.location), "blob/ukfu6bhbboxilvt7jrwlqk7y2tapb5d2r2tsmj2sjxvw5qaaaaaa")) // us-central1
if err != nil {
log.Sugar().Debug(err)
}
err = os.RemoveAll(filepath.Join(filepath.Dir(db.location), "blob/v4weeab67sbgvnbwd5z7tweqsqqun7qox2agpbxy44mqqaaaaaaa")) // europe-west1
if err != nil {
log.Sugar().Debug(err)
}
err = os.RemoveAll(filepath.Join(filepath.Dir(db.location), "blob/qstuylguhrn2ozjv4h2c6xpxykd622gtgurhql2k7k75wqaaaaaa")) // asia-east1
if err != nil {
log.Sugar().Debug(err)
}
err = os.RemoveAll(filepath.Join(filepath.Dir(db.location), "blob/abforhuxbzyd35blusvrifvdwmfx4hmocsva4vmpp3rgqaaaaaaa")) // "tothemoon (stefan)"
if err != nil {
log.Sugar().Debug(err)
}
// To prevent the node from starting up, we just log errors and return nil
return nil
}),
},
{
Description: "Free Storagenodes from orphaned tmp data",
Version: 14,
Action: migrate.Func(func(log *zap.Logger, mgdb migrate.DB, tx *sql.Tx) error {
// When using inmemory DB, skip deletion process
if db.location == "" {
return nil
}
err := os.RemoveAll(filepath.Join(filepath.Dir(db.location), "tmp"))
if err != nil {
log.Sugar().Debug(err)
}
// To prevent the node from starting up, we just log errors and return nil
return nil
}),
},
{
Description: "Start piece_expirations table, deprecate pieceinfo table",
Version: 15,
Action: migrate.SQL{
// new table to hold expiration data (and only expirations. no other pieceinfo)
`CREATE TABLE piece_expirations (
satellite_id BLOB NOT NULL,
piece_id BLOB NOT NULL,
piece_expiration TIMESTAMP NOT NULL, -- date when it can be deleted
deletion_failed_at TIMESTAMP,
PRIMARY KEY (satellite_id, piece_id)
)`,
`CREATE INDEX idx_piece_expirations_piece_expiration ON piece_expirations(piece_expiration)`,
`CREATE INDEX idx_piece_expirations_deletion_failed_at ON piece_expirations(deletion_failed_at)`,
},
},
},
}
}