satellite/metabase: add pending_objects table

This change adds only the schema definition of the pending_objects table and
a small amount of supporting code that will be useful for testing later.

With this table we would like to achieve two major things:
* simplify the `objects` table before we start working on object
versioning
* gain performance by removing the need to filter `objects` results by the `status` column, which is not indexed and which we would like to avoid indexing

https://github.com/storj/storj/issues/6045

Change-Id: I6097ce1c644a8a3dad13185915fe01989ad41d90
This commit is contained in:
Michal Niewrzal 2023-07-21 10:23:47 +02:00 committed by Michał Niewrzał
parent 28737f5c62
commit 3d9c217627
3 changed files with 184 additions and 6 deletions

View File

@ -290,6 +290,29 @@ func (obj *ObjectStream) Location() ObjectLocation {
}
}
// PendingObjectStream uniquely defines a pending object and stream.
type PendingObjectStream struct {
	// ProjectID is a uuid referring to project.id.
	ProjectID uuid.UUID
	// BucketName is an alpha-numeric string referring to bucket_metainfo.name.
	BucketName string
	// ObjectKey is the encrypted path of the object.
	ObjectKey ObjectKey
	// StreamID is a random identifier for the content uploaded to the object.
	StreamID uuid.UUID
}
// Verify returns an error when any identifying field of the pending
// object stream is missing or zero-valued.
func (obj *PendingObjectStream) Verify() error {
	if obj.ProjectID.IsZero() {
		return ErrInvalidRequest.New("ProjectID missing")
	}
	if obj.BucketName == "" {
		return ErrInvalidRequest.New("BucketName missing")
	}
	if len(obj.ObjectKey) == 0 {
		return ErrInvalidRequest.New("ObjectKey missing")
	}
	if obj.StreamID.IsZero() {
		return ErrInvalidRequest.New("StreamID missing")
	}
	return nil
}
// SegmentPosition is segment part and index combined.
type SegmentPosition struct {
Part uint32

View File

@ -170,7 +170,7 @@ func (db *DB) TestMigrateToLatest(ctx context.Context) error {
{
DB: &db.db,
Description: "Test snapshot",
Version: 16,
Version: 17,
Action: migrate.SQL{
`CREATE TABLE objects (
project_id BYTEA NOT NULL,
@ -300,7 +300,43 @@ func (db *DB) TestMigrateToLatest(ctx context.Context) error {
COMMENT ON TABLE segment_copies is 'segment_copies contains a reference for sharing stream_id-s.';
COMMENT ON COLUMN segment_copies.stream_id is 'stream_id refers to the objects.stream_id.';
COMMENT ON COLUMN segment_copies.ancestor_stream_id is 'ancestor_stream_id refers to the actual segments where data is stored.';
`,
`, `
CREATE TABLE pending_objects (
project_id BYTEA NOT NULL,
bucket_name BYTEA NOT NULL,
object_key BYTEA NOT NULL,
stream_id BYTEA NOT NULL,
created_at TIMESTAMPTZ NOT NULL default now(),
expires_at TIMESTAMPTZ,
encrypted_metadata_nonce BYTEA default NULL,
encrypted_metadata BYTEA default NULL,
encrypted_metadata_encrypted_key BYTEA default NULL,
encryption INT8 NOT NULL default 0,
zombie_deletion_deadline TIMESTAMPTZ default now() + '1 day',
PRIMARY KEY (project_id, bucket_name, object_key, stream_id)
)`,
`
			-- BUGFIX: the column comments must target pending_objects; the
			-- original statements mistakenly commented the objects table's columns.
			COMMENT ON TABLE  pending_objects                                  is 'Pending objects table contains information about path and streams of in progress uploads';
			COMMENT ON COLUMN pending_objects.project_id                       is 'project_id is a uuid referring to project.id.';
			COMMENT ON COLUMN pending_objects.bucket_name                      is 'bucket_name is a alpha-numeric string referring to bucket_metainfo.name.';
			COMMENT ON COLUMN pending_objects.object_key                       is 'object_key is an encrypted path of the object.';
			COMMENT ON COLUMN pending_objects.stream_id                        is 'stream_id is a random identifier for the content uploaded to the object.';
			COMMENT ON COLUMN pending_objects.created_at                       is 'created_at is the creation date of this object.';
			COMMENT ON COLUMN pending_objects.expires_at                       is 'expires_at is the date when this object will be marked for deletion.';
			COMMENT ON COLUMN pending_objects.encrypted_metadata_nonce         is 'encrypted_metadata_nonce is random identifier used as part of encryption for encrypted_metadata.';
			COMMENT ON COLUMN pending_objects.encrypted_metadata               is 'encrypted_metadata is encrypted key-value pairs of user-specified data.';
			COMMENT ON COLUMN pending_objects.encrypted_metadata_encrypted_key is 'encrypted_metadata_encrypted_key is the encrypted key for encrypted_metadata.';
			COMMENT ON COLUMN pending_objects.encryption                       is 'encryption contains object encryption parameters encoded into a uint32. See metabase.encryptionParameters type for the implementation.';
			COMMENT ON COLUMN pending_objects.zombie_deletion_deadline         is 'zombie_deletion_deadline defines when a pending object can be deleted due to a failed upload.';`,
},
},
},
@ -597,6 +633,49 @@ func (db *DB) PostgresMigration() *migrate.Migration {
COMMENT ON COLUMN segment_copies.ancestor_stream_id is 'ancestor_stream_id refers to the actual segments where data is stored.';
`},
},
{
	DB:          &db.db,
	Description: "add pending_objects table",
	Version:     17,
	Action: migrate.SQL{`
		CREATE TABLE pending_objects (
			project_id BYTEA NOT NULL,
			bucket_name BYTEA NOT NULL,
			object_key BYTEA NOT NULL,
			stream_id BYTEA NOT NULL,
			created_at TIMESTAMPTZ NOT NULL default now(),
			expires_at TIMESTAMPTZ,
			encrypted_metadata_nonce BYTEA default NULL,
			encrypted_metadata BYTEA default NULL,
			encrypted_metadata_encrypted_key BYTEA default NULL,
			encryption INT8 NOT NULL default 0,
			zombie_deletion_deadline TIMESTAMPTZ default now() + '1 day',
			PRIMARY KEY (project_id, bucket_name, object_key, stream_id)
		)`,
		// BUGFIX: the column comments below must target pending_objects; the
		// original migration mistakenly commented the objects table's columns.
		`
		COMMENT ON TABLE  pending_objects                                  is 'Pending objects table contains information about path and streams of in progress uploads';
		COMMENT ON COLUMN pending_objects.project_id                       is 'project_id is a uuid referring to project.id.';
		COMMENT ON COLUMN pending_objects.bucket_name                      is 'bucket_name is a alpha-numeric string referring to bucket_metainfo.name.';
		COMMENT ON COLUMN pending_objects.object_key                       is 'object_key is an encrypted path of the object.';
		COMMENT ON COLUMN pending_objects.stream_id                        is 'stream_id is a random identifier for the content uploaded to the object.';
		COMMENT ON COLUMN pending_objects.created_at                       is 'created_at is the creation date of this object.';
		COMMENT ON COLUMN pending_objects.expires_at                       is 'expires_at is the date when this object will be marked for deletion.';
		COMMENT ON COLUMN pending_objects.encrypted_metadata_nonce         is 'encrypted_metadata_nonce is random identifier used as part of encryption for encrypted_metadata.';
		COMMENT ON COLUMN pending_objects.encrypted_metadata               is 'encrypted_metadata is encrypted key-value pairs of user-specified data.';
		COMMENT ON COLUMN pending_objects.encrypted_metadata_encrypted_key is 'encrypted_metadata_encrypted_key is the encrypted key for encrypted_metadata.';
		COMMENT ON COLUMN pending_objects.encryption                       is 'encryption contains object encryption parameters encoded into a uint32. See metabase.encryptionParameters type for the implementation.';
		COMMENT ON COLUMN pending_objects.zombie_deletion_deadline         is 'zombie_deletion_deadline defines when a pending object can be deleted due to a failed upload.';
	`},
},
},
}
}

View File

@ -41,6 +41,25 @@ type RawObject struct {
ZombieDeletionDeadline *time.Time
}
// RawPendingObject defines the full pending object that is stored in the database. It should be rarely used directly.
type RawPendingObject struct {
	PendingObjectStream

	// CreatedAt is the creation date of this pending object (DB default: now()).
	CreatedAt time.Time
	// ExpiresAt, when set, is the date when this object will be marked for deletion.
	ExpiresAt *time.Time

	// EncryptedMetadataNonce is a random identifier used as part of encryption for EncryptedMetadata.
	EncryptedMetadataNonce []byte
	// EncryptedMetadata contains encrypted key-value pairs of user-specified data.
	EncryptedMetadata []byte
	// EncryptedMetadataEncryptedKey is the encrypted key for EncryptedMetadata.
	EncryptedMetadataEncryptedKey []byte

	// Encryption holds object encryption parameters; stored encoded into the
	// `encryption` column (see metabase.encryptionParameters).
	Encryption storj.EncryptionParameters

	// ZombieDeletionDeadline defines when the pending raw object should be deleted from the database.
	// This is as a safeguard against objects that failed to upload and the client has not indicated
	// whether they want to continue uploading or delete the already uploaded data.
	ZombieDeletionDeadline *time.Time
}
// RawSegment defines the full segment that is stored in the database. It should be rarely used directly.
type RawSegment struct {
StreamID uuid.UUID
@ -77,9 +96,10 @@ type RawCopy struct {
// RawState contains full state of a table.
type RawState struct {
Objects []RawObject
Segments []RawSegment
Copies []RawCopy
Objects []RawObject
PendingObjects []RawPendingObject
Segments []RawSegment
Copies []RawCopy
}
// TestingGetState returns the state of the database.
@ -91,6 +111,11 @@ func (db *DB) TestingGetState(ctx context.Context) (_ *RawState, err error) {
return nil, Error.New("GetState: %w", err)
}
state.PendingObjects, err = db.testingGetAllPendingObjects(ctx)
if err != nil {
return nil, Error.New("GetState: %w", err)
}
state.Segments, err = db.testingGetAllSegments(ctx)
if err != nil {
return nil, Error.New("GetState: %w", err)
@ -108,11 +133,11 @@ func (db *DB) TestingGetState(ctx context.Context) (_ *RawState, err error) {
func (db *DB) TestingDeleteAll(ctx context.Context) (err error) {
_, err = db.db.ExecContext(ctx, `
WITH testing AS (SELECT 1) DELETE FROM objects;
WITH testing AS (SELECT 1) DELETE FROM pending_objects;
WITH testing AS (SELECT 1) DELETE FROM segments;
WITH testing AS (SELECT 1) DELETE FROM segment_copies;
WITH testing AS (SELECT 1) DELETE FROM node_aliases;
WITH testing AS (SELECT 1) SELECT setval('node_alias_seq', 1, false);
`)
db.aliasCache = NewNodeAliasCache(db)
return Error.Wrap(err)
@ -180,6 +205,57 @@ func (db *DB) testingGetAllObjects(ctx context.Context) (_ []RawObject, err erro
return objs, nil
}
// testingGetAllPendingObjects returns all rows from the pending_objects
// table, ordered by primary key. It is intended for tests only.
func (db *DB) testingGetAllPendingObjects(ctx context.Context) (_ []RawPendingObject, err error) {
	rows, err := db.db.QueryContext(ctx, `
		WITH testing AS (SELECT 1)
		SELECT
			project_id, bucket_name, object_key, stream_id,
			created_at, expires_at,
			encrypted_metadata_nonce, encrypted_metadata, encrypted_metadata_encrypted_key,
			encryption, zombie_deletion_deadline
		FROM pending_objects
		ORDER BY project_id ASC, bucket_name ASC, object_key ASC, stream_id ASC
	`)
	if err != nil {
		return nil, Error.New("testingGetAllPendingObjects query: %w", err)
	}
	// Combine any Close error with the named return so it is never lost.
	defer func() { err = errs.Combine(err, rows.Close()) }()

	// nil by default, so an empty table yields a nil result,
	// matching the behavior of the sibling testingGetAll* helpers.
	var pending []RawPendingObject
	for rows.Next() {
		var obj RawPendingObject
		if err := rows.Scan(
			&obj.ProjectID,
			&obj.BucketName,
			&obj.ObjectKey,
			&obj.StreamID,
			&obj.CreatedAt,
			&obj.ExpiresAt,
			&obj.EncryptedMetadataNonce,
			&obj.EncryptedMetadata,
			&obj.EncryptedMetadataEncryptedKey,
			encryptionParameters{&obj.Encryption},
			&obj.ZombieDeletionDeadline,
		); err != nil {
			return nil, Error.New("testingGetAllPendingObjects scan failed: %w", err)
		}
		pending = append(pending, obj)
	}
	if err := rows.Err(); err != nil {
		return nil, Error.New("testingGetAllPendingObjects scan failed: %w", err)
	}
	return pending, nil
}
// testingGetAllSegments returns the state of the database.
func (db *DB) testingGetAllSegments(ctx context.Context) (_ []RawSegment, err error) {
segs := []RawSegment{}