storj/satellite/metainfo/metabase/commit.go
Michał Niewrzał 8b9da01817 satellite/metainfo/metabase: allow committing empty inline segments
We need to keep empty inline segments as we did it with pointerDB because otherwise old uplinks after uploading data won't be able to download such file. To reduce number of empty inline segments on uplink side we need to implement skipping empty last inline segments for multipart upload.

Change-Id: Ice86c805babba1ad17149754cbd6b3f4fd652722
2021-02-19 13:41:34 +00:00

605 lines
18 KiB
Go

// Copyright (C) 2020 Storj Labs, Inc.
// See LICENSE for copying information.
package metabase
import (
"context"
"database/sql"
"errors"
"time"
pgxerrcode "github.com/jackc/pgerrcode"
"github.com/zeebo/errs"
"storj.io/common/storj"
"storj.io/storj/private/dbutil/pgutil/pgerrcode"
"storj.io/storj/private/dbutil/txutil"
"storj.io/storj/private/tagsql"
)
// we need to disable PlainSize validation for old uplinks.
const validatePlainSize = false
var (
// ErrInvalidRequest is used to indicate invalid requests.
ErrInvalidRequest = errs.Class("metabase: invalid request")
// ErrConflict is used to indicate conflict with the request.
ErrConflict = errs.Class("metabase: conflict")
)
// BeginObjectNextVersion contains arguments necessary for starting an object upload.
type BeginObjectNextVersion struct {
ObjectStream
ExpiresAt *time.Time
ZombieDeletionDeadline *time.Time
Encryption storj.EncryptionParameters
}
// BeginObjectNextVersion adds a pending object to the database, with automatically assigned version.
func (db *DB) BeginObjectNextVersion(ctx context.Context, opts BeginObjectNextVersion) (committed Version, err error) {
defer mon.Task()(&ctx)(&err)
if err := opts.ObjectStream.Verify(); err != nil {
return -1, err
}
if opts.Version != NextVersion {
return -1, ErrInvalidRequest.New("Version should be metabase.NextVersion")
}
row := db.db.QueryRow(ctx, `
INSERT INTO objects (
project_id, bucket_name, object_key, version, stream_id,
expires_at, encryption,
zombie_deletion_deadline
) VALUES (
$1, $2, $3,
coalesce((
SELECT version + 1
FROM objects
WHERE project_id = $1 AND bucket_name = $2 AND object_key = $3
ORDER BY version DESC
LIMIT 1
), 1),
$4, $5, $6,
$7)
RETURNING version
`, opts.ProjectID, opts.BucketName, []byte(opts.ObjectKey), opts.StreamID,
opts.ExpiresAt, encryptionParameters{&opts.Encryption},
opts.ZombieDeletionDeadline)
var v int64
if err := row.Scan(&v); err != nil {
return -1, Error.New("unable to insert object: %w", err)
}
return Version(v), nil
}
// BeginObjectExactVersion contains arguments necessary for starting an object upload.
type BeginObjectExactVersion struct {
ObjectStream
ExpiresAt *time.Time
ZombieDeletionDeadline *time.Time
Encryption storj.EncryptionParameters
}
// BeginObjectExactVersion adds a pending object to the database, with specific version.
func (db *DB) BeginObjectExactVersion(ctx context.Context, opts BeginObjectExactVersion) (committed Object, err error) {
defer mon.Task()(&ctx)(&err)
if err := opts.ObjectStream.Verify(); err != nil {
return Object{}, err
}
if opts.Version == NextVersion {
return Object{}, ErrInvalidRequest.New("Version should not be metabase.NextVersion")
}
object := Object{
ObjectStream: ObjectStream{
ProjectID: opts.ProjectID,
BucketName: opts.BucketName,
ObjectKey: opts.ObjectKey,
Version: opts.Version,
StreamID: opts.StreamID,
},
ExpiresAt: opts.ExpiresAt,
Encryption: opts.Encryption,
ZombieDeletionDeadline: opts.ZombieDeletionDeadline,
}
err = db.db.QueryRow(ctx, `
INSERT INTO objects (
project_id, bucket_name, object_key, version, stream_id,
expires_at, encryption,
zombie_deletion_deadline
) values (
$1, $2, $3, $4, $5,
$6, $7,
$8
)
RETURNING status, created_at
`, opts.ProjectID, opts.BucketName, []byte(opts.ObjectKey), opts.Version, opts.StreamID,
opts.ExpiresAt, encryptionParameters{&opts.Encryption},
opts.ZombieDeletionDeadline).
Scan(
&object.Status, &object.CreatedAt,
)
if err != nil {
if code := pgerrcode.FromError(err); code == pgxerrcode.UniqueViolation {
return Object{}, ErrConflict.New("object already exists")
}
return Object{}, Error.New("unable to insert object: %w", err)
}
return object, nil
}
// BeginSegment contains options to verify, whether a new segment upload can be started.
type BeginSegment struct {
ObjectStream
Position SegmentPosition
RootPieceID storj.PieceID
Pieces Pieces
}
// BeginSegment verifies, whether a new segment upload can be started.
func (db *DB) BeginSegment(ctx context.Context, opts BeginSegment) (err error) {
defer mon.Task()(&ctx)(&err)
if err := opts.ObjectStream.Verify(); err != nil {
return err
}
if err := opts.Pieces.Verify(); err != nil {
return err
}
if opts.RootPieceID.IsZero() {
return ErrInvalidRequest.New("RootPieceID missing")
}
// NOTE: this isn't strictly necessary, since we can also fail this in CommitSegment.
// however, we should prevent creating segements for non-partial objects.
// NOTE: these queries could be combined into one.
return txutil.WithTx(ctx, db.db, nil, func(ctx context.Context, tx tagsql.Tx) (err error) {
// Verify that object exists and is partial.
var value int
err = tx.QueryRow(ctx, `
SELECT 1
FROM objects WHERE
project_id = $1 AND
bucket_name = $2 AND
object_key = $3 AND
version = $4 AND
stream_id = $5 AND
status = `+pendingStatus,
opts.ProjectID, opts.BucketName, []byte(opts.ObjectKey), opts.Version, opts.StreamID).Scan(&value)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return Error.New("pending object missing")
}
return Error.New("unable to query object status: %w", err)
}
// Verify that the segment does not exist.
err = tx.QueryRow(ctx, `
SELECT 1
FROM segments WHERE
stream_id = $1 AND
position = $2
`, opts.StreamID, opts.Position).Scan(&value)
if err != nil && !errors.Is(err, sql.ErrNoRows) {
return Error.New("unable to query segments: %w", err)
}
err = nil //nolint: ineffassign, ignore any other err result (explicitly)
return nil
})
}
// CommitSegment contains all necessary information about the segment.
type CommitSegment struct {
ObjectStream
Position SegmentPosition
RootPieceID storj.PieceID
EncryptedKeyNonce []byte
EncryptedKey []byte
PlainOffset int64 // offset in the original data stream
PlainSize int32 // size before encryption
EncryptedSize int32 // segment size after encryption
Redundancy storj.RedundancyScheme
Pieces Pieces
}
// CommitSegment commits segment to the database.
func (db *DB) CommitSegment(ctx context.Context, opts CommitSegment) (err error) {
defer mon.Task()(&ctx)(&err)
if err := opts.ObjectStream.Verify(); err != nil {
return err
}
if err := opts.Pieces.Verify(); err != nil {
return err
}
switch {
case opts.RootPieceID.IsZero():
return ErrInvalidRequest.New("RootPieceID missing")
case len(opts.EncryptedKey) == 0:
return ErrInvalidRequest.New("EncryptedKey missing")
case len(opts.EncryptedKeyNonce) == 0:
return ErrInvalidRequest.New("EncryptedKeyNonce missing")
case opts.EncryptedSize <= 0:
return ErrInvalidRequest.New("EncryptedSize negative or zero")
case opts.PlainSize <= 0 && validatePlainSize:
return ErrInvalidRequest.New("PlainSize negative or zero")
case opts.PlainOffset < 0:
return ErrInvalidRequest.New("PlainOffset negative")
case opts.Redundancy.IsZero():
return ErrInvalidRequest.New("Redundancy zero")
}
// TODO: verify opts.Pieces is compatible with opts.Redundancy
aliasPieces, err := db.aliasCache.ConvertPiecesToAliases(ctx, opts.Pieces)
if err != nil {
return Error.New("unable to convert pieces to aliases: %w", err)
}
// Verify that object exists and is partial.
_, err = db.db.ExecContext(ctx, `
INSERT INTO segments (
stream_id, position,
root_piece_id, encrypted_key_nonce, encrypted_key,
encrypted_size, plain_offset, plain_size,
redundancy,
remote_alias_pieces
) VALUES (
(SELECT stream_id
FROM objects WHERE
project_id = $10 AND
bucket_name = $11 AND
object_key = $12 AND
version = $13 AND
stream_id = $14 AND
status = `+pendingStatus+
` ), $1,
$2, $3, $4,
$5, $6, $7,
$8,
$9
)`, opts.Position,
opts.RootPieceID, opts.EncryptedKeyNonce, opts.EncryptedKey,
opts.EncryptedSize, opts.PlainOffset, opts.PlainSize,
redundancyScheme{&opts.Redundancy},
aliasPieces,
opts.ProjectID, opts.BucketName, []byte(opts.ObjectKey), opts.Version, opts.StreamID,
)
if err != nil {
if code := pgerrcode.FromError(err); code == pgxerrcode.NotNullViolation {
return Error.New("pending object missing")
}
if code := pgerrcode.FromError(err); code == pgxerrcode.UniqueViolation {
return ErrConflict.New("segment already exists")
}
return Error.New("unable to insert segment: %w", err)
}
return nil
}
// CommitInlineSegment contains all necessary information about the segment.
type CommitInlineSegment struct {
ObjectStream
Position SegmentPosition
EncryptedKeyNonce []byte
EncryptedKey []byte
PlainOffset int64 // offset in the original data stream
PlainSize int32 // size before encryption
InlineData []byte
}
// CommitInlineSegment commits inline segment to the database.
func (db *DB) CommitInlineSegment(ctx context.Context, opts CommitInlineSegment) (err error) {
defer mon.Task()(&ctx)(&err)
if err := opts.ObjectStream.Verify(); err != nil {
return err
}
// TODO: do we have a lower limit for inline data?
// TODO should we move check for max inline segment from metainfo here
switch {
case len(opts.EncryptedKey) == 0:
return ErrInvalidRequest.New("EncryptedKey missing")
case len(opts.EncryptedKeyNonce) == 0:
return ErrInvalidRequest.New("EncryptedKeyNonce missing")
case opts.PlainSize <= 0 && validatePlainSize:
return ErrInvalidRequest.New("PlainSize negative or zero")
case opts.PlainOffset < 0:
return ErrInvalidRequest.New("PlainOffset negative")
}
// Verify that object exists and is partial.
_, err = db.db.ExecContext(ctx, `
INSERT INTO segments (
stream_id, position,
root_piece_id, encrypted_key_nonce, encrypted_key,
encrypted_size, plain_offset, plain_size,
inline_data
) VALUES (
(SELECT stream_id
FROM objects WHERE
project_id = $9 AND
bucket_name = $10 AND
object_key = $11 AND
version = $12 AND
stream_id = $13 AND
status = `+pendingStatus+
` ), $1,
$2, $3, $4,
$5, $6, $7,
$8
)`, opts.Position,
storj.PieceID{}, opts.EncryptedKeyNonce, opts.EncryptedKey,
len(opts.InlineData), opts.PlainOffset, opts.PlainSize,
opts.InlineData,
opts.ProjectID, opts.BucketName, []byte(opts.ObjectKey), opts.Version, opts.StreamID,
)
if err != nil {
if code := pgerrcode.FromError(err); code == pgxerrcode.NotNullViolation {
return Error.New("pending object missing")
}
if code := pgerrcode.FromError(err); code == pgxerrcode.UniqueViolation {
return ErrConflict.New("segment already exists")
}
return Error.New("unable to insert segment: %w", err)
}
return nil
}
// CommitObject contains arguments necessary for committing an object.
type CommitObject struct {
ObjectStream
Encryption storj.EncryptionParameters
EncryptedMetadata []byte
EncryptedMetadataNonce []byte
EncryptedMetadataEncryptedKey []byte
}
// CommitObject adds a pending object to the database.
func (db *DB) CommitObject(ctx context.Context, opts CommitObject) (object Object, err error) {
defer mon.Task()(&ctx)(&err)
if err := opts.ObjectStream.Verify(); err != nil {
return Object{}, err
}
if opts.Encryption.CipherSuite != storj.EncUnspecified && opts.Encryption.BlockSize <= 0 {
return Object{}, ErrInvalidRequest.New("Encryption.BlockSize is negative or zero")
}
err = txutil.WithTx(ctx, db.db, nil, func(ctx context.Context, tx tagsql.Tx) error {
type Segment struct {
Position SegmentPosition
EncryptedSize int32
PlainOffset int64
PlainSize int32
}
var segments []Segment
err = withRows(tx.Query(ctx, `
SELECT position, encrypted_size, plain_offset, plain_size
FROM segments
WHERE stream_id = $1
ORDER BY position
`, opts.StreamID))(func(rows tagsql.Rows) error {
for rows.Next() {
var segment Segment
err := rows.Scan(&segment.Position, &segment.EncryptedSize, &segment.PlainOffset, &segment.PlainSize)
if err != nil {
return Error.New("failed to scan segments: %w", err)
}
segments = append(segments, segment)
}
return nil
})
if err != nil {
return Error.New("failed to fetch segments: %w", err)
}
// TODO disabled for now
// verify segments
// if len(segments) > 0 {
// // without proofs we expect the segments to be contiguous
// hasOffset := false
// offset := int64(0)
// for i, seg := range segments {
// if seg.Position.Part != 0 && seg.Position.Index != uint32(i) {
// return Error.New("expected segment (%d,%d), found (%d,%d)", 0, i, seg.Position.Part, seg.Position.Index)
// }
// if seg.PlainOffset != 0 {
// hasOffset = true
// }
// if hasOffset && seg.PlainOffset != offset {
// return Error.New("segment %d should be at plain offset %d, offset is %d", seg.Position.Index, offset, seg.PlainOffset)
// }
// offset += int64(seg.PlainSize)
// }
// }
// TODO: would we even need this when we make main index plain_offset?
fixedSegmentSize := int32(0)
if len(segments) > 0 {
fixedSegmentSize = segments[0].PlainSize
for i, seg := range segments {
if seg.Position.Part != 0 || seg.Position.Index != uint32(i) {
fixedSegmentSize = -1
break
}
if i < len(segments)-1 && seg.PlainSize != fixedSegmentSize {
fixedSegmentSize = -1
break
}
}
}
var totalPlainSize, totalEncryptedSize int64
for _, seg := range segments {
totalPlainSize += int64(seg.PlainSize)
totalEncryptedSize += int64(seg.EncryptedSize)
}
err = tx.QueryRow(ctx, `
UPDATE objects SET
status =`+committedStatus+`,
segment_count = $6,
encrypted_metadata_nonce = $7,
encrypted_metadata = $8,
encrypted_metadata_encrypted_key = $9,
total_plain_size = $10,
total_encrypted_size = $11,
fixed_segment_size = $12,
zombie_deletion_deadline = NULL,
-- TODO should we allow to override existing encryption parameters or return error if don't match with opts?
encryption = CASE
WHEN objects.encryption = 0 AND $13 <> 0 THEN $13
WHEN objects.encryption = 0 AND $13 = 0 THEN NULL
ELSE objects.encryption
END
WHERE
project_id = $1 AND
bucket_name = $2 AND
object_key = $3 AND
version = $4 AND
stream_id = $5 AND
status = `+pendingStatus+`
RETURNING
created_at, expires_at,
encryption;
`, opts.ProjectID, opts.BucketName, []byte(opts.ObjectKey), opts.Version, opts.StreamID,
len(segments),
opts.EncryptedMetadataNonce, opts.EncryptedMetadata, opts.EncryptedMetadataEncryptedKey,
totalPlainSize,
totalEncryptedSize,
fixedSegmentSize,
encryptionParameters{&opts.Encryption},
).
Scan(
&object.CreatedAt, &object.ExpiresAt,
encryptionParameters{&object.Encryption},
)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return storj.ErrObjectNotFound.Wrap(Error.New("object with specified version and pending status is missing"))
} else if code := pgerrcode.FromError(err); code == pgxerrcode.NotNullViolation {
// TODO maybe we should check message if 'encryption' label is there
return ErrInvalidRequest.New("Encryption is missing")
}
return Error.New("failed to update object: %w", err)
}
object.StreamID = opts.StreamID
object.ProjectID = opts.ProjectID
object.BucketName = opts.BucketName
object.ObjectKey = opts.ObjectKey
object.Version = opts.Version
object.Status = Committed
object.SegmentCount = int32(len(segments))
object.EncryptedMetadataNonce = opts.EncryptedMetadataNonce
object.EncryptedMetadata = opts.EncryptedMetadata
object.EncryptedMetadataEncryptedKey = opts.EncryptedMetadataEncryptedKey
object.TotalPlainSize = totalPlainSize
object.TotalEncryptedSize = totalEncryptedSize
object.FixedSegmentSize = fixedSegmentSize
return nil
})
if err != nil {
return Object{}, err
}
return object, nil
}
// UpdateObjectMetadata contains arguments necessary for updating an object metadata.
type UpdateObjectMetadata struct {
ObjectStream
EncryptedMetadata []byte
EncryptedMetadataNonce []byte
EncryptedMetadataEncryptedKey []byte
}
// UpdateObjectMetadata updates an object metadata.
func (db *DB) UpdateObjectMetadata(ctx context.Context, opts UpdateObjectMetadata) (err error) {
defer mon.Task()(&ctx)(&err)
if err := opts.ObjectStream.Verify(); err != nil {
return err
}
if opts.ObjectStream.Version <= 0 {
return ErrInvalidRequest.New("Version invalid: %v", opts.Version)
}
// TODO So the issue is that during a multipart upload of an object,
// uplink can update object metadata. If we add the arguments EncryptedMetadata
// to CommitObject, they will need to account for them being optional.
// Leading to scenarios where uplink calls update metadata, but wants to clear them
// during commit object.
result, err := db.db.ExecContext(ctx, `
UPDATE objects SET
encrypted_metadata_nonce = $6,
encrypted_metadata = $7,
encrypted_metadata_encrypted_key = $8
WHERE
project_id = $1 AND
bucket_name = $2 AND
object_key = $3 AND
version = $4 AND
stream_id = $5 AND
status = `+committedStatus,
opts.ProjectID, opts.BucketName, []byte(opts.ObjectKey), opts.Version, opts.StreamID,
opts.EncryptedMetadataNonce, opts.EncryptedMetadata, opts.EncryptedMetadataEncryptedKey)
if err != nil {
return Error.New("unable to update object metadata: %w", err)
}
affected, err := result.RowsAffected()
if err != nil {
return Error.New("failed to get rows affected: %w", err)
}
if affected == 0 {
return storj.ErrObjectNotFound.Wrap(
Error.New("object with specified version and committed status is missing"),
)
}
return nil
}