2020-11-09 14:55:10 +00:00
|
|
|
// Copyright (C) 2020 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package metabase
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2021-04-16 10:14:27 +01:00
|
|
|
"encoding/hex"
|
2020-11-09 14:55:10 +00:00
|
|
|
"time"
|
|
|
|
|
2023-06-14 13:42:43 +01:00
|
|
|
"github.com/jackc/pgx/v5"
|
2021-04-16 11:03:07 +01:00
|
|
|
"github.com/zeebo/errs"
|
2020-11-09 14:55:10 +00:00
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2021-05-24 19:37:24 +01:00
|
|
|
"storj.io/private/dbutil/pgxutil"
|
2021-04-23 10:52:40 +01:00
|
|
|
"storj.io/private/tagsql"
|
2020-11-09 14:55:10 +00:00
|
|
|
)
|
|
|
|
|
2021-03-16 11:54:40 +00:00
|
|
|
const (
|
2021-06-25 09:19:32 +01:00
|
|
|
deleteBatchsizeLimit = intLimitRange(1000)
|
2021-03-16 11:54:40 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// DeleteExpiredObjects holds the options for deleting expired objects together
// with their segments.
type DeleteExpiredObjects struct {
	// ExpiredBefore is the cutoff time: objects whose expires_at is earlier
	// than this value are selected for deletion.
	ExpiredBefore time.Time

	// AsOfSystemInterval is handed to the database implementation to build
	// the clause that is spliced into the selection query.
	AsOfSystemInterval time.Duration

	// BatchSize is the requested number of objects to handle per batch.
	BatchSize int
}
|
|
|
|
|
2020-11-09 14:55:10 +00:00
|
|
|
// DeleteExpiredObjects deletes all objects that expired before expiredBefore.
|
2021-03-16 11:54:40 +00:00
|
|
|
func (db *DB) DeleteExpiredObjects(ctx context.Context, opts DeleteExpiredObjects) (err error) {
|
2020-11-09 14:55:10 +00:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2021-03-16 11:54:40 +00:00
|
|
|
|
2021-04-29 10:21:29 +01:00
|
|
|
return db.deleteObjectsAndSegmentsBatch(ctx, opts.BatchSize, func(startAfter ObjectStream, batchsize int) (last ObjectStream, err error) {
|
|
|
|
query := `
|
|
|
|
SELECT
|
|
|
|
project_id, bucket_name, object_key, version, stream_id,
|
|
|
|
expires_at
|
|
|
|
FROM objects
|
2023-05-15 12:48:03 +01:00
|
|
|
` + db.impl.AsOfSystemInterval(opts.AsOfSystemInterval) + `
|
2021-04-29 10:21:29 +01:00
|
|
|
WHERE
|
|
|
|
(project_id, bucket_name, object_key, version) > ($1, $2, $3, $4)
|
|
|
|
AND expires_at < $5
|
|
|
|
ORDER BY project_id, bucket_name, object_key, version
|
|
|
|
LIMIT $6;`
|
|
|
|
|
|
|
|
expiredObjects := make([]ObjectStream, 0, batchsize)
|
|
|
|
|
2022-08-05 12:00:50 +01:00
|
|
|
scanErrClass := errs.Class("DB rows scan has failed")
|
2021-04-29 10:21:29 +01:00
|
|
|
err = withRows(db.db.QueryContext(ctx, query,
|
|
|
|
startAfter.ProjectID, []byte(startAfter.BucketName), []byte(startAfter.ObjectKey), startAfter.Version,
|
|
|
|
opts.ExpiredBefore,
|
|
|
|
batchsize),
|
|
|
|
)(func(rows tagsql.Rows) error {
|
|
|
|
for rows.Next() {
|
|
|
|
var expiresAt time.Time
|
|
|
|
err = rows.Scan(
|
|
|
|
&last.ProjectID, &last.BucketName, &last.ObjectKey, &last.Version, &last.StreamID,
|
|
|
|
&expiresAt)
|
|
|
|
if err != nil {
|
2022-08-05 12:00:50 +01:00
|
|
|
return scanErrClass.Wrap(err)
|
2021-04-29 10:21:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
db.log.Info("Deleting expired object",
|
|
|
|
zap.Stringer("Project", last.ProjectID),
|
|
|
|
zap.String("Bucket", last.BucketName),
|
|
|
|
zap.String("Object Key", string(last.ObjectKey)),
|
|
|
|
zap.Int64("Version", int64(last.Version)),
|
|
|
|
zap.String("StreamID", hex.EncodeToString(last.StreamID[:])),
|
|
|
|
zap.Time("Expired At", expiresAt),
|
|
|
|
)
|
|
|
|
expiredObjects = append(expiredObjects, last)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
})
|
2020-11-09 14:55:10 +00:00
|
|
|
if err != nil {
|
2022-08-05 12:00:50 +01:00
|
|
|
if scanErrClass.Has(err) {
|
|
|
|
return ObjectStream{}, Error.New("unable to select expired objects for deletion: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
db.log.Warn("unable to select expired objects for deletion", zap.Error(Error.Wrap(err)))
|
|
|
|
return ObjectStream{}, nil
|
2020-11-09 14:55:10 +00:00
|
|
|
}
|
2021-04-29 10:21:29 +01:00
|
|
|
|
|
|
|
err = db.deleteObjectsAndSegments(ctx, expiredObjects)
|
|
|
|
if err != nil {
|
2022-08-05 12:00:50 +01:00
|
|
|
db.log.Warn("delete from DB expired objects", zap.Error(err))
|
|
|
|
return ObjectStream{}, nil
|
2020-11-09 14:55:10 +00:00
|
|
|
}
|
2021-04-29 10:21:29 +01:00
|
|
|
|
|
|
|
return last, nil
|
|
|
|
})
|
2020-11-09 14:55:10 +00:00
|
|
|
}
|
|
|
|
|
2021-04-29 10:21:29 +01:00
|
|
|
// DeleteZombieObjects holds the options for deleting zombie objects together
// with their segments.
type DeleteZombieObjects struct {
	// DeadlineBefore is the cutoff compared against zombie_deletion_deadline
	// when pending objects are selected.
	DeadlineBefore time.Time

	// InactiveDeadline protects active uploads: an object that has a segment
	// created after this time is not deleted.
	InactiveDeadline time.Time

	// AsOfSystemInterval is handed to the database implementation to build
	// the clause that is spliced into the queries.
	AsOfSystemInterval time.Duration

	// BatchSize is the requested number of objects to handle per batch.
	BatchSize int
}
|
|
|
|
|
|
|
|
// DeleteZombieObjects deletes all objects that zombie deletion deadline passed.
|
|
|
|
func (db *DB) DeleteZombieObjects(ctx context.Context, opts DeleteZombieObjects) (err error) {
|
2020-11-09 14:55:10 +00:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
2021-04-29 10:21:29 +01:00
|
|
|
return db.deleteObjectsAndSegmentsBatch(ctx, opts.BatchSize, func(startAfter ObjectStream, batchsize int) (last ObjectStream, err error) {
|
2022-08-09 20:43:27 +01:00
|
|
|
// pending objects migrated to metabase didn't have zombie_deletion_deadline column set, because
|
|
|
|
// of that we need to get into account also object with zombie_deletion_deadline set to NULL
|
2021-04-29 10:21:29 +01:00
|
|
|
query := `
|
2021-04-16 10:14:27 +01:00
|
|
|
SELECT
|
2021-04-29 10:21:29 +01:00
|
|
|
project_id, bucket_name, object_key, version, stream_id
|
2021-03-16 11:54:40 +00:00
|
|
|
FROM objects
|
2023-05-15 12:48:03 +01:00
|
|
|
` + db.impl.AsOfSystemInterval(opts.AsOfSystemInterval) + `
|
2021-03-02 10:25:54 +00:00
|
|
|
WHERE
|
|
|
|
(project_id, bucket_name, object_key, version) > ($1, $2, $3, $4)
|
2021-04-29 10:21:29 +01:00
|
|
|
AND status = ` + pendingStatus + `
|
2022-08-09 20:43:27 +01:00
|
|
|
AND (zombie_deletion_deadline IS NULL OR zombie_deletion_deadline < $5)
|
2021-03-02 10:25:54 +00:00
|
|
|
ORDER BY project_id, bucket_name, object_key, version
|
2021-03-16 11:54:40 +00:00
|
|
|
LIMIT $6;`
|
|
|
|
|
2021-04-29 10:21:29 +01:00
|
|
|
objects := make([]ObjectStream, 0, batchsize)
|
|
|
|
|
2022-08-05 12:00:50 +01:00
|
|
|
scanErrClass := errs.Class("DB rows scan has failed")
|
2021-04-29 10:21:29 +01:00
|
|
|
err = withRows(db.db.QueryContext(ctx, query,
|
|
|
|
startAfter.ProjectID, []byte(startAfter.BucketName), []byte(startAfter.ObjectKey), startAfter.Version,
|
|
|
|
opts.DeadlineBefore,
|
|
|
|
batchsize),
|
|
|
|
)(func(rows tagsql.Rows) error {
|
|
|
|
for rows.Next() {
|
|
|
|
err = rows.Scan(&last.ProjectID, &last.BucketName, &last.ObjectKey, &last.Version, &last.StreamID)
|
|
|
|
if err != nil {
|
2022-08-05 12:00:50 +01:00
|
|
|
return scanErrClass.Wrap(err)
|
2021-04-29 10:21:29 +01:00
|
|
|
}
|
|
|
|
|
2022-08-05 12:00:50 +01:00
|
|
|
db.log.Debug("selected zombie object for deleting it",
|
2021-04-29 10:21:29 +01:00
|
|
|
zap.Stringer("Project", last.ProjectID),
|
|
|
|
zap.String("Bucket", last.BucketName),
|
|
|
|
zap.String("Object Key", string(last.ObjectKey)),
|
|
|
|
zap.Int64("Version", int64(last.Version)),
|
|
|
|
zap.String("StreamID", hex.EncodeToString(last.StreamID[:])),
|
|
|
|
)
|
|
|
|
objects = append(objects, last)
|
2020-11-09 14:55:10 +00:00
|
|
|
}
|
|
|
|
|
2021-04-29 10:21:29 +01:00
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
2022-08-05 12:00:50 +01:00
|
|
|
if scanErrClass.Has(err) {
|
|
|
|
return ObjectStream{}, Error.New("unable to select zombie objects for deletion: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
db.log.Warn("unable to select zombie objects for deletion", zap.Error(Error.Wrap(err)))
|
|
|
|
return ObjectStream{}, nil
|
2020-11-09 14:55:10 +00:00
|
|
|
}
|
|
|
|
|
2023-05-15 12:48:03 +01:00
|
|
|
err = db.deleteInactiveObjectsAndSegments(ctx, objects, opts)
|
2021-04-29 10:21:29 +01:00
|
|
|
if err != nil {
|
2022-08-05 12:00:50 +01:00
|
|
|
db.log.Warn("delete from DB zombie objects", zap.Error(err))
|
|
|
|
return ObjectStream{}, nil
|
2021-04-29 10:21:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return last, nil
|
2020-11-09 14:55:10 +00:00
|
|
|
})
|
2021-04-29 10:21:29 +01:00
|
|
|
}
|
2021-03-16 11:54:40 +00:00
|
|
|
|
2021-04-29 10:21:29 +01:00
|
|
|
func (db *DB) deleteObjectsAndSegmentsBatch(ctx context.Context, batchsize int, deleteBatch func(startAfter ObjectStream, batchsize int) (last ObjectStream, err error)) (err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
2020-11-09 14:55:10 +00:00
|
|
|
|
2021-06-25 09:19:32 +01:00
|
|
|
deleteBatchsizeLimit.Ensure(&batchsize)
|
|
|
|
|
2021-04-29 10:21:29 +01:00
|
|
|
var startAfter ObjectStream
|
|
|
|
for {
|
2021-06-25 09:19:32 +01:00
|
|
|
lastDeleted, err := deleteBatch(startAfter, batchsize)
|
2021-04-29 10:21:29 +01:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if lastDeleted.StreamID.IsZero() {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
startAfter = lastDeleted
|
|
|
|
}
|
2020-11-09 14:55:10 +00:00
|
|
|
}
|
|
|
|
|
2021-04-29 10:21:29 +01:00
|
|
|
func (db *DB) deleteObjectsAndSegments(ctx context.Context, objects []ObjectStream) (err error) {
|
2020-11-09 14:55:10 +00:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
2021-04-29 10:21:29 +01:00
|
|
|
if len(objects) == 0 {
|
2020-11-09 14:55:10 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-05-24 19:37:24 +01:00
|
|
|
err = pgxutil.Conn(ctx, db.db, func(conn *pgx.Conn) error {
|
2021-04-16 11:03:07 +01:00
|
|
|
var batch pgx.Batch
|
2021-04-29 10:21:29 +01:00
|
|
|
for _, obj := range objects {
|
2021-04-16 11:03:07 +01:00
|
|
|
obj := obj
|
|
|
|
|
|
|
|
batch.Queue(`
|
2021-07-27 13:19:23 +01:00
|
|
|
WITH deleted_objects AS (
|
|
|
|
DELETE FROM objects
|
|
|
|
WHERE (project_id, bucket_name, object_key, version, stream_id) = ($1::BYTEA, $2, $3, $4, $5::BYTEA)
|
|
|
|
RETURNING stream_id
|
|
|
|
)
|
2021-04-16 11:03:07 +01:00
|
|
|
DELETE FROM segments
|
2021-07-27 13:19:23 +01:00
|
|
|
WHERE segments.stream_id = $5::BYTEA
|
|
|
|
`, obj.ProjectID, []byte(obj.BucketName), []byte(obj.ObjectKey), obj.Version, obj.StreamID)
|
2021-04-16 11:03:07 +01:00
|
|
|
}
|
|
|
|
|
2021-05-24 19:37:24 +01:00
|
|
|
results := conn.SendBatch(ctx, &batch)
|
2021-04-16 11:03:07 +01:00
|
|
|
defer func() { err = errs.Combine(err, results.Close()) }()
|
|
|
|
|
2021-07-27 13:19:23 +01:00
|
|
|
var objectsDeletedGuess, segmentsDeleted int64
|
2021-06-04 14:21:09 +01:00
|
|
|
|
2021-04-16 11:03:07 +01:00
|
|
|
var errlist errs.Group
|
|
|
|
for i := 0; i < batch.Len(); i++ {
|
2021-06-04 14:21:09 +01:00
|
|
|
result, err := results.Exec()
|
2021-04-16 11:03:07 +01:00
|
|
|
errlist.Add(err)
|
2021-06-04 14:21:09 +01:00
|
|
|
|
2021-07-27 13:19:23 +01:00
|
|
|
if affectedSegmentCount := result.RowsAffected(); affectedSegmentCount > 0 {
|
|
|
|
// Note, this slightly miscounts objects without any segments
|
|
|
|
// there doesn't seem to be a simple work around for this.
|
|
|
|
// Luckily, this is used only for metrics, where it's not a
|
|
|
|
// significant problem to slightly miscount.
|
|
|
|
objectsDeletedGuess++
|
|
|
|
segmentsDeleted += affectedSegmentCount
|
2021-06-04 14:21:09 +01:00
|
|
|
}
|
2021-04-16 11:03:07 +01:00
|
|
|
}
|
|
|
|
|
2021-07-27 13:19:23 +01:00
|
|
|
mon.Meter("object_delete").Mark64(objectsDeletedGuess)
|
2021-06-04 14:21:09 +01:00
|
|
|
mon.Meter("segment_delete").Mark64(segmentsDeleted)
|
|
|
|
|
2021-04-16 11:03:07 +01:00
|
|
|
return errlist.Err()
|
|
|
|
})
|
2020-11-09 14:55:10 +00:00
|
|
|
if err != nil {
|
2021-03-16 11:54:40 +00:00
|
|
|
return Error.New("unable to delete expired objects: %w", err)
|
2020-11-09 14:55:10 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2021-05-10 11:43:43 +01:00
|
|
|
|
2023-05-15 12:48:03 +01:00
|
|
|
func (db *DB) deleteInactiveObjectsAndSegments(ctx context.Context, objects []ObjectStream, opts DeleteZombieObjects) (err error) {
|
2021-05-10 11:43:43 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
|
|
|
if len(objects) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
err = pgxutil.Conn(ctx, db.db, func(conn *pgx.Conn) error {
|
|
|
|
var batch pgx.Batch
|
|
|
|
for _, obj := range objects {
|
|
|
|
batch.Queue(`
|
2023-05-15 12:48:03 +01:00
|
|
|
WITH check_segments AS (
|
|
|
|
SELECT 1 FROM segments WHERE stream_id = $5::BYTEA AND created_at > $6
|
|
|
|
), deleted_objects AS (
|
2021-05-10 11:43:43 +01:00
|
|
|
DELETE FROM objects
|
|
|
|
WHERE
|
|
|
|
(project_id, bucket_name, object_key, version) = ($1::BYTEA, $2::BYTEA, $3::BYTEA, $4) AND
|
2023-05-15 12:48:03 +01:00
|
|
|
NOT EXISTS (SELECT 1 FROM check_segments)
|
|
|
|
RETURNING stream_id
|
2021-05-10 11:43:43 +01:00
|
|
|
)
|
|
|
|
DELETE FROM segments
|
2023-05-15 12:48:03 +01:00
|
|
|
`+db.impl.AsOfSystemInterval(opts.AsOfSystemInterval)+`
|
2021-05-10 11:43:43 +01:00
|
|
|
WHERE
|
2023-05-15 12:48:03 +01:00
|
|
|
segments.stream_id IN (SELECT stream_id FROM deleted_objects)
|
|
|
|
`, obj.ProjectID, []byte(obj.BucketName), []byte(obj.ObjectKey), obj.Version, obj.StreamID, opts.InactiveDeadline)
|
2021-05-10 11:43:43 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
results := conn.SendBatch(ctx, &batch)
|
|
|
|
defer func() { err = errs.Combine(err, results.Close()) }()
|
|
|
|
|
2021-09-28 12:00:08 +01:00
|
|
|
var segmentsDeleted int64
|
2021-05-10 11:43:43 +01:00
|
|
|
var errlist errs.Group
|
|
|
|
for i := 0; i < batch.Len(); i++ {
|
2021-09-28 12:00:08 +01:00
|
|
|
result, err := results.Exec()
|
2021-05-10 11:43:43 +01:00
|
|
|
errlist.Add(err)
|
2021-09-28 12:00:08 +01:00
|
|
|
|
|
|
|
if err == nil {
|
|
|
|
segmentsDeleted += result.RowsAffected()
|
|
|
|
}
|
2021-05-10 11:43:43 +01:00
|
|
|
}
|
|
|
|
|
2021-09-28 12:00:08 +01:00
|
|
|
// TODO calculate deleted objects
|
|
|
|
mon.Meter("zombie_segment_delete").Mark64(segmentsDeleted)
|
|
|
|
mon.Meter("segment_delete").Mark64(segmentsDeleted)
|
|
|
|
|
2021-05-10 11:43:43 +01:00
|
|
|
return errlist.Err()
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return Error.New("unable to delete zombie objects: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|