// Copyright (C) 2023 Storj Labs, Inc. // See LICENSE for copying information. package metabase import ( "bytes" "context" "strings" "github.com/zeebo/errs" "storj.io/common/uuid" "storj.io/private/tagsql" ) // pendingObjectsIterator enables iteration on pending objects in a bucket. type pendingObjectsIterator struct { db *DB projectID uuid.UUID bucketName []byte prefix ObjectKey prefixLimit ObjectKey batchSize int recursive bool includeCustomMetadata bool includeSystemMetadata bool curIndex int curRows tagsql.Rows cursor pendingObjectIterateCursor // not relative to prefix skipPrefix ObjectKey // relative to prefix doNextQuery func(context.Context, *pendingObjectsIterator) (_ tagsql.Rows, err error) // failErr is set when either scan or next query fails during iteration. failErr error } type pendingObjectIterateCursor struct { Key ObjectKey StreamID uuid.UUID Inclusive bool } func iterateAllPendingObjects(ctx context.Context, db *DB, opts IteratePendingObjects, fn func(context.Context, PendingObjectsIterator) error) (err error) { defer mon.Task()(&ctx)(&err) it := &pendingObjectsIterator{ db: db, projectID: opts.ProjectID, bucketName: []byte(opts.BucketName), prefix: opts.Prefix, prefixLimit: prefixLimit(opts.Prefix), batchSize: opts.BatchSize, recursive: opts.Recursive, includeCustomMetadata: opts.IncludeCustomMetadata, includeSystemMetadata: opts.IncludeSystemMetadata, curIndex: 0, cursor: firstPendingObjectIterateCursor(opts.Recursive, opts.Cursor, opts.Prefix), doNextQuery: doNextQueryAllPendingObjects, } // start from either the cursor or prefix, depending on which is larger if lessKey(it.cursor.Key, opts.Prefix) { it.cursor.Key = opts.Prefix it.cursor.Inclusive = true } return iteratePendingObjects(ctx, it, fn) } func iteratePendingObjects(ctx context.Context, it *pendingObjectsIterator, fn func(context.Context, PendingObjectsIterator) error) (err error) { batchsizeLimit.Ensure(&it.batchSize) it.curRows, err = it.doNextQuery(ctx, it) if err != nil { return err } it.cursor.Inclusive = false defer func() { if rowsErr := it.curRows.Err(); rowsErr != nil { err = errs.Combine(err, rowsErr) } err = errs.Combine(err, it.failErr, it.curRows.Close()) }() return fn(ctx, it) } // Next returns true if there was another item and copy it in item. func (it *pendingObjectsIterator) Next(ctx context.Context, item *PendingObjectEntry) bool { if it.recursive { return it.next(ctx, item) } // TODO: implement this on the database side // skip until we are past the prefix we returned before. if it.skipPrefix != "" { for strings.HasPrefix(string(item.ObjectKey), string(it.skipPrefix)) { if !it.next(ctx, item) { return false } } it.skipPrefix = "" } else { ok := it.next(ctx, item) if !ok { return false } } // should this be treated as a prefix? p := strings.IndexByte(string(item.ObjectKey), Delimiter) if p >= 0 { it.skipPrefix = item.ObjectKey[:p+1] *item = PendingObjectEntry{ IsPrefix: true, ObjectKey: item.ObjectKey[:p+1], } } return true } // next returns true if there was another item and copy it in item. func (it *pendingObjectsIterator) next(ctx context.Context, item *PendingObjectEntry) bool { next := it.curRows.Next() if !next { if it.curIndex < it.batchSize { return false } if it.curRows.Err() != nil { return false } if !it.recursive { afterPrefix := it.cursor.Key[len(it.prefix):] p := bytes.IndexByte([]byte(afterPrefix), Delimiter) if p >= 0 { it.cursor.Key = it.prefix + prefixLimit(afterPrefix[:p+1]) it.cursor.StreamID = uuid.UUID{} } } rows, err := it.doNextQuery(ctx, it) if err != nil { it.failErr = errs.Combine(it.failErr, err) return false } if closeErr := it.curRows.Close(); closeErr != nil { it.failErr = errs.Combine(it.failErr, closeErr, rows.Close()) return false } it.curRows = rows it.curIndex = 0 if !it.curRows.Next() { return false } } err := it.scanItem(item) if err != nil { it.failErr = errs.Combine(it.failErr, err) return false } it.curIndex++ it.cursor.Key = it.prefix + item.ObjectKey it.cursor.StreamID = item.StreamID return true } func doNextQueryAllPendingObjects(ctx context.Context, it *pendingObjectsIterator) (_ tagsql.Rows, err error) { defer mon.Task()(&ctx)(&err) cursorCompare := ">" if it.cursor.Inclusive { cursorCompare = ">=" } if it.prefixLimit == "" { querySelectFields := pendingObjectsQuerySelectorFields("object_key", it) return it.db.db.QueryContext(ctx, ` SELECT `+querySelectFields+` FROM pending_objects WHERE (project_id, bucket_name, object_key, stream_id) `+cursorCompare+` ($1, $2, $3, $4) AND (project_id, bucket_name) < ($1, $6) AND (expires_at IS NULL OR expires_at > now()) ORDER BY (project_id, bucket_name, object_key, stream_id) ASC LIMIT $5 `, it.projectID, it.bucketName, []byte(it.cursor.Key), it.cursor.StreamID, it.batchSize, nextBucket(it.bucketName), ) } fromSubstring := 1 if it.prefix != "" { fromSubstring = len(it.prefix) + 1 } querySelectFields := pendingObjectsQuerySelectorFields("SUBSTRING(object_key FROM $7)", it) return it.db.db.QueryContext(ctx, ` SELECT `+querySelectFields+` FROM pending_objects WHERE (project_id, bucket_name, object_key, stream_id) `+cursorCompare+` ($1, $2, $3, $4) AND (project_id, bucket_name, object_key) < ($1, $2, $5) AND (expires_at IS NULL OR expires_at > now()) ORDER BY (project_id, bucket_name, object_key, stream_id) ASC LIMIT $6 `, it.projectID, it.bucketName, []byte(it.cursor.Key), it.cursor.StreamID, []byte(it.prefixLimit), it.batchSize, fromSubstring, ) } func pendingObjectsQuerySelectorFields(objectKeyColumn string, it *pendingObjectsIterator) string { querySelectFields := objectKeyColumn + ` ,stream_id ,encryption` if it.includeSystemMetadata { querySelectFields += ` ,created_at ,expires_at` } if it.includeCustomMetadata { querySelectFields += ` ,encrypted_metadata_nonce ,encrypted_metadata ,encrypted_metadata_encrypted_key` } return querySelectFields } // scanItem scans doNextQuery results into PendingObjectEntry. func (it *pendingObjectsIterator) scanItem(item *PendingObjectEntry) (err error) { item.IsPrefix = false fields := []interface{}{ &item.ObjectKey, &item.StreamID, encryptionParameters{&item.Encryption}, } if it.includeSystemMetadata { fields = append(fields, &item.CreatedAt, &item.ExpiresAt, ) } if it.includeCustomMetadata { fields = append(fields, &item.EncryptedMetadataNonce, &item.EncryptedMetadata, &item.EncryptedMetadataEncryptedKey, ) } return it.curRows.Scan(fields...) } // firstPendingObjectIterateCursor adjust the cursor for a non-recursive iteration. // The cursor is non-inclusive and we need to adjust to handle prefix as cursor properly. // We return the next possible key from the prefix. func firstPendingObjectIterateCursor(recursive bool, cursor PendingObjectsCursor, prefix ObjectKey) pendingObjectIterateCursor { if recursive { return pendingObjectIterateCursor{ Key: cursor.Key, StreamID: cursor.StreamID, } } // when the cursor does not match the prefix, we'll return the original cursor. if !strings.HasPrefix(string(cursor.Key), string(prefix)) { return pendingObjectIterateCursor{ Key: cursor.Key, StreamID: cursor.StreamID, } } // handle case where: // prefix: x/y/ // cursor: x/y/z/w // In this case, we want the skip prefix to be `x/y/z` + string('/' + 1). cursorWithoutPrefix := cursor.Key[len(prefix):] p := strings.IndexByte(string(cursorWithoutPrefix), Delimiter) if p < 0 { // The cursor is not a prefix, but instead a path inside the prefix, // so we can use it directly. return pendingObjectIterateCursor{ Key: cursor.Key, StreamID: cursor.StreamID, } } // return the next prefix given a scoped path return pendingObjectIterateCursor{ Key: cursor.Key[:len(prefix)+p] + ObjectKey(Delimiter+1), StreamID: cursor.StreamID, Inclusive: true, } } func iteratePendingObjectsByKeyNew(ctx context.Context, db *DB, opts IteratePendingObjectsByKey, fn func(context.Context, PendingObjectsIterator) error) (err error) { defer mon.Task()(&ctx)(&err) cursor := opts.Cursor if cursor.StreamID.IsZero() { cursor.StreamID = uuid.UUID{} } it := &pendingObjectsIterator{ db: db, projectID: opts.ProjectID, bucketName: []byte(opts.BucketName), prefix: "", prefixLimit: "", batchSize: opts.BatchSize, recursive: true, includeCustomMetadata: true, includeSystemMetadata: true, curIndex: 0, cursor: pendingObjectIterateCursor{ Key: opts.ObjectKey, StreamID: opts.Cursor.StreamID, }, doNextQuery: doNextQueryPendingStreamsByKey, } return iteratePendingObjects(ctx, it, fn) } func doNextQueryPendingStreamsByKey(ctx context.Context, it *pendingObjectsIterator) (_ tagsql.Rows, err error) { defer mon.Task()(&ctx)(&err) return it.db.db.QueryContext(ctx, ` SELECT object_key, stream_id, encryption, created_at, expires_at, encrypted_metadata_nonce, encrypted_metadata, encrypted_metadata_encrypted_key FROM pending_objects WHERE (project_id, bucket_name, object_key) = ($1, $2, $3) AND stream_id > $4::BYTEA ORDER BY stream_id ASC LIMIT $5 `, it.projectID, it.bucketName, []byte(it.cursor.Key), it.cursor.StreamID, it.batchSize, ) }