satellite/metainfo/metabase: rename ListObjectsSegments to ListLoopSegmentEntries

This method will be used only by the metainfo loop, and we need to customize its query so that it consumes less memory.

Change-Id: Iaa97392f483c5df5609d501b3847b80eb1ea2583
This commit is contained in:
Michał Niewrzał 2021-03-02 13:20:02 +01:00 committed by Michal Niewrzal
parent 8e97111513
commit c223c2d845
7 changed files with 215 additions and 187 deletions

View File

@ -205,8 +205,8 @@ type MetabaseDB interface {
GetLatestObjectLastSegment(ctx context.Context, opts metabase.GetLatestObjectLastSegment) (segment metabase.Segment, err error)
// ListSegments lists specified stream segments.
ListSegments(ctx context.Context, opts metabase.ListSegments) (result metabase.ListSegmentsResult, err error)
// ListObjectsSegments lists multiple streams segments.
ListObjectsSegments(ctx context.Context, opts metabase.ListObjectsSegments) (result metabase.ListObjectsSegmentsResult, err error)
// ListLoopSegmentEntries lists streams loop segment entries.
ListLoopSegmentEntries(ctx context.Context, opts metabase.ListLoopSegmentEntries) (result metabase.ListLoopSegmentEntriesResult, err error)
// IterateObjectsAllVersions iterates through all versions of all objects.
IterateObjectsAllVersions(ctx context.Context, opts metabase.IterateObjects, fn func(context.Context, metabase.ObjectsIterator) error) (err error)
// IterateObjectsAllVersionsWithStatus iterates through all versions of all objects with specified status.

View File

@ -313,7 +313,7 @@ func iterateObjects(ctx context.Context, metabaseDB MetabaseDB, observers []*obs
return nil
}
segments, err := metabaseDB.ListObjectsSegments(ctx, metabase.ListObjectsSegments{
segments, err := metabaseDB.ListLoopSegmentEntries(ctx, metabase.ListLoopSegmentEntries{
StreamIDs: ids,
})
if err != nil {
@ -459,7 +459,7 @@ func handleObject(ctx context.Context, observer *observerContext, object *Object
return true
}
func handleSegment(ctx context.Context, observer *observerContext, location metabase.SegmentLocation, segment metabase.Segment, expiresAt *time.Time) bool {
func handleSegment(ctx context.Context, observer *observerContext, location metabase.SegmentLocation, segment metabase.LoopSegmentEntry, expiresAt *time.Time) bool {
loopSegment := &Segment{
Location: location,
}

View File

@ -4,14 +4,11 @@
package metabase
import (
"bytes"
"context"
"database/sql"
"errors"
"sort"
"storj.io/common/uuid"
"storj.io/storj/private/dbutil/pgutil"
"storj.io/storj/private/tagsql"
)
@ -96,82 +93,3 @@ func (db *DB) ListSegments(ctx context.Context, opts ListSegments) (result ListS
return result, nil
}
// ListObjectsSegments contains the arguments necessary for listing the segments
// of multiple streams in one call.
type ListObjectsSegments struct {
// StreamIDs selects the streams whose segments are listed. DB.ListObjectsSegments
// rejects an empty list and any zero ID with ErrInvalidRequest.
StreamIDs []uuid.UUID
}
// ListObjectsSegmentsResult is the result of listing the segments of multiple streams.
type ListObjectsSegmentsResult struct {
// Segments holds the rows returned by DB.ListObjectsSegments, which queries them
// ordered by stream ID and then by position.
Segments []Segment
}
// ListObjectsSegments lists the segments that belong to any of the requested
// streams. Rows are queried ordered by stream ID and then by position.
//
// ErrInvalidRequest is returned when opts.StreamIDs is empty or contains a zero
// ID. An sql.ErrNoRows failure is reported as an empty result, not an error.
func (db *DB) ListObjectsSegments(ctx context.Context, opts ListObjectsSegments) (result ListObjectsSegmentsResult, err error) {
	defer mon.Task()(&ctx)(&err)

	if len(opts.StreamIDs) == 0 {
		return ListObjectsSegmentsResult{}, ErrInvalidRequest.New("StreamIDs list is empty")
	}

	// TODO do something like pgutil.UUIDArray()
	rawIDs := make([][]byte, len(opts.StreamIDs))
	for idx := range opts.StreamIDs {
		// Take a per-iteration copy so every slice is backed by its own array.
		current := opts.StreamIDs[idx]
		if current.IsZero() {
			return ListObjectsSegmentsResult{}, ErrInvalidRequest.New("StreamID missing: index %d", idx)
		}
		rawIDs[idx] = current[:]
	}

	// The IDs are passed to the query in ascending byte order.
	sort.Slice(rawIDs, func(a, b int) bool {
		return bytes.Compare(rawIDs[a], rawIDs[b]) < 0
	})

	// collect scans every row into a Segment and appends it to the result.
	collect := func(rows tagsql.Rows) error {
		for rows.Next() {
			var (
				entry   Segment
				aliases AliasPieces
			)

			scanErr := rows.Scan(
				&entry.StreamID, &entry.Position,
				&entry.RootPieceID, &entry.EncryptedKeyNonce, &entry.EncryptedKey,
				&entry.EncryptedSize, &entry.PlainOffset, &entry.PlainSize,
				redundancyScheme{&entry.Redundancy},
				&entry.InlineData, &aliases,
			)
			if scanErr != nil {
				return Error.New("failed to scan segments: %w", scanErr)
			}

			pieces, convertErr := db.aliasCache.ConvertAliasesToPieces(ctx, aliases)
			entry.Pieces = pieces
			if convertErr != nil {
				return Error.New("failed to convert aliases to pieces: %w", convertErr)
			}

			result.Segments = append(result.Segments, entry)
		}
		return nil
	}

	// The query text is kept flush-left so the string literal stays unchanged.
	err = withRows(db.db.Query(ctx, `
SELECT
stream_id, position,
root_piece_id, encrypted_key_nonce, encrypted_key,
encrypted_size, plain_offset, plain_size,
redundancy,
inline_data, remote_alias_pieces
FROM segments
WHERE
-- this turns out to be a little bit faster than stream_id IN (SELECT unnest($1::BYTEA[]))
stream_id = ANY ($1::BYTEA[])
ORDER BY stream_id ASC, position ASC
`, pgutil.ByteaArray(rawIDs)))(collect)

	switch {
	case err == nil:
		return result, nil
	case errors.Is(err, sql.ErrNoRows):
		return ListObjectsSegmentsResult{}, nil
	default:
		return ListObjectsSegmentsResult{}, Error.New("unable to fetch object segments: %w", err)
	}
}

View File

@ -4,14 +4,11 @@
package metabase_test
import (
"bytes"
"sort"
"testing"
"storj.io/common/storj"
"storj.io/common/testcontext"
"storj.io/common/testrand"
"storj.io/common/uuid"
"storj.io/storj/satellite/metainfo/metabase"
)
@ -262,96 +259,3 @@ func TestListSegments(t *testing.T) {
})
})
}
// TestListObjectsSegments checks DB.ListObjectsSegments argument validation and
// the listing of segments for several streams at once.
func TestListObjectsSegments(t *testing.T) {
All(t, func(ctx *testcontext.Context, t *testing.T, db *metabase.DB) {
// An empty options struct must be rejected as an invalid request.
t.Run("StreamIDs list is empty", func(t *testing.T) {
defer DeleteAll{}.Check(ctx, t, db)
ListObjectsSegments{
Opts: metabase.ListObjectsSegments{},
ErrClass: &metabase.ErrInvalidRequest,
ErrText: "StreamIDs list is empty",
}.Check(ctx, t, db)
Verify{}.Check(ctx, t, db)
})
// A zero UUID in the list must be rejected, with its index in the message.
t.Run("StreamIDs list contains empty ID", func(t *testing.T) {
defer DeleteAll{}.Check(ctx, t, db)
ListObjectsSegments{
Opts: metabase.ListObjectsSegments{
StreamIDs: []uuid.UUID{{}},
},
ErrClass: &metabase.ErrInvalidRequest,
ErrText: "StreamID missing: index 0",
}.Check(ctx, t, db)
Verify{}.Check(ctx, t, db)
})
t.Run("List objects segments", func(t *testing.T) {
defer DeleteAll{}.Check(ctx, t, db)
// NOTE(review): the last createObject argument is presumably the number of
// segments to create — confirm against the helper.
expectedObject01 := createObject(ctx, t, db, randObjectStream(), 1)
expectedObject02 := createObject(ctx, t, db, randObjectStream(), 5)
expectedObject03 := createObject(ctx, t, db, randObjectStream(), 3)
expectedSegments := []metabase.Segment{}
expectedRawSegments := []metabase.RawSegment{}
// Expectations are built in ascending stream ID order to match the
// ORDER BY stream_id, position used by the query.
objects := []metabase.Object{expectedObject01, expectedObject02, expectedObject03}
sort.Slice(objects, func(i, j int) bool {
return bytes.Compare(objects[i].StreamID[:], objects[j].StreamID[:]) < 0
})
// addSegments appends one expected segment per segment of the object.
// NOTE(review): the field values presumably mirror what createObject
// stores — verify against the helper.
addSegments := func(object metabase.Object) {
for i := 0; i < int(object.SegmentCount); i++ {
segment := metabase.Segment{
StreamID: object.StreamID,
Position: metabase.SegmentPosition{
Index: uint32(i),
},
RootPieceID: storj.PieceID{1},
EncryptedKey: []byte{3},
EncryptedKeyNonce: []byte{4},
EncryptedSize: 1024,
PlainSize: 512,
Pieces: metabase.Pieces{{Number: 0, StorageNode: storj.NodeID{2}}},
Redundancy: defaultTestRedundancy,
}
expectedSegments = append(expectedSegments, segment)
expectedRawSegments = append(expectedRawSegments, metabase.RawSegment(segment))
}
}
for _, object := range objects {
addSegments(object)
}
// The stream IDs are passed in creation order, not sorted order; the
// result is still expected in sorted order.
ListObjectsSegments{
Opts: metabase.ListObjectsSegments{
StreamIDs: []uuid.UUID{
expectedObject01.StreamID,
expectedObject02.StreamID,
expectedObject03.StreamID,
},
},
Result: metabase.ListObjectsSegmentsResult{
Segments: expectedSegments,
},
}.Check(ctx, t, db)
// Listing must not have modified the stored objects or segments.
Verify{
Objects: []metabase.RawObject{
metabase.RawObject(expectedObject01),
metabase.RawObject(expectedObject02),
metabase.RawObject(expectedObject03),
},
Segments: expectedRawSegments,
}.Check(ctx, t, db)
})
})
}

View File

@ -4,12 +4,18 @@
package metabase
import (
"bytes"
"context"
"database/sql"
"errors"
"sort"
"time"
"github.com/zeebo/errs"
"storj.io/common/storj"
"storj.io/common/uuid"
"storj.io/storj/private/dbutil/pgutil"
"storj.io/storj/private/tagsql"
)
@ -169,3 +175,107 @@ func (it *loopIterator) scanItem(item *LoopObjectEntry) error {
&item.EncryptedMetadataSize,
)
}
// LoopSegmentEntry contains the segment metadata needed by the metainfo loop.
// DB.ListLoopSegmentEntries fills it from a row of the segments table.
type LoopSegmentEntry struct {
// StreamID and Position are scanned from the stream_id and position columns.
StreamID uuid.UUID
Position SegmentPosition
RootPieceID storj.PieceID
EncryptedKeyNonce []byte
EncryptedKey []byte
EncryptedSize int32 // size of the whole segment (not a piece)
PlainSize int32
PlainOffset int64
// TODO: add fields for proofs/chains
Redundancy storj.RedundancyScheme
// InlineData is scanned from the inline_data column.
InlineData []byte
// Pieces is produced by converting the remote_alias_pieces column through the
// DB alias cache.
Pieces Pieces
}
// Inline reports whether the segment is an inline segment, that is, it has a
// zero redundancy scheme and no pieces.
func (s LoopSegmentEntry) Inline() bool {
	if !s.Redundancy.IsZero() {
		return false
	}
	return len(s.Pieces) == 0
}
// ListLoopSegmentEntries contains the arguments necessary for listing the loop
// segment entries of multiple streams.
type ListLoopSegmentEntries struct {
// StreamIDs selects the streams whose segments are listed. DB.ListLoopSegmentEntries
// rejects an empty list and any zero ID with ErrInvalidRequest.
StreamIDs []uuid.UUID
}
// ListLoopSegmentEntriesResult is the result of listing the loop segment entries of streams.
type ListLoopSegmentEntriesResult struct {
// Segments holds the rows returned by DB.ListLoopSegmentEntries, which queries
// them ordered by stream ID and then by position.
Segments []LoopSegmentEntry
}
// ListLoopSegmentEntries lists the loop segment entries that belong to any of
// the requested streams. Rows are queried ordered by stream ID and then by
// position.
//
// ErrInvalidRequest is returned when opts.StreamIDs is empty or contains a zero
// ID. An sql.ErrNoRows failure is reported as an empty result, not an error.
func (db *DB) ListLoopSegmentEntries(ctx context.Context, opts ListLoopSegmentEntries) (result ListLoopSegmentEntriesResult, err error) {
	defer mon.Task()(&ctx)(&err)

	if len(opts.StreamIDs) == 0 {
		return ListLoopSegmentEntriesResult{}, ErrInvalidRequest.New("StreamIDs list is empty")
	}

	// TODO do something like pgutil.UUIDArray()
	rawIDs := make([][]byte, len(opts.StreamIDs))
	for idx := range opts.StreamIDs {
		// Take a per-iteration copy so every slice is backed by its own array.
		current := opts.StreamIDs[idx]
		if current.IsZero() {
			return ListLoopSegmentEntriesResult{}, ErrInvalidRequest.New("StreamID missing: index %d", idx)
		}
		rawIDs[idx] = current[:]
	}

	// The IDs are passed to the query in ascending byte order.
	sort.Slice(rawIDs, func(a, b int) bool {
		return bytes.Compare(rawIDs[a], rawIDs[b]) < 0
	})

	// collect scans every row into a LoopSegmentEntry and appends it to the result.
	collect := func(rows tagsql.Rows) error {
		for rows.Next() {
			var (
				entry   LoopSegmentEntry
				aliases AliasPieces
			)

			scanErr := rows.Scan(
				&entry.StreamID, &entry.Position,
				&entry.RootPieceID, &entry.EncryptedKeyNonce, &entry.EncryptedKey,
				&entry.EncryptedSize, &entry.PlainOffset, &entry.PlainSize,
				redundancyScheme{&entry.Redundancy},
				&entry.InlineData, &aliases,
			)
			if scanErr != nil {
				return Error.New("failed to scan segments: %w", scanErr)
			}

			pieces, convertErr := db.aliasCache.ConvertAliasesToPieces(ctx, aliases)
			entry.Pieces = pieces
			if convertErr != nil {
				return Error.New("failed to convert aliases to pieces: %w", convertErr)
			}

			result.Segments = append(result.Segments, entry)
		}
		return nil
	}

	// The query text is kept flush-left so the string literal stays unchanged.
	err = withRows(db.db.Query(ctx, `
SELECT
stream_id, position,
root_piece_id, encrypted_key_nonce, encrypted_key,
encrypted_size, plain_offset, plain_size,
redundancy,
inline_data, remote_alias_pieces
FROM segments
WHERE
-- this turns out to be a little bit faster than stream_id IN (SELECT unnest($1::BYTEA[]))
stream_id = ANY ($1::BYTEA[])
ORDER BY stream_id ASC, position ASC
`, pgutil.ByteaArray(rawIDs)))(collect)

	switch {
	case err == nil:
		return result, nil
	case errors.Is(err, sql.ErrNoRows):
		return ListLoopSegmentEntriesResult{}, nil
	default:
		return ListLoopSegmentEntriesResult{}, Error.New("unable to fetch object segments: %w", err)
	}
}

View File

@ -4,9 +4,12 @@
package metabase_test
import (
"bytes"
"sort"
"strings"
"testing"
"storj.io/common/storj"
"storj.io/common/testcontext"
"storj.io/common/testrand"
"storj.io/common/uuid"
@ -257,3 +260,96 @@ func loopObjectEntryFromRaw(m metabase.RawObject) metabase.LoopObjectEntry {
SegmentCount: m.SegmentCount,
}
}
// TestListLoopSegmentEntries checks DB.ListLoopSegmentEntries argument validation
// and the listing of loop segment entries for several streams at once.
func TestListLoopSegmentEntries(t *testing.T) {
All(t, func(ctx *testcontext.Context, t *testing.T, db *metabase.DB) {
// An empty options struct must be rejected as an invalid request.
t.Run("StreamIDs list is empty", func(t *testing.T) {
defer DeleteAll{}.Check(ctx, t, db)
ListLoopSegmentEntries{
Opts: metabase.ListLoopSegmentEntries{},
ErrClass: &metabase.ErrInvalidRequest,
ErrText: "StreamIDs list is empty",
}.Check(ctx, t, db)
Verify{}.Check(ctx, t, db)
})
// A zero UUID in the list must be rejected, with its index in the message.
t.Run("StreamIDs list contains empty ID", func(t *testing.T) {
defer DeleteAll{}.Check(ctx, t, db)
ListLoopSegmentEntries{
Opts: metabase.ListLoopSegmentEntries{
StreamIDs: []uuid.UUID{{}},
},
ErrClass: &metabase.ErrInvalidRequest,
ErrText: "StreamID missing: index 0",
}.Check(ctx, t, db)
Verify{}.Check(ctx, t, db)
})
t.Run("List objects segments", func(t *testing.T) {
defer DeleteAll{}.Check(ctx, t, db)
// NOTE(review): the last createObject argument is presumably the number of
// segments to create — confirm against the helper.
expectedObject01 := createObject(ctx, t, db, randObjectStream(), 1)
expectedObject02 := createObject(ctx, t, db, randObjectStream(), 5)
expectedObject03 := createObject(ctx, t, db, randObjectStream(), 3)
expectedSegments := []metabase.LoopSegmentEntry{}
expectedRawSegments := []metabase.RawSegment{}
// Expectations are built in ascending stream ID order to match the
// ORDER BY stream_id, position used by the query.
objects := []metabase.Object{expectedObject01, expectedObject02, expectedObject03}
sort.Slice(objects, func(i, j int) bool {
return bytes.Compare(objects[i].StreamID[:], objects[j].StreamID[:]) < 0
})
// addSegments appends one expected entry per segment of the object.
// NOTE(review): the field values presumably mirror what createObject
// stores — verify against the helper.
addSegments := func(object metabase.Object) {
for i := 0; i < int(object.SegmentCount); i++ {
segment := metabase.LoopSegmentEntry{
StreamID: object.StreamID,
Position: metabase.SegmentPosition{
Index: uint32(i),
},
RootPieceID: storj.PieceID{1},
EncryptedKey: []byte{3},
EncryptedKeyNonce: []byte{4},
EncryptedSize: 1024,
PlainSize: 512,
Pieces: metabase.Pieces{{Number: 0, StorageNode: storj.NodeID{2}}},
Redundancy: defaultTestRedundancy,
}
expectedSegments = append(expectedSegments, segment)
// The same values are also expected as raw segments when verifying the
// database state below.
expectedRawSegments = append(expectedRawSegments, metabase.RawSegment(segment))
}
}
for _, object := range objects {
addSegments(object)
}
// The stream IDs are passed in creation order, not sorted order; the
// result is still expected in sorted order.
ListLoopSegmentEntries{
Opts: metabase.ListLoopSegmentEntries{
StreamIDs: []uuid.UUID{
expectedObject01.StreamID,
expectedObject02.StreamID,
expectedObject03.StreamID,
},
},
Result: metabase.ListLoopSegmentEntriesResult{
Segments: expectedSegments,
},
}.Check(ctx, t, db)
// Listing must not have modified the stored objects or segments.
Verify{
Objects: []metabase.RawObject{
metabase.RawObject(expectedObject01),
metabase.RawObject(expectedObject02),
metabase.RawObject(expectedObject03),
},
Segments: expectedRawSegments,
}.Check(ctx, t, db)
})
})
}

View File

@ -274,15 +274,15 @@ func (step ListSegments) Check(ctx *testcontext.Context, t testing.TB, db *metab
require.Zero(t, diff)
}
type ListObjectsSegments struct {
Opts metabase.ListObjectsSegments
Result metabase.ListObjectsSegmentsResult
type ListLoopSegmentEntries struct {
Opts metabase.ListLoopSegmentEntries
Result metabase.ListLoopSegmentEntriesResult
ErrClass *errs.Class
ErrText string
}
func (step ListObjectsSegments) Check(ctx *testcontext.Context, t testing.TB, db *metabase.DB) {
result, err := db.ListObjectsSegments(ctx, step.Opts)
func (step ListLoopSegmentEntries) Check(ctx *testcontext.Context, t testing.TB, db *metabase.DB) {
result, err := db.ListLoopSegmentEntries(ctx, step.Opts)
checkError(t, err, step.ErrClass, step.ErrText)
diff := cmp.Diff(step.Result, result, cmpopts.EquateApproxTime(5*time.Second))