storj/satellite/metabase/list_segments.go
Jeff Wendling 7b06575f6f satellite/meta{base,info}: reduce db round trips for download
This combines the ListStreamPositions and GetSegmentByPosition
calls with a ListSegments call that now knows how to return
only the segments within a Range, just like ListStreamPositions.

It would theoretically be possible to also include the
GetObjectLastCommitted call by having it do one of three
queries based on the incoming request range, but that would
mean duplicating the data for the object in every single
row that is returned for each segment in the range.

One gross thing that ListSegments has to do now is update the
first segment returned with the information from any ancestor
segment because GetSegmentByPosition used to do that. It only
updates the first segment so that it doesn't do O(N) database
queries. It seems difficult to have it do a single query to
update all of the segments at once. I'm not certain this change
should be merged on this basis alone.

This change has made me think a couple of things should happen:

1. Server side copy with ancestor segments strikes again
   making the code less clear and potentially more buggy
   or inefficient for a rare case (empirically <0.1%)

2. The download code requests individual segments from
   the satellite lazily as part of its download which
   requires the satellite telling it the locations of
   all of the segments which requires the satellite
   querying the locations of all of the segments. Instead
   the download RPC could return the orders for all of
   the segments for a range and the download code could
   issue N download calls rather than 1 download call and
   N get segment calls. I believe both sides of the code
   paths would be simpler and more efficient this way.

3. In looking at the timing information for downloads when
   testing this, we really need to focus on getting the
   auth key and bandwidth limit verification times down.
   Here's the timing I saw:

       - 42ms: validate auth
       - 52ms: bandwidth usage checking
       - 14ms: get object info
       - 26ms: get segment position info
       - 26ms: getting the first segment full info
       - 20ms: unaccounted for by spans
       - 6ms:  creating the orders

   This change will remove 26ms, but there's a good 90ms
   in just validation. With improved semantics hitting the
   database only once and improved validation, a download
   rpc taking ~30ms seems doable compared to our current
   ~200ms.

Change-Id: I4109dba082eaedb79e634c61dbf86efa93ab1222
2023-04-03 16:49:00 +00:00

305 lines
8.8 KiB
Go

// Copyright (C) 2020 Storj Labs, Inc.
// See LICENSE for copying information.
package metabase
import (
"context"
"database/sql"
"errors"
"time"
"storj.io/common/uuid"
"storj.io/private/dbutil/pgutil"
"storj.io/private/tagsql"
)
// ListSegments contains arguments necessary for listing stream segments.
type ListSegments struct {
StreamID uuid.UUID
Cursor SegmentPosition
Limit int
Range *StreamRange
// This causes ListSegments to update the first Segment in the response
// with the ancestor info if it exists and server side copy is enabled.
UpdateFirstWithAncestor bool
}
// ListSegmentsResult result of listing segments.
type ListSegmentsResult struct {
Segments []Segment
More bool
}
// ListSegments lists specified stream segments.
func (db *DB) ListSegments(ctx context.Context, opts ListSegments) (result ListSegmentsResult, err error) {
defer mon.Task()(&ctx)(&err)
if opts.StreamID.IsZero() {
return ListSegmentsResult{}, ErrInvalidRequest.New("StreamID missing")
}
if opts.Limit < 0 {
return ListSegmentsResult{}, ErrInvalidRequest.New("Invalid limit: %d", opts.Limit)
}
ListLimit.Ensure(&opts.Limit)
if opts.Range != nil {
if opts.Range.PlainStart > opts.Range.PlainLimit {
return ListSegmentsResult{}, ErrInvalidRequest.New("invalid range: %d:%d", opts.Range.PlainStart, opts.Range.PlainLimit)
}
}
var rows tagsql.Rows
var rowsErr error
if opts.Range == nil {
rows, rowsErr = db.db.QueryContext(ctx, `
SELECT
position, created_at, expires_at, root_piece_id,
encrypted_key_nonce, encrypted_key, encrypted_size,
plain_offset, plain_size, encrypted_etag, redundancy,
inline_data, remote_alias_pieces
FROM segments
WHERE
stream_id = $1 AND
($2 = 0::INT8 OR position > $2)
ORDER BY stream_id, position ASC
LIMIT $3
`, opts.StreamID, opts.Cursor, opts.Limit+1)
} else {
rows, rowsErr = db.db.QueryContext(ctx, `
SELECT
position, created_at, expires_at, root_piece_id,
encrypted_key_nonce, encrypted_key, encrypted_size,
plain_offset, plain_size, encrypted_etag, redundancy,
inline_data, remote_alias_pieces
FROM segments
WHERE
stream_id = $1 AND
($2 = 0::INT8 OR position > $2) AND
$4 < plain_offset + plain_size AND plain_offset < $5
ORDER BY stream_id, position ASC
LIMIT $3
`, opts.StreamID, opts.Cursor, opts.Limit+1, opts.Range.PlainStart, opts.Range.PlainLimit)
}
err = withRows(rows, rowsErr)(func(rows tagsql.Rows) error {
for rows.Next() {
var segment Segment
var aliasPieces AliasPieces
err = rows.Scan(
&segment.Position,
&segment.CreatedAt, &segment.ExpiresAt,
&segment.RootPieceID, &segment.EncryptedKeyNonce, &segment.EncryptedKey,
&segment.EncryptedSize, &segment.PlainOffset, &segment.PlainSize,
&segment.EncryptedETag,
redundancyScheme{&segment.Redundancy},
&segment.InlineData, &aliasPieces,
)
if err != nil {
return Error.New("failed to scan segments: %w", err)
}
segment.Pieces, err = db.aliasCache.ConvertAliasesToPieces(ctx, aliasPieces)
if err != nil {
return Error.New("failed to convert aliases to pieces: %w", err)
}
segment.StreamID = opts.StreamID
result.Segments = append(result.Segments, segment)
}
return nil
})
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return ListSegmentsResult{}, nil
}
return ListSegmentsResult{}, Error.New("unable to fetch object segments: %w", err)
}
if db.config.ServerSideCopy {
copies := make([]Segment, 0, len(result.Segments))
copiesPositions := make([]int64, 0, len(result.Segments))
for _, segment := range result.Segments {
if segment.PiecesInAncestorSegment() {
copies = append(copies, segment)
copiesPositions = append(copiesPositions, int64(segment.Position.Encode()))
}
}
if len(copies) > 0 {
index := 0
err = withRows(db.db.QueryContext(ctx, `
SELECT
root_piece_id,
remote_alias_pieces
FROM segments as segments
LEFT JOIN segment_copies as copies
ON copies.ancestor_stream_id = segments.stream_id
WHERE
copies.stream_id = $1 AND segments.position IN (SELECT position FROM UNNEST($2::INT8[]) as position)
ORDER BY segments.stream_id, segments.position ASC
`, opts.StreamID, pgutil.Int8Array(copiesPositions)))(func(rows tagsql.Rows) error {
for rows.Next() {
var aliasPieces AliasPieces
err = rows.Scan(
&copies[index].RootPieceID,
&aliasPieces,
)
if err != nil {
return Error.New("failed to scan segments: %w", err)
}
copies[index].Pieces, err = db.aliasCache.ConvertAliasesToPieces(ctx, aliasPieces)
if err != nil {
return Error.New("failed to convert aliases to pieces: %w", err)
}
index++
}
return nil
})
if err != nil {
return ListSegmentsResult{}, Error.New("unable to fetch object segments: %w", err)
}
if index != len(copies) {
return ListSegmentsResult{}, Error.New("number of ancestor segments is different than copies: want %d got %d",
len(copies), index)
}
}
// we have to update the first segment because DownloadObject uses this call
// and we only need to do the first segment because it only uses the extra
// information for the first segment.
if len(result.Segments) > 0 && opts.UpdateFirstWithAncestor {
err = db.updateWithAncestorSegment(ctx, &result.Segments[0])
if err != nil {
return ListSegmentsResult{}, err
}
}
}
if len(result.Segments) > opts.Limit {
result.More = true
result.Segments = result.Segments[:len(result.Segments)-1]
}
return result, nil
}
// ListStreamPositions contains arguments necessary for listing stream segments.
type ListStreamPositions struct {
StreamID uuid.UUID
Cursor SegmentPosition
Limit int
Range *StreamRange
}
// StreamRange allows to limit stream positions based on the plain offsets.
type StreamRange struct {
PlainStart int64
PlainLimit int64 // limit is exclusive
}
// ListStreamPositionsResult result of listing segments.
type ListStreamPositionsResult struct {
Segments []SegmentPositionInfo
More bool
}
// SegmentPositionInfo contains information for segment position.
type SegmentPositionInfo struct {
Position SegmentPosition
// PlainSize is 0 for a migrated object.
PlainSize int32
// PlainOffset is 0 for a migrated object.
PlainOffset int64
CreatedAt *time.Time // TODO: make it non-nilable after we migrate all existing segments to have creation time
EncryptedETag []byte
EncryptedKeyNonce []byte
EncryptedKey []byte
}
// ListStreamPositions lists specified stream segment positions.
func (db *DB) ListStreamPositions(ctx context.Context, opts ListStreamPositions) (result ListStreamPositionsResult, err error) {
defer mon.Task()(&ctx)(&err)
if opts.StreamID.IsZero() {
return ListStreamPositionsResult{}, ErrInvalidRequest.New("StreamID missing")
}
if opts.Limit < 0 {
return ListStreamPositionsResult{}, ErrInvalidRequest.New("Invalid limit: %d", opts.Limit)
}
ListLimit.Ensure(&opts.Limit)
if opts.Range != nil {
if opts.Range.PlainStart > opts.Range.PlainLimit {
return ListStreamPositionsResult{}, ErrInvalidRequest.New("invalid range: %d:%d", opts.Range.PlainStart, opts.Range.PlainLimit)
}
}
var rows tagsql.Rows
var rowsErr error
if opts.Range == nil {
rows, rowsErr = db.db.QueryContext(ctx, `
SELECT
position, plain_size, plain_offset, created_at,
encrypted_etag, encrypted_key_nonce, encrypted_key
FROM segments
WHERE
stream_id = $1 AND
($2 = 0::INT8 OR position > $2)
ORDER BY position ASC
LIMIT $3
`, opts.StreamID, opts.Cursor, opts.Limit+1)
} else {
rows, rowsErr = db.db.QueryContext(ctx, `
SELECT
position, plain_size, plain_offset, created_at,
encrypted_etag, encrypted_key_nonce, encrypted_key
FROM segments
WHERE
stream_id = $1 AND
($2 = 0::INT8 OR position > $2) AND
$4 < plain_offset + plain_size AND plain_offset < $5
ORDER BY position ASC
LIMIT $3
`, opts.StreamID, opts.Cursor, opts.Limit+1, opts.Range.PlainStart, opts.Range.PlainLimit)
}
err = withRows(rows, rowsErr)(func(rows tagsql.Rows) error {
for rows.Next() {
var segment SegmentPositionInfo
err = rows.Scan(
&segment.Position, &segment.PlainSize, &segment.PlainOffset, &segment.CreatedAt,
&segment.EncryptedETag, &segment.EncryptedKeyNonce, &segment.EncryptedKey,
)
if err != nil {
return Error.New("failed to scan segments: %w", err)
}
result.Segments = append(result.Segments, segment)
}
return nil
})
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return ListStreamPositionsResult{}, nil
}
return ListStreamPositionsResult{}, Error.New("unable to fetch object segments: %w", err)
}
if len(result.Segments) > opts.Limit {
result.More = true
result.Segments = result.Segments[:len(result.Segments)-1]
}
return result, nil
}