2019-01-24 20:15:10 +00:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
2018-12-21 15:11:19 +00:00
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package satellitedb
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2019-03-14 21:12:47 +00:00
|
|
|
"database/sql"
|
2020-07-14 14:04:38 +01:00
|
|
|
"errors"
|
2023-09-20 11:15:26 +01:00
|
|
|
"fmt"
|
|
|
|
"strings"
|
2020-09-09 21:52:22 +01:00
|
|
|
"time"
|
2019-12-03 05:21:46 +00:00
|
|
|
|
2019-12-05 03:34:44 +00:00
|
|
|
"github.com/zeebo/errs"
|
2018-12-27 09:56:25 +00:00
|
|
|
|
2023-09-20 11:15:26 +01:00
|
|
|
"storj.io/common/storj"
|
2021-06-17 16:05:04 +01:00
|
|
|
"storj.io/common/uuid"
|
2021-04-23 10:52:40 +01:00
|
|
|
"storj.io/private/dbutil"
|
2022-04-19 10:22:05 +01:00
|
|
|
"storj.io/private/dbutil/pgutil"
|
2021-06-17 16:05:04 +01:00
|
|
|
"storj.io/storj/satellite/metabase"
|
|
|
|
"storj.io/storj/satellite/repair/queue"
|
2020-09-09 21:52:22 +01:00
|
|
|
"storj.io/storj/satellite/satellitedb/dbx"
|
2018-12-21 15:11:19 +00:00
|
|
|
)
|
|
|
|
|
2020-01-22 19:00:46 +00:00
|
|
|
// RepairQueueSelectLimit defines how many items can be selected at the same time.
// SelectN clamps any caller-supplied limit to this value.
const RepairQueueSelectLimit = 1000
|
|
|
|
|
2022-04-19 10:22:05 +01:00
|
|
|
// repairQueue implements storj.io/storj/satellite/repair/queue.RepairQueue.
type repairQueue struct {
	// db is the satellite database handle used for all repair_queue queries.
	db *satelliteDB
}
|
|
|
|
|
2021-06-17 16:05:04 +01:00
|
|
|
func (r *repairQueue) Insert(ctx context.Context, seg *queue.InjuredSegment) (alreadyInserted bool, err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2020-02-21 21:32:05 +00:00
|
|
|
// insert if not exists, or update healthy count if does exist
|
2020-05-22 20:54:05 +01:00
|
|
|
var query string
|
|
|
|
|
2020-12-14 21:28:04 +00:00
|
|
|
// we want to insert the segment if it is not in the queue, but update the segment health if it already is in the queue
|
2020-05-22 20:54:05 +01:00
|
|
|
// we also want to know if the result was an insert or an update - this is the reasoning for the xmax section of the postgres query
|
|
|
|
// and the separate cockroach query (which the xmax trick does not work for)
|
2021-05-11 09:49:26 +01:00
|
|
|
switch r.db.impl {
|
2020-05-22 20:54:05 +01:00
|
|
|
case dbutil.Postgres:
|
|
|
|
query = `
|
2021-06-17 16:05:04 +01:00
|
|
|
INSERT INTO repair_queue
|
2020-05-22 20:54:05 +01:00
|
|
|
(
|
2023-09-18 12:58:33 +01:00
|
|
|
stream_id, position, segment_health, placement
|
2020-05-22 20:54:05 +01:00
|
|
|
)
|
|
|
|
VALUES (
|
2023-09-18 12:58:33 +01:00
|
|
|
$1, $2, $3, $4
|
2020-05-22 20:54:05 +01:00
|
|
|
)
|
2021-06-17 16:05:04 +01:00
|
|
|
ON CONFLICT (stream_id, position)
|
2020-05-22 20:54:05 +01:00
|
|
|
DO UPDATE
|
2023-09-18 12:58:33 +01:00
|
|
|
SET segment_health=$3, updated_at=current_timestamp, placement=$4
|
2020-05-22 20:54:05 +01:00
|
|
|
RETURNING (xmax != 0) AS alreadyInserted
|
2020-02-21 21:32:05 +00:00
|
|
|
`
|
2020-05-22 20:54:05 +01:00
|
|
|
case dbutil.Cockroach:
|
2022-03-22 14:15:45 +00:00
|
|
|
// TODO it's not optimal solution but crdb is not used in prod for repair queue
|
2020-05-22 20:54:05 +01:00
|
|
|
query = `
|
2022-03-22 14:15:45 +00:00
|
|
|
WITH inserted AS (
|
2022-04-19 10:22:05 +01:00
|
|
|
SELECT count(*) as alreadyInserted FROM repair_queue
|
2021-06-17 16:05:04 +01:00
|
|
|
WHERE stream_id = $1 AND position = $2
|
2020-05-22 20:54:05 +01:00
|
|
|
)
|
2022-03-22 14:15:45 +00:00
|
|
|
INSERT INTO repair_queue
|
|
|
|
(
|
2023-09-18 12:58:33 +01:00
|
|
|
stream_id, position, segment_health, placement
|
2022-03-22 14:15:45 +00:00
|
|
|
)
|
|
|
|
VALUES (
|
2023-09-18 12:58:33 +01:00
|
|
|
$1, $2, $3, $4
|
2022-03-22 14:15:45 +00:00
|
|
|
)
|
|
|
|
ON CONFLICT (stream_id, position)
|
|
|
|
DO UPDATE
|
2023-09-18 12:58:33 +01:00
|
|
|
SET segment_health=$3, updated_at=current_timestamp, placement=$4
|
2022-03-22 14:15:45 +00:00
|
|
|
RETURNING (SELECT alreadyInserted FROM inserted)
|
2020-05-22 20:54:05 +01:00
|
|
|
`
|
|
|
|
}
|
2023-09-18 12:58:33 +01:00
|
|
|
rows, err := r.db.QueryContext(ctx, query, seg.StreamID, seg.Position.Encode(), seg.SegmentHealth, seg.Placement)
|
2020-05-22 20:54:05 +01:00
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
2020-06-03 21:31:21 +01:00
|
|
|
defer func() { err = errs.Combine(err, rows.Close()) }()
|
2020-05-22 20:54:05 +01:00
|
|
|
|
|
|
|
if !rows.Next() {
|
|
|
|
// cockroach query does not return anything if the segment is already in the queue
|
|
|
|
alreadyInserted = true
|
|
|
|
} else {
|
|
|
|
err = rows.Scan(&alreadyInserted)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
}
|
2020-06-03 21:31:21 +01:00
|
|
|
return alreadyInserted, rows.Err()
|
2018-12-21 15:11:19 +00:00
|
|
|
}
|
|
|
|
|
2022-04-19 10:22:05 +01:00
|
|
|
func (r *repairQueue) InsertBatch(
|
|
|
|
ctx context.Context,
|
|
|
|
segments []*queue.InjuredSegment,
|
|
|
|
) (newlyInsertedSegments []*queue.InjuredSegment, err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
if len(segments) == 0 {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// insert if not exists, or update healthy count if does exist
|
|
|
|
var query string
|
|
|
|
|
|
|
|
// we want to insert the segment if it is not in the queue, but update the segment health if it already is in the queue
|
|
|
|
// we also want to know if the result was an insert or an update - this is the reasoning for the xmax section of the postgres query
|
|
|
|
// and the separate cockroach query (which the xmax trick does not work for)
|
|
|
|
switch r.db.impl {
|
|
|
|
case dbutil.Postgres:
|
|
|
|
query = `
|
|
|
|
INSERT INTO repair_queue
|
|
|
|
(
|
2023-09-18 12:58:33 +01:00
|
|
|
stream_id, position, segment_health, placement
|
2022-04-19 10:22:05 +01:00
|
|
|
)
|
|
|
|
VALUES (
|
|
|
|
UNNEST($1::BYTEA[]),
|
|
|
|
UNNEST($2::INT8[]),
|
2023-09-18 12:58:33 +01:00
|
|
|
UNNEST($3::double precision[]),
|
|
|
|
UNNEST($4::INT2[])
|
2022-04-19 10:22:05 +01:00
|
|
|
)
|
|
|
|
ON CONFLICT (stream_id, position)
|
|
|
|
DO UPDATE
|
|
|
|
SET segment_health=EXCLUDED.segment_health, updated_at=current_timestamp
|
|
|
|
RETURNING NOT(xmax != 0) AS newlyInserted
|
|
|
|
`
|
|
|
|
case dbutil.Cockroach:
|
|
|
|
// TODO it's not optimal solution but crdb is not used in prod for repair queue
|
|
|
|
query = `
|
|
|
|
WITH to_insert AS (
|
|
|
|
SELECT
|
|
|
|
UNNEST($1::BYTEA[]) AS stream_id,
|
|
|
|
UNNEST($2::INT8[]) AS position,
|
2023-09-18 12:58:33 +01:00
|
|
|
UNNEST($3::double precision[]) AS segment_health,
|
|
|
|
UNNEST($4::INT2[]) AS placement
|
2022-04-19 10:22:05 +01:00
|
|
|
),
|
|
|
|
do_insert AS (
|
|
|
|
INSERT INTO repair_queue (
|
2023-09-18 12:58:33 +01:00
|
|
|
stream_id, position, segment_health, placement
|
2022-04-19 10:22:05 +01:00
|
|
|
)
|
2023-09-18 12:58:33 +01:00
|
|
|
SELECT stream_id, position, segment_health, placement
|
2022-04-19 10:22:05 +01:00
|
|
|
FROM to_insert
|
|
|
|
ON CONFLICT (stream_id, position)
|
|
|
|
DO UPDATE
|
|
|
|
SET
|
|
|
|
segment_health=EXCLUDED.segment_health,
|
2023-09-18 12:58:33 +01:00
|
|
|
updated_at=current_timestamp,
|
|
|
|
placement=EXCLUDED.placement
|
2022-04-19 10:22:05 +01:00
|
|
|
RETURNING false
|
|
|
|
)
|
|
|
|
SELECT
|
|
|
|
(repair_queue.stream_id IS NULL) AS newlyInserted
|
|
|
|
FROM to_insert
|
|
|
|
LEFT JOIN repair_queue
|
|
|
|
ON to_insert.stream_id = repair_queue.stream_id
|
|
|
|
AND to_insert.position = repair_queue.position
|
|
|
|
`
|
|
|
|
}
|
|
|
|
|
|
|
|
var insertData struct {
|
|
|
|
StreamIDs []uuid.UUID
|
|
|
|
Positions []int64
|
|
|
|
SegmentHealths []float64
|
2023-09-18 12:58:33 +01:00
|
|
|
placements []int16
|
2022-04-19 10:22:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, segment := range segments {
|
|
|
|
insertData.StreamIDs = append(insertData.StreamIDs, segment.StreamID)
|
|
|
|
insertData.Positions = append(insertData.Positions, int64(segment.Position.Encode()))
|
|
|
|
insertData.SegmentHealths = append(insertData.SegmentHealths, segment.SegmentHealth)
|
2023-09-18 12:58:33 +01:00
|
|
|
insertData.placements = append(insertData.placements, int16(segment.Placement))
|
2022-04-19 10:22:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
rows, err := r.db.QueryContext(
|
|
|
|
ctx, query,
|
|
|
|
pgutil.UUIDArray(insertData.StreamIDs),
|
|
|
|
pgutil.Int8Array(insertData.Positions),
|
|
|
|
pgutil.Float8Array(insertData.SegmentHealths),
|
2023-09-18 12:58:33 +01:00
|
|
|
pgutil.Int2Array(insertData.placements),
|
2022-04-19 10:22:05 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return newlyInsertedSegments, err
|
|
|
|
}
|
|
|
|
defer func() { err = errs.Combine(err, rows.Close()) }()
|
|
|
|
|
|
|
|
i := 0
|
|
|
|
for rows.Next() {
|
|
|
|
var isNewlyInserted bool
|
|
|
|
err = rows.Scan(&isNewlyInserted)
|
|
|
|
if err != nil {
|
|
|
|
return newlyInsertedSegments, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if isNewlyInserted {
|
|
|
|
newlyInsertedSegments = append(newlyInsertedSegments, segments[i])
|
|
|
|
}
|
|
|
|
|
|
|
|
i++
|
|
|
|
}
|
|
|
|
|
|
|
|
return newlyInsertedSegments, rows.Err()
|
2022-04-19 07:37:40 +01:00
|
|
|
}
|
|
|
|
|
2023-09-20 11:15:26 +01:00
|
|
|
func (r *repairQueue) Select(ctx context.Context, includedPlacements []storj.PlacementConstraint, excludedPlacements []storj.PlacementConstraint) (seg *queue.InjuredSegment, err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2023-09-20 11:15:26 +01:00
|
|
|
restriction := ""
|
|
|
|
|
|
|
|
placementsToString := func(placements []storj.PlacementConstraint) string {
|
|
|
|
var ps []string
|
|
|
|
for _, p := range placements {
|
|
|
|
ps = append(ps, fmt.Sprintf("%d", p))
|
|
|
|
}
|
|
|
|
return strings.Join(ps, ",")
|
|
|
|
}
|
|
|
|
if len(includedPlacements) > 0 {
|
|
|
|
restriction += fmt.Sprintf(" AND placement IN (%s)", placementsToString(includedPlacements))
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(excludedPlacements) > 0 {
|
|
|
|
restriction += fmt.Sprintf(" AND placement NOT IN (%s)", placementsToString(excludedPlacements))
|
|
|
|
}
|
2020-12-02 15:45:33 +00:00
|
|
|
|
2021-06-17 16:05:04 +01:00
|
|
|
segment := queue.InjuredSegment{}
|
2021-05-11 09:49:26 +01:00
|
|
|
switch r.db.impl {
|
2019-12-03 05:21:46 +00:00
|
|
|
case dbutil.Cockroach:
|
2019-12-19 09:20:52 +00:00
|
|
|
err = r.db.QueryRowContext(ctx, `
|
2021-06-17 16:05:04 +01:00
|
|
|
UPDATE repair_queue SET attempted_at = now()
|
2023-09-20 11:15:26 +01:00
|
|
|
WHERE (attempted_at IS NULL OR attempted_at < now() - interval '6 hours') `+restriction+`
|
2021-06-17 16:05:04 +01:00
|
|
|
ORDER BY segment_health ASC, attempted_at NULLS FIRST
|
2020-12-02 15:45:33 +00:00
|
|
|
LIMIT 1
|
2023-09-18 12:58:33 +01:00
|
|
|
RETURNING stream_id, position, attempted_at, updated_at, inserted_at, segment_health, placement
|
2021-06-17 16:05:04 +01:00
|
|
|
`).Scan(&segment.StreamID, &segment.Position, &segment.AttemptedAt,
|
2023-09-18 12:58:33 +01:00
|
|
|
&segment.UpdatedAt, &segment.InsertedAt, &segment.SegmentHealth, &segment.Placement)
|
2019-12-03 05:21:46 +00:00
|
|
|
case dbutil.Postgres:
|
|
|
|
err = r.db.QueryRowContext(ctx, `
|
2021-06-17 16:05:04 +01:00
|
|
|
UPDATE repair_queue SET attempted_at = now() WHERE (stream_id, position) = (
|
|
|
|
SELECT stream_id, position FROM repair_queue
|
2023-09-20 11:15:26 +01:00
|
|
|
WHERE (attempted_at IS NULL OR attempted_at < now() - interval '6 hours') `+restriction+`
|
2021-06-17 16:05:04 +01:00
|
|
|
ORDER BY segment_health ASC, attempted_at NULLS FIRST FOR UPDATE SKIP LOCKED LIMIT 1
|
2023-09-18 12:58:33 +01:00
|
|
|
) RETURNING stream_id, position, attempted_at, updated_at, inserted_at, segment_health, placement
|
2021-06-17 16:05:04 +01:00
|
|
|
`).Scan(&segment.StreamID, &segment.Position, &segment.AttemptedAt,
|
2023-09-18 12:58:33 +01:00
|
|
|
&segment.UpdatedAt, &segment.InsertedAt, &segment.SegmentHealth, &segment.Placement)
|
2019-12-03 05:21:46 +00:00
|
|
|
default:
|
2021-05-11 09:49:26 +01:00
|
|
|
return seg, errs.New("unhandled database: %v", r.db.impl)
|
2019-12-03 05:21:46 +00:00
|
|
|
}
|
2021-06-17 16:05:04 +01:00
|
|
|
if err != nil {
|
|
|
|
if errors.Is(err, sql.ErrNoRows) {
|
2023-04-06 14:44:54 +01:00
|
|
|
return nil, queue.ErrEmpty.New("")
|
2021-06-17 16:05:04 +01:00
|
|
|
}
|
|
|
|
return nil, err
|
2018-12-27 09:56:25 +00:00
|
|
|
}
|
2021-06-17 16:05:04 +01:00
|
|
|
return &segment, err
|
2019-03-14 21:12:47 +00:00
|
|
|
}
|
2018-12-21 15:11:19 +00:00
|
|
|
|
2021-06-17 16:05:04 +01:00
|
|
|
func (r *repairQueue) Delete(ctx context.Context, seg *queue.InjuredSegment) (err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2021-06-17 16:05:04 +01:00
|
|
|
_, err = r.db.ExecContext(ctx, r.db.Rebind(`DELETE FROM repair_queue WHERE stream_id = ? AND position = ?`), seg.StreamID, seg.Position.Encode())
|
2019-07-23 15:28:06 +01:00
|
|
|
return Error.Wrap(err)
|
2019-04-16 19:14:09 +01:00
|
|
|
}
|
|
|
|
|
2020-09-09 21:52:22 +01:00
|
|
|
func (r *repairQueue) Clean(ctx context.Context, before time.Time) (deleted int64, err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
2021-06-17 16:05:04 +01:00
|
|
|
n, err := r.db.Delete_RepairQueue_By_UpdatedAt_Less(ctx, dbx.RepairQueue_UpdatedAt(before))
|
2020-09-09 21:52:22 +01:00
|
|
|
return n, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
2021-06-17 16:05:04 +01:00
|
|
|
func (r *repairQueue) SelectN(ctx context.Context, limit int) (segs []queue.InjuredSegment, err error) {
|
2019-06-04 12:55:38 +01:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2020-01-22 19:00:46 +00:00
|
|
|
if limit <= 0 || limit > RepairQueueSelectLimit {
|
|
|
|
limit = RepairQueueSelectLimit
|
2018-12-21 15:11:19 +00:00
|
|
|
}
|
2020-10-13 13:47:55 +01:00
|
|
|
// TODO: strictly enforce order-by or change tests
|
2021-06-17 16:05:04 +01:00
|
|
|
rows, err := r.db.QueryContext(ctx,
|
2023-09-18 12:58:33 +01:00
|
|
|
r.db.Rebind(`SELECT stream_id, position, attempted_at, updated_at, segment_health, placement
|
2021-06-17 16:05:04 +01:00
|
|
|
FROM repair_queue LIMIT ?`), limit,
|
|
|
|
)
|
2019-07-25 16:01:44 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, Error.Wrap(err)
|
|
|
|
}
|
2020-01-16 14:27:24 +00:00
|
|
|
defer func() { err = errs.Combine(err, rows.Close()) }()
|
|
|
|
|
2019-04-16 19:14:09 +01:00
|
|
|
for rows.Next() {
|
2021-06-17 16:05:04 +01:00
|
|
|
var seg queue.InjuredSegment
|
|
|
|
err = rows.Scan(&seg.StreamID, &seg.Position, &seg.AttemptedAt,
|
2023-09-18 12:58:33 +01:00
|
|
|
&seg.UpdatedAt, &seg.SegmentHealth, &seg.Placement)
|
2019-04-16 19:14:09 +01:00
|
|
|
if err != nil {
|
2019-07-25 16:01:44 +01:00
|
|
|
return segs, Error.Wrap(err)
|
2018-12-21 15:11:19 +00:00
|
|
|
}
|
2019-04-16 19:14:09 +01:00
|
|
|
segs = append(segs, seg)
|
2018-12-21 15:11:19 +00:00
|
|
|
}
|
2020-01-16 14:27:24 +00:00
|
|
|
|
2019-07-25 16:01:44 +01:00
|
|
|
return segs, Error.Wrap(rows.Err())
|
2018-12-21 15:11:19 +00:00
|
|
|
}
|
2019-07-30 16:21:40 +01:00
|
|
|
|
|
|
|
func (r *repairQueue) Count(ctx context.Context) (count int, err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
|
|
|
// Count every segment regardless of how recently repair was last attempted
|
2021-06-17 16:05:04 +01:00
|
|
|
err = r.db.QueryRowContext(ctx, r.db.Rebind(`SELECT COUNT(*) as count FROM repair_queue`)).Scan(&count)
|
2019-07-30 16:21:40 +01:00
|
|
|
|
|
|
|
return count, Error.Wrap(err)
|
|
|
|
}
|
2020-11-30 11:03:40 +00:00
|
|
|
|
2021-06-17 16:05:04 +01:00
|
|
|
// TestingSetAttemptedTime sets attempted time for a segment.
|
|
|
|
func (r *repairQueue) TestingSetAttemptedTime(ctx context.Context, streamID uuid.UUID,
|
|
|
|
position metabase.SegmentPosition, t time.Time) (rowsAffected int64, err error) {
|
|
|
|
|
2020-11-30 11:03:40 +00:00
|
|
|
defer mon.Task()(&ctx)(&err)
|
2021-06-17 16:05:04 +01:00
|
|
|
res, err := r.db.ExecContext(ctx,
|
|
|
|
r.db.Rebind(`UPDATE repair_queue SET attempted_at = ? WHERE stream_id = ? AND position = ?`),
|
|
|
|
t, streamID, position.Encode(),
|
|
|
|
)
|
2020-11-30 11:03:40 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, Error.Wrap(err)
|
|
|
|
}
|
|
|
|
count, err := res.RowsAffected()
|
|
|
|
return count, Error.Wrap(err)
|
|
|
|
}
|