storj/satellite/satellitedb/repairqueue.go

129 lines
3.7 KiB
Go
Raw Normal View History

2019-01-24 20:15:10 +00:00
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package satellitedb
import (
"context"
"database/sql"
"fmt"
"github.com/lib/pq"
sqlite3 "github.com/mattn/go-sqlite3"
"storj.io/storj/internal/dbutil/pgutil"
"storj.io/storj/internal/dbutil/sqliteutil"
"storj.io/storj/pkg/pb"
dbx "storj.io/storj/satellite/satellitedb/dbx"
"storj.io/storj/storage"
)
// repairQueue implements the queue operations in this file on top of the
// injuredsegments table, using a dbx database handle.
type repairQueue struct {
// db is the database handle every query in this file is issued through.
db *dbx.DB
}
// Insert adds seg to the injuredsegments table, keyed by seg.Path.
//
// A constraint violation reported by either the postgres or the sqlite helper
// is treated as success, so inserting a segment that is already present is a
// silent no-op. Any other error is returned unchanged.
func (r *repairQueue) Insert(ctx context.Context, seg *pb.InjuredSegment) (err error) {
	defer mon.Task()(&ctx)(&err)

	query := r.db.Rebind(`INSERT INTO injuredsegments ( path, data ) VALUES ( ?, ? )`)
	_, insertErr := r.db.ExecContext(ctx, query, seg.Path, seg)

	switch {
	case insertErr == nil:
		return nil
	case pgutil.IsConstraintError(insertErr) || sqliteutil.IsConstraintError(insertErr):
		return nil // quietly fail on reinsert
	default:
		return insertErr
	}
}
// postgresSelect claims one segment from the queue on postgres.
//
// A single UPDATE ... RETURNING statement stamps the chosen row's attempted
// column with the current UTC time and returns its data. The candidate row is
// one whose attempted column is NULL or older than one hour, never-attempted
// rows first; the inner SELECT uses FOR UPDATE SKIP LOCKED.
//
// When no row qualifies, storage.ErrEmptyQueue is returned.
func (r *repairQueue) postgresSelect(ctx context.Context) (seg *pb.InjuredSegment, err error) {
	defer mon.Task()(&ctx)(&err)

	const claimQuery = `
UPDATE injuredsegments SET attempted = timezone('utc', now()) WHERE path = (
SELECT path FROM injuredsegments
WHERE attempted IS NULL OR attempted < timezone('utc', now()) - interval '1 hour'
ORDER BY attempted NULLS FIRST FOR UPDATE SKIP LOCKED LIMIT 1
) RETURNING data`

	scanErr := r.db.QueryRowContext(ctx, claimQuery).Scan(&seg)
	if scanErr == sql.ErrNoRows {
		return seg, storage.ErrEmptyQueue.New("")
	}
	return seg, scanErr
}
// sqliteSelect claims one segment from the queue on sqlite.
//
// Inside a single transaction it reads the path and data of one row whose
// attempted column is NULL or older than one hour (ordered by attempted), then
// sets that row's attempted column to the current time. The update must touch
// exactly one row; anything else is reported as an error.
//
// When no row qualifies, storage.ErrEmptyQueue is returned. This relies on
// WithTx handing back sql.ErrNoRows unwrapped (presumably it does; verify
// against the dbx helper).
func (r *repairQueue) sqliteSelect(ctx context.Context) (seg *pb.InjuredSegment, err error) {
	defer mon.Task()(&ctx)(&err)

	err = r.db.WithTx(ctx, func(ctx context.Context, tx *dbx.Tx) error {
		var path []byte
		// Use a closure-local error so the captured named result is only
		// written once, by the WithTx return value.
		err := tx.Tx.QueryRowContext(ctx, r.db.Rebind(`
SELECT path, data FROM injuredsegments
WHERE attempted IS NULL
OR attempted < datetime('now','-1 hours')
ORDER BY attempted LIMIT 1`)).Scan(&path, &seg)
		if err != nil {
			return err
		}

		res, err := tx.Tx.ExecContext(ctx, r.db.Rebind(`UPDATE injuredsegments SET attempted = datetime('now') WHERE path = ?`), path)
		if err != nil {
			return err
		}

		count, err := res.RowsAffected()
		if err != nil {
			return err
		}
		if count != 1 {
			// The statement above is an UPDATE, not a DELETE.
			return fmt.Errorf("expected 1, got %d segments updated", count)
		}
		return nil
	})
	if err == sql.ErrNoRows {
		err = storage.ErrEmptyQueue.New("")
	}
	return seg, err
}
// Select claims the next segment from the queue, dispatching on the
// underlying database driver: sqlite uses sqliteSelect and postgres uses
// postgresSelect. Any other driver yields an error naming the driver's type.
func (r *repairQueue) Select(ctx context.Context) (seg *pb.InjuredSegment, err error) {
	defer mon.Task()(&ctx)(&err)
	switch t := r.db.DB.Driver().(type) {
	case *sqlite3.SQLiteDriver:
		return r.sqliteSelect(ctx)
	case *pq.Driver:
		return r.postgresSelect(ctx)
	default:
		// %T prints the dynamic type of the driver; %t is the boolean verb
		// and would render as a "%!t(...)" formatting error.
		return seg, fmt.Errorf("Unsupported database %T", t)
	}
}
func (r *repairQueue) Delete(ctx context.Context, seg *pb.InjuredSegment) (err error) {
defer mon.Task()(&ctx)(&err)
_, err = r.db.ExecContext(ctx, r.db.Rebind(`DELETE FROM injuredsegments WHERE path = ?`), seg.Path)
[v3 2137] - Add more info to find out repair failures (#2623) * pkg/datarepair/repairer: Track always time for repair Make a minor change in the worker function of the repairer, that when successful, always track the metric time for repair independently if the time since checker queue metric can be tracked. * storage/postgreskv: Wrap error in Get func Wrap the returned error of the Get function as it is done when the query doesn't return any row. * satellite/metainfo: Move debug msg to the right place NewStore function was writing a debug log message when the DB was connected, however it was always writing it out despite if an error happened when getting the connection. * pkg/datarepair/repairer: Wrap error before logging it Wrap the error returned by process which is executed by the Run method of the repairer service to add context to the error log message. * pkg/datarepair/repairer: Make errors more specific in worker Make the error messages of the "worker" method of the Service more specific and the logged message for such errors. * pkg/storage/repair: Improve error reporting Repair In order of improving the error reporting by the pkg/storage/repair.Repair method, several errors of this method and functions/methods which this one relies one have been updated to be wrapper into their corresponding classes. * pkg/storage/segments: Track path param of Repair method Track in monkit the path parameter passed to the Repair method. * satellite/satellitedb: Wrap Error returned by Delete Wrap the error returned by repairQueue.Delete method to enhance the error with a class and stack and the pkg/storage/segments.Repairer.Repair method get a more contextualized error from it.
2019-07-23 15:28:06 +01:00
return Error.Wrap(err)
}
func (r *repairQueue) SelectN(ctx context.Context, limit int) (segs []pb.InjuredSegment, err error) {
defer mon.Task()(&ctx)(&err)
if limit <= 0 || limit > storage.LookupLimit {
limit = storage.LookupLimit
}
//todo: strictly enforce order-by or change tests
rows, err := r.db.QueryContext(ctx, r.db.Rebind(`SELECT data FROM injuredsegments LIMIT ?`), limit)
2019-07-25 16:01:44 +01:00
if err != nil {
return nil, Error.Wrap(err)
}
for rows.Next() {
var seg pb.InjuredSegment
err = rows.Scan(&seg)
if err != nil {
2019-07-25 16:01:44 +01:00
return segs, Error.Wrap(err)
}
segs = append(segs, seg)
}
2019-07-25 16:01:44 +01:00
return segs, Error.Wrap(rows.Err())
}
// Count returns the number of rows in the injuredsegments table. The scan
// error, if any, is wrapped with Error.Wrap.
func (r *repairQueue) Count(ctx context.Context) (count int, err error) {
	defer mon.Task()(&ctx)(&err)

	// Count every segment regardless of how recently repair was last attempted
	query := r.db.Rebind(`SELECT COUNT(*) as count FROM injuredsegments`)
	row := r.db.QueryRowContext(ctx, query)
	scanErr := row.Scan(&count)
	return count, Error.Wrap(scanErr)
}