2019-01-24 20:15:10 +00:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
2018-10-02 20:46:29 +01:00
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
2019-01-18 13:54:08 +00:00
|
|
|
package checker_test
|
2018-10-09 17:09:33 +01:00
|
|
|
|
|
|
|
import (
|
2019-05-08 18:59:50 +01:00
|
|
|
"context"
|
2019-02-14 12:33:41 +00:00
|
|
|
"fmt"
|
2018-10-09 17:09:33 +01:00
|
|
|
"testing"
|
2018-10-30 19:16:40 +00:00
|
|
|
"time"
|
2018-10-09 17:09:33 +01:00
|
|
|
|
2019-02-14 12:33:41 +00:00
|
|
|
"github.com/stretchr/testify/require"
|
2018-11-29 14:57:00 +00:00
|
|
|
|
2019-12-27 11:48:47 +00:00
|
|
|
"storj.io/common/storj"
|
|
|
|
"storj.io/common/testcontext"
|
|
|
|
"storj.io/common/testrand"
|
2021-06-17 16:05:04 +01:00
|
|
|
"storj.io/common/uuid"
|
2019-11-14 19:46:15 +00:00
|
|
|
"storj.io/storj/private/testplanet"
|
2021-04-21 13:42:57 +01:00
|
|
|
"storj.io/storj/satellite/metabase"
|
2018-10-09 17:09:33 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestIdentifyInjuredSegments(t *testing.T) {
|
2019-02-05 16:00:52 +00:00
|
|
|
testplanet.Run(t, testplanet.Config{
|
2020-12-10 20:49:23 +00:00
|
|
|
SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
|
2019-02-05 16:00:52 +00:00
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
2019-02-11 21:06:39 +00:00
|
|
|
checker := planet.Satellites[0].Repair.Checker
|
2019-10-04 11:05:25 +01:00
|
|
|
repairQueue := planet.Satellites[0].DB.RepairQueue()
|
|
|
|
|
|
|
|
checker.Loop.Pause()
|
|
|
|
planet.Satellites[0].Repair.Repairer.Loop.Pause()
|
|
|
|
|
2020-12-10 20:49:23 +00:00
|
|
|
rs := storj.RedundancyScheme{
|
|
|
|
RequiredShares: 2,
|
|
|
|
RepairShares: 3,
|
|
|
|
OptimalShares: 4,
|
|
|
|
TotalShares: 5,
|
|
|
|
ShareSize: 256,
|
2019-10-04 11:05:25 +01:00
|
|
|
}
|
2019-02-05 16:00:52 +00:00
|
|
|
|
2020-12-10 20:49:23 +00:00
|
|
|
projectID := planet.Uplinks[0].Projects[0].ID
|
|
|
|
err := planet.Uplinks[0].CreateBucket(ctx, planet.Satellites[0], "test-bucket")
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
expectedLocation := metabase.SegmentLocation{
|
|
|
|
ProjectID: projectID,
|
|
|
|
BucketName: "test-bucket",
|
|
|
|
}
|
2019-09-12 11:38:49 +01:00
|
|
|
|
2019-10-04 11:05:25 +01:00
|
|
|
// add some valid pointers
|
2019-07-18 17:21:21 +01:00
|
|
|
for x := 0; x < 10; x++ {
|
2020-12-10 20:49:23 +00:00
|
|
|
expectedLocation.ObjectKey = metabase.ObjectKey(fmt.Sprintf("a-%d", x))
|
|
|
|
insertSegment(ctx, t, planet, rs, expectedLocation, createPieces(planet, rs), time.Time{})
|
2019-02-05 16:00:52 +00:00
|
|
|
}
|
2019-10-04 11:05:25 +01:00
|
|
|
|
|
|
|
// add pointer that needs repair
|
2020-12-10 20:49:23 +00:00
|
|
|
expectedLocation.ObjectKey = metabase.ObjectKey("b-0")
|
2021-06-17 16:05:04 +01:00
|
|
|
b0StreamID := insertSegment(ctx, t, planet, rs, expectedLocation, createLostPieces(planet, rs), time.Time{})
|
2020-04-15 20:20:16 +01:00
|
|
|
|
|
|
|
// add pointer that is unhealthy, but is expired
|
2020-12-10 20:49:23 +00:00
|
|
|
expectedLocation.ObjectKey = metabase.ObjectKey("b-1")
|
|
|
|
insertSegment(ctx, t, planet, rs, expectedLocation, createLostPieces(planet, rs), time.Now().Add(-time.Hour))
|
2019-10-04 11:05:25 +01:00
|
|
|
|
|
|
|
// add some valid pointers
|
2019-07-18 17:21:21 +01:00
|
|
|
for x := 0; x < 10; x++ {
|
2020-12-10 20:49:23 +00:00
|
|
|
expectedLocation.ObjectKey = metabase.ObjectKey(fmt.Sprintf("c-%d", x))
|
|
|
|
insertSegment(ctx, t, planet, rs, expectedLocation, createPieces(planet, rs), time.Time{})
|
2019-01-18 13:54:08 +00:00
|
|
|
}
|
2019-10-04 11:05:25 +01:00
|
|
|
|
|
|
|
checker.Loop.TriggerWait()
|
2019-02-05 16:00:52 +00:00
|
|
|
|
2020-04-15 20:20:16 +01:00
|
|
|
// check that the unhealthy, non-expired segment was added to the queue
|
|
|
|
// and that the expired segment was ignored
|
2019-04-16 19:14:09 +01:00
|
|
|
injuredSegment, err := repairQueue.Select(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
err = repairQueue.Delete(ctx, injuredSegment)
|
|
|
|
require.NoError(t, err)
|
2018-12-06 18:51:23 +00:00
|
|
|
|
2021-06-17 16:05:04 +01:00
|
|
|
require.Equal(t, b0StreamID, injuredSegment.StreamID)
|
2019-10-04 11:05:25 +01:00
|
|
|
|
|
|
|
_, err = repairQueue.Select(ctx)
|
|
|
|
require.Error(t, err)
|
2019-02-05 16:00:52 +00:00
|
|
|
})
|
|
|
|
}
|
2018-10-09 17:09:33 +01:00
|
|
|
|
2019-02-06 13:03:38 +00:00
|
|
|
func TestIdentifyIrreparableSegments(t *testing.T) {
|
|
|
|
testplanet.Run(t, testplanet.Config{
|
2020-12-10 20:49:23 +00:00
|
|
|
SatelliteCount: 1, StorageNodeCount: 3, UplinkCount: 1,
|
2019-02-06 13:03:38 +00:00
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
2019-02-11 21:06:39 +00:00
|
|
|
checker := planet.Satellites[0].Repair.Checker
|
2019-02-14 12:33:41 +00:00
|
|
|
checker.Loop.Stop()
|
2019-02-06 13:03:38 +00:00
|
|
|
|
|
|
|
const numberOfNodes = 10
|
2020-12-10 20:49:23 +00:00
|
|
|
pieces := make(metabase.Pieces, 0, numberOfNodes)
|
2019-02-06 13:03:38 +00:00
|
|
|
// use online nodes
|
|
|
|
for i, storagenode := range planet.StorageNodes {
|
2020-12-10 20:49:23 +00:00
|
|
|
pieces = append(pieces, metabase.Piece{
|
|
|
|
Number: uint16(i),
|
|
|
|
StorageNode: storagenode.ID(),
|
2019-02-06 13:03:38 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// simulate offline nodes
|
|
|
|
expectedLostPieces := make(map[int32]bool)
|
|
|
|
for i := len(pieces); i < numberOfNodes; i++ {
|
2020-12-10 20:49:23 +00:00
|
|
|
pieces = append(pieces, metabase.Piece{
|
|
|
|
Number: uint16(i),
|
|
|
|
StorageNode: storj.NodeID{byte(i)},
|
2019-02-06 13:03:38 +00:00
|
|
|
})
|
|
|
|
expectedLostPieces[int32(i)] = true
|
|
|
|
}
|
2019-09-06 20:20:36 +01:00
|
|
|
|
2020-12-10 20:49:23 +00:00
|
|
|
rs := storj.RedundancyScheme{
|
|
|
|
ShareSize: 256,
|
|
|
|
RequiredShares: 4,
|
|
|
|
RepairShares: 8,
|
|
|
|
OptimalShares: 9,
|
|
|
|
TotalShares: 10,
|
2019-02-06 13:03:38 +00:00
|
|
|
}
|
|
|
|
|
2020-12-10 20:49:23 +00:00
|
|
|
projectID := planet.Uplinks[0].Projects[0].ID
|
|
|
|
err := planet.Uplinks[0].CreateBucket(ctx, planet.Satellites[0], "test-bucket")
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
expectedLocation := metabase.SegmentLocation{
|
2020-09-08 11:13:18 +01:00
|
|
|
ProjectID: projectID,
|
2020-12-10 20:49:23 +00:00
|
|
|
BucketName: "test-bucket",
|
2020-09-08 11:13:18 +01:00
|
|
|
}
|
|
|
|
|
2020-12-10 20:49:23 +00:00
|
|
|
// when number of healthy piece is less than minimum required number of piece in redundancy,
|
2021-06-15 22:45:31 +01:00
|
|
|
// the piece is considered irreparable but also will be put into repair queue
|
2020-12-10 20:49:23 +00:00
|
|
|
|
|
|
|
expectedLocation.ObjectKey = "piece"
|
|
|
|
insertSegment(ctx, t, planet, rs, expectedLocation, pieces, time.Time{})
|
|
|
|
|
|
|
|
expectedLocation.ObjectKey = "piece-expired"
|
|
|
|
insertSegment(ctx, t, planet, rs, expectedLocation, pieces, time.Now().Add(-time.Hour))
|
2019-02-06 13:03:38 +00:00
|
|
|
|
|
|
|
err = checker.IdentifyInjuredSegments(ctx)
|
2019-04-16 19:14:09 +01:00
|
|
|
require.NoError(t, err)
|
2019-02-06 13:03:38 +00:00
|
|
|
|
2021-06-15 22:45:31 +01:00
|
|
|
// check that single irreparable segment was added repair queue
|
2019-02-06 13:03:38 +00:00
|
|
|
repairQueue := planet.Satellites[0].DB.RepairQueue()
|
2019-04-16 19:14:09 +01:00
|
|
|
_, err = repairQueue.Select(ctx)
|
|
|
|
require.NoError(t, err)
|
2021-06-15 22:45:31 +01:00
|
|
|
count, err := repairQueue.Count(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, 1, count)
|
2019-02-06 13:03:38 +00:00
|
|
|
|
|
|
|
// check irreparable once again but wait a second
|
|
|
|
time.Sleep(1 * time.Second)
|
|
|
|
err = checker.IdentifyInjuredSegments(ctx)
|
2019-04-16 19:14:09 +01:00
|
|
|
require.NoError(t, err)
|
2019-02-06 13:03:38 +00:00
|
|
|
|
2020-12-10 20:49:23 +00:00
|
|
|
expectedLocation.ObjectKey = "piece"
|
|
|
|
_, err = planet.Satellites[0].Metainfo.Metabase.DeleteObjectLatestVersion(ctx, metabase.DeleteObjectLatestVersion{
|
|
|
|
ObjectLocation: expectedLocation.Object(),
|
|
|
|
})
|
2019-05-30 16:18:20 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
err = checker.IdentifyInjuredSegments(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
2021-06-15 22:45:31 +01:00
|
|
|
count, err = repairQueue.Count(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, 0, count)
|
2019-02-06 13:03:38 +00:00
|
|
|
})
|
|
|
|
}
|
2019-02-14 12:33:41 +00:00
|
|
|
|
2020-09-09 21:52:22 +01:00
|
|
|
func TestCleanRepairQueue(t *testing.T) {
|
|
|
|
testplanet.Run(t, testplanet.Config{
|
2020-12-10 20:49:23 +00:00
|
|
|
SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 1,
|
2020-09-09 21:52:22 +01:00
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
|
|
checker := planet.Satellites[0].Repair.Checker
|
|
|
|
repairQueue := planet.Satellites[0].DB.RepairQueue()
|
|
|
|
|
|
|
|
checker.Loop.Pause()
|
|
|
|
planet.Satellites[0].Repair.Repairer.Loop.Pause()
|
|
|
|
|
2020-12-10 20:49:23 +00:00
|
|
|
rs := storj.RedundancyScheme{
|
|
|
|
RequiredShares: 2,
|
|
|
|
RepairShares: 3,
|
|
|
|
OptimalShares: 4,
|
|
|
|
TotalShares: 5,
|
|
|
|
ShareSize: 256,
|
2020-09-09 21:52:22 +01:00
|
|
|
}
|
|
|
|
|
2020-12-10 20:49:23 +00:00
|
|
|
projectID := planet.Uplinks[0].Projects[0].ID
|
|
|
|
err := planet.Uplinks[0].CreateBucket(ctx, planet.Satellites[0], "test-bucket")
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
expectedLocation := metabase.SegmentLocation{
|
|
|
|
ProjectID: projectID,
|
|
|
|
BucketName: "test-bucket",
|
|
|
|
}
|
2020-09-09 21:52:22 +01:00
|
|
|
|
|
|
|
healthyCount := 5
|
|
|
|
for i := 0; i < healthyCount; i++ {
|
2020-12-10 20:49:23 +00:00
|
|
|
expectedLocation.ObjectKey = metabase.ObjectKey(fmt.Sprintf("healthy-%d", i))
|
|
|
|
insertSegment(ctx, t, planet, rs, expectedLocation, createPieces(planet, rs), time.Time{})
|
2020-09-09 21:52:22 +01:00
|
|
|
}
|
|
|
|
unhealthyCount := 5
|
2021-06-17 16:05:04 +01:00
|
|
|
unhealthyIDs := make(map[uuid.UUID]struct{})
|
2020-09-09 21:52:22 +01:00
|
|
|
for i := 0; i < unhealthyCount; i++ {
|
2020-12-10 20:49:23 +00:00
|
|
|
expectedLocation.ObjectKey = metabase.ObjectKey(fmt.Sprintf("unhealthy-%d", i))
|
2021-06-17 16:05:04 +01:00
|
|
|
unhealthyStreamID := insertSegment(ctx, t, planet, rs, expectedLocation, createLostPieces(planet, rs), time.Time{})
|
|
|
|
unhealthyIDs[unhealthyStreamID] = struct{}{}
|
2020-09-09 21:52:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// suspend enough nodes to make healthy pointers unhealthy
|
2020-12-10 20:49:23 +00:00
|
|
|
for i := rs.RequiredShares; i < rs.OptimalShares; i++ {
|
2020-09-09 21:52:22 +01:00
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.DB.SuspendNodeUnknownAudit(ctx, planet.StorageNodes[i].ID(), time.Now()))
|
|
|
|
}
|
|
|
|
|
|
|
|
require.NoError(t, planet.Satellites[0].Repair.Checker.RefreshReliabilityCache(ctx))
|
|
|
|
|
|
|
|
// check that repair queue is empty to avoid false positive
|
|
|
|
count, err := repairQueue.Count(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, 0, count)
|
|
|
|
|
|
|
|
checker.Loop.TriggerWait()
|
|
|
|
|
|
|
|
// check that the pointers were put into the repair queue
|
|
|
|
// and not cleaned up at the end of the checker iteration
|
|
|
|
count, err = repairQueue.Count(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, healthyCount+unhealthyCount, count)
|
|
|
|
|
|
|
|
// unsuspend nodes to make the previously healthy pointers healthy again
|
2020-12-10 20:49:23 +00:00
|
|
|
for i := rs.RequiredShares; i < rs.OptimalShares; i++ {
|
2020-09-09 21:52:22 +01:00
|
|
|
require.NoError(t, planet.Satellites[0].Overlay.DB.UnsuspendNodeUnknownAudit(ctx, planet.StorageNodes[i].ID()))
|
|
|
|
}
|
|
|
|
|
|
|
|
require.NoError(t, planet.Satellites[0].Repair.Checker.RefreshReliabilityCache(ctx))
|
|
|
|
|
|
|
|
// The checker will not insert/update the now healthy segments causing
|
|
|
|
// them to be removed from the queue at the end of the checker iteration
|
|
|
|
checker.Loop.TriggerWait()
|
|
|
|
|
|
|
|
// only unhealthy segments should remain
|
|
|
|
count, err = repairQueue.Count(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, unhealthyCount, count)
|
|
|
|
|
|
|
|
segs, err := repairQueue.SelectN(ctx, count)
|
|
|
|
require.NoError(t, err)
|
2021-06-17 16:05:04 +01:00
|
|
|
require.Equal(t, len(unhealthyIDs), len(segs))
|
2020-09-09 21:52:22 +01:00
|
|
|
|
|
|
|
for _, s := range segs {
|
2021-06-17 16:05:04 +01:00
|
|
|
_, ok := unhealthyIDs[s.StreamID]
|
|
|
|
require.True(t, ok)
|
2020-09-09 21:52:22 +01:00
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2020-12-10 20:49:23 +00:00
|
|
|
func createPieces(planet *testplanet.Planet, rs storj.RedundancyScheme) metabase.Pieces {
|
|
|
|
pieces := make(metabase.Pieces, rs.OptimalShares)
|
|
|
|
for i := range pieces {
|
|
|
|
pieces[i] = metabase.Piece{
|
|
|
|
Number: uint16(i),
|
|
|
|
StorageNode: planet.StorageNodes[i].Identity.ID,
|
2019-10-04 11:05:25 +01:00
|
|
|
}
|
2020-12-10 20:49:23 +00:00
|
|
|
}
|
|
|
|
return pieces
|
|
|
|
}
|
|
|
|
|
|
|
|
func createLostPieces(planet *testplanet.Planet, rs storj.RedundancyScheme) metabase.Pieces {
|
|
|
|
pieces := make(metabase.Pieces, rs.OptimalShares)
|
|
|
|
for i := range pieces[:rs.RequiredShares] {
|
|
|
|
pieces[i] = metabase.Piece{
|
|
|
|
Number: uint16(i),
|
|
|
|
StorageNode: planet.StorageNodes[i].Identity.ID,
|
2019-10-04 11:05:25 +01:00
|
|
|
}
|
2020-12-10 20:49:23 +00:00
|
|
|
}
|
|
|
|
for i := rs.RequiredShares; i < rs.OptimalShares; i++ {
|
|
|
|
pieces[i] = metabase.Piece{
|
|
|
|
Number: uint16(i),
|
|
|
|
StorageNode: storj.NodeID{byte(0xFF)},
|
2019-02-14 12:33:41 +00:00
|
|
|
}
|
|
|
|
}
|
2020-12-10 20:49:23 +00:00
|
|
|
return pieces
|
|
|
|
}
|
2019-10-04 11:05:25 +01:00
|
|
|
|
2021-06-17 16:05:04 +01:00
|
|
|
func insertSegment(ctx context.Context, t *testing.T, planet *testplanet.Planet, rs storj.RedundancyScheme, location metabase.SegmentLocation, pieces metabase.Pieces, expire time.Time) uuid.UUID {
|
2020-12-10 20:49:23 +00:00
|
|
|
var expiresAt *time.Time
|
2020-04-15 20:20:16 +01:00
|
|
|
if !expire.IsZero() {
|
2020-12-10 20:49:23 +00:00
|
|
|
expiresAt = &expire
|
2020-04-15 20:20:16 +01:00
|
|
|
}
|
2019-10-04 11:05:25 +01:00
|
|
|
|
2020-12-10 20:49:23 +00:00
|
|
|
metabaseDB := planet.Satellites[0].Metainfo.Metabase
|
|
|
|
|
|
|
|
obj := metabase.ObjectStream{
|
|
|
|
ProjectID: location.ProjectID,
|
|
|
|
BucketName: location.BucketName,
|
|
|
|
ObjectKey: location.ObjectKey,
|
|
|
|
Version: 1,
|
|
|
|
StreamID: testrand.UUID(),
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err := metabaseDB.BeginObjectExactVersion(ctx, metabase.BeginObjectExactVersion{
|
|
|
|
ObjectStream: obj,
|
|
|
|
Encryption: storj.EncryptionParameters{
|
|
|
|
CipherSuite: storj.EncAESGCM,
|
|
|
|
BlockSize: 256,
|
|
|
|
},
|
|
|
|
ExpiresAt: expiresAt,
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
rootPieceID := testrand.PieceID()
|
|
|
|
err = metabaseDB.BeginSegment(ctx, metabase.BeginSegment{
|
|
|
|
ObjectStream: obj,
|
|
|
|
RootPieceID: rootPieceID,
|
|
|
|
Pieces: pieces,
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
err = metabaseDB.CommitSegment(ctx, metabase.CommitSegment{
|
|
|
|
ObjectStream: obj,
|
|
|
|
RootPieceID: rootPieceID,
|
|
|
|
Pieces: pieces,
|
|
|
|
EncryptedKey: testrand.Bytes(256),
|
|
|
|
EncryptedKeyNonce: testrand.Bytes(256),
|
|
|
|
PlainSize: 1,
|
|
|
|
EncryptedSize: 1,
|
|
|
|
Redundancy: rs,
|
|
|
|
})
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
_, err = metabaseDB.CommitObject(ctx, metabase.CommitObject{
|
|
|
|
ObjectStream: obj,
|
|
|
|
})
|
2019-02-14 12:33:41 +00:00
|
|
|
require.NoError(t, err)
|
2021-06-17 16:05:04 +01:00
|
|
|
|
|
|
|
return obj.StreamID
|
2019-02-14 12:33:41 +00:00
|
|
|
}
|