2019-01-24 20:15:10 +00:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
2018-10-02 20:46:29 +01:00
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
2019-01-18 13:54:08 +00:00
|
|
|
package checker_test
|
2018-10-09 17:09:33 +01:00
|
|
|
|
|
|
|
import (
|
2019-05-08 18:59:50 +01:00
|
|
|
"context"
|
2019-02-14 12:33:41 +00:00
|
|
|
"fmt"
|
2018-10-09 17:09:33 +01:00
|
|
|
"testing"
|
2018-10-30 19:16:40 +00:00
|
|
|
"time"
|
2018-10-09 17:09:33 +01:00
|
|
|
|
2019-02-14 12:33:41 +00:00
|
|
|
"github.com/stretchr/testify/require"
|
2019-05-08 18:59:50 +01:00
|
|
|
"github.com/zeebo/errs"
|
2018-11-29 14:57:00 +00:00
|
|
|
|
2019-01-18 13:54:08 +00:00
|
|
|
"storj.io/storj/internal/testcontext"
|
|
|
|
"storj.io/storj/internal/testplanet"
|
2019-03-18 10:55:06 +00:00
|
|
|
"storj.io/storj/internal/teststorj"
|
2019-05-08 18:59:50 +01:00
|
|
|
"storj.io/storj/pkg/datarepair/checker"
|
2018-10-09 17:09:33 +01:00
|
|
|
"storj.io/storj/pkg/pb"
|
2018-11-29 18:39:27 +00:00
|
|
|
"storj.io/storj/pkg/storj"
|
2019-02-06 13:03:38 +00:00
|
|
|
"storj.io/storj/storage"
|
2018-10-09 17:09:33 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestIdentifyInjuredSegments(t *testing.T) {
|
2019-02-05 16:00:52 +00:00
|
|
|
testplanet.Run(t, testplanet.Config{
|
|
|
|
SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 0,
|
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
2019-02-11 21:06:39 +00:00
|
|
|
checker := planet.Satellites[0].Repair.Checker
|
2019-02-14 12:33:41 +00:00
|
|
|
checker.Loop.Stop()
|
2019-02-05 16:00:52 +00:00
|
|
|
|
2019-04-25 09:46:32 +01:00
|
|
|
//add noise to metainfo before bad record
|
2019-02-14 12:33:41 +00:00
|
|
|
for x := 0; x < 1000; x++ {
|
|
|
|
makePointer(t, planet, fmt.Sprintf("a-%d", x), false)
|
2019-02-05 16:00:52 +00:00
|
|
|
}
|
2019-02-14 12:33:41 +00:00
|
|
|
//create piece that needs repair
|
|
|
|
makePointer(t, planet, fmt.Sprintf("b"), true)
|
2019-04-25 09:46:32 +01:00
|
|
|
//add more noise to metainfo after bad record
|
2019-02-14 12:33:41 +00:00
|
|
|
for x := 0; x < 1000; x++ {
|
|
|
|
makePointer(t, planet, fmt.Sprintf("c-%d", x), false)
|
2019-01-18 13:54:08 +00:00
|
|
|
}
|
2019-02-14 12:33:41 +00:00
|
|
|
err := checker.IdentifyInjuredSegments(ctx)
|
2019-04-16 19:14:09 +01:00
|
|
|
require.NoError(t, err)
|
2019-02-05 16:00:52 +00:00
|
|
|
|
|
|
|
//check if the expected segments were added to the queue
|
|
|
|
repairQueue := planet.Satellites[0].DB.RepairQueue()
|
2019-04-16 19:14:09 +01:00
|
|
|
injuredSegment, err := repairQueue.Select(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
err = repairQueue.Delete(ctx, injuredSegment)
|
|
|
|
require.NoError(t, err)
|
2018-12-06 18:51:23 +00:00
|
|
|
|
2019-02-14 12:33:41 +00:00
|
|
|
numValidNode := int32(len(planet.StorageNodes))
|
2019-04-16 19:14:09 +01:00
|
|
|
require.Equal(t, "b", injuredSegment.Path)
|
|
|
|
require.Equal(t, len(planet.StorageNodes), len(injuredSegment.LostPieces))
|
2019-02-05 16:00:52 +00:00
|
|
|
for _, lostPiece := range injuredSegment.LostPieces {
|
2019-02-14 12:33:41 +00:00
|
|
|
// makePointer() starts with numValidNode good pieces
|
2019-04-16 19:14:09 +01:00
|
|
|
require.True(t, lostPiece >= numValidNode, fmt.Sprintf("%d >= %d \n", lostPiece, numValidNode))
|
2019-02-14 12:33:41 +00:00
|
|
|
// makePointer() than has numValidNode bad pieces
|
2019-04-16 19:14:09 +01:00
|
|
|
require.True(t, lostPiece < numValidNode*2, fmt.Sprintf("%d < %d \n", lostPiece, numValidNode*2))
|
2019-02-05 16:00:52 +00:00
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
2018-10-09 17:09:33 +01:00
|
|
|
|
2019-02-06 13:03:38 +00:00
|
|
|
func TestIdentifyIrreparableSegments(t *testing.T) {
|
|
|
|
testplanet.Run(t, testplanet.Config{
|
|
|
|
SatelliteCount: 1, StorageNodeCount: 3, UplinkCount: 0,
|
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
2019-02-11 21:06:39 +00:00
|
|
|
checker := planet.Satellites[0].Repair.Checker
|
2019-02-14 12:33:41 +00:00
|
|
|
checker.Loop.Stop()
|
2019-02-06 13:03:38 +00:00
|
|
|
|
|
|
|
const numberOfNodes = 10
|
|
|
|
pieces := make([]*pb.RemotePiece, 0, numberOfNodes)
|
|
|
|
// use online nodes
|
|
|
|
for i, storagenode := range planet.StorageNodes {
|
|
|
|
pieces = append(pieces, &pb.RemotePiece{
|
|
|
|
PieceNum: int32(i),
|
|
|
|
NodeId: storagenode.ID(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// simulate offline nodes
|
|
|
|
expectedLostPieces := make(map[int32]bool)
|
|
|
|
for i := len(pieces); i < numberOfNodes; i++ {
|
|
|
|
pieces = append(pieces, &pb.RemotePiece{
|
|
|
|
PieceNum: int32(i),
|
|
|
|
NodeId: storj.NodeID{byte(i)},
|
|
|
|
})
|
|
|
|
expectedLostPieces[int32(i)] = true
|
|
|
|
}
|
|
|
|
pointer := &pb.Pointer{
|
2019-07-08 23:16:50 +01:00
|
|
|
CreationDate: time.Now(),
|
2019-02-06 13:03:38 +00:00
|
|
|
Remote: &pb.RemoteSegment{
|
|
|
|
Redundancy: &pb.RedundancyScheme{
|
2019-06-19 21:13:11 +01:00
|
|
|
MinReq: int32(3),
|
|
|
|
RepairThreshold: int32(8),
|
|
|
|
SuccessThreshold: int32(9),
|
|
|
|
Total: int32(10),
|
2019-02-06 13:03:38 +00:00
|
|
|
},
|
2019-03-18 10:55:06 +00:00
|
|
|
RootPieceId: teststorj.PieceIDFromString("fake-piece-id"),
|
2019-02-06 13:03:38 +00:00
|
|
|
RemotePieces: pieces,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
// put test pointer to db
|
2019-04-25 09:46:32 +01:00
|
|
|
metainfo := planet.Satellites[0].Metainfo.Service
|
2019-06-05 15:23:10 +01:00
|
|
|
err := metainfo.Put(ctx, "fake-piece-id", pointer)
|
2019-04-16 19:14:09 +01:00
|
|
|
require.NoError(t, err)
|
2019-02-06 13:03:38 +00:00
|
|
|
|
|
|
|
err = checker.IdentifyInjuredSegments(ctx)
|
2019-04-16 19:14:09 +01:00
|
|
|
require.NoError(t, err)
|
2019-02-06 13:03:38 +00:00
|
|
|
|
|
|
|
// check if nothing was added to repair queue
|
|
|
|
repairQueue := planet.Satellites[0].DB.RepairQueue()
|
2019-04-16 19:14:09 +01:00
|
|
|
_, err = repairQueue.Select(ctx)
|
|
|
|
require.True(t, storage.ErrEmptyQueue.Has(err))
|
2019-02-06 13:03:38 +00:00
|
|
|
|
|
|
|
//check if the expected segments were added to the irreparable DB
|
|
|
|
irreparable := planet.Satellites[0].DB.Irreparable()
|
|
|
|
remoteSegmentInfo, err := irreparable.Get(ctx, []byte("fake-piece-id"))
|
2019-04-16 19:14:09 +01:00
|
|
|
require.NoError(t, err)
|
2019-02-06 13:03:38 +00:00
|
|
|
|
2019-04-16 19:14:09 +01:00
|
|
|
require.Equal(t, len(expectedLostPieces), int(remoteSegmentInfo.LostPieces))
|
|
|
|
require.Equal(t, 1, int(remoteSegmentInfo.RepairAttemptCount))
|
2019-03-15 20:21:52 +00:00
|
|
|
firstRepair := remoteSegmentInfo.LastRepairAttempt
|
2019-02-06 13:03:38 +00:00
|
|
|
|
|
|
|
// check irreparable once again but wait a second
|
|
|
|
time.Sleep(1 * time.Second)
|
|
|
|
err = checker.IdentifyInjuredSegments(ctx)
|
2019-04-16 19:14:09 +01:00
|
|
|
require.NoError(t, err)
|
2019-02-06 13:03:38 +00:00
|
|
|
|
|
|
|
remoteSegmentInfo, err = irreparable.Get(ctx, []byte("fake-piece-id"))
|
2019-04-16 19:14:09 +01:00
|
|
|
require.NoError(t, err)
|
2019-02-06 13:03:38 +00:00
|
|
|
|
2019-04-16 19:14:09 +01:00
|
|
|
require.Equal(t, len(expectedLostPieces), int(remoteSegmentInfo.LostPieces))
|
2019-02-06 13:03:38 +00:00
|
|
|
// check if repair attempt count was incremented
|
2019-04-16 19:14:09 +01:00
|
|
|
require.Equal(t, 2, int(remoteSegmentInfo.RepairAttemptCount))
|
|
|
|
require.True(t, firstRepair < remoteSegmentInfo.LastRepairAttempt)
|
2019-05-30 16:18:20 +01:00
|
|
|
|
|
|
|
// make the pointer repairable
|
|
|
|
pointer = &pb.Pointer{
|
2019-07-08 23:16:50 +01:00
|
|
|
CreationDate: time.Now(),
|
2019-05-30 16:18:20 +01:00
|
|
|
Remote: &pb.RemoteSegment{
|
|
|
|
Redundancy: &pb.RedundancyScheme{
|
2019-06-19 21:13:11 +01:00
|
|
|
MinReq: int32(2),
|
|
|
|
RepairThreshold: int32(8),
|
|
|
|
SuccessThreshold: int32(9),
|
|
|
|
Total: int32(10),
|
2019-05-30 16:18:20 +01:00
|
|
|
},
|
|
|
|
RootPieceId: teststorj.PieceIDFromString("fake-piece-id"),
|
|
|
|
RemotePieces: pieces,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
// put test pointer to db
|
|
|
|
metainfo = planet.Satellites[0].Metainfo.Service
|
2019-06-05 15:23:10 +01:00
|
|
|
err = metainfo.Put(ctx, "fake-piece-id", pointer)
|
2019-05-30 16:18:20 +01:00
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
err = checker.IdentifyInjuredSegments(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
remoteSegmentInfo, err = irreparable.Get(ctx, []byte("fake-piece-id"))
|
|
|
|
require.Error(t, err)
|
2019-02-06 13:03:38 +00:00
|
|
|
})
|
|
|
|
}
|
2019-02-14 12:33:41 +00:00
|
|
|
|
|
|
|
func makePointer(t *testing.T, planet *testplanet.Planet, pieceID string, createLost bool) {
|
2019-06-05 15:23:10 +01:00
|
|
|
ctx := context.TODO()
|
2019-02-14 12:33:41 +00:00
|
|
|
numOfStorageNodes := len(planet.StorageNodes)
|
|
|
|
pieces := make([]*pb.RemotePiece, 0, numOfStorageNodes)
|
|
|
|
// use online nodes
|
|
|
|
for i := 0; i < numOfStorageNodes; i++ {
|
|
|
|
pieces = append(pieces, &pb.RemotePiece{
|
|
|
|
PieceNum: int32(i),
|
|
|
|
NodeId: planet.StorageNodes[i].Identity.ID,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
// simulate offline nodes equal to the number of online nodes
|
|
|
|
if createLost {
|
|
|
|
for i := 0; i < numOfStorageNodes; i++ {
|
|
|
|
pieces = append(pieces, &pb.RemotePiece{
|
|
|
|
PieceNum: int32(numOfStorageNodes + i),
|
|
|
|
NodeId: storj.NodeID{byte(i)},
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
minReq, repairThreshold := numOfStorageNodes-1, numOfStorageNodes-1
|
|
|
|
if createLost {
|
|
|
|
minReq, repairThreshold = numOfStorageNodes-1, numOfStorageNodes+1
|
|
|
|
}
|
|
|
|
pointer := &pb.Pointer{
|
2019-07-08 23:16:50 +01:00
|
|
|
CreationDate: time.Now(),
|
2019-02-14 12:33:41 +00:00
|
|
|
Remote: &pb.RemoteSegment{
|
|
|
|
Redundancy: &pb.RedundancyScheme{
|
2019-06-19 21:13:11 +01:00
|
|
|
MinReq: int32(minReq),
|
|
|
|
RepairThreshold: int32(repairThreshold),
|
|
|
|
SuccessThreshold: int32(repairThreshold) + 1,
|
|
|
|
Total: int32(repairThreshold) + 2,
|
2019-02-14 12:33:41 +00:00
|
|
|
},
|
2019-03-18 10:55:06 +00:00
|
|
|
RootPieceId: teststorj.PieceIDFromString(pieceID),
|
2019-02-14 12:33:41 +00:00
|
|
|
RemotePieces: pieces,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
// put test pointer to db
|
|
|
|
pointerdb := planet.Satellites[0].Metainfo.Service
|
2019-06-05 15:23:10 +01:00
|
|
|
err := pointerdb.Put(ctx, pieceID, pointer)
|
2019-02-14 12:33:41 +00:00
|
|
|
require.NoError(t, err)
|
|
|
|
}
|
2019-05-08 18:59:50 +01:00
|
|
|
|
|
|
|
func TestCheckerResume(t *testing.T) {
|
|
|
|
testplanet.Run(t, testplanet.Config{
|
|
|
|
SatelliteCount: 1, StorageNodeCount: 4, UplinkCount: 0,
|
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
|
|
repairQueue := &mockRepairQueue{}
|
2019-05-30 16:18:20 +01:00
|
|
|
irrepairQueue := planet.Satellites[0].DB.Irreparable()
|
2019-07-08 23:04:35 +01:00
|
|
|
config := checker.Config{
|
|
|
|
Interval: 30 * time.Second,
|
|
|
|
IrreparableInterval: 15 * time.Second,
|
|
|
|
ReliabilityCacheStaleness: 5 * time.Minute,
|
|
|
|
}
|
|
|
|
c := checker.NewChecker(planet.Satellites[0].Metainfo.Service, repairQueue, planet.Satellites[0].Overlay.Service, irrepairQueue, 0, nil, config)
|
2019-05-08 18:59:50 +01:00
|
|
|
|
|
|
|
// create pointer that needs repair
|
|
|
|
makePointer(t, planet, "a", true)
|
|
|
|
// create pointer that will cause an error
|
|
|
|
makePointer(t, planet, "b", true)
|
|
|
|
// create pointer that needs repair
|
|
|
|
makePointer(t, planet, "c", true)
|
|
|
|
// create pointer that will cause an error
|
|
|
|
makePointer(t, planet, "d", true)
|
|
|
|
|
|
|
|
err := c.IdentifyInjuredSegments(ctx)
|
|
|
|
require.Error(t, err)
|
|
|
|
|
|
|
|
// "a" should be the only segment in the repair queue
|
|
|
|
injuredSegment, err := repairQueue.Select(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, injuredSegment.Path, "a")
|
|
|
|
err = repairQueue.Delete(ctx, injuredSegment)
|
|
|
|
require.NoError(t, err)
|
|
|
|
injuredSegment, err = repairQueue.Select(ctx)
|
|
|
|
require.Error(t, err)
|
|
|
|
|
|
|
|
err = c.IdentifyInjuredSegments(ctx)
|
|
|
|
require.Error(t, err)
|
|
|
|
|
|
|
|
// "c" should be the only segment in the repair queue
|
|
|
|
injuredSegment, err = repairQueue.Select(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, injuredSegment.Path, "c")
|
|
|
|
err = repairQueue.Delete(ctx, injuredSegment)
|
|
|
|
require.NoError(t, err)
|
|
|
|
injuredSegment, err = repairQueue.Select(ctx)
|
|
|
|
require.Error(t, err)
|
|
|
|
|
|
|
|
err = c.IdentifyInjuredSegments(ctx)
|
|
|
|
require.Error(t, err)
|
|
|
|
|
|
|
|
// "a" should be the only segment in the repair queue
|
|
|
|
injuredSegment, err = repairQueue.Select(ctx)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, injuredSegment.Path, "a")
|
|
|
|
err = repairQueue.Delete(ctx, injuredSegment)
|
|
|
|
require.NoError(t, err)
|
|
|
|
injuredSegment, err = repairQueue.Select(ctx)
|
|
|
|
require.Error(t, err)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// mock repair queue used for TestCheckerResume
|
|
|
|
type mockRepairQueue struct {
|
|
|
|
injuredSegments []pb.InjuredSegment
|
|
|
|
}
|
|
|
|
|
|
|
|
func (mockRepairQueue *mockRepairQueue) Insert(ctx context.Context, s *pb.InjuredSegment) error {
|
|
|
|
if s.Path == "b" || s.Path == "d" {
|
|
|
|
return errs.New("mock Insert error")
|
|
|
|
}
|
|
|
|
mockRepairQueue.injuredSegments = append(mockRepairQueue.injuredSegments, *s)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (mockRepairQueue *mockRepairQueue) Select(ctx context.Context) (*pb.InjuredSegment, error) {
|
|
|
|
if len(mockRepairQueue.injuredSegments) == 0 {
|
|
|
|
return &pb.InjuredSegment{}, errs.New("mock Select error")
|
|
|
|
}
|
|
|
|
s := mockRepairQueue.injuredSegments[0]
|
|
|
|
return &s, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (mockRepairQueue *mockRepairQueue) Delete(ctx context.Context, s *pb.InjuredSegment) error {
|
|
|
|
var toDelete int
|
|
|
|
found := false
|
|
|
|
for i, seg := range mockRepairQueue.injuredSegments {
|
|
|
|
if seg.Path == s.Path {
|
|
|
|
toDelete = i
|
|
|
|
found = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !found {
|
|
|
|
return errs.New("mock Delete error")
|
|
|
|
}
|
|
|
|
|
|
|
|
mockRepairQueue.injuredSegments = append(mockRepairQueue.injuredSegments[:toDelete], mockRepairQueue.injuredSegments[toDelete+1:]...)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (mockRepairQueue *mockRepairQueue) SelectN(ctx context.Context, limit int) ([]pb.InjuredSegment, error) {
|
|
|
|
return []pb.InjuredSegment{}, errs.New("mock SelectN error")
|
|
|
|
}
|