satellite/repair: fix lastSeenSegmentKey bug in IrreparableProcess

A change was made to use a metabase.SegmentKey (a byte slice alias)
as the last seen item to iterate through the irreparable DB in a
for loop. However, this SegmentKey was not initialized, thus it was
nil. This caused the DB query to return nothing, and healthy segments
could not be cleaned out of the irreparable DB.

Change-Id: Idb30d6fef6113a30a27158d548f62c7443e65a81
This commit is contained in:
Cameron Ayer 2020-11-03 12:09:50 -05:00 committed by Cameron Ayer
parent 1e356f1c5f
commit d63b7658e8
2 changed files with 88 additions and 1 deletions

View File

@ -391,7 +391,7 @@ func (obs *checkerObserver) InlineSegment(ctx context.Context, segment *metainfo
func (checker *Checker) IrreparableProcess(ctx context.Context) (err error) {
defer mon.Task()(&ctx)(&err)
const limit = 1000
var lastSeenSegmentKey metabase.SegmentKey
lastSeenSegmentKey := metabase.SegmentKey{}
for {
segments, err := checker.irrdb.GetLimited(ctx, limit, lastSeenSegmentKey)

View File

@ -13,8 +13,10 @@ import (
"storj.io/common/pb"
"storj.io/common/testcontext"
"storj.io/storj/private/testplanet"
"storj.io/storj/satellite"
"storj.io/storj/satellite/internalpb"
"storj.io/storj/satellite/metainfo/metabase"
"storj.io/storj/satellite/satellitedb/satellitedbtest"
)
@ -103,3 +105,88 @@ func TestIrreparable(t *testing.T) {
}
})
}
// TestIrreparableProcess runs Checker.IrreparableProcess against a single
// entry in the irreparable DB, blanking the node IDs of its remote pieces
// step by step, and asserts where the entry ends up after each run.
func TestIrreparableProcess(t *testing.T) {
	config := testplanet.Config{
		SatelliteCount: 1, StorageNodeCount: 3, UplinkCount: 0,
	}
	testplanet.Run(t, config, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
		sat := planet.Satellites[0]

		// Stop both checker loops so only the explicit IrreparableProcess
		// calls below are driven by this test.
		repairChecker := sat.Repair.Checker
		repairChecker.Loop.Stop()
		repairChecker.IrreparableLoop.Stop()

		irrDB := sat.DB.Irreparable()
		repairQueue := sat.DB.RepairQueue()

		// One remote piece per storage node in the planet.
		pieces := make([]*pb.RemotePiece, 0, 3)
		for _, node := range planet.StorageNodes[:3] {
			pieces = append(pieces, &pb.RemotePiece{NodeId: node.ID()})
		}

		pointer := &pb.Pointer{
			Type:         pb.Pointer_REMOTE,
			CreationDate: time.Now(),
			Remote: &pb.RemoteSegment{
				Redundancy: &pb.RedundancyScheme{
					MinReq:           1,
					RepairThreshold:  2,
					SuccessThreshold: 3,
					Total:            4,
				},
				RemotePieces: pieces,
			},
		}
		segment := &internalpb.IrreparableSegment{
			Path:               []byte{1},
			SegmentDetail:      pointer,
			LostPieces:         4,
			LastRepairAttempt:  time.Now().Unix(),
			RepairAttemptCount: 10,
		}
		key := metabase.SegmentKey(segment.GetPath())

		// blankPiece replaces the i-th remote piece with an empty one,
		// i.e. a piece that no longer references a storage node.
		blankPiece := func(i int) {
			segment.SegmentDetail.Remote.RemotePieces[i] = &pb.RemotePiece{}
		}
		// reinsertAndProcess writes the segment to the irreparable DB and
		// then runs one pass of the irreparable process.
		reinsertAndProcess := func() {
			require.NoError(t, irrDB.IncrementRepairAttempts(ctx, segment))
			require.NoError(t, repairChecker.IrreparableProcess(ctx))
		}
		// requireGone asserts that looking up the segment now fails.
		requireGone := func() {
			got, err := irrDB.Get(ctx, key)
			require.Error(t, err)
			require.Nil(t, got)
		}

		// Seed the irreparable DB and confirm the entry is readable.
		require.NoError(t, irrDB.IncrementRepairAttempts(ctx, segment))
		stored, err := irrDB.Get(ctx, key)
		require.NoError(t, err)
		require.NotNil(t, stored)

		// Case 1: all pieces reference planet nodes; the entry is expected
		// to be dropped from the irreparable DB.
		require.NoError(t, repairChecker.IrreparableProcess(ctx))
		requireGone()

		// Case 2: two pieces blanked; the entry is expected to leave the
		// irreparable DB and show up in the repair queue instead.
		blankPiece(0)
		blankPiece(1)
		reinsertAndProcess()
		requireGone()

		injured, err := repairQueue.Select(ctx)
		require.NoError(t, err)
		require.Equal(t, segment.GetPath(), injured.GetPath())

		// Clear the queue and check exactly one item was removed.
		cleaned, err := repairQueue.Clean(ctx, time.Now())
		require.NoError(t, err)
		require.EqualValues(t, 1, cleaned)

		// Case 3: all pieces blanked; the entry is expected to stay in the
		// irreparable DB with its repair attempt count bumped by one.
		blankPiece(2)
		reinsertAndProcess()

		stored, err = irrDB.Get(ctx, key)
		require.NoError(t, err)
		require.Equal(t, segment.GetPath(), stored.Path)
		require.Equal(t, segment.RepairAttemptCount+1, stored.RepairAttemptCount)
	})
}