storj/pkg/storage/segments/repairer_test.go

132 lines
3.8 KiB
Go
Raw Normal View History

2019-03-21 14:26:56 +00:00
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package segments_test
import (
"math/rand"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"storj.io/storj/internal/memory"
"storj.io/storj/internal/testcontext"
"storj.io/storj/internal/testplanet"
"storj.io/storj/pkg/pb"
ecclient "storj.io/storj/pkg/storage/ec"
"storj.io/storj/pkg/storage/segments"
"storj.io/storj/pkg/storj"
"storj.io/storj/uplink"
)
func TestSegmentStoreRepair(t *testing.T) {
2019-03-23 08:06:11 +00:00
2019-03-21 14:26:56 +00:00
testplanet.Run(t, testplanet.Config{
2019-04-08 18:33:47 +01:00
SatelliteCount: 1, StorageNodeCount: 6, UplinkCount: 1,
2019-03-21 14:26:56 +00:00
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
// first, upload some remote data
ul := planet.Uplinks[0]
satellite := planet.Satellites[0]
2019-03-22 13:14:17 +00:00
satellite.Repair.Checker.Loop.Stop()
2019-04-04 00:00:25 +01:00
// stop discovery service so that we do not get a race condition when we delete nodes from overlay cache
satellite.Discovery.Service.Discovery.Stop()
2019-03-22 13:14:17 +00:00
testData := make([]byte, 1*memory.MiB)
2019-03-21 14:26:56 +00:00
_, err := rand.Read(testData)
2019-04-04 00:00:25 +01:00
require.NoError(t, err)
2019-03-21 14:26:56 +00:00
err = ul.UploadWithConfig(ctx, satellite, &uplink.RSConfig{
2019-04-04 00:00:25 +01:00
MinThreshold: 2,
RepairThreshold: 3,
SuccessThreshold: 4,
2019-04-08 18:33:47 +01:00
MaxThreshold: 4,
2019-03-21 14:26:56 +00:00
}, "testbucket", "test/path", testData)
require.NoError(t, err)
// get a remote segment from pointerdb
pdb := satellite.Metainfo.Service
listResponse, _, err := pdb.List("", "", "", true, 0, 0)
require.NoError(t, err)
var path string
var pointer *pb.Pointer
for _, v := range listResponse {
path = v.GetPath()
pointer, err = pdb.Get(path)
2019-04-04 00:00:25 +01:00
require.NoError(t, err)
2019-03-21 14:26:56 +00:00
if pointer.GetType() == pb.Pointer_REMOTE {
break
}
}
// calculate how many storagenodes to kill
numStorageNodes := len(planet.StorageNodes)
redundancy := pointer.GetRemote().GetRedundancy()
remotePieces := pointer.GetRemote().GetRemotePieces()
minReq := redundancy.GetMinReq()
numPieces := len(remotePieces)
toKill := numPieces - int(minReq)
// we should have enough storage nodes to repair on
assert.True(t, (numStorageNodes-toKill) >= numPieces)
// kill nodes and track lost pieces
var lostPieces []int32
nodesToKill := make(map[storj.NodeID]bool)
nodesToKeepAlive := make(map[storj.NodeID]bool)
for i, piece := range remotePieces {
if i >= toKill {
nodesToKeepAlive[piece.NodeId] = true
continue
}
nodesToKill[piece.NodeId] = true
lostPieces = append(lostPieces, piece.GetPieceNum())
}
for _, node := range planet.StorageNodes {
if nodesToKill[node.ID()] {
err = planet.StopPeer(node)
2019-04-04 00:00:25 +01:00
require.NoError(t, err)
_, err = satellite.Overlay.Service.UpdateUptime(ctx, node.ID(), false)
require.NoError(t, err)
2019-03-21 14:26:56 +00:00
}
}
// repair segment
2019-03-28 20:09:23 +00:00
os := satellite.Orders.Service
2019-03-21 14:26:56 +00:00
oc := satellite.Overlay.Service
ec := ecclient.NewClient(satellite.Transport, 0)
2019-03-28 20:09:23 +00:00
repairer := segments.NewSegmentRepairer(pdb, os, oc, ec, satellite.Identity, time.Minute)
2019-03-21 14:26:56 +00:00
assert.NotNil(t, repairer)
err = repairer.Repair(ctx, path, lostPieces)
assert.NoError(t, err)
2019-04-04 00:00:25 +01:00
// kill one of the nodes kept alive to ensure repair worked
2019-03-21 14:26:56 +00:00
for _, node := range planet.StorageNodes {
if nodesToKeepAlive[node.ID()] {
err = planet.StopPeer(node)
2019-04-04 00:00:25 +01:00
require.NoError(t, err)
_, err = satellite.Overlay.Service.UpdateUptime(ctx, node.ID(), false)
require.NoError(t, err)
2019-04-04 00:00:25 +01:00
break
2019-03-21 14:26:56 +00:00
}
}
// we should be able to download data without any of the original nodes
newData, err := ul.Download(ctx, satellite, "testbucket", "test/path")
assert.NoError(t, err)
assert.Equal(t, newData, testData)
// updated pointer should not contain any of the killed nodes
pointer, err = pdb.Get(path)
assert.NoError(t, err)
remotePieces = pointer.GetRemote().GetRemotePieces()
for _, piece := range remotePieces {
assert.False(t, nodesToKill[piece.NodeId])
}
})
}