2019-03-21 14:26:56 +00:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package segments_test
|
|
|
|
|
|
|
|
import (
|
|
|
|
"math/rand"
|
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
|
|
|
|
"storj.io/storj/internal/memory"
|
|
|
|
"storj.io/storj/internal/testcontext"
|
|
|
|
"storj.io/storj/internal/testplanet"
|
|
|
|
"storj.io/storj/pkg/pb"
|
|
|
|
ecclient "storj.io/storj/pkg/storage/ec"
|
|
|
|
"storj.io/storj/pkg/storage/segments"
|
|
|
|
"storj.io/storj/pkg/storj"
|
|
|
|
"storj.io/storj/uplink"
|
|
|
|
)
|
|
|
|
|
|
|
|
func TestSegmentStoreRepair(t *testing.T) {
|
2019-03-23 08:06:11 +00:00
|
|
|
|
2019-03-21 14:26:56 +00:00
|
|
|
testplanet.Run(t, testplanet.Config{
|
2019-04-08 18:33:47 +01:00
|
|
|
SatelliteCount: 1, StorageNodeCount: 6, UplinkCount: 1,
|
2019-03-21 14:26:56 +00:00
|
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
|
|
// first, upload some remote data
|
|
|
|
ul := planet.Uplinks[0]
|
|
|
|
satellite := planet.Satellites[0]
|
|
|
|
|
2019-03-22 13:14:17 +00:00
|
|
|
satellite.Repair.Checker.Loop.Stop()
|
2019-04-04 00:00:25 +01:00
|
|
|
// stop discovery service so that we do not get a race condition when we delete nodes from overlay cache
|
|
|
|
satellite.Discovery.Service.Discovery.Stop()
|
2019-03-22 13:14:17 +00:00
|
|
|
|
|
|
|
testData := make([]byte, 1*memory.MiB)
|
2019-03-21 14:26:56 +00:00
|
|
|
_, err := rand.Read(testData)
|
2019-04-04 00:00:25 +01:00
|
|
|
require.NoError(t, err)
|
2019-03-21 14:26:56 +00:00
|
|
|
|
|
|
|
err = ul.UploadWithConfig(ctx, satellite, &uplink.RSConfig{
|
2019-04-04 00:00:25 +01:00
|
|
|
MinThreshold: 2,
|
|
|
|
RepairThreshold: 3,
|
|
|
|
SuccessThreshold: 4,
|
2019-04-08 18:33:47 +01:00
|
|
|
MaxThreshold: 4,
|
2019-03-21 14:26:56 +00:00
|
|
|
}, "testbucket", "test/path", testData)
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// get a remote segment from pointerdb
|
|
|
|
pdb := satellite.Metainfo.Service
|
|
|
|
listResponse, _, err := pdb.List("", "", "", true, 0, 0)
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
var path string
|
|
|
|
var pointer *pb.Pointer
|
|
|
|
for _, v := range listResponse {
|
|
|
|
path = v.GetPath()
|
|
|
|
pointer, err = pdb.Get(path)
|
2019-04-04 00:00:25 +01:00
|
|
|
require.NoError(t, err)
|
2019-03-21 14:26:56 +00:00
|
|
|
if pointer.GetType() == pb.Pointer_REMOTE {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// calculate how many storagenodes to kill
|
|
|
|
numStorageNodes := len(planet.StorageNodes)
|
|
|
|
redundancy := pointer.GetRemote().GetRedundancy()
|
|
|
|
remotePieces := pointer.GetRemote().GetRemotePieces()
|
|
|
|
minReq := redundancy.GetMinReq()
|
|
|
|
numPieces := len(remotePieces)
|
|
|
|
toKill := numPieces - int(minReq)
|
|
|
|
// we should have enough storage nodes to repair on
|
|
|
|
assert.True(t, (numStorageNodes-toKill) >= numPieces)
|
|
|
|
|
|
|
|
// kill nodes and track lost pieces
|
|
|
|
var lostPieces []int32
|
|
|
|
nodesToKill := make(map[storj.NodeID]bool)
|
|
|
|
nodesToKeepAlive := make(map[storj.NodeID]bool)
|
|
|
|
for i, piece := range remotePieces {
|
|
|
|
if i >= toKill {
|
|
|
|
nodesToKeepAlive[piece.NodeId] = true
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
nodesToKill[piece.NodeId] = true
|
|
|
|
lostPieces = append(lostPieces, piece.GetPieceNum())
|
|
|
|
}
|
|
|
|
for _, node := range planet.StorageNodes {
|
|
|
|
if nodesToKill[node.ID()] {
|
|
|
|
err = planet.StopPeer(node)
|
2019-04-04 00:00:25 +01:00
|
|
|
require.NoError(t, err)
|
2019-04-04 17:34:36 +01:00
|
|
|
_, err = satellite.Overlay.Service.UpdateUptime(ctx, node.ID(), false)
|
|
|
|
require.NoError(t, err)
|
2019-03-21 14:26:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// repair segment
|
2019-03-28 20:09:23 +00:00
|
|
|
os := satellite.Orders.Service
|
2019-03-21 14:26:56 +00:00
|
|
|
oc := satellite.Overlay.Service
|
|
|
|
ec := ecclient.NewClient(satellite.Transport, 0)
|
2019-03-28 20:09:23 +00:00
|
|
|
repairer := segments.NewSegmentRepairer(pdb, os, oc, ec, satellite.Identity, time.Minute)
|
2019-03-21 14:26:56 +00:00
|
|
|
assert.NotNil(t, repairer)
|
|
|
|
|
|
|
|
err = repairer.Repair(ctx, path, lostPieces)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
2019-04-04 00:00:25 +01:00
|
|
|
// kill one of the nodes kept alive to ensure repair worked
|
2019-03-21 14:26:56 +00:00
|
|
|
for _, node := range planet.StorageNodes {
|
|
|
|
if nodesToKeepAlive[node.ID()] {
|
|
|
|
err = planet.StopPeer(node)
|
2019-04-04 00:00:25 +01:00
|
|
|
require.NoError(t, err)
|
2019-04-04 17:34:36 +01:00
|
|
|
_, err = satellite.Overlay.Service.UpdateUptime(ctx, node.ID(), false)
|
|
|
|
require.NoError(t, err)
|
2019-04-04 00:00:25 +01:00
|
|
|
break
|
2019-03-21 14:26:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// we should be able to download data without any of the original nodes
|
|
|
|
newData, err := ul.Download(ctx, satellite, "testbucket", "test/path")
|
|
|
|
assert.NoError(t, err)
|
|
|
|
assert.Equal(t, newData, testData)
|
|
|
|
|
|
|
|
// updated pointer should not contain any of the killed nodes
|
|
|
|
pointer, err = pdb.Get(path)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
|
|
|
remotePieces = pointer.GetRemote().GetRemotePieces()
|
|
|
|
for _, piece := range remotePieces {
|
|
|
|
assert.False(t, nodesToKill[piece.NodeId])
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|