2018-12-13 07:12:36 +00:00
|
|
|
// Copyright (C) 2018 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package segments
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
|
|
|
|
"storj.io/storj/pkg/overlay"
|
|
|
|
"storj.io/storj/pkg/pb"
|
|
|
|
"storj.io/storj/pkg/piecestore/psclient"
|
|
|
|
"storj.io/storj/pkg/pointerdb/pdbclient"
|
2019-01-02 18:47:34 +00:00
|
|
|
ecclient "storj.io/storj/pkg/storage/ec"
|
2018-12-13 07:12:36 +00:00
|
|
|
"storj.io/storj/pkg/storj"
|
|
|
|
"storj.io/storj/pkg/utils"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Repairer for segments
|
|
|
|
type Repairer struct {
|
|
|
|
oc overlay.Client
|
|
|
|
ec ecclient.Client
|
|
|
|
pdb pdbclient.Client
|
|
|
|
nodeStats *pb.NodeStats
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewSegmentRepairer creates a new instance of SegmentRepairer
|
2018-12-17 21:05:05 +00:00
|
|
|
func NewSegmentRepairer(oc overlay.Client, ec ecclient.Client, pdb pdbclient.Client) *Repairer {
|
|
|
|
return &Repairer{oc: oc, ec: ec, pdb: pdb}
|
2018-12-13 07:12:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Repair retrieves an at-risk segment and repairs and stores lost pieces on new nodes
|
|
|
|
func (s *Repairer) Repair(ctx context.Context, path storj.Path, lostPieces []int32) (err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
|
|
|
// Read the segment's pointer's info from the PointerDB
|
2019-01-10 11:41:57 +00:00
|
|
|
pr, originalNodes, _, err := s.pdb.Get(ctx, path)
|
2018-12-13 07:12:36 +00:00
|
|
|
if err != nil {
|
|
|
|
return Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if pr.GetType() != pb.Pointer_REMOTE {
|
|
|
|
return Error.New("cannot repair inline segment %s", psclient.PieceID(pr.GetInlineSegment()))
|
|
|
|
}
|
|
|
|
|
|
|
|
seg := pr.GetRemote()
|
|
|
|
pid := psclient.PieceID(seg.GetPieceId())
|
|
|
|
|
|
|
|
originalNodes, err = lookupAndAlignNodes(ctx, s.oc, originalNodes, seg)
|
|
|
|
if err != nil {
|
|
|
|
return Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get the nodes list that needs to be excluded
|
|
|
|
var excludeNodeIDs storj.NodeIDList
|
|
|
|
|
|
|
|
// Count the number of nil nodes thats needs to be repaired
|
|
|
|
totalNilNodes := 0
|
|
|
|
|
|
|
|
healthyNodes := make([]*pb.Node, len(originalNodes))
|
|
|
|
|
|
|
|
// Populate healthyNodes with all nodes from originalNodes except those correlating to indices in lostPieces
|
|
|
|
for i, v := range originalNodes {
|
|
|
|
if v == nil {
|
|
|
|
totalNilNodes++
|
|
|
|
continue
|
|
|
|
}
|
2019-01-02 18:47:34 +00:00
|
|
|
v.Type.DPanicOnInvalid("repair")
|
2018-12-13 07:12:36 +00:00
|
|
|
excludeNodeIDs = append(excludeNodeIDs, v.Id)
|
|
|
|
|
|
|
|
// If node index exists in lostPieces, skip adding it to healthyNodes
|
|
|
|
if contains(lostPieces, i) {
|
|
|
|
totalNilNodes++
|
|
|
|
} else {
|
|
|
|
healthyNodes[i] = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Request Overlay for n-h new storage nodes
|
|
|
|
op := overlay.Options{Amount: totalNilNodes, Space: 0, Excluded: excludeNodeIDs}
|
|
|
|
newNodes, err := s.oc.Choose(ctx, op)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if totalNilNodes != len(newNodes) {
|
|
|
|
return Error.New("Number of new nodes from overlay (%d) does not equal total nil nodes (%d)", len(newNodes), totalNilNodes)
|
|
|
|
}
|
|
|
|
|
|
|
|
totalRepairCount := len(newNodes)
|
|
|
|
|
|
|
|
// Make a repair nodes list just with new unique ids
|
|
|
|
repairNodes := make([]*pb.Node, len(healthyNodes))
|
|
|
|
for i, vr := range healthyNodes {
|
|
|
|
// Check that totalRepairCount is non-negative
|
|
|
|
if totalRepairCount < 0 {
|
|
|
|
return Error.New("Total repair count (%d) less than zero", totalRepairCount)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Find the nil nodes in the healthyNodes list
|
|
|
|
if vr == nil {
|
|
|
|
// Assign the item in repairNodes list with an item from the newNode list
|
|
|
|
totalRepairCount--
|
|
|
|
repairNodes[i] = newNodes[totalRepairCount]
|
|
|
|
}
|
|
|
|
}
|
2019-01-02 18:47:34 +00:00
|
|
|
for _, v := range repairNodes {
|
|
|
|
v.Type.DPanicOnInvalid("repair 2")
|
|
|
|
}
|
2018-12-13 07:12:36 +00:00
|
|
|
|
|
|
|
// Check that all nil nodes have a replacement prepared
|
|
|
|
if totalRepairCount != 0 {
|
|
|
|
return Error.New("Failed to replace all nil nodes (%d). (%d) new nodes not inserted", len(newNodes), totalRepairCount)
|
|
|
|
}
|
|
|
|
|
|
|
|
rs, err := makeRedundancyStrategy(pr.GetRemote().GetRedundancy())
|
|
|
|
if err != nil {
|
|
|
|
return Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
signedMessage := s.pdb.SignedMessage()
|
2019-01-10 11:41:57 +00:00
|
|
|
pbaGet, err := s.pdb.PayerBandwidthAllocation(ctx, pb.PayerBandwidthAllocation_GET_REPAIR)
|
|
|
|
if err != nil {
|
|
|
|
return Error.Wrap(err)
|
|
|
|
}
|
2018-12-13 07:12:36 +00:00
|
|
|
// Download the segment using just the healthyNodes
|
2019-01-10 11:41:57 +00:00
|
|
|
rr, err := s.ec.Get(ctx, healthyNodes, rs, pid, pr.GetSegmentSize(), pbaGet, signedMessage)
|
2018-12-13 07:12:36 +00:00
|
|
|
if err != nil {
|
|
|
|
return Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
r, err := rr.Range(ctx, 0, rr.Size())
|
|
|
|
if err != nil {
|
|
|
|
return Error.Wrap(err)
|
|
|
|
}
|
|
|
|
defer utils.LogClose(r)
|
|
|
|
|
2019-01-10 11:41:57 +00:00
|
|
|
pbaPut, err := s.pdb.PayerBandwidthAllocation(ctx, pb.PayerBandwidthAllocation_PUT_REPAIR)
|
|
|
|
if err != nil {
|
|
|
|
return Error.Wrap(err)
|
|
|
|
}
|
2018-12-13 07:12:36 +00:00
|
|
|
// Upload the repaired pieces to the repairNodes
|
2019-01-10 11:41:57 +00:00
|
|
|
successfulNodes, err := s.ec.Put(ctx, repairNodes, rs, pid, r, convertTime(pr.GetExpirationDate()), pbaPut, signedMessage)
|
2018-12-13 07:12:36 +00:00
|
|
|
if err != nil {
|
|
|
|
return Error.Wrap(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Merge the successful nodes list into the healthy nodes list
|
|
|
|
for i, v := range healthyNodes {
|
|
|
|
if v == nil {
|
|
|
|
// copy the successfuNode info
|
|
|
|
healthyNodes[i] = successfulNodes[i]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
metadata := pr.GetMetadata()
|
|
|
|
pointer, err := makeRemotePointer(healthyNodes, rs, pid, rr.Size(), pr.GetExpirationDate(), metadata)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// update the segment info in the pointerDB
|
|
|
|
return s.pdb.Put(ctx, path, pointer)
|
|
|
|
}
|