satellite/repair/checker: optimize processing, part 3

ClassifySegmentPieces uses a custom set implementation instead of a map.

Side note: for the custom set implementation I also tried an int8 bit set, but
it didn't give better performance, so I used the simpler implementation.

Benchmark results (compared against part 2 optimization change):
name                                       old time/op    new time/op    delta
RemoteSegment/healthy_segment-8    21.7µs ± 8%    15.4µs ±16%  -29.38%  (p=0.008 n=5+5)

name                                       old alloc/op   new alloc/op   delta
RemoteSegment/healthy_segment-8    7.41kB ± 0%    1.87kB ± 0%  -74.83%  (p=0.000 n=5+4)

name                                       old allocs/op  new allocs/op  delta
RemoteSegment/healthy_segment-8       150 ± 0%       130 ± 0%  -13.33%  (p=0.008 n=5+5)

Change-Id: I21feca9ec6ac0a2558ac5ce8894451c54f69e52d
This commit is contained in:
Michal Niewrzal 2023-10-06 11:53:48 +02:00 committed by Storj Robot
parent c3fbac2e7a
commit 0eaf43120b
4 changed files with 188 additions and 118 deletions

View File

@ -390,18 +390,18 @@ func (fork *observerFork) process(ctx context.Context, segment *rangedloop.Segme
piecesCheck := repair.ClassifySegmentPieces(segment.Pieces, selectedNodes, fork.excludedCountryCodes, fork.doPlacementCheck,
fork.doDeclumping, fork.placementRules(segment.Placement), fork.nodeIDs)
numHealthy := len(piecesCheck.Healthy)
numHealthy := piecesCheck.Healthy.Size()
segmentTotalCountIntVal.Observe(int64(len(pieces)))
stats.segmentStats.segmentTotalCount.Observe(int64(len(pieces)))
segmentHealthyCountIntVal.Observe(int64(numHealthy))
stats.segmentStats.segmentHealthyCount.Observe(int64(numHealthy))
segmentClumpedCountIntVal.Observe(int64(len(piecesCheck.Clumped)))
stats.segmentStats.segmentClumpedCount.Observe(int64(len(piecesCheck.Clumped)))
segmentExitingCountIntVal.Observe(int64(len(piecesCheck.Exiting)))
stats.segmentStats.segmentExitingCount.Observe(int64(len(piecesCheck.Exiting)))
segmentOffPlacementCountIntVal.Observe(int64(len(piecesCheck.OutOfPlacement)))
stats.segmentStats.segmentOffPlacementCount.Observe(int64(len(piecesCheck.OutOfPlacement)))
segmentClumpedCountIntVal.Observe(int64(piecesCheck.Clumped.Size()))
stats.segmentStats.segmentClumpedCount.Observe(int64(piecesCheck.Clumped.Size()))
segmentExitingCountIntVal.Observe(int64(piecesCheck.Exiting.Size()))
stats.segmentStats.segmentExitingCount.Observe(int64(piecesCheck.Exiting.Size()))
segmentOffPlacementCountIntVal.Observe(int64(piecesCheck.OutOfPlacement.Size()))
stats.segmentStats.segmentOffPlacementCount.Observe(int64(piecesCheck.OutOfPlacement.Size()))
segmentAge := time.Since(segment.CreatedAt)
segmentAgeIntVal.Observe(int64(segmentAge.Seconds()))
@ -417,7 +417,7 @@ func (fork *observerFork) process(ctx context.Context, segment *rangedloop.Segme
// except for the case when the repair and success thresholds are the same (a case usually seen during testing).
// separate case is when we find pieces which are outside segment placement. in such case we are putting segment
// into queue right away.
if (numHealthy <= repairThreshold && numHealthy < successThreshold) || len(piecesCheck.ForcingRepair) > 0 {
if (numHealthy <= repairThreshold && numHealthy < successThreshold) || piecesCheck.ForcingRepair.Size() > 0 {
injuredSegmentHealthFloatVal.Observe(segmentHealth)
stats.segmentStats.injuredSegmentHealth.Observe(segmentHealth)
fork.totalStats.remoteSegmentsNeedingRepair++
@ -440,7 +440,7 @@ func (fork *observerFork) process(ctx context.Context, segment *rangedloop.Segme
}
// monitor irreparable segments
if len(piecesCheck.Retrievable) < required {
if piecesCheck.Retrievable.Size() < required {
if !slices.Contains(fork.totalStats.objectsLost, segment.StreamID) {
fork.totalStats.objectsLost = append(fork.totalStats.objectsLost, segment.StreamID)
}
@ -472,13 +472,13 @@ func (fork *observerFork) process(ctx context.Context, segment *rangedloop.Segme
var missingNodes []string
for _, piece := range pieces {
if _, isMissing := piecesCheck.Missing[piece.Number]; isMissing {
if piecesCheck.Missing.Contains(int(piece.Number)) {
missingNodes = append(missingNodes, piece.StorageNode.String())
}
}
fork.log.Warn("checker found irreparable segment", zap.String("Segment StreamID", segment.StreamID.String()), zap.Int("Segment Position",
int(segment.Position.Encode())), zap.Int("total pieces", len(pieces)), zap.Int("min required", required), zap.String("unavailable node IDs", strings.Join(missingNodes, ",")))
} else if len(piecesCheck.Clumped) > 0 && len(piecesCheck.Healthy)+len(piecesCheck.Clumped) > repairThreshold && len(piecesCheck.ForcingRepair) == 0 {
} else if piecesCheck.Clumped.Size() > 0 && piecesCheck.Healthy.Size()+piecesCheck.Clumped.Size() > repairThreshold && piecesCheck.ForcingRepair.Size() == 0 {
// This segment is to be repaired because of clumping (it wouldn't need repair yet
// otherwise). Produce a brief report of where the clumping occurred so that we have
// a better understanding of the cause.

View File

@ -4,8 +4,6 @@
package repair
import (
"golang.org/x/exp/maps"
"storj.io/common/storj"
"storj.io/common/storj/location"
"storj.io/storj/satellite/metabase"
@ -22,39 +20,39 @@ type PiecesCheckResult struct {
// Missing is a set of Piece Numbers which are to be considered as lost and irretrievable.
// (They reside on offline/disqualified/unknown nodes.)
Missing map[uint16]struct{}
Missing IntSet
// Retrievable contains all Piece Numbers that are retrievable; that is, all piece numbers
// from the segment that are NOT in Missing.
Retrievable map[uint16]struct{}
Retrievable IntSet
// Suspended is a set of Piece Numbers which reside on nodes which are suspended.
Suspended map[uint16]struct{}
Suspended IntSet
// Clumped is a set of Piece Numbers which are to be considered unhealthy because of IP
// clumping. (If DoDeclumping is disabled, this set will be empty.)
Clumped map[uint16]struct{}
Clumped IntSet
// Exiting is a set of Piece Numbers which are considered unhealthy because the node on
// which they reside has initiated graceful exit.
Exiting map[uint16]struct{}
Exiting IntSet
// OutOfPlacement is a set of Piece Numbers which are unhealthy because of placement rules.
// (If DoPlacementCheck is disabled, this set will be empty.)
OutOfPlacement map[uint16]struct{}
OutOfPlacement IntSet
// InExcludedCountry is a set of Piece Numbers which are unhealthy because they are in
// Excluded countries.
InExcludedCountry map[uint16]struct{}
InExcludedCountry IntSet
// ForcingRepair is the set of pieces which force a repair operation for this segment (that
// includes, currently, only pieces in OutOfPlacement).
ForcingRepair map[uint16]struct{}
ForcingRepair IntSet
// Unhealthy contains all Piece Numbers which are in Missing OR Suspended OR Clumped OR
// Exiting OR OutOfPlacement OR InExcludedCountry.
Unhealthy map[uint16]struct{}
Unhealthy IntSet
// UnhealthyRetrievable is the set of pieces that are "unhealthy-but-retrievable". That is,
// pieces that are in Unhealthy AND Retrievable.
UnhealthyRetrievable map[uint16]struct{}
UnhealthyRetrievable IntSet
// Healthy contains all Piece Numbers from the segment which are not in Unhealthy.
// (Equivalently: all Piece Numbers from the segment which are NOT in Missing OR
// Suspended OR Clumped OR Exiting OR OutOfPlacement OR InExcludedCountry).
Healthy map[uint16]struct{}
Healthy IntSet
}
// ClassifySegmentPieces classifies the pieces of a segment into the categories
@ -64,12 +62,20 @@ func ClassifySegmentPieces(pieces metabase.Pieces, nodes []nodeselection.Selecte
doPlacementCheck, doDeclumping bool, filter nodeselection.NodeFilter, excludeNodeIDs []storj.NodeID) (result PiecesCheckResult) {
result.ExcludeNodeIDs = excludeNodeIDs
maxPieceNum := 0
for _, piece := range pieces {
if int(piece.Number) > maxPieceNum {
maxPieceNum = int(piece.Number)
}
}
maxPieceNum++
// check excluded countries and remove online nodes from missing pieces
result.Missing = make(map[uint16]struct{})
result.Suspended = make(map[uint16]struct{})
result.Exiting = make(map[uint16]struct{})
result.Retrievable = make(map[uint16]struct{})
result.InExcludedCountry = make(map[uint16]struct{})
result.Missing = NewIntSet(maxPieceNum)
result.Suspended = NewIntSet(maxPieceNum)
result.Exiting = NewIntSet(maxPieceNum)
result.Retrievable = NewIntSet(maxPieceNum)
result.InExcludedCountry = NewIntSet(maxPieceNum)
for index, nodeRecord := range nodes {
pieceNum := pieces[index].Number
@ -80,21 +86,21 @@ func ClassifySegmentPieces(pieces metabase.Pieces, nodes []nodeselection.Selecte
if nodeRecord.ID.IsZero() || !nodeRecord.Online {
// node ID was not found, or the node is disqualified or exited,
// or it is offline
result.Missing[pieceNum] = struct{}{}
result.Missing.Include(int(pieceNum))
} else {
// node is expected to be online and receiving requests.
result.Retrievable[pieceNum] = struct{}{}
result.Retrievable.Include(int(pieceNum))
}
if nodeRecord.Suspended {
result.Suspended[pieceNum] = struct{}{}
result.Suspended.Include(int(pieceNum))
}
if nodeRecord.Exiting {
result.Exiting[pieceNum] = struct{}{}
result.Exiting.Include(int(pieceNum))
}
if _, excluded := excludedCountryCodes[nodeRecord.CountryCode]; excluded {
result.InExcludedCountry[pieceNum] = struct{}{}
result.InExcludedCountry.Include(int(pieceNum))
}
}
@ -103,7 +109,7 @@ func ClassifySegmentPieces(pieces metabase.Pieces, nodes []nodeselection.Selecte
// to be considered retrievable but unhealthy.
lastNets := make(map[string]struct{}, len(pieces))
result.Clumped = make(map[uint16]struct{})
result.Clumped = NewIntSet(maxPieceNum)
collectClumpedPieces := func(onlineness bool) {
for index, nodeRecord := range nodes {
@ -117,7 +123,7 @@ func ClassifySegmentPieces(pieces metabase.Pieces, nodes []nodeselection.Selecte
_, ok := lastNets[nodeRecord.LastNet]
if ok {
// this LastNet was already seen
result.Clumped[pieceNum] = struct{}{}
result.Clumped.Include(int(pieceNum))
} else {
// add to the list of seen LastNets
lastNets[nodeRecord.LastNet] = struct{}{}
@ -133,7 +139,7 @@ func ClassifySegmentPieces(pieces metabase.Pieces, nodes []nodeselection.Selecte
if doPlacementCheck {
// mark all pieces that are out of placement.
result.OutOfPlacement = make(map[uint16]struct{})
result.OutOfPlacement = NewIntSet(maxPieceNum)
for index, nodeRecord := range nodes {
if nodeRecord.ID.IsZero() {
continue
@ -142,37 +148,92 @@ func ClassifySegmentPieces(pieces metabase.Pieces, nodes []nodeselection.Selecte
continue
}
pieceNum := pieces[index].Number
result.OutOfPlacement[pieceNum] = struct{}{}
result.OutOfPlacement.Include(int(pieceNum))
}
}
// ForcingRepair = OutOfPlacement only, for now
result.ForcingRepair = make(map[uint16]struct{})
maps.Copy(result.ForcingRepair, result.OutOfPlacement)
result.ForcingRepair = copyIntSet(NewIntSet(maxPieceNum),
result.OutOfPlacement,
)
// Unhealthy = Missing OR Suspended OR Clumped OR OutOfPlacement OR InExcludedCountry
result.Unhealthy = make(map[uint16]struct{})
maps.Copy(result.Unhealthy, result.Missing)
maps.Copy(result.Unhealthy, result.Suspended)
maps.Copy(result.Unhealthy, result.Clumped)
maps.Copy(result.Unhealthy, result.Exiting)
maps.Copy(result.Unhealthy, result.OutOfPlacement)
maps.Copy(result.Unhealthy, result.InExcludedCountry)
// Unhealthy = Missing OR Suspended OR Clumped OR Exiting OR OutOfPlacement OR InExcludedCountry
result.Unhealthy = copyIntSet(NewIntSet(maxPieceNum),
result.Missing,
result.Suspended,
result.Clumped,
result.Exiting,
result.OutOfPlacement,
result.InExcludedCountry,
)
// UnhealthyRetrievable = Unhealthy AND Retrievable
result.UnhealthyRetrievable = make(map[uint16]struct{})
for pieceNum := range result.Unhealthy {
if _, isRetrievable := result.Retrievable[pieceNum]; isRetrievable {
result.UnhealthyRetrievable[pieceNum] = struct{}{}
// Healthy = NOT Unhealthy
result.UnhealthyRetrievable = NewIntSet(maxPieceNum)
result.Healthy = NewIntSet(maxPieceNum)
for _, piece := range pieces {
if !result.Unhealthy.Contains(int(piece.Number)) {
result.Healthy.Include(int(piece.Number))
} else if result.Retrievable.Contains(int(piece.Number)) {
result.UnhealthyRetrievable.Include(int(piece.Number))
}
}
// Healthy = NOT Unhealthy
result.Healthy = make(map[uint16]struct{})
for _, piece := range pieces {
if _, found := result.Unhealthy[piece.Number]; !found {
result.Healthy[piece.Number] = struct{}{}
}
}
return result
}
// copyIntSet marks in destination every value (within destination's
// capacity) that is present in at least one of the source sets, then
// returns destination for convenient chaining.
func copyIntSet(destination IntSet, sources ...IntSet) IntSet {
	// inAnySource reports whether any source set contains the value.
	inAnySource := func(value int) bool {
		for _, src := range sources {
			if src.Contains(value) {
				return true
			}
		}
		return false
	}

	for value := 0; value < destination.Cap(); value++ {
		if inAnySource(value) {
			destination.Include(value)
		}
	}
	return destination
}
// IntSet is a fixed-capacity set of small non-negative integers (piece
// numbers). It trades a map's flexibility for a dense []bool
// representation to avoid per-element allocations on the hot path.
type IntSet struct {
	bits []bool
	size int
}

// NewIntSet creates a new int set able to hold values in [0, n).
func NewIntSet(n int) IntSet {
	return IntSet{
		bits: make([]bool, n),
	}
}

// Contains returns true if set includes int value.
// Values outside [0, capacity) are reported as absent.
func (i IntSet) Contains(value int) bool {
	// use len, not cap: only the first len(bits) elements are addressable
	if value < 0 || value >= len(i.bits) {
		return false
	}
	return i.bits[value]
}

// Include includes int value into set.
// Ignores values outside set capacity, as well as values that are
// already present (so Size stays accurate).
func (i *IntSet) Include(value int) {
	if value < 0 || value >= len(i.bits) || i.bits[value] {
		return
	}
	i.bits[value] = true
	i.size++
}

// Remove removes int value from set.
// Ignores values outside set capacity, as well as values that are not
// present (so Size stays accurate).
func (i *IntSet) Remove(value int) {
	if value < 0 || value >= len(i.bits) || !i.bits[value] {
		return
	}
	// clear the bit; previously this incorrectly set it to true
	i.bits[value] = false
	i.size--
}

// Size returns size of set.
func (i IntSet) Size() int {
	return i.size
}

// Cap returns set capacity.
func (i IntSet) Cap() int {
	return len(i.bits)
}

View File

@ -45,10 +45,10 @@ func TestClassifySegmentPieces(t *testing.T) {
pieces := createPieces(selectedNodes, 0, 1, 2, 3, 4)
result := ClassifySegmentPieces(pieces, getNodes(selectedNodes, pieces), map[location.CountryCode]struct{}{}, true, false, parsed.CreateFilters(0), piecesToNodeIDs(pieces))
require.Equal(t, 0, len(result.Missing))
require.Equal(t, 0, len(result.Clumped))
require.Equal(t, 0, len(result.OutOfPlacement))
require.Equal(t, 0, len(result.UnhealthyRetrievable))
require.Equal(t, 0, result.Missing.Size())
require.Equal(t, 0, result.Clumped.Size())
require.Equal(t, 0, result.OutOfPlacement.Size())
require.Equal(t, 0, result.UnhealthyRetrievable.Size())
})
t.Run("out of placement", func(t *testing.T) {
@ -71,11 +71,11 @@ func TestClassifySegmentPieces(t *testing.T) {
pieces := createPieces(selectedNodes, 1, 2, 3, 4, 7, 8)
result := ClassifySegmentPieces(pieces, getNodes(selectedNodes, pieces), map[location.CountryCode]struct{}{}, true, false, c.CreateFilters(10), piecesToNodeIDs(pieces))
require.Equal(t, 0, len(result.Missing))
require.Equal(t, 0, len(result.Clumped))
require.Equal(t, 0, result.Missing.Size())
require.Equal(t, 0, result.Clumped.Size())
// 1,2,3 are in Germany instead of GB
require.Equal(t, 3, len(result.OutOfPlacement))
require.Equal(t, 3, len(result.UnhealthyRetrievable))
require.Equal(t, 3, result.OutOfPlacement.Size())
require.Equal(t, 3, result.UnhealthyRetrievable.Size())
})
t.Run("out of placement and offline", func(t *testing.T) {
@ -95,11 +95,11 @@ func TestClassifySegmentPieces(t *testing.T) {
result := ClassifySegmentPieces(pieces, getNodes(selectedNodes, pieces), map[location.CountryCode]struct{}{}, true, false, c.CreateFilters(10), piecesToNodeIDs(pieces))
// offline nodes
require.Equal(t, 5, len(result.Missing))
require.Equal(t, 0, len(result.Clumped))
require.Equal(t, 10, len(result.OutOfPlacement))
require.Equal(t, 5, len(result.UnhealthyRetrievable))
numHealthy := len(pieces) - len(result.Missing) - len(result.UnhealthyRetrievable)
require.Equal(t, 5, result.Missing.Size())
require.Equal(t, 0, result.Clumped.Size())
require.Equal(t, 10, result.OutOfPlacement.Size())
require.Equal(t, 5, result.UnhealthyRetrievable.Size())
numHealthy := len(pieces) - result.Missing.Size() - result.UnhealthyRetrievable.Size()
require.Equal(t, 0, numHealthy)
})
@ -118,11 +118,11 @@ func TestClassifySegmentPieces(t *testing.T) {
result := ClassifySegmentPieces(pieces, getNodes(selectedNodes, pieces), map[location.CountryCode]struct{}{}, true, true, c.CreateFilters(0), piecesToNodeIDs(pieces))
// offline nodes
require.Equal(t, 2, len(result.Missing))
require.Equal(t, 3, len(result.Clumped))
require.Equal(t, 0, len(result.OutOfPlacement))
require.Equal(t, 2, len(result.UnhealthyRetrievable))
numHealthy := len(pieces) - len(result.Missing) - len(result.UnhealthyRetrievable)
require.Equal(t, 2, result.Missing.Size())
require.Equal(t, 3, result.Clumped.Size())
require.Equal(t, 0, result.OutOfPlacement.Size())
require.Equal(t, 2, result.UnhealthyRetrievable.Size())
numHealthy := len(pieces) - result.Missing.Size() - result.UnhealthyRetrievable.Size()
require.Equal(t, 3, numHealthy)
})
@ -145,11 +145,11 @@ func TestClassifySegmentPieces(t *testing.T) {
result := ClassifySegmentPieces(pieces, getNodes(selectedNodes, pieces), map[location.CountryCode]struct{}{}, true, true, c.CreateFilters(10), piecesToNodeIDs(pieces))
// offline nodes
require.Equal(t, 2, len(result.Missing))
require.Equal(t, 0, len(result.Clumped))
require.Equal(t, 0, len(result.OutOfPlacement))
require.Equal(t, 0, len(result.UnhealthyRetrievable))
numHealthy := len(pieces) - len(result.Missing) - len(result.UnhealthyRetrievable)
require.Equal(t, 2, result.Missing.Size())
require.Equal(t, 0, result.Clumped.Size())
require.Equal(t, 0, result.OutOfPlacement.Size())
require.Equal(t, 0, result.UnhealthyRetrievable.Size())
numHealthy := len(pieces) - result.Missing.Size() - result.UnhealthyRetrievable.Size()
require.Equal(t, 5, numHealthy)
})

View File

@ -14,7 +14,6 @@ import (
"github.com/zeebo/errs"
"go.uber.org/zap"
"golang.org/x/exp/maps"
"storj.io/common/pb"
"storj.io/common/storj"
@ -225,7 +224,7 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
piecesCheck := repair.ClassifySegmentPieces(pieces, selectedNodes, repairer.excludedCountryCodes, repairer.doPlacementCheck, repairer.doDeclumping, repairer.placementRules(segment.Placement), allNodeIDs)
// irreparable segment
if len(piecesCheck.Retrievable) < int(segment.Redundancy.RequiredShares) {
if piecesCheck.Retrievable.Size() < int(segment.Redundancy.RequiredShares) {
mon.Counter("repairer_segments_below_min_req").Inc(1) //mon:locked
stats.repairerSegmentsBelowMinReq.Inc(1)
mon.Meter("repair_nodes_unavailable").Mark(1) //mon:locked
@ -234,7 +233,7 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
repairer.log.Warn("irreparable segment",
zap.String("StreamID", queueSegment.StreamID.String()),
zap.Uint64("Position", queueSegment.Position.Encode()),
zap.Int("piecesAvailable", len(piecesCheck.Retrievable)),
zap.Int("piecesAvailable", piecesCheck.Retrievable.Size()),
zap.Int16("piecesRequired", segment.Redundancy.RequiredShares),
)
return false, nil
@ -257,15 +256,15 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
repairThreshold = overrideValue
}
if len(piecesCheck.Healthy) > int(repairThreshold) {
if piecesCheck.Healthy.Size() > int(repairThreshold) {
// No repair is needed (note Healthy does not include pieces in ForcingRepair).
var dropPieces metabase.Pieces
if len(piecesCheck.ForcingRepair) > 0 {
if piecesCheck.ForcingRepair.Size() > 0 {
// No repair is needed, but remove forcing-repair pieces without a repair operation,
// as we will still be above the repair threshold.
for _, piece := range pieces {
if _, ok := piecesCheck.ForcingRepair[piece.Number]; ok {
if piecesCheck.ForcingRepair.Contains(int(piece.Number)) {
dropPieces = append(dropPieces, piece)
}
}
@ -295,23 +294,23 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
mon.Meter("repair_unnecessary").Mark(1) //mon:locked
stats.repairUnnecessary.Mark(1)
repairer.log.Debug("segment above repair threshold", zap.Int("numHealthy", len(piecesCheck.Healthy)), zap.Int32("repairThreshold", repairThreshold),
zap.Int("numClumped", len(piecesCheck.Clumped)), zap.Int("numExiting", len(piecesCheck.Exiting)), zap.Int("numOffPieces", len(piecesCheck.OutOfPlacement)),
zap.Int("numExcluded", len(piecesCheck.InExcludedCountry)), zap.Int("droppedPieces", len(dropPieces)))
repairer.log.Debug("segment above repair threshold", zap.Int("numHealthy", piecesCheck.Healthy.Size()), zap.Int32("repairThreshold", repairThreshold),
zap.Int("numClumped", piecesCheck.Clumped.Size()), zap.Int("numExiting", piecesCheck.Exiting.Size()), zap.Int("numOffPieces", piecesCheck.OutOfPlacement.Size()),
zap.Int("numExcluded", piecesCheck.InExcludedCountry.Size()), zap.Int("droppedPieces", len(dropPieces)))
return true, nil
}
healthyRatioBeforeRepair := 0.0
if segment.Redundancy.TotalShares != 0 {
healthyRatioBeforeRepair = float64(len(piecesCheck.Healthy)) / float64(segment.Redundancy.TotalShares)
healthyRatioBeforeRepair = float64(piecesCheck.Healthy.Size()) / float64(segment.Redundancy.TotalShares)
}
mon.FloatVal("healthy_ratio_before_repair").Observe(healthyRatioBeforeRepair) //mon:locked
stats.healthyRatioBeforeRepair.Observe(healthyRatioBeforeRepair)
// Create the order limits for the GET_REPAIR action
retrievablePieces := make(metabase.Pieces, 0, len(piecesCheck.Retrievable))
retrievablePieces := make(metabase.Pieces, 0, piecesCheck.Retrievable.Size())
for _, piece := range pieces {
if _, found := piecesCheck.Retrievable[piece.Number]; found {
if piecesCheck.Retrievable.Contains(int(piece.Number)) {
retrievablePieces = append(retrievablePieces, piece)
}
}
@ -338,11 +337,12 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
// call to CreateGetRepairOrderLimits. Add or remove them from the appropriate sets.
for _, piece := range retrievablePieces {
if getOrderLimits[piece.Number] == nil {
piecesCheck.Missing[piece.Number] = struct{}{}
piecesCheck.Unhealthy[piece.Number] = struct{}{}
delete(piecesCheck.Healthy, piece.Number)
delete(piecesCheck.Retrievable, piece.Number)
delete(piecesCheck.UnhealthyRetrievable, piece.Number)
piecesCheck.Missing.Include(int(piece.Number))
piecesCheck.Unhealthy.Include(int(piece.Number))
piecesCheck.Healthy.Remove(int(piece.Number))
piecesCheck.Retrievable.Remove(int(piece.Number))
piecesCheck.UnhealthyRetrievable.Remove(int(piece.Number))
}
}
@ -352,9 +352,9 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
if totalNeeded > redundancy.TotalCount() {
totalNeeded = redundancy.TotalCount()
}
requestCount = totalNeeded - len(piecesCheck.Healthy)
requestCount = totalNeeded - piecesCheck.Healthy.Size()
}
minSuccessfulNeeded := redundancy.OptimalThreshold() - len(piecesCheck.Healthy)
minSuccessfulNeeded := redundancy.OptimalThreshold() - piecesCheck.Healthy.Size()
// Request Overlay for n-h new storage nodes
request := overlay.FindStorageNodesRequest{
@ -372,13 +372,22 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
// pieces they have, as long as they are kept intact and retrievable).
maxToKeep := int(segment.Redundancy.TotalShares) - len(newNodes)
toKeep := map[uint16]struct{}{}
maps.Copy(toKeep, piecesCheck.Healthy)
for excludedNodeNum := range piecesCheck.InExcludedCountry {
if len(toKeep) >= maxToKeep {
break
// TODO how to avoid this two loops
for _, piece := range pieces {
if piecesCheck.Healthy.Contains(int(piece.Number)) {
toKeep[piece.Number] = struct{}{}
}
toKeep[excludedNodeNum] = struct{}{}
}
for _, piece := range pieces {
if piecesCheck.InExcludedCountry.Contains(int(piece.Number)) {
if len(toKeep) >= maxToKeep {
break
}
toKeep[piece.Number] = struct{}{}
}
}
putLimits, putPrivateKey, err := repairer.orders.CreatePutRepairOrderLimits(ctx, segment, getOrderLimits, toKeep, newNodes)
if err != nil {
return false, orderLimitFailureError.New("could not create PUT_REPAIR order limits: %w", err)
@ -551,7 +560,7 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
mon.Meter("repair_bytes_uploaded").Mark64(bytesRepaired) //mon:locked
healthyAfterRepair := len(piecesCheck.Healthy) + len(repairedPieces)
healthyAfterRepair := piecesCheck.Healthy.Size() + len(repairedPieces)
switch {
case healthyAfterRepair >= int(segment.Redundancy.OptimalShares):
mon.Meter("repair_success").Mark(1) //mon:locked
@ -584,9 +593,9 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
// (Retrievable AND InExcludedCountry). Those, we allow to remain on the nodes as
// long as the nodes are keeping the pieces intact and available.
for _, piece := range pieces {
if _, isUnhealthy := piecesCheck.Unhealthy[piece.Number]; isUnhealthy {
_, retrievable := piecesCheck.Retrievable[piece.Number]
_, inExcludedCountry := piecesCheck.InExcludedCountry[piece.Number]
if piecesCheck.Unhealthy.Contains(int(piece.Number)) {
retrievable := piecesCheck.Retrievable.Contains(int(piece.Number))
inExcludedCountry := piecesCheck.InExcludedCountry.Contains(int(piece.Number))
if retrievable && inExcludedCountry {
continue
}
@ -598,7 +607,7 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
// pieces. We want to do that wherever possible, except where doing so puts data in
// jeopardy.
for _, piece := range pieces {
if _, ok := piecesCheck.OutOfPlacement[piece.Number]; ok {
if piecesCheck.OutOfPlacement.Contains(int(piece.Number)) {
toRemove = append(toRemove, piece)
}
}
@ -657,15 +666,15 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
repairer.log.Debug("repaired segment",
zap.Stringer("Stream ID", segment.StreamID),
zap.Uint64("Position", segment.Position.Encode()),
zap.Int("clumped pieces", len(piecesCheck.Clumped)),
zap.Int("exiting-node pieces", len(piecesCheck.Exiting)),
zap.Int("out of placement pieces", len(piecesCheck.OutOfPlacement)),
zap.Int("in excluded countries", len(piecesCheck.InExcludedCountry)),
zap.Int("missing pieces", len(piecesCheck.Missing)),
zap.Int("clumped pieces", piecesCheck.Clumped.Size()),
zap.Int("exiting-node pieces", piecesCheck.Exiting.Size()),
zap.Int("out of placement pieces", piecesCheck.OutOfPlacement.Size()),
zap.Int("in excluded countries", piecesCheck.InExcludedCountry.Size()),
zap.Int("missing pieces", piecesCheck.Missing.Size()),
zap.Int("removed pieces", len(toRemove)),
zap.Int("repaired pieces", len(repairedPieces)),
zap.Int("retrievable pieces", len(piecesCheck.Retrievable)),
zap.Int("healthy before repair", len(piecesCheck.Healthy)),
zap.Int("retrievable pieces", piecesCheck.Retrievable.Size()),
zap.Int("healthy before repair", piecesCheck.Healthy.Size()),
zap.Int("healthy after repair", healthyAfterRepair),
zap.Int("total before repair", len(piecesCheck.ExcludeNodeIDs)),
zap.Int("total after repair", len(newPieces)))