satellite/repair: piecescheck.OutOfPlacementPiecesSet should not contain offline nodes
When we check the availability of the pieces, we do: ``` result.NumUnhealthyRetrievable = len(result.ClumpedPiecesSet) + len(result.OutOfPlacementPiecesSet) // + some magic if there are overlaps between them numHealthy := len(pieces) - len(piecesCheck.MissingPiecesSet) - piecesCheck.NumUnhealthyRetrievable ``` This works only if OutOfPlacementPieceSet doesn't contain the offline nodes (which are already included in MissingPieceSet). But `result.OutOfPlacementPieces.Set` should include all the nodes (even offline), as in case of lucky conditions, we are able to remove those pieces from DB. The solution is to remove all offline nodes from `NumUnhealthyRetrievable`. Change-Id: I90baa0396352dd040e1e1516314b3271f8712034
This commit is contained in:
parent
80186ecc67
commit
9ddc8b4ca3
@ -648,8 +648,10 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
|
|||||||
type piecesCheckResult struct {
|
type piecesCheckResult struct {
|
||||||
ExcludeNodeIDs []storj.NodeID
|
ExcludeNodeIDs []storj.NodeID
|
||||||
|
|
||||||
MissingPiecesSet map[uint16]bool
|
MissingPiecesSet map[uint16]bool
|
||||||
ClumpedPiecesSet map[uint16]bool
|
ClumpedPiecesSet map[uint16]bool
|
||||||
|
|
||||||
|
// piece which are out of placement (both offline and online)
|
||||||
OutOfPlacementPiecesSet map[uint16]bool
|
OutOfPlacementPiecesSet map[uint16]bool
|
||||||
|
|
||||||
NumUnhealthyRetrievable int
|
NumUnhealthyRetrievable int
|
||||||
@ -662,6 +664,20 @@ func (repairer *SegmentRepairer) classifySegmentPieces(ctx context.Context, segm
|
|||||||
pieces := segment.Pieces
|
pieces := segment.Pieces
|
||||||
|
|
||||||
allNodeIDs := make([]storj.NodeID, len(pieces))
|
allNodeIDs := make([]storj.NodeID, len(pieces))
|
||||||
|
for i, piece := range pieces {
|
||||||
|
allNodeIDs[i] = piece.StorageNode
|
||||||
|
}
|
||||||
|
|
||||||
|
online, offline, err := repairer.overlay.KnownReliable(ctx, allNodeIDs)
|
||||||
|
if err != nil {
|
||||||
|
return piecesCheckResult{}, overlayQueryError.New("error identifying missing pieces: %w", err)
|
||||||
|
}
|
||||||
|
return repairer.classifySegmentPiecesWithNodes(ctx, segment, allNodeIDs, online, offline)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (repairer *SegmentRepairer) classifySegmentPiecesWithNodes(ctx context.Context, segment metabase.Segment, allNodeIDs []storj.NodeID, online []nodeselection.SelectedNode, offline []nodeselection.SelectedNode) (result piecesCheckResult, err error) {
|
||||||
|
pieces := segment.Pieces
|
||||||
|
|
||||||
nodeIDPieceMap := map[storj.NodeID]uint16{}
|
nodeIDPieceMap := map[storj.NodeID]uint16{}
|
||||||
result.MissingPiecesSet = map[uint16]bool{}
|
result.MissingPiecesSet = map[uint16]bool{}
|
||||||
for i, p := range pieces {
|
for i, p := range pieces {
|
||||||
@ -672,11 +688,6 @@ func (repairer *SegmentRepairer) classifySegmentPieces(ctx context.Context, segm
|
|||||||
|
|
||||||
result.ExcludeNodeIDs = allNodeIDs
|
result.ExcludeNodeIDs = allNodeIDs
|
||||||
|
|
||||||
online, offline, err := repairer.overlay.KnownReliable(ctx, allNodeIDs)
|
|
||||||
if err != nil {
|
|
||||||
return piecesCheckResult{}, overlayQueryError.New("error identifying missing pieces: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
nodeFilters := repairer.placementRules(segment.Placement)
|
nodeFilters := repairer.placementRules(segment.Placement)
|
||||||
|
|
||||||
// remove online nodes from missing pieces
|
// remove online nodes from missing pieces
|
||||||
@ -733,14 +744,16 @@ func (repairer *SegmentRepairer) classifySegmentPieces(ctx context.Context, segm
|
|||||||
checkPlacement(online)
|
checkPlacement(online)
|
||||||
checkPlacement(offline)
|
checkPlacement(offline)
|
||||||
|
|
||||||
result.NumUnhealthyRetrievable = len(result.ClumpedPiecesSet) + len(result.OutOfPlacementPiecesSet)
|
// verify that some of clumped pieces and out of placement pieces are not the same
|
||||||
if len(result.ClumpedPiecesSet) != 0 && len(result.OutOfPlacementPiecesSet) != 0 {
|
unhealthyRetrievableSet := map[uint16]bool{}
|
||||||
// verify that some of clumped pieces and out of placement pieces are not the same
|
maps.Copy(unhealthyRetrievableSet, result.ClumpedPiecesSet)
|
||||||
unhealthyRetrievableSet := map[uint16]bool{}
|
maps.Copy(unhealthyRetrievableSet, result.OutOfPlacementPiecesSet)
|
||||||
maps.Copy(unhealthyRetrievableSet, result.ClumpedPiecesSet)
|
|
||||||
maps.Copy(unhealthyRetrievableSet, result.OutOfPlacementPiecesSet)
|
// offline nodes are not retrievable
|
||||||
result.NumUnhealthyRetrievable = len(unhealthyRetrievableSet)
|
for _, node := range offline {
|
||||||
|
delete(unhealthyRetrievableSet, nodeIDPieceMap[node.ID])
|
||||||
}
|
}
|
||||||
|
result.NumUnhealthyRetrievable = len(unhealthyRetrievableSet)
|
||||||
|
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
140
satellite/repair/repairer/segments_unit_test.go
Normal file
140
satellite/repair/repairer/segments_unit_test.go
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
// Copyright (C) 2023 Storj Labs, Inc.
|
||||||
|
// See LICENSE for copying information.
|
||||||
|
|
||||||
|
package repairer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"storj.io/common/identity/testidentity"
|
||||||
|
"storj.io/common/storj"
|
||||||
|
"storj.io/common/storj/location"
|
||||||
|
"storj.io/common/testcontext"
|
||||||
|
"storj.io/storj/satellite/metabase"
|
||||||
|
"storj.io/storj/satellite/nodeselection"
|
||||||
|
"storj.io/storj/satellite/overlay"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestClassify(t *testing.T) {
|
||||||
|
ctx := testcontext.New(t)
|
||||||
|
|
||||||
|
t.Run("all online", func(t *testing.T) {
|
||||||
|
var online, offline = generateNodes(5, func(ix int) bool {
|
||||||
|
return true
|
||||||
|
}, func(ix int, node *nodeselection.SelectedNode) {
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
c := &overlay.ConfigurablePlacementRule{}
|
||||||
|
require.NoError(t, c.Set(""))
|
||||||
|
s := SegmentRepairer{
|
||||||
|
placementRules: c.CreateFilters,
|
||||||
|
}
|
||||||
|
pieces := createPieces(online, offline, 0, 1, 2, 3, 4)
|
||||||
|
result, err := s.classifySegmentPiecesWithNodes(ctx, metabase.Segment{Pieces: pieces}, allNodeIDs(pieces), online, offline)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Equal(t, 0, len(result.MissingPiecesSet))
|
||||||
|
require.Equal(t, 0, len(result.ClumpedPiecesSet))
|
||||||
|
require.Equal(t, 0, len(result.OutOfPlacementPiecesSet))
|
||||||
|
require.Equal(t, 0, result.NumUnhealthyRetrievable)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("out of placement", func(t *testing.T) {
|
||||||
|
var online, offline = generateNodes(10, func(ix int) bool {
|
||||||
|
return true
|
||||||
|
}, func(ix int, node *nodeselection.SelectedNode) {
|
||||||
|
if ix > 4 {
|
||||||
|
node.CountryCode = location.Germany
|
||||||
|
} else {
|
||||||
|
node.CountryCode = location.UnitedKingdom
|
||||||
|
}
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
c := &overlay.ConfigurablePlacementRule{}
|
||||||
|
require.NoError(t, c.Set("10:country(\"GB\")"))
|
||||||
|
s := SegmentRepairer{
|
||||||
|
placementRules: c.CreateFilters,
|
||||||
|
}
|
||||||
|
|
||||||
|
pieces := createPieces(online, offline, 1, 2, 3, 4, 7, 8)
|
||||||
|
result, err := s.classifySegmentPiecesWithNodes(ctx, metabase.Segment{Pieces: pieces, Placement: 10}, allNodeIDs(pieces), online, offline)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Equal(t, 0, len(result.MissingPiecesSet))
|
||||||
|
require.Equal(t, 0, len(result.ClumpedPiecesSet))
|
||||||
|
// 1,2,3 are in Germany instead of GB
|
||||||
|
require.Equal(t, 3, len(result.OutOfPlacementPiecesSet))
|
||||||
|
require.Equal(t, 3, result.NumUnhealthyRetrievable)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("out of placement and offline", func(t *testing.T) {
|
||||||
|
// all nodes are in wrong region and half of them are offline
|
||||||
|
var online, offline = generateNodes(10, func(ix int) bool {
|
||||||
|
return ix < 5
|
||||||
|
}, func(ix int, node *nodeselection.SelectedNode) {
|
||||||
|
node.CountryCode = location.Germany
|
||||||
|
})
|
||||||
|
|
||||||
|
c := &overlay.ConfigurablePlacementRule{}
|
||||||
|
require.NoError(t, c.Set("10:country(\"GB\")"))
|
||||||
|
s := SegmentRepairer{
|
||||||
|
placementRules: c.CreateFilters,
|
||||||
|
}
|
||||||
|
|
||||||
|
pieces := createPieces(online, offline, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
|
||||||
|
result, err := s.classifySegmentPiecesWithNodes(ctx, metabase.Segment{Pieces: pieces, Placement: 10}, allNodeIDs(pieces), online, offline)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// offline nodes
|
||||||
|
require.Equal(t, 5, len(result.MissingPiecesSet))
|
||||||
|
require.Equal(t, 0, len(result.ClumpedPiecesSet))
|
||||||
|
require.Equal(t, 10, len(result.OutOfPlacementPiecesSet))
|
||||||
|
require.Equal(t, 5, result.NumUnhealthyRetrievable)
|
||||||
|
numHealthy := len(pieces) - len(result.MissingPiecesSet) - result.NumUnhealthyRetrievable
|
||||||
|
require.Equal(t, 0, numHealthy)
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateNodes(num int, isOnline func(i int) bool, config func(ix int, node *nodeselection.SelectedNode)) (online []nodeselection.SelectedNode, offline []nodeselection.SelectedNode) {
|
||||||
|
for i := 0; i < num; i++ {
|
||||||
|
node := nodeselection.SelectedNode{
|
||||||
|
ID: testidentity.MustPregeneratedIdentity(i, storj.LatestIDVersion()).ID,
|
||||||
|
}
|
||||||
|
config(i, &node)
|
||||||
|
if isOnline(i) {
|
||||||
|
online = append(online, node)
|
||||||
|
} else {
|
||||||
|
offline = append(offline, node)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func createPieces(online []nodeselection.SelectedNode, offline []nodeselection.SelectedNode, indexes ...int) (res metabase.Pieces) {
|
||||||
|
for _, index := range indexes {
|
||||||
|
piece := metabase.Piece{
|
||||||
|
Number: uint16(index),
|
||||||
|
}
|
||||||
|
if len(online)-1 < index {
|
||||||
|
piece.StorageNode = offline[index-len(online)].ID
|
||||||
|
} else {
|
||||||
|
piece.StorageNode = online[index].ID
|
||||||
|
}
|
||||||
|
res = append(res, piece)
|
||||||
|
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func allNodeIDs(pieces metabase.Pieces) (res []storj.NodeID) {
|
||||||
|
for _, piece := range pieces {
|
||||||
|
res = append(res, piece.StorageNode)
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user