satellite/repair: piecescheck.OutOfPlacementPiecesSet should not contain offline nodes

When we check the availability of the pieces, we do:

```
result.NumUnhealthyRetrievable = len(result.ClumpedPiecesSet) + len(result.OutOfPlacementPiecesSet)
// + some magic if there are overlaps between them
numHealthy := len(pieces) - len(piecesCheck.MissingPiecesSet) - piecesCheck.NumUnhealthyRetrievable
```

This works only if OutOfPlacementPieceSet doesn't contain the offline nodes (which are already included in MissingPieceSet).

But `result.OutOfPlacementPieces.Set` should include all the nodes (even offline), as in case of lucky conditions, we are able to remove those pieces from DB.

The solution is to remove all offline nodes from `NumUnhealthyRetrievable`.

Change-Id: I90baa0396352dd040e1e1516314b3271f8712034
This commit is contained in:
Márton Elek 2023-08-15 14:35:54 +02:00 committed by Storj Robot
parent 80186ecc67
commit 9ddc8b4ca3
2 changed files with 167 additions and 14 deletions

View File

@ -648,8 +648,10 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
type piecesCheckResult struct { type piecesCheckResult struct {
ExcludeNodeIDs []storj.NodeID ExcludeNodeIDs []storj.NodeID
MissingPiecesSet map[uint16]bool MissingPiecesSet map[uint16]bool
ClumpedPiecesSet map[uint16]bool ClumpedPiecesSet map[uint16]bool
// piece which are out of placement (both offline and online)
OutOfPlacementPiecesSet map[uint16]bool OutOfPlacementPiecesSet map[uint16]bool
NumUnhealthyRetrievable int NumUnhealthyRetrievable int
@ -662,6 +664,20 @@ func (repairer *SegmentRepairer) classifySegmentPieces(ctx context.Context, segm
pieces := segment.Pieces pieces := segment.Pieces
allNodeIDs := make([]storj.NodeID, len(pieces)) allNodeIDs := make([]storj.NodeID, len(pieces))
for i, piece := range pieces {
allNodeIDs[i] = piece.StorageNode
}
online, offline, err := repairer.overlay.KnownReliable(ctx, allNodeIDs)
if err != nil {
return piecesCheckResult{}, overlayQueryError.New("error identifying missing pieces: %w", err)
}
return repairer.classifySegmentPiecesWithNodes(ctx, segment, allNodeIDs, online, offline)
}
func (repairer *SegmentRepairer) classifySegmentPiecesWithNodes(ctx context.Context, segment metabase.Segment, allNodeIDs []storj.NodeID, online []nodeselection.SelectedNode, offline []nodeselection.SelectedNode) (result piecesCheckResult, err error) {
pieces := segment.Pieces
nodeIDPieceMap := map[storj.NodeID]uint16{} nodeIDPieceMap := map[storj.NodeID]uint16{}
result.MissingPiecesSet = map[uint16]bool{} result.MissingPiecesSet = map[uint16]bool{}
for i, p := range pieces { for i, p := range pieces {
@ -672,11 +688,6 @@ func (repairer *SegmentRepairer) classifySegmentPieces(ctx context.Context, segm
result.ExcludeNodeIDs = allNodeIDs result.ExcludeNodeIDs = allNodeIDs
online, offline, err := repairer.overlay.KnownReliable(ctx, allNodeIDs)
if err != nil {
return piecesCheckResult{}, overlayQueryError.New("error identifying missing pieces: %w", err)
}
nodeFilters := repairer.placementRules(segment.Placement) nodeFilters := repairer.placementRules(segment.Placement)
// remove online nodes from missing pieces // remove online nodes from missing pieces
@ -733,14 +744,16 @@ func (repairer *SegmentRepairer) classifySegmentPieces(ctx context.Context, segm
checkPlacement(online) checkPlacement(online)
checkPlacement(offline) checkPlacement(offline)
result.NumUnhealthyRetrievable = len(result.ClumpedPiecesSet) + len(result.OutOfPlacementPiecesSet) // verify that some of clumped pieces and out of placement pieces are not the same
if len(result.ClumpedPiecesSet) != 0 && len(result.OutOfPlacementPiecesSet) != 0 { unhealthyRetrievableSet := map[uint16]bool{}
// verify that some of clumped pieces and out of placement pieces are not the same maps.Copy(unhealthyRetrievableSet, result.ClumpedPiecesSet)
unhealthyRetrievableSet := map[uint16]bool{} maps.Copy(unhealthyRetrievableSet, result.OutOfPlacementPiecesSet)
maps.Copy(unhealthyRetrievableSet, result.ClumpedPiecesSet)
maps.Copy(unhealthyRetrievableSet, result.OutOfPlacementPiecesSet) // offline nodes are not retrievable
result.NumUnhealthyRetrievable = len(unhealthyRetrievableSet) for _, node := range offline {
delete(unhealthyRetrievableSet, nodeIDPieceMap[node.ID])
} }
result.NumUnhealthyRetrievable = len(unhealthyRetrievableSet)
return result, nil return result, nil
} }

View File

@ -0,0 +1,140 @@
// Copyright (C) 2023 Storj Labs, Inc.
// See LICENSE for copying information.
package repairer
import (
"testing"
"github.com/stretchr/testify/require"
"storj.io/common/identity/testidentity"
"storj.io/common/storj"
"storj.io/common/storj/location"
"storj.io/common/testcontext"
"storj.io/storj/satellite/metabase"
"storj.io/storj/satellite/nodeselection"
"storj.io/storj/satellite/overlay"
)
func TestClassify(t *testing.T) {
ctx := testcontext.New(t)
t.Run("all online", func(t *testing.T) {
var online, offline = generateNodes(5, func(ix int) bool {
return true
}, func(ix int, node *nodeselection.SelectedNode) {
})
c := &overlay.ConfigurablePlacementRule{}
require.NoError(t, c.Set(""))
s := SegmentRepairer{
placementRules: c.CreateFilters,
}
pieces := createPieces(online, offline, 0, 1, 2, 3, 4)
result, err := s.classifySegmentPiecesWithNodes(ctx, metabase.Segment{Pieces: pieces}, allNodeIDs(pieces), online, offline)
require.NoError(t, err)
require.Equal(t, 0, len(result.MissingPiecesSet))
require.Equal(t, 0, len(result.ClumpedPiecesSet))
require.Equal(t, 0, len(result.OutOfPlacementPiecesSet))
require.Equal(t, 0, result.NumUnhealthyRetrievable)
})
t.Run("out of placement", func(t *testing.T) {
var online, offline = generateNodes(10, func(ix int) bool {
return true
}, func(ix int, node *nodeselection.SelectedNode) {
if ix > 4 {
node.CountryCode = location.Germany
} else {
node.CountryCode = location.UnitedKingdom
}
})
c := &overlay.ConfigurablePlacementRule{}
require.NoError(t, c.Set("10:country(\"GB\")"))
s := SegmentRepairer{
placementRules: c.CreateFilters,
}
pieces := createPieces(online, offline, 1, 2, 3, 4, 7, 8)
result, err := s.classifySegmentPiecesWithNodes(ctx, metabase.Segment{Pieces: pieces, Placement: 10}, allNodeIDs(pieces), online, offline)
require.NoError(t, err)
require.Equal(t, 0, len(result.MissingPiecesSet))
require.Equal(t, 0, len(result.ClumpedPiecesSet))
// 1,2,3 are in Germany instead of GB
require.Equal(t, 3, len(result.OutOfPlacementPiecesSet))
require.Equal(t, 3, result.NumUnhealthyRetrievable)
})
t.Run("out of placement and offline", func(t *testing.T) {
// all nodes are in wrong region and half of them are offline
var online, offline = generateNodes(10, func(ix int) bool {
return ix < 5
}, func(ix int, node *nodeselection.SelectedNode) {
node.CountryCode = location.Germany
})
c := &overlay.ConfigurablePlacementRule{}
require.NoError(t, c.Set("10:country(\"GB\")"))
s := SegmentRepairer{
placementRules: c.CreateFilters,
}
pieces := createPieces(online, offline, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
result, err := s.classifySegmentPiecesWithNodes(ctx, metabase.Segment{Pieces: pieces, Placement: 10}, allNodeIDs(pieces), online, offline)
require.NoError(t, err)
// offline nodes
require.Equal(t, 5, len(result.MissingPiecesSet))
require.Equal(t, 0, len(result.ClumpedPiecesSet))
require.Equal(t, 10, len(result.OutOfPlacementPiecesSet))
require.Equal(t, 5, result.NumUnhealthyRetrievable)
numHealthy := len(pieces) - len(result.MissingPiecesSet) - result.NumUnhealthyRetrievable
require.Equal(t, 0, numHealthy)
})
}
func generateNodes(num int, isOnline func(i int) bool, config func(ix int, node *nodeselection.SelectedNode)) (online []nodeselection.SelectedNode, offline []nodeselection.SelectedNode) {
for i := 0; i < num; i++ {
node := nodeselection.SelectedNode{
ID: testidentity.MustPregeneratedIdentity(i, storj.LatestIDVersion()).ID,
}
config(i, &node)
if isOnline(i) {
online = append(online, node)
} else {
offline = append(offline, node)
}
}
return
}
func createPieces(online []nodeselection.SelectedNode, offline []nodeselection.SelectedNode, indexes ...int) (res metabase.Pieces) {
for _, index := range indexes {
piece := metabase.Piece{
Number: uint16(index),
}
if len(online)-1 < index {
piece.StorageNode = offline[index-len(online)].ID
} else {
piece.StorageNode = online[index].ID
}
res = append(res, piece)
}
return
}
func allNodeIDs(pieces metabase.Pieces) (res []storj.NodeID) {
for _, piece := range pieces {
res = append(res, piece.StorageNode)
}
return res
}