satellite/repair: piecescheck.OutOfPlacementPiecesSet should not contain offline nodes

When we check the availability of the pieces, we do:

```
result.NumUnhealthyRetrievable = len(result.ClumpedPiecesSet) + len(result.OutOfPlacementPiecesSet)
// + some magic if there are overlaps between them
numHealthy := len(pieces) - len(piecesCheck.MissingPiecesSet) - piecesCheck.NumUnhealthyRetrievable
```

This works only if OutOfPlacementPieceSet doesn't contain the offline nodes (which are already included in MissingPieceSet).

But `result.OutOfPlacementPieces.Set` should include all the nodes (even offline), as in case of lucky conditions, we are able to remove those pieces from DB.

The solution is to remove all offline nodes from `NumUnhealthyRetrievable`.

Change-Id: I90baa0396352dd040e1e1516314b3271f8712034
This commit is contained in:
Márton Elek 2023-08-15 14:35:54 +02:00 committed by Storj Robot
parent 80186ecc67
commit 9ddc8b4ca3
2 changed files with 167 additions and 14 deletions

View File

@ -648,8 +648,10 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, queueSegment *queue
type piecesCheckResult struct {
ExcludeNodeIDs []storj.NodeID
MissingPiecesSet map[uint16]bool
ClumpedPiecesSet map[uint16]bool
MissingPiecesSet map[uint16]bool
ClumpedPiecesSet map[uint16]bool
// piece which are out of placement (both offline and online)
OutOfPlacementPiecesSet map[uint16]bool
NumUnhealthyRetrievable int
@ -662,6 +664,20 @@ func (repairer *SegmentRepairer) classifySegmentPieces(ctx context.Context, segm
pieces := segment.Pieces
allNodeIDs := make([]storj.NodeID, len(pieces))
for i, piece := range pieces {
allNodeIDs[i] = piece.StorageNode
}
online, offline, err := repairer.overlay.KnownReliable(ctx, allNodeIDs)
if err != nil {
return piecesCheckResult{}, overlayQueryError.New("error identifying missing pieces: %w", err)
}
return repairer.classifySegmentPiecesWithNodes(ctx, segment, allNodeIDs, online, offline)
}
func (repairer *SegmentRepairer) classifySegmentPiecesWithNodes(ctx context.Context, segment metabase.Segment, allNodeIDs []storj.NodeID, online []nodeselection.SelectedNode, offline []nodeselection.SelectedNode) (result piecesCheckResult, err error) {
pieces := segment.Pieces
nodeIDPieceMap := map[storj.NodeID]uint16{}
result.MissingPiecesSet = map[uint16]bool{}
for i, p := range pieces {
@ -672,11 +688,6 @@ func (repairer *SegmentRepairer) classifySegmentPieces(ctx context.Context, segm
result.ExcludeNodeIDs = allNodeIDs
online, offline, err := repairer.overlay.KnownReliable(ctx, allNodeIDs)
if err != nil {
return piecesCheckResult{}, overlayQueryError.New("error identifying missing pieces: %w", err)
}
nodeFilters := repairer.placementRules(segment.Placement)
// remove online nodes from missing pieces
@ -733,14 +744,16 @@ func (repairer *SegmentRepairer) classifySegmentPieces(ctx context.Context, segm
checkPlacement(online)
checkPlacement(offline)
result.NumUnhealthyRetrievable = len(result.ClumpedPiecesSet) + len(result.OutOfPlacementPiecesSet)
if len(result.ClumpedPiecesSet) != 0 && len(result.OutOfPlacementPiecesSet) != 0 {
// verify that some of clumped pieces and out of placement pieces are not the same
unhealthyRetrievableSet := map[uint16]bool{}
maps.Copy(unhealthyRetrievableSet, result.ClumpedPiecesSet)
maps.Copy(unhealthyRetrievableSet, result.OutOfPlacementPiecesSet)
result.NumUnhealthyRetrievable = len(unhealthyRetrievableSet)
// verify that some of clumped pieces and out of placement pieces are not the same
unhealthyRetrievableSet := map[uint16]bool{}
maps.Copy(unhealthyRetrievableSet, result.ClumpedPiecesSet)
maps.Copy(unhealthyRetrievableSet, result.OutOfPlacementPiecesSet)
// offline nodes are not retrievable
for _, node := range offline {
delete(unhealthyRetrievableSet, nodeIDPieceMap[node.ID])
}
result.NumUnhealthyRetrievable = len(unhealthyRetrievableSet)
return result, nil
}

View File

@ -0,0 +1,140 @@
// Copyright (C) 2023 Storj Labs, Inc.
// See LICENSE for copying information.
package repairer
import (
"testing"
"github.com/stretchr/testify/require"
"storj.io/common/identity/testidentity"
"storj.io/common/storj"
"storj.io/common/storj/location"
"storj.io/common/testcontext"
"storj.io/storj/satellite/metabase"
"storj.io/storj/satellite/nodeselection"
"storj.io/storj/satellite/overlay"
)
func TestClassify(t *testing.T) {
ctx := testcontext.New(t)
t.Run("all online", func(t *testing.T) {
var online, offline = generateNodes(5, func(ix int) bool {
return true
}, func(ix int, node *nodeselection.SelectedNode) {
})
c := &overlay.ConfigurablePlacementRule{}
require.NoError(t, c.Set(""))
s := SegmentRepairer{
placementRules: c.CreateFilters,
}
pieces := createPieces(online, offline, 0, 1, 2, 3, 4)
result, err := s.classifySegmentPiecesWithNodes(ctx, metabase.Segment{Pieces: pieces}, allNodeIDs(pieces), online, offline)
require.NoError(t, err)
require.Equal(t, 0, len(result.MissingPiecesSet))
require.Equal(t, 0, len(result.ClumpedPiecesSet))
require.Equal(t, 0, len(result.OutOfPlacementPiecesSet))
require.Equal(t, 0, result.NumUnhealthyRetrievable)
})
t.Run("out of placement", func(t *testing.T) {
var online, offline = generateNodes(10, func(ix int) bool {
return true
}, func(ix int, node *nodeselection.SelectedNode) {
if ix > 4 {
node.CountryCode = location.Germany
} else {
node.CountryCode = location.UnitedKingdom
}
})
c := &overlay.ConfigurablePlacementRule{}
require.NoError(t, c.Set("10:country(\"GB\")"))
s := SegmentRepairer{
placementRules: c.CreateFilters,
}
pieces := createPieces(online, offline, 1, 2, 3, 4, 7, 8)
result, err := s.classifySegmentPiecesWithNodes(ctx, metabase.Segment{Pieces: pieces, Placement: 10}, allNodeIDs(pieces), online, offline)
require.NoError(t, err)
require.Equal(t, 0, len(result.MissingPiecesSet))
require.Equal(t, 0, len(result.ClumpedPiecesSet))
// 1,2,3 are in Germany instead of GB
require.Equal(t, 3, len(result.OutOfPlacementPiecesSet))
require.Equal(t, 3, result.NumUnhealthyRetrievable)
})
t.Run("out of placement and offline", func(t *testing.T) {
// all nodes are in wrong region and half of them are offline
var online, offline = generateNodes(10, func(ix int) bool {
return ix < 5
}, func(ix int, node *nodeselection.SelectedNode) {
node.CountryCode = location.Germany
})
c := &overlay.ConfigurablePlacementRule{}
require.NoError(t, c.Set("10:country(\"GB\")"))
s := SegmentRepairer{
placementRules: c.CreateFilters,
}
pieces := createPieces(online, offline, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
result, err := s.classifySegmentPiecesWithNodes(ctx, metabase.Segment{Pieces: pieces, Placement: 10}, allNodeIDs(pieces), online, offline)
require.NoError(t, err)
// offline nodes
require.Equal(t, 5, len(result.MissingPiecesSet))
require.Equal(t, 0, len(result.ClumpedPiecesSet))
require.Equal(t, 10, len(result.OutOfPlacementPiecesSet))
require.Equal(t, 5, result.NumUnhealthyRetrievable)
numHealthy := len(pieces) - len(result.MissingPiecesSet) - result.NumUnhealthyRetrievable
require.Equal(t, 0, numHealthy)
})
}
func generateNodes(num int, isOnline func(i int) bool, config func(ix int, node *nodeselection.SelectedNode)) (online []nodeselection.SelectedNode, offline []nodeselection.SelectedNode) {
for i := 0; i < num; i++ {
node := nodeselection.SelectedNode{
ID: testidentity.MustPregeneratedIdentity(i, storj.LatestIDVersion()).ID,
}
config(i, &node)
if isOnline(i) {
online = append(online, node)
} else {
offline = append(offline, node)
}
}
return
}
func createPieces(online []nodeselection.SelectedNode, offline []nodeselection.SelectedNode, indexes ...int) (res metabase.Pieces) {
for _, index := range indexes {
piece := metabase.Piece{
Number: uint16(index),
}
if len(online)-1 < index {
piece.StorageNode = offline[index-len(online)].ID
} else {
piece.StorageNode = online[index].ID
}
res = append(res, piece)
}
return
}
func allNodeIDs(pieces metabase.Pieces) (res []storj.NodeID) {
for _, piece := range pieces {
res = append(res, piece.StorageNode)
}
return res
}