39c2bb9e4b
Clarify and expand some tests, and add a large test that tries to cover the whole segment-verify stack. Trying to reproduce a problem we saw in production where the Found count in the output csv increases with each entry, eventually reaching the thousands, instead of representing the actual number of pieces found for each entry. Change-Id: I65e342fad7dc1c350830fd0c8ce75a87a01d495c
210 lines
6.9 KiB
Go
210 lines
6.9 KiB
Go
// Copyright (C) 2022 Storj Labs, Inc.
// See LICENSE for copying information.

package main_test
|
|
|
|
import (
|
|
"fmt"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
"go.uber.org/zap"
|
|
"go.uber.org/zap/zaptest/observer"
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
"storj.io/common/memory"
|
|
"storj.io/common/testcontext"
|
|
"storj.io/common/testrand"
|
|
"storj.io/common/uuid"
|
|
segmentverify "storj.io/storj/cmd/tools/segment-verify"
|
|
"storj.io/storj/private/testplanet"
|
|
"storj.io/storj/satellite/metabase"
|
|
)
|
|
|
|
func TestVerifier(t *testing.T) {
|
|
const (
|
|
nodeCount = 10
|
|
uplinkCount = 10
|
|
)
|
|
|
|
testplanet.Run(t, testplanet.Config{
|
|
SatelliteCount: 1, StorageNodeCount: nodeCount, UplinkCount: uplinkCount,
|
|
Reconfigure: testplanet.Reconfigure{
|
|
Satellite: testplanet.ReconfigureRS(nodeCount, nodeCount, nodeCount, nodeCount),
|
|
},
|
|
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
|
|
satellite := planet.Satellites[0]
|
|
|
|
olderNodeVersion := "v1.68.1" // version without Exists endpoint
|
|
newerNodeVersion := "v1.69.2" // minimum version with Exists endpoint
|
|
|
|
config := segmentverify.VerifierConfig{
|
|
PerPieceTimeout: time.Second,
|
|
OrderRetryThrottle: 500 * time.Millisecond,
|
|
RequestThrottle: 500 * time.Millisecond,
|
|
VersionWithExists: "v1.69.2",
|
|
}
|
|
|
|
// create new observed logger
|
|
observedZapCore, observedLogs := observer.New(zap.DebugLevel)
|
|
observedLogger := zap.New(observedZapCore).Named("verifier")
|
|
|
|
verifier := segmentverify.NewVerifier(
|
|
observedLogger,
|
|
satellite.Dialer,
|
|
satellite.Orders.Service,
|
|
config)
|
|
|
|
// upload some data
|
|
data := testrand.Bytes(8 * memory.KiB)
|
|
for u, up := range planet.Uplinks {
|
|
for i := 0; i < nodeCount; i++ {
|
|
err := up.Upload(ctx, satellite, "bucket1", fmt.Sprintf("uplink%d/i%d", u, i), data)
|
|
require.NoError(t, err)
|
|
}
|
|
}
|
|
|
|
result, err := satellite.Metabase.DB.ListVerifySegments(ctx, metabase.ListVerifySegments{
|
|
CursorStreamID: uuid.UUID{},
|
|
CursorPosition: metabase.SegmentPosition{},
|
|
Limit: 10000,
|
|
})
|
|
require.NoError(t, err)
|
|
require.Len(t, result.Segments, uplinkCount*nodeCount)
|
|
|
|
validSegments := make([]*segmentverify.Segment, len(result.Segments))
|
|
for i, raw := range result.Segments {
|
|
validSegments[i] = &segmentverify.Segment{VerifySegment: raw}
|
|
}
|
|
|
|
resetStatuses := func() {
|
|
for _, seg := range validSegments {
|
|
seg.Status = segmentverify.Status{Retry: nodeCount}
|
|
}
|
|
}
|
|
resetStatuses()
|
|
|
|
aliasMap, err := satellite.Metabase.DB.LatestNodesAliasMap(ctx)
|
|
require.NoError(t, err)
|
|
|
|
t.Run("verify all", func(t *testing.T) {
|
|
nodeWithExistsEndpoint := planet.StorageNodes[testrand.Intn(len(planet.StorageNodes)-1)]
|
|
|
|
var g errgroup.Group
|
|
for _, node := range planet.StorageNodes {
|
|
node := node
|
|
nodeVersion := olderNodeVersion
|
|
if node == nodeWithExistsEndpoint {
|
|
nodeVersion = newerNodeVersion
|
|
}
|
|
alias, ok := aliasMap.Alias(node.ID())
|
|
require.True(t, ok)
|
|
g.Go(func() error {
|
|
_, err := verifier.Verify(ctx, alias, node.NodeURL(), nodeVersion, validSegments, true)
|
|
return err
|
|
})
|
|
}
|
|
require.NoError(t, g.Wait())
|
|
require.NotZero(t, len(observedLogs.All()))
|
|
|
|
// check that segments were verified with download method
|
|
fallbackLogs := observedLogs.FilterMessage("fallback to download method").All()
|
|
require.Equal(t, nodeCount-1, len(fallbackLogs))
|
|
require.Equal(t, zap.DebugLevel, fallbackLogs[0].Level)
|
|
|
|
// check that segments were verified with exists endpoint
|
|
existsLogs := observedLogs.FilterMessage("verify segments using Exists method").All()
|
|
require.Equal(t, 1, len(existsLogs))
|
|
require.Equal(t, zap.DebugLevel, existsLogs[0].Level)
|
|
|
|
for segNum, seg := range validSegments {
|
|
require.Equal(t, segmentverify.Status{Found: nodeCount, NotFound: 0, Retry: 0}, seg.Status, segNum)
|
|
}
|
|
})
|
|
|
|
// segment not found
|
|
alias0, ok := aliasMap.Alias(planet.StorageNodes[0].ID())
|
|
require.True(t, ok)
|
|
|
|
validSegment0 := &segmentverify.Segment{
|
|
VerifySegment: result.Segments[0],
|
|
Status: segmentverify.Status{Retry: 1},
|
|
}
|
|
missingSegment := &segmentverify.Segment{
|
|
VerifySegment: metabase.VerifySegment{
|
|
StreamID: testrand.UUID(),
|
|
Position: metabase.SegmentPosition{},
|
|
RootPieceID: testrand.PieceID(),
|
|
Redundancy: result.Segments[0].Redundancy,
|
|
AliasPieces: metabase.AliasPieces{{Number: 0, Alias: alias0}},
|
|
},
|
|
Status: segmentverify.Status{Retry: 1},
|
|
}
|
|
validSegment1 := &segmentverify.Segment{
|
|
VerifySegment: result.Segments[1],
|
|
Status: segmentverify.Status{Retry: 1},
|
|
}
|
|
|
|
var count int
|
|
t.Run("segment not found using download method", func(t *testing.T) {
|
|
// for older node version
|
|
count, err = verifier.Verify(ctx, alias0, planet.StorageNodes[0].NodeURL(), olderNodeVersion,
|
|
[]*segmentverify.Segment{validSegment0, missingSegment, validSegment1}, true)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 3, count)
|
|
require.Equal(t, segmentverify.Status{Found: 1}, validSegment0.Status)
|
|
require.Equal(t, segmentverify.Status{NotFound: 1}, missingSegment.Status)
|
|
require.Equal(t, segmentverify.Status{Found: 1}, validSegment1.Status)
|
|
})
|
|
|
|
// reset status
|
|
validSegment0.Status = segmentverify.Status{Retry: 1}
|
|
missingSegment.Status = segmentverify.Status{Retry: 1}
|
|
validSegment1.Status = segmentverify.Status{Retry: 1}
|
|
|
|
t.Run("segment not found using exists method", func(t *testing.T) {
|
|
count, err = verifier.Verify(ctx, alias0, planet.StorageNodes[0].NodeURL(), newerNodeVersion,
|
|
[]*segmentverify.Segment{validSegment0, missingSegment, validSegment1}, true)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 3, count)
|
|
require.Equal(t, segmentverify.Status{Found: 1}, validSegment0.Status)
|
|
require.Equal(t, segmentverify.Status{NotFound: 1}, missingSegment.Status)
|
|
require.Equal(t, segmentverify.Status{Found: 1}, validSegment1.Status)
|
|
})
|
|
|
|
resetStatuses()
|
|
|
|
t.Run("test throttling", func(t *testing.T) {
|
|
// Test throttling
|
|
verifyStart := time.Now()
|
|
const throttleN = 5
|
|
count, err = verifier.Verify(ctx, alias0, planet.StorageNodes[0].NodeURL(), olderNodeVersion, validSegments[:throttleN], false)
|
|
require.NoError(t, err)
|
|
verifyDuration := time.Since(verifyStart)
|
|
require.Equal(t, throttleN, count)
|
|
require.Greater(t, verifyDuration, config.RequestThrottle*(throttleN-1))
|
|
})
|
|
|
|
resetStatuses()
|
|
|
|
// TODO: test download timeout
|
|
t.Run("Node offline", func(t *testing.T) {
|
|
err = planet.StopNodeAndUpdate(ctx, planet.StorageNodes[0])
|
|
require.NoError(t, err)
|
|
|
|
// for older node version
|
|
count, err = verifier.Verify(ctx, alias0, planet.StorageNodes[0].NodeURL(), olderNodeVersion, validSegments, true)
|
|
require.Error(t, err)
|
|
require.Equal(t, 0, count)
|
|
require.True(t, segmentverify.ErrNodeOffline.Has(err))
|
|
|
|
// for node version with Exists endpoint
|
|
count, err = verifier.Verify(ctx, alias0, planet.StorageNodes[0].NodeURL(), newerNodeVersion, validSegments, true)
|
|
require.Error(t, err)
|
|
require.Equal(t, 0, count)
|
|
require.True(t, segmentverify.ErrNodeOffline.Has(err))
|
|
})
|
|
})
|
|
}
|