2021-06-07 18:32:03 +01:00
|
|
|
// Copyright (C) 2021 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package audit
|
|
|
|
|
|
|
|
import (
|
2022-12-09 22:22:39 +00:00
|
|
|
"encoding/binary"
|
2022-12-14 23:43:50 +00:00
|
|
|
"fmt"
|
2021-06-07 18:32:03 +01:00
|
|
|
"math/rand"
|
2022-12-09 22:22:39 +00:00
|
|
|
"sort"
|
2021-06-07 18:32:03 +01:00
|
|
|
"testing"
|
2021-11-29 17:29:02 +00:00
|
|
|
"time"
|
2021-06-07 18:32:03 +01:00
|
|
|
|
2022-12-09 22:22:39 +00:00
|
|
|
"github.com/stretchr/testify/assert"
|
2021-06-07 18:32:03 +01:00
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
|
2021-11-29 17:29:02 +00:00
|
|
|
"storj.io/common/testrand"
|
2021-06-07 18:32:03 +01:00
|
|
|
"storj.io/common/uuid"
|
2021-11-29 17:29:02 +00:00
|
|
|
"storj.io/storj/satellite/metabase"
|
2023-05-09 12:13:19 +01:00
|
|
|
"storj.io/storj/satellite/metabase/rangedloop"
|
2021-06-07 18:32:03 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestReservoir(t *testing.T) {
|
2022-12-14 23:43:50 +00:00
|
|
|
rng := rand.New(rand.NewSource(time.Now().Unix()))
|
2021-06-07 18:32:03 +01:00
|
|
|
|
2022-12-14 23:43:50 +00:00
|
|
|
for size := 0; size < maxReservoirSize; size++ {
|
|
|
|
t.Run(fmt.Sprintf("size %d", size), func(t *testing.T) {
|
2023-05-09 12:13:19 +01:00
|
|
|
samples := []rangedloop.Segment{}
|
2022-12-14 23:43:50 +00:00
|
|
|
for i := 0; i < size; i++ {
|
2022-12-15 02:19:29 +00:00
|
|
|
samples = append(samples, makeSegment(i))
|
2022-12-14 23:43:50 +00:00
|
|
|
}
|
2021-06-07 18:32:03 +01:00
|
|
|
|
2022-12-14 23:43:50 +00:00
|
|
|
// If we sample N segments, less than the max, we should record all N
|
|
|
|
r := NewReservoir(size)
|
|
|
|
for _, sample := range samples {
|
2023-04-24 11:07:16 +01:00
|
|
|
r.Sample(rng, sample)
|
2022-12-14 23:43:50 +00:00
|
|
|
}
|
|
|
|
require.Equal(t, samples, r.Segments())
|
|
|
|
require.Len(t, r.Keys(), len(samples))
|
|
|
|
})
|
|
|
|
}
|
2021-06-07 18:32:03 +01:00
|
|
|
}
|
2021-11-29 17:29:02 +00:00
|
|
|
|
2022-12-15 02:19:29 +00:00
|
|
|
func TestReservoirMerge(t *testing.T) {
|
|
|
|
t.Run("merge successful", func(t *testing.T) {
|
|
|
|
// Use a fixed rng so we get deterministic sampling results.
|
2023-05-09 12:13:19 +01:00
|
|
|
segments := []rangedloop.Segment{
|
2022-12-15 02:19:29 +00:00
|
|
|
makeSegment(0), makeSegment(1), makeSegment(2),
|
|
|
|
makeSegment(3), makeSegment(4), makeSegment(5),
|
|
|
|
}
|
|
|
|
rng := rand.New(rand.NewSource(999))
|
|
|
|
r1 := NewReservoir(3)
|
2023-04-24 11:07:16 +01:00
|
|
|
r1.Sample(rng, segments[0])
|
|
|
|
r1.Sample(rng, segments[1])
|
|
|
|
r1.Sample(rng, segments[2])
|
2022-12-15 02:19:29 +00:00
|
|
|
|
|
|
|
r2 := NewReservoir(3)
|
2023-04-24 11:07:16 +01:00
|
|
|
r2.Sample(rng, segments[3])
|
|
|
|
r2.Sample(rng, segments[4])
|
|
|
|
r2.Sample(rng, segments[5])
|
2022-12-15 02:19:29 +00:00
|
|
|
|
|
|
|
err := r1.Merge(r2)
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Segments should contain a cross section from r1 and r2. If the rng
|
|
|
|
// changes, this result will likely change too since that will affect
|
|
|
|
// the keys. and therefore how they are merged.
|
2023-05-09 12:13:19 +01:00
|
|
|
require.Equal(t, []rangedloop.Segment{
|
2022-12-15 02:19:29 +00:00
|
|
|
segments[5],
|
|
|
|
segments[1],
|
|
|
|
segments[2],
|
|
|
|
}, r1.Segments())
|
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("mismatched size", func(t *testing.T) {
|
|
|
|
r1 := NewReservoir(2)
|
|
|
|
r2 := NewReservoir(1)
|
|
|
|
err := r1.Merge(r2)
|
|
|
|
require.EqualError(t, err, "cannot merge: mismatched size: expected 2 but got 1")
|
|
|
|
})
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2022-12-09 22:22:39 +00:00
|
|
|
func TestReservoirWeights(t *testing.T) {
|
2021-11-29 17:29:02 +00:00
|
|
|
var weight10StreamID = testrand.UUID()
|
|
|
|
var weight5StreamID = testrand.UUID()
|
|
|
|
var weight2StreamID = testrand.UUID()
|
|
|
|
var weight1StreamID = testrand.UUID()
|
|
|
|
streamIDCountsMap := map[uuid.UUID]int{
|
|
|
|
weight10StreamID: 0,
|
|
|
|
weight5StreamID: 0,
|
|
|
|
weight2StreamID: 0,
|
|
|
|
weight1StreamID: 0,
|
|
|
|
}
|
|
|
|
|
2023-05-09 12:13:19 +01:00
|
|
|
segments := []rangedloop.Segment{
|
2021-11-29 17:29:02 +00:00
|
|
|
{
|
|
|
|
StreamID: weight10StreamID,
|
|
|
|
Position: metabase.SegmentPosition{},
|
|
|
|
ExpiresAt: nil,
|
|
|
|
EncryptedSize: 10,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
StreamID: weight5StreamID,
|
|
|
|
Position: metabase.SegmentPosition{},
|
|
|
|
ExpiresAt: nil,
|
|
|
|
EncryptedSize: 5,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
StreamID: weight2StreamID,
|
|
|
|
Position: metabase.SegmentPosition{},
|
|
|
|
ExpiresAt: nil,
|
|
|
|
EncryptedSize: 2,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
StreamID: weight1StreamID,
|
|
|
|
Position: metabase.SegmentPosition{},
|
|
|
|
ExpiresAt: nil,
|
|
|
|
EncryptedSize: 1,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
// run a large number of times in loop for bias to show up
|
|
|
|
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
|
|
|
|
for i := 1; i < 100000; i++ {
|
|
|
|
r := NewReservoir(3)
|
|
|
|
|
|
|
|
for _, segment := range segments {
|
|
|
|
r.Sample(rng, segment)
|
|
|
|
}
|
|
|
|
|
2022-12-14 23:43:50 +00:00
|
|
|
for _, segment := range r.Segments() {
|
2021-11-29 17:29:02 +00:00
|
|
|
streamIDCountsMap[segment.StreamID]++
|
|
|
|
}
|
|
|
|
|
|
|
|
// shuffle the segments order after each result
|
|
|
|
rng.Shuffle(len(segments),
|
|
|
|
func(i, j int) {
|
|
|
|
segments[i], segments[j] = segments[j], segments[i]
|
|
|
|
})
|
|
|
|
}
|
|
|
|
require.Greater(t, streamIDCountsMap[weight10StreamID], streamIDCountsMap[weight5StreamID])
|
|
|
|
require.Greater(t, streamIDCountsMap[weight5StreamID], streamIDCountsMap[weight2StreamID])
|
|
|
|
require.Greater(t, streamIDCountsMap[weight2StreamID], streamIDCountsMap[weight1StreamID])
|
|
|
|
}
|
2022-12-09 22:22:39 +00:00
|
|
|
|
|
|
|
// Sample many segments, with equal weight, uniformly distributed, and in order,
|
|
|
|
// through the reservoir. Expect that elements show up in the result set with
|
|
|
|
// equal chance, whether they were inserted near the beginning of the list or
|
|
|
|
// near the end.
|
|
|
|
func TestReservoirBias(t *testing.T) {
|
|
|
|
const (
|
|
|
|
reservoirSize = 3
|
|
|
|
useBits = 14
|
|
|
|
numSegments = 1 << useBits
|
|
|
|
weight = 100000 // any number; same for all segments
|
|
|
|
numRounds = 1000
|
|
|
|
)
|
|
|
|
|
|
|
|
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
|
|
|
|
numsSelected := make([]uint64, numRounds*reservoirSize)
|
|
|
|
|
|
|
|
for r := 0; r < numRounds; r++ {
|
|
|
|
res := NewReservoir(reservoirSize)
|
|
|
|
for n := 0; n < numSegments; n++ {
|
2023-05-09 12:13:19 +01:00
|
|
|
seg := rangedloop.Segment{
|
2022-12-09 22:22:39 +00:00
|
|
|
EncryptedSize: weight,
|
|
|
|
}
|
|
|
|
binary.BigEndian.PutUint64(seg.StreamID[0:8], uint64(n)<<(64-useBits))
|
2023-04-24 11:07:16 +01:00
|
|
|
res.Sample(rng, seg)
|
2022-12-09 22:22:39 +00:00
|
|
|
}
|
2022-12-14 23:43:50 +00:00
|
|
|
for i, seg := range res.Segments() {
|
2022-12-09 22:22:39 +00:00
|
|
|
num := binary.BigEndian.Uint64(seg.StreamID[0:8]) >> (64 - useBits)
|
|
|
|
numsSelected[r*reservoirSize+i] = num
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
sort.Sort(uint64Slice(numsSelected))
|
|
|
|
|
|
|
|
// this delta is probably way too generous. but, the A-Chao
|
|
|
|
// implementation failed the test with this value, so maybe it's fine.
|
|
|
|
delta := float64(numSegments / 8)
|
|
|
|
quartile0 := numsSelected[len(numsSelected)*0/4]
|
|
|
|
assert.InDelta(t, numSegments*0/4, quartile0, delta)
|
|
|
|
quartile1 := numsSelected[len(numsSelected)*1/4]
|
|
|
|
assert.InDelta(t, numSegments*1/4, quartile1, delta)
|
|
|
|
quartile2 := numsSelected[len(numsSelected)*2/4]
|
|
|
|
assert.InDelta(t, numSegments*2/4, quartile2, delta)
|
|
|
|
quartile3 := numsSelected[len(numsSelected)*3/4]
|
|
|
|
assert.InDelta(t, numSegments*3/4, quartile3, delta)
|
|
|
|
quartile4 := numsSelected[len(numsSelected)-1]
|
|
|
|
assert.InDelta(t, numSegments*4/4, quartile4, delta)
|
|
|
|
}
|
|
|
|
|
|
|
|
// uint64Slice attaches the methods of sort.Interface to []uint64, ordering
// the elements in increasing order.
type uint64Slice []uint64

// Compile-time check that uint64Slice can be passed to sort.Sort.
var _ sort.Interface = uint64Slice(nil)

// Len returns the number of elements in the slice.
func (us uint64Slice) Len() int { return len(us) }

// Swap exchanges the elements at indexes i and j.
func (us uint64Slice) Swap(i, j int) { us[i], us[j] = us[j], us[i] }

// Less reports whether the element at index i is smaller than the one at j.
func (us uint64Slice) Less(i, j int) bool { return us[i] < us[j] }
|
2022-12-15 02:19:29 +00:00
|
|
|
|
2023-05-09 12:13:19 +01:00
|
|
|
func makeSegment(n int) rangedloop.Segment {
|
|
|
|
return rangedloop.Segment{
|
2022-12-15 02:19:29 +00:00
|
|
|
StreamID: uuid.UUID{0: byte(n)},
|
|
|
|
EncryptedSize: int32(n * 1000),
|
|
|
|
}
|
|
|
|
}
|