satellite/audit: fix reservoir sampling bias

Change-Id: Icc522fd86538b8182a1b7d42c1588c32a257acaf
This commit is contained in:
Jeff Wendling 2021-06-07 13:32:03 -04:00 committed by Egon Elbre
parent 1cd13adc1c
commit 944bceabcd
2 changed files with 29 additions and 2 deletions

View File

@ -37,15 +37,15 @@ func NewReservoir(size int) *Reservoir {
// Sample offers segment to the reservoir, performing one step of reservoir
// sampling.
//
// While fewer than size segments have been offered, the segment is stored in
// the next free slot. After that, for the segment at zero-based position i, a
// random number in [0, i] is drawn from r, and if it is below size the segment
// replaces reservoir.Segments[random]. The segment at position i is therefore
// kept with probability size/(i+1).
func (reservoir *Reservoir) Sample(r *rand.Rand, segment Segment) {
	// index is the zero-based position of the segment being offered. It is
	// advanced exactly once, at the end, so that slot 0 is filled first.
	if reservoir.index < int64(reservoir.size) {
		reservoir.Segments[reservoir.index] = segment
	} else {
		// Int63n(n) returns a value in the half-open range [0, n), so the
		// upper bound must be index+1 to include index itself. Drawing from
		// [0, index) instead would bias the sample toward later segments.
		random := r.Int63n(reservoir.index + 1)
		if random < int64(reservoir.size) {
			reservoir.Segments[random] = segment
		}
	}
	reservoir.index++
}
// Segment is a segment to audit.

View File

@ -0,0 +1,27 @@
// Copyright (C) 2021 Storj Labs, Inc.
// See LICENSE for copying information.
package audit
import (
"math/rand"
"testing"
"github.com/stretchr/testify/require"
"storj.io/common/uuid"
)
// TestReservoir checks that a reservoir created with size 3 records every
// segment, in the order offered, when exactly 3 segments are sampled.
func TestReservoir(t *testing.T) {
	rng := rand.New(rand.NewSource(0))
	r := NewReservoir(3)

	// seg builds a Segment whose StreamID has n as its first byte.
	seg := func(n byte) Segment { return Segment{StreamID: uuid.UUID{0: n}} }

	// if we sample 3 segments, we should record all 3
	r.Sample(rng, seg(1))
	r.Sample(rng, seg(2))
	r.Sample(rng, seg(3))

	// require.Equal takes (t, expected, actual); keeping that order makes the
	// failure message label the two values correctly.
	require.Equal(t, []Segment{seg(1), seg(2), seg(3)}, r.Segments[:])
}