satellite/audit: fix reservoir sampling bias
Change-Id: Icc522fd86538b8182a1b7d42c1588c32a257acaf
This commit is contained in:
parent
1cd13adc1c
commit
944bceabcd
@ -37,15 +37,15 @@ func NewReservoir(size int) *Reservoir {
|
||||
// Sample makes sure that for every segment in metainfo from index i=size..n-1,
|
||||
// pick a random number r = rand(0..i), and if r < size, replace reservoir.Segments[r] with segment.
|
||||
func (reservoir *Reservoir) Sample(r *rand.Rand, segment Segment) {
|
||||
reservoir.index++
|
||||
if reservoir.index < int64(reservoir.size) {
|
||||
reservoir.Segments[reservoir.index] = segment
|
||||
} else {
|
||||
random := r.Int63n(reservoir.index)
|
||||
random := r.Int63n(reservoir.index + 1)
|
||||
if random < int64(reservoir.size) {
|
||||
reservoir.Segments[random] = segment
|
||||
}
|
||||
}
|
||||
reservoir.index++
|
||||
}
|
||||
|
||||
// Segment is a segment to audit.
|
||||
|
27
satellite/audit/reservoir_test.go
Normal file
27
satellite/audit/reservoir_test.go
Normal file
@ -0,0 +1,27 @@
|
||||
// Copyright (C) 2021 Storj Labs, Inc.
|
||||
// See LICENSE for copying information.
|
||||
|
||||
package audit
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"storj.io/common/uuid"
|
||||
)
|
||||
|
||||
func TestReservoir(t *testing.T) {
|
||||
rng := rand.New(rand.NewSource(0))
|
||||
r := NewReservoir(3)
|
||||
|
||||
seg := func(n byte) Segment { return Segment{StreamID: uuid.UUID{0: n}} }
|
||||
|
||||
// if we sample 3 segments, we should record all 3
|
||||
r.Sample(rng, seg(1))
|
||||
r.Sample(rng, seg(2))
|
||||
r.Sample(rng, seg(3))
|
||||
|
||||
require.Equal(t, r.Segments[:], []Segment{seg(1), seg(2), seg(3)})
|
||||
}
|
Loading…
Reference in New Issue
Block a user