93fad70e4b
This change fixes the access of unset segments and keys on the reservoir when the reservoir size is less than the max OR the number of sampled segments is smaller than the reservoir size. It does so by tucking away the segments and keys behind methods that return properly sized slices into the segments/keys arrays. It also fixes a bug in the housekeeping for the internal index variable that holds onto how many items in the array have been populated. As part of this fix, it changes the type of index to int8, which reduces the size of the reservoir struct by 8 bytes. The tests have been updated to provide better coverage for this case. Change-Id: I3ceb17b692fe456fc4c1ca5d67d35c96aeb0a169
98 lines
2.7 KiB
Go
98 lines
2.7 KiB
Go
// Copyright (C) 2019 Storj Labs, Inc.
|
|
// See LICENSE for copying information.
|
|
|
|
package audit
|
|
|
|
import (
|
|
"math"
|
|
"math/rand"
|
|
"time"
|
|
|
|
"storj.io/common/uuid"
|
|
"storj.io/storj/satellite/metabase"
|
|
"storj.io/storj/satellite/metabase/segmentloop"
|
|
)
|
|
|
|
// maxReservoirSize is the length of the fixed-size arrays backing a Reservoir
// and therefore the largest sample size NewReservoir will allow.
const maxReservoirSize = 3
|
|
|
|
// Reservoir holds a certain number of segments to reflect a random sample.
|
|
type Reservoir struct {
|
|
segments [maxReservoirSize]segmentloop.Segment
|
|
keys [maxReservoirSize]float64
|
|
size int8
|
|
index int8
|
|
}
|
|
|
|
// NewReservoir instantiates a Reservoir.
|
|
func NewReservoir(size int) *Reservoir {
|
|
if size < 1 {
|
|
size = 1
|
|
} else if size > maxReservoirSize {
|
|
size = maxReservoirSize
|
|
}
|
|
return &Reservoir{
|
|
size: int8(size),
|
|
index: 0,
|
|
}
|
|
}
|
|
|
|
// Segments returns the segments picked by the reservoir.
|
|
func (reservoir *Reservoir) Segments() []segmentloop.Segment {
|
|
return reservoir.segments[:reservoir.index]
|
|
}
|
|
|
|
// Keys returns the keys for the segments picked by the reservoir.
|
|
func (reservoir *Reservoir) Keys() []float64 {
|
|
return reservoir.keys[:reservoir.index]
|
|
}
|
|
|
|
// Sample tries to ensure that each segment passed in has a chance (proportional
|
|
// to its size) to be in the reservoir when sampling is complete.
|
|
//
|
|
// The tricky part is that we do not know ahead of time how many segments will
|
|
// be passed in. The way this is accomplished is known as _Reservoir Sampling_.
|
|
// The specific algorithm we are using here is called A-Res on the Wikipedia
|
|
// article: https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_A-Res
|
|
func (reservoir *Reservoir) Sample(r *rand.Rand, segment *segmentloop.Segment) {
|
|
k := -math.Log(r.Float64()) / float64(segment.EncryptedSize)
|
|
if reservoir.index < reservoir.size {
|
|
reservoir.segments[reservoir.index] = *segment
|
|
reservoir.keys[reservoir.index] = k
|
|
reservoir.index++
|
|
} else {
|
|
max := int8(0)
|
|
for i := int8(1); i < reservoir.size; i++ {
|
|
if reservoir.keys[i] > reservoir.keys[max] {
|
|
max = i
|
|
}
|
|
}
|
|
if k < reservoir.keys[max] {
|
|
reservoir.segments[max] = *segment
|
|
reservoir.keys[max] = k
|
|
}
|
|
}
|
|
}
|
|
|
|
// Segment is a segment to audit.
|
|
type Segment struct {
|
|
StreamID uuid.UUID
|
|
Position metabase.SegmentPosition
|
|
ExpiresAt *time.Time
|
|
EncryptedSize int32 // size of the whole segment (not a piece)
|
|
}
|
|
|
|
// NewSegment creates a new segment to audit from a metainfo loop segment.
|
|
func NewSegment(loopSegment segmentloop.Segment) Segment {
|
|
return Segment{
|
|
StreamID: loopSegment.StreamID,
|
|
Position: loopSegment.Position,
|
|
ExpiresAt: loopSegment.ExpiresAt,
|
|
EncryptedSize: loopSegment.EncryptedSize,
|
|
}
|
|
}
|
|
|
|
// Expired checks if segment is expired relative to now.
|
|
func (segment *Segment) Expired(now time.Time) bool {
|
|
return segment.ExpiresAt != nil && segment.ExpiresAt.Before(now)
|
|
}
|