storj/satellite/audit/reservoir.go
Andrew Harding 93fad70e4b satellite/audit: prevent accessing unset reservoir segments
This change fixes the access of unset segments and keys on the reservoir
when the reservoir size is less than the max OR the number of sampled
segments is smaller than the reservoir size. It does so by tucking away
the segments and keys behind methods that return properly sized slices
into the segments/keys arrays.

It also fixes a bug in the housekeeping for the internal index variable
that holds onto how many items in the array have been populated. As part
of this fix, it changes the type of index to int8, which reduces the
size of the reservoir struct by 8 bytes.

The tests have been updated to provide better coverage for this case.

Change-Id: I3ceb17b692fe456fc4c1ca5d67d35c96aeb0a169
2022-12-14 17:43:17 -07:00

98 lines
2.7 KiB
Go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package audit
import (
"math"
"math/rand"
"time"
"storj.io/common/uuid"
"storj.io/storj/satellite/metabase"
"storj.io/storj/satellite/metabase/segmentloop"
)
// maxReservoirSize is the largest number of segments a Reservoir can hold. It
// is the length of the Reservoir's backing arrays and the upper bound that
// NewReservoir clamps the requested size to.
const maxReservoirSize = 3
// Reservoir holds a certain number of segments to reflect a random sample.
//
// The sample is stored in two fixed-size parallel arrays. Only the first
// index entries of each array are populated; read them through the Segments
// and Keys methods rather than indexing the arrays directly.
type Reservoir struct {
// segments holds the sampled segments; segments[i] pairs with keys[i].
segments [maxReservoirSize]segmentloop.Segment
// keys holds the sampling key Sample computed for the segment in the same slot.
keys [maxReservoirSize]float64
// size is the number of slots this reservoir may fill. NewReservoir clamps
// it to the range [1, maxReservoirSize].
size int8
// index is the number of slots populated so far. Sample increments it until
// it reaches size.
index int8
}
// NewReservoir instantiates a Reservoir that keeps at most size segments.
//
// The requested size is clamped to the range [1, maxReservoirSize]: values
// below 1 become 1 and values above maxReservoirSize become maxReservoirSize.
// The returned reservoir starts out empty.
func NewReservoir(size int) *Reservoir {
	capacity := size
	switch {
	case capacity < 1:
		capacity = 1
	case capacity > maxReservoirSize:
		capacity = maxReservoirSize
	}

	// The populated-slot counter relies on its zero value.
	res := new(Reservoir)
	res.size = int8(capacity)
	return res
}
// Segments returns the segments picked by the reservoir.
//
// Only the populated slots are returned, so the result is empty until Sample
// has been called. The slice is a view into the reservoir's internal array,
// not a copy: a later call to Sample may overwrite its elements.
//
// The capacity of the returned slice is limited to its length so that a
// caller appending to it (or re-slicing it) gets a fresh allocation instead
// of reaching into the reservoir's unset slots.
func (reservoir *Reservoir) Segments() []segmentloop.Segment {
	n := reservoir.index
	return reservoir.segments[:n:n]
}
// Keys returns the keys for the segments picked by the reservoir.
//
// Only the populated slots are returned, so the result is empty until Sample
// has been called. The slice is a view into the reservoir's internal array,
// not a copy: a later call to Sample may overwrite its elements.
//
// The capacity of the returned slice is limited to its length so that a
// caller appending to it (or re-slicing it) gets a fresh allocation instead
// of reaching into the reservoir's unset slots.
func (reservoir *Reservoir) Keys() []float64 {
	n := reservoir.index
	return reservoir.keys[:n:n]
}
// Sample offers one segment to the reservoir, giving every offered segment a
// chance (proportional to its size) of being in the reservoir once sampling
// is complete.
//
// The total number of segments is not known ahead of time, which is the
// problem _Reservoir Sampling_ solves. The specific algorithm used here is
// called A-Res on the Wikipedia article:
// https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_A-Res
//
// Each segment is given the key -ln(u)/EncryptedSize, where u is drawn from
// r, and the reservoir keeps the segments with the smallest keys.
func (reservoir *Reservoir) Sample(r *rand.Rand, segment *segmentloop.Segment) {
	key := -math.Log(r.Float64()) / float64(segment.EncryptedSize)

	// While there are free slots, every segment is accepted.
	if next := reservoir.index; next < reservoir.size {
		reservoir.segments[next] = *segment
		reservoir.keys[next] = key
		reservoir.index = next + 1
		return
	}

	// The reservoir is full: locate the slot holding the largest key, which
	// is the one to evict.
	worst := int8(0)
	for slot := int8(1); slot < reservoir.size; slot++ {
		if reservoir.keys[slot] > reservoir.keys[worst] {
			worst = slot
		}
	}

	// Replace it only when the new segment's key is strictly smaller.
	if key < reservoir.keys[worst] {
		reservoir.segments[worst] = *segment
		reservoir.keys[worst] = key
	}
}
// Segment is a segment to audit.
//
// NewSegment builds one by copying these fields from a segmentloop.Segment.
type Segment struct {
// StreamID and Position are copied unchanged from the loop segment.
StreamID uuid.UUID
Position metabase.SegmentPosition
// ExpiresAt is the expiration time. When it is nil, Expired always reports
// false.
ExpiresAt *time.Time
EncryptedSize int32 // size of the whole segment (not a piece)
}
// NewSegment creates a new segment to audit from a metainfo loop segment.
//
// StreamID, Position, ExpiresAt and EncryptedSize are copied from
// loopSegment. ExpiresAt is a pointer, so the result shares the pointed-to
// time value with loopSegment.
func NewSegment(loopSegment segmentloop.Segment) Segment {
	var audited Segment
	audited.StreamID = loopSegment.StreamID
	audited.Position = loopSegment.Position
	audited.ExpiresAt = loopSegment.ExpiresAt
	audited.EncryptedSize = loopSegment.EncryptedSize
	return audited
}
// Expired checks if segment is expired relative to now.
//
// A segment with no expiration time (nil ExpiresAt) is never expired.
// Otherwise it is expired when its expiration time is strictly before now.
func (segment *Segment) Expired(now time.Time) bool {
	deadline := segment.ExpiresAt
	if deadline == nil {
		return false
	}
	return deadline.Before(now)
}