2019-08-21 16:49:27 +01:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package audit
|
|
|
|
|
|
|
|
import (
|
2022-12-09 22:22:39 +00:00
|
|
|
"math"
|
2019-08-21 16:49:27 +01:00
|
|
|
"math/rand"
|
2020-12-14 12:54:22 +00:00
|
|
|
"time"
|
2019-08-21 16:49:27 +01:00
|
|
|
|
2022-12-15 02:19:29 +00:00
|
|
|
"github.com/zeebo/errs"
|
|
|
|
|
2020-12-14 12:54:22 +00:00
|
|
|
"storj.io/common/uuid"
|
2021-04-21 13:42:57 +01:00
|
|
|
"storj.io/storj/satellite/metabase"
|
2023-05-09 12:13:19 +01:00
|
|
|
"storj.io/storj/satellite/metabase/rangedloop"
|
2019-08-21 16:49:27 +01:00
|
|
|
)
|
|
|
|
|
2019-09-05 16:40:52 +01:00
|
|
|
// maxReservoirSize is the number of slots in the fixed-size arrays backing a
// Reservoir, and therefore the largest size NewReservoir will hand out.
const maxReservoirSize = 3
|
2019-08-21 16:49:27 +01:00
|
|
|
|
2020-07-16 15:18:02 +01:00
|
|
|
// Reservoir holds a certain number of segments to reflect a random sample.
|
2019-08-21 16:49:27 +01:00
|
|
|
type Reservoir struct {
|
2023-05-09 12:13:19 +01:00
|
|
|
segments [maxReservoirSize]rangedloop.Segment
|
2022-12-14 23:43:50 +00:00
|
|
|
keys [maxReservoirSize]float64
|
2020-12-14 12:54:22 +00:00
|
|
|
size int8
|
2022-12-14 23:43:50 +00:00
|
|
|
index int8
|
2019-08-21 16:49:27 +01:00
|
|
|
}
|
|
|
|
|
2020-07-16 15:18:02 +01:00
|
|
|
// NewReservoir instantiates a Reservoir.
|
2019-08-21 16:49:27 +01:00
|
|
|
func NewReservoir(size int) *Reservoir {
|
|
|
|
if size < 1 {
|
|
|
|
size = 1
|
|
|
|
} else if size > maxReservoirSize {
|
|
|
|
size = maxReservoirSize
|
|
|
|
}
|
|
|
|
return &Reservoir{
|
|
|
|
size: int8(size),
|
|
|
|
index: 0,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-14 23:43:50 +00:00
|
|
|
// Segments returns the segments picked by the reservoir.
|
2023-05-09 12:13:19 +01:00
|
|
|
func (reservoir *Reservoir) Segments() []rangedloop.Segment {
|
2022-12-14 23:43:50 +00:00
|
|
|
return reservoir.segments[:reservoir.index]
|
|
|
|
}
|
|
|
|
|
|
|
|
// Keys returns the keys for the segments picked by the reservoir.
|
|
|
|
func (reservoir *Reservoir) Keys() []float64 {
|
|
|
|
return reservoir.keys[:reservoir.index]
|
|
|
|
}
|
|
|
|
|
2022-12-09 22:22:39 +00:00
|
|
|
// Sample tries to ensure that each segment passed in has a chance (proportional
|
|
|
|
// to its size) to be in the reservoir when sampling is complete.
|
|
|
|
//
|
|
|
|
// The tricky part is that we do not know ahead of time how many segments will
|
|
|
|
// be passed in. The way this is accomplished is known as _Reservoir Sampling_.
|
|
|
|
// The specific algorithm we are using here is called A-Res on the Wikipedia
|
|
|
|
// article: https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_A-Res
|
2023-05-09 12:13:19 +01:00
|
|
|
func (reservoir *Reservoir) Sample(r *rand.Rand, segment rangedloop.Segment) {
|
2022-12-09 22:22:39 +00:00
|
|
|
k := -math.Log(r.Float64()) / float64(segment.EncryptedSize)
|
2022-12-15 02:19:29 +00:00
|
|
|
reservoir.sample(k, segment)
|
|
|
|
}
|
|
|
|
|
2023-05-09 12:13:19 +01:00
|
|
|
func (reservoir *Reservoir) sample(k float64, segment rangedloop.Segment) {
|
2022-12-14 23:43:50 +00:00
|
|
|
if reservoir.index < reservoir.size {
|
2023-04-24 11:07:16 +01:00
|
|
|
reservoir.segments[reservoir.index] = segment
|
2022-12-14 23:43:50 +00:00
|
|
|
reservoir.keys[reservoir.index] = k
|
|
|
|
reservoir.index++
|
2019-08-21 16:49:27 +01:00
|
|
|
} else {
|
2022-12-14 23:43:50 +00:00
|
|
|
max := int8(0)
|
|
|
|
for i := int8(1); i < reservoir.size; i++ {
|
|
|
|
if reservoir.keys[i] > reservoir.keys[max] {
|
2022-12-09 22:22:39 +00:00
|
|
|
max = i
|
|
|
|
}
|
|
|
|
}
|
2022-12-14 23:43:50 +00:00
|
|
|
if k < reservoir.keys[max] {
|
2023-04-24 11:07:16 +01:00
|
|
|
reservoir.segments[max] = segment
|
2022-12-14 23:43:50 +00:00
|
|
|
reservoir.keys[max] = k
|
2019-08-21 16:49:27 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-12-14 12:54:22 +00:00
|
|
|
|
2022-12-15 02:19:29 +00:00
|
|
|
// Merge merges the given reservoir into the first. Both reservoirs must have the same size.
|
|
|
|
func (reservoir *Reservoir) Merge(operand *Reservoir) error {
|
|
|
|
if reservoir.size != operand.size {
|
|
|
|
return errs.New("cannot merge: mismatched size: expected %d but got %d", reservoir.size, operand.size)
|
|
|
|
}
|
|
|
|
for i := int8(0); i < operand.index; i++ {
|
2023-04-24 11:07:16 +01:00
|
|
|
reservoir.sample(operand.keys[i], operand.segments[i])
|
2022-12-15 02:19:29 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-12-14 12:54:22 +00:00
|
|
|
// Segment is a segment to audit.
|
|
|
|
type Segment struct {
|
2021-11-29 17:29:02 +00:00
|
|
|
StreamID uuid.UUID
|
|
|
|
Position metabase.SegmentPosition
|
|
|
|
ExpiresAt *time.Time
|
|
|
|
EncryptedSize int32 // size of the whole segment (not a piece)
|
2020-12-14 12:54:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewSegment creates a new segment to audit from a metainfo loop segment.
|
2023-05-09 12:13:19 +01:00
|
|
|
func NewSegment(loopSegment rangedloop.Segment) Segment {
|
2020-12-14 12:54:22 +00:00
|
|
|
return Segment{
|
2021-11-29 17:29:02 +00:00
|
|
|
StreamID: loopSegment.StreamID,
|
|
|
|
Position: loopSegment.Position,
|
|
|
|
ExpiresAt: loopSegment.ExpiresAt,
|
|
|
|
EncryptedSize: loopSegment.EncryptedSize,
|
2020-12-14 12:54:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Expired checks if segment is expired relative to now.
|
|
|
|
func (segment *Segment) Expired(now time.Time) bool {
|
2021-06-14 16:40:46 +01:00
|
|
|
return segment.ExpiresAt != nil && segment.ExpiresAt.Before(now)
|
2020-12-14 12:54:22 +00:00
|
|
|
}
|