satellite/gc/bloomfilter: take CreationDate from latest segment
Bloom filter CreationDate is used to avoid deleting pieces that where not processed by GC. Every piece created after that timestamp won't be deleted. Current GC process is taking CreationDate as a beginning of bloom filter creation. This is appraoch allows to avoid issues with inconsistent view on DB as currently we are using live DB to create bloom filters. With appraoch were we will be using DB snaphot with segment loop we can get CreationDate from latest created segment in DB. Every piece created after latest created segment won't be touched by GC on storage node. Updates https://github.com/storj/team-metainfo/issues/120 Change-Id: I6aaf64948ab7f60cfea62195689ad77c25ea772e
This commit is contained in:
parent
a848c29b9b
commit
90eded4d99
@ -20,31 +20,33 @@ var _ segmentloop.Observer = (*PieceTracker)(nil)
|
|||||||
|
|
||||||
// RetainInfo contains info needed for a storage node to retain important data and delete garbage data.
|
// RetainInfo contains info needed for a storage node to retain important data and delete garbage data.
|
||||||
type RetainInfo struct {
|
type RetainInfo struct {
|
||||||
Filter *bloomfilter.Filter
|
Filter *bloomfilter.Filter
|
||||||
CreationDate time.Time
|
Count int
|
||||||
Count int
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// PieceTracker implements the metainfo loop observer interface for garbage collection.
|
// PieceTracker implements the segments loop observer interface for garbage collection.
|
||||||
//
|
//
|
||||||
// architecture: Observer
|
// architecture: Observer
|
||||||
type PieceTracker struct {
|
type PieceTracker struct {
|
||||||
log *zap.Logger
|
log *zap.Logger
|
||||||
config Config
|
config Config
|
||||||
creationDate time.Time
|
|
||||||
// TODO: should we use int or int64 consistently for piece count (db type is int64)?
|
// TODO: should we use int or int64 consistently for piece count (db type is int64)?
|
||||||
pieceCounts map[storj.NodeID]int
|
pieceCounts map[storj.NodeID]int
|
||||||
|
startTime time.Time
|
||||||
|
|
||||||
RetainInfos map[storj.NodeID]*RetainInfo
|
RetainInfos map[storj.NodeID]*RetainInfo
|
||||||
|
// LatestCreationTime will be used to set bloom filter CreationDate.
|
||||||
|
// Because bloom filter service needs to be run against immutable database snapshot
|
||||||
|
// we can set CreationDate for bloom filters as a latest segment CreatedAt value.
|
||||||
|
LatestCreationTime time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewPieceTracker instantiates a new gc piece tracker to be subscribed to the metainfo loop.
|
// NewPieceTracker instantiates a new gc piece tracker to be subscribed to the segments loop.
|
||||||
func NewPieceTracker(log *zap.Logger, config Config, pieceCounts map[storj.NodeID]int) *PieceTracker {
|
func NewPieceTracker(log *zap.Logger, config Config, pieceCounts map[storj.NodeID]int) *PieceTracker {
|
||||||
return &PieceTracker{
|
return &PieceTracker{
|
||||||
log: log,
|
log: log,
|
||||||
config: config,
|
config: config,
|
||||||
creationDate: time.Now().UTC(),
|
pieceCounts: pieceCounts,
|
||||||
pieceCounts: pieceCounts,
|
|
||||||
|
|
||||||
RetainInfos: make(map[storj.NodeID]*RetainInfo, len(pieceCounts)),
|
RetainInfos: make(map[storj.NodeID]*RetainInfo, len(pieceCounts)),
|
||||||
}
|
}
|
||||||
@ -52,9 +54,7 @@ func NewPieceTracker(log *zap.Logger, config Config, pieceCounts map[storj.NodeI
|
|||||||
|
|
||||||
// LoopStarted is called at each start of a loop.
|
// LoopStarted is called at each start of a loop.
|
||||||
func (pieceTracker *PieceTracker) LoopStarted(ctx context.Context, info segmentloop.LoopInfo) (err error) {
|
func (pieceTracker *PieceTracker) LoopStarted(ctx context.Context, info segmentloop.LoopInfo) (err error) {
|
||||||
if pieceTracker.creationDate.After(info.Started) {
|
pieceTracker.startTime = info.Started
|
||||||
return errs.New("Creation date after loop starting time.")
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -62,6 +62,18 @@ func (pieceTracker *PieceTracker) LoopStarted(ctx context.Context, info segmentl
|
|||||||
func (pieceTracker *PieceTracker) RemoteSegment(ctx context.Context, segment *segmentloop.Segment) error {
|
func (pieceTracker *PieceTracker) RemoteSegment(ctx context.Context, segment *segmentloop.Segment) error {
|
||||||
// we are expliticy not adding monitoring here as we are tracking loop observers separately
|
// we are expliticy not adding monitoring here as we are tracking loop observers separately
|
||||||
|
|
||||||
|
// sanity check to detect if loop is not running against live database
|
||||||
|
if segment.CreatedAt.After(pieceTracker.startTime) {
|
||||||
|
pieceTracker.log.Error("segment created after loop started", zap.Stringer("StreamID", segment.StreamID),
|
||||||
|
zap.Time("loop started", pieceTracker.startTime),
|
||||||
|
zap.Time("segment created", segment.CreatedAt))
|
||||||
|
return errs.New("segment created after loop started")
|
||||||
|
}
|
||||||
|
|
||||||
|
if pieceTracker.LatestCreationTime.Before(segment.CreatedAt) {
|
||||||
|
pieceTracker.LatestCreationTime = segment.CreatedAt
|
||||||
|
}
|
||||||
|
|
||||||
deriver := segment.RootPieceID.Deriver()
|
deriver := segment.RootPieceID.Deriver()
|
||||||
for _, piece := range segment.Pieces {
|
for _, piece := range segment.Pieces {
|
||||||
pieceID := deriver.Derive(piece.StorageNode, int32(piece.Number))
|
pieceID := deriver.Derive(piece.StorageNode, int32(piece.Number))
|
||||||
@ -83,8 +95,7 @@ func (pieceTracker *PieceTracker) add(nodeID storj.NodeID, pieceID storj.PieceID
|
|||||||
// limit size of bloom filter to ensure we are under the limit for RPC
|
// limit size of bloom filter to ensure we are under the limit for RPC
|
||||||
filter := bloomfilter.NewOptimalMaxSize(numPieces, pieceTracker.config.FalsePositiveRate, 2*memory.MiB)
|
filter := bloomfilter.NewOptimalMaxSize(numPieces, pieceTracker.config.FalsePositiveRate, 2*memory.MiB)
|
||||||
info = &RetainInfo{
|
info = &RetainInfo{
|
||||||
Filter: filter,
|
Filter: filter,
|
||||||
CreationDate: pieceTracker.creationDate,
|
|
||||||
}
|
}
|
||||||
pieceTracker.RetainInfos[nodeID] = info
|
pieceTracker.RetainInfos[nodeID] = info
|
||||||
}
|
}
|
||||||
|
@ -40,7 +40,7 @@ type Config struct {
|
|||||||
ExpireIn time.Duration `help:"how quickly uploaded bloom filters will be automatically deleted" default:"336h"`
|
ExpireIn time.Duration `help:"how quickly uploaded bloom filters will be automatically deleted" default:"336h"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Service implements the garbage collection service.
|
// Service implements service to collect bloom filters for the garbage collection.
|
||||||
//
|
//
|
||||||
// architecture: Chore
|
// architecture: Chore
|
||||||
type Service struct {
|
type Service struct {
|
||||||
@ -106,7 +106,7 @@ func (service *Service) RunOnce(ctx context.Context) (err error) {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
err = service.uploadBloomFilters(ctx, pieceTracker.RetainInfos)
|
err = service.uploadBloomFilters(ctx, pieceTracker.LatestCreationTime, pieceTracker.RetainInfos)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -116,7 +116,7 @@ func (service *Service) RunOnce(ctx context.Context) (err error) {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (service *Service) uploadBloomFilters(ctx context.Context, retainInfos map[storj.NodeID]*RetainInfo) (err error) {
|
func (service *Service) uploadBloomFilters(ctx context.Context, latestCreationDate time.Time, retainInfos map[storj.NodeID]*RetainInfo) (err error) {
|
||||||
defer mon.Task()(&ctx)(&err)
|
defer mon.Task()(&ctx)(&err)
|
||||||
|
|
||||||
if len(retainInfos) == 0 {
|
if len(retainInfos) == 0 {
|
||||||
@ -163,8 +163,10 @@ func (service *Service) uploadBloomFilters(ctx context.Context, retainInfos map[
|
|||||||
batchNumber := 0
|
batchNumber := 0
|
||||||
for nodeID, info := range retainInfos {
|
for nodeID, info := range retainInfos {
|
||||||
infos = append(infos, internalpb.RetainInfo{
|
infos = append(infos, internalpb.RetainInfo{
|
||||||
Filter: info.Filter.Bytes(),
|
Filter: info.Filter.Bytes(),
|
||||||
CreationDate: info.CreationDate,
|
// because bloom filters should be created from immutable database
|
||||||
|
// snapshot we are using latest segment creation date
|
||||||
|
CreationDate: latestCreationDate,
|
||||||
PieceCount: int64(info.Count),
|
PieceCount: int64(info.Count),
|
||||||
StorageNodeId: nodeID,
|
StorageNodeId: nodeID,
|
||||||
})
|
})
|
||||||
|
Loading…
Reference in New Issue
Block a user