2021-07-20 10:19:34 +01:00
|
|
|
// Copyright (C) 2021 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
|
|
|
|
"github.com/jackc/pgx/v4"
|
|
|
|
"github.com/spacemonkeygo/monkit/v3"
|
|
|
|
"github.com/spf13/cobra"
|
|
|
|
flag "github.com/spf13/pflag"
|
|
|
|
"github.com/zeebo/errs"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
|
|
|
|
"storj.io/common/uuid"
|
|
|
|
"storj.io/private/dbutil/pgutil"
|
|
|
|
"storj.io/private/process"
|
|
|
|
"storj.io/storj/satellite/metabase"
|
|
|
|
)
|
|
|
|
|
|
|
|
var mon = monkit.Package()
|
|
|
|
|
|
|
|
var (
|
|
|
|
rootCmd = &cobra.Command{
|
|
|
|
Use: "metainfo-orphaned-segments",
|
|
|
|
Short: "metainfo-orphaned-segments",
|
|
|
|
}
|
|
|
|
|
|
|
|
reportCmd = &cobra.Command{
|
|
|
|
Use: "report",
|
|
|
|
Short: "report metainfo-orphaned-segments",
|
|
|
|
RunE: reportCommand,
|
|
|
|
}
|
|
|
|
|
|
|
|
deleteCmd = &cobra.Command{
|
|
|
|
Use: "delete",
|
|
|
|
Short: "delete metainfo-orphaned-segments",
|
|
|
|
RunE: deleteCommand,
|
|
|
|
}
|
|
|
|
|
|
|
|
config Config
|
|
|
|
)
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
rootCmd.AddCommand(reportCmd)
|
|
|
|
rootCmd.AddCommand(deleteCmd)
|
|
|
|
|
|
|
|
config.BindFlags(reportCmd.Flags())
|
2021-07-22 10:00:37 +01:00
|
|
|
config.BindFlags(deleteCmd.Flags())
|
2021-07-20 10:19:34 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Config defines configuration for cleanup.
|
|
|
|
type Config struct {
|
|
|
|
MetabaseDB string
|
|
|
|
LoopBatchSize int
|
|
|
|
DeleteBatchSize int
|
|
|
|
Cockroach bool
|
|
|
|
}
|
|
|
|
|
|
|
|
// BindFlags adds bench flags to the the flagset.
|
|
|
|
func (config *Config) BindFlags(flag *flag.FlagSet) {
|
|
|
|
flag.StringVar(&config.MetabaseDB, "metabasedb", "", "connection URL for MetabaseDB")
|
|
|
|
flag.IntVar(&config.LoopBatchSize, "loop-batch-size", 10000, "number of objects to process at once")
|
|
|
|
flag.IntVar(&config.DeleteBatchSize, "delete-batch-size", 100, "number of entries to delete with single query")
|
|
|
|
flag.BoolVar(&config.Cockroach, "cockroach", true, "metabase is on CRDB")
|
|
|
|
}
|
|
|
|
|
|
|
|
// VerifyFlags verifies whether the values provided are valid.
|
|
|
|
func (config *Config) VerifyFlags() error {
|
|
|
|
var errlist errs.Group
|
|
|
|
if config.MetabaseDB == "" {
|
|
|
|
errlist.Add(errors.New("flag '--metabasedb' is not set"))
|
|
|
|
}
|
|
|
|
return errlist.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
func reportCommand(cmd *cobra.Command, args []string) error {
|
|
|
|
if err := config.VerifyFlags(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx, _ := process.Ctx(cmd)
|
|
|
|
log := zap.L()
|
|
|
|
|
|
|
|
return Report(ctx, log, config)
|
|
|
|
}
|
|
|
|
|
|
|
|
func deleteCommand(cmd *cobra.Command, args []string) error {
|
|
|
|
if err := config.VerifyFlags(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx, _ := process.Ctx(cmd)
|
|
|
|
log := zap.L()
|
|
|
|
|
|
|
|
return Delete(ctx, log, config)
|
|
|
|
}
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
process.Exec(rootCmd)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Report finds and reports orphaned segments, nothing is deleted.
|
|
|
|
func Report(ctx context.Context, log *zap.Logger, config Config) (err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
|
|
|
ids, err := findOrphanedSegments(ctx, log, config)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-07-22 10:00:37 +01:00
|
|
|
log.Info("orphaned segments stream ids (number of existing segments can be bigger)",
|
|
|
|
zap.Int("count", len(ids)))
|
2021-07-20 10:19:34 +01:00
|
|
|
for _, id := range ids {
|
2021-07-22 10:00:37 +01:00
|
|
|
log.Info("StreamID", zap.String("id", id.String()))
|
2021-07-20 10:19:34 +01:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete finds and deletes orphaned segments.
|
|
|
|
func Delete(ctx context.Context, log *zap.Logger, config Config) (err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
|
|
|
ids, err := findOrphanedSegments(ctx, log, config)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
rawMetabaseDB, err := pgx.Connect(ctx, config.MetabaseDB)
|
|
|
|
if err != nil {
|
|
|
|
return errs.New("unable to connect %q: %w", config.MetabaseDB, err)
|
|
|
|
}
|
|
|
|
defer func() { err = errs.Combine(err, rawMetabaseDB.Close(ctx)) }()
|
|
|
|
|
2021-07-22 10:00:37 +01:00
|
|
|
log.Info("orphaned segments stream ids (number of existing segments can be bigger)", zap.Int("count", len(ids)))
|
2021-07-20 10:19:34 +01:00
|
|
|
log.Info("starting deletion")
|
|
|
|
|
|
|
|
for len(ids) > 0 {
|
|
|
|
batch := config.DeleteBatchSize
|
|
|
|
if batch > len(ids) {
|
|
|
|
batch = len(ids)
|
|
|
|
}
|
|
|
|
err = sendDelete(ctx, rawMetabaseDB, ids[:batch])
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
ids = ids[batch:]
|
|
|
|
|
|
|
|
log.Info("deleted", zap.Int("count", batch))
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func findOrphanedSegments(ctx context.Context, log *zap.Logger, config Config) (_ []uuid.UUID, err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
2022-02-20 21:29:41 +00:00
|
|
|
metabaseDB, err := metabase.Open(ctx, log.Named("metabase"), config.MetabaseDB, metabase.Config{ApplicationName: "metabase-orphaned-segments"})
|
2021-07-20 10:19:34 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, errs.New("unable to connect %q: %w", config.MetabaseDB, err)
|
|
|
|
}
|
|
|
|
defer func() { err = errs.Combine(err, metabaseDB.Close()) }()
|
|
|
|
|
|
|
|
startingTime, err := metabaseDB.Now(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
streamIDs := make(map[uuid.UUID]struct{})
|
|
|
|
numberOfElements := 0
|
|
|
|
err = metabaseDB.IterateLoopSegments(ctx, metabase.IterateLoopSegments{
|
|
|
|
BatchSize: config.LoopBatchSize,
|
|
|
|
AsOfSystemTime: startingTime,
|
|
|
|
}, func(ctx context.Context, it metabase.LoopSegmentsIterator) error {
|
|
|
|
var entry metabase.LoopSegmentEntry
|
|
|
|
for it.Next(ctx, &entry) {
|
2021-07-26 16:17:04 +01:00
|
|
|
// avoid segments created after starting processing
|
2021-08-05 00:56:50 +01:00
|
|
|
if entry.CreatedAt.After(startingTime) {
|
2021-07-26 16:17:04 +01:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2021-07-20 10:19:34 +01:00
|
|
|
streamIDs[entry.StreamID] = struct{}{}
|
|
|
|
|
|
|
|
if numberOfElements%100000 == 0 {
|
|
|
|
log.Info("segments iterated", zap.Int("segments", numberOfElements))
|
|
|
|
}
|
|
|
|
|
|
|
|
numberOfElements++
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
numberOfElements = 0
|
|
|
|
err = metabaseDB.IterateLoopObjects(ctx, metabase.IterateLoopObjects{
|
|
|
|
BatchSize: config.LoopBatchSize,
|
|
|
|
AsOfSystemTime: startingTime,
|
|
|
|
}, func(ctx context.Context, it metabase.LoopObjectsIterator) error {
|
|
|
|
var entry metabase.LoopObjectEntry
|
|
|
|
for it.Next(ctx, &entry) {
|
2021-07-26 16:17:04 +01:00
|
|
|
// avoid objects created after starting processing
|
|
|
|
if entry.CreatedAt.After(startingTime) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2021-07-20 10:19:34 +01:00
|
|
|
delete(streamIDs, entry.StreamID)
|
|
|
|
|
|
|
|
if numberOfElements%100000 == 0 {
|
|
|
|
log.Info("objects iterated", zap.Int("objects", numberOfElements))
|
|
|
|
}
|
|
|
|
|
|
|
|
numberOfElements++
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
ids := make([]uuid.UUID, 0, len(streamIDs))
|
|
|
|
for id := range streamIDs {
|
|
|
|
ids = append(ids, id)
|
|
|
|
}
|
|
|
|
|
|
|
|
return ids, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func sendDelete(ctx context.Context, conn *pgx.Conn, ids []uuid.UUID) (err error) {
|
|
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
|
|
|
|
if len(ids) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = conn.Exec(ctx, "DELETE FROM segments WHERE stream_id = ANY($1)", pgutil.UUIDArray(ids))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|