storj/cmd/storagenode/internalcmd/gc_filewalker.go

// Copyright (C) 2023 Storj Labs, Inc.
// See LICENSE for copying information.

package internalcmd

import (
	"encoding/json"
	"runtime"

	"github.com/spf13/cobra"
	"github.com/zeebo/errs"
	"go.uber.org/zap"

	"storj.io/common/bloomfilter"
	"storj.io/private/process"
	"storj.io/storj/storagenode/iopriority"
	"storj.io/storj/storagenode/pieces"
	"storj.io/storj/storagenode/pieces/lazyfilewalker"
	"storj.io/storj/storagenode/storagenodedb"
)

// NewGCFilewalkerCmd builds the hidden cobra subcommand that runs the garbage
// collection filewalker. The returned LazyFilewalkerCmd carries both the cobra
// command and the RunOptions that the command hands to gcCmdRun when executed.
func NewGCFilewalkerCmd() *LazyFilewalkerCmd {
	var (
		config  FilewalkerCfg
		options RunOptions
	)

	// run is the command's entry point: it normalizes the run options against
	// the invoked command, attaches the bound config and starts the filewalker.
	run := func(invoked *cobra.Command, _ []string) error {
		options.normalize(invoked)
		options.config = &config

		return gcCmdRun(&options)
	}

	command := &cobra.Command{
		Use:    lazyfilewalker.GCFilewalkerCmdName,
		Short:  "An internal subcommand used to run garbage collection filewalker as a separate subprocess with lower IO priority",
		Hidden: true,
		// The command takes no positional arguments.
		Args: cobra.ExactArgs(0),
		// Flags this command does not know about are not treated as parse errors.
		FParseErrWhitelist: cobra.FParseErrWhitelist{
			UnknownFlags: true,
		},
		RunE: run,
	}

	process.Bind(command, &config)

	return &LazyFilewalkerCmd{
		Command:    command,
		RunOptions: &options,
	}
}

// gcCmdRun runs the garbage collection filewalker for a single request.
//
// It decodes a JSON lazyfilewalker.GCFilewalkerRequest from g.stdin, validates
// it, opens the existing storage node database, calls
// WalkSatellitePiecesToTrash with the decoded bloom filter and finally encodes
// a lazyfilewalker.GCFilewalkerResponse as JSON to g.stdout. If
// g.config.LowerIOPriority is set, the IO priority is lowered before any of
// that happens.
//
// Once the database has been opened, the returned error is combined with the
// error (if any) from closing it.
func gcCmdRun(g *RunOptions) (err error) {
	if g.config.LowerIOPriority {
		if runtime.GOOS == "linux" {
			// The IO priority is set for the current thread, so keep this
			// goroutine pinned to its OS thread until the function returns.
			// Go does not use CLONE_IO when creating new threads, so threads
			// do not share a single IO context.
			runtime.LockOSThread()
			defer runtime.UnlockOSThread()
		}

		if err = iopriority.SetLowIOPriority(); err != nil {
			return err
		}
	}

	logger := g.Logger

	// Read the request sent by the main process.
	var request lazyfilewalker.GCFilewalkerRequest
	err = json.NewDecoder(g.stdin).Decode(&request)
	if err != nil {
		return errs.New("Error decoding data from stdin: %v", err)
	}

	// Both fields are mandatory; reject the request early if either is missing.
	if request.SatelliteID.IsZero() {
		return errs.New("SatelliteID is required")
	}
	if request.CreatedBefore.IsZero() {
		return errs.New("CreatedBefore is required")
	}

	// The database is still required here because v0 pieces have to be handled.
	// It can go away once support for v0 pieces is dropped.
	database, openErr := storagenodedb.OpenExisting(g.Ctx, logger.Named("db"), g.config.DatabaseConfig())
	if openErr != nil {
		return errs.New("Error starting master database on storage node: %v", openErr)
	}
	logger.Info("Database started")
	defer func() {
		// Fold the close error into whatever the function is returning.
		err = errs.Combine(err, database.Close())
	}()

	// Rebuild the bloom filter from its serialized form.
	bloom, filterErr := bloomfilter.NewFromBytes(request.BloomFilter)
	if filterErr != nil {
		return filterErr
	}

	logger.Info("gc-filewalker started", zap.Time("createdBefore", request.CreatedBefore), zap.Int("bloomFilterSize", len(request.BloomFilter)))

	walker := pieces.NewFileWalker(logger, database.Pieces(), database.V0PieceInfo())
	trashIDs, total, skipped, walkErr := walker.WalkSatellitePiecesToTrash(g.Ctx, request.SatelliteID, request.CreatedBefore, bloom)
	if walkErr != nil {
		return walkErr
	}

	logger.Info("gc-filewalker completed", zap.Int64("piecesCount", total), zap.Int64("piecesSkippedCount", skipped))

	// Send the result back to the main process on stdout.
	return json.NewEncoder(g.stdout).Encode(lazyfilewalker.GCFilewalkerResponse{
		PieceIDs:           trashIDs,
		PiecesCount:        total,
		PiecesSkippedCount: skipped,
	})
}
cmd/storagenode: refactor lazyfilewalker commands to satisfy the execwrapper.Command interface Follow-up change for https://review.dev.storj.io/c/storj/storj/+/10335 Updates https://github.com/storj/storj/issues/5349 Change-Id: Iadf55bae84ebc0803a0766830e596c396dfb332b 2023-05-05 22:26:09 +01:00			`// Copyright (C) 2023 Storj Labs, Inc.`
			`// See LICENSE for copying information.`

			`package internalcmd`

			`import (`
			`"encoding/json"`
			`"runtime"`

{storagenode/pieces,cmd/storagenode}: refactor lazyfilewalker commands and tests With this change we are directly testing how the command is executed when the args are passed Change-Id: Ibb33926014c9d71c928e0fd374bf4edc5a8a1232 2023-05-24 22:26:33 +01:00			`"github.com/spf13/cobra"`
cmd/storagenode: refactor lazyfilewalker commands to satisfy the execwrapper.Command interface Follow-up change for https://review.dev.storj.io/c/storj/storj/+/10335 Updates https://github.com/storj/storj/issues/5349 Change-Id: Iadf55bae84ebc0803a0766830e596c396dfb332b 2023-05-05 22:26:09 +01:00			`"github.com/zeebo/errs"`
			`"go.uber.org/zap"`

			`"storj.io/common/bloomfilter"`
{storagenode/pieces,cmd/storagenode}: refactor lazyfilewalker commands and tests With this change we are directly testing how the command is executed when the args are passed Change-Id: Ibb33926014c9d71c928e0fd374bf4edc5a8a1232 2023-05-24 22:26:33 +01:00			`"storj.io/private/process"`
cmd/storagenode: refactor lazyfilewalker commands to satisfy the execwrapper.Command interface Follow-up change for https://review.dev.storj.io/c/storj/storj/+/10335 Updates https://github.com/storj/storj/issues/5349 Change-Id: Iadf55bae84ebc0803a0766830e596c396dfb332b 2023-05-05 22:26:09 +01:00			`"storj.io/storj/storagenode/iopriority"`
			`"storj.io/storj/storagenode/pieces"`
			`"storj.io/storj/storagenode/pieces/lazyfilewalker"`
			`"storj.io/storj/storagenode/storagenodedb"`
			`)`

{storagenode/pieces,cmd/storagenode}: refactor lazyfilewalker commands and tests With this change we are directly testing how the command is executed when the args are passed Change-Id: Ibb33926014c9d71c928e0fd374bf4edc5a8a1232 2023-05-24 22:26:33 +01:00			`// NewGCFilewalkerCmd creates a new cobra command for running garbage collection filewalker.`
			`func NewGCFilewalkerCmd() *LazyFilewalkerCmd {`
			`var cfg FilewalkerCfg`
			`var runOpts RunOptions`

			`cmd := &cobra.Command{`
			`Use: lazyfilewalker.GCFilewalkerCmdName,`
			`Short: "An internal subcommand used to run garbage collection filewalker as a separate subprocess with lower IO priority",`
			`RunE: func(cmd *cobra.Command, args []string) error {`
			`runOpts.normalize(cmd)`
			`runOpts.config = &cfg`

			`return gcCmdRun(&runOpts)`
			`},`
			`FParseErrWhitelist: cobra.FParseErrWhitelist{`
			`UnknownFlags: true,`
			`},`
			`Hidden: true,`
			`Args: cobra.ExactArgs(0),`
			`}`
cmd/storagenode: refactor lazyfilewalker commands to satisfy the execwrapper.Command interface Follow-up change for https://review.dev.storj.io/c/storj/storj/+/10335 Updates https://github.com/storj/storj/issues/5349 Change-Id: Iadf55bae84ebc0803a0766830e596c396dfb332b 2023-05-05 22:26:09 +01:00
{storagenode/pieces,cmd/storagenode}: refactor lazyfilewalker commands and tests With this change we are directly testing how the command is executed when the args are passed Change-Id: Ibb33926014c9d71c928e0fd374bf4edc5a8a1232 2023-05-24 22:26:33 +01:00			`process.Bind(cmd, &cfg)`
cmd/storagenode: refactor lazyfilewalker commands to satisfy the execwrapper.Command interface Follow-up change for https://review.dev.storj.io/c/storj/storj/+/10335 Updates https://github.com/storj/storj/issues/5349 Change-Id: Iadf55bae84ebc0803a0766830e596c396dfb332b 2023-05-05 22:26:09 +01:00
{storagenode/pieces,cmd/storagenode}: refactor lazyfilewalker commands and tests With this change we are directly testing how the command is executed when the args are passed Change-Id: Ibb33926014c9d71c928e0fd374bf4edc5a8a1232 2023-05-24 22:26:33 +01:00			`return &LazyFilewalkerCmd{`
			`Command: cmd,`
			`RunOptions: &runOpts,`
cmd/storagenode: refactor lazyfilewalker commands to satisfy the execwrapper.Command interface Follow-up change for https://review.dev.storj.io/c/storj/storj/+/10335 Updates https://github.com/storj/storj/issues/5349 Change-Id: Iadf55bae84ebc0803a0766830e596c396dfb332b 2023-05-05 22:26:09 +01:00			`}`
			`}`

			`// Run runs the GCLazyFileWalker.`
{storagenode/pieces,cmd/storagenode}: refactor lazyfilewalker commands and tests With this change we are directly testing how the command is executed when the args are passed Change-Id: Ibb33926014c9d71c928e0fd374bf4edc5a8a1232 2023-05-24 22:26:33 +01:00			`func gcCmdRun(g *RunOptions) (err error) {`
			`if g.config.LowerIOPriority {`
cmd/storagenode: refactor lazyfilewalker commands to satisfy the execwrapper.Command interface Follow-up change for https://review.dev.storj.io/c/storj/storj/+/10335 Updates https://github.com/storj/storj/issues/5349 Change-Id: Iadf55bae84ebc0803a0766830e596c396dfb332b 2023-05-05 22:26:09 +01:00			`if runtime.GOOS == "linux" {`
			`// Pin the current goroutine to the current OS thread, so we can set the IO priority`
			`// for the current thread.`
			`// This is necessary because Go does use CLONE_IO when creating new threads,`
			`// so they do not share a single IO context.`
			`runtime.LockOSThread()`
			`defer runtime.UnlockOSThread()`
			`}`

			`err = iopriority.SetLowIOPriority()`
			`if err != nil {`
			`return err`
			`}`
			`}`

			`log := g.Logger`

			`// Decode the data struct received from the main process`
			`var req lazyfilewalker.GCFilewalkerRequest`
			`if err = json.NewDecoder(g.stdin).Decode(&req); err != nil {`
			`return errs.New("Error decoding data from stdin: %v", err)`
			`}`

			`// Validate the request data`
			`switch {`
			`case req.SatelliteID.IsZero():`
			`return errs.New("SatelliteID is required")`
			`case req.CreatedBefore.IsZero():`
			`return errs.New("CreatedBefore is required")`
			`}`

			`// We still need the DB in this case because we still have to deal with v0 pieces.`
			`// Once we drop support for v0 pieces, we can remove this.`
{storagenode/pieces,cmd/storagenode}: refactor lazyfilewalker commands and tests With this change we are directly testing how the command is executed when the args are passed Change-Id: Ibb33926014c9d71c928e0fd374bf4edc5a8a1232 2023-05-24 22:26:33 +01:00			`db, err := storagenodedb.OpenExisting(g.Ctx, log.Named("db"), g.config.DatabaseConfig())`
cmd/storagenode: refactor lazyfilewalker commands to satisfy the execwrapper.Command interface Follow-up change for https://review.dev.storj.io/c/storj/storj/+/10335 Updates https://github.com/storj/storj/issues/5349 Change-Id: Iadf55bae84ebc0803a0766830e596c396dfb332b 2023-05-05 22:26:09 +01:00			`if err != nil {`
			`return errs.New("Error starting master database on storage node: %v", err)`
			`}`
			`log.Info("Database started")`
			`defer func() {`
			`err = errs.Combine(err, db.Close())`
			`}()`

			`// Decode the bloom filter`
			`filter, err := bloomfilter.NewFromBytes(req.BloomFilter)`
			`if err != nil {`
			`return err`
			`}`

storagenode: add tests for lazyfilewalker Updates https://github.com/storj/storj/issues/5349 Change-Id: I9544c14ba2acacd5b304f151ab29c70ff61adc5b 2023-05-05 22:26:53 +01:00			`log.Info("gc-filewalker started", zap.Time("createdBefore", req.CreatedBefore), zap.Int("bloomFilterSize", len(req.BloomFilter)))`
cmd/storagenode: refactor lazyfilewalker commands to satisfy the execwrapper.Command interface Follow-up change for https://review.dev.storj.io/c/storj/storj/+/10335 Updates https://github.com/storj/storj/issues/5349 Change-Id: Iadf55bae84ebc0803a0766830e596c396dfb332b 2023-05-05 22:26:09 +01:00
			`filewalker := pieces.NewFileWalker(log, db.Pieces(), db.V0PieceInfo())`
			`pieceIDs, piecesCount, piecesSkippedCount, err := filewalker.WalkSatellitePiecesToTrash(g.Ctx, req.SatelliteID, req.CreatedBefore, filter)`
			`if err != nil {`
			`return err`
			`}`

			`resp := lazyfilewalker.GCFilewalkerResponse{`
			`PieceIDs: pieceIDs,`
			`PiecesCount: piecesCount,`
			`PiecesSkippedCount: piecesSkippedCount,`
			`}`

storagenode: add tests for lazyfilewalker Updates https://github.com/storj/storj/issues/5349 Change-Id: I9544c14ba2acacd5b304f151ab29c70ff61adc5b 2023-05-05 22:26:53 +01:00			`log.Info("gc-filewalker completed", zap.Int64("piecesCount", piecesCount), zap.Int64("piecesSkippedCount", piecesSkippedCount))`
cmd/storagenode: refactor lazyfilewalker commands to satisfy the execwrapper.Command interface Follow-up change for https://review.dev.storj.io/c/storj/storj/+/10335 Updates https://github.com/storj/storj/issues/5349 Change-Id: Iadf55bae84ebc0803a0766830e596c396dfb332b 2023-05-05 22:26:09 +01:00
			`// encode the response struct and write it to stdout`
			`return json.NewEncoder(g.stdout).Encode(resp)`
			`}`