private/testplanet, storagenode/{monitor,pieces}: write storage dir verification file on run and verify on loop
On run, write the storage directory verification file. Every time the node runs it will write the file even if it already exists. The reason we do this is that if the verification file is missing, the storage node doesn't know whether the directory is incorrect or the file simply hasn't been written yet, and we want to keep nodes running without needing operator intervention. Once this change has been part of the minimum version for several releases, we will move the file creation from the run command to the setup command; run will only verify the file's existence.

Change-Id: Ib7d20e78e711c63817db0ab3036a50af0e8f49cb
This commit is contained in:
parent
586e6f2f13
commit
0155c21b44
@ -149,6 +149,7 @@ func (planet *Planet) newStorageNodes(count int, whitelistedSatellites storj.Nod
|
||||
Monitor: monitor.Config{
|
||||
MinimumDiskSpace: 100 * memory.MB,
|
||||
NotifyLowDiskCooldown: defaultInterval,
|
||||
VerifyDirInterval: defaultInterval,
|
||||
},
|
||||
Trust: trust.Config{
|
||||
Sources: sources,
|
||||
|
@ -30,6 +30,7 @@ var (
|
||||
// Config defines parameters for storage node disk and bandwidth usage monitoring.
|
||||
type Config struct {
|
||||
Interval time.Duration `help:"how frequently Kademlia bucket should be refreshed with node stats" default:"1h0m0s"`
|
||||
VerifyDirInterval time.Duration `help:"how frequently to verify access to the storage directory" releaseDefault:"1m" devDefault:"30s"`
|
||||
MinimumDiskSpace memory.Size `help:"how much disk space a node at minimum has to advertise" default:"500GB"`
|
||||
MinimumBandwidth memory.Size `help:"how much bandwidth a node at minimum has to advertise (deprecated)" default:"0TB"`
|
||||
NotifyLowDiskCooldown time.Duration `help:"minimum length of time between capacity reports" default:"10m" hidden:"true"`
|
||||
@ -46,6 +47,7 @@ type Service struct {
|
||||
allocatedDiskSpace int64
|
||||
cooldown *sync2.Cooldown
|
||||
Loop *sync2.Cycle
|
||||
VerifyDirLoop *sync2.Cycle
|
||||
Config Config
|
||||
}
|
||||
|
||||
@ -59,6 +61,7 @@ func NewService(log *zap.Logger, store *pieces.Store, contact *contact.Service,
|
||||
allocatedDiskSpace: allocatedDiskSpace,
|
||||
cooldown: sync2.NewCooldown(config.NotifyLowDiskCooldown),
|
||||
Loop: sync2.NewCycle(interval),
|
||||
VerifyDirLoop: sync2.NewCycle(config.VerifyDirInterval),
|
||||
Config: config,
|
||||
}
|
||||
}
|
||||
@ -108,7 +111,21 @@ func (service *Service) Run(ctx context.Context) (err error) {
|
||||
return Error.New("disk space requirement not met")
|
||||
}
|
||||
|
||||
var group errgroup.Group
|
||||
// Create file to identify the storage directory.
|
||||
if err := service.store.CreateVerificationFile(service.contact.Local().ID); err != nil {
|
||||
return Error.New("failed to create storage directory verification: %v", err)
|
||||
}
|
||||
|
||||
group, ctx := errgroup.WithContext(ctx)
|
||||
group.Go(func() error {
|
||||
return service.VerifyDirLoop.Run(ctx, func(ctx context.Context) error {
|
||||
err := service.store.VerifyStorageDir(service.contact.Local().ID)
|
||||
if err != nil {
|
||||
return Error.New("error verifying storage directory: %v", err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
})
|
||||
group.Go(func() error {
|
||||
return service.Loop.Run(ctx, func(ctx context.Context) error {
|
||||
err := service.updateNodeInformation(ctx)
|
||||
@ -118,7 +135,7 @@ func (service *Service) Run(ctx context.Context) (err error) {
|
||||
return nil
|
||||
})
|
||||
})
|
||||
service.cooldown.Start(ctx, &group, func(ctx context.Context) error {
|
||||
service.cooldown.Start(ctx, group, func(ctx context.Context) error {
|
||||
err := service.updateNodeInformation(ctx)
|
||||
if err != nil {
|
||||
service.log.Error("error during updating node information: ", zap.Error(err))
|
||||
|
@ -32,6 +32,7 @@ func TestMonitor(t *testing.T) {
|
||||
nodeAssertions := 0
|
||||
for _, storageNode := range planet.StorageNodes {
|
||||
storageNode.Storage2.Monitor.Loop.TriggerWait()
|
||||
storageNode.Storage2.Monitor.VerifyDirLoop.TriggerWait()
|
||||
stats, err := storageNode.Storage2.Inspector.Stats(ctx, &pb.StatsRequest{})
|
||||
require.NoError(t, err)
|
||||
if stats.UsedSpace > 0 {
|
||||
|
@ -509,6 +509,7 @@ func (endpoint *Endpoint) Download(stream pb.DRPCPiecestore_DownloadStream) (err
|
||||
pieceReader, err = endpoint.store.Reader(ctx, limit.SatelliteId, limit.PieceId)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
endpoint.monitor.VerifyDirLoop.TriggerWait()
|
||||
return rpcstatus.Wrap(rpcstatus.NotFound, err)
|
||||
}
|
||||
return rpcstatus.Wrap(rpcstatus.Internal, err)
|
||||
|
Loading…
Reference in New Issue
Block a user