storj/pkg/datarepair/checker/online.go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package checker

import (
	"context"
	"sync"
	"sync/atomic"
	"time"

	"storj.io/storj/pkg/overlay"
	"storj.io/storj/pkg/pb"
	"storj.io/storj/pkg/storj"
)

// ReliabilityCache caches the reliable nodes for the specified staleness duration
// and updates automatically from overlay.
//
// Readers obtain the current snapshot with an atomic load of state; the mutex
// is only taken when a refresh may be needed.
type ReliabilityCache struct {
	// overlay is queried for the list of reliable nodes on every refresh.
	overlay   *overlay.Cache
	// staleness is the maximum age of a snapshot before MissingPieces
	// triggers a refresh.
	staleness time.Duration
	// mu is held while refreshing so that refreshes do not run concurrently.
	mu        sync.Mutex
	state     atomic.Value // contains immutable *reliabilityState
}

// reliabilityState is a single snapshot of the reliable node set held by
// ReliabilityCache. A new value is built on every refresh and is not modified
// after it has been stored.
type reliabilityState struct {
	// reliable is the set of node IDs returned by the overlay's Reliable call.
	reliable map[storj.NodeID]struct{}
	// created is the time at which this snapshot was built.
	created  time.Time
}

// NewReliabilityCache returns a reliability cache that reads reliable nodes
// from overlay and treats a snapshot older than staleness as out of date.
// No snapshot is loaded until the first refresh.
func NewReliabilityCache(overlay *overlay.Cache, staleness time.Duration) *ReliabilityCache {
	cache := new(ReliabilityCache)
	cache.overlay = overlay
	cache.staleness = staleness
	return cache
}

// LastUpdate reports the creation time of the currently stored snapshot.
// It returns the zero time.Time when no snapshot has been stored yet.
func (cache *ReliabilityCache) LastUpdate() time.Time {
	current, loaded := cache.state.Load().(*reliabilityState)
	if !loaded {
		// Nothing has been stored yet, so there is no update time to report.
		return time.Time{}
	}
	return current.created
}

// MissingPieces returns the piece numbers of the given pieces whose node is not
// in the cached set of reliable nodes.
//
// The cached snapshot is refreshed from overlay first when no snapshot exists,
// when created is after the time the snapshot was built, or when the snapshot
// is older than the configured staleness duration. If that refresh fails, the
// error is returned together with a nil slice. A nil slice with a nil error
// means every piece is held by a reliable node.
func (cache *ReliabilityCache) MissingPieces(ctx context.Context, created time.Time, pieces []*pb.RemotePiece) (_ []int32, err error) {
	defer mon.Task()(&ctx)(&err)

	// This code is designed to be very fast in the case where a refresh is not needed: just an
	// atomic load from a rarely written bit of shared memory. The general strategy is to first
	// check whether the current state suffices to answer the query. If not (because it does not
	// exist, is too stale, etc.), we acquire the mutex so that only one refresh is issued at a
	// time. After acquiring the mutex we check the state again, because another call may have
	// refreshed it while we were waiting. Only then do we refresh and answer the query.

	// stale reports whether s cannot be used to answer this query.
	stale := func(s *reliabilityState) bool {
		return s == nil || created.After(s.created) || time.Since(s.created) > cache.staleness
	}

	// A failed type assertion leaves state nil, which stale treats as "missing".
	state, _ := cache.state.Load().(*reliabilityState)
	if stale(state) {
		state, err = func() (*reliabilityState, error) {
			cache.mu.Lock()
			// Deferred so the mutex is released even if the refresh panics,
			// matching how Refresh holds the lock.
			defer cache.mu.Unlock()

			current, _ := cache.state.Load().(*reliabilityState)
			if !stale(current) {
				// Another caller refreshed while we waited for the mutex.
				return current, nil
			}
			return cache.refreshLocked(ctx)
		}()
		if err != nil {
			return nil, err
		}
	}

	var unreliable []int32
	for _, piece := range pieces {
		if _, ok := state.reliable[piece.NodeId]; !ok {
			unreliable = append(unreliable, piece.PieceNum)
		}
	}
	return unreliable, nil
}

// Refresh unconditionally rebuilds the cached snapshot from overlay, regardless
// of how old the current one is. It holds the cache mutex for the duration of
// the refresh and returns any error from that refresh.
func (cache *ReliabilityCache) Refresh(ctx context.Context) (err error) {
	defer mon.Task()(&ctx)(&err)

	cache.mu.Lock()
	defer cache.mu.Unlock()

	// Only the error matters here; the new snapshot is already stored.
	if _, err = cache.refreshLocked(ctx); err != nil {
		return err
	}
	return nil
}

// refreshLocked queries overlay for the reliable nodes, builds a new snapshot
// from the result, stores it and returns it. Callers are expected to hold
// cache.mu. On a query error the stored snapshot is left untouched and the
// wrapped error is returned.
func (cache *ReliabilityCache) refreshLocked(ctx context.Context) (_ *reliabilityState, err error) {
	defer mon.Task()(&ctx)(&err)

	ids, err := cache.overlay.Reliable(ctx)
	if err != nil {
		return nil, Error.Wrap(err)
	}

	// Take the timestamp right after the query, before building the set.
	builtAt := time.Now()
	reliable := make(map[storj.NodeID]struct{}, len(ids))
	for _, nodeID := range ids {
		reliable[nodeID] = struct{}{}
	}

	fresh := &reliabilityState{
		reliable: reliable,
		created:  builtAt,
	}
	cache.state.Store(fresh)
	return fresh, nil
}
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00			`// Copyright (C) 2019 Storj Labs, Inc.`
			`// See LICENSE for copying information.`

			`package checker`

			`import (`
			`"context"`
repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`"sync"`
			`"sync/atomic"`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00			`"time"`

			`"storj.io/storj/pkg/overlay"`
			`"storj.io/storj/pkg/pb"`
			`"storj.io/storj/pkg/storj"`
			`)`

			`// ReliabilityCache caches the reliable nodes for the specified staleness duration`
			`// and updates automatically from overlay.`
			`type ReliabilityCache struct {`
repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`overlay *overlay.Cache`
			`staleness time.Duration`
			`mu sync.Mutex`
			`state atomic.Value // contains immutable *reliabilityState`
			`}`

			`// reliabilityState`
			`type reliabilityState struct {`
			`reliable map[storj.NodeID]struct{}`
			`created time.Time`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00			`}`

			`// NewReliabilityCache creates a new reliability checking cache.`
			`func NewReliabilityCache(overlay overlay.Cache, staleness time.Duration) ReliabilityCache {`
			`return &ReliabilityCache{`
			`overlay: overlay,`
			`staleness: staleness,`
			`}`
			`}`

			`// LastUpdate returns when the cache was last updated.`
repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`func (cache *ReliabilityCache) LastUpdate() time.Time {`
			`if state, ok := cache.state.Load().(*reliabilityState); ok {`
			`return state.created`
			`}`
			`return time.Time{}`
			`}`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00
			`// MissingPieces returns piece indices that are unreliable with the given staleness period.`
repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`func (cache ReliabilityCache) MissingPieces(ctx context.Context, created time.Time, pieces []pb.RemotePiece) (_ []int32, err error) {`
			`defer mon.Task()(&ctx)(&err)`

			`// This code is designed to be very fast in the case where a refresh is not needed: just an`
			`// atomic load from rarely written to bit of shared memory. The general strategy is to first`
			`// read if the state suffices to answer the query. If not (due to it not existing, being`
			`// too stale, etc.), then we acquire the mutex to block other requests that may be stale`
			`// and ensure we only issue one refresh at a time. After acquiring the mutex, we have to`
			`// double check that the state is still stale because some other call may have beat us to`
			`// the acquisition. Only then do we refresh and can then proceed answering the query.`

			`state, ok := cache.state.Load().(*reliabilityState)`
			`if !ok \|\| created.After(state.created) \|\| time.Since(state.created) > cache.staleness {`
			`cache.mu.Lock()`
			`state, ok = cache.state.Load().(*reliabilityState)`
			`if !ok \|\| created.After(state.created) \|\| time.Since(state.created) > cache.staleness {`
			`state, err = cache.refreshLocked(ctx)`
			`}`
			`cache.mu.Unlock()`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00			`if err != nil {`
			`return nil, err`
			`}`
			`}`

			`var unreliable []int32`
			`for _, piece := range pieces {`
repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`if _, ok := state.reliable[piece.NodeId]; !ok {`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00			`unreliable = append(unreliable, piece.PieceNum)`
			`}`
			`}`
			`return unreliable, nil`
			`}`

			`// Refresh refreshes the cache.`
repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`func (cache *ReliabilityCache) Refresh(ctx context.Context) (err error) {`
			`defer mon.Task()(&ctx)(&err)`

			`cache.mu.Lock()`
			`defer cache.mu.Unlock()`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00
repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`_, err = cache.refreshLocked(ctx)`
			`return err`
			`}`

			`// refreshLocked does the refreshes assuming the write mutex is held.`
			`func (cache ReliabilityCache) refreshLocked(ctx context.Context) (_ reliabilityState, err error) {`
			`defer mon.Task()(&ctx)(&err)`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00
			`nodes, err := cache.overlay.Reliable(ctx)`
			`if err != nil {`
repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`return nil, Error.Wrap(err)`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00			`}`

repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`state := &reliabilityState{`
			`created: time.Now(),`
			`reliable: make(map[storj.NodeID]struct{}, len(nodes)),`
			`}`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00			`for _, id := range nodes {`
repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`state.reliable[id] = struct{}{}`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00			`}`

repair: fix data race in reliability cache (#2561) 2019-07-15 20:58:39 +01:00			`cache.state.Store(state)`
			`return state, nil`
satellite/datarepair: use reliability cache (#1976) 2019-07-08 23:04:35 +01:00			`}`