storj/satellite/overlay/service.go

574 lines
23 KiB
Go
Raw Normal View History

2019-01-24 20:15:10 +00:00
// Copyright (C) 2019 Storj Labs, Inc.
2018-04-18 17:55:28 +01:00
// See LICENSE for copying information.
package overlay
2018-04-18 16:34:15 +01:00
import (
"context"
"errors"
"net"
"time"
2018-04-18 16:34:15 +01:00
"github.com/zeebo/errs"
"go.uber.org/zap"
2018-11-16 16:31:14 +00:00
"storj.io/common/pb"
"storj.io/common/storj"
"storj.io/storj/satellite/internalpb"
"storj.io/storj/storage"
2018-04-18 16:34:15 +01:00
)
// ErrEmptyNode is returned when the nodeID is empty.
2018-12-17 18:47:26 +00:00
var ErrEmptyNode = errs.New("empty node ID")
// ErrNodeNotFound is returned if a node does not exist in database.
var ErrNodeNotFound = errs.Class("node not found")
// ErrNodeOffline is returned if a nodes is offline.
var ErrNodeOffline = errs.Class("node is offline")
// ErrNodeDisqualified is returned if a nodes is disqualified.
var ErrNodeDisqualified = errs.Class("node is disqualified")
// ErrNodeFinishedGE is returned if a node has finished graceful exit.
var ErrNodeFinishedGE = errs.Class("node finished graceful exit")
// ErrNotEnoughNodes is when selecting nodes failed with the given parameters.
var ErrNotEnoughNodes = errs.Class("not enough nodes")
// DB implements the database for overlay.Service
2019-09-10 14:24:16 +01:00
//
// architecture: Database
type DB interface {
// GetOnlineNodesForGetDelete returns a map of nodes for the supplied nodeIDs
GetOnlineNodesForGetDelete(ctx context.Context, nodeIDs []storj.NodeID, onlineWindow time.Duration) (map[storj.NodeID]*SelectedNode, error)
// SelectStorageNodes looks up nodes based on criteria
SelectStorageNodes(ctx context.Context, totalNeededNodes, newNodeCount int, criteria *NodeCriteria) ([]*SelectedNode, error)
// SelectAllStorageNodesUpload returns all nodes that qualify to store data, organized as reputable nodes and new nodes
SelectAllStorageNodesUpload(ctx context.Context, selectionCfg NodeSelectionConfig) (reputable, new []*SelectedNode, err error)
// Get looks up the node by nodeID
Get(ctx context.Context, nodeID storj.NodeID) (*NodeDossier, error)
// KnownOffline filters a set of nodes to offline nodes
KnownOffline(context.Context, *NodeCriteria, storj.NodeIDList) (storj.NodeIDList, error)
// KnownUnreliableOrOffline filters a set of nodes to unhealth or offlines node, independent of new
KnownUnreliableOrOffline(context.Context, *NodeCriteria, storj.NodeIDList) (storj.NodeIDList, error)
// KnownReliable filters a set of nodes to reliable (online and qualified) nodes.
KnownReliable(ctx context.Context, onlineWindow time.Duration, nodeIDs storj.NodeIDList) ([]*pb.Node, error)
// Reliable returns all nodes that are reliable
Reliable(context.Context, *NodeCriteria) (storj.NodeIDList, error)
// BatchUpdateStats updates multiple storagenode's stats in one transaction.
BatchUpdateStats(ctx context.Context, updateRequests []*UpdateRequest, batchSize int, now time.Time) (failed storj.NodeIDList, err error)
// UpdateStats all parts of single storagenode's stats.
UpdateStats(ctx context.Context, request *UpdateRequest, now time.Time) (stats *NodeStats, err error)
Add Version Information into KAD Network and SatelliteDB & Change Selection Process (#1648) * Initial Webserver Draft for Version Controlling * Rename type to avoid confusion * Move Function Calls into Version Package * Fix Linting and Language Typos * Fix Linting and Spelling Mistakes * Include Copyright * Include Copyright * Adjust Version-Control Server to return list of Versions * Linting * Improve Request Handling and Readability * Add Configuration File Option Add Systemd Service file * Add Logging to File * Smaller Changes * Add Semantic Versioning and refuses outdated Software from Startup (#1612) * implements internal Semantic Version library * adds version logging + reporting to process * Advance SemVer struct for easier handling * Add Accepted Version Store * Fix Function * Restructure * Type Conversion * Handle Version String properly * Add Note about array index * Set temporary Default Version * Add Copyright * Adding Version to Dashboard * Adding Version Info Log * Renaming and adding CheckerProcess * Iteration Sync * Iteration V2 * linting * made LogAndReportVersion a go routine * Refactor to Go Routine * Add Context to Go Routine and allow Operation if Lookup to Control Server fails * Handle Unmarshal properly * Linting * Relocate Version Checks * Relocating Version Check and specified default Version for now * Linting Error Prevention * Refuse Startup on outdated Version * Add Startup Check Function * Straighten Logging * Dont force Shutdown if --dev flag is set * Create full Service/Peer Structure for ControlServer * Linting * Straighting Naming * Finish VersionControl Service Layout * Improve Error Handling * Change Listening Address * Move Checker Function * Remove VersionControl Peer * Linting * Linting * Create VersionClient Service * Renaming * Add Version Client to Peer Definitions * Linting and Renaming * Linting * Remove Transport Checks for now * Move to Client Side Flag * Remove check * Linting * Transport Client Version Intro * Adding 
Version Client to Transport Client * Add missing parameter * Adding Version Check, to set Allowed = true * Set Default to true, testing * Restructuring Code * Uplink Changes * Add more proper Defaults * Renaming of Version struct * Dont pass Service use Pointer * Set Defaults for Versioning Checks * Put HTTP Server in go routine * Add Versioncontrol to Storj-Sim * Testplanet Fixes * Linting * Add Error Handling and new Server Struct * Move Lock slightly * Reduce Race Potentials * Remove unnecessary files * Linting * Add Proper Transport Handling * small fixes * add fence for allowed check * Add Startup Version Check and Service Naming * make errormessage private * Add Comments about VersionedClient * Linting * Remove Checks that refuse outgoing connections * Remove release cmd * Add Release Script * Linting * Update to use correct Values * Change Timestamp handling * Adding Protobuf changes back in * Adding SatelliteDB Changes and adding Storj Node Version to PB * Add Migration Table * Add Default Stats for Creation * Move to BigInt * Proper SQL Migration * Ensure minimum Version is passed to the node selection * Linting... 
* Remove VersionedClient and adjust smaller changes from prior merge * Linting * Fix PB Message Handling and Query for Node Selection * some future-proofing type changes Change-Id: I3cb5018dcccdbc9739fe004d859065992720caaf * fix a compiler error Change-Id: If66bb92d8b98e31cd618ecec9c6448ab9b037fa5 * Comment on Constant for Overlay * Remove NOT NULL and add epoch call as function * add versions to bootstrap and satellites Change-Id: I436944589ea5f21600cdd997742a84fe0b16e47b * Change Update Migration * Fix DB Migration * Increase Timeout temporarily, to see whats going on * Remove unnecessary const and vars Cleanup Function calls from deprecated NodeVersion struct * Updated Protopuf, removed depcreated Code from Inspector * Implement NodeVersion into InfoResponse * Regenerated locked.go * Linting * Fix Tests * Remove unnecessary constant * Update Function and Flag Description * Remove Empty Stat Creation * return properly with error * Remove unnecessary struct * simplify migration step * Update Inspector to return Version Info * Update local Endpoint Version Handling * Reset Travis Timeout * Add Default for CommitHash * single quotes
2019-04-10 07:04:24 +01:00
// UpdateNodeInfo updates node dossier with info requested from the node itself like node type, email, wallet, capacity, and version.
UpdateNodeInfo(ctx context.Context, node storj.NodeID, nodeInfo *InfoResponse) (stats *NodeDossier, err error)
// UpdateUptime updates a single storagenode's uptime stats.
UpdateUptime(ctx context.Context, nodeID storj.NodeID, isUp bool) (stats *NodeStats, err error)
// UpdateCheckIn updates a single storagenode's check-in stats.
UpdateCheckIn(ctx context.Context, node NodeCheckInInfo, timestamp time.Time, config NodeSelectionConfig) (err error)
// UpdateAuditHistory updates a node's audit history with an online or offline audit.
UpdateAuditHistory(ctx context.Context, nodeID storj.NodeID, auditTime time.Time, online bool, config AuditHistoryConfig) (auditHistory *internalpb.AuditHistory, err error)
// AllPieceCounts returns a map of node IDs to piece counts from the db.
AllPieceCounts(ctx context.Context) (pieceCounts map[storj.NodeID]int, err error)
// UpdatePieceCounts sets the piece count field for the given node IDs.
UpdatePieceCounts(ctx context.Context, pieceCounts map[storj.NodeID]int) (err error)
// UpdateExitStatus is used to update a node's graceful exit status.
UpdateExitStatus(ctx context.Context, request *ExitStatusRequest) (_ *NodeDossier, err error)
// GetExitingNodes returns nodes who have initiated a graceful exit, but have not completed it.
GetExitingNodes(ctx context.Context) (exitingNodes []*ExitStatus, err error)
// GetGracefulExitCompletedByTimeFrame returns nodes who have completed graceful exit within a time window (time window is around graceful exit completion).
GetGracefulExitCompletedByTimeFrame(ctx context.Context, begin, end time.Time) (exitedNodes storj.NodeIDList, err error)
// GetGracefulExitIncompleteByTimeFrame returns nodes who have initiated, but not completed graceful exit within a time window (time window is around graceful exit initiation).
GetGracefulExitIncompleteByTimeFrame(ctx context.Context, begin, end time.Time) (exitingNodes storj.NodeIDList, err error)
// GetExitStatus returns a node's graceful exit status.
GetExitStatus(ctx context.Context, nodeID storj.NodeID) (exitStatus *ExitStatus, err error)
// GetNodesNetwork returns the /24 subnet for each storage node, order is not guaranteed.
GetNodesNetwork(ctx context.Context, nodeIDs []storj.NodeID) (nodeNets []string, err error)
// GetSuccesfulNodesNotCheckedInSince returns all nodes that last check-in was successful, but haven't checked-in within a given duration.
GetSuccesfulNodesNotCheckedInSince(ctx context.Context, duration time.Duration) (nodeAddresses []NodeLastContact, err error)
// GetOfflineNodesLimited returns a list of the first N offline nodes ordered by least recently contacted.
GetOfflineNodesLimited(ctx context.Context, limit int) ([]NodeLastContact, error)
// DisqualifyNode disqualifies a storage node.
DisqualifyNode(ctx context.Context, nodeID storj.NodeID) (err error)
// SuspendNodeUnknownAudit suspends a storage node for unknown audits.
SuspendNodeUnknownAudit(ctx context.Context, nodeID storj.NodeID, suspendedAt time.Time) (err error)
// UnsuspendNodeUnknownAudit unsuspends a storage node for unknown audits.
UnsuspendNodeUnknownAudit(ctx context.Context, nodeID storj.NodeID) (err error)
// TestVetNode directly sets a node's vetted_at timestamp to make testing easier
TestVetNode(ctx context.Context, nodeID storj.NodeID) (vettedTime *time.Time, err error)
// TestUnvetNode directly sets a node's vetted_at timestamp to null to make testing easier
TestUnvetNode(ctx context.Context, nodeID storj.NodeID) (err error)
}
// NodeCheckInInfo carries the node information that is updated when a node
// checks in.
type NodeCheckInInfo struct {
	NodeID     storj.NodeID
	Address    *pb.NodeAddress
	LastNet    string
	LastIPPort string
	IsUp       bool
	Operator   *pb.NodeOperator
	Capacity   *pb.NodeCapacity
	Version    *pb.NodeVersion
}
// InfoResponse holds the node dossier information that was requested from the
// storage node itself.
type InfoResponse struct {
	Type     pb.NodeType
	Operator *pb.NodeOperator
	Capacity *pb.NodeCapacity
	Version  *pb.NodeVersion
}
2019-03-23 08:06:11 +00:00
// FindStorageNodesRequest defines easy request parameters.
type FindStorageNodesRequest struct {
RequestedCount int
ExcludedIDs []storj.NodeID
MinimumVersion string // semver or empty
2019-03-23 08:06:11 +00:00
}
// NodeCriteria are the requirements for selecting nodes.
2019-03-23 08:06:11 +00:00
type NodeCriteria struct {
FreeDisk int64
ExcludedIDs []storj.NodeID
ExcludedNetworks []string // the /24 subnet IPv4 or /64 subnet IPv6 for nodes
MinimumVersion string // semver or empty
OnlineWindow time.Duration
DistinctIP bool
2019-03-23 08:06:11 +00:00
}
// AuditType enumerates the possible outcomes of an audit as reported to the
// overlay.
type AuditType int

// Audit outcomes, numbered consecutively starting at zero.
const (
	// AuditSuccess represents a successful audit.
	AuditSuccess AuditType = iota
	// AuditFailure represents a failed audit.
	AuditFailure
	// AuditUnknown represents an audit that resulted in an unknown error from the node.
	AuditUnknown
	// AuditOffline represents an audit where a node was offline.
	AuditOffline
)
// UpdateRequest describes a status update for a single node.
type UpdateRequest struct {
	NodeID       storj.NodeID
	AuditOutcome AuditType
	IsUp         bool

	// n.b. these are set values from the satellite.
	// They are part of the UpdateRequest struct in order to be
	// more easily accessible in satellite/satellitedb/overlaycache.go.
	AuditLambda float64
	AuditWeight float64
	AuditDQ     float64

	SuspensionGracePeriod time.Duration
	SuspensionDQEnabled   bool

	AuditsRequiredForVetting  int64
	UptimesRequiredForVetting int64

	AuditHistory AuditHistoryConfig
}
// ExitStatus is the graceful exit status of a node as read from the overlay.
type ExitStatus struct {
	NodeID              storj.NodeID
	ExitInitiatedAt     *time.Time
	ExitLoopCompletedAt *time.Time
	ExitFinishedAt      *time.Time
	ExitSuccess         bool
}
// ExitStatusRequest carries the values used to update a node's graceful exit
// status.
type ExitStatusRequest struct {
	NodeID              storj.NodeID
	ExitInitiatedAt     time.Time
	ExitLoopCompletedAt time.Time
	ExitFinishedAt      time.Time
	ExitSuccess         bool
}
// NodeDossier is the complete set of information the satellite tracks for a
// storage node. It embeds pb.Node.
type NodeDossier struct {
	pb.Node
	Type                  pb.NodeType
	Operator              pb.NodeOperator
	Capacity              pb.NodeCapacity
	Reputation            NodeStats
	Version               pb.NodeVersion
	Contained             bool
	Disqualified          *time.Time
	UnknownAuditSuspended *time.Time
	OfflineSuspended      *time.Time
	OfflineUnderReview    *time.Time
	PieceCount            int64
	ExitStatus            ExitStatus
	CreatedAt             time.Time
	LastNet               string
	LastIPPort            string
}
// NodeStats holds the statistics tracked about a node.
type NodeStats struct {
	Latency90                   int64
	VettedAt                    *time.Time
	AuditSuccessCount           int64
	AuditCount                  int64
	UptimeSuccessCount          int64
	UptimeCount                 int64
	LastContactSuccess          time.Time
	LastContactFailure          time.Time
	AuditReputationAlpha        float64
	AuditReputationBeta         float64
	Disqualified                *time.Time
	UnknownAuditReputationAlpha float64
	UnknownAuditReputationBeta  float64
	UnknownAuditSuspended       *time.Time
	OfflineUnderReview          *time.Time
	OfflineSuspended            *time.Time
	OnlineScore                 float64
}
// NodeLastContact holds a node's URL, last known IP and port, and the
// timestamps of its last successful and failed contact.
type NodeLastContact struct {
	URL                storj.NodeURL
	LastIPPort         string
	LastContactSuccess time.Time
	LastContactFailure time.Time
}
// SelectedNode is the node information returned by node selection; it is used
// as a result for creating order limits.
type SelectedNode struct {
	ID         storj.NodeID
	Address    *pb.NodeAddress
	LastNet    string
	LastIPPort string
}
// Clone returns a deep clone of the selected node.
//
// The Transport and Address fields of the node address are copied into a
// freshly allocated pb.NodeAddress, so the clone does not share the address
// pointer with the original. A nil receiver yields nil, and a nil Address
// stays nil in the clone instead of being dereferenced.
func (node *SelectedNode) Clone() *SelectedNode {
	if node == nil {
		return nil
	}

	clone := &SelectedNode{
		ID:         node.ID,
		LastNet:    node.LastNet,
		LastIPPort: node.LastIPPort,
	}
	// Address is a pointer and may be unset; only copy it when present.
	if node.Address != nil {
		clone.Address = &pb.NodeAddress{
			Transport: node.Address.Transport,
			Address:   node.Address.Address,
		}
	}
	return clone
}
// Service is used to store and handle node information
2019-09-10 14:24:16 +01:00
//
// architecture: Service
type Service struct {
log *zap.Logger
db DB
config Config
SelectionCache *NodeSelectionCache
2018-04-18 16:34:15 +01:00
}
// NewService returns a new Service.
func NewService(log *zap.Logger, db DB, config Config) *Service {
return &Service{
log: log,
db: db,
config: config,
SelectionCache: NewNodeSelectionCache(log, db,
config.NodeSelectionCache.Staleness, config.Node,
),
2019-03-23 08:06:11 +00:00
}
}
// Close closes resources. It currently has nothing to release and always
// returns nil.
func (service *Service) Close() error {
	return nil
}
Satellite Peer (#1034) * add satellite peer * Add overlay * reorganize kademlia * add RunRefresh * add refresh to storagenode.Peer * add discovery * add agreements and metainfo * rename * add datarepair checker * add repair * add todo notes for audit * add testing interface * add into testplanet * fixes * fix compilation errors * fix compilation errors * make testplanet run * remove audit refrences * ensure that audit tests run * dev * checker tests compilable * fix discovery * fix compilation * fix * fix * dev * fix * disable auth * fixes * revert go.mod/sum * fix linter errors * fix * fix copyright * Add address param for SN dashboard (#1076) * Rename storj-sdk to storj-sim (#1078) * Storagenode logs and config improvements (#1075) * Add more info to SN logs * remove config-dir from user config * add output where config was stored * add message for successful connection * fix linter * remove storage.path from user config * resolve config path * move success message to info * log improvements * Remove captplanet (#1070) * pkg/server: include production cert (#1082) Change-Id: Ie8e6fe78550be83c3bd797db7a1e58d37c684792 * Generate Payments Report (#1079) * memory.Size: autoformat sizes based on value entropy (#1081) * Jj/bytes (#1085) * run tally and rollup * sets dev default tally and rollup intervals * nonessential storj-sim edits (#1086) * Closing context doesn't stop storage node (#1084) * Print when cancelled * Close properly * Don't log nil * Don't print error when closing dashboard * Fix panic in inspector if ping fails (#1088) * Consolidate identity management to identity cli commands (#1083) * Consolidate identity management: Move identity cretaion/signing out of storagenode setup command. * fixes * linters * Consolidate identity management: Move identity cretaion/signing out of storagenode setup command. 
* fixes * sava backups before saving signed certs * add "-prebuilt-test-cmds" test flag * linters * prepare cli tests for travis * linter fixes * more fixes * linter gods * sp/sdk/sim * remove ca.difficulty * remove unused difficulty * return setup to its rightful place * wip travis * Revert "wip travis" This reverts commit 56834849dcf066d3cc0a4f139033fc3f6d7188ca. * typo in travis.yaml * remove tests * remove more * make it only create one identity at a time for consistency * add config-dir for consitency * add identity creation to storj-sim * add flags * simplify * fix nolint and compile * prevent overwrite and pass difficulty, concurrency, and parent creds * goimports
2019-01-18 13:54:08 +00:00
// Inspect is meant to list a limited number of items in the cache. It is not
// implemented yet and always returns a "not implemented" error.
func (service *Service) Inspect(ctx context.Context) (_ storage.Keys, err error) {
	defer mon.Task()(&ctx)(&err)

	// TODO: implement inspection tools
	err = errors.New("not implemented")
	return nil, err
}
// Get looks up the provided nodeID from the overlay.
func (service *Service) Get(ctx context.Context, nodeID storj.NodeID) (_ *NodeDossier, err error) {
2019-03-23 08:06:11 +00:00
defer mon.Task()(&ctx)(&err)
2018-12-17 18:47:26 +00:00
if nodeID.IsZero() {
return nil, ErrEmptyNode
}
return service.db.Get(ctx, nodeID)
2018-04-18 16:34:15 +01:00
}
// GetOnlineNodesForGetDelete returns a map of nodes for the supplied nodeIDs,
// using the configured online window.
func (service *Service) GetOnlineNodesForGetDelete(ctx context.Context, nodeIDs []storj.NodeID) (_ map[storj.NodeID]*SelectedNode, err error) {
	defer mon.Task()(&ctx)(&err)

	onlineWindow := service.config.Node.OnlineWindow
	return service.db.GetOnlineNodesForGetDelete(ctx, nodeIDs, onlineWindow)
}
// IsOnline reports whether the node counts as online: the time elapsed since
// its last successful contact must be shorter than the configured online window.
func (service *Service) IsOnline(node *NodeDossier) bool {
	sinceLastContact := time.Since(node.Reputation.LastContactSuccess)
	return sinceLastContact < service.config.Node.OnlineWindow
}
// FindStorageNodesForGracefulExit searches the overlay network for nodes that
// meet the provided requirements for graceful-exit requests.
//
// Unlike FindStorageNodesForUpload it never consults the node selection cache;
// it always goes through FindStorageNodesWithPreferences with the service's
// node configuration.
func (service *Service) FindStorageNodesForGracefulExit(ctx context.Context, req FindStorageNodesRequest) (_ []*SelectedNode, err error) {
	defer mon.Task()(&ctx)(&err)

	preferences := &service.config.Node
	return service.FindStorageNodesWithPreferences(ctx, req, preferences)
}
// FindStorageNodesForUpload searches the overlay network for nodes that meet
// the provided requirements for upload.
//
// Unless the node selection cache is disabled, nodes are taken from the cache.
// If the cache reports an error (which is only logged) or yields fewer nodes
// than requested, selection falls back to FindStorageNodesWithPreferences.
func (service *Service) FindStorageNodesForUpload(ctx context.Context, req FindStorageNodesRequest) (_ []*SelectedNode, err error) {
	defer mon.Task()(&ctx)(&err)

	if service.config.NodeSelectionCache.Disabled {
		return service.FindStorageNodesWithPreferences(ctx, req, &service.config.Node)
	}

	cached, cacheErr := service.SelectionCache.GetNodes(ctx, req)
	if cacheErr != nil {
		service.log.Warn("error selecting from node selection cache", zap.String("err", cacheErr.Error()))
	}

	if len(cached) >= req.RequestedCount {
		return cached, nil
	}

	// The cache could not satisfy the request; use the database-backed selection.
	mon.Event("default_node_selection")
	return service.FindStorageNodesWithPreferences(ctx, req, &service.config.Node)
}
// FindStorageNodesWithPreferences searches the overlay network for nodes that meet the provided criteria.
//
// This does not use a cache.
func (service *Service) FindStorageNodesWithPreferences(ctx context.Context, req FindStorageNodesRequest, preferences *NodeSelectionConfig) (nodes []*SelectedNode, err error) {
2019-03-23 08:06:11 +00:00
defer mon.Task()(&ctx)(&err)
// TODO: add sanity limits to requested node count
// TODO: add sanity limits to excluded nodes
totalNeededNodes := req.RequestedCount
excludedIDs := req.ExcludedIDs
// if distinctIP is enabled, keep track of the network
// to make sure we only select nodes from different networks
var excludedNetworks []string
if preferences.DistinctIP && len(excludedIDs) > 0 {
excludedNetworks, err = service.db.GetNodesNetwork(ctx, excludedIDs)
if err != nil {
return nil, Error.Wrap(err)
}
}
newNodeCount := 0
if preferences.NewNodeFraction > 0 {
newNodeCount = int(float64(totalNeededNodes) * preferences.NewNodeFraction)
}
criteria := NodeCriteria{
FreeDisk: preferences.MinimumDiskSpace.Int64(),
ExcludedIDs: excludedIDs,
ExcludedNetworks: excludedNetworks,
MinimumVersion: preferences.MinimumVersion,
OnlineWindow: preferences.OnlineWindow,
DistinctIP: preferences.DistinctIP,
}
nodes, err = service.db.SelectStorageNodes(ctx, totalNeededNodes, newNodeCount, &criteria)
if err != nil {
return nil, Error.Wrap(err)
}
if len(nodes) < totalNeededNodes {
return nodes, ErrNotEnoughNodes.New("requested %d found %d; %+v ", totalNeededNodes, len(nodes), criteria)
}
return nodes, nil
}
// KnownOffline filters a set of nodes to offline nodes, using the configured
// online window as the only criterion.
func (service *Service) KnownOffline(ctx context.Context, nodeIds storj.NodeIDList) (offlineNodes storj.NodeIDList, err error) {
	defer mon.Task()(&ctx)(&err)

	criteria := NodeCriteria{OnlineWindow: service.config.Node.OnlineWindow}
	return service.db.KnownOffline(ctx, &criteria, nodeIds)
}
// KnownUnreliableOrOffline filters a set of nodes to unhealth or offlines node, independent of new.
func (service *Service) KnownUnreliableOrOffline(ctx context.Context, nodeIds storj.NodeIDList) (badNodes storj.NodeIDList, err error) {
2019-03-23 08:06:11 +00:00
defer mon.Task()(&ctx)(&err)
criteria := &NodeCriteria{
OnlineWindow: service.config.Node.OnlineWindow,
}
return service.db.KnownUnreliableOrOffline(ctx, criteria, nodeIds)
}
// KnownReliable filters a set of nodes to reliable (online and qualified)
// nodes, using the configured online window.
func (service *Service) KnownReliable(ctx context.Context, nodeIDs storj.NodeIDList) (nodes []*pb.Node, err error) {
	defer mon.Task()(&ctx)(&err)

	onlineWindow := service.config.Node.OnlineWindow
	return service.db.KnownReliable(ctx, onlineWindow, nodeIDs)
}
// Reliable returns the IDs of all nodes that are reliable, independent of new,
// using the configured online window as the only criterion.
func (service *Service) Reliable(ctx context.Context) (nodes storj.NodeIDList, err error) {
	defer mon.Task()(&ctx)(&err)

	criteria := NodeCriteria{OnlineWindow: service.config.Node.OnlineWindow}
	return service.db.Reliable(ctx, &criteria)
}
// BatchUpdateStats updates multiple storagenode's stats in one transaction.
//
// Every request is modified in place: its audit, suspension, vetting and audit
// history settings are overwritten with the service configuration before the
// requests are handed to the database together with the configured batch size
// and the current time. The IDs of nodes whose update failed are returned.
func (service *Service) BatchUpdateStats(ctx context.Context, requests []*UpdateRequest) (failed storj.NodeIDList, err error) {
	defer mon.Task()(&ctx)(&err)

	for _, request := range requests {
		service.fillUpdateRequestConfig(request)
	}
	return service.db.BatchUpdateStats(ctx, requests, service.config.UpdateStatsBatchSize, time.Now())
}

// UpdateStats updates all parts of a single storagenode's stats.
//
// The request is modified in place: its audit, suspension, vetting and audit
// history settings are overwritten with the service configuration before it is
// handed to the database together with the current time.
func (service *Service) UpdateStats(ctx context.Context, request *UpdateRequest) (stats *NodeStats, err error) {
	defer mon.Task()(&ctx)(&err)

	service.fillUpdateRequestConfig(request)
	return service.db.UpdateStats(ctx, request, time.Now())
}

// fillUpdateRequestConfig copies the satellite-side settings from the service
// configuration onto request, so that both update paths send identical values.
func (service *Service) fillUpdateRequestConfig(request *UpdateRequest) {
	node := &service.config.Node

	request.AuditLambda = node.AuditReputationLambda
	request.AuditWeight = node.AuditReputationWeight
	request.AuditDQ = node.AuditReputationDQ
	request.SuspensionGracePeriod = node.SuspensionGracePeriod
	request.SuspensionDQEnabled = node.SuspensionDQEnabled
	request.AuditsRequiredForVetting = node.AuditCount
	request.UptimesRequiredForVetting = node.UptimeCount
	request.AuditHistory = service.config.AuditHistory
}
Add Version Information into KAD Network and SatelliteDB & Change Selection Process (#1648) * Initial Webserver Draft for Version Controlling * Rename type to avoid confusion * Move Function Calls into Version Package * Fix Linting and Language Typos * Fix Linting and Spelling Mistakes * Include Copyright * Include Copyright * Adjust Version-Control Server to return list of Versions * Linting * Improve Request Handling and Readability * Add Configuration File Option Add Systemd Service file * Add Logging to File * Smaller Changes * Add Semantic Versioning and refuses outdated Software from Startup (#1612) * implements internal Semantic Version library * adds version logging + reporting to process * Advance SemVer struct for easier handling * Add Accepted Version Store * Fix Function * Restructure * Type Conversion * Handle Version String properly * Add Note about array index * Set temporary Default Version * Add Copyright * Adding Version to Dashboard * Adding Version Info Log * Renaming and adding CheckerProcess * Iteration Sync * Iteration V2 * linting * made LogAndReportVersion a go routine * Refactor to Go Routine * Add Context to Go Routine and allow Operation if Lookup to Control Server fails * Handle Unmarshal properly * Linting * Relocate Version Checks * Relocating Version Check and specified default Version for now * Linting Error Prevention * Refuse Startup on outdated Version * Add Startup Check Function * Straighten Logging * Dont force Shutdown if --dev flag is set * Create full Service/Peer Structure for ControlServer * Linting * Straighting Naming * Finish VersionControl Service Layout * Improve Error Handling * Change Listening Address * Move Checker Function * Remove VersionControl Peer * Linting * Linting * Create VersionClient Service * Renaming * Add Version Client to Peer Definitions * Linting and Renaming * Linting * Remove Transport Checks for now * Move to Client Side Flag * Remove check * Linting * Transport Client Version Intro * Adding 
Version Client to Transport Client * Add missing parameter * Adding Version Check, to set Allowed = true * Set Default to true, testing * Restructuring Code * Uplink Changes * Add more proper Defaults * Renaming of Version struct * Dont pass Service use Pointer * Set Defaults for Versioning Checks * Put HTTP Server in go routine * Add Versioncontrol to Storj-Sim * Testplanet Fixes * Linting * Add Error Handling and new Server Struct * Move Lock slightly * Reduce Race Potentials * Remove unnecessary files * Linting * Add Proper Transport Handling * small fixes * add fence for allowed check * Add Startup Version Check and Service Naming * make errormessage private * Add Comments about VersionedClient * Linting * Remove Checks that refuse outgoing connections * Remove release cmd * Add Release Script * Linting * Update to use correct Values * Change Timestamp handling * Adding Protobuf changes back in * Adding SatelliteDB Changes and adding Storj Node Version to PB * Add Migration Table * Add Default Stats for Creation * Move to BigInt * Proper SQL Migration * Ensure minimum Version is passed to the node selection * Linting... 
* Remove VersionedClient and adjust smaller changes from prior merge * Linting * Fix PB Message Handling and Query for Node Selection * some future-proofing type changes Change-Id: I3cb5018dcccdbc9739fe004d859065992720caaf * fix a compiler error Change-Id: If66bb92d8b98e31cd618ecec9c6448ab9b037fa5 * Comment on Constant for Overlay * Remove NOT NULL and add epoch call as function * add versions to bootstrap and satellites Change-Id: I436944589ea5f21600cdd997742a84fe0b16e47b * Change Update Migration * Fix DB Migration * Increase Timeout temporarily, to see whats going on * Remove unnecessary const and vars Cleanup Function calls from deprecated NodeVersion struct * Updated Protopuf, removed depcreated Code from Inspector * Implement NodeVersion into InfoResponse * Regenerated locked.go * Linting * Fix Tests * Remove unnecessary constant * Update Function and Flag Description * Remove Empty Stat Creation * return properly with error * Remove unnecessary struct * simplify migration step * Update Inspector to return Version Info * Update local Endpoint Version Handling * Reset Travis Timeout * Add Default for CommitHash * single quotes
2019-04-10 07:04:24 +01:00
// UpdateNodeInfo updates the node dossier with info requested from the node
// itself like node type, email, wallet, capacity, and version.
func (service *Service) UpdateNodeInfo(ctx context.Context, node storj.NodeID, nodeInfo *InfoResponse) (stats *NodeDossier, err error) {
	defer mon.Task()(&ctx)(&err)

	stats, err = service.db.UpdateNodeInfo(ctx, node, nodeInfo)
	return stats, err
}
// UpdateUptime updates a single storagenode's uptime stats.
func (service *Service) UpdateUptime(ctx context.Context, nodeID storj.NodeID, isUp bool) (stats *NodeStats, err error) {
	defer mon.Task()(&ctx)(&err)

	stats, err = service.db.UpdateUptime(ctx, nodeID, isUp)
	return stats, err
}
// UpdateCheckIn updates a single storagenode's check-in info, passing the
// service's node configuration along to the database.
func (service *Service) UpdateCheckIn(ctx context.Context, node NodeCheckInInfo, timestamp time.Time) (err error) {
	defer mon.Task()(&ctx)(&err)

	err = service.db.UpdateCheckIn(ctx, node, timestamp, service.config.Node)
	return err
}
// GetSuccesfulNodesNotCheckedInSince returns all nodes whose last check-in was
// successful, but which haven't checked in within the given duration.
func (service *Service) GetSuccesfulNodesNotCheckedInSince(ctx context.Context, duration time.Duration) (nodeLastContacts []NodeLastContact, err error) {
	defer mon.Task()(&ctx)(&err)

	nodeLastContacts, err = service.db.GetSuccesfulNodesNotCheckedInSince(ctx, duration)
	return nodeLastContacts, err
}
// GetMissingPieces returns the piece numbers of those pieces whose node is
// reported by KnownUnreliableOrOffline as unreliable or offline.
//
// Piece numbers are returned in the order of pieces, one entry per matching
// piece. The result is nil when no piece is affected.
func (service *Service) GetMissingPieces(ctx context.Context, pieces []*pb.RemotePiece) (missingPieces []int32, err error) {
	defer mon.Task()(&ctx)(&err)

	var nodeIDs storj.NodeIDList
	for _, p := range pieces {
		nodeIDs = append(nodeIDs, p.NodeId)
	}

	badNodeIDs, err := service.KnownUnreliableOrOffline(ctx, nodeIDs)
	if err != nil {
		return nil, Error.New("error getting nodes %s", err)
	}

	// Index the bad node IDs once, so each piece needs a single lookup instead
	// of a scan over the whole list.
	badNodes := make(map[storj.NodeID]struct{}, len(badNodeIDs))
	for _, nodeID := range badNodeIDs {
		badNodes[nodeID] = struct{}{}
	}

	for _, p := range pieces {
		if _, bad := badNodes[p.NodeId]; bad {
			missingPieces = append(missingPieces, p.GetPieceNum())
		}
	}
	return missingPieces, nil
}
// DisqualifyNode disqualifies a storage node.
func (service *Service) DisqualifyNode(ctx context.Context, nodeID storj.NodeID) (err error) {
	defer mon.Task()(&ctx)(&err)

	err = service.db.DisqualifyNode(ctx, nodeID)
	return err
}
// GetOfflineNodesLimited returns a list of the first N offline nodes ordered
// by least recently contacted.
func (service *Service) GetOfflineNodesLimited(ctx context.Context, limit int) (offlineNodes []NodeLastContact, err error) {
	defer mon.Task()(&ctx)(&err)

	offlineNodes, err = service.db.GetOfflineNodesLimited(ctx, limit)
	return offlineNodes, err
}
// ResolveIPAndNetwork resolves the target address and determines its IP and /24 subnet IPv4 or /64 subnet IPv6.
func ResolveIPAndNetwork(ctx context.Context, target string) (ipPort, network string, err error) {
2019-06-24 16:33:18 +01:00
defer mon.Task()(&ctx)(&err)
host, port, err := net.SplitHostPort(target)
if err != nil {
return "", "", err
}
ipAddr, err := net.ResolveIPAddr("ip", host)
if err != nil {
return "", "", err
}
2019-06-24 16:33:18 +01:00
// If addr can be converted to 4byte notation, it is an IPv4 address, else its an IPv6 address
if ipv4 := ipAddr.IP.To4(); ipv4 != nil {
// Filter all IPv4 Addresses into /24 Subnet's
2019-06-24 16:33:18 +01:00
mask := net.CIDRMask(24, 32)
return net.JoinHostPort(ipAddr.String(), port), ipv4.Mask(mask).String(), nil
2019-06-24 16:33:18 +01:00
}
if ipv6 := ipAddr.IP.To16(); ipv6 != nil {
// Filter all IPv6 Addresses into /64 Subnet's
2019-06-24 16:33:18 +01:00
mask := net.CIDRMask(64, 128)
return net.JoinHostPort(ipAddr.String(), port), ipv6.Mask(mask).String(), nil
2019-06-24 16:33:18 +01:00
}
return "", "", errors.New("unable to get network for address " + ipAddr.String())
}
// TestVetNode directly sets a node's vetted_at timestamp to make testing easier.
//
// After the database update succeeds the node selection cache is refreshed.
// If the database update fails, nil and the error are returned and the cache
// is left untouched. If only the refresh fails, the vetted time is still
// returned together with the refresh error.
func (service *Service) TestVetNode(ctx context.Context, nodeID storj.NodeID) (vettedTime *time.Time, err error) {
	vettedTime, err = service.db.TestVetNode(ctx, nodeID)
	if err != nil {
		service.log.Warn("error vetting node", zap.Stringer("node ID", nodeID), zap.Error(err))
		return nil, err
	}
	// Log success only after the error check, so a failed update is never
	// reported as "node vetted".
	service.log.Warn("node vetted", zap.Stringer("node ID", nodeID), zap.Stringer("vetted time", vettedTime))

	err = service.SelectionCache.Refresh(ctx)
	if err != nil {
		service.log.Warn("nodecache refresh err", zap.Error(err))
	}
	return vettedTime, err
}
// TestUnvetNode directly sets a node's vetted_at timestamp to null to make
// testing easier.
//
// After the database update succeeds the node selection cache is refreshed;
// a refresh failure is logged and returned.
func (service *Service) TestUnvetNode(ctx context.Context, nodeID storj.NodeID) (err error) {
	err = service.db.TestUnvetNode(ctx, nodeID)
	if err != nil {
		service.log.Warn("error unvetting node", zap.Stringer("node ID", nodeID), zap.Error(err))
		return err
	}

	err = service.SelectionCache.Refresh(ctx)
	if err != nil {
		// Only warn when the refresh actually failed.
		service.log.Warn("nodecache refresh err", zap.Error(err))
	}
	return err
}