2019-01-24 20:15:10 +00:00
// Copyright (C) 2019 Storj Labs, Inc.
2018-12-17 20:14:16 +00:00
// See LICENSE for copying information.
package satellitedb
import (
"context"
"database/sql"
2019-07-31 18:21:06 +01:00
"encoding/hex"
2020-07-14 14:04:38 +01:00
"errors"
2019-07-31 18:21:06 +01:00
"fmt"
2019-09-11 22:38:58 +01:00
"sort"
2021-01-18 14:33:13 +00:00
"strings"
2019-03-29 08:53:43 +00:00
"time"
2018-12-17 20:14:16 +00:00
2019-11-08 20:40:39 +00:00
"github.com/spacemonkeygo/monkit/v3"
2019-01-15 16:08:45 +00:00
"github.com/zeebo/errs"
2020-04-08 23:28:25 +01:00
"go.uber.org/zap"
2019-01-15 16:08:45 +00:00
2019-12-27 11:48:47 +00:00
"storj.io/common/pb"
"storj.io/common/storj"
2020-03-23 19:30:31 +00:00
"storj.io/private/version"
2020-11-29 20:54:03 +00:00
"storj.io/storj/private/dbutil/cockroachutil"
2020-06-28 04:56:29 +01:00
"storj.io/storj/private/dbutil/pgutil"
2020-01-19 13:42:08 +00:00
"storj.io/storj/private/tagsql"
2019-07-28 06:55:36 +01:00
"storj.io/storj/satellite/overlay"
2020-01-15 02:29:51 +00:00
"storj.io/storj/satellite/satellitedb/dbx"
2018-12-17 20:14:16 +00:00
)
2019-03-25 22:25:09 +00:00
var (
2019-06-20 14:56:04 +01:00
mon = monkit . Package ( )
2019-03-25 22:25:09 +00:00
)
2019-01-15 16:08:45 +00:00
var _ overlay . DB = ( * overlaycache ) ( nil )
2018-12-17 20:14:16 +00:00
type overlaycache struct {
2019-12-14 02:29:54 +00:00
db * satelliteDB
2018-12-17 20:14:16 +00:00
}
2020-07-16 15:18:02 +01:00
// SelectAllStorageNodesUpload returns all nodes that qualify to store data, organized as reputable nodes and new nodes.
2020-04-14 21:50:02 +01:00
func ( cache * overlaycache ) SelectAllStorageNodesUpload ( ctx context . Context , selectionCfg overlay . NodeSelectionConfig ) ( reputable , new [ ] * overlay . SelectedNode , err error ) {
2020-11-29 20:54:03 +00:00
for {
reputable , new , err = cache . selectAllStorageNodesUpload ( ctx , selectionCfg )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return reputable , new , err
}
break
}
return reputable , new , err
}
func ( cache * overlaycache ) selectAllStorageNodesUpload ( ctx context . Context , selectionCfg overlay . NodeSelectionConfig ) ( reputable , new [ ] * overlay . SelectedNode , err error ) {
2020-04-14 21:50:02 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2020-12-22 19:07:07 +00:00
asOf := cache . db . AsOfSystemTimeClause ( selectionCfg . AsOfSystemTime . DefaultInterval )
2020-04-14 21:50:02 +01:00
query := `
2020-07-08 15:28:49 +01:00
SELECT id , address , last_net , last_ip_port , vetted_at
2020-12-22 19:07:07 +00:00
FROM nodes ` + asOf + `
2020-04-14 21:50:02 +01:00
WHERE disqualified IS NULL
2020-06-10 17:11:25 +01:00
AND unknown_audit_suspended IS NULL
2021-03-18 19:55:06 +00:00
AND offline_suspended IS NULL
2020-04-14 21:50:02 +01:00
AND exit_initiated_at IS NULL
2020-07-08 15:28:49 +01:00
AND type = $ 1
AND free_disk >= $ 2
AND last_contact_success > $ 3
2020-04-14 21:50:02 +01:00
`
args := [ ] interface { } {
2020-07-08 15:28:49 +01:00
// $1
2020-04-14 21:50:02 +01:00
int ( pb . NodeType_STORAGE ) ,
2020-07-08 15:28:49 +01:00
// $2
2020-04-14 21:50:02 +01:00
selectionCfg . MinimumDiskSpace . Int64 ( ) ,
2020-07-08 15:28:49 +01:00
// $3
2020-04-14 21:50:02 +01:00
time . Now ( ) . Add ( - selectionCfg . OnlineWindow ) ,
}
if selectionCfg . MinimumVersion != "" {
version , err := version . NewSemVer ( selectionCfg . MinimumVersion )
if err != nil {
return nil , nil , err
}
2020-07-08 15:28:49 +01:00
query += ` AND (major > $4 OR (major = $5 AND (minor > $6 OR (minor = $7 AND patch >= $8)))) AND release `
2020-04-14 21:50:02 +01:00
args = append ( args ,
2020-07-08 15:28:49 +01:00
// $4 - $8
2020-04-14 21:50:02 +01:00
version . Major , version . Major , version . Minor , version . Minor , version . Patch ,
)
}
rows , err := cache . db . Query ( ctx , query , args ... )
if err != nil {
return nil , nil , err
}
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
var reputableNodes [ ] * overlay . SelectedNode
var newNodes [ ] * overlay . SelectedNode
for rows . Next ( ) {
var node overlay . SelectedNode
node . Address = & pb . NodeAddress { }
var lastIPPort sql . NullString
2020-07-08 15:28:49 +01:00
var vettedAt * time . Time
err = rows . Scan ( & node . ID , & node . Address . Address , & node . LastNet , & lastIPPort , & vettedAt )
2020-04-14 21:50:02 +01:00
if err != nil {
return nil , nil , err
}
if lastIPPort . Valid {
node . LastIPPort = lastIPPort . String
}
2020-07-08 15:28:49 +01:00
if vettedAt == nil {
2020-04-14 21:50:02 +01:00
newNodes = append ( newNodes , & node )
continue
}
reputableNodes = append ( reputableNodes , & node )
}
return reputableNodes , newNodes , Error . Wrap ( rows . Err ( ) )
}
2021-01-28 14:33:53 +00:00
// SelectAllStorageNodesDownload returns all nodes that qualify to store data, organized as reputable nodes and new nodes.
func ( cache * overlaycache ) SelectAllStorageNodesDownload ( ctx context . Context , onlineWindow time . Duration , asOf overlay . AsOfSystemTimeConfig ) ( nodes [ ] * overlay . SelectedNode , err error ) {
for {
nodes , err = cache . selectAllStorageNodesDownload ( ctx , onlineWindow , asOf )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return nodes , err
}
break
}
return nodes , err
}
// selectAllStorageNodesDownload is the non-retrying query behind
// SelectAllStorageNodesDownload.
func (cache *overlaycache) selectAllStorageNodesDownload(ctx context.Context, onlineWindow time.Duration, asOfConfig overlay.AsOfSystemTimeConfig) (_ []*overlay.SelectedNode, err error) {
	defer mon.Task()(&ctx)(&err)

	asOf := cache.db.AsOfSystemTimeClause(asOfConfig.DefaultInterval)

	query := `
		SELECT id, address, last_net, last_ip_port
			FROM nodes ` + asOf + `
			WHERE disqualified IS NULL
			AND exit_finished_at IS NULL
			AND last_contact_success > $1
	`
	args := []interface{}{
		// $1
		time.Now().Add(-onlineWindow),
	}

	rows, err := cache.db.Query(ctx, query, args...)
	if err != nil {
		return nil, err
	}
	defer func() { err = errs.Combine(err, rows.Close()) }()

	var selected []*overlay.SelectedNode
	for rows.Next() {
		var node overlay.SelectedNode
		node.Address = &pb.NodeAddress{}

		var lastIPPort sql.NullString
		if err = rows.Scan(&node.ID, &node.Address.Address, &node.LastNet, &lastIPPort); err != nil {
			return nil, err
		}
		if lastIPPort.Valid {
			node.LastIPPort = lastIPPort.String
		}
		selected = append(selected, &node)
	}
	return selected, Error.Wrap(rows.Err())
}
2020-03-06 22:04:23 +00:00
// GetNodesNetwork returns the /24 subnet for each storage node, order is not guaranteed.
func ( cache * overlaycache ) GetNodesNetwork ( ctx context . Context , nodeIDs [ ] storj . NodeID ) ( nodeNets [ ] string , err error ) {
2020-11-29 20:54:03 +00:00
for {
nodeNets , err = cache . getNodesNetwork ( ctx , nodeIDs )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return nodeNets , err
}
break
}
return nodeNets , err
}
func ( cache * overlaycache ) getNodesNetwork ( ctx context . Context , nodeIDs [ ] storj . NodeID ) ( nodeNets [ ] string , err error ) {
2019-11-06 21:38:52 +00:00
defer mon . Task ( ) ( & ctx ) ( & err )
2020-01-19 13:42:08 +00:00
var rows tagsql . Rows
2020-01-17 20:07:00 +00:00
rows , err = cache . db . Query ( ctx , cache . db . Rebind ( `
2019-11-06 21:38:52 +00:00
SELECT last_net FROM nodes
WHERE id = any ( $ 1 : : bytea [ ] )
2020-06-28 04:56:29 +01:00
` ) , pgutil . NodeIDArray ( nodeIDs ) ,
2019-11-06 21:38:52 +00:00
)
if err != nil {
return nil , err
}
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
for rows . Next ( ) {
var ip string
err = rows . Scan ( & ip )
if err != nil {
return nil , err
}
2020-03-06 22:04:23 +00:00
nodeNets = append ( nodeNets , ip )
2019-11-06 21:38:52 +00:00
}
2020-03-06 22:04:23 +00:00
return nodeNets , Error . Wrap ( rows . Err ( ) )
2019-11-06 21:38:52 +00:00
}
2020-07-16 15:18:02 +01:00
// Get looks up the node by nodeID.
2020-11-29 20:54:03 +00:00
func ( cache * overlaycache ) Get ( ctx context . Context , id storj . NodeID ) ( dossier * overlay . NodeDossier , err error ) {
2019-06-04 12:55:38 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2019-01-15 16:08:45 +00:00
if id . IsZero ( ) {
return nil , overlay . ErrEmptyNode
2018-12-17 20:14:16 +00:00
}
2019-03-29 08:53:43 +00:00
node , err := cache . db . Get_Node_By_Id ( ctx , dbx . Node_Id ( id . Bytes ( ) ) )
2020-07-14 14:04:38 +01:00
if errors . Is ( err , sql . ErrNoRows ) {
2019-08-21 17:30:29 +01:00
return nil , overlay . ErrNodeNotFound . New ( "%v" , id )
2019-01-15 16:08:45 +00:00
}
2018-12-17 20:14:16 +00:00
if err != nil {
2019-01-15 16:08:45 +00:00
return nil , err
2018-12-17 20:14:16 +00:00
}
2019-06-04 12:55:38 +01:00
return convertDBNode ( ctx , node )
2019-01-15 16:08:45 +00:00
}
2020-07-16 15:18:02 +01:00
// GetOnlineNodesForGetDelete returns a map of nodes for the supplied nodeIDs.
2020-11-29 20:54:03 +00:00
func ( cache * overlaycache ) GetOnlineNodesForGetDelete ( ctx context . Context , nodeIDs [ ] storj . NodeID , onlineWindow time . Duration ) ( nodes map [ storj . NodeID ] * overlay . SelectedNode , err error ) {
for {
nodes , err = cache . getOnlineNodesForGetDelete ( ctx , nodeIDs , onlineWindow )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return nodes , err
}
break
}
return nodes , err
}
func ( cache * overlaycache ) getOnlineNodesForGetDelete ( ctx context . Context , nodeIDs [ ] storj . NodeID , onlineWindow time . Duration ) ( _ map [ storj . NodeID ] * overlay . SelectedNode , err error ) {
2020-03-13 18:01:48 +00:00
defer mon . Task ( ) ( & ctx ) ( & err )
2020-01-19 13:42:08 +00:00
var rows tagsql . Rows
2020-03-13 18:01:48 +00:00
rows , err = cache . db . Query ( ctx , cache . db . Rebind ( `
2020-03-30 14:32:02 +01:00
SELECT last_net , id , address , last_ip_port
2020-03-13 18:01:48 +00:00
FROM nodes
WHERE id = any ( $ 1 : : bytea [ ] )
2020-03-30 14:32:02 +01:00
AND disqualified IS NULL
2020-06-12 02:11:22 +01:00
AND exit_finished_at IS NULL
2020-03-30 14:32:02 +01:00
AND last_contact_success > $ 2
2020-06-28 04:56:29 +01:00
` ) , pgutil . NodeIDArray ( nodeIDs ) , time . Now ( ) . Add ( - onlineWindow ) )
2020-03-13 18:01:48 +00:00
if err != nil {
return nil , err
}
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
2020-03-30 14:32:02 +01:00
nodes := make ( map [ storj . NodeID ] * overlay . SelectedNode )
2020-03-13 18:01:48 +00:00
for rows . Next ( ) {
2020-03-30 14:32:02 +01:00
var node overlay . SelectedNode
node . Address = & pb . NodeAddress { Transport : pb . NodeTransport_TCP_TLS_GRPC }
2020-03-13 18:01:48 +00:00
2020-04-03 19:22:24 +01:00
var lastIPPort sql . NullString
err = rows . Scan ( & node . LastNet , & node . ID , & node . Address . Address , & lastIPPort )
2020-03-13 18:01:48 +00:00
if err != nil {
return nil , err
}
2020-04-03 19:22:24 +01:00
if lastIPPort . Valid {
node . LastIPPort = lastIPPort . String
}
2020-03-30 14:32:02 +01:00
nodes [ node . ID ] = & node
2020-03-13 18:01:48 +00:00
}
return nodes , Error . Wrap ( rows . Err ( ) )
}
2020-07-16 15:18:02 +01:00
// KnownOffline filters a set of nodes to offline nodes.
2020-11-29 20:54:03 +00:00
func ( cache * overlaycache ) KnownOffline ( ctx context . Context , criteria * overlay . NodeCriteria , nodeIDs storj . NodeIDList ) ( offlineNodes storj . NodeIDList , err error ) {
for {
offlineNodes , err = cache . knownOffline ( ctx , criteria , nodeIDs )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return offlineNodes , err
}
break
}
return offlineNodes , err
}
func ( cache * overlaycache ) knownOffline ( ctx context . Context , criteria * overlay . NodeCriteria , nodeIds storj . NodeIDList ) ( offlineNodes storj . NodeIDList , err error ) {
2019-06-18 23:22:14 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
if len ( nodeIds ) == 0 {
return nil , Error . New ( "no ids provided" )
}
2020-12-22 19:07:07 +00:00
asOf := cache . db . AsOfSystemTimeClause ( criteria . AsOfSystemTimeInterval )
2019-06-18 23:22:14 +01:00
// get offline nodes
2020-01-19 13:42:08 +00:00
var rows tagsql . Rows
2020-01-17 20:07:00 +00:00
rows , err = cache . db . Query ( ctx , cache . db . Rebind ( `
2020-12-22 19:07:07 +00:00
SELECT id FROM nodes ` +asOf+ `
2019-10-18 22:27:57 +01:00
WHERE id = any ( $ 1 : : bytea [ ] )
2020-01-16 14:27:24 +00:00
AND last_contact_success < $ 2
2020-06-28 04:56:29 +01:00
` ) , pgutil . NodeIDArray ( nodeIds ) , time . Now ( ) . Add ( - criteria . OnlineWindow ) ,
2019-10-18 22:27:57 +01:00
)
2019-06-18 23:22:14 +01:00
if err != nil {
return nil , err
}
2019-10-18 22:27:57 +01:00
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
2019-06-18 23:22:14 +01:00
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
if err != nil {
return nil , err
}
offlineNodes = append ( offlineNodes , id )
}
2020-01-16 14:27:24 +00:00
return offlineNodes , Error . Wrap ( rows . Err ( ) )
2019-06-18 23:22:14 +01:00
}
2020-07-16 15:18:02 +01:00
// KnownUnreliableOrOffline filters a set of nodes to unreliable or offlines node, independent of new.
2020-11-29 20:54:03 +00:00
func ( cache * overlaycache ) KnownUnreliableOrOffline ( ctx context . Context , criteria * overlay . NodeCriteria , nodeIDs storj . NodeIDList ) ( badNodes storj . NodeIDList , err error ) {
for {
badNodes , err = cache . knownUnreliableOrOffline ( ctx , criteria , nodeIDs )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return badNodes , err
}
break
}
return badNodes , err
}
func ( cache * overlaycache ) knownUnreliableOrOffline ( ctx context . Context , criteria * overlay . NodeCriteria , nodeIDs storj . NodeIDList ) ( badNodes storj . NodeIDList , err error ) {
2019-06-04 12:55:38 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2020-11-29 20:54:03 +00:00
if len ( nodeIDs ) == 0 {
2019-05-01 14:45:52 +01:00
return nil , Error . New ( "no ids provided" )
}
2020-12-22 19:07:07 +00:00
asOf := cache . db . AsOfSystemTimeClause ( criteria . AsOfSystemTimeInterval )
2019-05-08 18:59:50 +01:00
// get reliable and online nodes
2020-01-19 13:42:08 +00:00
var rows tagsql . Rows
2020-01-17 20:07:00 +00:00
rows , err = cache . db . Query ( ctx , cache . db . Rebind ( `
2020-12-22 19:07:07 +00:00
SELECT id FROM nodes ` +asOf+ `
2019-10-18 22:27:57 +01:00
WHERE id = any ( $ 1 : : bytea [ ] )
2019-06-18 10:14:31 +01:00
AND disqualified IS NULL
2020-06-10 17:11:25 +01:00
AND unknown_audit_suspended IS NULL
2021-03-18 19:55:06 +00:00
AND offline_suspended IS NULL
2020-04-23 20:46:16 +01:00
AND exit_finished_at IS NULL
2019-11-15 22:43:06 +00:00
AND last_contact_success > $ 2
2020-11-29 20:54:03 +00:00
` ) , pgutil . NodeIDArray ( nodeIDs ) , time . Now ( ) . Add ( - criteria . OnlineWindow ) ,
2019-10-18 22:27:57 +01:00
)
2019-05-01 14:45:52 +01:00
if err != nil {
return nil , err
}
2019-10-18 22:27:57 +01:00
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
2019-05-08 18:59:50 +01:00
2020-11-29 20:54:03 +00:00
goodNodes := make ( map [ storj . NodeID ] struct { } , len ( nodeIDs ) )
2019-05-01 14:45:52 +01:00
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
2018-12-17 20:14:16 +00:00
if err != nil {
2019-05-01 14:45:52 +01:00
return nil , err
2018-12-17 20:14:16 +00:00
}
2019-05-19 16:10:46 +01:00
goodNodes [ id ] = struct { } { }
2019-05-08 18:59:50 +01:00
}
2020-11-29 20:54:03 +00:00
for _ , id := range nodeIDs {
2019-05-19 16:10:46 +01:00
if _ , ok := goodNodes [ id ] ; ! ok {
2019-05-08 18:59:50 +01:00
badNodes = append ( badNodes , id )
}
2018-12-17 20:14:16 +00:00
}
2020-01-16 14:27:24 +00:00
return badNodes , Error . Wrap ( rows . Err ( ) )
2018-12-17 20:14:16 +00:00
}
2019-12-16 13:45:13 +00:00
// KnownReliable filters a set of nodes to reliable (online and qualified) nodes.
func ( cache * overlaycache ) KnownReliable ( ctx context . Context , onlineWindow time . Duration , nodeIDs storj . NodeIDList ) ( nodes [ ] * pb . Node , err error ) {
2020-11-29 20:54:03 +00:00
for {
nodes , err = cache . knownReliable ( ctx , onlineWindow , nodeIDs )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return nodes , err
}
break
}
return nodes , err
}
func ( cache * overlaycache ) knownReliable ( ctx context . Context , onlineWindow time . Duration , nodeIDs storj . NodeIDList ) ( nodes [ ] * pb . Node , err error ) {
2019-12-16 13:45:13 +00:00
defer mon . Task ( ) ( & ctx ) ( & err )
if len ( nodeIDs ) == 0 {
return nil , Error . New ( "no ids provided" )
}
// get online nodes
2020-01-17 20:07:00 +00:00
rows , err := cache . db . Query ( ctx , cache . db . Rebind ( `
2020-03-06 22:04:23 +00:00
SELECT id , last_net , last_ip_port , address , protocol
FROM nodes
2019-12-16 13:45:13 +00:00
WHERE id = any ( $ 1 : : bytea [ ] )
AND disqualified IS NULL
2020-06-10 17:11:25 +01:00
AND unknown_audit_suspended IS NULL
2021-03-18 19:55:06 +00:00
AND offline_suspended IS NULL
2020-04-23 20:46:16 +01:00
AND exit_finished_at IS NULL
2019-12-16 13:45:13 +00:00
AND last_contact_success > $ 2
2020-06-28 04:56:29 +01:00
` ) , pgutil . NodeIDArray ( nodeIDs ) , time . Now ( ) . Add ( - onlineWindow ) ,
2019-12-16 13:45:13 +00:00
)
if err != nil {
return nil , err
}
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
for rows . Next ( ) {
2020-03-11 21:11:46 +00:00
row := & dbx . Node { }
2020-03-06 22:04:23 +00:00
err = rows . Scan ( & row . Id , & row . LastNet , & row . LastIpPort , & row . Address , & row . Protocol )
2019-12-16 13:45:13 +00:00
if err != nil {
return nil , err
}
2020-03-11 21:11:46 +00:00
node , err := convertDBNode ( ctx , row )
2019-12-16 13:45:13 +00:00
if err != nil {
return nil , err
}
2020-03-11 21:11:46 +00:00
nodes = append ( nodes , & node . Node )
2019-12-16 13:45:13 +00:00
}
2020-01-16 14:27:24 +00:00
return nodes , Error . Wrap ( rows . Err ( ) )
2019-12-16 13:45:13 +00:00
}
2019-07-08 23:04:35 +01:00
// Reliable returns all reliable nodes.
func ( cache * overlaycache ) Reliable ( ctx context . Context , criteria * overlay . NodeCriteria ) ( nodes storj . NodeIDList , err error ) {
2020-11-29 20:54:03 +00:00
for {
nodes , err = cache . reliable ( ctx , criteria )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return nodes , err
}
break
}
return nodes , err
}
func ( cache * overlaycache ) reliable ( ctx context . Context , criteria * overlay . NodeCriteria ) ( nodes storj . NodeIDList , err error ) {
2020-12-22 19:07:07 +00:00
asOf := cache . db . AsOfSystemTimeClause ( criteria . AsOfSystemTimeInterval )
2019-07-08 23:04:35 +01:00
// get reliable and online nodes
2020-01-17 20:07:00 +00:00
rows , err := cache . db . Query ( ctx , cache . db . Rebind ( `
2020-12-22 19:07:07 +00:00
SELECT id FROM nodes ` +asOf+ `
2019-07-08 23:04:35 +01:00
WHERE disqualified IS NULL
2020-06-10 17:11:25 +01:00
AND unknown_audit_suspended IS NULL
2021-03-18 19:55:06 +00:00
AND offline_suspended IS NULL
2020-04-23 20:46:16 +01:00
AND exit_finished_at IS NULL
2020-01-16 14:27:24 +00:00
AND last_contact_success > ?
` ) , time . Now ( ) . Add ( - criteria . OnlineWindow ) )
2019-07-08 23:04:35 +01:00
if err != nil {
return nil , err
}
defer func ( ) {
err = errs . Combine ( err , rows . Close ( ) )
} ( )
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
if err != nil {
return nil , err
}
nodes = append ( nodes , id )
}
2020-01-16 14:27:24 +00:00
return nodes , Error . Wrap ( rows . Err ( ) )
2019-07-08 23:04:35 +01:00
}
2020-07-16 15:18:02 +01:00
// BatchUpdateStats updates multiple storagenode's stats in one transaction.
2020-08-26 21:26:10 +01:00
func ( cache * overlaycache ) BatchUpdateStats ( ctx context . Context , updateRequests [ ] * overlay . UpdateRequest , batchSize int , now time . Time ) ( failed storj . NodeIDList , err error ) {
2019-07-31 18:21:06 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
if len ( updateRequests ) == 0 {
return failed , nil
}
2019-11-01 17:07:23 +00:00
// ensure updates happen in-order
sort . Slice ( updateRequests , func ( i , k int ) bool {
return updateRequests [ i ] . NodeID . Less ( updateRequests [ k ] . NodeID )
} )
2019-07-31 18:21:06 +01:00
doUpdate := func ( updateSlice [ ] * overlay . UpdateRequest ) ( duf storj . NodeIDList , err error ) {
appendAll := func ( ) {
for _ , ur := range updateRequests {
duf = append ( duf , ur . NodeID )
}
}
2019-12-19 10:03:20 +00:00
doAppendAll := true
err = cache . db . WithTx ( ctx , func ( ctx context . Context , tx * dbx . Tx ) ( err error ) {
2020-06-08 16:27:08 +01:00
_ , err = tx . Tx . ExecContext ( ctx , "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE" )
if err != nil {
return err
}
2019-12-19 10:03:20 +00:00
var allSQL string
for _ , updateReq := range updateSlice {
dbNode , err := tx . Get_Node_By_Id ( ctx , dbx . Node_Id ( updateReq . NodeID . Bytes ( ) ) )
if err != nil {
doAppendAll = false
return err
}
2019-07-31 18:21:06 +01:00
2019-12-19 10:03:20 +00:00
// do not update reputation if node is disqualified
if dbNode . Disqualified != nil {
continue
}
2020-04-23 20:46:16 +01:00
// do not update reputation if node has gracefully exited
if dbNode . ExitFinishedAt != nil {
continue
}
2019-07-31 18:21:06 +01:00
2020-10-23 16:27:39 +01:00
isUp := updateReq . AuditOutcome != overlay . AuditOffline
2020-12-21 17:26:07 +00:00
auditHistoryResponse , err := cache . updateAuditHistoryWithTx ( ctx , tx , updateReq . NodeID , now , isUp , updateReq . AuditHistory )
2020-08-04 21:28:46 +01:00
if err != nil {
doAppendAll = false
return err
}
2020-12-21 17:26:07 +00:00
updateNodeStats := cache . populateUpdateNodeStats ( dbNode , updateReq , auditHistoryResponse , now )
2020-04-08 23:28:25 +01:00
2019-12-19 10:03:20 +00:00
sql := buildUpdateStatement ( updateNodeStats )
2019-07-31 18:21:06 +01:00
2019-12-19 10:03:20 +00:00
allSQL += sql
2019-07-31 18:21:06 +01:00
}
2019-12-19 10:03:20 +00:00
if allSQL != "" {
2020-01-17 20:07:00 +00:00
results , err := tx . Tx . Exec ( ctx , allSQL )
2019-12-19 10:03:20 +00:00
if err != nil {
return err
}
2019-07-31 18:21:06 +01:00
2019-12-19 10:03:20 +00:00
_ , err = results . RowsAffected ( )
if err != nil {
return err
}
2019-07-31 18:21:06 +01:00
}
2019-12-19 10:03:20 +00:00
return nil
} )
if err != nil {
if doAppendAll {
2019-07-31 18:21:06 +01:00
appendAll ( )
}
2019-12-19 10:03:20 +00:00
return duf , Error . Wrap ( err )
2019-07-31 18:21:06 +01:00
}
2019-12-19 10:03:20 +00:00
return duf , nil
2019-07-31 18:21:06 +01:00
}
var errlist errs . Group
length := len ( updateRequests )
for i := 0 ; i < length ; i += batchSize {
end := i + batchSize
if end > length {
end = length
}
failedBatch , err := doUpdate ( updateRequests [ i : end ] )
if err != nil && len ( failedBatch ) > 0 {
for _ , fb := range failedBatch {
errlist . Add ( err )
failed = append ( failed , fb )
}
}
}
return failed , errlist . Err ( )
}
2020-08-26 21:26:10 +01:00
// UpdateStats all parts of single storagenode's stats.
func ( cache * overlaycache ) UpdateStats ( ctx context . Context , updateReq * overlay . UpdateRequest , now time . Time ) ( stats * overlay . NodeStats , err error ) {
2019-03-25 22:25:09 +00:00
defer mon . Task ( ) ( & ctx ) ( & err )
nodeID := updateReq . NodeID
2019-12-19 10:03:20 +00:00
var dbNode * dbx . Node
err = cache . db . WithTx ( ctx , func ( ctx context . Context , tx * dbx . Tx ) ( err error ) {
2020-06-08 16:27:08 +01:00
_ , err = tx . Tx . ExecContext ( ctx , "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE" )
if err != nil {
return err
}
2019-12-19 10:03:20 +00:00
dbNode , err = tx . Get_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) )
if err != nil {
return err
}
// do not update reputation if node is disqualified
if dbNode . Disqualified != nil {
return nil
}
2020-04-23 20:46:16 +01:00
// do not update reputation if node has gracefully exited
if dbNode . ExitFinishedAt != nil {
return nil
}
2019-03-25 22:25:09 +00:00
2020-10-23 16:27:39 +01:00
isUp := updateReq . AuditOutcome != overlay . AuditOffline
2020-12-21 17:26:07 +00:00
auditHistoryResponse , err := cache . updateAuditHistoryWithTx ( ctx , tx , updateReq . NodeID , now , isUp , updateReq . AuditHistory )
2020-08-04 21:28:46 +01:00
if err != nil {
return err
}
2020-12-21 17:26:07 +00:00
updateFields := cache . populateUpdateFields ( dbNode , updateReq , auditHistoryResponse , now )
2019-12-19 10:03:20 +00:00
dbNode , err = tx . Update_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) , updateFields )
if err != nil {
return err
}
2019-03-25 22:25:09 +00:00
2019-12-19 10:03:20 +00:00
// Cleanup containment table too
_ , err = tx . Delete_PendingAudits_By_NodeId ( ctx , dbx . PendingAudits_NodeId ( nodeID . Bytes ( ) ) )
return err
} )
2019-07-02 16:16:25 +01:00
if err != nil {
2019-12-19 10:03:20 +00:00
return nil , Error . Wrap ( err )
2019-07-02 16:16:25 +01:00
}
2019-04-08 18:52:53 +01:00
// TODO: Allegedly tx.Get_Node_By_Id and tx.Update_Node_By_Id should never return a nil value for dbNode,
// however we've seen from some crashes that it does. We need to track down the cause of these crashes
// but for now we're adding a nil check to prevent a panic.
if dbNode == nil {
2019-12-19 10:03:20 +00:00
return nil , Error . New ( "unable to get node by ID: %v" , nodeID )
2019-04-08 18:52:53 +01:00
}
2019-12-19 10:03:20 +00:00
return getNodeStats ( dbNode ) , nil
2019-03-25 22:25:09 +00:00
}
2019-09-10 17:05:07 +01:00
// UpdateNodeInfo updates the following fields for a given node ID:
2020-07-16 15:18:02 +01:00
// wallet, email for node operator, free disk, and version.
2020-06-16 13:16:55 +01:00
func ( cache * overlaycache ) UpdateNodeInfo ( ctx context . Context , nodeID storj . NodeID , nodeInfo * overlay . InfoResponse ) ( stats * overlay . NodeDossier , err error ) {
2019-03-25 22:25:09 +00:00
defer mon . Task ( ) ( & ctx ) ( & err )
2019-04-10 07:04:24 +01:00
var updateFields dbx . Node_Update_Fields
if nodeInfo != nil {
2020-06-16 13:16:55 +01:00
if nodeInfo . Type != pb . NodeType_INVALID {
updateFields . Type = dbx . Node_Type ( int ( nodeInfo . Type ) )
2019-04-22 10:07:50 +01:00
}
2020-06-16 13:16:55 +01:00
if nodeInfo . Operator != nil {
2021-01-18 14:33:13 +00:00
walletFeatures , err := encodeWalletFeatures ( nodeInfo . Operator . GetWalletFeatures ( ) )
if err != nil {
return nil , Error . Wrap ( err )
}
2020-06-16 13:16:55 +01:00
updateFields . Wallet = dbx . Node_Wallet ( nodeInfo . Operator . GetWallet ( ) )
updateFields . Email = dbx . Node_Email ( nodeInfo . Operator . GetEmail ( ) )
2021-01-18 14:33:13 +00:00
updateFields . WalletFeatures = dbx . Node_WalletFeatures ( walletFeatures )
2019-04-10 07:04:24 +01:00
}
2020-06-16 13:16:55 +01:00
if nodeInfo . Capacity != nil {
updateFields . FreeDisk = dbx . Node_FreeDisk ( nodeInfo . Capacity . GetFreeDisk ( ) )
2019-04-10 07:04:24 +01:00
}
2020-06-16 13:16:55 +01:00
if nodeInfo . Version != nil {
semVer , err := version . NewSemVer ( nodeInfo . Version . GetVersion ( ) )
2019-04-10 07:04:24 +01:00
if err != nil {
2019-04-22 10:07:50 +01:00
return nil , errs . New ( "unable to convert version to semVer" )
2019-04-10 07:04:24 +01:00
}
2019-10-21 11:50:59 +01:00
updateFields . Major = dbx . Node_Major ( int64 ( semVer . Major ) )
updateFields . Minor = dbx . Node_Minor ( int64 ( semVer . Minor ) )
updateFields . Patch = dbx . Node_Patch ( int64 ( semVer . Patch ) )
2020-06-16 13:16:55 +01:00
updateFields . Hash = dbx . Node_Hash ( nodeInfo . Version . GetCommitHash ( ) )
updateFields . Timestamp = dbx . Node_Timestamp ( nodeInfo . Version . Timestamp )
updateFields . Release = dbx . Node_Release ( nodeInfo . Version . GetRelease ( ) )
2019-04-10 07:04:24 +01:00
}
2019-03-25 22:25:09 +00:00
}
2019-04-04 17:34:36 +01:00
updatedDBNode , err := cache . db . Update_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) , updateFields )
2019-03-25 22:25:09 +00:00
if err != nil {
2019-04-04 17:34:36 +01:00
return nil , Error . Wrap ( err )
2019-03-25 22:25:09 +00:00
}
2019-06-04 12:55:38 +01:00
return convertDBNode ( ctx , updatedDBNode )
2019-03-25 22:25:09 +00:00
}
2020-01-03 19:11:47 +00:00
// DisqualifyNode disqualifies a storage node.
func ( cache * overlaycache ) DisqualifyNode ( ctx context . Context , nodeID storj . NodeID ) ( err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
updateFields := dbx . Node_Update_Fields { }
updateFields . Disqualified = dbx . Node_Disqualified ( time . Now ( ) . UTC ( ) )
dbNode , err := cache . db . Update_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) , updateFields )
if err != nil {
return err
}
if dbNode == nil {
return errs . New ( "unable to get node by ID: %v" , nodeID )
}
return nil
}
2020-06-10 17:11:25 +01:00
// SuspendNodeUnknownAudit suspends a storage node for unknown audits.
func ( cache * overlaycache ) SuspendNodeUnknownAudit ( ctx context . Context , nodeID storj . NodeID , suspendedAt time . Time ) ( err error ) {
2020-03-09 15:35:54 +00:00
defer mon . Task ( ) ( & ctx ) ( & err )
updateFields := dbx . Node_Update_Fields { }
2020-06-10 17:11:25 +01:00
updateFields . UnknownAuditSuspended = dbx . Node_UnknownAuditSuspended ( suspendedAt . UTC ( ) )
2020-03-09 15:35:54 +00:00
dbNode , err := cache . db . Update_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) , updateFields )
if err != nil {
return err
}
if dbNode == nil {
return errs . New ( "unable to get node by ID: %v" , nodeID )
}
return nil
}
2020-06-10 17:11:25 +01:00
// UnsuspendNodeUnknownAudit unsuspends a storage node for unknown audits.
func ( cache * overlaycache ) UnsuspendNodeUnknownAudit ( ctx context . Context , nodeID storj . NodeID ) ( err error ) {
2020-03-09 15:35:54 +00:00
defer mon . Task ( ) ( & ctx ) ( & err )
updateFields := dbx . Node_Update_Fields { }
2020-06-10 17:11:25 +01:00
updateFields . UnknownAuditSuspended = dbx . Node_UnknownAuditSuspended_Null ( )
2020-03-09 15:35:54 +00:00
dbNode , err := cache . db . Update_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) , updateFields )
2020-12-04 22:21:07 +00:00
if err != nil {
return err
}
2020-03-09 15:35:54 +00:00
if dbNode == nil {
return errs . New ( "unable to get node by ID: %v" , nodeID )
}
return nil
}
2019-08-27 13:37:42 +01:00
// AllPieceCounts returns a map of node IDs to piece counts from the db.
// NB: a valid, partial piece map can be returned even if node ID parsing error(s) are returned.
func ( cache * overlaycache ) AllPieceCounts ( ctx context . Context ) ( _ map [ storj . NodeID ] int , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
// NB: `All_Node_Id_Node_PieceCount_By_PieceCount_Not_Number` selects node
// ID and piece count from the nodes table where piece count is not zero.
rows , err := cache . db . All_Node_Id_Node_PieceCount_By_PieceCount_Not_Number ( ctx )
if err != nil {
return nil , Error . Wrap ( err )
}
pieceCounts := make ( map [ storj . NodeID ] int )
nodeIDErrs := errs . Group { }
for _ , row := range rows {
nodeID , err := storj . NodeIDFromBytes ( row . Id )
if err != nil {
nodeIDErrs . Add ( err )
continue
}
pieceCounts [ nodeID ] = int ( row . PieceCount )
}
2019-09-11 22:38:58 +01:00
2019-08-27 13:37:42 +01:00
return pieceCounts , nodeIDErrs . Err ( )
}
func ( cache * overlaycache ) UpdatePieceCounts ( ctx context . Context , pieceCounts map [ storj . NodeID ] int ) ( err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
if len ( pieceCounts ) == 0 {
return nil
}
2019-09-11 22:38:58 +01:00
// TODO: pass in the apprioriate struct to database, rather than constructing it here
type NodeCount struct {
ID storj . NodeID
Count int64
2019-08-27 13:37:42 +01:00
}
2019-09-11 22:38:58 +01:00
var counts [ ] NodeCount
2019-08-27 13:37:42 +01:00
2019-09-11 22:38:58 +01:00
for nodeid , count := range pieceCounts {
counts = append ( counts , NodeCount {
ID : nodeid ,
Count : int64 ( count ) ,
} )
}
sort . Slice ( counts , func ( i , k int ) bool {
return counts [ i ] . ID . Less ( counts [ k ] . ID )
} )
2019-08-27 13:37:42 +01:00
2019-10-18 22:27:57 +01:00
var nodeIDs [ ] storj . NodeID
var countNumbers [ ] int64
for _ , count := range counts {
nodeIDs = append ( nodeIDs , count . ID )
countNumbers = append ( countNumbers , count . Count )
2019-08-27 13:37:42 +01:00
}
2019-09-11 22:38:58 +01:00
2019-10-18 22:27:57 +01:00
_ , err = cache . db . ExecContext ( ctx , `
UPDATE nodes
SET piece_count = update . count
FROM (
SELECT unnest ( $ 1 : : bytea [ ] ) as id , unnest ( $ 2 : : bigint [ ] ) as count
) as update
WHERE nodes . id = update . id
2020-06-28 04:56:29 +01:00
` , pgutil . NodeIDArray ( nodeIDs ) , pgutil . Int8Array ( countNumbers ) )
2019-10-18 22:27:57 +01:00
2019-09-11 22:38:58 +01:00
return Error . Wrap ( err )
2019-08-27 13:37:42 +01:00
}
2019-11-07 17:19:34 +00:00
// GetExitingNodes returns nodes who have initiated a graceful exit and is not disqualified, but have not completed it.
2019-10-24 17:24:42 +01:00
func ( cache * overlaycache ) GetExitingNodes ( ctx context . Context ) ( exitingNodes [ ] * overlay . ExitStatus , err error ) {
2020-11-29 20:54:03 +00:00
for {
exitingNodes , err = cache . getExitingNodes ( ctx )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return exitingNodes , err
}
break
}
return exitingNodes , err
}
func ( cache * overlaycache ) getExitingNodes ( ctx context . Context ) ( exitingNodes [ ] * overlay . ExitStatus , err error ) {
2019-10-01 23:18:21 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2020-01-17 20:07:00 +00:00
rows , err := cache . db . Query ( ctx , cache . db . Rebind ( `
2019-10-24 17:24:42 +01:00
SELECT id , exit_initiated_at , exit_loop_completed_at , exit_finished_at , exit_success FROM nodes
2019-10-01 23:18:21 +01:00
WHERE exit_initiated_at IS NOT NULL
AND exit_finished_at IS NULL
2019-11-07 17:19:34 +00:00
AND disqualified is NULL
2020-01-16 14:27:24 +00:00
` ) )
2019-10-01 23:18:21 +01:00
if err != nil {
return nil , err
}
2020-01-16 14:27:24 +00:00
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
2019-10-01 23:18:21 +01:00
for rows . Next ( ) {
2019-10-24 17:24:42 +01:00
var exitingNodeStatus overlay . ExitStatus
err = rows . Scan ( & exitingNodeStatus . NodeID , & exitingNodeStatus . ExitInitiatedAt , & exitingNodeStatus . ExitLoopCompletedAt , & exitingNodeStatus . ExitFinishedAt , & exitingNodeStatus . ExitSuccess )
2019-10-01 23:18:21 +01:00
if err != nil {
return nil , err
}
2019-10-24 17:24:42 +01:00
exitingNodes = append ( exitingNodes , & exitingNodeStatus )
2019-10-01 23:18:21 +01:00
}
2020-01-16 14:27:24 +00:00
return exitingNodes , Error . Wrap ( rows . Err ( ) )
2019-10-01 23:18:21 +01:00
}
2019-10-23 02:06:01 +01:00
// GetExitStatus returns a node's graceful exit status.
2020-11-29 20:54:03 +00:00
func ( cache * overlaycache ) GetExitStatus ( ctx context . Context , nodeID storj . NodeID ) ( exitStatus * overlay . ExitStatus , err error ) {
for {
exitStatus , err = cache . getExitStatus ( ctx , nodeID )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return exitStatus , err
}
break
}
return exitStatus , err
}
func ( cache * overlaycache ) getExitStatus ( ctx context . Context , nodeID storj . NodeID ) ( _ * overlay . ExitStatus , err error ) {
2019-10-11 22:18:05 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2020-01-17 20:07:00 +00:00
rows , err := cache . db . Query ( ctx , cache . db . Rebind ( `
2020-01-16 14:27:24 +00:00
SELECT id , exit_initiated_at , exit_loop_completed_at , exit_finished_at , exit_success
FROM nodes
WHERE id = ?
` ) , nodeID )
2019-10-11 22:18:05 +01:00
if err != nil {
return nil , Error . Wrap ( err )
}
2020-01-16 14:27:24 +00:00
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
2019-10-11 22:18:05 +01:00
exitStatus := & overlay . ExitStatus { }
if rows . Next ( ) {
2019-10-17 16:01:39 +01:00
err = rows . Scan ( & exitStatus . NodeID , & exitStatus . ExitInitiatedAt , & exitStatus . ExitLoopCompletedAt , & exitStatus . ExitFinishedAt , & exitStatus . ExitSuccess )
2020-01-16 14:27:24 +00:00
if err != nil {
return nil , err
}
2019-10-11 22:18:05 +01:00
}
2020-01-16 14:27:24 +00:00
return exitStatus , Error . Wrap ( rows . Err ( ) )
2019-10-11 22:18:05 +01:00
}
2019-10-01 23:18:21 +01:00
2019-10-23 02:06:01 +01:00
// GetGracefulExitCompletedByTimeFrame returns nodes who have completed graceful exit within a time window (time window is around graceful exit completion).
func ( cache * overlaycache ) GetGracefulExitCompletedByTimeFrame ( ctx context . Context , begin , end time . Time ) ( exitedNodes storj . NodeIDList , err error ) {
2020-11-29 20:54:03 +00:00
for {
exitedNodes , err = cache . getGracefulExitCompletedByTimeFrame ( ctx , begin , end )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return exitedNodes , err
}
break
}
return exitedNodes , err
}
func ( cache * overlaycache ) getGracefulExitCompletedByTimeFrame ( ctx context . Context , begin , end time . Time ) ( exitedNodes storj . NodeIDList , err error ) {
2019-10-23 02:06:01 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2020-01-17 20:07:00 +00:00
rows , err := cache . db . Query ( ctx , cache . db . Rebind ( `
2019-10-23 02:06:01 +01:00
SELECT id FROM nodes
WHERE exit_initiated_at IS NOT NULL
AND exit_finished_at IS NOT NULL
AND exit_finished_at >= ?
AND exit_finished_at < ?
2020-01-16 14:27:24 +00:00
` ) , begin , end )
2019-10-23 02:06:01 +01:00
if err != nil {
return nil , err
}
defer func ( ) {
err = errs . Combine ( err , rows . Close ( ) )
} ( )
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
if err != nil {
return nil , err
}
exitedNodes = append ( exitedNodes , id )
}
2020-01-16 14:27:24 +00:00
return exitedNodes , Error . Wrap ( rows . Err ( ) )
2019-10-23 02:06:01 +01:00
}
// GetGracefulExitIncompleteByTimeFrame returns nodes who have initiated, but not completed graceful exit within a time window (time window is around graceful exit initiation).
func ( cache * overlaycache ) GetGracefulExitIncompleteByTimeFrame ( ctx context . Context , begin , end time . Time ) ( exitingNodes storj . NodeIDList , err error ) {
2020-11-29 20:54:03 +00:00
for {
exitingNodes , err = cache . getGracefulExitIncompleteByTimeFrame ( ctx , begin , end )
if err != nil {
if cockroachutil . NeedsRetry ( err ) {
continue
}
return exitingNodes , err
}
break
}
return exitingNodes , err
}
func ( cache * overlaycache ) getGracefulExitIncompleteByTimeFrame ( ctx context . Context , begin , end time . Time ) ( exitingNodes storj . NodeIDList , err error ) {
2019-10-23 02:06:01 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2020-01-17 20:07:00 +00:00
rows , err := cache . db . Query ( ctx , cache . db . Rebind ( `
2019-10-23 02:06:01 +01:00
SELECT id FROM nodes
WHERE exit_initiated_at IS NOT NULL
AND exit_finished_at IS NULL
AND exit_initiated_at >= ?
AND exit_initiated_at < ?
2020-01-16 14:27:24 +00:00
` ) , begin , end )
2019-10-23 02:06:01 +01:00
if err != nil {
return nil , err
}
defer func ( ) {
err = errs . Combine ( err , rows . Close ( ) )
} ( )
// TODO return more than just ID
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
if err != nil {
return nil , err
}
exitingNodes = append ( exitingNodes , id )
}
2020-01-16 14:27:24 +00:00
return exitingNodes , Error . Wrap ( rows . Err ( ) )
2019-10-23 02:06:01 +01:00
}
2019-10-01 23:18:21 +01:00
// UpdateExitStatus is used to update a node's graceful exit status.
2019-10-29 20:22:20 +00:00
func ( cache * overlaycache ) UpdateExitStatus ( ctx context . Context , request * overlay . ExitStatusRequest ) ( _ * overlay . NodeDossier , err error ) {
2019-10-01 23:18:21 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
nodeID := request . NodeID
updateFields := populateExitStatusFields ( request )
2019-12-20 15:59:47 +00:00
dbNode , err := cache . db . Update_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) , updateFields )
2019-10-01 23:18:21 +01:00
if err != nil {
return nil , Error . Wrap ( err )
}
if dbNode == nil {
2019-12-20 15:59:47 +00:00
return nil , Error . Wrap ( errs . New ( "unable to get node by ID: %v" , nodeID ) )
2019-10-29 20:22:20 +00:00
}
return convertDBNode ( ctx , dbNode )
2019-10-01 23:18:21 +01:00
}
func populateExitStatusFields ( req * overlay . ExitStatusRequest ) dbx . Node_Update_Fields {
dbxUpdateFields := dbx . Node_Update_Fields { }
if ! req . ExitInitiatedAt . IsZero ( ) {
dbxUpdateFields . ExitInitiatedAt = dbx . Node_ExitInitiatedAt ( req . ExitInitiatedAt )
}
if ! req . ExitLoopCompletedAt . IsZero ( ) {
dbxUpdateFields . ExitLoopCompletedAt = dbx . Node_ExitLoopCompletedAt ( req . ExitLoopCompletedAt )
}
if ! req . ExitFinishedAt . IsZero ( ) {
dbxUpdateFields . ExitFinishedAt = dbx . Node_ExitFinishedAt ( req . ExitFinishedAt )
}
2019-10-17 16:01:39 +01:00
dbxUpdateFields . ExitSuccess = dbx . Node_ExitSuccess ( req . ExitSuccess )
2019-10-01 23:18:21 +01:00
return dbxUpdateFields
}
2019-06-04 12:55:38 +01:00
// convertDBNode translates a dbx node row into an overlay.NodeDossier.
// It returns an error when info is nil, when the node ID bytes do not parse,
// or when the stored major/minor/patch triple is not a valid semver.
func convertDBNode(ctx context.Context, info *dbx.Node) (_ *overlay.NodeDossier, err error) {
	if info == nil {
		return nil, Error.New("missing info")
	}

	id, err := storj.NodeIDFromBytes(info.Id)
	if err != nil {
		return nil, err
	}
	// The version is stored as three integer columns; rebuild a semver from them.
	ver, err := version.NewSemVer(fmt.Sprintf("%d.%d.%d", info.Major, info.Minor, info.Patch))
	if err != nil {
		return nil, err
	}

	// Graceful exit columns are nullable; they are copied over as-is.
	exitStatus := overlay.ExitStatus{NodeID: id}
	exitStatus.ExitInitiatedAt = info.ExitInitiatedAt
	exitStatus.ExitLoopCompletedAt = info.ExitLoopCompletedAt
	exitStatus.ExitFinishedAt = info.ExitFinishedAt
	exitStatus.ExitSuccess = info.ExitSuccess

	node := &overlay.NodeDossier{
		Node: pb.Node{
			Id: id,
			Address: &pb.NodeAddress{
				Address:   info.Address,
				Transport: pb.NodeTransport(info.Protocol),
			},
		},
		Type: pb.NodeType(info.Type),
		Operator: pb.NodeOperator{
			Email:  info.Email,
			Wallet: info.Wallet,
			// wallet features are stored as a comma separated string
			WalletFeatures: decodeWalletFeatures(info.WalletFeatures),
		},
		Capacity: pb.NodeCapacity{
			FreeDisk: info.FreeDisk,
		},
		Reputation: *getNodeStats(info),
		Version: pb.NodeVersion{
			Version:    ver.String(),
			CommitHash: info.Hash,
			Timestamp:  info.Timestamp,
			Release:    info.Release,
		},
		Contained:             info.Contained,
		Disqualified:          info.Disqualified,
		UnknownAuditSuspended: info.UnknownAuditSuspended,
		OfflineSuspended:      info.OfflineSuspended,
		OfflineUnderReview:    info.UnderReview,
		PieceCount:            info.PieceCount,
		ExitStatus:            exitStatus,
		CreatedAt:             info.CreatedAt,
		LastNet:               info.LastNet,
	}
	// last_ip_port is nullable; only copy it when present.
	if info.LastIpPort != nil {
		node.LastIPPort = *info.LastIpPort
	}

	return node, nil
}
2019-03-25 22:25:09 +00:00
2021-01-18 14:33:13 +00:00
// encodeWalletFeatures encodes wallet features into comma separated list string.
// It fails when any feature itself contains the comma separator.
func encodeWalletFeatures(features []string) (string, error) {
	var invalid errs.Group
	for _, feature := range features {
		if strings.ContainsRune(feature, ',') {
			invalid.Add(errs.New("error encoding %s, can not contain separator \",\"", feature))
		}
	}
	if err := invalid.Err(); err != nil {
		return "", Error.Wrap(err)
	}
	return strings.Join(features, ","), nil
}
// decodeWalletFeatures decodes comma separated wallet features list string.
// An empty string decodes to nil rather than a one-element slice.
func decodeWalletFeatures(encoded string) []string {
	if len(encoded) == 0 {
		return nil
	}
	return strings.Split(encoded, ",")
}
2019-04-08 18:52:53 +01:00
func getNodeStats ( dbNode * dbx . Node ) * overlay . NodeStats {
2019-03-25 22:25:09 +00:00
nodeStats := & overlay . NodeStats {
2020-03-09 15:35:54 +00:00
Latency90 : dbNode . Latency90 ,
2020-05-20 20:57:53 +01:00
VettedAt : dbNode . VettedAt ,
2020-03-09 15:35:54 +00:00
AuditCount : dbNode . TotalAuditCount ,
AuditSuccessCount : dbNode . AuditSuccessCount ,
LastContactSuccess : dbNode . LastContactSuccess ,
LastContactFailure : dbNode . LastContactFailure ,
AuditReputationAlpha : dbNode . AuditReputationAlpha ,
AuditReputationBeta : dbNode . AuditReputationBeta ,
Disqualified : dbNode . Disqualified ,
UnknownAuditReputationAlpha : dbNode . UnknownAuditReputationAlpha ,
UnknownAuditReputationBeta : dbNode . UnknownAuditReputationBeta ,
2020-06-10 17:11:25 +01:00
UnknownAuditSuspended : dbNode . UnknownAuditSuspended ,
2020-08-28 20:43:53 +01:00
OfflineUnderReview : dbNode . UnderReview ,
OfflineSuspended : dbNode . OfflineSuspended ,
OnlineScore : dbNode . OnlineScore ,
2019-03-25 22:25:09 +00:00
}
return nodeStats
}
2019-06-20 14:56:04 +01:00
// updateReputation uses the Beta distribution model to determine a node's reputation.
// lambda is the "forgetting factor" which determines how much past info is kept when determining current reputation score.
// w is the normalization weight that affects how severely new updates affect the current reputation distribution.
func updateReputation(isSuccess bool, alpha, beta, lambda, w float64, totalCount int64) (newAlpha, newBeta float64, updatedCount int64) {
	// feedback is +1 for a success and -1 for a failure; it drives the
	// weight toward alpha (success) or beta (failure).
	feedback := -1.0
	if isSuccess {
		feedback = 1.0
	}
	newAlpha = lambda*alpha + w*(1+feedback)/2
	newBeta = lambda*beta + w*(1-feedback)/2
	return newAlpha, newBeta, totalCount + 1
}
2019-07-31 18:21:06 +01:00
2019-12-14 02:29:54 +00:00
func buildUpdateStatement ( update updateNodeStats ) string {
2019-07-31 18:21:06 +01:00
if update . NodeID . IsZero ( ) {
return ""
}
atLeastOne := false
sql := "UPDATE nodes SET "
2020-05-20 20:57:53 +01:00
if update . VettedAt . set {
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "vetted_at = '%s'" , update . VettedAt . value . Format ( time . RFC3339Nano ) )
2020-05-20 20:57:53 +01:00
}
2019-07-31 18:21:06 +01:00
if update . TotalAuditCount . set {
2020-05-20 20:57:53 +01:00
if atLeastOne {
sql += ","
}
2019-07-31 18:21:06 +01:00
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "total_audit_count = %d" , update . TotalAuditCount . value )
2019-07-31 18:21:06 +01:00
}
if update . AuditReputationAlpha . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "audit_reputation_alpha = %f" , update . AuditReputationAlpha . value )
2019-07-31 18:21:06 +01:00
}
if update . AuditReputationBeta . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "audit_reputation_beta = %f" , update . AuditReputationBeta . value )
2019-07-31 18:21:06 +01:00
}
2020-04-23 15:06:06 +01:00
if update . UnknownAuditReputationAlpha . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "unknown_audit_reputation_alpha = %f" , update . UnknownAuditReputationAlpha . value )
2020-04-23 15:06:06 +01:00
}
if update . UnknownAuditReputationBeta . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "unknown_audit_reputation_beta = %f" , update . UnknownAuditReputationBeta . value )
2020-04-23 15:06:06 +01:00
}
2019-07-31 18:21:06 +01:00
if update . Disqualified . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "disqualified = '%s'" , update . Disqualified . value . Format ( time . RFC3339Nano ) )
2019-07-31 18:21:06 +01:00
}
2020-06-10 17:11:25 +01:00
if update . UnknownAuditSuspended . set {
2020-03-09 15:35:54 +00:00
if atLeastOne {
sql += ","
}
atLeastOne = true
2020-06-10 17:11:25 +01:00
if update . UnknownAuditSuspended . isNil {
sql += "unknown_audit_suspended = NULL"
2020-03-11 21:11:46 +00:00
} else {
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "unknown_audit_suspended = '%s'" , update . UnknownAuditSuspended . value . Format ( time . RFC3339Nano ) )
2020-03-11 21:11:46 +00:00
}
2020-03-09 15:35:54 +00:00
}
2019-07-31 18:21:06 +01:00
if update . LastContactSuccess . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "last_contact_success = '%s'" , update . LastContactSuccess . value . Format ( time . RFC3339Nano ) )
2019-07-31 18:21:06 +01:00
}
if update . LastContactFailure . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "last_contact_failure = '%s'" , update . LastContactFailure . value . Format ( time . RFC3339Nano ) )
2019-07-31 18:21:06 +01:00
}
if update . AuditSuccessCount . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "audit_success_count = %d" , update . AuditSuccessCount . value )
2019-07-31 18:21:06 +01:00
}
if update . Contained . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
2020-08-26 21:26:10 +01:00
sql += fmt . Sprintf ( "contained = %t" , update . Contained . value )
}
2020-08-28 20:43:53 +01:00
if update . OnlineScore . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "online_score = %f" , update . OnlineScore . value )
}
2020-08-26 21:26:10 +01:00
if update . OfflineUnderReview . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
if update . OfflineUnderReview . isNil {
sql += "under_review = NULL"
} else {
sql += fmt . Sprintf ( "under_review = '%s'" , update . OfflineUnderReview . value . Format ( time . RFC3339Nano ) )
}
}
if update . OfflineSuspended . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
if update . OfflineSuspended . isNil {
sql += "offline_suspended = NULL"
} else {
sql += fmt . Sprintf ( "offline_suspended = '%s'" , update . OfflineSuspended . value . Format ( time . RFC3339Nano ) )
}
2019-07-31 18:21:06 +01:00
}
if ! atLeastOne {
return ""
}
hexNodeID := hex . EncodeToString ( update . NodeID . Bytes ( ) )
2019-10-18 22:27:57 +01:00
sql += fmt . Sprintf ( " WHERE nodes.id = decode('%v', 'hex');\n" , hexNodeID )
sql += fmt . Sprintf ( "DELETE FROM pending_audits WHERE pending_audits.node_id = decode('%v', 'hex');\n" , hexNodeID )
2019-07-31 18:21:06 +01:00
return sql
}
// int64Field is an optional int64 column value; value is only meaningful when set is true.
type int64Field struct {
	set   bool
	value int64
}

// float64Field is an optional float64 column value; value is only meaningful when set is true.
type float64Field struct {
	set   bool
	value float64
}

// boolField is an optional bool column value; value is only meaningful when set is true.
type boolField struct {
	set   bool
	value bool
}

// timeField is an optional, nullable timestamp column value; value is only
// meaningful when set is true, and isNil marks the column to be written as NULL.
type timeField struct {
	set   bool
	isNil bool
	value time.Time
}

// updateNodeStats collects all the per-node stats columns that a single
// update may touch; each field records whether it should be written.
type updateNodeStats struct {
	NodeID                      storj.NodeID
	VettedAt                    timeField
	TotalAuditCount             int64Field
	AuditReputationAlpha        float64Field
	AuditReputationBeta         float64Field
	Disqualified                timeField
	UnknownAuditReputationAlpha float64Field
	UnknownAuditReputationBeta  float64Field
	UnknownAuditSuspended       timeField
	LastContactSuccess          timeField
	LastContactFailure          timeField
	AuditSuccessCount           int64Field
	Contained                   boolField
	OfflineUnderReview          timeField
	OfflineSuspended            timeField
	OnlineScore                 float64Field
}
2020-12-21 17:26:07 +00:00
// populateUpdateNodeStats computes the new stats fields for a node from its
// current db row (dbNode), the audit outcome in updateReq, and the node's
// updated online/audit history. It applies the vetting, disqualification and
// suspension rules but performs no database writes itself.
func (cache *overlaycache) populateUpdateNodeStats(dbNode *dbx.Node, updateReq *overlay.UpdateRequest, auditHistoryResponse *overlay.UpdateAuditHistoryResponse, now time.Time) updateNodeStats {
	// there are three audit outcomes: success, failure, and unknown
	// if a node fails enough audits, it gets disqualified
	// if a node gets enough "unknown" audits, it gets put into suspension
	// if a node gets enough successful audits, and is in suspension, it gets removed from suspension
	auditAlpha := dbNode.AuditReputationAlpha
	auditBeta := dbNode.AuditReputationBeta
	unknownAuditAlpha := dbNode.UnknownAuditReputationAlpha
	unknownAuditBeta := dbNode.UnknownAuditReputationBeta
	totalAuditCount := dbNode.TotalAuditCount
	vettedAt := dbNode.VettedAt

	var updatedTotalAuditCount int64

	switch updateReq.AuditOutcome {
	case overlay.AuditSuccess:
		// for a successful audit, increase reputation for normal *and* unknown audits
		auditAlpha, auditBeta, updatedTotalAuditCount = updateReputation(
			true,
			auditAlpha,
			auditBeta,
			updateReq.AuditLambda,
			updateReq.AuditWeight,
			totalAuditCount,
		)
		// we will use updatedTotalAuditCount from the updateReputation call above
		unknownAuditAlpha, unknownAuditBeta, _ = updateReputation(
			true,
			unknownAuditAlpha,
			unknownAuditBeta,
			updateReq.AuditLambda,
			updateReq.AuditWeight,
			totalAuditCount,
		)
	case overlay.AuditFailure:
		// for audit failure, only update normal alpha/beta
		auditAlpha, auditBeta, updatedTotalAuditCount = updateReputation(
			false,
			auditAlpha,
			auditBeta,
			updateReq.AuditLambda,
			updateReq.AuditWeight,
			totalAuditCount,
		)
	case overlay.AuditUnknown:
		// for audit unknown, only update unknown alpha/beta
		unknownAuditAlpha, unknownAuditBeta, updatedTotalAuditCount = updateReputation(
			false,
			unknownAuditAlpha,
			unknownAuditBeta,
			updateReq.AuditLambda,
			updateReq.AuditWeight,
			totalAuditCount,
		)
	case overlay.AuditOffline:
		// for audit offline, only update total audit count
		updatedTotalAuditCount = totalAuditCount + 1
	}

	mon.FloatVal("audit_reputation_alpha").Observe(auditAlpha)                //mon:locked
	mon.FloatVal("audit_reputation_beta").Observe(auditBeta)                  //mon:locked
	mon.FloatVal("unknown_audit_reputation_alpha").Observe(unknownAuditAlpha) //mon:locked
	mon.FloatVal("unknown_audit_reputation_beta").Observe(unknownAuditBeta)   //mon:locked
	mon.FloatVal("audit_online_score").Observe(auditHistoryResponse.NewScore) //mon:locked

	// any outcome other than offline means the node was reachable
	isUp := updateReq.AuditOutcome != overlay.AuditOffline

	updateFields := updateNodeStats{
		NodeID:                      updateReq.NodeID,
		TotalAuditCount:             int64Field{set: true, value: updatedTotalAuditCount},
		AuditReputationAlpha:        float64Field{set: true, value: auditAlpha},
		AuditReputationBeta:         float64Field{set: true, value: auditBeta},
		UnknownAuditReputationAlpha: float64Field{set: true, value: unknownAuditAlpha},
		UnknownAuditReputationBeta:  float64Field{set: true, value: unknownAuditBeta},
	}

	// vet the node once it has accumulated enough audits
	if vettedAt == nil && updatedTotalAuditCount >= updateReq.AuditsRequiredForVetting {
		updateFields.VettedAt = timeField{set: true, value: now}
	}

	// disqualification case a
	//   a) Success/fail audit reputation falls below audit DQ threshold
	auditRep := auditAlpha / (auditAlpha + auditBeta)
	if auditRep <= updateReq.AuditDQ {
		cache.db.log.Info("Disqualified", zap.String("DQ type", "audit failure"), zap.String("Node ID", updateReq.NodeID.String()))
		mon.Meter("bad_audit_dqs").Mark(1) //mon:locked
		updateFields.Disqualified = timeField{set: true, value: now}
	}

	// if unknown audit rep goes below threshold, suspend node. Otherwise unsuspend node.
	unknownAuditRep := unknownAuditAlpha / (unknownAuditAlpha + unknownAuditBeta)
	if unknownAuditRep <= updateReq.AuditDQ {
		if dbNode.UnknownAuditSuspended == nil {
			cache.db.log.Info("Suspended", zap.String("Node ID", updateFields.NodeID.String()), zap.String("Category", "Unknown Audits"))
			updateFields.UnknownAuditSuspended = timeField{set: true, value: now}
		}

		// disqualification case b
		//   b) Node is suspended (success/unknown reputation below audit DQ threshold)
		//      AND the suspended grace period has elapsed
		//      AND audit outcome is unknown or failed
		// if suspended grace period has elapsed and audit outcome was failed or unknown,
		// disqualify node. Set suspended to nil if node is disqualified
		// NOTE: if updateFields.Suspended is set, we just suspended the node so it will not be disqualified
		if updateReq.AuditOutcome != overlay.AuditSuccess {
			if dbNode.UnknownAuditSuspended != nil && !updateFields.UnknownAuditSuspended.set &&
				time.Since(*dbNode.UnknownAuditSuspended) > updateReq.SuspensionGracePeriod &&
				updateReq.SuspensionDQEnabled {
				cache.db.log.Info("Disqualified", zap.String("DQ type", "suspension grace period expired for unknown audits"), zap.String("Node ID", updateReq.NodeID.String()))
				mon.Meter("unknown_suspension_dqs").Mark(1) //mon:locked
				updateFields.Disqualified = timeField{set: true, value: now}
				updateFields.UnknownAuditSuspended = timeField{set: true, isNil: true}
			}
		}
	} else if dbNode.UnknownAuditSuspended != nil {
		cache.db.log.Info("Suspension lifted", zap.String("Category", "Unknown Audits"), zap.String("Node ID", updateFields.NodeID.String()))
		updateFields.UnknownAuditSuspended = timeField{set: true, isNil: true}
	}

	if isUp {
		updateFields.LastContactSuccess = timeField{set: true, value: now}
	} else {
		updateFields.LastContactFailure = timeField{set: true, value: now}
	}

	if updateReq.AuditOutcome == overlay.AuditSuccess {
		updateFields.AuditSuccessCount = int64Field{set: true, value: dbNode.AuditSuccessCount + 1}
	}

	// Updating node stats always exits it from containment mode
	updateFields.Contained = boolField{set: true, value: false}

	// always update online score
	updateFields.OnlineScore = float64Field{set: true, value: auditHistoryResponse.NewScore}

	// if suspension not enabled, skip penalization and unsuspend node if applicable
	if !updateReq.AuditHistory.OfflineSuspensionEnabled {
		if dbNode.OfflineSuspended != nil {
			updateFields.OfflineSuspended = timeField{set: true, isNil: true}
		}
		if dbNode.UnderReview != nil {
			updateFields.OfflineUnderReview = timeField{set: true, isNil: true}
		}
		return updateFields
	}

	// only penalize node if online score is below threshold and
	// if it has enough completed windows to fill a tracking period
	penalizeOfflineNode := false
	if auditHistoryResponse.NewScore < updateReq.AuditHistory.OfflineThreshold && auditHistoryResponse.TrackingPeriodFull {
		penalizeOfflineNode = true
	}

	// Suspension and disqualification for offline nodes
	if dbNode.UnderReview != nil {
		// move node in and out of suspension as needed during review period
		if !penalizeOfflineNode && dbNode.OfflineSuspended != nil {
			updateFields.OfflineSuspended = timeField{set: true, isNil: true}
		} else if penalizeOfflineNode && dbNode.OfflineSuspended == nil {
			updateFields.OfflineSuspended = timeField{set: true, value: now}
		}

		gracePeriodEnd := dbNode.UnderReview.Add(updateReq.AuditHistory.GracePeriod)
		trackingPeriodEnd := gracePeriodEnd.Add(updateReq.AuditHistory.TrackingPeriod)
		trackingPeriodPassed := now.After(trackingPeriodEnd)

		// after tracking period has elapsed, if score is good, clear under review
		// otherwise, disqualify node (if OfflineDQEnabled feature flag is true)
		if trackingPeriodPassed {
			if penalizeOfflineNode {
				if updateReq.AuditHistory.OfflineDQEnabled {
					cache.db.log.Info("Disqualified", zap.String("DQ type", "node offline"), zap.String("Node ID", updateReq.NodeID.String()))
					mon.Meter("offline_dqs").Mark(1) //mon:locked
					updateFields.Disqualified = timeField{set: true, value: now}
				}
			} else {
				updateFields.OfflineUnderReview = timeField{set: true, isNil: true}
				updateFields.OfflineSuspended = timeField{set: true, isNil: true}
			}
		}
	} else if penalizeOfflineNode {
		// suspend node for being offline and begin review period
		updateFields.OfflineUnderReview = timeField{set: true, value: now}
		updateFields.OfflineSuspended = timeField{set: true, value: now}
	}

	return updateFields
}
2020-12-21 17:26:07 +00:00
func ( cache * overlaycache ) populateUpdateFields ( dbNode * dbx . Node , updateReq * overlay . UpdateRequest , auditHistoryResponse * overlay . UpdateAuditHistoryResponse , now time . Time ) dbx . Node_Update_Fields {
2019-07-31 18:21:06 +01:00
2020-12-21 17:26:07 +00:00
update := cache . populateUpdateNodeStats ( dbNode , updateReq , auditHistoryResponse , now )
2019-07-31 18:21:06 +01:00
updateFields := dbx . Node_Update_Fields { }
2020-05-20 20:57:53 +01:00
if update . VettedAt . set {
updateFields . VettedAt = dbx . Node_VettedAt ( update . VettedAt . value )
}
2019-07-31 18:21:06 +01:00
if update . TotalAuditCount . set {
updateFields . TotalAuditCount = dbx . Node_TotalAuditCount ( update . TotalAuditCount . value )
}
if update . AuditReputationAlpha . set {
updateFields . AuditReputationAlpha = dbx . Node_AuditReputationAlpha ( update . AuditReputationAlpha . value )
}
if update . AuditReputationBeta . set {
updateFields . AuditReputationBeta = dbx . Node_AuditReputationBeta ( update . AuditReputationBeta . value )
}
if update . Disqualified . set {
updateFields . Disqualified = dbx . Node_Disqualified ( update . Disqualified . value )
}
2020-03-09 15:35:54 +00:00
if update . UnknownAuditReputationAlpha . set {
updateFields . UnknownAuditReputationAlpha = dbx . Node_UnknownAuditReputationAlpha ( update . UnknownAuditReputationAlpha . value )
}
if update . UnknownAuditReputationBeta . set {
updateFields . UnknownAuditReputationBeta = dbx . Node_UnknownAuditReputationBeta ( update . UnknownAuditReputationBeta . value )
}
2020-06-10 17:11:25 +01:00
if update . UnknownAuditSuspended . set {
if update . UnknownAuditSuspended . isNil {
updateFields . UnknownAuditSuspended = dbx . Node_UnknownAuditSuspended_Null ( )
2020-03-09 15:35:54 +00:00
} else {
2020-06-10 17:11:25 +01:00
updateFields . UnknownAuditSuspended = dbx . Node_UnknownAuditSuspended ( update . UnknownAuditSuspended . value )
2020-03-09 15:35:54 +00:00
}
}
2019-07-31 18:21:06 +01:00
if update . LastContactSuccess . set {
updateFields . LastContactSuccess = dbx . Node_LastContactSuccess ( update . LastContactSuccess . value )
}
if update . LastContactFailure . set {
updateFields . LastContactFailure = dbx . Node_LastContactFailure ( update . LastContactFailure . value )
}
if update . AuditSuccessCount . set {
updateFields . AuditSuccessCount = dbx . Node_AuditSuccessCount ( update . AuditSuccessCount . value )
}
if update . Contained . set {
updateFields . Contained = dbx . Node_Contained ( update . Contained . value )
}
2020-03-09 15:35:54 +00:00
if updateReq . AuditOutcome == overlay . AuditSuccess {
2019-07-31 18:21:06 +01:00
updateFields . AuditSuccessCount = dbx . Node_AuditSuccessCount ( dbNode . AuditSuccessCount + 1 )
}
2020-08-28 20:43:53 +01:00
if update . OnlineScore . set {
updateFields . OnlineScore = dbx . Node_OnlineScore ( update . OnlineScore . value )
}
2020-08-26 21:26:10 +01:00
if update . OfflineSuspended . set {
if update . OfflineSuspended . isNil {
updateFields . OfflineSuspended = dbx . Node_OfflineSuspended_Null ( )
} else {
updateFields . OfflineSuspended = dbx . Node_OfflineSuspended ( update . OfflineSuspended . value )
}
}
if update . OfflineUnderReview . set {
if update . OfflineUnderReview . isNil {
updateFields . UnderReview = dbx . Node_UnderReview_Null ( )
} else {
updateFields . UnderReview = dbx . Node_UnderReview ( update . OfflineUnderReview . value )
}
}
2019-07-31 18:21:06 +01:00
return updateFields
}
// DQNodesLastSeenBefore disqualifies a limited number of nodes where last_contact_success < cutoff except those already disqualified
// or gracefully exited or where last_contact_success = '0001-01-01 00:00:00+00'.
//
// It returns the number of nodes actually disqualified by this call.
func (cache *overlaycache) DQNodesLastSeenBefore(ctx context.Context, cutoff time.Time, limit int) (count int, err error) {
	defer mon.Task()(&ctx)(&err)

	// Select candidate IDs first, retrying the read on CockroachDB
	// serialization failures. An empty candidate set means nothing to do.
	var nodeIDs []storj.NodeID
	for {
		nodeIDs, err = cache.getNodesForDQLastSeenBefore(ctx, cutoff, limit)
		if err != nil {
			if cockroachutil.NeedsRetry(err) {
				continue
			}
			return 0, err
		}
		if len(nodeIDs) == 0 {
			return 0, nil
		}
		break
	}

	// Re-check all predicates in the UPDATE itself so candidates that changed
	// state since the SELECT (or whose last_contact_success is the epoch
	// sentinel) are not disqualified.
	var rows tagsql.Rows
	rows, err = cache.db.Query(ctx, cache.db.Rebind(`
		UPDATE nodes
		SET disqualified = current_timestamp
		WHERE id = any($1::bytea[])
			AND disqualified IS NULL
			AND exit_finished_at IS NULL
			AND last_contact_success < $2
			AND last_contact_success != '0001-01-01 00:00:00+00'::timestamptz
		RETURNING id, last_contact_success;
	`), pgutil.NodeIDArray(nodeIDs), cutoff)
	if err != nil {
		return 0, err
	}
	defer func() { err = errs.Combine(err, rows.Close()) }()

	// Log each disqualified node and tally the total from the RETURNING rows.
	for rows.Next() {
		var id storj.NodeID
		var lcs time.Time
		err = rows.Scan(&id, &lcs)
		if err != nil {
			return count, err
		}
		cache.db.log.Info("Disqualified",
			zap.String("DQ type", "stray node"),
			zap.Stringer("Node ID", id),
			zap.Stringer("Last contacted", lcs))
		count++
	}

	return count, rows.Err()
}
2021-03-23 18:31:08 +00:00
func ( cache * overlaycache ) getNodesForDQLastSeenBefore ( ctx context . Context , cutoff time . Time , limit int ) ( nodes [ ] storj . NodeID , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
rows , err := cache . db . Query ( ctx , cache . db . Rebind ( `
SELECT id
FROM nodes
WHERE last_contact_success < $ 1
AND disqualified is NULL
AND exit_finished_at is NULL
LIMIT $ 2
` ) , cutoff , limit )
if err != nil {
return nil , err
}
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
var nodeIDs [ ] storj . NodeID
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
if err != nil {
return nil , err
}
nodeIDs = append ( nodeIDs , id )
}
return nodeIDs , rows . Err ( )
}
// UpdateCheckIn updates a single storagenode with info from when the node last checked in.
//
// NOTE(review): config is not referenced in this body; presumably it is kept
// to satisfy the overlay.DB interface — confirm before removing.
func (cache *overlaycache) UpdateCheckIn(ctx context.Context, node overlay.NodeCheckInInfo, timestamp time.Time, config overlay.NodeSelectionConfig) (err error) {
	defer mon.Task()(&ctx)(&err)

	if node.Address.GetAddress() == "" {
		return Error.New("error UpdateCheckIn: missing the storage node address")
	}

	// The reported version string must parse as a semantic version.
	semVer, err := version.NewSemVer(node.Version.GetVersion())
	if err != nil {
		return Error.New("unable to convert version to semVer")
	}

	// Encode the operator's wallet features for storage.
	walletFeatures, err := encodeWalletFeatures(node.Operator.GetWalletFeatures())
	if err != nil {
		return Error.Wrap(err)
	}

	// Upsert the node row. $9 (node.IsUp) steers the check-in timestamp ($18)
	// into last_contact_success or last_contact_failure; on first insert the
	// other column gets the epoch sentinel, and on conflict it keeps its
	// previous value.
	query := `
		INSERT INTO nodes
		(
			id, address, last_net, protocol, type,
			email, wallet, free_disk,
			last_contact_success,
			last_contact_failure,
			audit_reputation_alpha, audit_reputation_beta,
			unknown_audit_reputation_alpha, unknown_audit_reputation_beta,
			major, minor, patch, hash, timestamp, release,
			last_ip_port,
			wallet_features
		)
		VALUES (
			$1, $2, $3, $4, $5,
			$6, $7, $8,
			CASE WHEN $9::bool IS TRUE THEN $18::timestamptz
				ELSE '0001-01-01 00:00:00+00'::timestamptz
			END,
			CASE WHEN $9::bool IS FALSE THEN $18::timestamptz
				ELSE '0001-01-01 00:00:00+00'::timestamptz
			END,
			$10, $11,
			$10, $11,
			$12, $13, $14, $15, $16, $17,
			$19,
			$20
		)
		ON CONFLICT (id)
		DO UPDATE
		SET
			address=$2,
			last_net=$3,
			protocol=$4,
			email=$6,
			wallet=$7,
			free_disk=$8,
			major=$12, minor=$13, patch=$14, hash=$15, timestamp=$16, release=$17,
			last_contact_success = CASE WHEN $9::bool IS TRUE
				THEN $18::timestamptz
				ELSE nodes.last_contact_success
			END,
			last_contact_failure = CASE WHEN $9::bool IS FALSE
				THEN $18::timestamptz
				ELSE nodes.last_contact_failure
			END,
			last_ip_port=$19,
			wallet_features=$20;
		`
	_, err = cache.db.ExecContext(ctx, query,
		// args $1 - $5
		node.NodeID.Bytes(), node.Address.GetAddress(), node.LastNet, node.Address.GetTransport(), int(pb.NodeType_STORAGE),
		// args $6 - $8
		node.Operator.GetEmail(), node.Operator.GetWallet(), node.Capacity.GetFreeDisk(),
		// args $9
		node.IsUp,
		// args $10 - $11: initial audit reputation alpha/beta for new rows.
		1, 0,
		// args $12 - $17
		semVer.Major, semVer.Minor, semVer.Patch, node.Version.GetCommitHash(), node.Version.Timestamp, node.Version.GetRelease(),
		// args $18
		timestamp,
		// args $19
		node.LastIPPort,
		// args $20
		walletFeatures,
	)
	if err != nil {
		return Error.Wrap(err)
	}
	return nil
}
var (
	// ErrVetting is the error class for the following test methods.
	ErrVetting = errs.Class("vetting error")
)
// TestVetNode directly sets a node's vetted_at timestamp to make testing easier.
// It returns the timestamp that was stored.
func (cache *overlaycache) TestVetNode(ctx context.Context, nodeID storj.NodeID) (vettedTime *time.Time, err error) {
	node, err := cache.db.Update_Node_By_Id(ctx, dbx.Node_Id(nodeID.Bytes()), dbx.Node_Update_Fields{
		VettedAt: dbx.Node_VettedAt(time.Now().UTC()),
	})
	if err != nil {
		return nil, err
	}
	return node.VettedAt, nil
}
// TestUnvetNode directly sets a node's vetted_at timestamp to null to make testing easier.
func ( cache * overlaycache ) TestUnvetNode ( ctx context . Context , nodeID storj . NodeID ) ( err error ) {
_ , err = cache . db . Exec ( ctx , ` UPDATE nodes SET vetted_at = NULL WHERE nodes.id = $1; ` , nodeID )
if err != nil {
return err
}
2020-11-28 16:23:39 +00:00
_ , err = cache . Get ( ctx , nodeID )
2020-07-08 15:28:49 +01:00
return err
}
2021-02-18 15:33:49 +00:00
2021-02-18 16:29:28 +00:00
// IterateAllNodes will call cb on all known nodes (used in restore trash contexts).
2021-02-18 15:33:49 +00:00
func ( cache * overlaycache ) IterateAllNodes ( ctx context . Context , cb func ( context . Context , * overlay . SelectedNode ) error ) ( err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
var rows tagsql . Rows
rows , err = cache . db . Query ( ctx , cache . db . Rebind ( `
SELECT last_net , id , address , last_ip_port
FROM nodes
` ) )
if err != nil {
return Error . Wrap ( err )
}
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
for rows . Next ( ) {
var node overlay . SelectedNode
node . Address = & pb . NodeAddress { Transport : pb . NodeTransport_TCP_TLS_GRPC }
var lastIPPort sql . NullString
err = rows . Scan ( & node . LastNet , & node . ID , & node . Address . Address , & lastIPPort )
if err != nil {
return Error . Wrap ( err )
}
if lastIPPort . Valid {
node . LastIPPort = lastIPPort . String
}
err = cb ( ctx , & node )
if err != nil {
return err
}
}
return rows . Err ( )
}
2021-03-01 20:04:00 +00:00
// IterateAllNodeDossiers will call cb on all known nodes (used for invoice generation).
func ( cache * overlaycache ) IterateAllNodeDossiers ( ctx context . Context , cb func ( context . Context , * overlay . NodeDossier ) error ) ( err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
const nodesPerPage = 1000
var cont * dbx . Paged_Node_Continuation
var dbxNodes [ ] * dbx . Node
for {
dbxNodes , cont , err = cache . db . Paged_Node ( ctx , nodesPerPage , cont )
if err != nil {
return err
}
for _ , node := range dbxNodes {
dossier , err := convertDBNode ( ctx , node )
if err != nil {
return err
}
if err := cb ( ctx , dossier ) ; err != nil {
return err
}
}
if cont == nil {
return nil
}
}
}