2019-01-24 20:15:10 +00:00
// Copyright (C) 2019 Storj Labs, Inc.
2018-12-17 20:14:16 +00:00
// See LICENSE for copying information.
package satellitedb
import (
"context"
"database/sql"
2019-07-31 18:21:06 +01:00
"encoding/hex"
"fmt"
2019-09-11 22:38:58 +01:00
"sort"
2019-01-29 19:42:43 +00:00
"strings"
2019-03-29 08:53:43 +00:00
"time"
2018-12-17 20:14:16 +00:00
2019-05-19 16:10:46 +01:00
"github.com/lib/pq"
2019-01-15 16:08:45 +00:00
"github.com/zeebo/errs"
2019-07-10 03:36:09 +01:00
monkit "gopkg.in/spacemonkeygo/monkit.v2"
2019-01-15 16:08:45 +00:00
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/storj"
2019-11-14 19:46:15 +00:00
"storj.io/storj/private/version"
2019-07-28 06:55:36 +01:00
"storj.io/storj/satellite/overlay"
2018-12-17 20:14:16 +00:00
dbx "storj.io/storj/satellite/satellitedb/dbx"
"storj.io/storj/storage"
)
2019-03-25 22:25:09 +00:00
var (
2019-06-20 14:56:04 +01:00
mon = monkit . Package ( )
2019-03-25 22:25:09 +00:00
)
2019-01-15 16:08:45 +00:00
var _ overlay . DB = ( * overlaycache ) ( nil )
2018-12-17 20:14:16 +00:00
type overlaycache struct {
2018-12-27 09:56:25 +00:00
db * dbx . DB
2018-12-17 20:14:16 +00:00
}
2019-05-22 21:06:27 +01:00
func ( cache * overlaycache ) SelectStorageNodes ( ctx context . Context , count int , criteria * overlay . NodeCriteria ) ( nodes [ ] * pb . Node , err error ) {
2019-06-04 12:55:38 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2019-02-11 16:35:28 +00:00
nodeType := int ( pb . NodeType_STORAGE )
2019-04-10 07:04:24 +01:00
safeQuery := `
2019-06-18 10:14:31 +01:00
WHERE disqualified IS NULL
2019-10-08 20:03:38 +01:00
AND exit_initiated_at IS NULL
2019-06-06 01:21:32 +01:00
AND type = ?
AND free_bandwidth >= ?
AND free_disk >= ?
AND total_audit_count >= ?
AND total_uptime_count >= ?
2019-07-10 03:36:09 +01:00
AND ( last_contact_success > ?
OR last_contact_success > last_contact_failure ) `
2019-04-10 07:04:24 +01:00
args := append ( make ( [ ] interface { } , 0 , 13 ) ,
2019-06-06 01:21:32 +01:00
nodeType , criteria . FreeBandwidth , criteria . FreeDisk , criteria . AuditCount ,
criteria . UptimeCount , time . Now ( ) . Add ( - criteria . OnlineWindow ) )
2019-04-10 07:04:24 +01:00
if criteria . MinimumVersion != "" {
v , err := version . NewSemVer ( criteria . MinimumVersion )
if err != nil {
return nil , Error . New ( "invalid node selection criteria version: %v" , err )
}
safeQuery += `
2019-05-14 00:55:51 +01:00
AND ( major > ? OR ( major = ? AND ( minor > ? OR ( minor = ? AND patch >= ? ) ) ) )
2019-04-10 07:04:24 +01:00
AND release `
args = append ( args , v . Major , v . Major , v . Minor , v . Minor , v . Patch )
}
2019-05-22 21:06:27 +01:00
if ! criteria . DistinctIP {
nodes , err = cache . queryNodes ( ctx , criteria . ExcludedNodes , count , safeQuery , args ... )
if err != nil {
return nil , err
}
return nodes , nil
}
// query for distinct IPs
for i := 0 ; i < 3 ; i ++ {
moreNodes , err := cache . queryNodesDistinct ( ctx , criteria . ExcludedNodes , criteria . ExcludedIPs , count - len ( nodes ) , safeQuery , criteria . DistinctIP , args ... )
if err != nil {
return nil , err
}
for _ , n := range moreNodes {
nodes = append ( nodes , n )
criteria . ExcludedNodes = append ( criteria . ExcludedNodes , n . Id )
criteria . ExcludedIPs = append ( criteria . ExcludedIPs , n . LastIp )
}
if len ( nodes ) == count {
break
}
}
return nodes , nil
2019-01-29 19:42:43 +00:00
}
2019-05-22 21:06:27 +01:00
func ( cache * overlaycache ) SelectNewStorageNodes ( ctx context . Context , count int , criteria * overlay . NodeCriteria ) ( nodes [ ] * pb . Node , err error ) {
2019-06-04 12:55:38 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2019-02-11 16:35:28 +00:00
nodeType := int ( pb . NodeType_STORAGE )
2019-04-10 07:04:24 +01:00
safeQuery := `
2019-06-18 10:14:31 +01:00
WHERE disqualified IS NULL
2019-10-08 20:03:38 +01:00
AND exit_initiated_at IS NULL
2019-06-06 01:21:32 +01:00
AND type = ?
AND free_bandwidth >= ?
AND free_disk >= ?
2019-06-18 18:40:28 +01:00
AND ( total_audit_count < ? OR total_uptime_count < ? )
2019-07-10 03:36:09 +01:00
AND ( last_contact_success > ?
OR last_contact_success > last_contact_failure ) `
2019-04-10 07:04:24 +01:00
args := append ( make ( [ ] interface { } , 0 , 10 ) ,
2019-06-18 18:40:28 +01:00
nodeType , criteria . FreeBandwidth , criteria . FreeDisk , criteria . AuditCount , criteria . UptimeCount , time . Now ( ) . Add ( - criteria . OnlineWindow ) )
2019-04-10 07:04:24 +01:00
if criteria . MinimumVersion != "" {
v , err := version . NewSemVer ( criteria . MinimumVersion )
if err != nil {
return nil , Error . New ( "invalid node selection criteria version: %v" , err )
}
safeQuery += `
2019-05-14 00:55:51 +01:00
AND ( major > ? OR ( major = ? AND ( minor > ? OR ( minor = ? AND patch >= ? ) ) ) )
2019-04-10 07:04:24 +01:00
AND release `
args = append ( args , v . Major , v . Major , v . Minor , v . Minor , v . Patch )
}
2019-05-22 21:06:27 +01:00
if ! criteria . DistinctIP {
nodes , err = cache . queryNodes ( ctx , criteria . ExcludedNodes , count , safeQuery , args ... )
if err != nil {
return nil , err
}
return nodes , nil
}
// query for distinct IPs
for i := 0 ; i < 3 ; i ++ {
moreNodes , err := cache . queryNodesDistinct ( ctx , criteria . ExcludedNodes , criteria . ExcludedIPs , count - len ( nodes ) , safeQuery , criteria . DistinctIP , args ... )
if err != nil {
return nil , err
}
for _ , n := range moreNodes {
nodes = append ( nodes , n )
criteria . ExcludedNodes = append ( criteria . ExcludedNodes , n . Id )
criteria . ExcludedIPs = append ( criteria . ExcludedIPs , n . LastIp )
}
if len ( nodes ) == count {
break
}
}
return nodes , nil
2019-01-29 19:42:43 +00:00
}
2019-11-06 21:38:52 +00:00
// GetNodeIPs returns a list of node IP addresses. Warning: these node IP addresses might be returned out of order.
func ( cache * overlaycache ) GetNodeIPs ( ctx context . Context , nodeIDs [ ] storj . NodeID ) ( nodeIPs [ ] string , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
var rows * sql . Rows
rows , err = cache . db . Query ( cache . db . Rebind ( `
SELECT last_net FROM nodes
WHERE id = any ( $ 1 : : bytea [ ] )
` ) , postgresNodeIDList ( nodeIDs ) ,
)
if err != nil {
return nil , err
}
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
for rows . Next ( ) {
var ip string
err = rows . Scan ( & ip )
if err != nil {
return nil , err
}
nodeIPs = append ( nodeIPs , ip )
}
return nodeIPs , nil
}
2019-05-22 21:06:27 +01:00
func ( cache * overlaycache ) queryNodes ( ctx context . Context , excludedNodes [ ] storj . NodeID , count int , safeQuery string , args ... interface { } ) ( _ [ ] * pb . Node , err error ) {
2019-06-04 12:55:38 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2019-01-31 18:49:00 +00:00
if count == 0 {
return nil , nil
2019-01-29 19:42:43 +00:00
}
2019-01-31 18:49:00 +00:00
safeExcludeNodes := ""
2019-05-22 21:06:27 +01:00
if len ( excludedNodes ) > 0 {
safeExcludeNodes = ` AND id NOT IN (? ` + strings . Repeat ( ", ?" , len ( excludedNodes ) - 1 ) + ` ) `
for _ , id := range excludedNodes {
args = append ( args , id . Bytes ( ) )
}
2019-01-29 19:42:43 +00:00
}
2019-05-22 21:06:27 +01:00
args = append ( args , count )
var rows * sql . Rows
2019-07-10 03:36:09 +01:00
rows , err = cache . db . Query ( cache . db . Rebind ( ` SELECT id , type , address , last_net ,
free_bandwidth , free_disk , total_audit_count , audit_success_count ,
2019-06-21 18:14:53 +01:00
total_uptime_count , uptime_success_count , disqualified , audit_reputation_alpha ,
2019-06-18 14:45:02 +01:00
audit_reputation_beta , uptime_reputation_alpha , uptime_reputation_beta
2019-05-22 21:06:27 +01:00
FROM nodes
` +safeQuery+safeExcludeNodes+ `
ORDER BY RANDOM ( )
LIMIT ? ` ) , args ... )
if err != nil {
return nil , err
}
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
var nodes [ ] * pb . Node
for rows . Next ( ) {
dbNode := & dbx . Node { }
err = rows . Scan ( & dbNode . Id , & dbNode . Type ,
2019-06-24 16:33:18 +01:00
& dbNode . Address , & dbNode . LastNet , & dbNode . FreeBandwidth , & dbNode . FreeDisk ,
2019-05-22 21:06:27 +01:00
& dbNode . TotalAuditCount , & dbNode . AuditSuccessCount ,
2019-06-18 14:45:02 +01:00
& dbNode . TotalUptimeCount , & dbNode . UptimeSuccessCount , & dbNode . Disqualified ,
& dbNode . AuditReputationAlpha , & dbNode . AuditReputationBeta ,
& dbNode . UptimeReputationAlpha , & dbNode . UptimeReputationBeta ,
)
2019-05-22 21:06:27 +01:00
if err != nil {
return nil , err
}
2019-06-04 12:55:38 +01:00
dossier , err := convertDBNode ( ctx , dbNode )
2019-05-22 21:06:27 +01:00
if err != nil {
return nil , err
}
nodes = append ( nodes , & dossier . Node )
2019-01-29 19:42:43 +00:00
}
2019-05-22 21:06:27 +01:00
return nodes , rows . Err ( )
}
func ( cache * overlaycache ) queryNodesDistinct ( ctx context . Context , excludedNodes [ ] storj . NodeID , excludedIPs [ ] string , count int , safeQuery string , distinctIP bool , args ... interface { } ) ( _ [ ] * pb . Node , err error ) {
2019-06-04 12:55:38 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2019-05-22 21:06:27 +01:00
if count == 0 {
return nil , nil
}
safeExcludeNodes := ""
if len ( excludedNodes ) > 0 {
safeExcludeNodes = ` AND id NOT IN (? ` + strings . Repeat ( ", ?" , len ( excludedNodes ) - 1 ) + ` ) `
for _ , id := range excludedNodes {
args = append ( args , id . Bytes ( ) )
}
}
safeExcludeIPs := ""
if len ( excludedIPs ) > 0 {
2019-06-24 16:33:18 +01:00
safeExcludeIPs = ` AND last_net NOT IN (? ` + strings . Repeat ( ", ?" , len ( excludedIPs ) - 1 ) + ` ) `
2019-05-22 21:06:27 +01:00
for _ , ip := range excludedIPs {
args = append ( args , ip )
}
}
args = append ( args , count )
2019-06-23 22:16:45 +01:00
rows , err := cache . db . Query ( cache . db . Rebind ( `
2019-07-29 13:32:43 +01:00
SELECT *
FROM (
SELECT DISTINCT ON ( last_net ) last_net , -- choose at max 1 node from this IP or network
id , type , address , free_bandwidth , free_disk , total_audit_count ,
2019-06-23 22:16:45 +01:00
audit_success_count , total_uptime_count , uptime_success_count ,
audit_reputation_alpha , audit_reputation_beta , uptime_reputation_alpha ,
uptime_reputation_beta
2019-07-29 13:32:43 +01:00
FROM nodes
` +safeQuery+safeExcludeNodes+safeExcludeIPs+ `
AND last_net < > ' ' -- don ' t try to IP - filter nodes with no known IP yet
ORDER BY last_net , RANDOM ( ) -- equal chance of choosing any qualified node at this IP or network
2019-06-23 22:16:45 +01:00
) filteredcandidates
2019-07-29 13:32:43 +01:00
ORDER BY RANDOM ( ) -- do the actual node selection from filtered pool
2019-06-23 22:16:45 +01:00
LIMIT ? ` ) , args ... )
2019-05-22 21:06:27 +01:00
2019-01-29 19:42:43 +00:00
if err != nil {
return nil , err
}
2019-01-31 18:49:00 +00:00
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
var nodes [ ] * pb . Node
2019-01-29 19:42:43 +00:00
for rows . Next ( ) {
2019-03-29 08:53:43 +00:00
dbNode := & dbx . Node { }
2019-07-29 13:32:43 +01:00
err = rows . Scan ( & dbNode . LastNet , & dbNode . Id , & dbNode . Type ,
& dbNode . Address , & dbNode . FreeBandwidth , & dbNode . FreeDisk ,
2019-03-29 08:53:43 +00:00
& dbNode . TotalAuditCount , & dbNode . AuditSuccessCount ,
2019-06-18 14:45:02 +01:00
& dbNode . TotalUptimeCount , & dbNode . UptimeSuccessCount ,
& dbNode . AuditReputationAlpha , & dbNode . AuditReputationBeta ,
& dbNode . UptimeReputationAlpha , & dbNode . UptimeReputationBeta ,
)
2019-01-29 19:42:43 +00:00
if err != nil {
return nil , err
}
2019-06-04 12:55:38 +01:00
dossier , err := convertDBNode ( ctx , dbNode )
2019-01-29 19:42:43 +00:00
if err != nil {
return nil , err
}
2019-04-04 17:34:36 +01:00
nodes = append ( nodes , & dossier . Node )
2019-01-29 19:42:43 +00:00
}
2019-01-31 18:49:00 +00:00
return nodes , rows . Err ( )
2019-01-29 19:42:43 +00:00
}
2019-01-15 16:08:45 +00:00
// Get looks up the node by nodeID
2019-06-04 12:55:38 +01:00
func ( cache * overlaycache ) Get ( ctx context . Context , id storj . NodeID ) ( _ * overlay . NodeDossier , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
2019-01-15 16:08:45 +00:00
if id . IsZero ( ) {
return nil , overlay . ErrEmptyNode
2018-12-17 20:14:16 +00:00
}
2019-03-29 08:53:43 +00:00
node , err := cache . db . Get_Node_By_Id ( ctx , dbx . Node_Id ( id . Bytes ( ) ) )
2019-01-15 16:08:45 +00:00
if err == sql . ErrNoRows {
2019-08-21 17:30:29 +01:00
return nil , overlay . ErrNodeNotFound . New ( "%v" , id )
2019-01-15 16:08:45 +00:00
}
2018-12-17 20:14:16 +00:00
if err != nil {
2019-01-15 16:08:45 +00:00
return nil , err
2018-12-17 20:14:16 +00:00
}
2019-06-04 12:55:38 +01:00
return convertDBNode ( ctx , node )
2019-01-15 16:08:45 +00:00
}
2019-06-18 23:22:14 +01:00
// KnownOffline filters a set of nodes to offline nodes
func ( cache * overlaycache ) KnownOffline ( ctx context . Context , criteria * overlay . NodeCriteria , nodeIds storj . NodeIDList ) ( offlineNodes storj . NodeIDList , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
if len ( nodeIds ) == 0 {
return nil , Error . New ( "no ids provided" )
}
// get offline nodes
var rows * sql . Rows
2019-10-18 22:27:57 +01:00
rows , err = cache . db . Query ( cache . db . Rebind ( `
SELECT id FROM nodes
WHERE id = any ( $ 1 : : bytea [ ] )
2019-06-18 23:22:14 +01:00
AND (
2019-10-18 22:27:57 +01:00
last_contact_success < last_contact_failure AND last_contact_success < $ 2
2019-06-18 23:22:14 +01:00
)
2019-10-18 22:27:57 +01:00
` ) , postgresNodeIDList ( nodeIds ) , time . Now ( ) . Add ( - criteria . OnlineWindow ) ,
)
2019-06-18 23:22:14 +01:00
if err != nil {
return nil , err
}
2019-10-18 22:27:57 +01:00
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
2019-06-18 23:22:14 +01:00
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
if err != nil {
return nil , err
}
offlineNodes = append ( offlineNodes , id )
}
return offlineNodes , nil
}
2019-05-01 14:45:52 +01:00
// KnownUnreliableOrOffline filters a set of nodes to unreliable or offlines node, independent of new
2019-05-08 18:59:50 +01:00
func ( cache * overlaycache ) KnownUnreliableOrOffline ( ctx context . Context , criteria * overlay . NodeCriteria , nodeIds storj . NodeIDList ) ( badNodes storj . NodeIDList , err error ) {
2019-06-04 12:55:38 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2019-05-01 14:45:52 +01:00
if len ( nodeIds ) == 0 {
return nil , Error . New ( "no ids provided" )
}
2019-05-08 18:59:50 +01:00
// get reliable and online nodes
2019-05-19 16:10:46 +01:00
var rows * sql . Rows
2019-10-18 22:27:57 +01:00
rows , err = cache . db . Query ( cache . db . Rebind ( `
SELECT id FROM nodes
WHERE id = any ( $ 1 : : bytea [ ] )
2019-06-18 10:14:31 +01:00
AND disqualified IS NULL
2019-10-18 22:27:57 +01:00
AND ( last_contact_success > $ 2 OR last_contact_success > last_contact_failure )
` ) , postgresNodeIDList ( nodeIds ) , time . Now ( ) . Add ( - criteria . OnlineWindow ) ,
)
2019-05-01 14:45:52 +01:00
if err != nil {
return nil , err
}
2019-10-18 22:27:57 +01:00
defer func ( ) { err = errs . Combine ( err , rows . Close ( ) ) } ( )
2019-05-08 18:59:50 +01:00
2019-05-19 16:10:46 +01:00
goodNodes := make ( map [ storj . NodeID ] struct { } , len ( nodeIds ) )
2019-05-01 14:45:52 +01:00
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
2018-12-17 20:14:16 +00:00
if err != nil {
2019-05-01 14:45:52 +01:00
return nil , err
2018-12-17 20:14:16 +00:00
}
2019-05-19 16:10:46 +01:00
goodNodes [ id ] = struct { } { }
2019-05-08 18:59:50 +01:00
}
for _ , id := range nodeIds {
2019-05-19 16:10:46 +01:00
if _ , ok := goodNodes [ id ] ; ! ok {
2019-05-08 18:59:50 +01:00
badNodes = append ( badNodes , id )
}
2018-12-17 20:14:16 +00:00
}
2019-05-08 18:59:50 +01:00
return badNodes , nil
2018-12-17 20:14:16 +00:00
}
2019-07-08 23:04:35 +01:00
// Reliable returns all reliable nodes.
func ( cache * overlaycache ) Reliable ( ctx context . Context , criteria * overlay . NodeCriteria ) ( nodes storj . NodeIDList , err error ) {
// get reliable and online nodes
rows , err := cache . db . Query ( cache . db . Rebind ( `
SELECT id FROM nodes
WHERE disqualified IS NULL
2019-07-10 03:36:09 +01:00
AND ( last_contact_success > ? OR last_contact_success > last_contact_failure ) ` ) ,
2019-07-08 23:04:35 +01:00
time . Now ( ) . Add ( - criteria . OnlineWindow ) )
if err != nil {
return nil , err
}
defer func ( ) {
err = errs . Combine ( err , rows . Close ( ) )
} ( )
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
if err != nil {
return nil , err
}
nodes = append ( nodes , id )
}
return nodes , nil
}
2019-01-30 16:29:18 +00:00
// Paginate will run through
2019-06-04 12:55:38 +01:00
func ( cache * overlaycache ) Paginate ( ctx context . Context , offset int64 , limit int ) ( _ [ ] * overlay . NodeDossier , _ bool , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
2019-01-30 16:29:18 +00:00
cursor := storj . NodeID { }
// more represents end of table. If there are more rows in the database, more will be true.
more := true
if limit <= 0 || limit > storage . LookupLimit {
limit = storage . LookupLimit
}
2019-03-29 08:53:43 +00:00
dbxInfos , err := cache . db . Limited_Node_By_Id_GreaterOrEqual_OrderBy_Asc_Id ( ctx , dbx . Node_Id ( cursor . Bytes ( ) ) , limit , offset )
2019-01-30 16:29:18 +00:00
if err != nil {
return nil , false , err
}
if len ( dbxInfos ) < limit {
more = false
}
2019-04-04 17:34:36 +01:00
infos := make ( [ ] * overlay . NodeDossier , len ( dbxInfos ) )
2019-01-30 16:29:18 +00:00
for i , dbxInfo := range dbxInfos {
2019-06-04 12:55:38 +01:00
infos [ i ] , err = convertDBNode ( ctx , dbxInfo )
2019-01-30 16:29:18 +00:00
if err != nil {
return nil , false , err
}
}
return infos , more , nil
}
2019-07-12 15:35:48 +01:00
// PaginateQualified will retrieve all qualified nodes
func ( cache * overlaycache ) PaginateQualified ( ctx context . Context , offset int64 , limit int ) ( _ [ ] * pb . Node , _ bool , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
cursor := storj . NodeID { }
// more represents end of table. If there are more rows in the database, more will be true.
more := true
if limit <= 0 || limit > storage . LookupLimit {
limit = storage . LookupLimit
}
dbxInfos , err := cache . db . Limited_Node_Id_Node_LastNet_Node_Address_Node_Protocol_By_Id_GreaterOrEqual_And_Disqualified_Is_Null_OrderBy_Asc_Id ( ctx , dbx . Node_Id ( cursor . Bytes ( ) ) , limit , offset )
if err != nil {
return nil , false , err
}
if len ( dbxInfos ) < limit {
more = false
}
infos := make ( [ ] * pb . Node , len ( dbxInfos ) )
for i , dbxInfo := range dbxInfos {
infos [ i ] , err = convertDBNodeToPBNode ( ctx , dbxInfo )
if err != nil {
return nil , false , err
}
}
return infos , more , nil
}
2019-04-22 10:07:50 +01:00
// Update updates node address
2019-06-20 14:56:04 +01:00
func ( cache * overlaycache ) UpdateAddress ( ctx context . Context , info * pb . Node , defaults overlay . NodeSelectionConfig ) ( err error ) {
2019-06-04 12:55:38 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
2019-01-15 16:08:45 +00:00
if info == nil || info . Id . IsZero ( ) {
return overlay . ErrEmptyNode
}
2018-12-17 20:14:16 +00:00
2019-01-15 16:08:45 +00:00
tx , err := cache . db . Open ( ctx )
if err != nil {
return Error . Wrap ( err )
2018-12-17 20:14:16 +00:00
}
2019-01-15 16:08:45 +00:00
// TODO: use upsert
2019-03-29 08:53:43 +00:00
_ , err = tx . Get_Node_By_Id ( ctx , dbx . Node_Id ( info . Id . Bytes ( ) ) )
2019-01-15 16:08:45 +00:00
address := info . Address
if address == nil {
address = & pb . NodeAddress { }
2018-12-18 20:41:31 +00:00
}
2019-01-15 16:08:45 +00:00
2019-03-29 08:53:43 +00:00
if err != nil {
2019-04-22 10:07:50 +01:00
// add the node to DB for first time
2019-09-12 18:31:50 +01:00
err = tx . CreateNoReturn_Node (
2019-01-15 16:08:45 +00:00
ctx ,
2019-03-29 08:53:43 +00:00
dbx . Node_Id ( info . Id . Bytes ( ) ) ,
dbx . Node_Address ( address . Address ) ,
2019-06-24 16:33:18 +01:00
dbx . Node_LastNet ( info . LastIp ) ,
2019-03-29 08:53:43 +00:00
dbx . Node_Protocol ( int ( address . Transport ) ) ,
2019-04-22 10:07:50 +01:00
dbx . Node_Type ( int ( pb . NodeType_INVALID ) ) ,
dbx . Node_Email ( "" ) ,
dbx . Node_Wallet ( "" ) ,
dbx . Node_FreeBandwidth ( - 1 ) ,
dbx . Node_FreeDisk ( - 1 ) ,
dbx . Node_Major ( 0 ) ,
dbx . Node_Minor ( 0 ) ,
dbx . Node_Patch ( 0 ) ,
dbx . Node_Hash ( "" ) ,
dbx . Node_Timestamp ( time . Time { } ) ,
dbx . Node_Release ( false ) ,
dbx . Node_Latency90 ( 0 ) ,
dbx . Node_AuditSuccessCount ( 0 ) ,
dbx . Node_TotalAuditCount ( 0 ) ,
dbx . Node_UptimeSuccessCount ( 0 ) ,
dbx . Node_TotalUptimeCount ( 0 ) ,
2019-04-01 19:42:06 +01:00
dbx . Node_LastContactSuccess ( time . Now ( ) ) ,
dbx . Node_LastContactFailure ( time . Time { } ) ,
2019-05-16 15:11:15 +01:00
dbx . Node_Contained ( false ) ,
2019-06-20 14:56:04 +01:00
dbx . Node_AuditReputationAlpha ( defaults . AuditReputationAlpha0 ) ,
dbx . Node_AuditReputationBeta ( defaults . AuditReputationBeta0 ) ,
dbx . Node_UptimeReputationAlpha ( defaults . UptimeReputationAlpha0 ) ,
dbx . Node_UptimeReputationBeta ( defaults . UptimeReputationBeta0 ) ,
2019-10-17 16:01:39 +01:00
dbx . Node_ExitSuccess ( false ) ,
2019-06-18 10:14:31 +01:00
dbx . Node_Create_Fields {
Disqualified : dbx . Node_Disqualified_Null ( ) ,
} ,
2019-01-15 16:08:45 +00:00
)
if err != nil {
return Error . Wrap ( errs . Combine ( err , tx . Rollback ( ) ) )
}
} else {
2019-09-12 18:31:50 +01:00
err = tx . UpdateNoReturn_Node_By_Id ( ctx , dbx . Node_Id ( info . Id . Bytes ( ) ) ,
dbx . Node_Update_Fields {
Address : dbx . Node_Address ( address . Address ) ,
LastNet : dbx . Node_LastNet ( info . LastIp ) ,
Protocol : dbx . Node_Protocol ( int ( address . Transport ) ) ,
} )
2019-01-15 16:08:45 +00:00
if err != nil {
return Error . Wrap ( errs . Combine ( err , tx . Rollback ( ) ) )
}
}
return Error . Wrap ( tx . Commit ( ) )
2018-12-17 20:14:16 +00:00
}
2019-07-31 18:21:06 +01:00
// BatchUpdateStats updates multiple storagenode's stats in one transaction
func ( cache * overlaycache ) BatchUpdateStats ( ctx context . Context , updateRequests [ ] * overlay . UpdateRequest , batchSize int ) ( failed storj . NodeIDList , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
if len ( updateRequests ) == 0 {
return failed , nil
}
2019-11-01 17:07:23 +00:00
// ensure updates happen in-order
sort . Slice ( updateRequests , func ( i , k int ) bool {
return updateRequests [ i ] . NodeID . Less ( updateRequests [ k ] . NodeID )
} )
2019-07-31 18:21:06 +01:00
doUpdate := func ( updateSlice [ ] * overlay . UpdateRequest ) ( duf storj . NodeIDList , err error ) {
appendAll := func ( ) {
for _ , ur := range updateRequests {
duf = append ( duf , ur . NodeID )
}
}
tx , err := cache . db . Open ( ctx )
if err != nil {
appendAll ( )
return duf , Error . Wrap ( err )
}
var allSQL string
for _ , updateReq := range updateSlice {
dbNode , err := tx . Get_Node_By_Id ( ctx , dbx . Node_Id ( updateReq . NodeID . Bytes ( ) ) )
if err != nil {
return nil , Error . Wrap ( errs . Combine ( err , tx . Rollback ( ) ) )
}
// do not update reputation if node is disqualified
if dbNode . Disqualified != nil {
continue
}
updateNodeStats := populateUpdateNodeStats ( dbNode , updateReq )
sql := buildUpdateStatement ( cache . db , updateNodeStats )
allSQL += sql
}
if allSQL != "" {
results , err := tx . Tx . Exec ( allSQL )
if results == nil || err != nil {
appendAll ( )
return duf , errs . Combine ( err , tx . Rollback ( ) )
}
_ , err = results . RowsAffected ( )
if err != nil {
appendAll ( )
return duf , errs . Combine ( err , tx . Rollback ( ) )
}
}
return duf , Error . Wrap ( tx . Commit ( ) )
}
var errlist errs . Group
length := len ( updateRequests )
for i := 0 ; i < length ; i += batchSize {
end := i + batchSize
if end > length {
end = length
}
failedBatch , err := doUpdate ( updateRequests [ i : end ] )
if err != nil && len ( failedBatch ) > 0 {
for _ , fb := range failedBatch {
errlist . Add ( err )
failed = append ( failed , fb )
}
}
}
return failed , errlist . Err ( )
}
2019-03-25 22:25:09 +00:00
// UpdateStats a single storagenode's stats in the db
func ( cache * overlaycache ) UpdateStats ( ctx context . Context , updateReq * overlay . UpdateRequest ) ( stats * overlay . NodeStats , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
nodeID := updateReq . NodeID
tx , err := cache . db . Open ( ctx )
if err != nil {
return nil , Error . Wrap ( err )
}
dbNode , err := tx . Get_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) )
if err != nil {
2019-03-29 12:30:23 +00:00
return nil , Error . Wrap ( errs . Combine ( err , tx . Rollback ( ) ) )
2019-03-25 22:25:09 +00:00
}
2019-06-20 14:56:04 +01:00
// do not update reputation if node is disqualified
if dbNode . Disqualified != nil {
return getNodeStats ( dbNode ) , Error . Wrap ( tx . Commit ( ) )
}
2019-03-25 22:25:09 +00:00
2019-07-31 18:21:06 +01:00
updateFields := populateUpdateFields ( dbNode , updateReq )
2019-07-02 16:16:25 +01:00
2019-03-25 22:25:09 +00:00
dbNode , err = tx . Update_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) , updateFields )
if err != nil {
2019-03-29 12:30:23 +00:00
return nil , Error . Wrap ( errs . Combine ( err , tx . Rollback ( ) ) )
2019-03-25 22:25:09 +00:00
}
2019-07-02 16:16:25 +01:00
// Cleanup containment table too
_ , err = tx . Delete_PendingAudits_By_NodeId ( ctx , dbx . PendingAudits_NodeId ( nodeID . Bytes ( ) ) )
if err != nil {
return nil , Error . Wrap ( errs . Combine ( err , tx . Rollback ( ) ) )
}
2019-04-08 18:52:53 +01:00
// TODO: Allegedly tx.Get_Node_By_Id and tx.Update_Node_By_Id should never return a nil value for dbNode,
// however we've seen from some crashes that it does. We need to track down the cause of these crashes
// but for now we're adding a nil check to prevent a panic.
if dbNode == nil {
2019-06-03 15:37:43 +01:00
return nil , Error . Wrap ( errs . Combine ( errs . New ( "unable to get node by ID: %v" , nodeID ) , tx . Rollback ( ) ) )
2019-04-08 18:52:53 +01:00
}
return getNodeStats ( dbNode ) , Error . Wrap ( tx . Commit ( ) )
2019-03-25 22:25:09 +00:00
}
2019-09-10 17:05:07 +01:00
// UpdateNodeInfo updates the following fields for a given node ID:
// wallet, email for node operator, free disk and bandwidth capacity, and version
2019-04-10 07:04:24 +01:00
func ( cache * overlaycache ) UpdateNodeInfo ( ctx context . Context , nodeID storj . NodeID , nodeInfo * pb . InfoResponse ) ( stats * overlay . NodeDossier , err error ) {
2019-03-25 22:25:09 +00:00
defer mon . Task ( ) ( & ctx ) ( & err )
2019-04-10 07:04:24 +01:00
var updateFields dbx . Node_Update_Fields
if nodeInfo != nil {
2019-04-22 10:07:50 +01:00
if nodeInfo . GetType ( ) != pb . NodeType_INVALID {
updateFields . Type = dbx . Node_Type ( int ( nodeInfo . GetType ( ) ) )
}
2019-04-10 07:04:24 +01:00
if nodeInfo . GetOperator ( ) != nil {
updateFields . Wallet = dbx . Node_Wallet ( nodeInfo . GetOperator ( ) . GetWallet ( ) )
updateFields . Email = dbx . Node_Email ( nodeInfo . GetOperator ( ) . GetEmail ( ) )
}
if nodeInfo . GetCapacity ( ) != nil {
updateFields . FreeDisk = dbx . Node_FreeDisk ( nodeInfo . GetCapacity ( ) . GetFreeDisk ( ) )
updateFields . FreeBandwidth = dbx . Node_FreeBandwidth ( nodeInfo . GetCapacity ( ) . GetFreeBandwidth ( ) )
}
if nodeInfo . GetVersion ( ) != nil {
semVer , err := version . NewSemVer ( nodeInfo . GetVersion ( ) . GetVersion ( ) )
if err != nil {
2019-04-22 10:07:50 +01:00
return nil , errs . New ( "unable to convert version to semVer" )
2019-04-10 07:04:24 +01:00
}
2019-10-21 11:50:59 +01:00
updateFields . Major = dbx . Node_Major ( int64 ( semVer . Major ) )
updateFields . Minor = dbx . Node_Minor ( int64 ( semVer . Minor ) )
updateFields . Patch = dbx . Node_Patch ( int64 ( semVer . Patch ) )
2019-04-10 07:04:24 +01:00
updateFields . Hash = dbx . Node_Hash ( nodeInfo . GetVersion ( ) . GetCommitHash ( ) )
2019-07-08 19:24:42 +01:00
updateFields . Timestamp = dbx . Node_Timestamp ( nodeInfo . GetVersion ( ) . Timestamp )
2019-04-10 07:04:24 +01:00
updateFields . Release = dbx . Node_Release ( nodeInfo . GetVersion ( ) . GetRelease ( ) )
}
2019-03-25 22:25:09 +00:00
}
2019-04-04 17:34:36 +01:00
updatedDBNode , err := cache . db . Update_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) , updateFields )
2019-03-25 22:25:09 +00:00
if err != nil {
2019-04-04 17:34:36 +01:00
return nil , Error . Wrap ( err )
2019-03-25 22:25:09 +00:00
}
2019-06-04 12:55:38 +01:00
return convertDBNode ( ctx , updatedDBNode )
2019-03-25 22:25:09 +00:00
}
// UpdateUptime updates a single storagenode's uptime stats in the db
2019-06-20 14:56:04 +01:00
func ( cache * overlaycache ) UpdateUptime ( ctx context . Context , nodeID storj . NodeID , isUp bool , lambda , weight , uptimeDQ float64 ) ( stats * overlay . NodeStats , err error ) {
2019-03-25 22:25:09 +00:00
defer mon . Task ( ) ( & ctx ) ( & err )
tx , err := cache . db . Open ( ctx )
if err != nil {
return nil , Error . Wrap ( err )
}
dbNode , err := tx . Get_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) )
if err != nil {
2019-03-29 12:30:23 +00:00
return nil , Error . Wrap ( errs . Combine ( err , tx . Rollback ( ) ) )
2019-03-25 22:25:09 +00:00
}
2019-06-20 14:56:04 +01:00
// do not update reputation if node is disqualified
if dbNode . Disqualified != nil {
return getNodeStats ( dbNode ) , Error . Wrap ( tx . Commit ( ) )
}
updateFields := dbx . Node_Update_Fields { }
uptimeAlpha , uptimeBeta , totalUptimeCount := updateReputation (
isUp ,
dbNode . UptimeReputationAlpha ,
dbNode . UptimeReputationBeta ,
lambda ,
weight ,
dbNode . TotalUptimeCount ,
)
mon . FloatVal ( "uptime_reputation_alpha" ) . Observe ( uptimeAlpha )
mon . FloatVal ( "uptime_reputation_beta" ) . Observe ( uptimeBeta )
updateFields . UptimeReputationAlpha = dbx . Node_UptimeReputationAlpha ( uptimeAlpha )
updateFields . UptimeReputationBeta = dbx . Node_UptimeReputationBeta ( uptimeBeta )
updateFields . TotalUptimeCount = dbx . Node_TotalUptimeCount ( totalUptimeCount )
uptimeRep := uptimeAlpha / ( uptimeAlpha + uptimeBeta )
if uptimeRep <= uptimeDQ {
updateFields . Disqualified = dbx . Node_Disqualified ( time . Now ( ) . UTC ( ) )
}
2019-03-25 22:25:09 +00:00
2019-06-18 13:54:52 +01:00
lastContactSuccess := dbNode . LastContactSuccess
lastContactFailure := dbNode . LastContactFailure
mon . Meter ( "uptime_updates" ) . Mark ( 1 )
2019-06-20 14:56:04 +01:00
if isUp {
updateFields . UptimeSuccessCount = dbx . Node_UptimeSuccessCount ( dbNode . UptimeSuccessCount + 1 )
updateFields . LastContactSuccess = dbx . Node_LastContactSuccess ( time . Now ( ) )
2019-06-18 13:54:52 +01:00
mon . Meter ( "uptime_update_successes" ) . Mark ( 1 )
// we have seen this node in the past 24 hours
2019-08-22 12:40:15 +01:00
if time . Since ( lastContactFailure ) > time . Hour * 24 {
2019-06-18 13:54:52 +01:00
mon . Meter ( "uptime_seen_24h" ) . Mark ( 1 )
}
// we have seen this node in the past week
2019-08-22 12:40:15 +01:00
if time . Since ( lastContactFailure ) > time . Hour * 24 * 7 {
2019-06-18 13:54:52 +01:00
mon . Meter ( "uptime_seen_week" ) . Mark ( 1 )
}
2019-03-29 08:53:43 +00:00
} else {
updateFields . LastContactFailure = dbx . Node_LastContactFailure ( time . Now ( ) )
2019-06-20 14:56:04 +01:00
mon . Meter ( "uptime_update_failures" ) . Mark ( 1 )
// it's been over 24 hours since we've seen this node
2019-08-22 12:40:15 +01:00
if time . Since ( lastContactSuccess ) > time . Hour * 24 {
2019-06-20 14:56:04 +01:00
mon . Meter ( "uptime_not_seen_24h" ) . Mark ( 1 )
}
// it's been over a week since we've seen this node
2019-08-22 12:40:15 +01:00
if time . Since ( lastContactSuccess ) > time . Hour * 24 * 7 {
2019-06-20 14:56:04 +01:00
mon . Meter ( "uptime_not_seen_week" ) . Mark ( 1 )
}
2019-03-25 22:25:09 +00:00
}
dbNode , err = tx . Update_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) , updateFields )
if err != nil {
2019-03-29 12:30:23 +00:00
return nil , Error . Wrap ( errs . Combine ( err , tx . Rollback ( ) ) )
2019-03-25 22:25:09 +00:00
}
2019-04-08 18:52:53 +01:00
// TODO: Allegedly tx.Get_Node_By_Id and tx.Update_Node_By_Id should never return a nil value for dbNode,
// however we've seen from some crashes that it does. We need to track down the cause of these crashes
// but for now we're adding a nil check to prevent a panic.
if dbNode == nil {
2019-06-03 15:37:43 +01:00
return nil , Error . Wrap ( errs . Combine ( errs . New ( "unable to get node by ID: %v" , nodeID ) , tx . Rollback ( ) ) )
2019-04-08 18:52:53 +01:00
}
2019-03-25 22:25:09 +00:00
2019-04-08 18:52:53 +01:00
return getNodeStats ( dbNode ) , Error . Wrap ( tx . Commit ( ) )
2019-03-25 22:25:09 +00:00
}
2019-08-27 13:37:42 +01:00
// AllPieceCounts returns a map of node IDs to piece counts from the db.
// NB: a valid, partial piece map can be returned even if node ID parsing error(s) are returned.
func ( cache * overlaycache ) AllPieceCounts ( ctx context . Context ) ( _ map [ storj . NodeID ] int , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
// NB: `All_Node_Id_Node_PieceCount_By_PieceCount_Not_Number` selects node
// ID and piece count from the nodes table where piece count is not zero.
rows , err := cache . db . All_Node_Id_Node_PieceCount_By_PieceCount_Not_Number ( ctx )
if err != nil {
return nil , Error . Wrap ( err )
}
pieceCounts := make ( map [ storj . NodeID ] int )
nodeIDErrs := errs . Group { }
for _ , row := range rows {
nodeID , err := storj . NodeIDFromBytes ( row . Id )
if err != nil {
nodeIDErrs . Add ( err )
continue
}
pieceCounts [ nodeID ] = int ( row . PieceCount )
}
2019-09-11 22:38:58 +01:00
2019-08-27 13:37:42 +01:00
return pieceCounts , nodeIDErrs . Err ( )
}
func ( cache * overlaycache ) UpdatePieceCounts ( ctx context . Context , pieceCounts map [ storj . NodeID ] int ) ( err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
if len ( pieceCounts ) == 0 {
return nil
}
2019-09-11 22:38:58 +01:00
// TODO: pass in the apprioriate struct to database, rather than constructing it here
type NodeCount struct {
ID storj . NodeID
Count int64
2019-08-27 13:37:42 +01:00
}
2019-09-11 22:38:58 +01:00
var counts [ ] NodeCount
2019-08-27 13:37:42 +01:00
2019-09-11 22:38:58 +01:00
for nodeid , count := range pieceCounts {
counts = append ( counts , NodeCount {
ID : nodeid ,
Count : int64 ( count ) ,
} )
}
sort . Slice ( counts , func ( i , k int ) bool {
return counts [ i ] . ID . Less ( counts [ k ] . ID )
} )
2019-08-27 13:37:42 +01:00
2019-10-18 22:27:57 +01:00
var nodeIDs [ ] storj . NodeID
var countNumbers [ ] int64
for _ , count := range counts {
nodeIDs = append ( nodeIDs , count . ID )
countNumbers = append ( countNumbers , count . Count )
2019-08-27 13:37:42 +01:00
}
2019-09-11 22:38:58 +01:00
2019-10-18 22:27:57 +01:00
_ , err = cache . db . ExecContext ( ctx , `
UPDATE nodes
SET piece_count = update . count
FROM (
SELECT unnest ( $ 1 : : bytea [ ] ) as id , unnest ( $ 2 : : bigint [ ] ) as count
) as update
WHERE nodes . id = update . id
` , postgresNodeIDList ( nodeIDs ) , pq . Array ( countNumbers ) )
2019-09-11 22:38:58 +01:00
return Error . Wrap ( err )
2019-08-27 13:37:42 +01:00
}
2019-11-07 17:19:34 +00:00
// GetExitingNodes returns nodes who have initiated a graceful exit and is not disqualified, but have not completed it.
2019-10-24 17:24:42 +01:00
func ( cache * overlaycache ) GetExitingNodes ( ctx context . Context ) ( exitingNodes [ ] * overlay . ExitStatus , err error ) {
2019-10-01 23:18:21 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
rows , err := cache . db . Query ( cache . db . Rebind ( `
2019-10-24 17:24:42 +01:00
SELECT id , exit_initiated_at , exit_loop_completed_at , exit_finished_at , exit_success FROM nodes
2019-10-01 23:18:21 +01:00
WHERE exit_initiated_at IS NOT NULL
AND exit_finished_at IS NULL
2019-11-07 17:19:34 +00:00
AND disqualified is NULL
2019-10-01 23:18:21 +01:00
` ) ,
)
if err != nil {
return nil , err
}
defer func ( ) {
err = errs . Combine ( err , rows . Close ( ) )
} ( )
for rows . Next ( ) {
2019-10-24 17:24:42 +01:00
var exitingNodeStatus overlay . ExitStatus
err = rows . Scan ( & exitingNodeStatus . NodeID , & exitingNodeStatus . ExitInitiatedAt , & exitingNodeStatus . ExitLoopCompletedAt , & exitingNodeStatus . ExitFinishedAt , & exitingNodeStatus . ExitSuccess )
2019-10-01 23:18:21 +01:00
if err != nil {
return nil , err
}
2019-10-24 17:24:42 +01:00
exitingNodes = append ( exitingNodes , & exitingNodeStatus )
2019-10-01 23:18:21 +01:00
}
return exitingNodes , nil
}
2019-10-23 02:06:01 +01:00
// GetExitStatus returns a node's graceful exit status.
2019-10-11 22:18:05 +01:00
func ( cache * overlaycache ) GetExitStatus ( ctx context . Context , nodeID storj . NodeID ) ( _ * overlay . ExitStatus , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
2019-10-17 16:01:39 +01:00
rows , err := cache . db . Query ( cache . db . Rebind ( "select id, exit_initiated_at, exit_loop_completed_at, exit_finished_at, exit_success from nodes where id = ?" ) , nodeID )
2019-10-11 22:18:05 +01:00
if err != nil {
return nil , Error . Wrap ( err )
}
defer func ( ) {
err = errs . Combine ( err , rows . Close ( ) )
} ( )
exitStatus := & overlay . ExitStatus { }
if rows . Next ( ) {
2019-10-17 16:01:39 +01:00
err = rows . Scan ( & exitStatus . NodeID , & exitStatus . ExitInitiatedAt , & exitStatus . ExitLoopCompletedAt , & exitStatus . ExitFinishedAt , & exitStatus . ExitSuccess )
2019-10-11 22:18:05 +01:00
}
return exitStatus , Error . Wrap ( err )
}
2019-10-01 23:18:21 +01:00
2019-10-23 02:06:01 +01:00
// GetGracefulExitCompletedByTimeFrame returns nodes who have completed graceful exit within a time window (time window is around graceful exit completion).
func ( cache * overlaycache ) GetGracefulExitCompletedByTimeFrame ( ctx context . Context , begin , end time . Time ) ( exitedNodes storj . NodeIDList , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
rows , err := cache . db . Query ( cache . db . Rebind ( `
SELECT id FROM nodes
WHERE exit_initiated_at IS NOT NULL
AND exit_finished_at IS NOT NULL
AND exit_finished_at >= ?
AND exit_finished_at < ?
` ) , begin , end ,
)
if err != nil {
return nil , err
}
defer func ( ) {
err = errs . Combine ( err , rows . Close ( ) )
} ( )
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
if err != nil {
return nil , err
}
exitedNodes = append ( exitedNodes , id )
}
return exitedNodes , rows . Err ( )
}
// GetGracefulExitIncompleteByTimeFrame returns nodes who have initiated, but not completed graceful exit within a time window (time window is around graceful exit initiation).
func ( cache * overlaycache ) GetGracefulExitIncompleteByTimeFrame ( ctx context . Context , begin , end time . Time ) ( exitingNodes storj . NodeIDList , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
rows , err := cache . db . Query ( cache . db . Rebind ( `
SELECT id FROM nodes
WHERE exit_initiated_at IS NOT NULL
AND exit_finished_at IS NULL
AND exit_initiated_at >= ?
AND exit_initiated_at < ?
` ) , begin , end ,
)
if err != nil {
return nil , err
}
defer func ( ) {
err = errs . Combine ( err , rows . Close ( ) )
} ( )
// TODO return more than just ID
for rows . Next ( ) {
var id storj . NodeID
err = rows . Scan ( & id )
if err != nil {
return nil , err
}
exitingNodes = append ( exitingNodes , id )
}
return exitingNodes , rows . Err ( )
}
2019-10-01 23:18:21 +01:00
// UpdateExitStatus is used to update a node's graceful exit status.
2019-10-29 20:22:20 +00:00
func ( cache * overlaycache ) UpdateExitStatus ( ctx context . Context , request * overlay . ExitStatusRequest ) ( _ * overlay . NodeDossier , err error ) {
2019-10-01 23:18:21 +01:00
defer mon . Task ( ) ( & ctx ) ( & err )
nodeID := request . NodeID
updateFields := populateExitStatusFields ( request )
tx , err := cache . db . Open ( ctx )
if err != nil {
return nil , Error . Wrap ( err )
}
dbNode , err := tx . Update_Node_By_Id ( ctx , dbx . Node_Id ( nodeID . Bytes ( ) ) , updateFields )
if err != nil {
return nil , Error . Wrap ( errs . Combine ( err , tx . Rollback ( ) ) )
}
if dbNode == nil {
return nil , Error . Wrap ( errs . Combine ( errs . New ( "unable to get node by ID: %v" , nodeID ) , tx . Rollback ( ) ) )
}
2019-10-29 20:22:20 +00:00
err = tx . Commit ( )
if err != nil {
return nil , Error . Wrap ( err )
}
return convertDBNode ( ctx , dbNode )
2019-10-01 23:18:21 +01:00
}
func populateExitStatusFields ( req * overlay . ExitStatusRequest ) dbx . Node_Update_Fields {
dbxUpdateFields := dbx . Node_Update_Fields { }
if ! req . ExitInitiatedAt . IsZero ( ) {
dbxUpdateFields . ExitInitiatedAt = dbx . Node_ExitInitiatedAt ( req . ExitInitiatedAt )
}
if ! req . ExitLoopCompletedAt . IsZero ( ) {
dbxUpdateFields . ExitLoopCompletedAt = dbx . Node_ExitLoopCompletedAt ( req . ExitLoopCompletedAt )
}
if ! req . ExitFinishedAt . IsZero ( ) {
dbxUpdateFields . ExitFinishedAt = dbx . Node_ExitFinishedAt ( req . ExitFinishedAt )
}
2019-10-17 16:01:39 +01:00
dbxUpdateFields . ExitSuccess = dbx . Node_ExitSuccess ( req . ExitSuccess )
2019-10-01 23:18:21 +01:00
return dbxUpdateFields
}
2019-06-04 12:55:38 +01:00
func convertDBNode ( ctx context . Context , info * dbx . Node ) ( _ * overlay . NodeDossier , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
2019-01-15 16:08:45 +00:00
if info == nil {
return nil , Error . New ( "missing info" )
}
2019-03-29 08:53:43 +00:00
id , err := storj . NodeIDFromBytes ( info . Id )
2019-01-15 16:08:45 +00:00
if err != nil {
return nil , err
}
2019-10-21 11:50:59 +01:00
ver , err := version . NewSemVer ( fmt . Sprintf ( "%d.%d.%d" , info . Major , info . Minor , info . Patch ) )
if err != nil {
return nil , err
2019-04-10 07:04:24 +01:00
}
2019-10-11 22:18:05 +01:00
exitStatus := overlay . ExitStatus { NodeID : id }
exitStatus . ExitInitiatedAt = info . ExitInitiatedAt
exitStatus . ExitLoopCompletedAt = info . ExitLoopCompletedAt
exitStatus . ExitFinishedAt = info . ExitFinishedAt
2019-04-04 17:34:36 +01:00
node := & overlay . NodeDossier {
Node : pb . Node {
2019-05-22 21:06:27 +01:00
Id : id ,
2019-06-24 16:33:18 +01:00
LastIp : info . LastNet ,
2019-04-04 17:34:36 +01:00
Address : & pb . NodeAddress {
Address : info . Address ,
Transport : pb . NodeTransport ( info . Protocol ) ,
} ,
2019-01-15 16:08:45 +00:00
} ,
2019-04-04 17:34:36 +01:00
Type : pb . NodeType ( info . Type ) ,
Operator : pb . NodeOperator {
2019-03-29 08:53:43 +00:00
Email : info . Email ,
Wallet : info . Wallet ,
2019-01-15 16:08:45 +00:00
} ,
2019-04-04 17:34:36 +01:00
Capacity : pb . NodeCapacity {
2019-01-15 16:08:45 +00:00
FreeBandwidth : info . FreeBandwidth ,
FreeDisk : info . FreeDisk ,
} ,
2019-06-20 14:56:04 +01:00
Reputation : * getNodeStats ( info ) ,
2019-04-10 07:04:24 +01:00
Version : pb . NodeVersion {
Version : ver . String ( ) ,
CommitHash : info . Hash ,
2019-07-08 19:24:42 +01:00
Timestamp : info . Timestamp ,
2019-04-10 07:04:24 +01:00
Release : info . Release ,
} ,
2019-05-30 22:38:23 +01:00
Contained : info . Contained ,
Disqualified : info . Disqualified ,
2019-08-19 11:58:13 +01:00
PieceCount : info . PieceCount ,
2019-10-11 22:18:05 +01:00
ExitStatus : exitStatus ,
2019-10-23 02:06:01 +01:00
CreatedAt : info . CreatedAt ,
2019-01-15 16:08:45 +00:00
}
return node , nil
2018-12-17 20:14:16 +00:00
}
2019-03-25 22:25:09 +00:00
2019-07-12 15:35:48 +01:00
func convertDBNodeToPBNode ( ctx context . Context , info * dbx . Id_LastNet_Address_Protocol_Row ) ( _ * pb . Node , err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
if info == nil {
return nil , Error . New ( "missing info" )
}
id , err := storj . NodeIDFromBytes ( info . Id )
if err != nil {
return nil , err
}
return & pb . Node {
Id : id ,
LastIp : info . LastNet ,
Address : & pb . NodeAddress {
Address : info . Address ,
Transport : pb . NodeTransport ( info . Protocol ) ,
} ,
} , nil
}
2019-04-08 18:52:53 +01:00
func getNodeStats ( dbNode * dbx . Node ) * overlay . NodeStats {
2019-03-25 22:25:09 +00:00
nodeStats := & overlay . NodeStats {
2019-06-20 14:56:04 +01:00
Latency90 : dbNode . Latency90 ,
AuditCount : dbNode . TotalAuditCount ,
2019-08-14 18:53:39 +01:00
AuditSuccessCount : dbNode . AuditSuccessCount ,
2019-06-20 14:56:04 +01:00
UptimeCount : dbNode . TotalUptimeCount ,
2019-08-14 18:53:39 +01:00
UptimeSuccessCount : dbNode . UptimeSuccessCount ,
2019-06-20 14:56:04 +01:00
LastContactSuccess : dbNode . LastContactSuccess ,
LastContactFailure : dbNode . LastContactFailure ,
AuditReputationAlpha : dbNode . AuditReputationAlpha ,
AuditReputationBeta : dbNode . AuditReputationBeta ,
UptimeReputationAlpha : dbNode . UptimeReputationAlpha ,
UptimeReputationBeta : dbNode . UptimeReputationBeta ,
Disqualified : dbNode . Disqualified ,
2019-03-25 22:25:09 +00:00
}
return nodeStats
}
2019-06-20 14:56:04 +01:00
// updateReputation uses the Beta distribution model to determine a node's reputation.
// lambda is the "forgetting factor" which determines how much past info is kept when determining current reputation score.
// w is the normalization weight that affects how severely new updates affect the current reputation distribution.
func updateReputation ( isSuccess bool , alpha , beta , lambda , w float64 , totalCount int64 ) ( newAlpha , newBeta float64 , updatedCount int64 ) {
// v is a single feedback value that allows us to update both alpha and beta
var v float64 = - 1
if isSuccess {
v = 1
}
newAlpha = lambda * alpha + w * ( 1 + v ) / 2
newBeta = lambda * beta + w * ( 1 - v ) / 2
return newAlpha , newBeta , totalCount + 1
2019-03-25 22:25:09 +00:00
}
2019-07-31 18:21:06 +01:00
func buildUpdateStatement ( db * dbx . DB , update updateNodeStats ) string {
if update . NodeID . IsZero ( ) {
return ""
}
atLeastOne := false
sql := "UPDATE nodes SET "
if update . TotalAuditCount . set {
atLeastOne = true
sql += fmt . Sprintf ( "total_audit_count = %v" , update . TotalAuditCount . value )
}
if update . TotalUptimeCount . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "total_uptime_count = %v" , update . TotalUptimeCount . value )
}
if update . AuditReputationAlpha . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "audit_reputation_alpha = %v" , update . AuditReputationAlpha . value )
}
if update . AuditReputationBeta . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "audit_reputation_beta = %v" , update . AuditReputationBeta . value )
}
if update . UptimeReputationAlpha . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "uptime_reputation_alpha = %v" , update . UptimeReputationAlpha . value )
}
if update . UptimeReputationBeta . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "uptime_reputation_beta = %v" , update . UptimeReputationBeta . value )
}
if update . Disqualified . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "disqualified = '%v'" , update . Disqualified . value . Format ( time . RFC3339Nano ) )
}
if update . UptimeSuccessCount . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "uptime_success_count = %v" , update . UptimeSuccessCount . value )
}
if update . LastContactSuccess . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "last_contact_success = '%v'" , update . LastContactSuccess . value . Format ( time . RFC3339Nano ) )
}
if update . LastContactFailure . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "last_contact_failure = '%v'" , update . LastContactFailure . value . Format ( time . RFC3339Nano ) )
}
if update . AuditSuccessCount . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "audit_success_count = %v" , update . AuditSuccessCount . value )
}
if update . Contained . set {
if atLeastOne {
sql += ","
}
atLeastOne = true
sql += fmt . Sprintf ( "contained = %v" , update . Contained . value )
}
if ! atLeastOne {
return ""
}
hexNodeID := hex . EncodeToString ( update . NodeID . Bytes ( ) )
2019-10-18 22:27:57 +01:00
sql += fmt . Sprintf ( " WHERE nodes.id = decode('%v', 'hex');\n" , hexNodeID )
sql += fmt . Sprintf ( "DELETE FROM pending_audits WHERE pending_audits.node_id = decode('%v', 'hex');\n" , hexNodeID )
2019-07-31 18:21:06 +01:00
return sql
}
type int64Field struct {
set bool
value int64
}
type float64Field struct {
set bool
value float64
}
type boolField struct {
set bool
value bool
}
type timeField struct {
set bool
value time . Time
}
type updateNodeStats struct {
NodeID storj . NodeID
TotalAuditCount int64Field
TotalUptimeCount int64Field
AuditReputationAlpha float64Field
AuditReputationBeta float64Field
UptimeReputationAlpha float64Field
UptimeReputationBeta float64Field
Disqualified timeField
UptimeSuccessCount int64Field
LastContactSuccess timeField
LastContactFailure timeField
AuditSuccessCount int64Field
Contained boolField
}
func populateUpdateNodeStats ( dbNode * dbx . Node , updateReq * overlay . UpdateRequest ) updateNodeStats {
auditAlpha , auditBeta , totalAuditCount := updateReputation (
updateReq . AuditSuccess ,
dbNode . AuditReputationAlpha ,
dbNode . AuditReputationBeta ,
updateReq . AuditLambda ,
updateReq . AuditWeight ,
dbNode . TotalAuditCount ,
)
2019-10-15 18:00:14 +01:00
mon . FloatVal ( "audit_reputation_alpha" ) . Observe ( auditAlpha ) //locked
mon . FloatVal ( "audit_reputation_beta" ) . Observe ( auditBeta ) //locked
2019-07-31 18:21:06 +01:00
uptimeAlpha , uptimeBeta , totalUptimeCount := updateReputation (
updateReq . IsUp ,
dbNode . UptimeReputationAlpha ,
dbNode . UptimeReputationBeta ,
updateReq . UptimeLambda ,
updateReq . UptimeWeight ,
dbNode . TotalUptimeCount ,
)
mon . FloatVal ( "uptime_reputation_alpha" ) . Observe ( uptimeAlpha )
mon . FloatVal ( "uptime_reputation_beta" ) . Observe ( uptimeBeta )
updateFields := updateNodeStats {
NodeID : updateReq . NodeID ,
TotalAuditCount : int64Field { set : true , value : totalAuditCount } ,
TotalUptimeCount : int64Field { set : true , value : totalUptimeCount } ,
AuditReputationAlpha : float64Field { set : true , value : auditAlpha } ,
AuditReputationBeta : float64Field { set : true , value : auditBeta } ,
UptimeReputationAlpha : float64Field { set : true , value : uptimeAlpha } ,
UptimeReputationBeta : float64Field { set : true , value : uptimeBeta } ,
}
auditRep := auditAlpha / ( auditAlpha + auditBeta )
if auditRep <= updateReq . AuditDQ {
updateFields . Disqualified = timeField { set : true , value : time . Now ( ) . UTC ( ) }
}
uptimeRep := uptimeAlpha / ( uptimeAlpha + uptimeBeta )
if uptimeRep <= updateReq . UptimeDQ {
// n.b. that this will overwrite the audit DQ timestamp
// if it has already been set.
updateFields . Disqualified = timeField { set : true , value : time . Now ( ) . UTC ( ) }
}
if updateReq . IsUp {
updateFields . UptimeSuccessCount = int64Field { set : true , value : dbNode . UptimeSuccessCount + 1 }
updateFields . LastContactSuccess = timeField { set : true , value : time . Now ( ) }
} else {
updateFields . LastContactFailure = timeField { set : true , value : time . Now ( ) }
}
if updateReq . AuditSuccess {
updateFields . AuditSuccessCount = int64Field { set : true , value : dbNode . AuditSuccessCount + 1 }
}
// Updating node stats always exits it from containment mode
updateFields . Contained = boolField { set : true , value : false }
return updateFields
}
func populateUpdateFields ( dbNode * dbx . Node , updateReq * overlay . UpdateRequest ) dbx . Node_Update_Fields {
update := populateUpdateNodeStats ( dbNode , updateReq )
updateFields := dbx . Node_Update_Fields { }
if update . TotalAuditCount . set {
updateFields . TotalAuditCount = dbx . Node_TotalAuditCount ( update . TotalAuditCount . value )
}
if update . TotalUptimeCount . set {
updateFields . TotalUptimeCount = dbx . Node_TotalUptimeCount ( update . TotalUptimeCount . value )
}
if update . AuditReputationAlpha . set {
updateFields . AuditReputationAlpha = dbx . Node_AuditReputationAlpha ( update . AuditReputationAlpha . value )
}
if update . AuditReputationBeta . set {
updateFields . AuditReputationBeta = dbx . Node_AuditReputationBeta ( update . AuditReputationBeta . value )
}
if update . UptimeReputationAlpha . set {
updateFields . UptimeReputationAlpha = dbx . Node_UptimeReputationAlpha ( update . UptimeReputationAlpha . value )
}
if update . UptimeReputationBeta . set {
updateFields . UptimeReputationBeta = dbx . Node_UptimeReputationBeta ( update . UptimeReputationBeta . value )
}
if update . Disqualified . set {
updateFields . Disqualified = dbx . Node_Disqualified ( update . Disqualified . value )
}
if update . UptimeSuccessCount . set {
updateFields . UptimeSuccessCount = dbx . Node_UptimeSuccessCount ( update . UptimeSuccessCount . value )
}
if update . LastContactSuccess . set {
updateFields . LastContactSuccess = dbx . Node_LastContactSuccess ( update . LastContactSuccess . value )
}
if update . LastContactFailure . set {
updateFields . LastContactFailure = dbx . Node_LastContactFailure ( update . LastContactFailure . value )
}
if update . AuditSuccessCount . set {
updateFields . AuditSuccessCount = dbx . Node_AuditSuccessCount ( update . AuditSuccessCount . value )
}
if update . Contained . set {
updateFields . Contained = dbx . Node_Contained ( update . Contained . value )
}
if updateReq . AuditSuccess {
updateFields . AuditSuccessCount = dbx . Node_AuditSuccessCount ( dbNode . AuditSuccessCount + 1 )
}
return updateFields
}
2019-09-19 19:37:31 +01:00
// UpdateCheckIn updates a single storagenode with info from when the the node last checked in.
func ( cache * overlaycache ) UpdateCheckIn ( ctx context . Context , node overlay . NodeCheckInInfo , config overlay . NodeSelectionConfig ) ( err error ) {
defer mon . Task ( ) ( & ctx ) ( & err )
if node . Address . GetAddress ( ) == "" {
return Error . New ( "error UpdateCheckIn: missing the storage node address" )
}
2019-10-18 22:27:57 +01:00
// v is a single feedback value that allows us to update both alpha and beta
var v float64 = - 1
if node . IsUp {
v = 1
}
2019-09-19 19:37:31 +01:00
2019-10-18 22:27:57 +01:00
uptimeReputationAlpha := config . UptimeReputationLambda * config . UptimeReputationAlpha0 + config . UptimeReputationWeight * ( 1 + v ) / 2
uptimeReputationBeta := config . UptimeReputationLambda * config . UptimeReputationBeta0 + config . UptimeReputationWeight * ( 1 - v ) / 2
semVer , err := version . NewSemVer ( node . Version . GetVersion ( ) )
if err != nil {
return Error . New ( "unable to convert version to semVer" )
}
2019-09-19 19:37:31 +01:00
2019-10-18 22:27:57 +01:00
query := `
2019-09-19 19:37:31 +01:00
INSERT INTO nodes
(
id , address , last_net , protocol , type ,
email , wallet , free_bandwidth , free_disk ,
uptime_success_count , total_uptime_count ,
last_contact_success ,
last_contact_failure ,
audit_reputation_alpha , audit_reputation_beta , uptime_reputation_alpha , uptime_reputation_beta ,
major , minor , patch , hash , timestamp , release
)
VALUES (
$ 1 , $ 2 , $ 3 , $ 4 , $ 5 ,
$ 6 , $ 7 , $ 8 , $ 9 ,
$ 10 : : bool : : int , 1 ,
CASE WHEN $ 10 IS TRUE THEN current_timestamp
ELSE ' 0001 - 01 - 01 00 : 00 : 00 + 00 '
END ,
CASE WHEN $ 10 IS FALSE THEN current_timestamp
ELSE ' 0001 - 01 - 01 00 : 00 : 00 + 00 '
END ,
$ 11 , $ 12 , $ 13 , $ 14 ,
$ 18 , $ 19 , $ 20 , $ 21 , $ 22 , $ 23
)
ON CONFLICT ( id )
DO UPDATE
SET
address = $ 2 ,
last_net = $ 3 ,
protocol = $ 4 ,
email = $ 6 ,
wallet = $ 7 ,
free_bandwidth = $ 8 ,
free_disk = $ 9 ,
2019-09-26 01:07:39 +01:00
major = $ 18 , minor = $ 19 , patch = $ 20 , hash = $ 21 , timestamp = $ 22 , release = $ 23 ,
2019-09-19 19:37:31 +01:00
total_uptime_count = nodes . total_uptime_count + 1 ,
uptime_reputation_alpha = $ 16 : : numeric * nodes . uptime_reputation_alpha + $ 17 : : numeric * $ 10 : : bool : : int ,
uptime_reputation_beta = $ 16 : : numeric * nodes . uptime_reputation_beta + $ 17 : : numeric * ( NOT $ 10 ) : : bool : : int ,
uptime_success_count = nodes . uptime_success_count + $ 10 : : bool : : int ,
last_contact_success = CASE WHEN $ 10 IS TRUE
THEN current_timestamp
ELSE nodes . last_contact_success
END ,
last_contact_failure = CASE WHEN $ 10 IS FALSE
THEN current_timestamp
ELSE nodes . last_contact_failure
END ,
-- this disqualified case statement resolves to :
-- when ( new . uptime_reputation_alpha / ( new . uptime_reputation_alpha + new . uptime_reputation_beta ) ) <= config . UptimeReputationDQ
disqualified = CASE WHEN ( ( $ 16 : : numeric * nodes . uptime_reputation_alpha + $ 17 : : numeric * $ 10 : : bool : : int ) / ( ( $ 16 : : numeric * nodes . uptime_reputation_alpha + $ 17 : : numeric * $ 10 : : bool : : int ) + ( $ 16 : : numeric * nodes . uptime_reputation_beta + $ 17 : : numeric * ( NOT $ 10 ) : : bool : : int ) ) ) <= $ 15 AND nodes . disqualified IS NULL
THEN current_timestamp
ELSE nodes . disqualified
END ;
`
2019-10-18 22:27:57 +01:00
_ , err = cache . db . ExecContext ( ctx , query ,
// args $1 - $5
node . NodeID . Bytes ( ) , node . Address . GetAddress ( ) , node . LastIP , node . Address . GetTransport ( ) , int ( pb . NodeType_STORAGE ) ,
// args $6 - $9
node . Operator . GetEmail ( ) , node . Operator . GetWallet ( ) , node . Capacity . GetFreeBandwidth ( ) , node . Capacity . GetFreeDisk ( ) ,
// args $10
node . IsUp ,
// args $11 - $14
config . AuditReputationAlpha0 , config . AuditReputationBeta0 , uptimeReputationAlpha , uptimeReputationBeta ,
// args $15 - $17
config . UptimeReputationDQ , config . UptimeReputationLambda , config . UptimeReputationWeight ,
// args $18 - $23
semVer . Major , semVer . Minor , semVer . Patch , node . Version . GetCommitHash ( ) , node . Version . Timestamp , node . Version . GetRelease ( ) ,
)
if err != nil {
return Error . Wrap ( err )
2019-09-19 19:37:31 +01:00
}
return nil
}