storj/satellite/satellitedb/audithistory.go
Cameron Ayer b39a99bae6 satellite/{overlay,satellitedb}: always show node's real online score
Previously if a node did not have audit history data for each of the
windows over the tracking period, we would give them the benefit of
the doubt and set their score to 1. This was to prevent nodes from
being suspended right out of the gate. We need a minimum amount of data
to evaluate them.

However, a node who is actually failing at being online will have no
idea until they have received enough audits and we suspend them.

Instead, we will always use their real score, but use a flag to determine
whether they are eligible for suspension/dq.

Change-Id: I382218f12e8770f95d4bcddcf101ef348940cadf
2020-10-02 12:28:11 -04:00

149 lines
4.2 KiB
Go

// Copyright (C) 2020 Storj Labs, Inc.
// See LICENSE for copying information.
package satellitedb
import (
"context"
"database/sql"
"time"
"github.com/zeebo/errs"
"storj.io/common/pb"
"storj.io/common/storj"
"storj.io/storj/satellite/overlay"
"storj.io/storj/satellite/satellitedb/dbx"
)
// addAudit records one audit result in the audit history a and, when the set
// of windows changed, recomputes a.Score.
//
// The audit is assigned to the window starting at auditTime truncated to
// config.WindowSize. Leading windows that start before that window start minus
// config.TrackingPeriod are dropped. If the newest remaining window starts
// earlier than the audit's window (or no windows remain), a new window is
// appended. An error is returned when the newest window starts after the
// audit's window; in that case a.Windows may already have been trimmed.
//
// The score is the mean of OnlineCount/TotalCount over every window except the
// newest one. With only a single window the score is set to 1. When no window
// was added or removed the score is left untouched.
func addAudit(a *pb.AuditHistory, auditTime time.Time, online bool, config overlay.AuditHistoryConfig) error {
	currentStart := auditTime.Truncate(config.WindowSize)
	cutoff := currentStart.Add(-config.TrackingPeriod)

	// Count the leading windows that fell out of the tracking period. Windows
	// are expected to be stored oldest-first, so scanning stops at the first
	// window that is still inside the period.
	expired := 0
	for expired < len(a.Windows) && a.Windows[expired].WindowStart.Before(cutoff) {
		expired++
	}
	a.Windows = a.Windows[expired:]

	// changed tracks whether windows were removed or added, which is the only
	// situation in which the score has to be recalculated.
	changed := expired > 0

	// Open a new window when there is none, or the newest one is older than
	// the window this audit belongs to.
	if n := len(a.Windows); n == 0 || a.Windows[n-1].WindowStart.Before(currentStart) {
		a.Windows = append(a.Windows, &pb.AuditWindow{WindowStart: currentStart})
		changed = true
	}

	latest := a.Windows[len(a.Windows)-1]
	if latest.WindowStart.After(currentStart) {
		return Error.New("cannot add audit to audit history; window already passed")
	}

	// Register the audit in the newest window.
	latest.TotalCount++
	if online {
		latest.OnlineCount++
	}

	if !changed {
		return nil
	}

	// The newest window is still being filled, so it is excluded from the score.
	completed := a.Windows[:len(a.Windows)-1]
	if len(completed) == 0 {
		a.Score = 1
		return nil
	}

	var sum float64
	for _, w := range completed {
		sum += float64(w.OnlineCount) / float64(w.TotalCount)
	}
	a.Score = sum / float64(len(completed))
	return nil
}
// UpdateAuditHistory updates a node's audit history with an online or offline audit.
//
// The read-modify-write is performed inside a single database transaction whose
// isolation level is set to SERIALIZABLE before the history is touched. The
// history produced by the last attempt is returned together with any error.
func (cache *overlaycache) UpdateAuditHistory(ctx context.Context, nodeID storj.NodeID, auditTime time.Time, online bool, config overlay.AuditHistoryConfig) (history *pb.AuditHistory, err error) {
	err = cache.db.WithTx(ctx, func(ctx context.Context, tx *dbx.Tx) (txErr error) {
		if _, txErr = tx.Tx.ExecContext(ctx, "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE"); txErr != nil {
			return txErr
		}
		// history is the outer named result, so it is visible to the caller
		// once the transaction function has run.
		history, txErr = cache.updateAuditHistoryWithTx(ctx, tx, nodeID, auditTime, online, config)
		return txErr
	})
	return history, err
}
// updateAuditHistoryWithTx loads the serialized audit history for nodeID using
// tx, applies the audit via addAudit, and writes the result back.
//
// When no row exists for the node, an empty history is used as the starting
// point and a new row is created; otherwise the existing row is updated.
// A lookup failure other than "no rows" returns a nil history. Failures while
// decoding, applying the audit, or encoding return the in-memory history along
// with the unwrapped error. The final create/update error is wrapped with Error.
func (cache *overlaycache) updateAuditHistoryWithTx(ctx context.Context, tx *dbx.Tx, nodeID storj.NodeID, auditTime time.Time, online bool, config overlay.AuditHistoryConfig) (*pb.AuditHistory, error) {
	nodeIDField := dbx.AuditHistory_NodeId(nodeID.Bytes())

	// Fetch the stored history, remembering whether a row has to be created.
	encoded := []byte{}
	isNew := false
	row, err := tx.Get_AuditHistory_By_NodeId(ctx, nodeIDField)
	switch {
	case errs.Is(err, sql.ErrNoRows):
		// no row yet: start from an empty history and insert it later
		isNew = true
	case err != nil:
		return nil, Error.Wrap(err)
	default:
		encoded = row.History
	}

	history := &pb.AuditHistory{}
	if err := pb.Unmarshal(encoded, history); err != nil {
		return history, err
	}

	if err := addAudit(history, auditTime, online, config); err != nil {
		return history, err
	}

	encoded, err = pb.Marshal(history)
	if err != nil {
		return history, err
	}

	// Persist: insert when the node had no entry, otherwise overwrite it.
	if isNew {
		_, err = tx.Create_AuditHistory(ctx, nodeIDField, dbx.AuditHistory_History(encoded))
	} else {
		_, err = tx.Update_AuditHistory_By_NodeId(ctx, nodeIDField, dbx.AuditHistory_Update_Fields{
			History: dbx.AuditHistory_History(encoded),
		})
	}
	return history, Error.Wrap(err)
}