storagenode: fix B*h to bytes disk usage conversion

The used space graph values are correct when a single satellite is
selected but wrong for 'All satellites'. The cause lies in the
queries that compute the daily disk usage across all satellites and
the summary and average across all satellites:

1. Dividing the sum of at_rest_total by the total hours is wrong.
Simply put, we were assuming that, for example, (4/2)+(6/3) equals
(4+6)/(2+3), given at_rest_total values of 4 and 6 with intervals of
2 and 3 hours respectively. It does not: the former is 4 while the
latter is 2.

2. To get the average, we must first sum at_rest_total_bytes across
all satellites for each timestamp and then take the average of
those sums, instead of averaging the individual per-satellite
values.

Closes https://github.com/storj/storj/issues/5519

Change-Id: Ib1314e238b695a6c1ecd9f9171ee86dd56bb3b24
This commit is contained in:
Clement Sam 2023-01-31 12:53:06 +00:00 committed by Storj Robot
parent 3146ad7f2e
commit 3d3f9d133a
4 changed files with 87 additions and 64 deletions

View File

@ -354,15 +354,15 @@ func (s *Service) GetSatelliteData(ctx context.Context, satelliteID storj.NodeID
// Satellites represents consolidated data across all satellites.
type Satellites struct {
StorageDaily []storageusage.Stamp `json:"storageDaily"`
BandwidthDaily []bandwidth.UsageRollup `json:"bandwidthDaily"`
StorageSummary float64 `json:"storageSummary"`
AverageUsageBytes float64 `json:"averageUsageBytes"`
BandwidthSummary int64 `json:"bandwidthSummary"`
EgressSummary int64 `json:"egressSummary"`
IngressSummary int64 `json:"ingressSummary"`
EarliestJoinedAt time.Time `json:"earliestJoinedAt"`
Audits []Audits `json:"audits"`
StorageDaily []storageusage.StampGroup `json:"storageDaily"`
BandwidthDaily []bandwidth.UsageRollup `json:"bandwidthDaily"`
StorageSummary float64 `json:"storageSummary"`
AverageUsageBytes float64 `json:"averageUsageBytes"`
BandwidthSummary int64 `json:"bandwidthSummary"`
EgressSummary int64 `json:"egressSummary"`
IngressSummary int64 `json:"ingressSummary"`
EarliestJoinedAt time.Time `json:"earliestJoinedAt"`
Audits []Audits `json:"audits"`
}
// Audits represents audit, suspension and online scores of SNO across all satellites.

View File

@ -86,7 +86,7 @@ func (db *storageUsageDB) GetDaily(ctx context.Context, satelliteID storj.NodeID
var stamps []storageusage.Stamp
for rows.Next() {
var satellite storj.NodeID
var atRestTotal, intervalInHours float64
var atRestTotal, intervalInHours sql.NullFloat64
var timestamp time.Time
err = rows.Scan(&satellite, &atRestTotal, &intervalInHours, &timestamp)
@ -94,11 +94,16 @@ func (db *storageUsageDB) GetDaily(ctx context.Context, satelliteID storj.NodeID
return nil, err
}
atRestTotalBytes := float64(0)
if intervalInHours.Float64 > 0 {
atRestTotalBytes = atRestTotal.Float64 / intervalInHours.Float64
}
stamps = append(stamps, storageusage.Stamp{
SatelliteID: satellite,
AtRestTotal: atRestTotal,
AtRestTotalBytes: atRestTotal / intervalInHours,
IntervalInHours: intervalInHours,
AtRestTotal: atRestTotal.Float64,
AtRestTotalBytes: atRestTotalBytes,
IntervalInHours: intervalInHours.Float64,
IntervalStart: timestamp,
})
}
@ -108,15 +113,16 @@ func (db *storageUsageDB) GetDaily(ctx context.Context, satelliteID storj.NodeID
// GetDailyTotal returns daily storage usage stamps summed across all known satellites
// for provided time range.
func (db *storageUsageDB) GetDailyTotal(ctx context.Context, from, to time.Time) (_ []storageusage.Stamp, err error) {
func (db *storageUsageDB) GetDailyTotal(ctx context.Context, from, to time.Time) (_ []storageusage.StampGroup, err error) {
defer mon.Task()(&ctx)(&err)
// hour_interval = current row interval_end_time - previous row interval_end_time
// Rows with 0-hour difference are assumed to be 24 hours.
query := `SELECT SUM(su3.at_rest_total), SUM(su3.hour_interval), su3.timestamp
query := `SELECT SUM(su3.at_rest_total), SUM(su3.at_rest_total_bytes), su3.timestamp
FROM (
SELECT su1.at_rest_total,
COALESCE(
SELECT
su1.at_rest_total,
su1.at_rest_total / COALESCE(
(
CAST(strftime('%s', su1.interval_end_time) AS NUMERIC)
-
@ -130,7 +136,7 @@ func (db *storageUsageDB) GetDailyTotal(ctx context.Context, from, to time.Time)
)) AS NUMERIC)
) / 3600,
24
) AS hour_interval,
) AS at_rest_total_bytes,
su1.timestamp
FROM storage_usage su1
WHERE ? <= su1.timestamp AND su1.timestamp <= ?
@ -146,20 +152,19 @@ func (db *storageUsageDB) GetDailyTotal(ctx context.Context, from, to time.Time)
err = errs.Combine(err, rows.Close())
}()
var stamps []storageusage.Stamp
var stamps []storageusage.StampGroup
for rows.Next() {
var atRestTotal, intervalInHours float64
var atRestTotal, atRestTotalBytes sql.NullFloat64
var timestamp time.Time
err = rows.Scan(&atRestTotal, &intervalInHours, &timestamp)
err = rows.Scan(&atRestTotal, &atRestTotalBytes, &timestamp)
if err != nil {
return nil, err
}
stamps = append(stamps, storageusage.Stamp{
AtRestTotal: atRestTotal,
AtRestTotalBytes: atRestTotal / intervalInHours,
IntervalInHours: intervalInHours,
stamps = append(stamps, storageusage.StampGroup{
AtRestTotal: atRestTotal.Float64,
AtRestTotalBytes: atRestTotalBytes.Float64,
IntervalStart: timestamp,
})
}
@ -172,29 +177,33 @@ func (db *storageUsageDB) Summary(ctx context.Context, from, to time.Time) (_, _
defer mon.Task()(&ctx, from, to)(&err)
var summary, averageUsageInBytes sql.NullFloat64
query := `SELECT SUM(su3.at_rest_total), AVG(su3.at_rest_total_bytes)
query := `SELECT SUM(at_rest_total), AVG(at_rest_total_bytes)
FROM (
SELECT
at_rest_total,
at_rest_total / (
COALESCE(
(
CAST(strftime('%s', su1.interval_end_time) AS NUMERIC)
-
CAST(strftime('%s', (
SELECT interval_end_time
FROM storage_usage su2
WHERE su2.satellite_id = su1.satellite_id
AND su2.timestamp < su1.timestamp
ORDER BY su2.timestamp DESC
LIMIT 1
)) AS NUMERIC)
) / 3600,
24
SUM(su1.at_rest_total) AS at_rest_total,
SUM(
su1.at_rest_total / (
COALESCE(
(
CAST(strftime('%s', su1.interval_end_time) AS NUMERIC)
-
CAST(strftime('%s', (
SELECT interval_end_time
FROM storage_usage su2
WHERE su2.satellite_id = su1.satellite_id
AND su2.timestamp < su1.timestamp
ORDER BY su2.timestamp DESC
LIMIT 1
)) AS NUMERIC)
) / 3600,
24
)
)
) AS at_rest_total_bytes
) AS at_rest_total_bytes,
su1.timestamp
FROM storage_usage su1
WHERE ? <= timestamp AND timestamp <= ?
WHERE ? <= su1.timestamp AND su1.timestamp <= ?
GROUP BY timestamp
) as su3`
err = db.QueryRowContext(ctx, query, from.UTC(), to.UTC()).Scan(&summary, &averageUsageInBytes)
@ -209,27 +218,29 @@ func (db *storageUsageDB) SatelliteSummary(ctx context.Context, satelliteID stor
query := `SELECT SUM(su3.at_rest_total), AVG(su3.at_rest_total_bytes)
FROM (
SELECT
at_rest_total,
at_rest_total / (
COALESCE(
(
CAST(strftime('%s', su1.interval_end_time) AS NUMERIC)
-
CAST(strftime('%s', (
SELECT interval_end_time
FROM storage_usage su2
WHERE su2.satellite_id = su1.satellite_id
AND su2.timestamp < su1.timestamp
ORDER BY su2.timestamp DESC
LIMIT 1
)) AS NUMERIC)
) / 3600,
24
su1.at_rest_total,
(
su1.at_rest_total / (
COALESCE(
(
CAST(strftime('%s', su1.interval_end_time) AS NUMERIC)
-
CAST(strftime('%s', (
SELECT interval_end_time
FROM storage_usage su2
WHERE su2.satellite_id = su1.satellite_id
AND su2.timestamp < su1.timestamp
ORDER BY su2.timestamp DESC
LIMIT 1
)) AS NUMERIC)
) / 3600,
24
)
)
) AS at_rest_total_bytes
FROM storage_usage su1
WHERE satellite_id = ?
AND ? <= timestamp AND timestamp <= ?
WHERE su1.satellite_id = ?
AND ? <= su1.timestamp AND su1.timestamp <= ?
) as su3`
err = db.QueryRowContext(ctx, query, satelliteID, from.UTC(), to.UTC()).Scan(&summary, &averageUsageInBytes)

View File

@ -21,7 +21,7 @@ type DB interface {
GetDaily(ctx context.Context, satelliteID storj.NodeID, from, to time.Time) ([]Stamp, error)
// GetDailyTotal returns daily storage usage stamps summed across all known satellites
// for provided time range
GetDailyTotal(ctx context.Context, from, to time.Time) ([]Stamp, error)
GetDailyTotal(ctx context.Context, from, to time.Time) ([]StampGroup, error)
// Summary returns aggregated storage usage across all satellites.
Summary(ctx context.Context, from, to time.Time) (float64, float64, error)
// SatelliteSummary returns aggregated storage usage for a particular satellite.
@ -45,3 +45,15 @@ type Stamp struct {
// (i.e. last interval_end_time) for the day
IntervalEndTime time.Time `json:"-"`
}
// StampGroup is a storage usage stamp aggregated across all satellites,
// with one entry per IntervalStart (the timestamp column in the DB).
type StampGroup struct {
// AtRestTotal is the bytes*hour disk space used, summed across all
// satellites that share the same IntervalStart.
AtRestTotal float64 `json:"atRestTotal"`
// AtRestTotalBytes is the sum, across all satellites, of each satellite's
// AtRestTotal divided by that satellite's own interval in hours. The
// division is done per satellite before summing; it is not the summed
// AtRestTotal divided by the summed hours.
AtRestTotalBytes float64 `json:"atRestTotalBytes"`
// IntervalStart represents one tally day
// TODO: rename to timestamp to match DB
IntervalStart time.Time `json:"intervalStart"`
}

View File

@ -70,7 +70,7 @@ func TestStorageUsage(t *testing.T) {
averageBySatellite[satellite] = satelliteUsageBytes / float64(len(expectedDailyStamps[satellite]))
}
averageUsage = totalUsageBytes / totalStamps
averageUsage = totalUsageBytes / float64(len(expectedDailyStampsTotals))
storagenodedbtest.Run(t, func(ctx *testcontext.Context, t *testing.T, db storagenode.DB) {
storageUsageDB := db.StorageUsage()