48fb3e947c
Attribution is attached to bucket usage, but that's more granular than necessary for the attribution report. This change iterates over the bucket attributions, parses the user agent, converts the first entry to lower case, and uses that as the key to a map which holds the attribution totals for each unique user agent. Change-Id: Ib2962ba0f57daa8a7298f11fcb1ac44a8bb97875
362 lines
10 KiB
Go
362 lines
10 KiB
Go
// Copyright (C) 2019 Storj Labs, Inc.
|
|
// See LICENSE for copying information.
|
|
|
|
package satellitedb
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"errors"
|
|
"time"
|
|
|
|
"github.com/zeebo/errs"
|
|
|
|
"storj.io/common/uuid"
|
|
"storj.io/storj/satellite/attribution"
|
|
"storj.io/storj/satellite/satellitedb/dbx"
|
|
)
|
|
|
|
const (
	// I can see how keeping around the query to only get attribution for one partner might be good,
	// but that means we need to maintain both queries. Maybe we should get rid of this one.
	//
	// valueAttrQuery aggregates storage tallies and bandwidth rollups for a
	// single partner_id/user_agent pair within [start, end). It expects eight
	// bindvars: the (partner_id, user_agent, start, end) group appears twice,
	// once for the storage half and once for the bandwidth half of the UNION.
	// Placeholders are `?`, so callers must run it through db.Rebind first.
	//
	// NOTE(review): the storage half uses a RIGHT JOIN onto value_attributions
	// while allValueAttrQuery below uses an INNER JOIN for the same shape —
	// presumably intentional, but worth confirming the two stay in sync.
	valueAttrQuery = `
	-- A union of both the storage tally and bandwidth rollups.
	-- Should be 1 row per project/bucket by partner within the timeframe specified
	SELECT
		o.partner_id as partner_id,
		o.user_agent as user_agent,
		o.project_id as project_id,
		o.bucket_name as bucket_name,
		SUM(o.total) as total_byte_hours,
		SUM(o.remote) as remote_byte_hours,
		SUM(o.inline) as inline_byte_hours,
		SUM(o.segments) as segment_hours,
		SUM(o.objects) as object_hours,
		SUM(o.settled) as settled,
		COALESCE(SUM(o.hours),0) as hours
	FROM
		(
			-- SUM the storage and hours
			-- Hours are used to calculate byte hours above
			SELECT
				bsti.partner_id as partner_id,
				bsti.user_agent as user_agent,
				bsto.project_id as project_id,
				bsto.bucket_name as bucket_name,
				SUM(bsto.total_bytes) as total,
				SUM(bsto.remote) as remote,
				SUM(bsto.inline) as inline,
				SUM(bsto.total_segments_count) as segments,
				SUM(bsto.object_count) as objects,
				0 as settled,
				count(1) as hours
			FROM
				(
					-- Collapse entries by the latest record in the hour
					-- If there are more than 1 records within the hour, only the latest will be considered
					SELECT
						va.partner_id,
						va.user_agent,
						date_trunc('hour', bst.interval_start) as hours,
						bst.project_id,
						bst.bucket_name,
						MAX(bst.interval_start) as max_interval
					FROM
						bucket_storage_tallies bst
						RIGHT JOIN value_attributions va ON (
							bst.project_id = va.project_id
							AND bst.bucket_name = va.bucket_name
						)
					WHERE
						va.partner_id = ?
						AND va.user_agent = ?
						AND bst.interval_start >= ?
						AND bst.interval_start < ?
					GROUP BY
						va.partner_id,
						va.user_agent,
						bst.project_id,
						bst.bucket_name,
						date_trunc('hour', bst.interval_start)
					ORDER BY
						max_interval DESC
				) bsti
				INNER JOIN bucket_storage_tallies bsto ON (
					bsto.project_id = bsti.project_id
					AND bsto.bucket_name = bsti.bucket_name
					AND bsto.interval_start = bsti.max_interval
				)
			GROUP BY
				bsti.partner_id,
				bsti.user_agent,
				bsto.project_id,
				bsto.bucket_name
			UNION
			-- SUM the bandwidth for the timeframe specified grouping by the partner_id, user_agent, project_id, and bucket_name
			SELECT
				va.partner_id as partner_id,
				va.user_agent as user_agent,
				bbr.project_id as project_id,
				bbr.bucket_name as bucket_name,
				0 as total,
				0 as remote,
				0 as inline,
				0 as segments,
				0 as objects,
				SUM(settled)::integer as settled,
				NULL as hours
			FROM
				bucket_bandwidth_rollups bbr
				INNER JOIN value_attributions va ON (
					bbr.project_id = va.project_id
					AND bbr.bucket_name = va.bucket_name
				)
			WHERE
				va.partner_id = ?
				AND va.user_agent = ?
				AND bbr.interval_start >= ?
				AND bbr.interval_start < ?
				-- action 2 is GET
				AND bbr.action = 2
			GROUP BY
				va.partner_id,
				va.user_agent,
				bbr.project_id,
				bbr.bucket_name
		) AS o
	GROUP BY
		o.partner_id,
		o.user_agent,
		o.project_id,
		o.bucket_name;
	`

	// allValueAttrQuery is the same storage/bandwidth aggregation as
	// valueAttrQuery but over every attributed project/bucket, filtered only
	// by the [start, end) interval. It uses positional $1/$2 placeholders
	// (each bound twice, once per UNION half) rather than `?`.
	allValueAttrQuery = `
	-- A union of both the storage tally and bandwidth rollups.
	-- Should be 1 row per project/bucket by partner within the timeframe specified
	SELECT
		o.partner_id as partner_id,
		o.user_agent as user_agent,
		o.project_id as project_id,
		o.bucket_name as bucket_name,
		SUM(o.total) as total_byte_hours,
		SUM(o.remote) as remote_byte_hours,
		SUM(o.inline) as inline_byte_hours,
		SUM(o.segments) as segment_hours,
		SUM(o.objects) as object_hours,
		SUM(o.settled) as settled,
		COALESCE(SUM(o.hours),0) as hours
	FROM
		(
			-- SUM the storage and hours
			-- Hours are used to calculate byte hours above
			SELECT
				bsti.partner_id as partner_id,
				bsti.user_agent as user_agent,
				bsto.project_id as project_id,
				bsto.bucket_name as bucket_name,
				SUM(bsto.total_bytes) as total,
				SUM(bsto.remote) as remote,
				SUM(bsto.inline) as inline,
				SUM(bsto.total_segments_count) as segments,
				SUM(bsto.object_count) as objects,
				0 as settled,
				count(1) as hours
			FROM
				(
					-- Collapse entries by the latest record in the hour
					-- If there are more than 1 records within the hour, only the latest will be considered
					SELECT
						va.partner_id,
						va.user_agent,
						date_trunc('hour', bst.interval_start) as hours,
						bst.project_id,
						bst.bucket_name,
						MAX(bst.interval_start) as max_interval
					FROM
						bucket_storage_tallies bst
						INNER JOIN value_attributions va ON (
							bst.project_id = va.project_id
							AND bst.bucket_name = va.bucket_name
						)
					WHERE
						bst.interval_start >= $1
						AND bst.interval_start < $2
					GROUP BY
						va.partner_id,
						va.user_agent,
						bst.project_id,
						bst.bucket_name,
						date_trunc('hour', bst.interval_start)
					ORDER BY
						max_interval DESC
				) bsti
				INNER JOIN bucket_storage_tallies bsto ON (
					bsto.project_id = bsti.project_id
					AND bsto.bucket_name = bsti.bucket_name
					AND bsto.interval_start = bsti.max_interval
				)
			GROUP BY
				bsti.partner_id,
				bsti.user_agent,
				bsto.project_id,
				bsto.bucket_name
			UNION
			-- SUM the bandwidth for the timeframe specified grouping by the partner_id, user_agent, project_id, and bucket_name
			SELECT
				va.partner_id as partner_id,
				va.user_agent as user_agent,
				bbr.project_id as project_id,
				bbr.bucket_name as bucket_name,
				0 as total,
				0 as remote,
				0 as inline,
				0 as segments,
				0 as objects,
				SUM(settled)::integer as settled,
				null as hours
			FROM
				bucket_bandwidth_rollups bbr
				INNER JOIN value_attributions va ON (
					bbr.project_id = va.project_id
					AND bbr.bucket_name = va.bucket_name
				)
			WHERE
				bbr.interval_start >= $1
				AND bbr.interval_start < $2
				-- action 2 is GET
				AND bbr.action = 2
			GROUP BY
				va.partner_id,
				va.user_agent,
				bbr.project_id,
				bbr.bucket_name
		) AS o
	GROUP BY
		o.partner_id,
		o.user_agent,
		o.project_id,
		o.bucket_name;
	`
)
|
|
|
|
// attributionDB provides access to value attribution data (which partner /
// user agent a bucket's usage is credited to) in the satellite database.
type attributionDB struct {
	db *satelliteDB
}
|
|
|
|
// Get reads the partner info.
|
|
func (keys *attributionDB) Get(ctx context.Context, projectID uuid.UUID, bucketName []byte) (info *attribution.Info, err error) {
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
dbxInfo, err := keys.db.Get_ValueAttribution_By_ProjectId_And_BucketName(ctx,
|
|
dbx.ValueAttribution_ProjectId(projectID[:]),
|
|
dbx.ValueAttribution_BucketName(bucketName),
|
|
)
|
|
if errors.Is(err, sql.ErrNoRows) {
|
|
return nil, attribution.ErrBucketNotAttributed.New("%q", bucketName)
|
|
}
|
|
if err != nil {
|
|
return nil, Error.Wrap(err)
|
|
}
|
|
|
|
return attributionFromDBX(dbxInfo)
|
|
}
|
|
|
|
// Insert implements create partner info.
|
|
func (keys *attributionDB) Insert(ctx context.Context, info *attribution.Info) (_ *attribution.Info, err error) {
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
err = keys.db.QueryRowContext(ctx, `
|
|
INSERT INTO value_attributions (project_id, bucket_name, partner_id, user_agent, last_updated)
|
|
VALUES ($1, $2, $3, $4, now())
|
|
ON CONFLICT (project_id, bucket_name) DO NOTHING
|
|
RETURNING last_updated
|
|
`, info.ProjectID[:], info.BucketName, info.PartnerID[:], info.UserAgent).Scan(&info.CreatedAt)
|
|
// TODO when sql.ErrNoRows is returned then CreatedAt is not set
|
|
if errors.Is(err, sql.ErrNoRows) {
|
|
return info, nil
|
|
}
|
|
if err != nil {
|
|
return nil, Error.Wrap(err)
|
|
}
|
|
|
|
return info, nil
|
|
}
|
|
|
|
// QueryAttribution queries partner bucket attribution data.
|
|
func (keys *attributionDB) QueryAttribution(ctx context.Context, partnerID uuid.UUID, userAgent []byte, start time.Time, end time.Time) (_ []*attribution.BucketAttribution, err error) {
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
rows, err := keys.db.DB.QueryContext(ctx, keys.db.Rebind(valueAttrQuery), partnerID[:], userAgent, start.UTC(), end.UTC(), partnerID[:], userAgent, start.UTC(), end.UTC())
|
|
if err != nil {
|
|
return nil, Error.Wrap(err)
|
|
}
|
|
defer func() { err = errs.Combine(err, rows.Close()) }()
|
|
|
|
results := []*attribution.BucketAttribution{}
|
|
for rows.Next() {
|
|
r := &attribution.BucketAttribution{}
|
|
var inline, remote float64
|
|
err := rows.Scan(&r.PartnerID, &r.UserAgent, &r.ProjectID, &r.BucketName, &r.ByteHours, &inline, &remote, &r.SegmentHours, &r.ObjectHours, &r.EgressData, &r.Hours)
|
|
if err != nil {
|
|
return results, Error.Wrap(err)
|
|
}
|
|
|
|
if r.ByteHours == 0 {
|
|
r.ByteHours = inline + remote
|
|
}
|
|
|
|
results = append(results, r)
|
|
}
|
|
return results, Error.Wrap(rows.Err())
|
|
}
|
|
|
|
// QueryAllAttribution queries all partner bucket attribution data.
|
|
func (keys *attributionDB) QueryAllAttribution(ctx context.Context, start time.Time, end time.Time) (_ []*attribution.BucketAttribution, err error) {
|
|
defer mon.Task()(&ctx)(&err)
|
|
|
|
rows, err := keys.db.DB.QueryContext(ctx, keys.db.Rebind(allValueAttrQuery), start.UTC(), end.UTC())
|
|
if err != nil {
|
|
return nil, Error.Wrap(err)
|
|
}
|
|
defer func() { err = errs.Combine(err, rows.Close()) }()
|
|
|
|
results := []*attribution.BucketAttribution{}
|
|
for rows.Next() {
|
|
r := &attribution.BucketAttribution{}
|
|
var inline, remote float64
|
|
err := rows.Scan(&r.PartnerID, &r.UserAgent, &r.ProjectID, &r.BucketName, &r.ByteHours, &inline, &remote, &r.SegmentHours, &r.ObjectHours, &r.EgressData, &r.Hours)
|
|
if err != nil {
|
|
return results, Error.Wrap(err)
|
|
}
|
|
|
|
if r.ByteHours == 0 {
|
|
r.ByteHours = inline + remote
|
|
}
|
|
results = append(results, r)
|
|
}
|
|
return results, Error.Wrap(rows.Err())
|
|
}
|
|
|
|
func attributionFromDBX(info *dbx.ValueAttribution) (*attribution.Info, error) {
|
|
partnerID, err := uuid.FromBytes(info.PartnerId)
|
|
if err != nil {
|
|
return nil, Error.Wrap(err)
|
|
}
|
|
userAgent := info.UserAgent
|
|
if err != nil {
|
|
return nil, Error.Wrap(err)
|
|
}
|
|
projectID, err := uuid.FromBytes(info.ProjectId)
|
|
if err != nil {
|
|
return nil, Error.Wrap(err)
|
|
}
|
|
|
|
return &attribution.Info{
|
|
ProjectID: projectID,
|
|
BucketName: info.BucketName,
|
|
PartnerID: partnerID,
|
|
UserAgent: userAgent,
|
|
CreatedAt: info.LastUpdated,
|
|
}, nil
|
|
}
|