satellite/metabase: use table stats if they are up to date

Currently, to get the number of entries in the segments table we run a
heavy SELECT count(*) operation. For the biggest satellite it now takes
25 min. We use this method to get stats before and after the
segments loop, so it adds almost 1h to the overall loop time.

With the version of crdb we are currently using, this additional code
won't be used, because the global configuration for the stats refresh rate
is inaccurate for a table as large as `segments`. Soon we should be able to
upgrade crdb, adjust the refresh rate per table, and configure it to
satisfy the defined threshold.

https://github.com/storj/storj/issues/5544

Change-Id: I05cfd9154f08894d2bc56bf716b436d1b03b87f1
This commit is contained in:
Michal Niewrzal 2023-03-03 11:33:51 +01:00
parent 5c744d7ed4
commit 06b51258be
2 changed files with 43 additions and 0 deletions

View File

@ -5,9 +5,15 @@ package metabase
import (
"context"
"database/sql"
"errors"
"time"
"storj.io/private/dbutil"
)
// statsUpToDateThreshold is the maximum age of the segments table statistics
// for them to be used as the segment count instead of running SELECT count(*).
const statsUpToDateThreshold = 8 * time.Hour
// GetTableStats contains arguments necessary for getting table statistics.
type GetTableStats struct {
AsOfSystemInterval time.Duration
@ -22,6 +28,19 @@ type TableStats struct {
func (db *DB) GetTableStats(ctx context.Context, opts GetTableStats) (result TableStats, err error) {
defer mon.Task()(&ctx)(&err)
// if it's cockroach and statistics are up to date we will use them to get segments count
if db.impl == dbutil.Cockroach {
var created time.Time
err := db.db.QueryRowContext(ctx, `WITH stats AS (SHOW STATISTICS FOR TABLE segments) SELECT row_count, created FROM stats ORDER BY row_count DESC LIMIT 1`).
Scan(&result.SegmentCount, &created)
if err != nil && !errors.Is(err, sql.ErrNoRows) {
return TableStats{}, err
}
if !created.IsZero() && statsUpToDateThreshold > time.Since(created) {
return result, nil
}
}
err = db.db.QueryRowContext(ctx, `SELECT count(*) FROM segments `+db.impl.AsOfSystemInterval(opts.AsOfSystemInterval)).Scan(&result.SegmentCount)
if err != nil {
return TableStats{}, err

View File

@ -7,6 +7,8 @@ import (
"testing"
"time"
"github.com/stretchr/testify/require"
"storj.io/common/testcontext"
"storj.io/private/dbutil"
"storj.io/storj/satellite/metabase"
@ -83,6 +85,28 @@ func TestGetTableStats(t *testing.T) {
},
}.Check(ctx, t, db)
})
t.Run("use statistics", func(t *testing.T) {
defer metabasetest.DeleteAll{}.Check(ctx, t, db)
obj1 := metabasetest.RandObjectStream()
metabasetest.CreateTestObject{}.Run(ctx, t, db, obj1, 4)
_, err := db.UnderlyingTagSQL().ExecContext(ctx, "CREATE STATISTICS test FROM segments")
require.NoError(t, err)
// add some segments after creating statistics to know that results are taken
// from statistics and not directly with SELECT count(*)
obj1 = metabasetest.RandObjectStream()
metabasetest.CreateTestObject{}.Run(ctx, t, db, obj1, 4)
metabasetest.GetTableStats{
Opts: metabase.GetTableStats{},
Result: metabase.TableStats{
SegmentCount: 4,
},
}.Check(ctx, t, db)
})
}
})
}