storage/postgreskv: fix ultra-slow nonrecursive list (#3564)

This is based on Jeff's most excellent work to identify why
non-recursive listing under postgreskv was phenomenally slow. It turns
out PostgreSQL's query planner was actually using two sequential scans
of the pathdata table to do its job. It's unclear for how long that has
been happening, but obviously it won't scale any further.

The main change is propagating bucket association with pathnames through
the CTE so that the query planner lets itself use the pathdata index on
(bucket, fullpath) for the skipping-forward part.

Jeff also had some changes to the range ends to keep NULL from being
used- I believe with the intent of making sure the query planner was
able to use the pathdata index. My tests on postgres 9.6 and 11
indicate that those changes don't make any appreciable difference in
performance or query plan, so I'm going to leave them off for now to
avoid a careful audit of the semantic differences.

There is a test included here, which only serves to check that the new
version of the function is indeed active. To actually ensure that no
sequential scans are being used in the query plan anymore, our tests
would need to be run against a test db with lots of data already loaded
in it, and that isn't feasible for now.

Change-Id: Iffe9a1f411c54a2f742a4abb8f2df0c64fd662cb
This commit is contained in:
paul cannon 2019-11-13 17:52:14 -06:00 committed by GitHub
parent bd89f51c66
commit d5963d81d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 195 additions and 9 deletions

View File

@ -6,6 +6,7 @@ package postgreskv
import ( import (
"context" "context"
"database/sql" "database/sql"
"strings"
"testing" "testing"
"github.com/lib/pq" "github.com/lib/pq"
@ -44,6 +45,44 @@ func TestSuite(t *testing.T) {
testsuite.RunTests(t, store) testsuite.RunTests(t, store)
} }
func TestThatMigrationActuallyHappened(t *testing.T) {
store, cleanup := newTestPostgres(t)
defer cleanup()
rows, err := store.pgConn.Query(`
SELECT prosrc
FROM pg_catalog.pg_proc p,
pg_catalog.pg_namespace n
WHERE p.pronamespace = n.oid
AND p.proname = 'list_directory'
AND n.nspname = ANY(current_schemas(true))
AND p.pronargs = 4
`)
if err != nil {
t.Fatalf("failed to get list_directory source: %v", err)
}
defer func() {
if err := rows.Close(); err != nil {
t.Fatalf("failed to close rows: %v", err)
}
}()
numFound := 0
for rows.Next() {
numFound++
if numFound > 1 {
t.Fatal("there are multiple eligible list_directory() functions??")
}
var source string
if err := rows.Scan(&source); err != nil {
t.Fatalf("failed to read list_directory source: %v", err)
}
if strings.Contains(source, "distinct_prefix (truncatedpath)") {
t.Fatal("list_directory() function in pg appears to be the oldnbusted one")
}
}
}
func BenchmarkSuite(b *testing.B) { func BenchmarkSuite(b *testing.B) {
store, cleanup := newTestPostgres(b) store, cleanup := newTestPostgres(b)
defer cleanup() defer cleanup()

View File

@ -0,0 +1,44 @@
CREATE OR REPLACE FUNCTION list_directory(bucket BYTEA, dirpath BYTEA, start_at BYTEA = ''::BYTEA, limit_to INTEGER = NULL)
RETURNS SETOF path_and_meta AS $$
WITH RECURSIVE
inputs AS (
SELECT CASE WHEN dirpath = ''::BYTEA THEN NULL ELSE dirpath END AS range_low,
CASE WHEN dirpath = ''::BYTEA THEN NULL ELSE bytea_increment(dirpath) END AS range_high,
octet_length(dirpath) + 1 AS component_start,
b.delim AS delim,
b.bucketname AS bucket
FROM buckets b
WHERE bucketname = bucket
),
distinct_prefix (truncatedpath) AS (
SELECT (SELECT truncate_after(pd.fullpath, i.delim, i.component_start)
FROM pathdata pd
WHERE (i.range_low IS NULL OR pd.fullpath > i.range_low)
AND (i.range_high IS NULL OR pd.fullpath < i.range_high)
AND (start_at = '' OR pd.fullpath >= start_at)
AND pd.bucket = i.bucket
ORDER BY pd.fullpath
LIMIT 1)
FROM inputs i
UNION ALL
SELECT (SELECT truncate_after(pd.fullpath, i.delim, i.component_start)
FROM pathdata pd
WHERE pd.fullpath >= component_increment(pfx.truncatedpath, i.delim)
AND (i.range_high IS NULL OR pd.fullpath < i.range_high)
AND pd.bucket = i.bucket
ORDER BY pd.fullpath
LIMIT 1)
FROM distinct_prefix pfx, inputs i
WHERE pfx.truncatedpath IS NOT NULL
)
SELECT pfx.truncatedpath AS fullpath,
pd.metadata
FROM distinct_prefix pfx LEFT OUTER JOIN pathdata pd ON pfx.truncatedpath = pd.fullpath
WHERE pfx.truncatedpath IS NOT NULL
UNION ALL
-- this one, if it exists, can't be part of distinct_prefix (or it would cause us to skip over all
-- subcontents of the prefix we're looking for), so we tack it on here
SELECT pd.fullpath, pd.metadata FROM pathdata pd, inputs i WHERE pd.fullpath = i.range_low
ORDER BY fullpath
LIMIT limit_to;
$$ LANGUAGE 'sql' STABLE;

View File

@ -0,0 +1,46 @@
CREATE OR REPLACE FUNCTION list_directory(bucket BYTEA, dirpath BYTEA, start_at BYTEA = ''::BYTEA, limit_to INTEGER = NULL)
RETURNS SETOF path_and_meta AS $$
WITH RECURSIVE
inputs AS (
SELECT CASE WHEN dirpath = ''::BYTEA THEN NULL ELSE dirpath END AS range_low,
CASE WHEN dirpath = ''::BYTEA THEN NULL ELSE bytea_increment(dirpath) END AS range_high,
octet_length(dirpath) + 1 AS component_start,
b.delim AS delim,
b.bucketname AS bucket
FROM buckets b
WHERE bucketname = bucket
),
distinct_prefix (bucket, truncatedpath) AS (
SELECT i.bucket,
(SELECT truncate_after(pd.fullpath, i.delim, i.component_start)
FROM pathdata pd
WHERE (i.range_low IS NULL OR pd.fullpath > i.range_low)
AND (i.range_high IS NULL OR pd.fullpath < i.range_high)
AND (start_at = '' OR pd.fullpath >= start_at)
AND pd.bucket = i.bucket
ORDER BY pd.fullpath
LIMIT 1)
FROM inputs i
UNION ALL
SELECT i.bucket,
(SELECT truncate_after(pd.fullpath, i.delim, i.component_start)
FROM pathdata pd
WHERE pd.fullpath >= component_increment(pfx.truncatedpath, i.delim)
AND (i.range_high IS NULL OR pd.fullpath < i.range_high)
AND pd.bucket = i.bucket
ORDER BY pd.fullpath
LIMIT 1)
FROM distinct_prefix pfx, inputs i
WHERE pfx.truncatedpath IS NOT NULL
)
SELECT pfx.truncatedpath AS fullpath,
pd.metadata
FROM distinct_prefix pfx LEFT OUTER JOIN pathdata pd ON pfx.truncatedpath = pd.fullpath AND pd.bucket = pfx.bucket
WHERE pfx.truncatedpath IS NOT NULL
UNION ALL
-- this one, if it exists, can't be part of distinct_prefix (or it would cause us to skip over all
-- subcontents of the prefix we're looking for), so we tack it on here
SELECT pd.fullpath, pd.metadata FROM pathdata pd, inputs i WHERE pd.fullpath = i.range_low AND pd.bucket = i.bucket
ORDER BY fullpath
LIMIT limit_to;
$$ LANGUAGE 'sql' STABLE;

File diff suppressed because one or more lines are too long