storj/storage/postgreskv/schema/2020011601_kvimplementation_switchover.down.sql

-- Registry of buckets. Each row names a bucket and records the byte value used to separate
-- path components for the paths stored under that bucket.
CREATE TABLE buckets (
    -- raw bucket name; also the primary key
    bucketname BYTEA PRIMARY KEY,
    -- delimiter byte expressed as an integer; the CHECK admits only 1 through 254
    delim INT NOT NULL CHECK (delim > 0 AND delim < 255)
);

-- Seed the single placeholder bucket: an empty name with '/' as its delimiter. One dummy row is
-- enough for everything as long as the KeyValueStore interface cannot pass the bucket separately
-- and storj does not support changing the delimiter character per bucket.
INSERT INTO buckets (bucketname, delim)
VALUES (''::BYTEA, ascii('/'));


-- Path storage: one row per (bucket, fullpath) pair together with its metadata blob.
CREATE TABLE pathdata (
    -- owning bucket; must match an existing buckets.bucketname
    bucket BYTEA NOT NULL REFERENCES buckets (bucketname),
    -- complete path of the entry; the empty path is rejected
    fullpath BYTEA NOT NULL CHECK (fullpath <> ''),
    -- payload stored for the path
    metadata BYTEA NOT NULL,

    -- a given path may occur at most once within a bucket
    PRIMARY KEY (bucket, fullpath)
);

-- Text rendering of pathdata: every BYTEA column is passed through encode(..., 'escape') so the
-- rows can be read as text (presumably for manual inspection, going by the view's name).
CREATE VIEW pathdata_pretty AS
SELECT
    encode(pd.bucket, 'escape') AS bucket,
    encode(pd.fullpath, 'escape') AS fullpath,
    encode(pd.metadata, 'escape') AS metadata
FROM pathdata pd;


-- truncate_after cuts bpath off right after the first occurrence of the delimiter byte 'delim'
-- that sits at 1-based position 'afterpos' or later. The delimiter itself is kept in the result.
-- When no delimiter occurs from 'afterpos' onward, bpath is returned unchanged. The function is
-- STRICT, so any NULL argument yields NULL.
--
-- Examples:
--
--     truncate_after(''::BYTEA,            ascii('/'), 1)  ->  ''::BYTEA
--     truncate_after('foo'::BYTEA,         ascii('/'), 1)  ->  'foo'::BYTEA
--     truncate_after('foo/'::BYTEA,        ascii('/'), 1)  ->  'foo/'::BYTEA
--     truncate_after('foo/bar/baz'::BYTEA, ascii('/'), 4)  ->  'foo/'::BYTEA
--     truncate_after('foo/bar/baz'::BYTEA, ascii('/'), 5)  ->  'foo/bar/'::BYTEA
--     truncate_after('foo/bar/baz'::BYTEA, ascii('/'), 8)  ->  'foo/bar/'::BYTEA
--     truncate_after('foo/bar/baz'::BYTEA, ascii('/'), 9)  ->  'foo/bar/baz'::BYTEA
--     truncate_after('foo//bar/bz'::BYTEA, ascii('/'), 4)  ->  'foo/'::BYTEA
--     truncate_after('foo//bar/bz'::BYTEA, ascii('/'), 5)  ->  'foo//'::BYTEA
--
CREATE FUNCTION truncate_after(bpath BYTEA, delim INTEGER, afterpos INTEGER) RETURNS BYTEA AS $$
DECLARE
    -- 1-based offset of the first delimiter inside the tail of bpath that begins at afterpos;
    -- 0 when the tail holds no delimiter. set_byte() turns the integer into a one-byte needle.
    rel_pos INTEGER := position(set_byte(' '::BYTEA, 0, delim) IN substring(bpath FROM afterpos));
BEGIN
    IF rel_pos = 0 THEN
        RETURN bpath;
    END IF;
    -- afterpos + rel_pos - 1 is the delimiter's absolute position, i.e. the length to keep
    RETURN substring(bpath FROM 1 FOR afterpos + rel_pos - 1);
END;
$$ LANGUAGE 'plpgsql' IMMUTABLE STRICT;


-- bytea_increment strips every trailing 0xFF byte from b and then adds one to the last byte that
-- remains. It returns NULL when nothing remains to bump, i.e. when b is empty or made up solely
-- of 0xFF bytes. list_directory and list_directory_reverse use the result as the exclusive upper
-- bound of the key range that starts with a given prefix. STRICT: NULL in, NULL out.
CREATE FUNCTION bytea_increment(b BYTEA) RETURNS BYTEA AS $$
DECLARE
    -- 0-based index of the byte that will be bumped; starts at the last byte of b
    idx INTEGER := octet_length(b) - 1;
BEGIN
    LOOP
        -- walked off the front: b was empty or contained only 0xFF bytes
        IF idx < 0 THEN
            RETURN NULL;
        END IF;
        EXIT WHEN get_byte(b, idx) <> 255;
        idx := idx - 1;
    END LOOP;
    -- keep bytes 0..idx and bump the last one; it is known to be below 255 here
    RETURN set_byte(substring(b FROM 1 FOR idx + 1), idx, get_byte(b, idx) + 1);
END;
$$ LANGUAGE 'plpgsql' IMMUTABLE STRICT;


-- component_increment takes a path as might be found in the pathdata table, with a delimiter
-- appended if that path has any sub-elements, and returns the next possible path that _could_ be
-- in the table, skipping over any potential sub-elements.
--
--   * bpath does not end in the delimiter: a zero byte is appended.
--   * bpath ends in the delimiter: that final byte is replaced by delim + 1, or, when the
--     delimiter is 255 and cannot be bumped in place, bytea_increment(bpath) is used instead.
--
-- Examples:
--
--     component_increment('/'::BYTEA, ascii('/'))  ->  '0'::BYTEA
--         (nothing can be between '/' and '0' other than subpaths under '/')
--
--     component_increment('/foo/bar/'::BYTEA, ascii('/'))  ->  '/foo/bar0'::BYTEA
--
--     component_increment('/foo/barboom'::BYTEA, ascii('/'))  ->  ('/foo/barboom' || E'\\x00')
--         (nothing can be between '/foo/barboom' and '/foo/barboom\x00' in normal BYTEA ordering)
--
--     component_increment(E'\\xFEFFFF'::BYTEA, 255)  ->  E'\\xFF'::BYTEA
--
CREATE FUNCTION component_increment(bpath BYTEA, delim INTEGER) RETURNS BYTEA AS $$
SELECT CASE
           -- no trailing delimiter: the immediate successor is bpath plus a zero byte
           WHEN get_byte(bpath, octet_length(bpath) - 1) <> delim
               THEN bpath || E'\\x00'::BYTEA
           -- trailing delimiter is already the maximum byte value
           WHEN delim = 255
               THEN bytea_increment(bpath)
           -- trailing delimiter: step just past everything underneath it
           ELSE set_byte(bpath, octet_length(bpath) - 1, delim + 1)
       END;
$$ LANGUAGE 'sql' IMMUTABLE STRICT;


-- Row shape returned by list_directory and list_directory_reverse: a path plus its metadata.
CREATE TYPE path_and_meta AS (
    fullpath BYTEA,
    metadata BYTEA
);

-- list_directory lists, in ascending path order, the entries found directly under 'dirpath' in
-- the given bucket.
--
-- Every stored path inside the range is cut off after the first delimiter that follows the
-- dirpath prefix (see truncate_after), and one row is produced per distinct truncated path. The
-- metadata column is filled in when a pathdata row with exactly that truncated path exists in
-- the bucket and is NULL otherwise. A row whose path equals dirpath itself is added if present.
--
-- Parameters:
--     bucket    name of the bucket to list; must match a row in buckets
--     dirpath   prefix to list under; the empty string places no bounds on the range
--     start_at  when not empty, the first entry is taken from paths >= start_at
--     limit_to  maximum number of rows to return; NULL means no limit
CREATE OR REPLACE FUNCTION list_directory(bucket BYTEA, dirpath BYTEA, start_at BYTEA = ''::BYTEA, limit_to INTEGER = NULL)
    RETURNS SETOF path_and_meta AS $$
WITH RECURSIVE
    -- scan bounds derived from dirpath, plus the delimiter of the requested bucket
    inputs AS (
        SELECT NULLIF(dirpath, ''::BYTEA) AS range_low,
               bytea_increment(NULLIF(dirpath, ''::BYTEA)) AS range_high,
               octet_length(dirpath) + 1 AS component_start,
               bkt.delim AS delim,
               bkt.bucketname AS bucket
        FROM buckets bkt
        WHERE bkt.bucketname = bucket
    ),
    -- one row per distinct truncated path; a NULL truncatedpath marks the end of the walk
    distinct_prefix (bucket, truncatedpath) AS (
        -- first entry: smallest path strictly above range_low, honouring start_at
        SELECT inp.bucket,
               (SELECT truncate_after(p.fullpath, inp.delim, inp.component_start)
                FROM pathdata p
                WHERE p.bucket = inp.bucket
                  AND (inp.range_low IS NULL OR p.fullpath > inp.range_low)
                  AND (inp.range_high IS NULL OR p.fullpath < inp.range_high)
                  AND (start_at = '' OR p.fullpath >= start_at)
                ORDER BY p.fullpath
                LIMIT 1)
        FROM inputs inp
        UNION ALL
        -- next entry: smallest path at or beyond the successor of the previous truncated path,
        -- which jumps over everything nested beneath the previous entry
        SELECT inp.bucket,
               (SELECT truncate_after(p.fullpath, inp.delim, inp.component_start)
                FROM pathdata p
                WHERE p.bucket = inp.bucket
                  AND p.fullpath >= component_increment(prev.truncatedpath, inp.delim)
                  AND (inp.range_high IS NULL OR p.fullpath < inp.range_high)
                ORDER BY p.fullpath
                LIMIT 1)
        FROM distinct_prefix prev
        CROSS JOIN inputs inp
        WHERE prev.truncatedpath IS NOT NULL
    )
SELECT dp.truncatedpath AS fullpath,
       p.metadata
FROM distinct_prefix dp
LEFT JOIN pathdata p
    ON p.bucket = dp.bucket
   AND p.fullpath = dp.truncatedpath
WHERE dp.truncatedpath IS NOT NULL
UNION ALL
-- the row for dirpath itself, if it exists, can't be part of distinct_prefix (or it would cause
-- us to skip over all subcontents of the prefix we're looking for), so it is tacked on here
SELECT p.fullpath,
       p.metadata
FROM pathdata p
INNER JOIN inputs inp
    ON p.bucket = inp.bucket
   AND p.fullpath = inp.range_low
ORDER BY fullpath
LIMIT limit_to;
$$ LANGUAGE 'sql' STABLE;
-- list_directory_reverse lists, in descending path order, the entries found under 'dirpath' in
-- the given bucket.
--
-- Every stored path inside the range is cut off after the first delimiter that follows the
-- dirpath prefix (see truncate_after), and one row is produced per step of the walk. The
-- metadata column is filled in when a pathdata row with exactly that truncated path exists in
-- the same bucket and is NULL otherwise. Unlike list_directory, the lower bound is inclusive,
-- so a row whose path equals dirpath is picked up by the walk itself.
--
-- Parameters:
--     bucket    name of the bucket to list; must match a row in buckets
--     dirpath   prefix to list under; the empty string places no bounds on the range
--     start_at  when not empty, the first entry is taken from paths <= start_at
--     limit_to  maximum number of rows to return; NULL means no limit
CREATE FUNCTION list_directory_reverse(bucket BYTEA, dirpath BYTEA, start_at BYTEA = ''::BYTEA, limit_to INTEGER = NULL)
    RETURNS SETOF path_and_meta AS $$
WITH RECURSIVE
    -- scan bounds derived from dirpath, plus the delimiter of the requested bucket
    inputs AS (
        SELECT CASE WHEN dirpath = ''::BYTEA THEN NULL ELSE dirpath END AS range_low,
               CASE WHEN dirpath = ''::BYTEA THEN NULL ELSE bytea_increment(dirpath) END AS range_high,
               octet_length(dirpath) + 1 AS component_start,
               b.delim AS delim,
               b.bucketname AS bucket
        FROM buckets b
        WHERE bucketname = bucket
    ),
    -- one row per step of the walk; a NULL truncatedpath marks the end. The bucket is carried
    -- along so the final join can be restricted to it, as list_directory does.
    distinct_prefix (bucket, truncatedpath) AS (
        -- first entry: largest path inside the range, honouring start_at
        SELECT i.bucket,
               (SELECT truncate_after(pd.fullpath, i.delim, i.component_start)
                FROM pathdata pd
                WHERE (i.range_low IS NULL OR pd.fullpath >= i.range_low)
                  AND (i.range_high IS NULL OR pd.fullpath < i.range_high)
                  AND (start_at = '' OR pd.fullpath <= start_at)
                  AND pd.bucket = i.bucket
                ORDER BY pd.fullpath DESC
                LIMIT 1)
        FROM inputs i
        UNION ALL
        -- next entry: largest path strictly below the previous truncated path
        SELECT i.bucket,
               (SELECT truncate_after(pd.fullpath, i.delim, i.component_start)
                FROM pathdata pd
                WHERE (i.range_low IS NULL OR pd.fullpath >= i.range_low)
                  AND pd.fullpath < pfx.truncatedpath
                  AND pd.bucket = i.bucket
                ORDER BY pd.fullpath DESC
                LIMIT 1)
        FROM distinct_prefix pfx, inputs i
        WHERE pfx.truncatedpath IS NOT NULL
    )
SELECT pfx.truncatedpath AS fullpath,
       pd.metadata
-- the join must match on bucket as well as path: joining on the path alone would attach
-- metadata from (and emit duplicate rows for) same-named paths stored in other buckets
FROM distinct_prefix pfx LEFT OUTER JOIN pathdata pd ON pfx.truncatedpath = pd.fullpath AND pd.bucket = pfx.bucket
WHERE pfx.truncatedpath IS NOT NULL
ORDER BY fullpath DESC
LIMIT limit_to;
$$ LANGUAGE 'sql' STABLE;
Add proper Pathdata Index (#3750) 2020-01-16 23:48:59 +00:00			`CREATE TABLE buckets (`
			`bucketname BYTEA`
			`PRIMARY KEY,`
			`delim INT`
			`NOT NULL`
			`CHECK (delim > 0 AND delim < 255)`
			`);`

			`-- until the KeyValueStore interface supports passing the bucket separately, or`
			`-- until storj actually supports changing the delimiter character per bucket, this`
			`-- dummy row should suffice for everything.`
			`INSERT INTO buckets (bucketname, delim) VALUES (''::BYTEA, ascii('/'));`


			`CREATE TABLE pathdata (`
			`bucket BYTEA`
			`NOT NULL`
			`REFERENCES buckets (bucketname),`
			`fullpath BYTEA`
			`NOT NULL`
			`CHECK (fullpath <> ''),`
			`metadata BYTEA`
			`NOT NULL,`

			`PRIMARY KEY (bucket, fullpath)`
			`);`

			`CREATE VIEW pathdata_pretty AS`
			`SELECT encode(bucket, 'escape') AS bucket,`
			`encode(fullpath, 'escape') AS fullpath,`
			`encode(metadata, 'escape') AS metadata`
			`FROM pathdata;`


			`-- given a path as might be found in the pathdata table, truncate it after the next delimiter to be`
			`-- found at or after 'afterpos', if any.`
			`--`
			`-- Examples:`
			`--`
			`-- truncate_after(''::BYTEA, ascii('/'), 1) -> ''::BYTEA`
			`-- truncate_after('foo'::BYTEA, ascii('/'), 1) -> 'foo'::BYTEA`
			`-- truncate_after('foo/'::BYTEA, ascii('/'), 1) -> 'foo/'::BYTEA`
			`-- truncate_after('foo/bar/baz'::BYTEA, ascii('/'), 4) -> 'foo/'::BYTEA`
			`-- truncate_after('foo/bar/baz'::BYTEA, ascii('/'), 5) -> 'foo/bar/'::BYTEA`
			`-- truncate_after('foo/bar/baz'::BYTEA, ascii('/'), 8) -> 'foo/bar/'::BYTEA`
			`-- truncate_after('foo/bar/baz'::BYTEA, ascii('/'), 9) -> 'foo/bar/baz'::BYTEA`
			`-- truncate_after('foo//bar/bz'::BYTEA, ascii('/'), 4) -> 'foo/'::BYTEA`
			`-- truncate_after('foo//bar/bz'::BYTEA, ascii('/'), 5) -> 'foo//'::BYTEA`
			`--`
			`CREATE FUNCTION truncate_after(bpath BYTEA, delim INTEGER, afterpos INTEGER) RETURNS BYTEA AS $$`
			`DECLARE`
			`suff BYTEA;`
			`delimpos INTEGER;`
			`BEGIN`
			`suff := substring(bpath FROM afterpos);`
			`delimpos := position(set_byte(' '::BYTEA, 0, delim) IN suff);`
			`IF delimpos > 0 THEN`
			`RETURN substring(bpath FROM 1 FOR (afterpos + delimpos - 1));`
			`END IF;`
			`RETURN bpath;`
			`END;`
			`$$ LANGUAGE 'plpgsql' IMMUTABLE STRICT;`


			`CREATE FUNCTION bytea_increment(b BYTEA) RETURNS BYTEA AS $$`
			`BEGIN`
			`WHILE b <> ''::BYTEA AND get_byte(b, octet_length(b) - 1) = 255 LOOP`
			`b := substring(b FROM 1 FOR octet_length(b) - 1);`
			`END LOOP;`
			`IF b = ''::BYTEA THEN`
			`RETURN NULL;`
			`END IF;`
			`RETURN set_byte(b, octet_length(b) - 1, get_byte(b, octet_length(b) - 1) + 1);`
			`END;`
			`$$ LANGUAGE 'plpgsql' IMMUTABLE STRICT;`


			`-- Given a path as might be found in the pathdata table, with a delimeter appended if that path`
			`-- has any sub-elements, return the next possible path that _could_ be in the table (skipping over`
			`-- any potential sub-elements).`
			`--`
			`-- Examples:`
			`--`
			`-- component_increment('/'::BYTEA, ascii('/')) -> '0'::BYTEA`
			`-- (nothing can be between '/' and '0' other than subpaths under '/')`
			`--`
			`-- component_increment('/foo/bar/'::BYTEA, ascii('/')) -> '/foo/bar0'::BYTEA`
			`--`
			`-- component_increment('/foo/barboom'::BYTEA, ascii('/')) -> ('/foo/barboom' \|\| E'\\x00')`
			`-- (nothing can be between '/foo/barboom' and '/foo/barboom\x00' in normal BYTEA ordering)`
			`--`
			`-- component_increment(E'\\xFEFFFF'::BYTEA, 255) -> E'\\xFF'::BYTEA`
			`--`
			`CREATE FUNCTION component_increment(bpath BYTEA, delim INTEGER) RETURNS BYTEA AS $$`
			`SELECT CASE WHEN get_byte(bpath, octet_length(bpath) - 1) = delim`
			`THEN CASE WHEN delim = 255`
			`THEN bytea_increment(bpath)`
			`ELSE set_byte(bpath, octet_length(bpath) - 1, delim + 1)`
			`END`
			`ELSE bpath \|\| E'\\x00'::BYTEA`
			`END;`
			`$$ LANGUAGE 'sql' IMMUTABLE STRICT;`


			`CREATE TYPE path_and_meta AS (`
			`fullpath BYTEA,`
			`metadata BYTEA`
			`);`

			`CREATE OR REPLACE FUNCTION list_directory(bucket BYTEA, dirpath BYTEA, start_at BYTEA = ''::BYTEA, limit_to INTEGER = NULL)`
			`RETURNS SETOF path_and_meta AS $$`
			`WITH RECURSIVE`
			`inputs AS (`
			`SELECT CASE WHEN dirpath = ''::BYTEA THEN NULL ELSE dirpath END AS range_low,`
			`CASE WHEN dirpath = ''::BYTEA THEN NULL ELSE bytea_increment(dirpath) END AS range_high,`
			`octet_length(dirpath) + 1 AS component_start,`
			`b.delim AS delim,`
			`b.bucketname AS bucket`
			`FROM buckets b`
			`WHERE bucketname = bucket`
			`),`
			`distinct_prefix (bucket, truncatedpath) AS (`
			`SELECT i.bucket,`
			`(SELECT truncate_after(pd.fullpath, i.delim, i.component_start)`
			`FROM pathdata pd`
			`WHERE (i.range_low IS NULL OR pd.fullpath > i.range_low)`
			`AND (i.range_high IS NULL OR pd.fullpath < i.range_high)`
			`AND (start_at = '' OR pd.fullpath >= start_at)`
			`AND pd.bucket = i.bucket`
			`ORDER BY pd.fullpath`
			`LIMIT 1)`
			`FROM inputs i`
			`UNION ALL`
			`SELECT i.bucket,`
			`(SELECT truncate_after(pd.fullpath, i.delim, i.component_start)`
			`FROM pathdata pd`
			`WHERE pd.fullpath >= component_increment(pfx.truncatedpath, i.delim)`
			`AND (i.range_high IS NULL OR pd.fullpath < i.range_high)`
			`AND pd.bucket = i.bucket`
			`ORDER BY pd.fullpath`
			`LIMIT 1)`
			`FROM distinct_prefix pfx, inputs i`
			`WHERE pfx.truncatedpath IS NOT NULL`
			`)`
			`SELECT pfx.truncatedpath AS fullpath,`
			`pd.metadata`
			`FROM distinct_prefix pfx LEFT OUTER JOIN pathdata pd ON pfx.truncatedpath = pd.fullpath AND pd.bucket = pfx.bucket`
			`WHERE pfx.truncatedpath IS NOT NULL`
			`UNION ALL`
			`-- this one, if it exists, can't be part of distinct_prefix (or it would cause us to skip over all`
			`-- subcontents of the prefix we're looking for), so we tack it on here`
			`SELECT pd.fullpath, pd.metadata FROM pathdata pd, inputs i WHERE pd.fullpath = i.range_low AND pd.bucket = i.bucket`
			`ORDER BY fullpath`
			`LIMIT limit_to;`
			`$$ LANGUAGE 'sql' STABLE;`
			`CREATE FUNCTION list_directory_reverse(bucket BYTEA, dirpath BYTEA, start_at BYTEA = ''::BYTEA, limit_to INTEGER = NULL)`
			`RETURNS SETOF path_and_meta AS $$`
			`WITH RECURSIVE`
			`inputs AS (`
			`SELECT CASE WHEN dirpath = ''::BYTEA THEN NULL ELSE dirpath END AS range_low,`
			`CASE WHEN dirpath = ''::BYTEA THEN NULL ELSE bytea_increment(dirpath) END AS range_high,`
			`octet_length(dirpath) + 1 AS component_start,`
			`b.delim AS delim,`
			`b.bucketname AS bucket`
			`FROM buckets b`
			`WHERE bucketname = bucket`
			`),`
			`distinct_prefix (truncatedpath) AS (`
			`SELECT (SELECT truncate_after(pd.fullpath, i.delim, i.component_start)`
			`FROM pathdata pd`
			`WHERE (i.range_low IS NULL OR pd.fullpath >= i.range_low)`
			`AND (i.range_high IS NULL OR pd.fullpath < i.range_high)`
			`AND (start_at = '' OR pd.fullpath <= start_at)`
			`AND pd.bucket = i.bucket`
			`ORDER BY pd.fullpath DESC`
			`LIMIT 1)`
			`FROM inputs i`
			`UNION ALL`
			`SELECT (SELECT truncate_after(pd.fullpath, i.delim, i.component_start)`
			`FROM pathdata pd`
			`WHERE (i.range_low IS NULL OR pd.fullpath >= i.range_low)`
			`AND pd.fullpath < pfx.truncatedpath`
			`AND pd.bucket = i.bucket`
			`ORDER BY pd.fullpath DESC`
			`LIMIT 1)`
			`FROM distinct_prefix pfx, inputs i`
			`WHERE pfx.truncatedpath IS NOT NULL`
			`)`
			`SELECT pfx.truncatedpath AS fullpath,`
			`pd.metadata`
			`FROM distinct_prefix pfx LEFT OUTER JOIN pathdata pd ON pfx.truncatedpath = pd.fullpath`
			`WHERE pfx.truncatedpath IS NOT NULL`
			`ORDER BY fullpath DESC`
			`LIMIT limit_to;`
			`$$ LANGUAGE 'sql' STABLE;`