9ebe388621
Add ability to limit updates in migrations. To make sure things look okay, we can run the migration with a limit of something like 10 or 30 and inspect the migrated columns to verify they are correct. This has no effect on subsequently running the full migration. Change-Id: I2c74879c8909c7938f994e1bd972d19325bc01f0
633 lines
18 KiB
Go
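The commit message above describes spot-checking the migration by running it with a small limit before the full run. Below is a minimal standalone sketch of what such a limited invocation could look like; it assumes the Config, Partners, and MigrateTables declarations from the file that follows, while the connection handling and the function name runLimited are illustrative and not part of this change.

// A standalone sketch, not part of the migration tool itself. It assumes the
// Config, Partners, and MigrateTables declarations from the file below; the
// connection handling and the name runLimited are illustrative.
package main

import (
	"context"

	pgx "github.com/jackc/pgx/v4"
	"go.uber.org/zap"
)

// runLimited runs the migration capped at a small batch (e.g. 10 or 30 rows
// per table) so the resulting user_agent values can be inspected by hand
// before the full migration is run.
func runLimited(ctx context.Context, log *zap.Logger, dsn string, partners *Partners, limit int) error {
	conn, err := pgx.Connect(ctx, dsn)
	if err != nil {
		return err
	}
	defer func() { _ = conn.Close(ctx) }()

	// Limiting the run does not affect a later full migration: already
	// migrated rows have user_agent set and are skipped by the updates.
	return MigrateTables(ctx, log, conn, partners, Config{Limit: limit})
}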
// Copyright (C) 2021 Storj Labs, Inc.
// See LICENSE for copying information.

package main

import (
	"context"

	pgx "github.com/jackc/pgx/v4"
	"github.com/zeebo/errs"
	"go.uber.org/zap"

	"storj.io/private/dbutil/cockroachutil"
	"storj.io/private/dbutil/pgutil"
)

// MigrateTables runs the migration for each table.
func MigrateTables(ctx context.Context, log *zap.Logger, conn *pgx.Conn, p *Partners, config Config) (err error) {
	err = MigrateUsers(ctx, log, conn, p, config)
	if err != nil {
		return errs.New("error migrating users: %w", err)
	}
	err = MigrateProjects(ctx, log, conn, p, config)
	if err != nil {
		return errs.New("error migrating projects: %w", err)
	}
	err = MigrateAPIKeys(ctx, log, conn, p, config)
	if err != nil {
		return errs.New("error migrating api_keys: %w", err)
	}
	err = MigrateBucketMetainfos(ctx, log, conn, p, config)
	if err != nil {
		return errs.New("error migrating bucket_metainfos: %w", err)
	}
	err = MigrateValueAttributions(ctx, log, conn, p, config)
	if err != nil {
		return errs.New("error migrating value_attributions: %w", err)
	}
	return nil
}

// MigrateUsers updates the user_agent column to corresponding Partners.Names or partner_id if applicable.
func MigrateUsers(ctx context.Context, log *zap.Logger, conn *pgx.Conn, p *Partners, config Config) (err error) {
	defer mon.Task()(&ctx)(&err)

	// We select the next id then use limit as an offset which actually gives us limit+1 rows.
	offset := config.Limit - 1

	startID := []byte{}
	nextID := []byte{}
	var total int
	more := true

	// select the next ID after startID and offset the result. We will update relevant rows from the range of startID to nextID.
	_, err = conn.Prepare(ctx, "select-for-update", "SELECT id FROM users WHERE id > $1 ORDER BY id OFFSET $2 LIMIT 1")
	if err != nil {
		return errs.New("could not prepare select query")
	}

	// update range from startID to nextID. Return count of updated rows to log progress.
	_, err = conn.Prepare(ctx, "update-limited", `
		WITH partners AS (
			SELECT unnest($1::bytea[]) as id,
				unnest($2::bytea[]) as name
		),
		updated as (
			UPDATE users
			SET user_agent = (
				CASE
					WHEN partner_id IN (SELECT id FROM partners)
						THEN (SELECT name FROM partners WHERE partner_id = partners.id)
					ELSE partner_id
				END
			)
			FROM partners
			WHERE users.id > $3 AND users.id <= $4
				AND partner_id IS NOT NULL
				AND user_agent IS NULL
			RETURNING 1
		)
		SELECT count(*)
		FROM updated;
	`)
	if err != nil {
		return errs.New("could not prepare update statement")
	}

	for {
		row := conn.QueryRow(ctx, "select-for-update", startID, offset)
		err = row.Scan(&nextID)
		if err != nil {
			if errs.Is(err, pgx.ErrNoRows) {
				more = false
			} else {
				return errs.New("unable to select row for update from users: %w", err)
			}
		}

		var updated int
		for {
			var row pgx.Row
			if more {
				row = conn.QueryRow(ctx, "update-limited", pgutil.UUIDArray(p.UUIDs), pgutil.ByteaArray(p.Names), startID, nextID)
			} else {
				// if !more then the select statement reached the end of the table. Update to the end of the table.
				row = conn.QueryRow(ctx, `
					WITH partners AS (
						SELECT unnest($1::bytea[]) as id,
							unnest($2::bytea[]) as name
					),
					updated as (
						UPDATE users
						SET user_agent = (
							CASE
								WHEN partner_id IN (SELECT id FROM partners)
									THEN (SELECT name FROM partners WHERE partner_id = partners.id)
								ELSE partner_id
							END
						)
						FROM partners
						WHERE users.id > $3
							AND partner_id IS NOT NULL
							AND user_agent IS NULL
						RETURNING 1
					)
					SELECT count(*)
					FROM updated;
				`, pgutil.UUIDArray(p.UUIDs), pgutil.ByteaArray(p.Names), startID,
				)
			}
			err := row.Scan(&updated)
			if err != nil {
				if cockroachutil.NeedsRetry(err) {
					continue
				} else if errs.Is(err, pgx.ErrNoRows) {
					break
				}
				return errs.New("updating users %w", err)
			}
			break
		}

		total += updated
		if !more {
			log.Info("batch update complete", zap.Int("rows updated", updated))
			break
		}
		log.Info("batch update complete", zap.Int("rows updated", updated), zap.Binary("last id", nextID))
		startID = nextID
	}
	log.Info("users migration complete", zap.Int("total rows updated", total))
	return nil
}

// MigrateProjects updates the user_agent column to corresponding Partners.Names or partner_id if applicable.
func MigrateProjects(ctx context.Context, log *zap.Logger, conn *pgx.Conn, p *Partners, config Config) (err error) {
	defer mon.Task()(&ctx)(&err)

	// We select the next id then use limit as an offset which actually gives us limit+1 rows.
	offset := config.Limit - 1

	startID := []byte{}
	nextID := []byte{}
	var total int
	more := true

	// select the next ID after startID and offset the result. We will update relevant rows from the range of startID to nextID.
	_, err = conn.Prepare(ctx, "select-for-update", "SELECT id FROM projects WHERE id > $1 ORDER BY id OFFSET $2 LIMIT 1")
	if err != nil {
		return errs.New("could not prepare select query")
	}

	// update range from startID to nextID. Return count of updated rows to log progress.
	_, err = conn.Prepare(ctx, "update-limited", `
		WITH partners AS (
			SELECT unnest($1::bytea[]) as id,
				unnest($2::bytea[]) as name
		),
		updated as (
			UPDATE projects
			SET user_agent = (
				CASE
					WHEN partner_id IN (SELECT id FROM partners)
						THEN (SELECT name FROM partners WHERE partner_id = partners.id)
					ELSE partner_id
				END
			)
			FROM partners
			WHERE projects.id > $3 AND projects.id <= $4
				AND partner_id IS NOT NULL
				AND user_agent IS NULL
			RETURNING 1
		)
		SELECT count(*)
		FROM updated;
	`)
	if err != nil {
		return errs.New("could not prepare update statement")
	}

	for {
		row := conn.QueryRow(ctx, "select-for-update", startID, offset)
		err = row.Scan(&nextID)
		if err != nil {
			if errs.Is(err, pgx.ErrNoRows) {
				more = false
			} else {
				return errs.New("unable to select row for update from projects: %w", err)
			}
		}

		var updated int
		for {
			var row pgx.Row
			if more {
				row = conn.QueryRow(ctx, "update-limited", pgutil.UUIDArray(p.UUIDs), pgutil.ByteaArray(p.Names), startID, nextID)
			} else {
				// if !more then the select statement reached the end of the table. Update to the end of the table.
				row = conn.QueryRow(ctx, `
					WITH partners AS (
						SELECT unnest($1::bytea[]) as id,
							unnest($2::bytea[]) as name
					),
					updated as (
						UPDATE projects
						SET user_agent = (
							CASE
								WHEN partner_id IN (SELECT id FROM partners)
									THEN (SELECT name FROM partners WHERE partner_id = partners.id)
								ELSE partner_id
							END
						)
						FROM partners
						WHERE projects.id > $3
							AND partner_id IS NOT NULL
							AND user_agent IS NULL
						RETURNING 1
					)
					SELECT count(*)
					FROM updated;
				`, pgutil.UUIDArray(p.UUIDs), pgutil.ByteaArray(p.Names), startID,
				)
			}

			err := row.Scan(&updated)
			if err != nil {
				if cockroachutil.NeedsRetry(err) {
					continue
				} else if errs.Is(err, pgx.ErrNoRows) {
					break
				}
				return errs.New("updating projects %w", err)
			}
			break
		}

		total += updated
		if !more {
			log.Info("batch update complete", zap.Int("rows updated", updated))
			break
		}
		log.Info("batch update complete", zap.Int("rows updated", updated), zap.Binary("last id", nextID))
		startID = nextID
	}
	log.Info("projects migration complete", zap.Int("total rows updated", total))
	return nil
}

// MigrateAPIKeys updates the user_agent column to corresponding Partners.Names or partner_id if applicable.
func MigrateAPIKeys(ctx context.Context, log *zap.Logger, conn *pgx.Conn, p *Partners, config Config) (err error) {
	defer mon.Task()(&ctx)(&err)

	// We select the next id then use limit as an offset which actually gives us limit+1 rows.
	offset := config.Limit - 1

	startID := []byte{}
	nextID := []byte{}
	var total int
	more := true

	// select the next ID after startID and offset the result. We will update relevant rows from the range of startID to nextID.
	_, err = conn.Prepare(ctx, "select-for-update", "SELECT id FROM api_keys WHERE id > $1 ORDER BY id OFFSET $2 LIMIT 1")
	if err != nil {
		return errs.New("could not prepare select query")
	}

	// update range from startID to nextID. Return count of updated rows to log progress.
	_, err = conn.Prepare(ctx, "update-limited", `
		WITH partners AS (
			SELECT unnest($1::bytea[]) as id,
				unnest($2::bytea[]) as name
		),
		updated as (
			UPDATE api_keys
			SET user_agent = (
				CASE
					WHEN partner_id IN (SELECT id FROM partners)
						THEN (SELECT name FROM partners WHERE partner_id = partners.id)
					ELSE partner_id
				END
			)
			FROM partners
			WHERE api_keys.id > $3 AND api_keys.id <= $4
				AND partner_id IS NOT NULL
				AND user_agent IS NULL
			RETURNING 1
		)
		SELECT count(*)
		FROM updated;
	`)
	if err != nil {
		return errs.New("could not prepare update statement")
	}

	for {
		row := conn.QueryRow(ctx, "select-for-update", startID, offset)
		err = row.Scan(&nextID)
		if err != nil {
			if errs.Is(err, pgx.ErrNoRows) {
				more = false
			} else {
				return errs.New("unable to select row for update from api_keys: %w", err)
			}
		}
		var updated int
		for {
			var row pgx.Row
			if more {
				row = conn.QueryRow(ctx, "update-limited", pgutil.UUIDArray(p.UUIDs), pgutil.ByteaArray(p.Names), startID, nextID)
			} else {
				// if !more then the select statement reached the end of the table. Update to the end of the table.
				row = conn.QueryRow(ctx, `
					WITH partners AS (
						SELECT unnest($1::bytea[]) as id,
							unnest($2::bytea[]) as name
					),
					updated as (
						UPDATE api_keys
						SET user_agent = (
							CASE
								WHEN partner_id IN (SELECT id FROM partners)
									THEN (SELECT name FROM partners WHERE partner_id = partners.id)
								ELSE partner_id
							END
						)
						FROM partners
						WHERE api_keys.id > $3
							AND partner_id IS NOT NULL
							AND user_agent IS NULL
						RETURNING 1
					)
					SELECT count(*)
					FROM updated;
				`, pgutil.UUIDArray(p.UUIDs), pgutil.ByteaArray(p.Names), startID,
				)
			}
			err := row.Scan(&updated)
			if err != nil {
				if cockroachutil.NeedsRetry(err) {
					continue
				} else if errs.Is(err, pgx.ErrNoRows) {
					break
				}
				return errs.New("updating api_keys %w", err)
			}
			break
		}

		total += updated
		if !more {
			log.Info("batch update complete", zap.Int("rows updated", updated))
			break
		}
		log.Info("batch update complete", zap.Int("rows updated", updated), zap.Binary("last id", nextID))
		startID = nextID
	}
	log.Info("api_keys migration complete", zap.Int("total rows updated", total))
	return nil
}

// MigrateBucketMetainfos updates the user_agent column to corresponding Partners.Names or partner_id if applicable.
func MigrateBucketMetainfos(ctx context.Context, log *zap.Logger, conn *pgx.Conn, p *Partners, config Config) (err error) {
	defer mon.Task()(&ctx)(&err)

	// We select the next id then use limit as an offset which actually gives us limit+1 rows.
	offset := config.Limit - 1

	startID := []byte{}
	nextID := []byte{}
	var total int
	more := true

	// select the next ID after startID and offset the result. We will update relevant rows from the range of startID to nextID.
	_, err = conn.Prepare(ctx, "select-for-update", "SELECT id FROM bucket_metainfos WHERE id > $1 ORDER BY id OFFSET $2 LIMIT 1")
	if err != nil {
		return errs.New("could not prepare select query")
	}

	// update range from startID to nextID. Return count of updated rows to log progress.
	_, err = conn.Prepare(ctx, "update-limited", `
		WITH partners AS (
			SELECT unnest($1::bytea[]) as id,
				unnest($2::bytea[]) as name
		),
		updated as (
			UPDATE bucket_metainfos
			SET user_agent = (
				CASE
					WHEN partner_id IN (SELECT id FROM partners)
						THEN (SELECT name FROM partners WHERE partner_id = partners.id)
					ELSE partner_id
				END
			)
			FROM partners
			WHERE bucket_metainfos.id > $3 AND bucket_metainfos.id <= $4
				AND partner_id IS NOT NULL
				AND user_agent IS NULL
			RETURNING 1
		)
		SELECT count(*)
		FROM updated;
	`)
	if err != nil {
		return errs.New("could not prepare update statement")
	}

	for {
		row := conn.QueryRow(ctx, "select-for-update", startID, offset)
		err = row.Scan(&nextID)
		if err != nil {
			if errs.Is(err, pgx.ErrNoRows) {
				more = false
			} else {
				return errs.New("unable to select row for update from bucket_metainfos: %w", err)
			}
		}
		var updated int
		for {
			var row pgx.Row
			if more {
				row = conn.QueryRow(ctx, "update-limited", pgutil.UUIDArray(p.UUIDs), pgutil.ByteaArray(p.Names), startID, nextID)
			} else {
				// if !more then the select statement reached the end of the table. Update to the end of the table.
				row = conn.QueryRow(ctx, `
					WITH partners AS (
						SELECT unnest($1::bytea[]) as id,
							unnest($2::bytea[]) as name
					),
					updated as (
						UPDATE bucket_metainfos
						SET user_agent = (
							CASE
								WHEN partner_id IN (SELECT id FROM partners)
									THEN (SELECT name FROM partners WHERE partner_id = partners.id)
								ELSE partner_id
							END
						)
						FROM partners
						WHERE bucket_metainfos.id > $3
							AND partner_id IS NOT NULL
							AND user_agent IS NULL
						RETURNING 1
					)
					SELECT count(*)
					FROM updated;
				`, pgutil.UUIDArray(p.UUIDs), pgutil.ByteaArray(p.Names), startID,
				)
			}
			err := row.Scan(&updated)
			if err != nil {
				if cockroachutil.NeedsRetry(err) {
					continue
				} else if errs.Is(err, pgx.ErrNoRows) {
					break
				}
				return errs.New("updating bucket_metainfos %w", err)
			}
			break
		}

		total += updated
		if !more {
			log.Info("batch update complete", zap.Int("rows updated", updated))
			break
		}
		log.Info("batch update complete", zap.Int("rows updated", updated), zap.Binary("last id", nextID))
		startID = nextID
	}
	log.Info("bucket_metainfos migration complete", zap.Int("total rows updated", total))
	return nil
}

// MigrateValueAttributions updates the user_agent column to corresponding Partners.Names or partner_id if applicable.
func MigrateValueAttributions(ctx context.Context, log *zap.Logger, conn *pgx.Conn, p *Partners, config Config) (err error) {
	defer mon.Task()(&ctx)(&err)

	// We select the next id then use limit as an offset which actually gives us limit+1 rows.
	offset := config.Limit - 1

	startProjectID := []byte{}
	startBucket := []byte{}
	nextProjectID := []byte{}
	nextBucket := []byte{}
	var total int
	more := true

	// select the next ID after startID and offset the result. We will update relevant rows from the range of startID to nextID.
	_, err = conn.Prepare(ctx, "select-for-update", `
		SELECT project_id, bucket_name
		FROM value_attributions
		WHERE project_id > $1
			OR (
				project_id = $1 AND bucket_name > $2
			)
		ORDER BY project_id, bucket_name
		OFFSET $3
		LIMIT 1
	`)
	if err != nil {
		return errs.New("could not prepare select query")
	}

	// update range from startID to nextID. Return count of updated rows to log progress.
	_, err = conn.Prepare(ctx, "update-limited", `
		WITH partners AS (
			SELECT unnest($1::bytea[]) as id,
				unnest($2::bytea[]) as name
		),
		updated as (
			UPDATE value_attributions
			SET user_agent = (
				CASE
					WHEN partner_id IN (SELECT id FROM partners)
						THEN (SELECT name FROM partners WHERE partner_id = partners.id)
					ELSE partner_id
				END
			)
			FROM partners
			WHERE partner_id IS NOT NULL
				AND user_agent IS NULL
				AND (
					value_attributions.project_id > $3
					OR (
						value_attributions.project_id = $3 AND value_attributions.bucket_name > $5
					)
				)
				AND (
					value_attributions.project_id < $4
					OR (
						value_attributions.project_id = $4 AND value_attributions.bucket_name <= $6
					)
				)
			RETURNING 1
		)
		SELECT count(*)
		FROM updated;
	`)
	if err != nil {
		return errs.New("could not prepare update statement")
	}

	for {
		row := conn.QueryRow(ctx, "select-for-update", startProjectID, startBucket, offset)
		err = row.Scan(&nextProjectID, &nextBucket)
		if err != nil {
			if errs.Is(err, pgx.ErrNoRows) {
				more = false
			} else {
				return errs.New("unable to select row for update from value_attributions: %w", err)
			}
		}
		var updated int
		for {
			var row pgx.Row
			if more {
				row = conn.QueryRow(ctx, "update-limited", pgutil.UUIDArray(p.UUIDs), pgutil.ByteaArray(p.Names), startProjectID, nextProjectID, startBucket, nextBucket)
			} else {
				// if !more then the select statement reached the end of the table. Update to the end of the table.
				row = conn.QueryRow(ctx, `
					WITH partners AS (
						SELECT unnest($1::bytea[]) as id,
							unnest($2::bytea[]) as name
					),
					updated as (
						UPDATE value_attributions
						SET user_agent = (
							CASE
								WHEN partner_id IN (SELECT id FROM partners)
									THEN (SELECT name FROM partners WHERE partner_id = partners.id)
								ELSE partner_id
							END
						)
						FROM partners
						WHERE partner_id IS NOT NULL
							AND user_agent IS NULL
							AND (
								value_attributions.project_id > $3
								OR (
									value_attributions.project_id = $3 AND value_attributions.bucket_name > $4
								)
							)
						RETURNING 1
					)
					SELECT count(*)
					FROM updated;
				`, pgutil.UUIDArray(p.UUIDs), pgutil.ByteaArray(p.Names), startProjectID, startBucket,
				)
			}
			err := row.Scan(&updated)
			if err != nil {
				if cockroachutil.NeedsRetry(err) {
					continue
				} else if errs.Is(err, pgx.ErrNoRows) {
					break
				}
				return errs.New("updating value_attributions %w", err)
			}
			break
		}

		total += updated
		if !more {
			log.Info("batch update complete", zap.Int("rows updated", updated))
			break
		}
		log.Info("batch update complete", zap.Int("rows updated", updated), zap.Binary("last project id", nextProjectID), zap.String("last bucket name", string(nextBucket)))
		startProjectID = nextProjectID
		startBucket = nextBucket
	}
	log.Info("value_attributions migration complete", zap.Int("total rows updated", total))
	return nil
}
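After a limited run, the commit message suggests looking at the migrated columns to confirm they are correct. The following is a minimal sketch of such a spot-check for the users table; it assumes the column names used by the statements above and the same imports as the file above, while the helper name logMigratedUsers and how the connection is obtained are illustrative.

// logMigratedUsers prints a handful of rows whose user_agent was populated,
// so the values can be compared against partner_id by hand. Illustrative only.
func logMigratedUsers(ctx context.Context, log *zap.Logger, conn *pgx.Conn, limit int) error {
	rows, err := conn.Query(ctx,
		`SELECT id, partner_id, user_agent FROM users WHERE user_agent IS NOT NULL ORDER BY id LIMIT $1`, limit)
	if err != nil {
		return err
	}
	defer rows.Close()

	for rows.Next() {
		var id, partnerID, userAgent []byte
		if err := rows.Scan(&id, &partnerID, &userAgent); err != nil {
			return err
		}
		log.Info("migrated user",
			zap.Binary("id", id),
			zap.Binary("partner_id", partnerID),
			zap.ByteString("user_agent", userAgent))
	}
	return rows.Err()
}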