cmd: remove metainfo-migrator and metainfo-verifier

We already merged the multipart-upload branch to main. These two tools
only make sense if we are migrating a satellite from Pointer DB to
Metabase. There is one remaining satellite to migrate, but these tools
should be used from the respective release branch instead of from main.

Removing these tools from main will:
1) Avoid the mistake of using them from the main branch instead of from
the respective release branch.
2) Allow finally removing any code related to the old Pointer DB.

Change-Id: Ied66098c5d0b8fefeb5d6e92b5e0ef5c6603df5d
Kaloyan Raev 2021-03-30 17:06:09 +03:00 committed by Egon Elbre
parent 6b88a675c5
commit a264a4422b
14 changed files with 1 addition and 4158 deletions


@@ -379,4 +379,4 @@ bump-dependencies:
go mod tidy
update-proto-lock:
protolock commit --ignore "satellite/internalpb,storagenode/internalpb,cmd/metainfo-migration/fastpb"
protolock commit --ignore "satellite/internalpb,storagenode/internalpb"


@@ -1,143 +0,0 @@
// Protocol Buffers for Go with Gadgets
//
// Copyright (c) 2013, The GoGo Authors. All rights reserved.
// http://github.com/gogo/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto2";
package gogoproto;
import "google/protobuf/descriptor.proto";
option java_package = "com.google.protobuf";
option java_outer_classname = "GoGoProtos";
extend google.protobuf.EnumOptions {
optional bool goproto_enum_prefix = 62001;
optional bool goproto_enum_stringer = 62021;
optional bool enum_stringer = 62022;
optional string enum_customname = 62023;
optional bool enumdecl = 62024;
}
extend google.protobuf.EnumValueOptions {
optional string enumvalue_customname = 66001;
}
extend google.protobuf.FileOptions {
optional bool goproto_getters_all = 63001;
optional bool goproto_enum_prefix_all = 63002;
optional bool goproto_stringer_all = 63003;
optional bool verbose_equal_all = 63004;
optional bool face_all = 63005;
optional bool gostring_all = 63006;
optional bool populate_all = 63007;
optional bool stringer_all = 63008;
optional bool onlyone_all = 63009;
optional bool equal_all = 63013;
optional bool description_all = 63014;
optional bool testgen_all = 63015;
optional bool benchgen_all = 63016;
optional bool marshaler_all = 63017;
optional bool unmarshaler_all = 63018;
optional bool stable_marshaler_all = 63019;
optional bool sizer_all = 63020;
optional bool goproto_enum_stringer_all = 63021;
optional bool enum_stringer_all = 63022;
optional bool unsafe_marshaler_all = 63023;
optional bool unsafe_unmarshaler_all = 63024;
optional bool goproto_extensions_map_all = 63025;
optional bool goproto_unrecognized_all = 63026;
optional bool gogoproto_import = 63027;
optional bool protosizer_all = 63028;
optional bool compare_all = 63029;
optional bool typedecl_all = 63030;
optional bool enumdecl_all = 63031;
optional bool goproto_registration = 63032;
optional bool messagename_all = 63033;
optional bool goproto_sizecache_all = 63034;
optional bool goproto_unkeyed_all = 63035;
}
extend google.protobuf.MessageOptions {
optional bool goproto_getters = 64001;
optional bool goproto_stringer = 64003;
optional bool verbose_equal = 64004;
optional bool face = 64005;
optional bool gostring = 64006;
optional bool populate = 64007;
optional bool stringer = 67008;
optional bool onlyone = 64009;
optional bool equal = 64013;
optional bool description = 64014;
optional bool testgen = 64015;
optional bool benchgen = 64016;
optional bool marshaler = 64017;
optional bool unmarshaler = 64018;
optional bool stable_marshaler = 64019;
optional bool sizer = 64020;
optional bool unsafe_marshaler = 64023;
optional bool unsafe_unmarshaler = 64024;
optional bool goproto_extensions_map = 64025;
optional bool goproto_unrecognized = 64026;
optional bool protosizer = 64028;
optional bool typedecl = 64030;
optional bool messagename = 64033;
optional bool goproto_sizecache = 64034;
optional bool goproto_unkeyed = 64035;
}
extend google.protobuf.FieldOptions {
optional bool nullable = 65001;
optional bool embed = 65002;
optional string customtype = 65003;
optional string customname = 65004;
optional string jsontag = 65005;
optional string moretags = 65006;
optional string casttype = 65007;
optional string castkey = 65008;
optional string castvalue = 65009;
optional bool stdtime = 65010;
optional bool stdduration = 65011;
optional bool wktpointer = 65012;
optional bool compare = 65013;
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large


@@ -1,77 +0,0 @@
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
syntax = "proto3";
package fastpb;
import "google/protobuf/timestamp.proto";
import "gogo.proto";
// This is a reduced version of pointerdb.proto for things that are
// relevant to migration.
message RedundancyScheme {
enum SchemeType {
INVALID = 0;
RS = 1;
}
SchemeType type = 1;
// these values apply to RS encoding
int32 min_req = 2; // minimum required for reconstruction
int32 total = 3; // total amount of pieces we generated
int32 repair_threshold = 4; // amount of pieces we need to drop to before triggering repair
int32 success_threshold = 5; // amount of pieces we need to store to call it a success
int32 erasure_share_size = 6;
}
message RemotePiece {
int32 piece_num = 1;
bytes node_id = 2 [(gogoproto.customtype) = "NodeID", (gogoproto.nullable) = false];
}
message RemoteSegment {
RedundancyScheme redundancy = 1;
bytes root_piece_id = 2 [(gogoproto.customtype) = "PieceID", (gogoproto.nullable) = false];
repeated RemotePiece remote_pieces = 3 [(gogoproto.nullable) = false];
}
message Pointer {
enum DataType {
INLINE = 0;
REMOTE = 1;
}
DataType type = 1;
bytes inline_segment = 3;
RemoteSegment remote = 4;
int64 segment_size = 5;
google.protobuf.Timestamp creation_date = 6 [(gogoproto.stdtime) = true, (gogoproto.nullable) = false];
google.protobuf.Timestamp expiration_date = 7 [(gogoproto.stdtime) = true, (gogoproto.nullable) = false];
bytes metadata = 8;
}
message SegmentMeta {
bytes encrypted_key = 1;
bytes key_nonce = 2;
}
message StreamInfo {
int64 deprecated_number_of_segments = 1;
int64 segments_size = 2;
int64 last_segment_size = 3;
bytes metadata = 4;
}
message StreamMeta {
bytes encrypted_stream_info = 1;
int32 encryption_type = 2;
int32 encryption_block_size = 3;
SegmentMeta last_segment_meta = 4;
int64 number_of_segments = 5;
}


@@ -1,17 +0,0 @@
// Copyright (C) 2021 Storj Labs, Inc.
// See LICENSE for copying information.
// Package fastpb contains generated marshaling for Pointers.
package fastpb
import (
"storj.io/common/storj"
)
//go:generate protoc -I=. --gogofaster_out=Mgoogle/protobuf/any.proto=github.com/gogo/protobuf/types,Mgoogle/protobuf/duration.proto=github.com/gogo/protobuf/types,Mgoogle/protobuf/struct.proto=github.com/gogo/protobuf/types,Mgoogle/protobuf/timestamp.proto=github.com/gogo/protobuf/types,Mgoogle/protobuf/wrappers.proto=github.com/gogo/protobuf/types,paths=source_relative:. pointerdb.proto
// NodeID is a unique node identifier.
type NodeID = storj.NodeID
// PieceID is a unique identifier for pieces.
type PieceID = storj.PieceID


@@ -1,83 +0,0 @@
// Copyright (C) 2021 Storj Labs, Inc.
// See LICENSE for copying information.
package main
import (
"context"
"flag"
"log"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
const (
defaultReadBatchSize = 3000000
defaultWriteBatchSize = 100
defaultWriteParallelLimit = 6
defaultPreGeneratedStreamIDs = 100
)
var (
cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
readBatchSize = flag.Int("readBatchSize", defaultReadBatchSize, "batch size for selecting pointers from DB")
writeBatchSize = flag.Int("writeBatchSize", defaultWriteBatchSize, "batch size for inserting objects and segments")
writeParallelLimit = flag.Int("writeParallelLimit", defaultWriteParallelLimit, "limit of parallel batch writes")
preGeneratedStreamIDs = flag.Int("preGeneratedStreamIDs", defaultPreGeneratedStreamIDs, "number of pre-generated stream ids for segments")
nodes = flag.String("nodes", "", "file with node ids")
invalidObjects = flag.String("invalidObjects", "", "file for storing invalid objects")
pointerdb = flag.String("pointerdb", "", "connection URL for PointerDB")
metabasedb = flag.String("metabasedb", "", "connection URL for MetabaseDB")
)
func main() {
flag.Parse()
if *pointerdb == "" {
log.Fatalln("Flag '--pointerdb' is not set")
}
if *metabasedb == "" {
log.Fatalln("Flag '--metabasedb' is not set")
}
ctx := context.Background()
log, err := zap.Config{
Encoding: "console",
Level: zap.NewAtomicLevelAt(zapcore.DebugLevel),
OutputPaths: []string{"stdout"},
ErrorOutputPaths: []string{"stdout"},
EncoderConfig: zapcore.EncoderConfig{
LevelKey: "L",
NameKey: "N",
CallerKey: "C",
MessageKey: "M",
StacktraceKey: "S",
LineEnding: zapcore.DefaultLineEnding,
EncodeLevel: zapcore.CapitalLevelEncoder,
EncodeTime: zapcore.ISO8601TimeEncoder,
EncodeDuration: zapcore.StringDurationEncoder,
EncodeCaller: zapcore.ShortCallerEncoder,
},
}.Build()
if err != nil {
panic(err)
}
defer func() { _ = log.Sync() }()
config := Config{
PreGeneratedStreamIDs: *preGeneratedStreamIDs,
ReadBatchSize: *readBatchSize,
WriteBatchSize: *writeBatchSize,
WriteParallelLimit: *writeParallelLimit,
Nodes: *nodes,
InvalidObjectsFile: *invalidObjects,
}
migrator := NewMigrator(log, *pointerdb, *metabasedb, config)
err = migrator.MigrateProjects(ctx)
if err != nil {
panic(err)
}
}


@@ -1,705 +0,0 @@
// Copyright (C) 2021 Storj Labs, Inc.
// See LICENSE for copying information.
package main
import (
"bufio"
"bytes"
"context"
"encoding/binary"
"encoding/csv"
"encoding/hex"
"errors"
"os"
"runtime/pprof"
"sort"
"strconv"
"strings"
"time"
proto "github.com/gogo/protobuf/proto"
"github.com/jackc/pgx/v4"
"github.com/jackc/pgx/v4/pgxpool"
"github.com/zeebo/errs"
"go.uber.org/zap"
"storj.io/common/pb"
"storj.io/common/storj"
"storj.io/common/sync2"
"storj.io/common/uuid"
"storj.io/storj/cmd/metainfo-migration/fastpb"
"storj.io/storj/satellite/metainfo"
"storj.io/storj/satellite/metainfo/metabase"
)
const objectArgs = 14
const segmentArgs = 11
// EntryKey is a map key for an object.
type EntryKey struct {
Bucket string
Key metabase.ObjectKey
}
// Object represents object metadata.
type Object struct {
StreamID uuid.UUID
CreationDate time.Time
ExpireAt *time.Time
EncryptedMetadata []byte
EncryptedMetadataKey []byte
EncryptedMetadataNonce []byte
Encryption int64
TotalEncryptedSize int64
SegmentsRead int64
SegmentsExpected int64
}
// Config holds the initial settings for the migrator.
type Config struct {
PreGeneratedStreamIDs int
ReadBatchSize int
WriteBatchSize int
WriteParallelLimit int
Nodes string
InvalidObjectsFile string
NumberOfRetries int
}
// Migrator defines the metainfo migrator.
type Migrator struct {
log *zap.Logger
pointerDBStr string
metabaseDBStr string
config Config
objects [][]interface{}
segments [][]interface{}
objectsSQL string
segmentsSQL string
metabaseLimiter *sync2.Limiter
}
// NewMigrator creates a new metainfo migrator.
func NewMigrator(log *zap.Logger, pointerDBStr, metabaseDBStr string, config Config) *Migrator {
if config.ReadBatchSize == 0 {
config.ReadBatchSize = defaultReadBatchSize
}
if config.WriteBatchSize == 0 {
config.WriteBatchSize = defaultWriteBatchSize
}
if config.WriteParallelLimit == 0 {
config.WriteParallelLimit = defaultWriteParallelLimit
}
if config.PreGeneratedStreamIDs == 0 {
config.PreGeneratedStreamIDs = defaultPreGeneratedStreamIDs
}
if config.NumberOfRetries == 0 {
config.NumberOfRetries = 5
}
return &Migrator{
log: log,
pointerDBStr: pointerDBStr,
metabaseDBStr: metabaseDBStr,
config: config,
objects: make([][]interface{}, 0, config.WriteBatchSize),
segments: make([][]interface{}, 0, config.WriteBatchSize),
objectsSQL: prepareObjectsSQL(config.WriteBatchSize),
segmentsSQL: prepareSegmentsSQL(config.WriteBatchSize),
metabaseLimiter: sync2.NewLimiter(config.WriteParallelLimit),
}
}
// MigrateProjects migrates all projects in the pointerDB database.
func (m *Migrator) MigrateProjects(ctx context.Context) (err error) {
m.log.Debug("Databases", zap.String("PointerDB", m.pointerDBStr), zap.String("MetabaseDB", m.metabaseDBStr))
pointerDBConn, err := pgx.Connect(ctx, m.pointerDBStr)
if err != nil {
return errs.New("unable to connect %q: %w", m.pointerDBStr, err)
}
defer func() { err = errs.Combine(err, pointerDBConn.Close(ctx)) }()
mb, err := metainfo.OpenMetabase(ctx, m.log.Named("metabase"), m.metabaseDBStr)
if err != nil {
return err
}
if err := mb.MigrateToLatest(ctx); err != nil {
return err
}
defer func() { err = errs.Combine(err, mb.Close()) }()
aliasCache := metabase.NewNodeAliasCache(mb)
config, err := pgxpool.ParseConfig(m.metabaseDBStr)
if err != nil {
return err
}
config.MaxConns = 10
metabaseConn, err := pgxpool.ConnectConfig(ctx, config)
if err != nil {
return errs.New("unable to connect %q: %w", m.metabaseDBStr, err)
}
defer func() { metabaseConn.Close() }()
if *cpuprofile != "" {
f, err := os.Create(*cpuprofile)
if err != nil {
return err
}
err = pprof.StartCPUProfile(f)
if err != nil {
return err
}
defer pprof.StopCPUProfile()
}
pointer := &fastpb.Pointer{}
streamMeta := &fastpb.StreamMeta{}
segmentMeta := &fastpb.SegmentMeta{}
segmentPosition := metabase.SegmentPosition{}
object := Object{}
location := metabase.ObjectLocation{}
objects := make(map[EntryKey]Object)
var currentProject uuid.UUID
var fullpath, lastFullPath, metadata []byte
var allObjects, allSegments, zombieSegments int64
var invalidObjectsWriter *csv.Writer
if m.config.InvalidObjectsFile != "" {
objFile, err := os.Create(m.config.InvalidObjectsFile)
if err != nil {
return err
}
defer func() { err = errs.Combine(err, objFile.Close()) }()
invalidObjectsWriter = csv.NewWriter(objFile)
} else {
invalidObjectsWriter = csv.NewWriter(os.Stdout)
}
err = invalidObjectsWriter.Write([]string{"project_id", "bucket_name", "object_key", "stream_id", "expected_segments", "read_segments"})
if err != nil {
return err
}
start := time.Now()
if m.config.Nodes != "" {
err = m.aliasNodes(ctx, mb)
if err != nil {
return err
}
}
lastCheck := time.Now()
m.log.Info("Start generating StreamIDs", zap.Int("total", m.config.PreGeneratedStreamIDs))
ids, err := generateStreamIDs(m.config.PreGeneratedStreamIDs)
if err != nil {
return err
}
m.log.Info("Finished generating StreamIDs", zap.Duration("took", time.Since(lastCheck)))
m.log.Info("Start", zap.Time("time", start),
zap.Int("readBatchSize", m.config.ReadBatchSize),
zap.Int("writeBatchSize", m.config.WriteBatchSize),
zap.Int("writeParallelLimit", m.config.WriteParallelLimit),
)
lastCheck = time.Now()
for {
hasResults := false
err = func() error {
var rows pgx.Rows
if len(lastFullPath) == 0 {
m.withRetry(ctx, func() (err error) {
rows, err = pointerDBConn.Query(ctx, `SELECT fullpath, metadata FROM pathdata ORDER BY fullpath ASC LIMIT $1`, m.config.ReadBatchSize)
return err
})
} else {
m.withRetry(ctx, func() (err error) {
rows, err = pointerDBConn.Query(ctx, `SELECT fullpath, metadata FROM pathdata WHERE fullpath > $1 ORDER BY fullpath ASC LIMIT $2`, lastFullPath, m.config.ReadBatchSize)
return err
})
}
defer func() { rows.Close() }()
for rows.Next() {
hasResults = true
err = rows.Scan(&fullpath, &metadata)
if err != nil {
return err
}
lastFullPath = fullpath
segmentKey, err := metabase.ParseSegmentKey(metabase.SegmentKey(fullpath))
if err != nil {
// we should skip such errors as it looks like we can have outdated entries
// in pointerdb like `project_id/l/bucket_name` without an object key
m.log.Warn("unable to parse segment key", zap.Error(err))
continue
}
if !bytes.Equal(currentProject[:], segmentKey.ProjectID[:]) {
if len(objects) != 0 {
// TODO should we add such incomplete objects into the metabase?
for key, object := range objects {
err = invalidObjectsWriter.Write([]string{
currentProject.String(),
key.Bucket,
hex.EncodeToString([]byte(key.Key)),
object.StreamID.String(),
strconv.FormatInt(object.SegmentsExpected, 10),
strconv.FormatInt(object.SegmentsRead, 10),
})
if err != nil {
return err
}
}
invalidObjectsWriter.Flush()
if err := invalidObjectsWriter.Error(); err != nil {
return err
}
m.log.Warn("Object map should be empty after processing whole project", zap.String("ProjectID", currentProject.String()), zap.Int("Number of objects", len(objects)))
}
currentProject = segmentKey.ProjectID
for b := range objects {
delete(objects, b)
}
}
err = proto.Unmarshal(metadata, pointer)
if err != nil {
return err
}
if allSegments != 0 && allSegments%1000000 == 0 {
m.log.Info("Processed segments", zap.Int64("segments", allSegments), zap.Duration("took", time.Since(lastCheck)))
lastCheck = time.Now()
}
key := EntryKey{
Bucket: segmentKey.BucketName,
Key: segmentKey.ObjectKey,
}
// TODO:
// * detect empty objects and insert only object
if segmentKey.Position.Index == metabase.LastSegmentIndex {
// process the last segment; it contains information about the object and segment metadata
if len(ids) == 0 {
return errs.New("not enough generated stream ids")
}
streamID := ids[0]
err = proto.Unmarshal(pointer.Metadata, streamMeta)
if err != nil {
return err
}
// remove used ID
ids = ids[1:]
var expireAt *time.Time
if !pointer.ExpirationDate.IsZero() {
// because we are reusing the Pointer struct, using it directly could cause a race
copy := pointer.ExpirationDate
expireAt = &copy
}
encryption, err := encodeEncryption(storj.EncryptionParameters{
CipherSuite: storj.CipherSuite(streamMeta.EncryptionType),
BlockSize: streamMeta.EncryptionBlockSize,
})
if err != nil {
return err
}
object.StreamID = streamID
object.CreationDate = pointer.CreationDate
object.ExpireAt = expireAt
object.Encryption = encryption
object.EncryptedMetadataKey = streamMeta.LastSegmentMeta.EncryptedKey
object.EncryptedMetadataNonce = streamMeta.LastSegmentMeta.KeyNonce
object.EncryptedMetadata = pointer.Metadata // TODO this needs to be stripped to EncryptedStreamInfo
object.SegmentsRead = 1
object.TotalEncryptedSize = pointer.SegmentSize
object.SegmentsExpected = streamMeta.NumberOfSegments
// if the object has only one segment then just insert it and don't put it into the map
if streamMeta.NumberOfSegments == 1 {
location.ProjectID = currentProject
location.BucketName = key.Bucket
location.ObjectKey = key.Key
err = m.insertObject(ctx, metabaseConn, location, object)
if err != nil {
return err
}
allObjects++
} else {
objects[key] = object
}
segmentPosition.Index = uint32(streamMeta.NumberOfSegments - 1)
err = m.insertSegment(ctx, metabaseConn, aliasCache, streamID, segmentPosition.Encode(), pointer, streamMeta.LastSegmentMeta)
if err != nil {
return err
}
} else {
object, ok := objects[key]
if !ok {
// TODO verify if it's possible that the DB has zombie segments
zombieSegments++
} else {
err = pb.Unmarshal(pointer.Metadata, segmentMeta)
if err != nil {
return err
}
segmentPosition.Index = segmentKey.Position.Index
err = m.insertSegment(ctx, metabaseConn, aliasCache, object.StreamID, segmentPosition.Encode(), pointer, segmentMeta)
if err != nil {
return err
}
object.SegmentsRead++
object.TotalEncryptedSize += pointer.SegmentSize
if object.SegmentsRead == object.SegmentsExpected {
location.ProjectID = currentProject
location.BucketName = key.Bucket
location.ObjectKey = key.Key
err = m.insertObject(ctx, metabaseConn, location, object)
if err != nil {
return err
}
allObjects++
delete(objects, key)
} else {
objects[key] = object
}
}
}
allSegments++
}
return rows.Err()
}()
if err != nil {
return err
}
if !hasResults {
break
}
}
err = m.flushObjects(ctx, metabaseConn)
if err != nil {
return err
}
err = m.flushSegments(ctx, metabaseConn)
if err != nil {
return err
}
m.metabaseLimiter.Wait()
m.log.Info("Finished", zap.Int64("objects", allObjects), zap.Int64("segments", allSegments), zap.Int64("invalid", zombieSegments), zap.Duration("total", time.Since(start)))
return nil
}
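// insertObject buffers a row for the objects table and flushes the buffer once it reaches WriteBatchSize.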
func (m *Migrator) insertObject(ctx context.Context, conn *pgxpool.Pool, location metabase.ObjectLocation, object Object) error {
m.objects = append(m.objects, []interface{}{
location.ProjectID, location.BucketName, []byte(location.ObjectKey), 1, object.StreamID,
object.CreationDate, object.ExpireAt,
metabase.Committed, object.SegmentsRead,
object.EncryptedMetadata, object.EncryptedMetadataKey, object.EncryptedMetadataNonce,
object.TotalEncryptedSize,
object.Encryption,
})
if len(m.objects) >= m.config.WriteBatchSize {
err := m.flushObjects(ctx, conn)
if err != nil {
return err
}
}
return nil
}
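// insertSegment buffers a row for the segments table, converting remote pieces to node aliases, and flushes the buffer once it reaches WriteBatchSize.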
func (m *Migrator) insertSegment(ctx context.Context, conn *pgxpool.Pool, aliasCache *metabase.NodeAliasCache, streamID uuid.UUID, position uint64, pointer *fastpb.Pointer, segmentMeta *fastpb.SegmentMeta) (err error) {
var rootPieceID storj.PieceID
var remotePieces metabase.Pieces
var redundancy int64
if pointer.Type == fastpb.Pointer_REMOTE && pointer.Remote != nil {
rootPieceID = pointer.Remote.RootPieceId
redundancy, err = encodeRedundancy(pointer.Remote.Redundancy)
if err != nil {
return err
}
for _, remotePiece := range pointer.Remote.RemotePieces {
remotePieces = append(remotePieces, metabase.Piece{
Number: uint16(remotePiece.PieceNum),
StorageNode: remotePiece.NodeId,
})
}
}
pieces, err := aliasCache.ConvertPiecesToAliases(ctx, remotePieces)
if err != nil {
return err
}
sort.Slice(pieces, func(i, j int) bool {
return pieces[i].Number < pieces[j].Number
})
m.segments = append(m.segments, []interface{}{
streamID, position,
rootPieceID,
segmentMeta.EncryptedKey, segmentMeta.KeyNonce,
pointer.SegmentSize, 0, 0,
redundancy,
pointer.InlineSegment, pieces,
})
if len(m.segments) >= m.config.WriteBatchSize {
err = m.flushSegments(ctx, conn)
if err != nil {
return err
}
}
return nil
}
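// encodeEncryption packs the encryption parameters into a single int64: byte 0 holds the cipher suite and bytes 1-4 hold the block size in little-endian order.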
func encodeEncryption(params storj.EncryptionParameters) (int64, error) {
var bytes [8]byte
bytes[0] = byte(params.CipherSuite)
binary.LittleEndian.PutUint32(bytes[1:], uint32(params.BlockSize))
return int64(binary.LittleEndian.Uint64(bytes[:])), nil
}
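// encodeRedundancy packs the redundancy scheme into a single int64: byte 0 holds the algorithm, bytes 1-3 the share size (which must fit in 24 bits), and bytes 4-7 the required, repair, optimal and total share counts.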
func encodeRedundancy(redundancy *fastpb.RedundancyScheme) (int64, error) {
params := storj.RedundancyScheme{}
if redundancy != nil {
params.Algorithm = storj.RedundancyAlgorithm(redundancy.Type)
params.ShareSize = redundancy.ErasureShareSize
params.RequiredShares = int16(redundancy.MinReq)
params.RepairShares = int16(redundancy.RepairThreshold)
params.OptimalShares = int16(redundancy.SuccessThreshold)
params.TotalShares = int16(redundancy.Total)
}
var bytes [8]byte
bytes[0] = byte(params.Algorithm)
if params.ShareSize >= (1 << 24) {
return 0, errors.New("redundancy ShareSize is too big to encode")
}
bytes[1] = byte(params.ShareSize >> 0)
bytes[2] = byte(params.ShareSize >> 8)
bytes[3] = byte(params.ShareSize >> 16)
bytes[4] = byte(params.RequiredShares)
bytes[5] = byte(params.RepairShares)
bytes[6] = byte(params.OptimalShares)
bytes[7] = byte(params.TotalShares)
return int64(binary.LittleEndian.Uint64(bytes[:])), nil
}
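// flushObjects schedules the buffered object rows for insertion as a single batched INSERT (with retries) and resets the buffer.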
func (m *Migrator) flushObjects(ctx context.Context, conn *pgxpool.Pool) error {
if len(m.objects) == 0 {
return nil
}
objectsSQL := m.objectsSQL
if len(m.objects) < m.config.WriteBatchSize {
objectsSQL = prepareObjectsSQL(len(m.objects))
}
// TODO make a predefined instance for that
params := []interface{}{}
for _, object := range m.objects {
params = append(params, object...)
}
m.metabaseLimiter.Go(ctx, func() {
params := params
m.withRetry(ctx, func() error {
_, err := conn.Exec(ctx, objectsSQL, params...)
return err
})
})
m.objects = m.objects[:0]
return nil
}
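// flushSegments schedules the buffered segment rows for insertion as a single batched INSERT (with retries) and resets the buffer.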
func (m *Migrator) flushSegments(ctx context.Context, conn *pgxpool.Pool) error {
if len(m.segments) == 0 {
return nil
}
segmentsSQL := m.segmentsSQL
if len(m.segments) < m.config.WriteBatchSize {
segmentsSQL = prepareSegmentsSQL(len(m.segments))
}
// TODO make a predefined instance for that
params := make([]interface{}, 0, len(m.segments)*segmentArgs)
for _, segment := range m.segments {
params = append(params, segment...)
}
m.metabaseLimiter.Go(ctx, func() {
params := params
m.withRetry(ctx, func() error {
_, err := conn.Exec(ctx, segmentsSQL, params...)
return err
})
})
m.segments = m.segments[:0]
return nil
}
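// prepareObjectsSQL builds a multi-row INSERT statement for the objects table with one placeholder group per object in the batch.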
func prepareObjectsSQL(batchSize int) string {
sql := `
INSERT INTO objects (
project_id, bucket_name, object_key, version, stream_id,
created_at, expires_at,
status, segment_count,
encrypted_metadata, encrypted_metadata_encrypted_key, encrypted_metadata_nonce,
total_encrypted_size,
encryption
) VALUES
`
i := 1
for i < batchSize*objectArgs {
sql += parameters(objectArgs, i) + ","
i += objectArgs
}
return strings.TrimSuffix(sql, ",")
}
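// prepareSegmentsSQL builds a multi-row INSERT statement for the segments table with one placeholder group per segment in the batch.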
func prepareSegmentsSQL(batchSize int) string {
sql := `INSERT INTO segments (
stream_id, position,
root_piece_id, encrypted_key, encrypted_key_nonce,
encrypted_size, plain_offset, plain_size,
redundancy,
inline_data, remote_alias_pieces
) VALUES
`
i := 1
for i < batchSize*segmentArgs {
sql += parameters(segmentArgs, i) + ","
i += segmentArgs
}
return strings.TrimSuffix(sql, ",")
}
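// parameters returns a placeholder group such as "($1,$2,$3)" containing `args` placeholders starting at `index`.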
func parameters(args, index int) string {
values := make([]string, args)
for i := index; i < args+index; i++ {
values[i-index] = "$" + strconv.Itoa(i)
}
return "(" + strings.Join(values, ",") + ")"
}
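// generateStreamIDs pre-generates the requested number of UUIDs and returns them sorted in ascending byte order.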
func generateStreamIDs(numberOfIDs int) ([]uuid.UUID, error) {
ids := make([]uuid.UUID, numberOfIDs)
var err error
for i := 0; i < len(ids); i++ {
ids[i], err = uuid.New()
if err != nil {
return []uuid.UUID{}, err
}
}
sort.Slice(ids, func(i, j int) bool {
return bytes.Compare(ids[i][:], ids[j][:]) == -1
})
return ids, nil
}
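// aliasNodes reads hex-encoded node ids from the configured file and ensures metabase aliases for them in batches of 1000.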
func (m *Migrator) aliasNodes(ctx context.Context, mb metainfo.MetabaseDB) error {
start := time.Now()
m.log.Info("Start aliasing nodes")
file, err := os.Open(m.config.Nodes)
if err != nil {
return err
}
defer func() { err = errs.Combine(err, file.Close()) }()
scanner := bufio.NewScanner(file)
nodes := make([]storj.NodeID, 0, 30000)
for scanner.Scan() {
line := scanner.Text()
decoded, err := hex.DecodeString(line)
if err != nil {
m.log.Error("unable to decode node id", zap.String("value", line), zap.Error(err))
continue
}
node, err := storj.NodeIDFromBytes(decoded)
if err != nil {
m.log.Error("unable to create node id", zap.String("value", line), zap.Error(err))
continue
}
nodes = append(nodes, node)
}
// batching is used because we had an issue with CRDB when putting all nodes in one insert
batch := 1000
for len(nodes) > 0 {
if len(nodes) < batch {
batch = len(nodes)
}
err = mb.EnsureNodeAliases(ctx, metabase.EnsureNodeAliases{
Nodes: nodes[:batch],
})
if err != nil {
return err
}
nodes = nodes[batch:]
m.log.Info("Left to insert", zap.Int("nodes", len(nodes)))
}
m.log.Info("Finished aliasing nodes", zap.Duration("took", time.Since(start)))
return nil
}
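// withRetry runs fn up to NumberOfRetries times, waiting 3 seconds between attempts, and terminates the process if all attempts fail.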
func (m *Migrator) withRetry(ctx context.Context, fn func() error) {
var err error
for i := 0; i < m.config.NumberOfRetries; i++ {
err = fn()
if err != nil {
m.log.Error("error occurred", zap.Int("retry", i), zap.Error(err))
if !sync2.Sleep(ctx, 3*time.Second) {
m.log.Fatal("context error", zap.Error(ctx.Err()))
}
continue
}
return
}
// it makes no sense to continue if even a single query to the DB fails
m.log.Fatal("query failed after retries", zap.Error(err))
}


@@ -1,293 +0,0 @@
// Copyright (C) 2021 Storj Labs, Inc.
// See LICENSE for copying information.
package main_test
import (
"context"
"strconv"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
"storj.io/common/pb"
"storj.io/common/testcontext"
"storj.io/common/testrand"
"storj.io/common/uuid"
migration "storj.io/storj/cmd/metainfo-migration"
"storj.io/storj/private/dbutil/tempdb"
"storj.io/storj/satellite/metainfo"
"storj.io/storj/satellite/metainfo/metabase"
"storj.io/storj/satellite/satellitedb/satellitedbtest"
"storj.io/storj/storage"
)
func TestMigrator_SingleSegmentObj(t *testing.T) {
expectedEntries := 1
expectedProjectID := testrand.UUID()
expectedBucket := []byte("bucket-name")
expectedObjectKey := []byte("encrypted-key")
var pointer *pb.Pointer
createPointers := func(t *testing.T, ctx context.Context, pointerDB metainfo.PointerDB) {
var err error
pointer, err = createLastSegment(ctx, pointerDB, expectedProjectID, expectedBucket, expectedObjectKey, 1)
require.NoError(t, err)
// create an invalid segment key which should be ignored during migration
err = pointerDB.Put(ctx, storage.Key("ff5b056b-5763-41f8-a928-286723cfefc9/l/test_bucket"), storage.Value([]byte{}))
require.NoError(t, err)
}
checkMigration := func(t *testing.T, ctx context.Context, metabaseDB metainfo.MetabaseDB) {
objects, err := metabaseDB.TestingAllObjects(ctx)
require.NoError(t, err)
require.Len(t, objects, expectedEntries)
{ // verify object
require.EqualValues(t, expectedProjectID, objects[0].ProjectID)
require.EqualValues(t, expectedBucket, objects[0].BucketName)
require.EqualValues(t, expectedObjectKey, objects[0].ObjectKey)
require.EqualValues(t, pointer.SegmentSize, objects[0].TotalEncryptedSize)
require.EqualValues(t, 1, objects[0].SegmentCount)
require.Equal(t, metabase.Committed, objects[0].Status)
require.Zero(t, objects[0].TotalPlainSize)
require.WithinDuration(t, pointer.CreationDate, objects[0].CreatedAt, 5*time.Second)
require.WithinDuration(t, pointer.ExpirationDate, *objects[0].ExpiresAt, 5*time.Second)
streamMeta := &pb.StreamMeta{}
err = pb.Unmarshal(pointer.Metadata, streamMeta)
require.NoError(t, err)
require.Equal(t, pointer.Metadata, objects[0].EncryptedMetadata)
require.EqualValues(t, streamMeta.LastSegmentMeta.EncryptedKey, objects[0].EncryptedMetadataEncryptedKey)
require.EqualValues(t, streamMeta.LastSegmentMeta.KeyNonce, objects[0].EncryptedMetadataNonce)
require.EqualValues(t, streamMeta.EncryptionType, objects[0].Encryption.CipherSuite)
require.EqualValues(t, streamMeta.EncryptionBlockSize, objects[0].Encryption.BlockSize)
}
{ // verify segment
segments, err := metabaseDB.TestingAllSegments(ctx)
require.NoError(t, err)
require.Equal(t, len(segments), expectedEntries)
require.Zero(t, segments[0].Position.Part)
require.Zero(t, segments[0].Position.Index)
require.Zero(t, segments[0].PlainOffset)
require.Zero(t, segments[0].PlainSize)
require.EqualValues(t, pointer.Remote.RootPieceId, segments[0].RootPieceID)
redundancy := pointer.Remote.Redundancy
require.EqualValues(t, redundancy.ErasureShareSize, segments[0].Redundancy.ShareSize)
require.EqualValues(t, redundancy.Type, segments[0].Redundancy.Algorithm)
require.EqualValues(t, redundancy.MinReq, segments[0].Redundancy.RequiredShares)
require.EqualValues(t, redundancy.RepairThreshold, segments[0].Redundancy.RepairShares)
require.EqualValues(t, redundancy.SuccessThreshold, segments[0].Redundancy.OptimalShares)
require.EqualValues(t, redundancy.Total, segments[0].Redundancy.TotalShares)
require.Empty(t, segments[0].InlineData)
require.Equal(t, len(pointer.Remote.RemotePieces), len(segments[0].Pieces))
for i, piece := range pointer.Remote.RemotePieces {
require.EqualValues(t, piece.PieceNum, segments[0].Pieces[i].Number)
require.Equal(t, piece.NodeId, segments[0].Pieces[i].StorageNode)
}
}
}
test(t, createPointers, checkMigration)
}
func TestMigrator_ManyOneSegObj(t *testing.T) {
expectedEntries := 300
createPointers := func(t *testing.T, ctx context.Context, pointerDB metainfo.PointerDB) {
projectID := testrand.UUID()
for i := 0; i < expectedEntries; i++ {
_, err := createLastSegment(ctx, pointerDB, projectID, []byte("bucket-name"), []byte("encrypted-key"+strconv.Itoa(i)), 1)
require.NoError(t, err)
}
// create an invalid segment key which should be ignored during migration
err := pointerDB.Put(ctx, storage.Key("005b056b-5763-41f8-a928-286723cfefc9/l/test_bucket"), storage.Value([]byte{}))
require.NoError(t, err)
}
checkMigration := func(t *testing.T, ctx context.Context, metabaseDB metainfo.MetabaseDB) {
objects, err := metabaseDB.TestingAllObjects(ctx)
require.NoError(t, err)
require.Len(t, objects, expectedEntries)
segments, err := metabaseDB.TestingAllSegments(ctx)
require.NoError(t, err)
require.Equal(t, len(segments), expectedEntries)
}
test(t, createPointers, checkMigration)
}
func TestMigrator_MultiSegmentObj(t *testing.T) {
expectedEntries := 1000
createPointers := func(t *testing.T, ctx context.Context, pointerDB metainfo.PointerDB) {
projectID := testrand.UUID()
_, err := createLastSegment(ctx, pointerDB, projectID, []byte("bucket-name"), []byte("encrypted-key"), expectedEntries+1)
require.NoError(t, err)
for i := 0; i < expectedEntries; i++ {
err = createSegment(ctx, pointerDB, projectID, uint32(i), []byte("bucket-name"), []byte("encrypted-key"))
require.NoError(t, err)
}
}
checkMigration := func(t *testing.T, ctx context.Context, metabaseDB metainfo.MetabaseDB) {
objects, err := metabaseDB.TestingAllObjects(ctx)
require.NoError(t, err)
require.Len(t, objects, 1)
segments, err := metabaseDB.TestingAllSegments(ctx)
require.NoError(t, err)
require.Equal(t, len(segments), expectedEntries+1)
}
test(t, createPointers, checkMigration)
}
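// test runs the given scenario against every configured satellite database: it creates pointers, runs the migrator, and verifies the resulting metabase.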
func test(t *testing.T, createPointers func(t *testing.T, ctx context.Context, pointerDB metainfo.PointerDB), checkMigration func(t *testing.T, ctx context.Context, metabaseDB metainfo.MetabaseDB)) {
for _, satelliteDB := range satellitedbtest.Databases() {
satelliteDB := satelliteDB
if strings.EqualFold(satelliteDB.MasterDB.URL, "omit") {
continue
}
t.Run(satelliteDB.Name, func(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
log := zaptest.NewLogger(t)
schemaSuffix := satellitedbtest.SchemaSuffix()
schema := satellitedbtest.SchemaName(t.Name(), "category", 0, schemaSuffix)
pointerTempDB, err := tempdb.OpenUnique(ctx, satelliteDB.PointerDB.URL, schema)
require.NoError(t, err)
pointerDB, err := satellitedbtest.CreatePointerDBOnTopOf(ctx, log, pointerTempDB)
require.NoError(t, err)
defer ctx.Check(pointerDB.Close)
schema = satellitedbtest.SchemaName(t.Name(), "category", 1, schemaSuffix)
metabaseTempDB, err := tempdb.OpenUnique(ctx, satelliteDB.MetabaseDB.URL, schema)
require.NoError(t, err)
metabaseDB, err := satellitedbtest.CreateMetabaseDBOnTopOf(ctx, log, metabaseTempDB)
require.NoError(t, err)
defer ctx.Check(metabaseDB.Close)
createPointers(t, ctx, pointerDB)
// TODO workaround for pgx
pConnStr := strings.Replace(pointerTempDB.ConnStr, "cockroach", "postgres", 1)
mConnStr := strings.Replace(metabaseTempDB.ConnStr, "cockroach", "postgres", 1)
migrator := migration.NewMigrator(log, pConnStr, mConnStr, migration.Config{
PreGeneratedStreamIDs: 1000,
WriteBatchSize: 3,
WriteParallelLimit: 6,
InvalidObjectsFile: ctx.File(satelliteDB.Name + "_invalid_objects.csv"),
})
err = migrator.MigrateProjects(ctx)
require.NoError(t, err)
checkMigration(t, ctx, metabaseDB)
})
}
}
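// createLastSegment writes a last-segment pointer (the "l" path component) embedding StreamMeta with the expected number of segments and returns the pointer.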
func createLastSegment(ctx context.Context, pointerDB metainfo.PointerDB, projectID uuid.UUID, bucket, encryptedKey []byte, numberOfSegments int) (*pb.Pointer, error) {
pointer := &pb.Pointer{}
pointer.Type = pb.Pointer_REMOTE
pointer.SegmentSize = 10
pointer.CreationDate = time.Now()
pointer.ExpirationDate = time.Now()
pointer.Remote = &pb.RemoteSegment{
RootPieceId: testrand.PieceID(),
Redundancy: &pb.RedundancyScheme{
ErasureShareSize: 256,
Type: pb.RedundancyScheme_RS,
MinReq: 1,
RepairThreshold: 2,
SuccessThreshold: 3,
Total: 4,
},
RemotePieces: []*pb.RemotePiece{},
}
for i := 0; i < 10; i++ {
pointer.Remote.RemotePieces = append(pointer.Remote.RemotePieces, &pb.RemotePiece{
PieceNum: int32(i),
NodeId: testrand.NodeID(),
})
}
streamMeta := &pb.StreamMeta{}
streamMeta.NumberOfSegments = int64(numberOfSegments)
streamMeta.EncryptedStreamInfo = testrand.Bytes(1024)
streamMeta.EncryptionBlockSize = 256
streamMeta.EncryptionType = int32(pb.CipherSuite_ENC_AESGCM)
streamMeta.LastSegmentMeta = &pb.SegmentMeta{
EncryptedKey: testrand.Bytes(256),
KeyNonce: testrand.Bytes(32),
}
metaBytes, err := pb.Marshal(streamMeta)
if err != nil {
return nil, err
}
pointer.Metadata = metaBytes
path := strings.Join([]string{projectID.String(), "l", string(bucket), string(encryptedKey)}, "/")
pointerBytes, err := pb.Marshal(pointer)
if err != nil {
return nil, err
}
err = pointerDB.Put(ctx, storage.Key(path), storage.Value(pointerBytes))
if err != nil {
return nil, err
}
return pointer, nil
}
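// createSegment writes a numbered segment pointer (path component "s" followed by the segment index) with SegmentMeta for the given object.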
func createSegment(ctx context.Context, pointerDB metainfo.PointerDB, projectID uuid.UUID, segmentIndex uint32, bucket, encryptedKey []byte) error {
pointer := &pb.Pointer{}
pointer.Type = pb.Pointer_REMOTE
pointer.SegmentSize = 10
pointer.CreationDate = time.Now()
pointer.ExpirationDate = time.Now()
pointer.Remote = &pb.RemoteSegment{
RootPieceId: testrand.PieceID(),
Redundancy: &pb.RedundancyScheme{},
RemotePieces: []*pb.RemotePiece{},
}
segmentMeta := &pb.SegmentMeta{
EncryptedKey: testrand.Bytes(256),
KeyNonce: testrand.Bytes(32),
}
metaBytes, err := pb.Marshal(segmentMeta)
if err != nil {
return err
}
pointer.Metadata = metaBytes
path := strings.Join([]string{projectID.String(), "s" + strconv.Itoa(int(segmentIndex)), string(bucket), string(encryptedKey)}, "/")
pointerBytes, err := pb.Marshal(pointer)
if err != nil {
return err
}
err = pointerDB.Put(ctx, storage.Key(path), storage.Value(pointerBytes))
if err != nil {
return err
}
return nil
}


@@ -1,73 +0,0 @@
// Copyright (C) 2021 Storj Labs, Inc.
// See LICENSE for copying information.
package main
import (
"context"
"flag"
"log"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
const (
defaultSamplePercent = 1.0
)
var (
cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
samplePercent = flag.Float64("samplePercent", defaultSamplePercent, "sample size to verify, in percent")
pointerdb = flag.String("pointerdb", "", "connection URL for PointerDB")
metabasedb = flag.String("metabasedb", "", "connection URL for MetabaseDB")
)
func main() {
flag.Parse()
if *pointerdb == "" {
log.Fatalln("Flag '--pointerdb' is not set")
}
if *metabasedb == "" {
log.Fatalln("Flag '--metabasedb' is not set")
}
if *samplePercent < 0 || *samplePercent > 100 {
log.Fatalln("Flag '--samplePercent' can take values between 0 and 100")
}
ctx := context.Background()
log, err := zap.Config{
Encoding: "console",
Level: zap.NewAtomicLevelAt(zapcore.DebugLevel),
OutputPaths: []string{"stdout"},
ErrorOutputPaths: []string{"stdout"},
EncoderConfig: zapcore.EncoderConfig{
LevelKey: "L",
NameKey: "N",
CallerKey: "C",
MessageKey: "M",
StacktraceKey: "S",
LineEnding: zapcore.DefaultLineEnding,
EncodeLevel: zapcore.CapitalLevelEncoder,
EncodeTime: zapcore.ISO8601TimeEncoder,
EncodeDuration: zapcore.StringDurationEncoder,
EncodeCaller: zapcore.ShortCallerEncoder,
},
}.Build()
if err != nil {
panic(err)
}
defer func() { _ = log.Sync() }()
config := Config{
SamplePercent: *samplePercent,
}
verifier := NewVerifier(log, *pointerdb, *metabasedb, config)
err = verifier.VerifyPointers(ctx)
if err != nil {
panic(err)
}
}


@@ -1,297 +0,0 @@
// Copyright (C) 2021 Storj Labs, Inc.
// See LICENSE for copying information.
package main
import (
"bytes"
"context"
"os"
"runtime/pprof"
"sort"
"time"
proto "github.com/gogo/protobuf/proto"
"github.com/jackc/pgx/v4"
"github.com/zeebo/errs"
"go.uber.org/zap"
"storj.io/common/pb"
"storj.io/storj/cmd/metainfo-migration/fastpb"
"storj.io/storj/satellite/metainfo"
"storj.io/storj/satellite/metainfo/metabase"
)
// Config holds the initial settings for the verifier.
type Config struct {
SamplePercent float64
}
// Verifier defines the metainfo migration verifier.
type Verifier struct {
log *zap.Logger
pointerDBStr string
metabaseDBStr string
config Config
}
// NewVerifier creates a new metainfo migration verifier.
func NewVerifier(log *zap.Logger, pointerDBStr, metabaseDBStr string, config Config) *Verifier {
if config.SamplePercent == 0 {
config.SamplePercent = defaultSamplePercent
}
return &Verifier{
log: log,
pointerDBStr: pointerDBStr,
metabaseDBStr: metabaseDBStr,
config: config,
}
}
// VerifyPointers verifies a sample of all migrated pointers.
func (v *Verifier) VerifyPointers(ctx context.Context) (err error) {
v.log.Debug("Databases", zap.String("PointerDB", v.pointerDBStr), zap.String("MetabaseDB", v.metabaseDBStr))
pointerDBConn, err := pgx.Connect(ctx, v.pointerDBStr)
if err != nil {
return errs.New("unable to connect %q: %w", v.pointerDBStr, err)
}
defer func() { err = errs.Combine(err, pointerDBConn.Close(ctx)) }()
mb, err := metainfo.OpenMetabase(ctx, v.log.Named("metabase"), v.metabaseDBStr)
if err != nil {
return err
}
defer func() { err = errs.Combine(err, mb.Close()) }()
if *cpuprofile != "" {
f, err := os.Create(*cpuprofile)
if err != nil {
return err
}
err = pprof.StartCPUProfile(f)
if err != nil {
return err
}
defer pprof.StopCPUProfile()
}
pointer := &fastpb.Pointer{}
streamMeta := &fastpb.StreamMeta{}
segmentMeta := &fastpb.SegmentMeta{}
var fullpath, metadata []byte
var allSegments int64
var lastSegment bool
start := time.Now()
v.log.Info("Start", zap.Time("time", start),
zap.Float64("samplePercent", v.config.SamplePercent),
)
rows, err := pointerDBConn.Query(ctx, `SELECT fullpath, metadata FROM pathdata WHERE RANDOM() < $1`, v.config.SamplePercent/100)
if err != nil {
return err
}
defer func() { rows.Close() }()
lastCheck := time.Now()
for rows.Next() {
err = rows.Scan(&fullpath, &metadata)
if err != nil {
return err
}
segmentLocation, err := metabase.ParseSegmentKey(metabase.SegmentKey(fullpath))
if err != nil {
return errs.New("%v; pointer: %s", err, fullpath)
}
err = proto.Unmarshal(metadata, pointer)
if err != nil {
return wrap(err, segmentLocation)
}
lastSegment = segmentLocation.Position.Index == metabase.LastSegmentIndex
if lastSegment {
err = proto.Unmarshal(pointer.Metadata, streamMeta)
if err != nil {
return wrap(err, segmentLocation)
}
// calculate the metabase segment position, so we can query the segment from the metabase
segmentLocation.Position.Index = uint32(streamMeta.NumberOfSegments) - 1
}
segment, err := mb.GetSegmentByLocation(ctx, metabase.GetSegmentByLocation{SegmentLocation: segmentLocation})
if err != nil {
return wrap(err, segmentLocation)
}
if pointer.Type == fastpb.Pointer_DataType(pb.Pointer_REMOTE) {
if len(segment.InlineData) > 0 {
return wrap(errs.New("unexpected inline data for remote segment"), segmentLocation)
}
if pointer.Remote.RootPieceId != segment.RootPieceID {
return wrap(errs.New("root piece id does not match: want %s, got %s", pointer.Remote.RootPieceId, segment.RootPieceID), segmentLocation)
}
if pointer.Remote.Redundancy.Type != fastpb.RedundancyScheme_SchemeType(segment.Redundancy.Algorithm) {
return wrap(errs.New("redundancy scheme type does not match: want %d, got %d", pointer.Remote.Redundancy.Type, segment.Redundancy.Algorithm), segmentLocation)
}
if pointer.Remote.Redundancy.MinReq != int32(segment.Redundancy.RequiredShares) {
return wrap(errs.New("redundancy scheme required shares does not match: want %d, got %d", pointer.Remote.Redundancy.MinReq, segment.Redundancy.RequiredShares), segmentLocation)
}
if pointer.Remote.Redundancy.RepairThreshold != int32(segment.Redundancy.RepairShares) {
return wrap(errs.New("redundancy scheme repair shares does not match: want %d, got %d", pointer.Remote.Redundancy.RepairThreshold, segment.Redundancy.RepairShares), segmentLocation)
}
if pointer.Remote.Redundancy.SuccessThreshold != int32(segment.Redundancy.OptimalShares) {
return wrap(errs.New("redundancy scheme optimal shares does not match: want %d, got %d", pointer.Remote.Redundancy.SuccessThreshold, segment.Redundancy.OptimalShares), segmentLocation)
}
if pointer.Remote.Redundancy.Total != int32(segment.Redundancy.TotalShares) {
return wrap(errs.New("redundancy scheme total shares does not match: want %d, got %d", pointer.Remote.Redundancy.Total, segment.Redundancy.TotalShares), segmentLocation)
}
if pointer.Remote.Redundancy.ErasureShareSize != segment.Redundancy.ShareSize {
return wrap(errs.New("redundancy scheme erasure share size does not match: want %d, got %d", pointer.Remote.Redundancy.ErasureShareSize, segment.Redundancy.ShareSize), segmentLocation)
}
if len(pointer.Remote.RemotePieces) != segment.Pieces.Len() {
return wrap(errs.New("number of remote pieces does not match: want %d, got %d", len(pointer.Remote.RemotePieces), segment.Pieces.Len()), segmentLocation)
}
sort.Slice(pointer.Remote.RemotePieces, func(i, k int) bool {
return pointer.Remote.RemotePieces[i].PieceNum < pointer.Remote.RemotePieces[k].PieceNum
})
sort.Slice(segment.Pieces, func(i, k int) bool {
return segment.Pieces[i].Number < segment.Pieces[k].Number
})
for i, piece := range pointer.Remote.RemotePieces {
if piece.PieceNum != int32(segment.Pieces[i].Number) {
return wrap(errs.New("piece number does not match for remote piece %d: want %d, got %d", i, piece.PieceNum, segment.Pieces[i].Number), segmentLocation)
}
if piece.NodeId != segment.Pieces[i].StorageNode {
return wrap(errs.New("storage node id does not match for remote piece %d: want %s, got %s", i, piece.NodeId, segment.Pieces[i].StorageNode), segmentLocation)
}
}
} else {
if !bytes.Equal(pointer.InlineSegment, segment.InlineData) {
return wrap(errs.New("inline data does not match: want %x, got %x", pointer.InlineSegment, segment.InlineData), segmentLocation)
}
if !segment.RootPieceID.IsZero() {
return wrap(errs.New("unexpected root piece id for inline segment"), segmentLocation)
}
if !segment.Redundancy.IsZero() {
return wrap(errs.New("unexpected redundancy scheme for inline segment"), segmentLocation)
}
if segment.Pieces.Len() > 0 {
return wrap(errs.New("unexpected remote pieces for inline segment"), segmentLocation)
}
}
if segment.StreamID.IsZero() {
return wrap(errs.New("missing stream id in segment"), segmentLocation)
}
if pointer.SegmentSize != int64(segment.EncryptedSize) {
return wrap(errs.New("segment size does not match: want %d, got %d", pointer.SegmentSize, segment.EncryptedSize), segmentLocation)
}
if segment.PlainOffset != 0 {
return wrap(errs.New("unexpected plain offset: %d", segment.PlainOffset), segmentLocation)
}
if segment.PlainSize != 0 {
return wrap(errs.New("unexpected plain size: %d", segment.PlainSize), segmentLocation)
}
if lastSegment {
object, err := mb.GetObjectLatestVersion(ctx, metabase.GetObjectLatestVersion{ObjectLocation: segmentLocation.Object()})
if err != nil {
return wrap(err, segmentLocation)
}
if object.StreamID.IsZero() {
return wrap(errs.New("missing stream id in object"), segmentLocation)
}
if object.StreamID != segment.StreamID {
return wrap(errs.New("stream id does not match: object %s, segment %s", object.StreamID, segment.StreamID), segmentLocation)
}
if object.Version != 1 {
return wrap(errs.New("unexpected version: want %d, got %d", 1, object.Version), segmentLocation)
}
if object.Status != metabase.Committed {
return wrap(errs.New("unexpected status: want %d, got %d", metabase.Committed, object.Status), segmentLocation)
}
if !withinDuration(pointer.CreationDate, object.CreatedAt, 1*time.Microsecond) {
return wrap(errs.New("creation date does not match: want %s, got %s", pointer.CreationDate, object.CreatedAt), segmentLocation)
}
if object.ExpiresAt == nil {
if !pointer.ExpirationDate.IsZero() {
return wrap(errs.New("missing expiration date"), segmentLocation)
}
} else if !withinDuration(pointer.ExpirationDate, *object.ExpiresAt, 1*time.Microsecond) {
return wrap(errs.New("expiration date does not match: want %s, got %s", pointer.ExpirationDate, object.ExpiresAt), segmentLocation)
}
if int32(streamMeta.NumberOfSegments) != object.SegmentCount {
return wrap(errs.New("number of segments does not match: want %d, got %d", streamMeta.NumberOfSegments, object.SegmentCount), segmentLocation)
}
if object.FixedSegmentSize != 0 {
return wrap(errs.New("unexpected fixed segment size: %d", object.FixedSegmentSize), segmentLocation)
}
if object.SegmentCount == 1 {
if pointer.SegmentSize != object.TotalEncryptedSize {
return wrap(errs.New("total encrypted size does not match: want %d, got %d", pointer.SegmentSize, object.TotalEncryptedSize), segmentLocation)
}
} else {
if pointer.SegmentSize >= object.TotalEncryptedSize {
return wrap(errs.New("total encrypted size does not match: want >%d, got %d", pointer.SegmentSize, object.TotalEncryptedSize), segmentLocation)
}
}
if object.TotalPlainSize != 0 {
return wrap(errs.New("unexpected total plain size: %d", object.TotalPlainSize), segmentLocation)
}
if streamMeta.EncryptionType != int32(object.Encryption.CipherSuite) {
return wrap(errs.New("encryption type does not match: want %d, got %d", streamMeta.EncryptionType, object.Encryption.CipherSuite), segmentLocation)
}
if streamMeta.EncryptionBlockSize != object.Encryption.BlockSize {
return wrap(errs.New("encryption block size does not match: want %d, got %d", streamMeta.EncryptionBlockSize, object.Encryption.BlockSize), segmentLocation)
}
if !bytes.Equal(streamMeta.LastSegmentMeta.EncryptedKey, object.EncryptedMetadataEncryptedKey) {
return wrap(errs.New("encrypted metadata encrypted key does not match: want %x, got %x", streamMeta.LastSegmentMeta.EncryptedKey, object.EncryptedMetadataEncryptedKey), segmentLocation)
}
if !bytes.Equal(streamMeta.LastSegmentMeta.KeyNonce, object.EncryptedMetadataNonce) {
return wrap(errs.New("encrypted metadata key nonce does not match: want %x, got %x", streamMeta.LastSegmentMeta.KeyNonce, object.EncryptedMetadataNonce), segmentLocation)
}
if !bytes.Equal(pointer.Metadata, object.EncryptedMetadata) {
return wrap(errs.New("encrypted metadata does not match: want %x, got %x", pointer.Metadata, object.EncryptedMetadata), segmentLocation)
}
if object.ZombieDeletionDeadline != nil {
return wrap(errs.New("unexpected zombie deletion deadline: %s", object.ZombieDeletionDeadline), segmentLocation)
}
} else {
err = pb.Unmarshal(pointer.Metadata, segmentMeta)
if err != nil {
return wrap(err, segmentLocation)
}
if !bytes.Equal(segmentMeta.EncryptedKey, segment.EncryptedKey) {
return wrap(errs.New("segment metadata encrypted key does not match: want %x, got %x", segmentMeta.EncryptedKey, segment.EncryptedKey), segmentLocation)
}
if !bytes.Equal(segmentMeta.KeyNonce, segment.EncryptedKeyNonce) {
return wrap(errs.New("segment metadata key nonce does not match: want %x, got %x", segmentMeta.KeyNonce, segment.EncryptedKeyNonce), segmentLocation)
}
}
if allSegments != 0 && allSegments%100 == 0 {
v.log.Info("Processed segments", zap.Int64("segments", allSegments), zap.Duration("took", time.Since(lastCheck)))
lastCheck = time.Now()
}
allSegments++
}
v.log.Info("Finished", zap.Int64("segments", allSegments), zap.Duration("Total", time.Since(start)))
return rows.Err()
}
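// withinDuration reports whether expected and actual differ by less than delta in either direction.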
func withinDuration(expected, actual time.Time, delta time.Duration) bool {
dt := expected.Sub(actual)
return -delta < dt && dt < delta
}
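// wrap annotates err with the project, bucket, object key, and segment index of the location being verified.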
func wrap(err error, segment metabase.SegmentLocation) error {
return errs.New("%v; project: %x, bucket: %s, object: %x, index: %d",
err, segment.ProjectID, segment.BucketName, segment.ObjectKey, segment.Position.Index)
}

go.sum

@@ -334,7 +334,6 @@ github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0f
github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
github.com/jackc/puddle v1.1.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
github.com/jackc/puddle v1.1.1/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
github.com/jackc/puddle v1.1.3 h1:JnPg/5Q9xVJGfjsO5CPUOjnJps1JaRUm8I9FXVCFK94=
github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU=
github.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=


@@ -146,10 +146,6 @@ STORJ_NUM_NODES=9
## Run tests on the branch under test.
##
go install storj.io/storj/cmd/metainfo-migration
STORJ_MIGRATION_DB=${STORJ_MIGRATION_DB:-$STORJ_SIM_POSTGRES}
metainfo-migration --pointerdb "${STORJ_MIGRATION_DB}" --metabasedb "${STORJ_MIGRATION_DB}"
# check that branch uplink + branch network can read fully release data
test_branch -b release-network-release-uplink download


@@ -142,10 +142,6 @@ setup_stage(){
if [[ $stage == "2" ]]
then
mv $dest_sat_cfg_dir/satellite $dest_sat_cfg_dir/old_satellite
go build -v -o $test_dir/bin storj.io/storj/cmd/metainfo-migration >/dev/null 2>&1
STORJ_MIGRATION_DB=${STORJ_MIGRATION_DB:-$STORJ_SIM_POSTGRES}
$test_dir/bin/metainfo-migration --pointerdb "${STORJ_MIGRATION_DB}" --metabasedb "${STORJ_MIGRATION_DB}"
fi
# ln binary and copy config.yaml for desired version