From 25f81f353cce8074a17a56684d35ceb5be802fae Mon Sep 17 00:00:00 2001 From: Egon Elbre Date: Tue, 9 Feb 2021 10:13:54 +0200 Subject: [PATCH] satellite/metainfo/metabase: add AliasPiece This adds AliasPieces run length encoding. On average it should make our pieces encoding: repair=50,optimal=85,total=90 152.0 bytes repair=16,optimal=37,total=50 65.4 bytes Change-Id: I391a9183164828f05383a3cde9ab0e4549c2d440 --- satellite/metainfo/metabase/aliaspiece.go | 190 ++++++++++++++ .../metainfo/metabase/aliaspiece_test.go | 241 ++++++++++++++++++ 2 files changed, 431 insertions(+) create mode 100644 satellite/metainfo/metabase/aliaspiece.go create mode 100644 satellite/metainfo/metabase/aliaspiece_test.go diff --git a/satellite/metainfo/metabase/aliaspiece.go b/satellite/metainfo/metabase/aliaspiece.go new file mode 100644 index 000000000..94abcdae5 --- /dev/null +++ b/satellite/metainfo/metabase/aliaspiece.go @@ -0,0 +1,190 @@ +// Copyright (C) 2021 Storj Labs, Inc. +// See LICENSE for copying information. + +package metabase + +import ( + "database/sql/driver" + "encoding/binary" +) + +// AliasPieces is a slice of AliasPiece. +type AliasPieces []AliasPiece + +// AliasPiece is a piece with alias node ID. +type AliasPiece struct { + Number uint16 + Alias NodeAlias +} + +const ( + // aliasPieceEncodingRLE run length encodes the zeros and node ID-s. + // + // Example: + // pieces = {2 x} {11 y} + // // converted into slice with zeros + // 0 0 x 0 0 0 0 0 0 0 0 y + // // run length encoded + // <2 zeros, 1 value> x <7 zeros, 0 values> <1 zeros, 1 value> y + aliasPieceEncodingRLE = 1 + + aliasPieceEncodingZeroBits = 3 + aliasPieceEncodingNodeAliasBits = 8 - aliasPieceEncodingZeroBits + aliasPieceEncodingMaxZeros = 1< piece.Number { + return nil, Error.New("alias pieces not ordered") + } + + // count up until max zeros + for i := 0; i < aliasPieceEncodingMaxZeros; i++ { + if expectedPieceNumber == piece.Number { + break + } + zeroCount++ + expectedPieceNumber++ + } + + // if there were too many zeros in sequence, we need to emit more headers + if piece.Number != expectedPieceNumber { + setHeader() + continue + } + + // emit all the pieces that are in sequence, but up to max node aliases + for aliasCount < aliasPieceEncodingMaxNodeAliases { + // emit the piece alias + n := binary.PutUvarint(buffer[:], uint64(piece.Alias)) + data = append(data, buffer[:n]...) + + // update the header and the expected piece number + aliasCount++ + expectedPieceNumber++ + + // next piece + index++ + if index >= len(aliases) { + break + } + piece = aliases[index] + + // check whether we should emit zeros + if piece.Number != expectedPieceNumber { + break + } + } + setHeader() + } + + return data, nil +} + +// SetBytes decompresses alias pieces from a slice of bytes. +func (aliases *AliasPieces) SetBytes(data []byte) error { + *aliases = nil + if len(data) == 0 { + return nil + } + if data[0] != aliasPieceEncodingRLE { + return Error.New("unknown alias pieces header: %v", data[0]) + } + + // we're going to guess there's two alias pieces per two bytes of data + *aliases = make(AliasPieces, 0, len(data)/2) + + p := 1 + pieceNumber := uint16(0) + for p < len(data) { + // read the header + header := data[p] + p++ + if p >= len(data) { + return Error.New("invalid alias pieces data") + } + + // extract header values + aliasCount := int(header >> aliasPieceEncodingZeroBits) + zeroCount := int(header & aliasPieceEncodingMaxZeros) + + // skip over the zero values + pieceNumber += uint16(zeroCount) + + // read the aliases + for k := 0; k < aliasCount; k++ { + v, n := binary.Uvarint(data[p:]) + p += n + if n <= 0 { + return Error.New("invalid alias pieces data") + } + *aliases = append(*aliases, AliasPiece{ + Number: pieceNumber, + Alias: NodeAlias(v), + }) + pieceNumber++ + } + } + + return nil +} + +// Scan implements the database/sql Scanner interface. +func (aliases *AliasPieces) Scan(src interface{}) error { + if src == nil { + *aliases = nil + return nil + } + + switch src := src.(type) { + case []byte: + return aliases.SetBytes(src) + default: + return Error.New("invalid type for AliasPieces: %T", src) + } +} + +// Value implements the database/sql/driver Valuer interface. +func (aliases AliasPieces) Value() (driver.Value, error) { + return aliases.Bytes() +} + +// EqualAliasPieces compares whether xs and ys are equal. +func EqualAliasPieces(xs, ys AliasPieces) bool { + if len(xs) != len(ys) { + return false + } + for i, x := range xs { + if ys[i] != x { + return false + } + } + return true +} diff --git a/satellite/metainfo/metabase/aliaspiece_test.go b/satellite/metainfo/metabase/aliaspiece_test.go new file mode 100644 index 000000000..2459ffd32 --- /dev/null +++ b/satellite/metainfo/metabase/aliaspiece_test.go @@ -0,0 +1,241 @@ +// Copyright (C) 2021 Storj Labs, Inc. +// See LICENSE for copying information. + +package metabase_test + +import ( + "fmt" + "math/rand" + "sort" + "testing" + + "github.com/stretchr/testify/require" + + "storj.io/storj/satellite/metainfo/metabase" +) + +func TestAliasPieces(t *testing.T) { + type test struct { + in metabase.AliasPieces + bytes []byte + } + tests := []test{ + {in: nil, bytes: nil}, + {in: metabase.AliasPieces{ + {Number: 0, Alias: 1}, + }, bytes: []byte{1, 0b00001_000, 1}}, + {in: metabase.AliasPieces{ + {Number: 0, Alias: 1}, + {Number: 3, Alias: 2}, + }, bytes: []byte{1, 0b00001_000, 1, 0b00001_010, 2}}, + {in: metabase.AliasPieces{ + {Number: 3, Alias: 2}, + }, bytes: []byte{1, 0b00001_011, 2}}, + {in: metabase.AliasPieces{ + {Number: 4, Alias: 2}, + }, bytes: []byte{1, 0b00001_100, 2}}, + {in: metabase.AliasPieces{ + {Number: 9, Alias: 2}, + }, bytes: []byte{1, 0b00000_111, 0b00001_010, 2}}, + {in: metabase.AliasPieces{ + {Number: 0, Alias: 0xF8}, + }, bytes: []byte{1, 0b00001_000, 0xF8, 0x01}}, + {in: metabase.AliasPieces{ + {Number: 0, Alias: 0xF808}, + }, bytes: []byte{1, 0b00001_000, 0x88, 0xf0, 0x03}}, + {in: metabase.AliasPieces{ + {Number: 0, Alias: 0xF808ba}, + }, bytes: []byte{1, 0b00001_000, 0xba, 0x91, 0xe0, 0x07}}, + {in: metabase.AliasPieces{ + {Number: 0, Alias: 0xA}, + {Number: 1, Alias: 0xB}, + {Number: 2, Alias: 0xC}, + }, bytes: []byte{1, 0b00011_000, 0xA, 0xB, 0xC}}, + {in: metabase.AliasPieces{ + {Number: 2, Alias: 0xA}, + {Number: 3, Alias: 0xB}, + {Number: 4, Alias: 0xC}, + }, bytes: []byte{1, 0b00011_010, 0xA, 0xB, 0xC}}, + {in: metabase.AliasPieces{ + {Number: 0, Alias: 0xA}, + {Number: 1, Alias: 0xB}, + {Number: 2, Alias: 0xC}, + {Number: 7, Alias: 0xD}, + {Number: 8, Alias: 0xE}, + {Number: 9, Alias: 0xF}, + }, bytes: []byte{1, + 0b00011_000, 0xA, 0xB, 0xC, + 0b00011_100, 0xD, 0xE, 0xF, + }}, + {in: metabase.AliasPieces{ + {Number: 0, Alias: 1}, {Number: 1, Alias: 2}, {Number: 2, Alias: 3}, {Number: 3, Alias: 4}, {Number: 4, Alias: 5}, {Number: 5, Alias: 6}, {Number: 6, Alias: 7}, {Number: 7, Alias: 8}, + {Number: 8, Alias: 9}, {Number: 9, Alias: 10}, {Number: 10, Alias: 11}, {Number: 11, Alias: 12}, {Number: 12, Alias: 13}, {Number: 13, Alias: 14}, {Number: 14, Alias: 15}, {Number: 15, Alias: 16}, + {Number: 16, Alias: 17}, {Number: 17, Alias: 18}, {Number: 18, Alias: 19}, {Number: 19, Alias: 20}, {Number: 20, Alias: 21}, {Number: 21, Alias: 22}, {Number: 22, Alias: 23}, {Number: 23, Alias: 24}, + {Number: 24, Alias: 25}, {Number: 25, Alias: 26}, {Number: 26, Alias: 27}, {Number: 27, Alias: 28}, {Number: 28, Alias: 29}, {Number: 29, Alias: 30}, {Number: 30, Alias: 31}, {Number: 31, Alias: 32}, + {Number: 32, Alias: 33}, {Number: 33, Alias: 34}, + }, bytes: []byte{1, + 0b11111_000, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 0b00011_000, 32, 33, 34, + }}, + } + + for i, test := range tests { + bytes, err := test.in.Bytes() + require.NoError(t, err, i) + require.Equal(t, test.bytes, bytes, i) + + out := metabase.AliasPieces{} + err = out.SetBytes(bytes) + require.NoError(t, err, i) + + require.Equal(t, test.in, out, i) + } +} + +func TestAliasPieces_Large(t *testing.T) { + aliases := make(metabase.AliasPieces, 0xFF) + for offset := 1; offset < 18; offset++ { + for i := range aliases { + aliases[i].Number = uint16(i * offset) + aliases[i].Alias = metabase.NodeAlias(i + 1) + } + + bytes, err := aliases.Bytes() + require.NoError(t, err) + + var result metabase.AliasPieces + err = result.SetBytes(bytes) + require.NoError(t, err) + + require.Equal(t, result, aliases) + } +} + +func TestAliasPieces_Errors(t *testing.T) { + aliases := metabase.AliasPieces{ + {Number: 1, Alias: 1}, + {Number: 0, Alias: 2}, + } + _, err := aliases.Bytes() + require.EqualError(t, err, "metabase: alias pieces not ordered") + + duplicate := metabase.AliasPieces{ + {Number: 0, Alias: 1}, + {Number: 0, Alias: 2}, + } + _, err = duplicate.Bytes() + require.EqualError(t, err, "metabase: alias pieces not ordered") + + err = aliases.SetBytes([]byte{17}) + require.EqualError(t, err, "metabase: unknown alias pieces header: 17") + + err = aliases.SetBytes([]byte{1, 0xFF}) + require.EqualError(t, err, "metabase: invalid alias pieces data") +} + +func BenchmarkAliasPiecesBytes(b *testing.B) { + benchmarkAliasPiecesBytes(b, 50, 85, 90) + benchmarkAliasPiecesBytes(b, 16, 37, 50) +} + +func benchmarkAliasPiecesBytes(b *testing.B, repair, optimal, total int) { + prefix := fmt.Sprintf("repair=%d,optimal=%d,total=%d", repair, optimal, total) + + b.Run(prefix+"/2byte", func(b *testing.B) { + aliases := make(metabase.AliasPieces, optimal) + for i := range aliases { + aliases[i] = metabase.AliasPiece{ + Number: uint16(i), + Alias: metabase.NodeAlias(0xFF + i), + } + } + + var finalData []byte + b.Run("Bytes", func(b *testing.B) { + for k := 0; k < b.N; k++ { + data, err := aliases.Bytes() + if err != nil { + b.Fatal(err) + } + finalData = data + } + }) + b.Run("SetBytes", func(b *testing.B) { + var aliases metabase.AliasPieces + for k := 0; k < b.N; k++ { + err := aliases.SetBytes(finalData) + if err != nil { + b.Fatal(err) + } + } + }) + + b.ReportMetric(float64(len(finalData)), "B") + }) + + b.Run(prefix+"/3byte", func(b *testing.B) { + aliases := make(metabase.AliasPieces, optimal) + for i := range aliases { + aliases[i] = metabase.AliasPiece{ + Number: uint16(i), + Alias: metabase.NodeAlias(0xFFFF + i), + } + } + + var finalData []byte + b.Run("Bytes", func(b *testing.B) { + for k := 0; k < b.N; k++ { + data, err := aliases.Bytes() + if err != nil { + b.Fatal(err) + } + finalData = data + } + }) + b.Run("SetBytes", func(b *testing.B) { + var aliases metabase.AliasPieces + for k := 0; k < b.N; k++ { + err := aliases.SetBytes(finalData) + if err != nil { + b.Fatal(err) + } + } + }) + + b.ReportMetric(float64(len(finalData)), "B") + }) + + b.Run(prefix+"/sim", func(b *testing.B) { + totalBytes := int64(0) + minBytes, maxBytes := int64(0xFFFFFF), int64(0) + + for k := 0; k < b.N; k++ { + numPieces := repair + k%(optimal-repair) + aliases := make(metabase.AliasPieces, numPieces) + for i, n := range rand.Perm(total)[:numPieces] { + aliases[i].Number = uint16(n) + aliases[i].Alias = metabase.NodeAlias(0xFF + i) + } + sort.Slice(aliases, func(i, k int) bool { + return aliases[i].Number < aliases[k].Number + }) + bytes, err := aliases.Bytes() + if err != nil { + b.Fatal(err) + } + + b := int64(len(bytes)) + totalBytes += b + if b < minBytes { + minBytes = b + } + if b > maxBytes { + maxBytes = b + } + } + + b.ReportMetric(float64(totalBytes)/float64(b.N), "B/avg") + b.ReportMetric(float64(minBytes), "B/min") + b.ReportMetric(float64(maxBytes), "B/max") + }) +}