satellite/metainfo/metabase: add AliasPiece
This adds AliasPieces run length encoding. On average it should make our pieces encoding take: repair=50,optimal=85,total=90: 152.0 bytes; repair=16,optimal=37,total=50: 65.4 bytes. Change-Id: I391a9183164828f05383a3cde9ab0e4549c2d440
This commit is contained in:
parent
63c7f8b7fc
commit
25f81f353c
190
satellite/metainfo/metabase/aliaspiece.go
Normal file
190
satellite/metainfo/metabase/aliaspiece.go
Normal file
@ -0,0 +1,190 @@
|
||||
// Copyright (C) 2021 Storj Labs, Inc.
|
||||
// See LICENSE for copying information.
|
||||
|
||||
package metabase
|
||||
|
||||
import (
|
||||
"database/sql/driver"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// AliasPieces is a slice of AliasPiece.
|
||||
type AliasPieces []AliasPiece
|
||||
|
||||
// AliasPiece is a piece with alias node ID.
|
||||
type AliasPiece struct {
|
||||
Number uint16
|
||||
Alias NodeAlias
|
||||
}
|
||||
|
||||
const (
	// aliasPieceEncodingRLE identifies the encoding that run length encodes
	// both the gaps (zeros) between piece numbers and the node aliases.
	// It is stored as the first byte of the encoded data.
	//
	// Every run starts with a single header byte: the lowest
	// aliasPieceEncodingZeroBits bits contain the number of skipped piece
	// numbers and the remaining upper bits contain the number of aliases
	// that follow the header.
	//
	// Example:
	//   pieces = {2 x} {11 y}
	//   // converted into slice with zeros
	//   0 0 x 0 0 0 0 0 0 0 0 y
	//   // run length encoded
	//   <2 zeros, 1 value> x <7 zeros, 0 values> <1 zeros, 1 value> y
	aliasPieceEncodingRLE = 1

	// aliasPieceEncodingZeroBits is the number of header bits used for the zero count.
	aliasPieceEncodingZeroBits = 3

	// aliasPieceEncodingNodeAliasBits is the number of header bits used for the alias count.
	aliasPieceEncodingNodeAliasBits = 8 - aliasPieceEncodingZeroBits

	// aliasPieceEncodingMaxZeros is the largest zero count a single header can hold.
	aliasPieceEncodingMaxZeros = (1 << aliasPieceEncodingZeroBits) - 1

	// aliasPieceEncodingMaxNodeAliases is the largest alias count a single header can hold.
	aliasPieceEncodingMaxNodeAliases = (1 << aliasPieceEncodingNodeAliasBits) - 1
)
|
||||
|
||||
// Bytes compresses alias pieces to a slice of bytes.
|
||||
func (aliases AliasPieces) Bytes() ([]byte, error) {
|
||||
if len(aliases) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var buffer [binary.MaxVarintLen64]byte
|
||||
|
||||
// we're going to guess that it'll take 3 bytes per node alias + at most one per two nodes.
|
||||
data := make([]byte, 0, len(aliases)*3+len(aliases)/2)
|
||||
data = append(data, aliasPieceEncodingRLE)
|
||||
|
||||
expectedPieceNumber := uint16(0)
|
||||
|
||||
index := 0
|
||||
for index < len(aliases) {
|
||||
data = append(data, 0)
|
||||
|
||||
// setup header for the next sequence of nodes
|
||||
lengthHeaderPos := len(data) - 1
|
||||
zeroCount, aliasCount := 0, 0
|
||||
setHeader := func() {
|
||||
data[lengthHeaderPos] = byte(aliasCount)<<aliasPieceEncodingZeroBits | byte(zeroCount)
|
||||
}
|
||||
|
||||
// start examining the piece
|
||||
piece := aliases[index]
|
||||
if expectedPieceNumber > piece.Number {
|
||||
return nil, Error.New("alias pieces not ordered")
|
||||
}
|
||||
|
||||
// count up until max zeros
|
||||
for i := 0; i < aliasPieceEncodingMaxZeros; i++ {
|
||||
if expectedPieceNumber == piece.Number {
|
||||
break
|
||||
}
|
||||
zeroCount++
|
||||
expectedPieceNumber++
|
||||
}
|
||||
|
||||
// if there were too many zeros in sequence, we need to emit more headers
|
||||
if piece.Number != expectedPieceNumber {
|
||||
setHeader()
|
||||
continue
|
||||
}
|
||||
|
||||
// emit all the pieces that are in sequence, but up to max node aliases
|
||||
for aliasCount < aliasPieceEncodingMaxNodeAliases {
|
||||
// emit the piece alias
|
||||
n := binary.PutUvarint(buffer[:], uint64(piece.Alias))
|
||||
data = append(data, buffer[:n]...)
|
||||
|
||||
// update the header and the expected piece number
|
||||
aliasCount++
|
||||
expectedPieceNumber++
|
||||
|
||||
// next piece
|
||||
index++
|
||||
if index >= len(aliases) {
|
||||
break
|
||||
}
|
||||
piece = aliases[index]
|
||||
|
||||
// check whether we should emit zeros
|
||||
if piece.Number != expectedPieceNumber {
|
||||
break
|
||||
}
|
||||
}
|
||||
setHeader()
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// SetBytes decompresses alias pieces from a slice of bytes.
|
||||
func (aliases *AliasPieces) SetBytes(data []byte) error {
|
||||
*aliases = nil
|
||||
if len(data) == 0 {
|
||||
return nil
|
||||
}
|
||||
if data[0] != aliasPieceEncodingRLE {
|
||||
return Error.New("unknown alias pieces header: %v", data[0])
|
||||
}
|
||||
|
||||
// we're going to guess there's two alias pieces per two bytes of data
|
||||
*aliases = make(AliasPieces, 0, len(data)/2)
|
||||
|
||||
p := 1
|
||||
pieceNumber := uint16(0)
|
||||
for p < len(data) {
|
||||
// read the header
|
||||
header := data[p]
|
||||
p++
|
||||
if p >= len(data) {
|
||||
return Error.New("invalid alias pieces data")
|
||||
}
|
||||
|
||||
// extract header values
|
||||
aliasCount := int(header >> aliasPieceEncodingZeroBits)
|
||||
zeroCount := int(header & aliasPieceEncodingMaxZeros)
|
||||
|
||||
// skip over the zero values
|
||||
pieceNumber += uint16(zeroCount)
|
||||
|
||||
// read the aliases
|
||||
for k := 0; k < aliasCount; k++ {
|
||||
v, n := binary.Uvarint(data[p:])
|
||||
p += n
|
||||
if n <= 0 {
|
||||
return Error.New("invalid alias pieces data")
|
||||
}
|
||||
*aliases = append(*aliases, AliasPiece{
|
||||
Number: pieceNumber,
|
||||
Alias: NodeAlias(v),
|
||||
})
|
||||
pieceNumber++
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Scan implements the database/sql Scanner interface.
|
||||
func (aliases *AliasPieces) Scan(src interface{}) error {
|
||||
if src == nil {
|
||||
*aliases = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
switch src := src.(type) {
|
||||
case []byte:
|
||||
return aliases.SetBytes(src)
|
||||
default:
|
||||
return Error.New("invalid type for AliasPieces: %T", src)
|
||||
}
|
||||
}
|
||||
|
||||
// Value implements the database/sql/driver Valuer interface.
|
||||
func (aliases AliasPieces) Value() (driver.Value, error) {
|
||||
return aliases.Bytes()
|
||||
}
|
||||
|
||||
// EqualAliasPieces compares whether xs and ys are equal.
|
||||
func EqualAliasPieces(xs, ys AliasPieces) bool {
|
||||
if len(xs) != len(ys) {
|
||||
return false
|
||||
}
|
||||
for i, x := range xs {
|
||||
if ys[i] != x {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
241
satellite/metainfo/metabase/aliaspiece_test.go
Normal file
241
satellite/metainfo/metabase/aliaspiece_test.go
Normal file
@ -0,0 +1,241 @@
|
||||
// Copyright (C) 2021 Storj Labs, Inc.
|
||||
// See LICENSE for copying information.
|
||||
|
||||
package metabase_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"storj.io/storj/satellite/metainfo/metabase"
|
||||
)
|
||||
|
||||
func TestAliasPieces(t *testing.T) {
|
||||
type test struct {
|
||||
in metabase.AliasPieces
|
||||
bytes []byte
|
||||
}
|
||||
tests := []test{
|
||||
{in: nil, bytes: nil},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 0, Alias: 1},
|
||||
}, bytes: []byte{1, 0b00001_000, 1}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 0, Alias: 1},
|
||||
{Number: 3, Alias: 2},
|
||||
}, bytes: []byte{1, 0b00001_000, 1, 0b00001_010, 2}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 3, Alias: 2},
|
||||
}, bytes: []byte{1, 0b00001_011, 2}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 4, Alias: 2},
|
||||
}, bytes: []byte{1, 0b00001_100, 2}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 9, Alias: 2},
|
||||
}, bytes: []byte{1, 0b00000_111, 0b00001_010, 2}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 0, Alias: 0xF8},
|
||||
}, bytes: []byte{1, 0b00001_000, 0xF8, 0x01}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 0, Alias: 0xF808},
|
||||
}, bytes: []byte{1, 0b00001_000, 0x88, 0xf0, 0x03}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 0, Alias: 0xF808ba},
|
||||
}, bytes: []byte{1, 0b00001_000, 0xba, 0x91, 0xe0, 0x07}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 0, Alias: 0xA},
|
||||
{Number: 1, Alias: 0xB},
|
||||
{Number: 2, Alias: 0xC},
|
||||
}, bytes: []byte{1, 0b00011_000, 0xA, 0xB, 0xC}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 2, Alias: 0xA},
|
||||
{Number: 3, Alias: 0xB},
|
||||
{Number: 4, Alias: 0xC},
|
||||
}, bytes: []byte{1, 0b00011_010, 0xA, 0xB, 0xC}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 0, Alias: 0xA},
|
||||
{Number: 1, Alias: 0xB},
|
||||
{Number: 2, Alias: 0xC},
|
||||
{Number: 7, Alias: 0xD},
|
||||
{Number: 8, Alias: 0xE},
|
||||
{Number: 9, Alias: 0xF},
|
||||
}, bytes: []byte{1,
|
||||
0b00011_000, 0xA, 0xB, 0xC,
|
||||
0b00011_100, 0xD, 0xE, 0xF,
|
||||
}},
|
||||
{in: metabase.AliasPieces{
|
||||
{Number: 0, Alias: 1}, {Number: 1, Alias: 2}, {Number: 2, Alias: 3}, {Number: 3, Alias: 4}, {Number: 4, Alias: 5}, {Number: 5, Alias: 6}, {Number: 6, Alias: 7}, {Number: 7, Alias: 8},
|
||||
{Number: 8, Alias: 9}, {Number: 9, Alias: 10}, {Number: 10, Alias: 11}, {Number: 11, Alias: 12}, {Number: 12, Alias: 13}, {Number: 13, Alias: 14}, {Number: 14, Alias: 15}, {Number: 15, Alias: 16},
|
||||
{Number: 16, Alias: 17}, {Number: 17, Alias: 18}, {Number: 18, Alias: 19}, {Number: 19, Alias: 20}, {Number: 20, Alias: 21}, {Number: 21, Alias: 22}, {Number: 22, Alias: 23}, {Number: 23, Alias: 24},
|
||||
{Number: 24, Alias: 25}, {Number: 25, Alias: 26}, {Number: 26, Alias: 27}, {Number: 27, Alias: 28}, {Number: 28, Alias: 29}, {Number: 29, Alias: 30}, {Number: 30, Alias: 31}, {Number: 31, Alias: 32},
|
||||
{Number: 32, Alias: 33}, {Number: 33, Alias: 34},
|
||||
}, bytes: []byte{1,
|
||||
0b11111_000, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
0b00011_000, 32, 33, 34,
|
||||
}},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
bytes, err := test.in.Bytes()
|
||||
require.NoError(t, err, i)
|
||||
require.Equal(t, test.bytes, bytes, i)
|
||||
|
||||
out := metabase.AliasPieces{}
|
||||
err = out.SetBytes(bytes)
|
||||
require.NoError(t, err, i)
|
||||
|
||||
require.Equal(t, test.in, out, i)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAliasPieces_Large(t *testing.T) {
|
||||
aliases := make(metabase.AliasPieces, 0xFF)
|
||||
for offset := 1; offset < 18; offset++ {
|
||||
for i := range aliases {
|
||||
aliases[i].Number = uint16(i * offset)
|
||||
aliases[i].Alias = metabase.NodeAlias(i + 1)
|
||||
}
|
||||
|
||||
bytes, err := aliases.Bytes()
|
||||
require.NoError(t, err)
|
||||
|
||||
var result metabase.AliasPieces
|
||||
err = result.SetBytes(bytes)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, result, aliases)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAliasPieces_Errors(t *testing.T) {
|
||||
aliases := metabase.AliasPieces{
|
||||
{Number: 1, Alias: 1},
|
||||
{Number: 0, Alias: 2},
|
||||
}
|
||||
_, err := aliases.Bytes()
|
||||
require.EqualError(t, err, "metabase: alias pieces not ordered")
|
||||
|
||||
duplicate := metabase.AliasPieces{
|
||||
{Number: 0, Alias: 1},
|
||||
{Number: 0, Alias: 2},
|
||||
}
|
||||
_, err = duplicate.Bytes()
|
||||
require.EqualError(t, err, "metabase: alias pieces not ordered")
|
||||
|
||||
err = aliases.SetBytes([]byte{17})
|
||||
require.EqualError(t, err, "metabase: unknown alias pieces header: 17")
|
||||
|
||||
err = aliases.SetBytes([]byte{1, 0xFF})
|
||||
require.EqualError(t, err, "metabase: invalid alias pieces data")
|
||||
}
|
||||
|
||||
func BenchmarkAliasPiecesBytes(b *testing.B) {
|
||||
benchmarkAliasPiecesBytes(b, 50, 85, 90)
|
||||
benchmarkAliasPiecesBytes(b, 16, 37, 50)
|
||||
}
|
||||
|
||||
func benchmarkAliasPiecesBytes(b *testing.B, repair, optimal, total int) {
|
||||
prefix := fmt.Sprintf("repair=%d,optimal=%d,total=%d", repair, optimal, total)
|
||||
|
||||
b.Run(prefix+"/2byte", func(b *testing.B) {
|
||||
aliases := make(metabase.AliasPieces, optimal)
|
||||
for i := range aliases {
|
||||
aliases[i] = metabase.AliasPiece{
|
||||
Number: uint16(i),
|
||||
Alias: metabase.NodeAlias(0xFF + i),
|
||||
}
|
||||
}
|
||||
|
||||
var finalData []byte
|
||||
b.Run("Bytes", func(b *testing.B) {
|
||||
for k := 0; k < b.N; k++ {
|
||||
data, err := aliases.Bytes()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
finalData = data
|
||||
}
|
||||
})
|
||||
b.Run("SetBytes", func(b *testing.B) {
|
||||
var aliases metabase.AliasPieces
|
||||
for k := 0; k < b.N; k++ {
|
||||
err := aliases.SetBytes(finalData)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
b.ReportMetric(float64(len(finalData)), "B")
|
||||
})
|
||||
|
||||
b.Run(prefix+"/3byte", func(b *testing.B) {
|
||||
aliases := make(metabase.AliasPieces, optimal)
|
||||
for i := range aliases {
|
||||
aliases[i] = metabase.AliasPiece{
|
||||
Number: uint16(i),
|
||||
Alias: metabase.NodeAlias(0xFFFF + i),
|
||||
}
|
||||
}
|
||||
|
||||
var finalData []byte
|
||||
b.Run("Bytes", func(b *testing.B) {
|
||||
for k := 0; k < b.N; k++ {
|
||||
data, err := aliases.Bytes()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
finalData = data
|
||||
}
|
||||
})
|
||||
b.Run("SetBytes", func(b *testing.B) {
|
||||
var aliases metabase.AliasPieces
|
||||
for k := 0; k < b.N; k++ {
|
||||
err := aliases.SetBytes(finalData)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
b.ReportMetric(float64(len(finalData)), "B")
|
||||
})
|
||||
|
||||
b.Run(prefix+"/sim", func(b *testing.B) {
|
||||
totalBytes := int64(0)
|
||||
minBytes, maxBytes := int64(0xFFFFFF), int64(0)
|
||||
|
||||
for k := 0; k < b.N; k++ {
|
||||
numPieces := repair + k%(optimal-repair)
|
||||
aliases := make(metabase.AliasPieces, numPieces)
|
||||
for i, n := range rand.Perm(total)[:numPieces] {
|
||||
aliases[i].Number = uint16(n)
|
||||
aliases[i].Alias = metabase.NodeAlias(0xFF + i)
|
||||
}
|
||||
sort.Slice(aliases, func(i, k int) bool {
|
||||
return aliases[i].Number < aliases[k].Number
|
||||
})
|
||||
bytes, err := aliases.Bytes()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
b := int64(len(bytes))
|
||||
totalBytes += b
|
||||
if b < minBytes {
|
||||
minBytes = b
|
||||
}
|
||||
if b > maxBytes {
|
||||
maxBytes = b
|
||||
}
|
||||
}
|
||||
|
||||
b.ReportMetric(float64(totalBytes)/float64(b.N), "B/avg")
|
||||
b.ReportMetric(float64(minBytes), "B/min")
|
||||
b.ReportMetric(float64(maxBytes), "B/max")
|
||||
})
|
||||
}
|
Loading…
Reference in New Issue
Block a user