satellite/metainfo/metabase: add AliasPiece

This adds run-length encoding for AliasPieces. On average the encoded
pieces should take:

   repair=50,optimal=85,total=90   152.0 bytes
   repair=16,optimal=37,total=50    65.4 bytes

Change-Id: I391a9183164828f05383a3cde9ab0e4549c2d440
This commit is contained in:
Egon Elbre 2021-02-09 10:13:54 +02:00
parent 63c7f8b7fc
commit 25f81f353c
2 changed files with 431 additions and 0 deletions

View File

@ -0,0 +1,190 @@
// Copyright (C) 2021 Storj Labs, Inc.
// See LICENSE for copying information.
package metabase
import (
"database/sql/driver"
"encoding/binary"
)
// AliasPieces is a slice of AliasPiece.
//
// It can be compressed to bytes with Bytes and restored with SetBytes, and it
// implements the database/sql Scanner and driver Valuer interfaces on top of
// those two methods. Bytes requires the pieces to be ordered by strictly
// increasing Number.
type AliasPieces []AliasPiece
// AliasPiece is a piece with alias node ID.
type AliasPiece struct {
	// Number is the piece number; the encoding stores pieces by their
	// position in the sequence of piece numbers.
	Number uint16

	// Alias is the node alias holding the piece. It is written as an
	// unsigned varint when encoding (NodeAlias is declared elsewhere in
	// the package).
	Alias NodeAlias
}
const (
	// aliasPieceEncodingRLE run length encodes the zeros and node ID-s.
	// Its value is written as the first byte of the encoded data and is
	// verified when decoding.
	//
	// Example:
	// pieces = {2 x} {11 y}
	// // converted into slice with zeros
	// 0 0 x 0 0 0 0 0 0 0 0 y
	// // run length encoded
	// <2 zeros, 1 value> x <7 zeros, 0 values> <1 zeros, 1 value> y
	aliasPieceEncodingRLE = 1

	// aliasPieceEncodingZeroBits is the number of low bits of a run header
	// byte that hold the count of skipped (zero) piece numbers.
	aliasPieceEncodingZeroBits = 3

	// aliasPieceEncodingNodeAliasBits is the number of remaining high bits
	// of a run header byte; they hold the count of node aliases that
	// follow the header.
	aliasPieceEncodingNodeAliasBits = 8 - aliasPieceEncodingZeroBits

	// aliasPieceEncodingMaxZeros is the largest zero count a single header
	// can hold (7). It doubles as the bit mask for extracting the zero
	// count from a header.
	aliasPieceEncodingMaxZeros = 1<<aliasPieceEncodingZeroBits - 1

	// aliasPieceEncodingMaxNodeAliases is the largest number of node
	// aliases a single header can announce (31).
	aliasPieceEncodingMaxNodeAliases = 1<<aliasPieceEncodingNodeAliasBits - 1
)
// Bytes compresses alias pieces to a slice of bytes.
//
// The result starts with the encoding version byte (aliasPieceEncodingRLE)
// followed by a sequence of runs. Each run is a single header byte, holding
// the number of skipped piece numbers in its low aliasPieceEncodingZeroBits
// bits and the number of node aliases in its high bits, followed by that many
// uvarint encoded node aliases. Gaps longer than aliasPieceEncodingMaxZeros
// are emitted as several headers that carry no aliases.
//
// Pieces must be ordered by strictly increasing Number, otherwise an error is
// returned. An empty slice encodes to nil.
func (aliases AliasPieces) Bytes() ([]byte, error) {
	if len(aliases) == 0 {
		return nil, nil
	}

	var buffer [binary.MaxVarintLen64]byte

	// we're going to guess that it'll take 3 bytes per node alias + at most one per two nodes.
	data := make([]byte, 0, len(aliases)*3+len(aliases)/2)
	data = append(data, aliasPieceEncodingRLE)

	// The expected piece number is tracked as an int rather than uint16, so
	// that it cannot wrap around to zero after piece number 65535 and let an
	// out-of-order or duplicate piece slip past the ordering check.
	expectedPieceNumber := 0
	index := 0
	for index < len(aliases) {
		// start examining the piece
		piece := aliases[index]
		pieceNumber := int(piece.Number)
		if expectedPieceNumber > pieceNumber {
			return nil, Error.New("alias pieces not ordered")
		}

		// reserve the header for the next run, it is filled in once both
		// counts are known.
		headerPos := len(data)
		data = append(data, 0)

		// skip the missing piece numbers, but at most max zeros per header;
		// longer gaps continue in the next header.
		zeroCount := pieceNumber - expectedPieceNumber
		if zeroCount > aliasPieceEncodingMaxZeros {
			zeroCount = aliasPieceEncodingMaxZeros
		}
		expectedPieceNumber += zeroCount

		// emit all the pieces that are in sequence, but up to max node aliases.
		// When the gap did not fit into this header, nothing is emitted here.
		aliasCount := 0
		for expectedPieceNumber == int(piece.Number) && aliasCount < aliasPieceEncodingMaxNodeAliases {
			// emit the piece alias
			n := binary.PutUvarint(buffer[:], uint64(piece.Alias))
			data = append(data, buffer[:n]...)

			// update the header and the expected piece number
			aliasCount++
			expectedPieceNumber++

			// next piece
			index++
			if index >= len(aliases) {
				break
			}
			piece = aliases[index]
		}

		data[headerPos] = byte(aliasCount)<<aliasPieceEncodingZeroBits | byte(zeroCount)
	}

	return data, nil
}
// SetBytes decompresses alias pieces from a slice of bytes.
//
// It accepts the format produced by Bytes: a version byte followed by runs,
// where each run is a header byte (zero count in the low bits, alias count in
// the high bits) followed by uvarint encoded node aliases. Empty data decodes
// to nil.
//
// An error is returned when the version byte is unknown, when the data is
// truncated, or when a decoded piece number or node alias does not fit into
// its type. On any error the receiver is left as nil rather than partially
// filled.
func (aliases *AliasPieces) SetBytes(data []byte) error {
	*aliases = nil
	if len(data) == 0 {
		return nil
	}
	if data[0] != aliasPieceEncodingRLE {
		return Error.New("unknown alias pieces header: %v", data[0])
	}

	// maxPieceNumber is the largest value that fits into AliasPiece.Number.
	const maxPieceNumber = 1<<16 - 1

	// we're going to guess there's one alias piece per two bytes of data
	decoded := make(AliasPieces, 0, len(data)/2)

	p := 1
	// pieceNumber is an int, so that corrupt data cannot silently wrap it.
	pieceNumber := 0
	for p < len(data) {
		// read the header
		header := data[p]
		p++
		if p >= len(data) {
			return Error.New("invalid alias pieces data")
		}

		// extract header values
		aliasCount := int(header >> aliasPieceEncodingZeroBits)
		zeroCount := int(header & aliasPieceEncodingMaxZeros)

		// skip over the zero values
		pieceNumber += zeroCount

		// read the aliases
		for k := 0; k < aliasCount; k++ {
			v, n := binary.Uvarint(data[p:])
			if n <= 0 {
				return Error.New("invalid alias pieces data")
			}
			p += n

			// reject values that would be silently truncated by the
			// conversions below.
			alias := NodeAlias(v)
			if uint64(alias) != v || pieceNumber > maxPieceNumber {
				return Error.New("invalid alias pieces data")
			}

			decoded = append(decoded, AliasPiece{
				Number: uint16(pieceNumber),
				Alias:  alias,
			})
			pieceNumber++
		}
	}

	*aliases = decoded
	return nil
}
// Scan implements the database/sql Scanner interface.
//
// A nil src resets the receiver to nil, a []byte is decoded with SetBytes and
// any other type is rejected with an error.
func (aliases *AliasPieces) Scan(src interface{}) error {
	if src == nil {
		*aliases = nil
		return nil
	}

	if encoded, ok := src.([]byte); ok {
		return aliases.SetBytes(encoded)
	}
	return Error.New("invalid type for AliasPieces: %T", src)
}
// Value implements the database/sql/driver Valuer interface.
//
// The stored value is the byte encoding returned by Bytes.
func (aliases AliasPieces) Value() (driver.Value, error) {
	encoded, err := aliases.Bytes()
	return encoded, err
}
// EqualAliasPieces compares whether xs and ys are equal.
//
// Two slices are equal when they have the same length and hold identical
// pieces at every index; nil and empty slices are considered equal.
func EqualAliasPieces(xs, ys AliasPieces) bool {
	count := len(xs)
	if count != len(ys) {
		return false
	}

	for i := 0; i < count; i++ {
		if xs[i] != ys[i] {
			return false
		}
	}
	return true
}

View File

@ -0,0 +1,241 @@
// Copyright (C) 2021 Storj Labs, Inc.
// See LICENSE for copying information.
package metabase_test
import (
"fmt"
"math/rand"
"sort"
"testing"
"github.com/stretchr/testify/require"
"storj.io/storj/satellite/metainfo/metabase"
)
// TestAliasPieces checks a table of alias pieces against their exact byte
// encoding and verifies that decoding the bytes restores the pieces.
func TestAliasPieces(t *testing.T) {
	vectors := []struct {
		pieces  metabase.AliasPieces
		encoded []byte
	}{
		// empty input has an empty encoding.
		{pieces: nil, encoded: nil},

		// single pieces and gaps between pieces.
		{pieces: metabase.AliasPieces{
			{Number: 0, Alias: 1},
		}, encoded: []byte{1, 0b00001_000, 1}},
		{pieces: metabase.AliasPieces{
			{Number: 0, Alias: 1},
			{Number: 3, Alias: 2},
		}, encoded: []byte{1, 0b00001_000, 1, 0b00001_010, 2}},
		{pieces: metabase.AliasPieces{
			{Number: 3, Alias: 2},
		}, encoded: []byte{1, 0b00001_011, 2}},
		{pieces: metabase.AliasPieces{
			{Number: 4, Alias: 2},
		}, encoded: []byte{1, 0b00001_100, 2}},

		// a gap longer than a single header can express.
		{pieces: metabase.AliasPieces{
			{Number: 9, Alias: 2},
		}, encoded: []byte{1, 0b00000_111, 0b00001_010, 2}},

		// aliases that need more than one varint byte.
		{pieces: metabase.AliasPieces{
			{Number: 0, Alias: 0xF8},
		}, encoded: []byte{1, 0b00001_000, 0xF8, 0x01}},
		{pieces: metabase.AliasPieces{
			{Number: 0, Alias: 0xF808},
		}, encoded: []byte{1, 0b00001_000, 0x88, 0xf0, 0x03}},
		{pieces: metabase.AliasPieces{
			{Number: 0, Alias: 0xF808ba},
		}, encoded: []byte{1, 0b00001_000, 0xba, 0x91, 0xe0, 0x07}},

		// consecutive pieces share a header.
		{pieces: metabase.AliasPieces{
			{Number: 0, Alias: 0xA},
			{Number: 1, Alias: 0xB},
			{Number: 2, Alias: 0xC},
		}, encoded: []byte{1, 0b00011_000, 0xA, 0xB, 0xC}},
		{pieces: metabase.AliasPieces{
			{Number: 2, Alias: 0xA},
			{Number: 3, Alias: 0xB},
			{Number: 4, Alias: 0xC},
		}, encoded: []byte{1, 0b00011_010, 0xA, 0xB, 0xC}},
		{pieces: metabase.AliasPieces{
			{Number: 0, Alias: 0xA},
			{Number: 1, Alias: 0xB},
			{Number: 2, Alias: 0xC},
			{Number: 7, Alias: 0xD},
			{Number: 8, Alias: 0xE},
			{Number: 9, Alias: 0xF},
		}, encoded: []byte{1,
			0b00011_000, 0xA, 0xB, 0xC,
			0b00011_100, 0xD, 0xE, 0xF,
		}},

		// more consecutive pieces than a single header can count.
		{pieces: metabase.AliasPieces{
			{Number: 0, Alias: 1}, {Number: 1, Alias: 2}, {Number: 2, Alias: 3}, {Number: 3, Alias: 4}, {Number: 4, Alias: 5}, {Number: 5, Alias: 6}, {Number: 6, Alias: 7}, {Number: 7, Alias: 8},
			{Number: 8, Alias: 9}, {Number: 9, Alias: 10}, {Number: 10, Alias: 11}, {Number: 11, Alias: 12}, {Number: 12, Alias: 13}, {Number: 13, Alias: 14}, {Number: 14, Alias: 15}, {Number: 15, Alias: 16},
			{Number: 16, Alias: 17}, {Number: 17, Alias: 18}, {Number: 18, Alias: 19}, {Number: 19, Alias: 20}, {Number: 20, Alias: 21}, {Number: 21, Alias: 22}, {Number: 22, Alias: 23}, {Number: 23, Alias: 24},
			{Number: 24, Alias: 25}, {Number: 25, Alias: 26}, {Number: 26, Alias: 27}, {Number: 27, Alias: 28}, {Number: 28, Alias: 29}, {Number: 29, Alias: 30}, {Number: 30, Alias: 31}, {Number: 31, Alias: 32},
			{Number: 32, Alias: 33}, {Number: 33, Alias: 34},
		}, encoded: []byte{1,
			0b11111_000, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
			0b00011_000, 32, 33, 34,
		}},
	}

	for index, vector := range vectors {
		// the pieces must encode to exactly the expected bytes...
		encoded, err := vector.pieces.Bytes()
		require.NoError(t, err, index)
		require.Equal(t, vector.encoded, encoded, index)

		// ...and decoding those bytes must give the pieces back.
		decoded := metabase.AliasPieces{}
		err = decoded.SetBytes(encoded)
		require.NoError(t, err, index)
		require.Equal(t, vector.pieces, decoded, index)
	}
}
// TestAliasPieces_Large round-trips 255 pieces through Bytes and SetBytes,
// spacing the piece numbers by every offset from 1 to 17 so that both dense
// runs and gaps longer than a single header are covered.
func TestAliasPieces_Large(t *testing.T) {
	aliases := make(metabase.AliasPieces, 0xFF)
	for offset := 1; offset < 18; offset++ {
		for i := range aliases {
			aliases[i].Number = uint16(i * offset)
			aliases[i].Alias = metabase.NodeAlias(i + 1)
		}

		bytes, err := aliases.Bytes()
		require.NoError(t, err, "offset=%d", offset)

		var result metabase.AliasPieces
		err = result.SetBytes(bytes)
		require.NoError(t, err, "offset=%d", offset)

		// expected value goes first, so that failures are labeled correctly.
		require.Equal(t, aliases, result, "offset=%d", offset)
	}
}
// TestAliasPieces_Errors verifies the errors reported for pieces that cannot
// be encoded and for data that cannot be decoded.
func TestAliasPieces_Errors(t *testing.T) {
	// encoding rejects pieces that are not in strictly increasing order.
	unencodable := []metabase.AliasPieces{
		// descending piece numbers
		{
			{Number: 1, Alias: 1},
			{Number: 0, Alias: 2},
		},
		// duplicate piece numbers
		{
			{Number: 0, Alias: 1},
			{Number: 0, Alias: 2},
		},
	}
	for _, pieces := range unencodable {
		_, err := pieces.Bytes()
		require.EqualError(t, err, "metabase: alias pieces not ordered")
	}

	// decoding rejects an unknown version byte and truncated data.
	undecodable := []struct {
		data    []byte
		message string
	}{
		{data: []byte{17}, message: "metabase: unknown alias pieces header: 17"},
		{data: []byte{1, 0xFF}, message: "metabase: invalid alias pieces data"},
	}
	for _, failure := range undecodable {
		var decoded metabase.AliasPieces
		err := decoded.SetBytes(failure.data)
		require.EqualError(t, err, failure.message)
	}
}
// BenchmarkAliasPiecesBytes benchmarks the alias pieces encoding for two
// redundancy schemes, given as repair, optimal and total piece counts.
func BenchmarkAliasPiecesBytes(b *testing.B) {
	schemes := [...]struct{ repair, optimal, total int }{
		{repair: 50, optimal: 85, total: 90},
		{repair: 16, optimal: 37, total: 50},
	}
	for _, scheme := range schemes {
		benchmarkAliasPiecesBytes(b, scheme.repair, scheme.optimal, scheme.total)
	}
}
// benchmarkAliasPiecesBytes runs the alias pieces benchmarks for a single
// redundancy scheme:
//
//   - "2byte" and "3byte" encode and decode optimal consecutive pieces whose
//     aliases take two and three varint bytes respectively;
//   - "sim" encodes randomly chosen pieces out of total, with the piece count
//     cycling from repair up to optimal-1, and reports the average, minimum
//     and maximum encoded size.
//
// It assumes optimal > repair, since the piece count is computed modulo their
// difference.
func benchmarkAliasPiecesBytes(b *testing.B, repair, optimal, total int) {
	prefix := fmt.Sprintf("repair=%d,optimal=%d,total=%d", repair, optimal, total)

	b.Run(prefix+"/2byte", func(b *testing.B) {
		benchmarkAliasPiecesRoundTrip(b, optimal, 0xFF)
	})

	b.Run(prefix+"/3byte", func(b *testing.B) {
		benchmarkAliasPiecesRoundTrip(b, optimal, 0xFFFF)
	})

	b.Run(prefix+"/sim", func(b *testing.B) {
		totalBytes := int64(0)
		minBytes, maxBytes := int64(0xFFFFFF), int64(0)

		for k := 0; k < b.N; k++ {
			numPieces := repair + k%(optimal-repair)

			// pick numPieces random piece numbers and order them, because
			// encoding requires ordered pieces.
			aliases := make(metabase.AliasPieces, numPieces)
			for i, n := range rand.Perm(total)[:numPieces] {
				aliases[i].Number = uint16(n)
				aliases[i].Alias = metabase.NodeAlias(0xFF + i)
			}
			sort.Slice(aliases, func(i, k int) bool {
				return aliases[i].Number < aliases[k].Number
			})

			bytes, err := aliases.Bytes()
			if err != nil {
				b.Fatal(err)
			}

			size := int64(len(bytes))
			totalBytes += size
			if size < minBytes {
				minBytes = size
			}
			if size > maxBytes {
				maxBytes = size
			}
		}

		b.ReportMetric(float64(totalBytes)/float64(b.N), "B/avg")
		b.ReportMetric(float64(minBytes), "B/min")
		b.ReportMetric(float64(maxBytes), "B/max")
	})
}

// benchmarkAliasPiecesRoundTrip benchmarks Bytes and SetBytes on count
// consecutive pieces with aliases starting at firstAlias, and reports the
// encoded size as the "B" metric.
func benchmarkAliasPiecesRoundTrip(b *testing.B, count, firstAlias int) {
	aliases := make(metabase.AliasPieces, count)
	for i := range aliases {
		aliases[i] = metabase.AliasPiece{
			Number: uint16(i),
			Alias:  metabase.NodeAlias(firstAlias + i),
		}
	}

	// finalData is produced by the "Bytes" benchmark and is the input of the
	// "SetBytes" benchmark.
	var finalData []byte

	b.Run("Bytes", func(b *testing.B) {
		for k := 0; k < b.N; k++ {
			data, err := aliases.Bytes()
			if err != nil {
				b.Fatal(err)
			}
			finalData = data
		}
	})

	b.Run("SetBytes", func(b *testing.B) {
		var aliases metabase.AliasPieces
		for k := 0; k < b.N; k++ {
			err := aliases.SetBytes(finalData)
			if err != nil {
				b.Fatal(err)
			}
		}
	})

	b.ReportMetric(float64(len(finalData)), "B")
}