// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package eestream
|
|
|
|
|
|
|
|
import (
|
2018-05-14 21:30:57 +01:00
|
|
|
"context"
|
2018-04-11 14:41:50 +01:00
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
2018-09-14 15:10:43 +01:00
|
|
|
"sync"
|
2018-04-11 14:41:50 +01:00
|
|
|
|
2019-03-29 12:30:23 +00:00
|
|
|
"github.com/zeebo/errs"
|
2019-04-12 21:57:24 +01:00
|
|
|
"go.uber.org/zap"
|
2019-03-29 12:30:23 +00:00
|
|
|
|
2019-04-25 09:17:26 +01:00
|
|
|
"storj.io/storj/internal/errs2"
|
2018-10-25 09:24:39 +01:00
|
|
|
"storj.io/storj/internal/readcloser"
|
2018-10-18 12:10:29 +01:00
|
|
|
"storj.io/storj/pkg/encryption"
|
2018-04-11 14:41:50 +01:00
|
|
|
"storj.io/storj/pkg/ranger"
|
|
|
|
)
|
|
|
|
|
|
|
|
// decodedReader is an io.ReadCloser that joins a set of erasure-coded
// piece streams back into the original data stream, decoding one stripe
// at a time.
type decodedReader struct {
	ctx             context.Context       // governs the lifetime of the piece readers; canceled on Close
	cancel          context.CancelFunc    // cancels ctx
	readers         map[int]io.ReadCloser // erasure piece number -> erasure piece stream
	scheme          ErasureScheme         // erasure scheme used for decoding
	stripeReader    *StripeReader         // reads and decodes stripes from the piece streams
	outbuf          []byte                // decoded bytes not yet handed out by Read
	err             error                 // sticky error (including io.EOF) returned by subsequent Reads
	currentStripe   int64                 // index of the next stripe to decode
	expectedStripes int64                 // total number of stripes expected in the stream
	close           sync.Once             // ensures the close logic runs at most once
	closeErr        error                 // combined error captured by the close logic
}
|
|
|
|
|
2018-04-11 14:41:50 +01:00
|
|
|
// DecodeReaders takes a map of readers and an ErasureScheme returning a
|
2018-05-30 16:27:09 +01:00
|
|
|
// combined Reader.
|
|
|
|
//
|
|
|
|
// rs is a map of erasure piece numbers to erasure piece streams.
|
|
|
|
// expectedSize is the number of bytes expected to be returned by the Reader.
|
|
|
|
// mbm is the maximum memory (in bytes) to be allocated for read buffers. If
|
|
|
|
// set to 0, the minimum possible memory will be used.
|
2019-06-14 10:16:31 +01:00
|
|
|
// if forceErrorDetection is set to true then k+1 pieces will be always
|
|
|
|
// required for decoding, so corrupted pieces can be detected.
|
|
|
|
func DecodeReaders(ctx context.Context, rs map[int]io.ReadCloser, es ErasureScheme, expectedSize int64, mbm int, forceErrorDetection bool) io.ReadCloser {
|
2019-06-04 12:36:27 +01:00
|
|
|
defer mon.Task()(&ctx)(nil)
|
2018-05-14 21:30:57 +01:00
|
|
|
if expectedSize < 0 {
|
|
|
|
return readcloser.FatalReadCloser(Error.New("negative expected size"))
|
|
|
|
}
|
2018-09-27 12:52:18 +01:00
|
|
|
if expectedSize%int64(es.StripeSize()) != 0 {
|
2018-05-14 21:30:57 +01:00
|
|
|
return readcloser.FatalReadCloser(
|
2018-08-06 15:24:30 +01:00
|
|
|
Error.New("expected size (%d) not a factor decoded block size (%d)",
|
2018-09-27 12:52:18 +01:00
|
|
|
expectedSize, es.StripeSize()))
|
2018-05-14 21:30:57 +01:00
|
|
|
}
|
2018-07-03 09:35:01 +01:00
|
|
|
if err := checkMBM(mbm); err != nil {
|
|
|
|
return readcloser.FatalReadCloser(err)
|
2018-05-14 21:30:57 +01:00
|
|
|
}
|
2018-04-11 14:41:50 +01:00
|
|
|
dr := &decodedReader{
|
2018-08-24 14:06:27 +01:00
|
|
|
readers: rs,
|
|
|
|
scheme: es,
|
2019-06-14 10:16:31 +01:00
|
|
|
stripeReader: NewStripeReader(rs, es, mbm, forceErrorDetection),
|
2018-09-27 12:52:18 +01:00
|
|
|
outbuf: make([]byte, 0, es.StripeSize()),
|
|
|
|
expectedStripes: expectedSize / int64(es.StripeSize()),
|
2018-08-24 14:06:27 +01:00
|
|
|
}
|
|
|
|
dr.ctx, dr.cancel = context.WithCancel(ctx)
|
|
|
|
// Kick off a goroutine to watch for context cancelation.
|
|
|
|
go func() {
|
|
|
|
<-dr.ctx.Done()
|
2018-08-27 18:28:16 +01:00
|
|
|
_ = dr.Close()
|
2018-08-24 14:06:27 +01:00
|
|
|
}()
|
2018-04-11 14:41:50 +01:00
|
|
|
return dr
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read implements io.Reader. It hands out decoded data from outbuf,
// refilling it with the next decoded stripe whenever it runs dry. Errors
// (including io.EOF once all expected stripes have been consumed) are
// sticky: after the first failure every subsequent call returns the same
// error.
func (dr *decodedReader) Read(p []byte) (n int, err error) {
	ctx := dr.ctx
	defer mon.Task()(&ctx)(&err)

	if len(dr.outbuf) == 0 {
		// if the output buffer is empty, let's fill it again
		// if we've already had an error, fail
		if dr.err != nil {
			return 0, dr.err
		}
		// return EOF if the expected stripes were read
		if dr.currentStripe >= dr.expectedStripes {
			dr.err = io.EOF
			return 0, dr.err
		}
		// read the input buffers of the next stripe - may also decode it
		// (ReadStripe reuses outbuf's backing storage when possible)
		dr.outbuf, dr.err = dr.stripeReader.ReadStripe(ctx, dr.currentStripe, dr.outbuf)
		if dr.err != nil {
			return 0, dr.err
		}
		dr.currentStripe++
	}

	// copy what data we have to the output
	n = copy(p, dr.outbuf)
	// slide the remaining bytes to the beginning
	copy(dr.outbuf, dr.outbuf[n:])
	// shrink the remaining buffer
	dr.outbuf = dr.outbuf[:len(dr.outbuf)-n]
	return n, nil
}
|
|
|
|
|
2019-06-26 14:05:58 +01:00
|
|
|
// Close implements io.Closer. It cancels the reader context and closes all
// piece readers plus the stripe reader exactly once (guarded by dr.close),
// combining any close errors into dr.closeErr. Close errors are treated as
// fatal only when more pieces failed to close than the erasure scheme can
// tolerate (len(readers) - RequiredCount); otherwise the combined error is
// logged at debug level and nil is returned. Safe to call multiple times.
func (dr *decodedReader) Close() (err error) {
	ctx := dr.ctx
	defer mon.Task()(&ctx)(&err)
	// cancel the context to terminate reader goroutines
	dr.cancel()
	// number of close failures we can tolerate before considering it fatal
	errorThreshold := len(dr.readers) - dr.scheme.RequiredCount()
	var closeGroup errs2.Group
	// avoid double close of readers
	dr.close.Do(func() {
		for _, r := range dr.readers {
			closeGroup.Go(r.Close)
		}

		// close the stripe reader
		closeGroup.Go(dr.stripeReader.Close)

		allErrors := closeGroup.Wait()
		// NOTE(review): this also counts a stripeReader.Close failure against
		// the threshold, not just piece reader failures — confirm intended
		errorThreshold -= len(allErrors)
		dr.closeErr = errs.Combine(allErrors...)
	})
	// TODO this is workaround, we need reorganize to return multiple errors or divide into fatal, non fatal
	if errorThreshold < 0 {
		return dr.closeErr
	}
	if dr.closeErr != nil {
		zap.L().Debug("decode close non fatal error: ", zap.Error(dr.closeErr))
	}
	return nil
}
|
|
|
|
|
2018-04-11 14:41:50 +01:00
|
|
|
// decodedRanger wraps a set of erasure piece rangers and presents them as a
// single ranger.Ranger over the decoded data.
type decodedRanger struct {
	es                  ErasureScheme         // erasure scheme used for decoding
	rrs                 map[int]ranger.Ranger // erasure piece number -> erasure piece ranger
	inSize              int64                 // size in bytes of each input piece (all must match)
	mbm                 int                   // max buffer memory
	forceErrorDetection bool                  // require k+1 pieces so corrupted pieces can be detected
}
|
|
|
|
|
2018-05-30 16:27:09 +01:00
|
|
|
// Decode takes a map of Rangers and an ErasureScheme and returns a combined
|
|
|
|
// Ranger.
|
|
|
|
//
|
|
|
|
// rrs is a map of erasure piece numbers to erasure piece rangers.
|
|
|
|
// mbm is the maximum memory (in bytes) to be allocated for read buffers. If
|
|
|
|
// set to 0, the minimum possible memory will be used.
|
2019-06-14 10:16:31 +01:00
|
|
|
// if forceErrorDetection is set to true then k+1 pieces will be always
|
|
|
|
// required for decoding, so corrupted pieces can be detected.
|
|
|
|
func Decode(rrs map[int]ranger.Ranger, es ErasureScheme, mbm int, forceErrorDetection bool) (ranger.Ranger, error) {
|
2018-07-03 09:35:01 +01:00
|
|
|
if err := checkMBM(mbm); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if len(rrs) < es.RequiredCount() {
|
|
|
|
return nil, Error.New("not enough readers to reconstruct data!")
|
2018-05-14 21:30:57 +01:00
|
|
|
}
|
2018-04-11 14:41:50 +01:00
|
|
|
size := int64(-1)
|
|
|
|
for _, rr := range rrs {
|
|
|
|
if size == -1 {
|
|
|
|
size = rr.Size()
|
2019-05-29 14:14:25 +01:00
|
|
|
} else if size != rr.Size() {
|
|
|
|
return nil, Error.New(
|
|
|
|
"decode failure: range reader sizes don't all match")
|
2018-04-11 14:41:50 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if size == -1 {
|
2018-09-14 15:10:43 +01:00
|
|
|
return ranger.ByteRanger(nil), nil
|
2018-04-11 14:41:50 +01:00
|
|
|
}
|
2018-09-27 12:52:18 +01:00
|
|
|
if size%int64(es.ErasureShareSize()) != 0 {
|
2018-08-06 15:24:30 +01:00
|
|
|
return nil, Error.New("invalid erasure decoder and range reader combo. "+
|
|
|
|
"range reader size (%d) must be a multiple of erasure encoder block size (%d)",
|
2018-09-27 12:52:18 +01:00
|
|
|
size, es.ErasureShareSize())
|
2018-04-11 14:41:50 +01:00
|
|
|
}
|
|
|
|
return &decodedRanger{
|
2019-06-14 10:16:31 +01:00
|
|
|
es: es,
|
|
|
|
rrs: rrs,
|
|
|
|
inSize: size,
|
|
|
|
mbm: mbm,
|
|
|
|
forceErrorDetection: forceErrorDetection,
|
2018-04-11 14:41:50 +01:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dr *decodedRanger) Size() int64 {
|
2018-09-27 12:52:18 +01:00
|
|
|
blocks := dr.inSize / int64(dr.es.ErasureShareSize())
|
|
|
|
return blocks * int64(dr.es.StripeSize())
|
2018-04-11 14:41:50 +01:00
|
|
|
}
|
|
|
|
|
2019-06-04 12:36:27 +01:00
|
|
|
// Range implements ranger.Ranger. It fetches the block-aligned byte ranges
// covering [offset, offset+length) from every piece ranger in parallel,
// decodes them, discards any leading bytes before offset, and limits the
// result to length bytes.
func (dr *decodedRanger) Range(ctx context.Context, offset, length int64) (_ io.ReadCloser, err error) {
	defer mon.Task()(&ctx)(&err)
	// offset and length might not be block-aligned. figure out which
	// blocks contain this request
	firstBlock, blockCount := encryption.CalcEncompassingBlocks(offset, length, dr.es.StripeSize())
	// go ask for ranges for all those block boundaries
	// do it parallel to save from network latency
	readers := make(map[int]io.ReadCloser, len(dr.rrs))
	type indexReadCloser struct {
		i   int           // erasure piece number
		r   io.ReadCloser // piece range reader (nil on error)
		err error         // error from rr.Range, if any
	}
	// buffered so goroutines never block sending their result
	result := make(chan indexReadCloser, len(dr.rrs))
	for i, rr := range dr.rrs {
		go func(i int, rr ranger.Ranger) {
			// piece offsets are in erasure-share units, not stripe units
			r, err := rr.Range(ctx,
				firstBlock*int64(dr.es.ErasureShareSize()),
				blockCount*int64(dr.es.ErasureShareSize()))
			result <- indexReadCloser{i: i, r: r, err: err}
		}(i, rr)
	}
	// wait for all goroutines to finish and save result in readers map
	for range dr.rrs {
		res := <-result
		if res.err != nil {
			// a failed piece becomes a reader that errors on first Read, so
			// DecodeReaders can still succeed with the remaining pieces
			readers[res.i] = readcloser.FatalReadCloser(res.err)
		} else {
			readers[res.i] = res.r
		}
	}
	// decode from all those ranges
	r := DecodeReaders(ctx, readers, dr.es, blockCount*int64(dr.es.StripeSize()), dr.mbm, dr.forceErrorDetection)
	// offset might start a few bytes in, potentially discard the initial bytes
	_, err = io.CopyN(ioutil.Discard, r,
		offset-firstBlock*int64(dr.es.StripeSize()))
	if err != nil {
		return nil, Error.Wrap(err)
	}
	// length might not have included all of the blocks, limit what we return
	return readcloser.LimitReadCloser(r, length), nil
}
|
2019-02-05 10:54:25 +00:00
|
|
|
|
|
|
|
func checkMBM(mbm int) error {
|
|
|
|
if mbm < 0 {
|
|
|
|
return Error.New("negative max buffer memory")
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|