Fix checks around repair threshold (#2246)

Kaloyan Raev 2019-06-19 23:13:11 +03:00 committed by Stefan Benten
parent 6ba110c90a
commit 964c87c476
7 changed files with 52 additions and 66 deletions

View File

@@ -157,51 +157,18 @@ func (uplink *Uplink) Upload(ctx context.Context, satellite *satellite.Peer, buc
return uplink.UploadWithExpiration(ctx, satellite, bucket, path, data, time.Time{})
}
// UploadWithExpiration data to specific satellite
// UploadWithExpiration data to specific satellite and expiration time
func (uplink *Uplink) UploadWithExpiration(ctx context.Context, satellite *satellite.Peer, bucket string, path storj.Path, data []byte, expiration time.Time) error {
config := uplink.GetConfig(satellite)
metainfo, streams, err := config.GetMetainfo(ctx, uplink.Identity)
if err != nil {
return err
}
encScheme := config.GetEncryptionScheme()
redScheme := config.GetRedundancyScheme()
// create bucket if not exists
_, err = metainfo.GetBucket(ctx, bucket)
if err != nil {
if storj.ErrBucketNotFound.Has(err) {
_, err := metainfo.CreateBucket(ctx, bucket, &storj.Bucket{PathCipher: encScheme.Cipher})
if err != nil {
return err
}
} else {
return err
}
}
createInfo := storj.CreateObject{
RedundancyScheme: redScheme,
EncryptionScheme: encScheme,
Expires: expiration,
}
obj, err := metainfo.CreateObject(ctx, bucket, path, &createInfo)
if err != nil {
return err
}
reader := bytes.NewReader(data)
err = uploadStream(ctx, streams, obj, reader)
if err != nil {
return err
}
return nil
return uplink.UploadWithExpirationAndConfig(ctx, satellite, nil, bucket, path, data, expiration)
}
// UploadWithConfig uploads data to specific satellite with configured values
func (uplink *Uplink) UploadWithConfig(ctx context.Context, satellite *satellite.Peer, redundancy *uplink.RSConfig, bucket string, path storj.Path, data []byte) error {
return uplink.UploadWithExpirationAndConfig(ctx, satellite, redundancy, bucket, path, data, time.Time{})
}
// UploadWithExpirationAndConfig uploads data to specific satellite with configured values and expiration time
func (uplink *Uplink) UploadWithExpirationAndConfig(ctx context.Context, satellite *satellite.Peer, redundancy *uplink.RSConfig, bucket string, path storj.Path, data []byte, expiration time.Time) error {
config := uplink.GetConfig(satellite)
if redundancy != nil {
if redundancy.MinThreshold > 0 {
@@ -245,6 +212,7 @@ func (uplink *Uplink) UploadWithConfig(ctx context.Context, satellite *satellite
createInfo := storj.CreateObject{
RedundancyScheme: redScheme,
EncryptionScheme: encScheme,
Expires: expiration,
}
obj, err := metainfo.CreateObject(ctx, bucket, path, &createInfo)
if err != nil {

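For orientation, here is a hedged usage sketch of the new UploadWithExpirationAndConfig helper, modeled on the collector test further down in this commit; the planet, ctx, t and data variables are assumed to come from a testplanet-based test.

```go
// Minimal sketch: upload with an explicit redundancy scheme so the data
// lands on exactly two storage nodes. The values mirror the collector test
// below; the surrounding test harness is assumed.
rs := &uplink.RSConfig{
	MinThreshold:     1,
	RepairThreshold:  1,
	SuccessThreshold: 2,
	MaxThreshold:     2,
}
err := planet.Uplinks[0].UploadWithExpirationAndConfig(ctx,
	planet.Satellites[0], rs,
	"testbucket", "test/path",
	data, time.Now().Add(8*24*time.Hour))
require.NoError(t, err)
```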
View File

@@ -185,7 +185,7 @@ func (checker *Checker) updateSegmentStatus(ctx context.Context, pointer *pb.Poi
redundancy := pointer.Remote.Redundancy
// we repair when the number of healthy files is less than or equal to the repair threshold
// except for the case when the repair and success thresholds are the same (a case usually seen during testing)
if numHealthy > redundancy.MinReq && numHealthy <= redundancy.RepairThreshold && redundancy.RepairThreshold != redundancy.SuccessThreshold {
if numHealthy > redundancy.MinReq && numHealthy <= redundancy.RepairThreshold && numHealthy < redundancy.SuccessThreshold {
if len(missingPieces) == 0 {
checker.logger.Warn("Missing pieces is zero in checker, but this should be impossible -- bad redundancy scheme.")
return nil
@@ -206,7 +206,7 @@ func (checker *Checker) updateSegmentStatus(ctx context.Context, pointer *pb.Poi
}
// we need one additional piece for error correction. If only the minimum is remaining the file can't be repaired and is lost.
// except for the case when minimum and repair thresholds are the same (a case usually seen during testing)
} else if numHealthy <= redundancy.MinReq && redundancy.MinReq != redundancy.RepairThreshold {
} else if numHealthy <= redundancy.MinReq && numHealthy < redundancy.RepairThreshold {
// check to make sure there are at least *4* path elements. the first three
// are project, segment, and bucket name, but we want to make sure we're talking
// about an actual object, and that there's an object name specified

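In effect, the checker now decides by comparing the healthy-piece count against the success threshold rather than special-casing equal repair and success thresholds. Below is a hedged, self-contained sketch of that decision; classifySegment is an illustrative helper, not part of the actual checker, and its parameters mirror the pb.RedundancyScheme fields used above.

```go
// Hedged sketch of the checker's segment classification after this change.
func classifySegment(numHealthy, minReq, repairThreshold, successThreshold int32) string {
	switch {
	// recoverable but at risk: above the reconstruction minimum, at or below
	// the repair threshold, and still short of the success threshold
	case numHealthy > minReq && numHealthy <= repairThreshold && numHealthy < successThreshold:
		return "inject into repair queue"
	// at or below the minimum needed for reconstruction: treat as irreparable
	case numHealthy <= minReq && numHealthy < repairThreshold:
		return "add to irreparable DB"
	default:
		return "healthy"
	}
}
```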
View File

@@ -89,8 +89,10 @@ func TestIdentifyIrreparableSegments(t *testing.T) {
pointer := &pb.Pointer{
Remote: &pb.RemoteSegment{
Redundancy: &pb.RedundancyScheme{
MinReq: int32(3),
RepairThreshold: int32(8),
MinReq: int32(3),
RepairThreshold: int32(8),
SuccessThreshold: int32(9),
Total: int32(10),
},
RootPieceId: teststorj.PieceIDFromString("fake-piece-id"),
RemotePieces: pieces,
@@ -136,8 +138,10 @@ func TestIdentifyIrreparableSegments(t *testing.T) {
pointer = &pb.Pointer{
Remote: &pb.RemoteSegment{
Redundancy: &pb.RedundancyScheme{
MinReq: int32(2),
RepairThreshold: int32(8),
MinReq: int32(2),
RepairThreshold: int32(8),
SuccessThreshold: int32(9),
Total: int32(10),
},
RootPieceId: teststorj.PieceIDFromString("fake-piece-id"),
RemotePieces: pieces,
@@ -183,8 +187,10 @@ func makePointer(t *testing.T, planet *testplanet.Planet, pieceID string, create
pointer := &pb.Pointer{
Remote: &pb.RemoteSegment{
Redundancy: &pb.RedundancyScheme{
MinReq: int32(minReq),
RepairThreshold: int32(repairThreshold),
MinReq: int32(minReq),
RepairThreshold: int32(repairThreshold),
SuccessThreshold: int32(repairThreshold) + 1,
Total: int32(repairThreshold) + 2,
},
RootPieceId: teststorj.PieceIDFromString(pieceID),
RemotePieces: pieces,

View File

@@ -77,8 +77,9 @@ func (ec *ecClient) Put(ctx context.Context, limits []*pb.AddressedOrderLimit, r
return nil, nil, Error.New("size of limits slice (%d) does not match total count (%d) of erasure scheme", len(limits), rs.TotalCount())
}
if nonNilCount(limits) < rs.RepairThreshold() {
return nil, nil, Error.New("number of non-nil limits (%d) is less than repair threshold (%d) of erasure scheme", nonNilCount(limits), rs.RepairThreshold())
nonNilLimits := nonNilCount(limits)
if nonNilLimits <= rs.RepairThreshold() && nonNilLimits < rs.OptimalThreshold() {
return nil, nil, Error.New("number of non-nil limits (%d) is less than or equal to the repair threshold (%d) of erasure scheme", nonNilLimits, rs.RepairThreshold())
}
if !unique(limits) {
@@ -137,11 +138,17 @@ func (ec *ecClient) Put(ctx context.Context, limits []*pb.AddressedOrderLimit, r
successfulHashes[info.i] = info.hash
switch int(atomic.AddInt32(&successfulCount, 1)) {
case rs.RepairThreshold():
case rs.OptimalThreshold():
zap.S().Infof("Success threshold (%d nodes) reached. Canceling the long tail...", rs.OptimalThreshold())
if timer != nil {
timer.Stop()
}
cancel()
case rs.RepairThreshold() + 1:
elapsed := time.Since(start)
more := elapsed * 3 / 2
zap.S().Infof("Repair threshold (%d nodes) reached in %.2f s. Starting a timer for %.2f s for reaching the success threshold (%d nodes)...",
zap.S().Infof("Repair threshold (%d nodes) passed in %.2f s. Starting a timer for %.2f s for reaching the success threshold (%d nodes)...",
rs.RepairThreshold(), elapsed.Seconds(), more.Seconds(), rs.OptimalThreshold())
timer = time.AfterFunc(more, func() {
@@ -150,10 +157,6 @@ func (ec *ecClient) Put(ctx context.Context, limits []*pb.AddressedOrderLimit, r
cancel()
}
})
case rs.OptimalThreshold():
zap.S().Infof("Success threshold (%d nodes) reached. Canceling the long tail...", rs.OptimalThreshold())
timer.Stop()
cancel()
}
}
@@ -175,8 +178,9 @@ func (ec *ecClient) Put(ctx context.Context, limits []*pb.AddressedOrderLimit, r
}
}()
if int(atomic.LoadInt32(&successfulCount)) < rs.RepairThreshold() {
return nil, nil, Error.New("successful puts (%d) less than repair threshold (%d)", atomic.LoadInt32(&successfulCount), rs.RepairThreshold())
successes := int(atomic.LoadInt32(&successfulCount))
if successes <= rs.RepairThreshold() && successes < rs.OptimalThreshold() {
return nil, nil, Error.New("successful puts (%d) less than or equal to repair threshold (%d)", successes, rs.RepairThreshold())
}
return successfulNodes, successfulHashes, nil

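Taken together, the reordered cases give ecClient.Put the following long-tail behavior. This is a simplified sketch of the switch inside the upload result loop; start, timer, cancel, successfulCount and rs come from the surrounding method, and the AfterFunc body is condensed to the eventual cancel.

```go
// Simplified sketch of the per-upload accounting inside ecClient.Put.
switch int(atomic.AddInt32(&successfulCount, 1)) {
case rs.OptimalThreshold():
	// Success threshold reached: stop the repair-threshold timer if it was
	// started and cancel the remaining (long-tail) uploads.
	zap.S().Infof("Success threshold (%d nodes) reached. Canceling the long tail...", rs.OptimalThreshold())
	if timer != nil {
		timer.Stop()
	}
	cancel()
case rs.RepairThreshold() + 1:
	// Strictly more pieces than the repair threshold: give the long tail
	// 1.5x the time already elapsed to reach the success threshold.
	elapsed := time.Since(start)
	more := elapsed * 3 / 2
	zap.S().Infof("Repair threshold (%d nodes) passed in %.2f s. Starting a timer for %.2f s for reaching the success threshold (%d nodes)...",
		rs.RepairThreshold(), elapsed.Seconds(), more.Seconds(), rs.OptimalThreshold())
	timer = time.AfterFunc(more, func() {
		// body condensed: cancel the long tail once the timer fires
		cancel()
	})
}
```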
View File

@@ -472,8 +472,8 @@ func (endpoint *Endpoint) filterValidPieces(ctx context.Context, pointer *pb.Poi
// we repair when the number of healthy files is less than or equal to the repair threshold
// except for the case when the repair and success thresholds are the same (a case usually seen during testing)
if int32(len(remotePieces)) <= remote.Redundancy.RepairThreshold && remote.Redundancy.RepairThreshold != remote.Redundancy.SuccessThreshold {
return Error.New("Number of valid pieces is less than or equal to the repair threshold: %v < %v",
if int32(len(remotePieces)) <= remote.Redundancy.RepairThreshold && int32(len(remotePieces)) < remote.Redundancy.SuccessThreshold {
return Error.New("Number of valid pieces (%d) is less than or equal to the repair threshold (%d)",
len(remotePieces),
remote.Redundancy.RepairThreshold,
)

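The segment-commit path applies the same relaxed comparison: a pointer is rejected only when its valid pieces are at or below the repair threshold and still below the success threshold. A hedged sketch of the check inside filterValidPieces, where remote and remotePieces come from the pointer being validated:

```go
// Sketch of the piece-count validation after this change.
pieces := int32(len(remotePieces))
if pieces <= remote.Redundancy.RepairThreshold && pieces < remote.Redundancy.SuccessThreshold {
	return Error.New("Number of valid pieces (%d) is less than or equal to the repair threshold (%d)",
		pieces,
		remote.Redundancy.RepairThreshold,
	)
}
```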
View File

@@ -334,7 +334,7 @@ func TestCommitSegment(t *testing.T) {
}
_, err = metainfo.CommitSegment(ctx, "bucket", "path", -1, pointer, limits)
require.Error(t, err)
require.Contains(t, err.Error(), "Number of valid pieces is less than or equal to the repair threshold")
require.Contains(t, err.Error(), "less than or equal to the repair threshold")
}
})
}

View File

@@ -14,6 +14,7 @@ import (
"storj.io/storj/internal/testcontext"
"storj.io/storj/internal/testplanet"
"storj.io/storj/pkg/storj"
"storj.io/storj/uplink"
)
func TestCollector(t *testing.T) {
@@ -31,13 +32,20 @@ func TestCollector(t *testing.T) {
_, err := rand.Read(expectedData)
require.NoError(t, err)
// upload some data that expires in 8 days
err = planet.Uplinks[0].UploadWithExpiration(ctx,
planet.Satellites[0], "testbucket", "test/path",
// upload some data to exactly 2 nodes that expires in 8 days
err = planet.Uplinks[0].UploadWithExpirationAndConfig(ctx,
planet.Satellites[0],
&uplink.RSConfig{
MinThreshold: 1,
RepairThreshold: 1,
SuccessThreshold: 2,
MaxThreshold: 2,
},
"testbucket", "test/path",
expectedData, time.Now().Add(8*24*time.Hour))
require.NoError(t, err)
// stop planet to prevent audits
// stop satellite to prevent audits
require.NoError(t, planet.StopPeer(planet.Satellites[0]))
collections := 0
@@ -75,7 +83,7 @@ func TestCollector(t *testing.T) {
}
require.NotZero(t, collections)
require.Equal(t, serialsPresent, 2)
require.Equal(t, 2, serialsPresent)
serialsPresent = 0