Fix checks around repair threshold (#2246)
parent 6ba110c90a
commit 964c87c476
@@ -157,51 +157,18 @@ func (uplink *Uplink) Upload(ctx context.Context, satellite *satellite.Peer, buc
     return uplink.UploadWithExpiration(ctx, satellite, bucket, path, data, time.Time{})
 }
 
-// UploadWithExpiration data to specific satellite
+// UploadWithExpiration data to specific satellite and expiration time
 func (uplink *Uplink) UploadWithExpiration(ctx context.Context, satellite *satellite.Peer, bucket string, path storj.Path, data []byte, expiration time.Time) error {
-    config := uplink.GetConfig(satellite)
-    metainfo, streams, err := config.GetMetainfo(ctx, uplink.Identity)
-    if err != nil {
-        return err
-    }
-
-    encScheme := config.GetEncryptionScheme()
-    redScheme := config.GetRedundancyScheme()
-
-    // create bucket if not exists
-    _, err = metainfo.GetBucket(ctx, bucket)
-    if err != nil {
-        if storj.ErrBucketNotFound.Has(err) {
-            _, err := metainfo.CreateBucket(ctx, bucket, &storj.Bucket{PathCipher: encScheme.Cipher})
-            if err != nil {
-                return err
-            }
-        } else {
-            return err
-        }
-    }
-
-    createInfo := storj.CreateObject{
-        RedundancyScheme: redScheme,
-        EncryptionScheme: encScheme,
-        Expires:          expiration,
-    }
-    obj, err := metainfo.CreateObject(ctx, bucket, path, &createInfo)
-    if err != nil {
-        return err
-    }
-
-    reader := bytes.NewReader(data)
-    err = uploadStream(ctx, streams, obj, reader)
-    if err != nil {
-        return err
-    }
-
-    return nil
+    return uplink.UploadWithExpirationAndConfig(ctx, satellite, nil, bucket, path, data, expiration)
 }
 
 // UploadWithConfig uploads data to specific satellite with configured values
 func (uplink *Uplink) UploadWithConfig(ctx context.Context, satellite *satellite.Peer, redundancy *uplink.RSConfig, bucket string, path storj.Path, data []byte) error {
+    return uplink.UploadWithExpirationAndConfig(ctx, satellite, redundancy, bucket, path, data, time.Time{})
+}
+
+// UploadWithExpirationAndConfig uploads data to specific satellite with configured values and expiration time
+func (uplink *Uplink) UploadWithExpirationAndConfig(ctx context.Context, satellite *satellite.Peer, redundancy *uplink.RSConfig, bucket string, path storj.Path, data []byte, expiration time.Time) error {
     config := uplink.GetConfig(satellite)
     if redundancy != nil {
         if redundancy.MinThreshold > 0 {
@@ -245,6 +212,7 @@ func (uplink *Uplink) UploadWithConfig(ctx context.Context, satellite *satellite
     createInfo := storj.CreateObject{
         RedundancyScheme: redScheme,
         EncryptionScheme: encScheme,
+        Expires:          expiration,
     }
     obj, err := metainfo.CreateObject(ctx, bucket, path, &createInfo)
     if err != nil {
@@ -185,7 +185,7 @@ func (checker *Checker) updateSegmentStatus(ctx context.Context, pointer *pb.Poi
     redundancy := pointer.Remote.Redundancy
     // we repair when the number of healthy files is less than or equal to the repair threshold
     // except for the case when the repair and success thresholds are the same (a case usually seen during testing)
-    if numHealthy > redundancy.MinReq && numHealthy <= redundancy.RepairThreshold && redundancy.RepairThreshold != redundancy.SuccessThreshold {
+    if numHealthy > redundancy.MinReq && numHealthy <= redundancy.RepairThreshold && numHealthy < redundancy.SuccessThreshold {
         if len(missingPieces) == 0 {
             checker.logger.Warn("Missing pieces is zero in checker, but this should be impossible -- bad redundancy scheme.")
             return nil
@@ -206,7 +206,7 @@ func (checker *Checker) updateSegmentStatus(ctx context.Context, pointer *pb.Poi
         }
         // we need one additional piece for error correction. If only the minimum is remaining the file can't be repaired and is lost.
         // except for the case when minimum and repair thresholds are the same (a case usually seen during testing)
-    } else if numHealthy <= redundancy.MinReq && redundancy.MinReq != redundancy.RepairThreshold {
+    } else if numHealthy <= redundancy.MinReq && numHealthy < redundancy.RepairThreshold {
         // check to make sure there are at least *4* path elements. the first three
         // are project, segment, and bucket name, but we want to make sure we're talking
         // about an actual object, and that there's an object name specified
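Taken together, the two rewritten conditions above stop special-casing schemes whose thresholds happen to be equal and instead compare the healthy-piece count directly against the repair and success thresholds. The following is a minimal, self-contained sketch of the resulting decision logic, using the 3/8/9 values that appear in TestIdentifyIrreparableSegments below; the Redundancy struct is a simplified stand-in for the protobuf scheme, not the real pb.RedundancyScheme.

package main

import "fmt"

// Redundancy holds only the fields the checker compares against.
type Redundancy struct {
    MinReq, RepairThreshold, SuccessThreshold int32
}

// classify mirrors the corrected conditions in updateSegmentStatus: repair while
// the healthy count is above the minimum but at or below the repair threshold and
// still short of the success threshold; flag the segment as irreparable once the
// healthy count is down to the minimum while still below the repair threshold.
func classify(numHealthy int32, r Redundancy) string {
    switch {
    case numHealthy > r.MinReq && numHealthy <= r.RepairThreshold && numHealthy < r.SuccessThreshold:
        return "repair"
    case numHealthy <= r.MinReq && numHealthy < r.RepairThreshold:
        return "irreparable"
    default:
        return "ok"
    }
}

func main() {
    r := Redundancy{MinReq: 3, RepairThreshold: 8, SuccessThreshold: 9}
    for _, numHealthy := range []int32{9, 8, 4, 3} {
        fmt.Printf("numHealthy=%d -> %s\n", numHealthy, classify(numHealthy, r))
    }
}

Running it prints ok for 9 healthy pieces, repair for 8 and 4, and irreparable for 3, which is the behavior the new comparisons aim for.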
@@ -89,8 +89,10 @@ func TestIdentifyIrreparableSegments(t *testing.T) {
         pointer := &pb.Pointer{
             Remote: &pb.RemoteSegment{
                 Redundancy: &pb.RedundancyScheme{
-                    MinReq:          int32(3),
-                    RepairThreshold: int32(8),
+                    MinReq:           int32(3),
+                    RepairThreshold:  int32(8),
+                    SuccessThreshold: int32(9),
+                    Total:            int32(10),
                 },
                 RootPieceId:  teststorj.PieceIDFromString("fake-piece-id"),
                 RemotePieces: pieces,
@@ -136,8 +138,10 @@ func TestIdentifyIrreparableSegments(t *testing.T) {
         pointer = &pb.Pointer{
             Remote: &pb.RemoteSegment{
                 Redundancy: &pb.RedundancyScheme{
-                    MinReq:          int32(2),
-                    RepairThreshold: int32(8),
+                    MinReq:           int32(2),
+                    RepairThreshold:  int32(8),
+                    SuccessThreshold: int32(9),
+                    Total:            int32(10),
                 },
                 RootPieceId:  teststorj.PieceIDFromString("fake-piece-id"),
                 RemotePieces: pieces,
@@ -183,8 +187,10 @@ func makePointer(t *testing.T, planet *testplanet.Planet, pieceID string, create
     pointer := &pb.Pointer{
         Remote: &pb.RemoteSegment{
             Redundancy: &pb.RedundancyScheme{
-                MinReq:          int32(minReq),
-                RepairThreshold: int32(repairThreshold),
+                MinReq:           int32(minReq),
+                RepairThreshold:  int32(repairThreshold),
+                SuccessThreshold: int32(repairThreshold) + 1,
+                Total:            int32(repairThreshold) + 2,
             },
             RootPieceId:  teststorj.PieceIDFromString(pieceID),
             RemotePieces: pieces,
@@ -77,8 +77,9 @@ func (ec *ecClient) Put(ctx context.Context, limits []*pb.AddressedOrderLimit, r
         return nil, nil, Error.New("size of limits slice (%d) does not match total count (%d) of erasure scheme", len(limits), rs.TotalCount())
     }
 
-    if nonNilCount(limits) < rs.RepairThreshold() {
-        return nil, nil, Error.New("number of non-nil limits (%d) is less than repair threshold (%d) of erasure scheme", nonNilCount(limits), rs.RepairThreshold())
+    nonNilLimits := nonNilCount(limits)
+    if nonNilLimits <= rs.RepairThreshold() && nonNilLimits < rs.OptimalThreshold() {
+        return nil, nil, Error.New("number of non-nil limits (%d) is less than or equal to the repair threshold (%d) of erasure scheme", nonNilLimits, rs.RepairThreshold())
     }
 
     if !unique(limits) {
@@ -137,11 +138,17 @@ func (ec *ecClient) Put(ctx context.Context, limits []*pb.AddressedOrderLimit, r
         successfulHashes[info.i] = info.hash
 
         switch int(atomic.AddInt32(&successfulCount, 1)) {
-        case rs.RepairThreshold():
+        case rs.OptimalThreshold():
+            zap.S().Infof("Success threshold (%d nodes) reached. Canceling the long tail...", rs.OptimalThreshold())
+            if timer != nil {
+                timer.Stop()
+            }
+            cancel()
+        case rs.RepairThreshold() + 1:
             elapsed := time.Since(start)
             more := elapsed * 3 / 2
 
-            zap.S().Infof("Repair threshold (%d nodes) reached in %.2f s. Starting a timer for %.2f s for reaching the success threshold (%d nodes)...",
+            zap.S().Infof("Repair threshold (%d nodes) passed in %.2f s. Starting a timer for %.2f s for reaching the success threshold (%d nodes)...",
                 rs.RepairThreshold(), elapsed.Seconds(), more.Seconds(), rs.OptimalThreshold())
 
             timer = time.AfterFunc(more, func() {
@@ -150,10 +157,6 @@ func (ec *ecClient) Put(ctx context.Context, limits []*pb.AddressedOrderLimit, r
                     cancel()
                 }
             })
-        case rs.OptimalThreshold():
-            zap.S().Infof("Success threshold (%d nodes) reached. Canceling the long tail...", rs.OptimalThreshold())
-            timer.Stop()
-            cancel()
         }
     }
 
@@ -175,8 +178,9 @@ func (ec *ecClient) Put(ctx context.Context, limits []*pb.AddressedOrderLimit, r
         }
     }()
 
-    if int(atomic.LoadInt32(&successfulCount)) < rs.RepairThreshold() {
-        return nil, nil, Error.New("successful puts (%d) less than repair threshold (%d)", atomic.LoadInt32(&successfulCount), rs.RepairThreshold())
+    successes := int(atomic.LoadInt32(&successfulCount))
+    if successes <= rs.RepairThreshold() && successes < rs.OptimalThreshold() {
+        return nil, nil, Error.New("successful puts (%d) less than or equal to repair threshold (%d)", successes, rs.RepairThreshold())
     }
 
     return successfulNodes, successfulHashes, nil
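The reordered switch above also changes when the long tail is handled: cancellation now happens when the success (optimal) threshold is reached, and the long-tail timer only starts one piece past the repair threshold. A small standalone sketch of that accounting, with plain ints and prints standing in for the atomic counter, zap logging and the time.AfterFunc timer, and an illustrative 8-of-9 scheme:

package main

import "fmt"

func main() {
    repairThreshold, optimalThreshold := 8, 9 // illustrative values, not taken from a real scheme
    timerStarted := false

    for successfulCount := 1; successfulCount <= optimalThreshold; successfulCount++ {
        switch successfulCount {
        case optimalThreshold:
            fmt.Printf("%d pieces stored: success threshold reached, cancel the long tail\n", successfulCount)
        case repairThreshold + 1:
            timerStarted = true
            fmt.Printf("%d pieces stored: repair threshold passed, start the long-tail timer\n", successfulCount)
        }
    }

    fmt.Println("long-tail timer was ever started:", timerStarted)
}

When OptimalThreshold() equals RepairThreshold()+1, as in this sketch, only the first matching case runs, so the timer is never started; that appears to be why the new code guards timer.Stop() with a nil check.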
@@ -472,8 +472,8 @@ func (endpoint *Endpoint) filterValidPieces(ctx context.Context, pointer *pb.Poi
 
         // we repair when the number of healthy files is less than or equal to the repair threshold
         // except for the case when the repair and success thresholds are the same (a case usually seen during testing)
-        if int32(len(remotePieces)) <= remote.Redundancy.RepairThreshold && remote.Redundancy.RepairThreshold != remote.Redundancy.SuccessThreshold {
-            return Error.New("Number of valid pieces is less than or equal to the repair threshold: %v < %v",
+        if int32(len(remotePieces)) <= remote.Redundancy.RepairThreshold && int32(len(remotePieces)) < remote.Redundancy.SuccessThreshold {
+            return Error.New("Number of valid pieces (%d) is less than or equal to the repair threshold (%d)",
                 len(remotePieces),
                 remote.Redundancy.RepairThreshold,
             )
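A quick numeric reading of the new guard in filterValidPieces, using hypothetical repair and success thresholds of 8 and 9 (the real values come from the pointer's redundancy scheme):

package main

import "fmt"

// rejected applies the new commit-time check: the piece count is too low if it is
// at or below the repair threshold while still below the success threshold.
func rejected(validPieces, repairThreshold, successThreshold int32) bool {
    return validPieces <= repairThreshold && validPieces < successThreshold
}

func main() {
    const repairThreshold, successThreshold = 8, 9 // hypothetical scheme
    for _, n := range []int32{7, 8, 9} {
        fmt.Printf("%d valid pieces -> rejected: %v\n", n, rejected(n, repairThreshold, successThreshold))
    }
}

A segment with only repair-threshold-many valid pieces is still rejected; once the count reaches the success threshold it passes, whether or not the two thresholds happen to be equal.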
@@ -334,7 +334,7 @@ func TestCommitSegment(t *testing.T) {
             }
             _, err = metainfo.CommitSegment(ctx, "bucket", "path", -1, pointer, limits)
             require.Error(t, err)
-            require.Contains(t, err.Error(), "Number of valid pieces is less than or equal to the repair threshold")
+            require.Contains(t, err.Error(), "less than or equal to the repair threshold")
         }
     })
 }
@@ -14,6 +14,7 @@ import (
     "storj.io/storj/internal/testcontext"
     "storj.io/storj/internal/testplanet"
     "storj.io/storj/pkg/storj"
+    "storj.io/storj/uplink"
 )
 
 func TestCollector(t *testing.T) {
@@ -31,13 +32,20 @@ func TestCollector(t *testing.T) {
         _, err := rand.Read(expectedData)
         require.NoError(t, err)
 
-        // upload some data that expires in 8 days
-        err = planet.Uplinks[0].UploadWithExpiration(ctx,
-            planet.Satellites[0], "testbucket", "test/path",
+        // upload some data to exactly 2 nodes that expires in 8 days
+        err = planet.Uplinks[0].UploadWithExpirationAndConfig(ctx,
+            planet.Satellites[0],
+            &uplink.RSConfig{
+                MinThreshold:     1,
+                RepairThreshold:  1,
+                SuccessThreshold: 2,
+                MaxThreshold:     2,
+            },
+            "testbucket", "test/path",
             expectedData, time.Now().Add(8*24*time.Hour))
         require.NoError(t, err)
 
-        // stop planet to prevent audits
+        // stop satellite to prevent audits
         require.NoError(t, planet.StopPeer(planet.Satellites[0]))
 
         collections := 0
@@ -75,7 +83,7 @@ func TestCollector(t *testing.T) {
         }
 
         require.NotZero(t, collections)
-        require.Equal(t, serialsPresent, 2)
+        require.Equal(t, 2, serialsPresent)
 
         serialsPresent = 0
 
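One way to read the updated TestCollector setup: assuming RSConfig's RepairThreshold and SuccessThreshold map to the erasure scheme's repair and optimal thresholds, the 1/1/2/2 configuration above makes the new success check in ecClient.Put pass only once both target nodes hold a piece, which is presumably why the test can rely on exactly 2 serial numbers being present. A tiny check of that arithmetic, reusing the condition from the Put hunk above:

package main

import "fmt"

func main() {
    repairThreshold, optimalThreshold := 1, 2 // from the RSConfig in TestCollector, under the mapping assumed above
    for successes := 1; successes <= 2; successes++ {
        failed := successes <= repairThreshold && successes < optimalThreshold
        fmt.Printf("%d successful puts -> upload fails: %v\n", successes, failed)
    }
}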