garbage collection endpoint on storage node (#2424)

Fadila 2019-07-10 15:41:47 +02:00 committed by Natalie Villasana
parent 8f024b3db8
commit fa1f5c8d7f
4 changed files with 209 additions and 0 deletions

View File

@@ -58,6 +58,8 @@ type DB interface {
	Add(context.Context, *Info) error
	// Get returns Info about a piece.
	Get(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID) (*Info, error)
	// GetPieceIDs returns the IDs of pieces for the given satellite that were created before the given time, paginated via limit and offset.
	GetPieceIDs(ctx context.Context, satelliteID storj.NodeID, createdBefore time.Time, limit, offset int) (pieceIDs []storj.PieceID, err error)
	// Delete deletes Info about a piece.
	Delete(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID) error
	// DeleteFailed marks a piece's deletion from disk as failed.
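To make the pagination contract of the new method concrete, here is a minimal sketch (an editor's illustration, not part of this commit) that drains every matching piece ID; it assumes the interface above is pieces.DB:

// drainPieceIDs pages through GetPieceIDs until a short page signals the end.
func drainPieceIDs(ctx context.Context, db pieces.DB, satelliteID storj.NodeID, createdBefore time.Time) ([]storj.PieceID, error) {
	const limit = 1000
	var all []storj.PieceID
	for offset := 0; ; offset += limit {
		page, err := db.GetPieceIDs(ctx, satelliteID, createdBefore, limit, offset)
		if err != nil {
			return nil, err
		}
		all = append(all, page...)
		// a page shorter than limit means there are no more matching rows
		if len(page) < limit {
			return all, nil
		}
	}
}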

View File

@@ -7,6 +7,7 @@ import (
"context"
"io"
"os"
"runtime"
"sync/atomic"
"time"
@@ -20,6 +21,7 @@ import (
"storj.io/storj/internal/memory"
"storj.io/storj/internal/sync2"
"storj.io/storj/pkg/auth/signing"
"storj.io/storj/pkg/bloomfilter"
"storj.io/storj/pkg/identity"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/storj"
@@ -541,6 +543,52 @@ func (endpoint *Endpoint) SaveOrder(ctx context.Context, limit *pb.OrderLimit, o
	}
}

// Retain keeps only the pieces matched by the request's bloom filter and deletes the rest
func (endpoint *Endpoint) Retain(ctx context.Context, retainReq *pb.RetainRequest) (*pb.RetainResponse, error) {
	peer, err := identity.PeerIdentityFromContext(ctx)
	if err != nil {
		return nil, Error.Wrap(err)
	}

	filter, err := bloomfilter.NewFromBytes(retainReq.GetFilter())
	if err != nil {
		return nil, Error.Wrap(err)
	}

	const limit = 1000
	offset := 0
	hasMorePieces := true
	for hasMorePieces {
		// subtract one hour to leave room for clock drift between the satellite and the storage node
		createdBefore := retainReq.GetCreationDate().Add(-1 * time.Hour)

		pieceIDs, err := endpoint.pieceinfo.GetPieceIDs(ctx, peer.ID, createdBefore, limit, offset)
		if err != nil {
			return nil, Error.Wrap(err)
		}
		for _, pieceID := range pieceIDs {
			if !filter.Contains(pieceID) {
				if err = endpoint.store.Delete(ctx, peer.ID, pieceID); err != nil {
					endpoint.log.Error("failed to delete a piece", zap.Error(Error.Wrap(err)))
					// continue: if deleting from the file system failed, we must keep
					// the piece info around so the deletion can be retried next time
					continue
				}
				if err = endpoint.pieceinfo.Delete(ctx, peer.ID, pieceID); err != nil {
					endpoint.log.Error("failed to delete piece info", zap.Error(Error.Wrap(err)))
				}
			}
		}
		hasMorePieces = (len(pieceIDs) == limit)
		offset += len(pieceIDs)
		// call Gosched() because garbage collection is expected to run for a long time and should
		// stay at low priority, so other goroutines can continue serving requests
		runtime.Gosched()
	}

	return &pb.RetainResponse{}, nil
}
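For context, the caller side mirrors what TestRetain (further down) does: the satellite collects the IDs of pieces it still knows about into a bloom filter, serializes it, and attaches a creation-date cutoff. A hedged sketch, with the hypothetical helper name and the transport assumed:

// buildRetainRequest is a hypothetical helper showing how a satellite might
// assemble the request; in production it would be sent to the node over RPC.
func buildRetainRequest(pieceIDsToKeep []storj.PieceID) *pb.RetainRequest {
	// size the filter for the expected count and a 10% false-positive rate,
	// matching the parameters used in TestRetain below
	filter := bloomfilter.NewOptimal(len(pieceIDsToKeep), 0.1)
	for _, id := range pieceIDsToKeep {
		filter.Add(id)
	}
	return &pb.RetainRequest{
		Filter:       filter.Bytes(),
		CreationDate: time.Now(), // Retain subtracts one hour of slack before comparing
	}
}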
// min returns the smaller of two int64 values
func min(a, b int64) int64 {
	if a < b {
		return a
	}
	return b
}

View File

@@ -4,6 +4,8 @@
package piecestore_test

import (
"crypto/tls"
"crypto/x509"
"io"
"strings"
"sync/atomic"
@@ -17,18 +19,25 @@ import (
	"go.uber.org/zap/zaptest"
	"golang.org/x/sync/errgroup"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/peer"

	"storj.io/storj/internal/errs2"
	"storj.io/storj/internal/memory"
	"storj.io/storj/internal/testcontext"
	"storj.io/storj/internal/testidentity"
	"storj.io/storj/internal/testplanet"
	"storj.io/storj/internal/testrand"
	"storj.io/storj/pkg/auth/signing"
	"storj.io/storj/pkg/bloomfilter"
	"storj.io/storj/pkg/pb"
	"storj.io/storj/pkg/pkcrypto"
	"storj.io/storj/pkg/storj"
	"storj.io/storj/storagenode"
	"storj.io/storj/storagenode/bandwidth"
	"storj.io/storj/storagenode/pieces"
	ps "storj.io/storj/storagenode/piecestore"
	"storj.io/storj/storagenode/storagenodedb/storagenodedbtest"
	"storj.io/storj/uplink/piecestore"
)
@@ -499,3 +508,127 @@ func GenerateOrderLimit(t *testing.T, satellite storj.NodeID, uplink storj.NodeI
		Limit: limit,
	}
}

func TestRetain(t *testing.T) {
	storagenodedbtest.Run(t, func(t *testing.T, db storagenode.DB) {
		ctx := testcontext.New(t)
		defer ctx.Cleanup()

		pieceInfos := db.PieceInfo()
		store := pieces.NewStore(zaptest.NewLogger(t), db.Pieces())

		const numPieces = 1000
		const numPiecesToKeep = 990
		// pieces with index in [numPiecesToKeep+numOldPieces, numPieces) get a recent
		// timestamp and thus must not be deleted, even though they are not in the filter
		const numOldPieces = 5

		filter := bloomfilter.NewOptimal(numPiecesToKeep, 0.1)

		pieceIDs := generateTestIDs(numPieces)

		satellite0 := testidentity.MustPregeneratedSignedIdentity(0, storj.LatestIDVersion())
		satellite1 := testidentity.MustPregeneratedSignedIdentity(2, storj.LatestIDVersion())
		uplink := testidentity.MustPregeneratedSignedIdentity(3, storj.LatestIDVersion())

		endpoint, err := ps.NewEndpoint(zaptest.NewLogger(t), nil, nil, nil, store, pieceInfos, nil, nil, nil, ps.Config{})
		require.NoError(t, err)

		recentTime := time.Now()
		oldTime := recentTime.Add(-time.Duration(48) * time.Hour)

		// add all pieces to the node's piece info DB, but put only the first
		// numPiecesToKeep IDs into the bloom filter
		var pieceCreation time.Time
		for index, id := range pieceIDs {
			if index < numPiecesToKeep {
				filter.Add(id)
			}
			if index < numPiecesToKeep+numOldPieces {
				pieceCreation = oldTime
			} else {
				pieceCreation = recentTime
			}

			piecehash0, err := signing.SignPieceHash(ctx,
				signing.SignerFromFullIdentity(uplink),
				&pb.PieceHash{
					PieceId: id,
					Hash:    []byte{0, 2, 3, 4, 5},
				})
			require.NoError(t, err)

			piecehash1, err := signing.SignPieceHash(ctx,
				signing.SignerFromFullIdentity(uplink),
				&pb.PieceHash{
					PieceId: id,
					Hash:    []byte{0, 2, 3, 4, 5},
				})
			require.NoError(t, err)

			pieceinfo0 := pieces.Info{
				SatelliteID:     satellite0.ID,
				PieceSize:       4,
				PieceID:         id,
				PieceCreation:   pieceCreation,
				UplinkPieceHash: piecehash0,
				Uplink:          uplink.PeerIdentity(),
			}
			pieceinfo1 := pieces.Info{
				SatelliteID:     satellite1.ID,
				PieceSize:       4,
				PieceID:         id,
				PieceCreation:   pieceCreation,
				UplinkPieceHash: piecehash1,
				Uplink:          uplink.PeerIdentity(),
			}

			err = pieceInfos.Add(ctx, &pieceinfo0)
			require.NoError(t, err)
			err = pieceInfos.Add(ctx, &pieceinfo1)
			require.NoError(t, err)
		}

		// present satellite0's identity on the context, since Retain garbage
		// collects the pieces of the calling satellite
		ctxSatellite0 := peer.NewContext(ctx, &peer.Peer{
			AuthInfo: credentials.TLSInfo{
				State: tls.ConnectionState{
					PeerCertificates: []*x509.Certificate{satellite0.PeerIdentity().Leaf, satellite0.PeerIdentity().CA},
				},
			},
		})

		retainReq := pb.RetainRequest{}
		retainReq.Filter = filter.Bytes()
		retainReq.CreationDate = recentTime

		_, err = endpoint.Retain(ctxSatellite0, &retainReq)
		require.NoError(t, err)

		// check that nothing was deleted for satellite1
		satellite1Pieces, err := pieceInfos.GetPieceIDs(ctx, satellite1.ID, recentTime.Add(time.Duration(5)*time.Second), numPieces, 0)
		require.NoError(t, err)
		require.Equal(t, numPieces, len(satellite1Pieces))

		// check that neither filtered nor recent pieces were deleted for satellite0
		satellite0Pieces, err := pieceInfos.GetPieceIDs(ctx, satellite0.ID, recentTime.Add(time.Duration(5)*time.Second), numPieces, 0)
		require.NoError(t, err)
		for _, id := range pieceIDs[:numPiecesToKeep] {
			require.Contains(t, satellite0Pieces, id, "piece should not have been deleted (it is in the bloom filter)")
		}
		for _, id := range pieceIDs[numPiecesToKeep+numOldPieces:] {
			require.Contains(t, satellite0Pieces, id, "piece should not have been deleted (it is recent)")
		}
	})
}
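Worth noting: the test asserts only that filtered and recent pieces survive, and does not assert that the five old, unfiltered pieces were actually deleted. That is presumably deliberate. With the filter sized for a 10% false-positive rate, each unfiltered piece still has roughly a 1-in-10 chance of matching the filter and being retained, so asserting the deletion of all five would fail about 1 - 0.9^5 ≈ 41% of the time.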
// generateTestIDs generates n random piece IDs
func generateTestIDs(n int) []storj.PieceID {
	ids := make([]storj.PieceID, n)
	for i := range ids {
		ids[i] = testrand.PieceID()
	}
	return ids
}

View File

@@ -57,6 +57,32 @@ func (db *pieceinfo) Add(ctx context.Context, info *pieces.Info) (err error) {
	return ErrInfo.Wrap(err)
}

// GetPieceIDs returns the IDs of pieces for the given satellite that were created before the given time, paginated via limit and offset
func (db *pieceinfo) GetPieceIDs(ctx context.Context, satelliteID storj.NodeID, createdBefore time.Time, limit, offset int) (pieceIDs []storj.PieceID, err error) {
	defer mon.Task()(&ctx)(&err)

	rows, err := db.db.QueryContext(ctx, db.Rebind(`
		SELECT piece_id
		FROM pieceinfo
		WHERE satellite_id = ? AND piece_creation < ?
		ORDER BY piece_id
		LIMIT ? OFFSET ?
	`), satelliteID, createdBefore, limit, offset)
	if err != nil {
		return nil, ErrInfo.Wrap(err)
	}
	defer func() { err = errs.Combine(err, rows.Close()) }()

	for rows.Next() {
		var pieceID storj.PieceID
		err = rows.Scan(&pieceID)
		if err != nil {
			return pieceIDs, ErrInfo.Wrap(err)
		}
		pieceIDs = append(pieceIDs, pieceID)
	}
	return pieceIDs, nil
}
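A design note on the pagination: the query orders by piece_id and pages with LIMIT/OFFSET, so page boundaries stay stable only while no matching rows are deleted. Retain deletes piece info rows between pages, which can shift surviving rows into an already-skipped offset; such pieces are simply picked up on a later garbage collection run. A keyset-style variant (a sketch only, not part of this commit; the cursor variable holding the last piece ID of the previous page is hypothetical) would avoid the shifting window:

	// hypothetical keyset pagination: resume after the last piece ID returned
	// on the previous page, so deleted rows cannot shift the window
	rows, err := db.db.QueryContext(ctx, db.Rebind(`
		SELECT piece_id
		FROM pieceinfo
		WHERE satellite_id = ? AND piece_creation < ? AND piece_id > ?
		ORDER BY piece_id
		LIMIT ?
	`), satelliteID, createdBefore, cursor, limit)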
// Get gets piece information by satellite id and piece id.
func (db *pieceinfo) Get(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID) (_ *pieces.Info, err error) {
	defer mon.Task()(&ctx)(&err)