satellite/gracefulexit: add flag for enabling/disabling graceful exit on the satellite (#3437)

This commit is contained in:
Maximillian von Briesen 2019-11-01 10:21:24 -04:00 committed by Egon Elbre
parent 0d2c03a124
commit 590312970d
6 changed files with 66 additions and 27 deletions

View File

@ -366,6 +366,8 @@ func (planet *Planet) newSatellites(count int) ([]*SatelliteSystem, error) {
},
Version: planet.NewVersionConfig(),
GracefulExit: gracefulexit.Config{
Enabled: true,
ChoreBatchSize: 10,
ChoreInterval: defaultInterval,

View File

@ -439,20 +439,22 @@ func NewAPI(log *zap.Logger, full *identity.FullIdentity, db DB, pointerDB metai
}
{ // setup graceful exit
log.Debug("Satellite API Process setting up graceful exit endpoint")
peer.GracefulExit.Endpoint = gracefulexit.NewEndpoint(
peer.Log.Named("gracefulexit:endpoint"),
signing.SignerFromFullIdentity(peer.Identity),
peer.DB.GracefulExit(),
peer.Overlay.DB,
peer.Overlay.Service,
peer.Metainfo.Service,
peer.Orders.Service,
peer.DB.PeerIdentities(),
config.GracefulExit)
if config.GracefulExit.Enabled {
log.Debug("Satellite API Process setting up graceful exit endpoint")
peer.GracefulExit.Endpoint = gracefulexit.NewEndpoint(
peer.Log.Named("gracefulexit:endpoint"),
signing.SignerFromFullIdentity(peer.Identity),
peer.DB.GracefulExit(),
peer.Overlay.DB,
peer.Overlay.Service,
peer.Metainfo.Service,
peer.Orders.Service,
peer.DB.PeerIdentities(),
config.GracefulExit)
pb.RegisterSatelliteGracefulExitServer(peer.Server.GRPC(), peer.GracefulExit.Endpoint)
pb.DRPCRegisterSatelliteGracefulExit(peer.Server.DRPC(), peer.GracefulExit.Endpoint.DRPC())
pb.RegisterSatelliteGracefulExitServer(peer.Server.GRPC(), peer.GracefulExit.Endpoint)
pb.DRPCRegisterSatelliteGracefulExit(peer.Server.DRPC(), peer.GracefulExit.Endpoint.DRPC())
}
}
return peer, nil

View File

@ -22,6 +22,8 @@ var (
// Config for the chore
type Config struct {
Enabled bool `help:"whether or not graceful exit is enabled on the satellite side." releaseDefault:"false" devDefault:"true"`
ChoreBatchSize int `help:"size of the buffer used to batch inserts into the transfer queue." default:"500"`
ChoreInterval time.Duration `help:"how often to run the transfer queue chore." releaseDefault:"30s" devDefault:"10s"`

View File

@ -205,9 +205,7 @@ func TestConcurrentConnections(t *testing.T) {
// connect to satellite so we initiate the exit ("main" call)
conn, err := exitingNode.Dialer.DialAddressID(ctx, satellite.Addr(), satellite.Identity.ID)
require.NoError(t, err)
defer func() {
err = errs.Combine(err, conn.Close())
}()
defer ctx.Check(conn.Close)
client := conn.SatelliteGracefulExitClient()
// this connection will immediately return since graceful exit has not been initiated yet
@ -799,6 +797,38 @@ func TestUpdatePointerFailure_DuplicatedNodeID(t *testing.T) {
})
}
func TestExitDisabled(t *testing.T) {
testplanet.Run(t, testplanet.Config{
SatelliteCount: 1,
StorageNodeCount: 2,
UplinkCount: 1,
Reconfigure: testplanet.Reconfigure{
Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
config.GracefulExit.Enabled = false
},
},
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
satellite := planet.Satellites[0]
exitingNode := planet.StorageNodes[0]
require.Nil(t, satellite.GracefulExit.Chore)
require.Nil(t, satellite.GracefulExit.Endpoint)
conn, err := exitingNode.Dialer.DialAddressID(ctx, satellite.Addr(), satellite.Identity.ID)
require.NoError(t, err)
defer ctx.Check(conn.Close)
client := conn.SatelliteGracefulExitClient()
processClient, err := client.Process(ctx)
require.NoError(t, err)
// Process endpoint should return immediately if GE is disabled
response, err := processClient.Recv()
require.True(t, errs2.IsRPC(err, rpcstatus.Unimplemented))
require.Nil(t, response)
})
}
func testTransfers(t *testing.T, objects int, verifier func(ctx *testcontext.Context, nodeFullIDs map[storj.NodeID]*identity.FullIdentity, satellite *testplanet.SatelliteSystem, processClient exitProcessClient, exitingNode *storagenode.Peer, numPieces int)) {
successThreshold := 4
testplanet.Run(t, testplanet.Config{
@ -839,9 +869,7 @@ func testTransfers(t *testing.T, objects int, verifier func(ctx *testcontext.Con
// connect to satellite so we initiate the exit.
conn, err := exitingNode.Dialer.DialAddressID(ctx, satellite.Addr(), satellite.Identity.ID)
require.NoError(t, err)
defer func() {
err = errs.Combine(err, conn.Close())
}()
defer ctx.Check(conn.Close)
client := conn.SatelliteGracefulExitClient()
@ -876,9 +904,7 @@ func testTransfers(t *testing.T, objects int, verifier func(ctx *testcontext.Con
// connect to satellite again to start receiving transfers
c, err = client.Process(ctx)
require.NoError(t, err)
defer func() {
err = errs.Combine(err, c.CloseSend())
}()
defer ctx.Check(c.CloseSend)
verifier(ctx, nodeFullIDs, satellite, c, exitingNode, len(incompleteTransfers))
})

View File

@ -400,8 +400,10 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, pointerDB metainfo
}
{ // setup graceful exit
log.Debug("Setting up graceful")
peer.GracefulExit.Chore = gracefulexit.NewChore(peer.Log.Named("graceful exit chore"), peer.DB.GracefulExit(), peer.Overlay.DB, peer.Metainfo.Loop, config.GracefulExit)
if config.GracefulExit.Enabled {
log.Debug("Setting up graceful exit")
peer.GracefulExit.Chore = gracefulexit.NewChore(peer.Log.Named("graceful exit chore"), peer.DB.GracefulExit(), peer.Overlay.DB, peer.Metainfo.Loop, config.GracefulExit)
}
}
{ // setup metrics service
@ -451,9 +453,11 @@ func (peer *Peer) Run(ctx context.Context) (err error) {
group.Go(func() error {
return errs2.IgnoreCanceled(peer.GarbageCollection.Service.Run(ctx))
})
group.Go(func() error {
return errs2.IgnoreCanceled(peer.GracefulExit.Chore.Run(ctx))
})
if peer.GracefulExit.Chore != nil {
group.Go(func() error {
return errs2.IgnoreCanceled(peer.GracefulExit.Chore.Run(ctx))
})
}
group.Go(func() error {
return errs2.IgnoreCanceled(peer.Metrics.Chore.Run(ctx))
})

View File

@ -115,6 +115,9 @@ contact.external-address: ""
# how often to run the transfer queue chore.
# graceful-exit.chore-interval: 30s
# whether or not graceful exit is enabled on the satellite side.
# graceful-exit.enabled: false
# size of the buffer used to batch transfer queue reads and sends to the storage node.
# graceful-exit.endpoint-batch-size: 100