satellite/gracefulexit: add failure threshold check (#3329)

* add overall failure percentage check and inactive time frame check before sending a response to sno

* update comment

* delete node from transfer queue if it has been inactive for too long

* fix linting error

* add test config value

* fix nil pointer

* add config value into testplanet

* add unit test for overall failure threshold

* move timeframe threshold to chore

* update protolock

* add chore test

* add per piece failure count logic

* change config name from EndpointMaxFailures to MaxFailuresPerPiece

* address comments

* fix linting error

* add error handling for no row returned from progress table

* fix test for graceful exit chore on storagenode

* fix typo InActive -> Inactive

* improve readability for failure threshold calculation

* update config lock

* change error handling for GetProgress in graceful exit endpoint on the satellite side

* return proper rpc error in endpoint

* add check in chore test for checking finish timestamp and queue
This commit is contained in:
Yingrong Zhao 2019-10-24 12:24:42 -04:00 committed by GitHub
parent bff5c19de6
commit fa1ac24e19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 279 additions and 143 deletions

View File

@ -363,8 +363,10 @@ func (planet *Planet) newSatellites(count int) ([]*SatelliteSystem, error) {
ChoreBatchSize: 10,
ChoreInterval: defaultInterval,
EndpointBatchSize: 100,
EndpointMaxFailures: 5,
EndpointBatchSize: 100,
MaxFailuresPerPiece: 5,
MaxInactiveTimeFrame: time.Second * 10,
OverallMaxFailuresPercentage: 10,
},
Metrics: metrics.Config{
ChoreInterval: defaultInterval,

View File

@ -58,15 +58,21 @@ func (TransferFailed_Error) EnumDescriptor() ([]byte, []int) {
type ExitFailed_Reason int32
const (
ExitFailed_VERIFICATION_FAILED ExitFailed_Reason = 0
ExitFailed_VERIFICATION_FAILED ExitFailed_Reason = 0
ExitFailed_INACTIVE_TIMEFRAME_EXCEEDED ExitFailed_Reason = 1
ExitFailed_OVERALL_FAILURE_PERCENTAGE_EXCEEDED ExitFailed_Reason = 2
)
var ExitFailed_Reason_name = map[int32]string{
0: "VERIFICATION_FAILED",
1: "INACTIVE_TIMEFRAME_EXCEEDED",
2: "OVERALL_FAILURE_PERCENTAGE_EXCEEDED",
}
var ExitFailed_Reason_value = map[string]int32{
"VERIFICATION_FAILED": 0,
"VERIFICATION_FAILED": 0,
"INACTIVE_TIMEFRAME_EXCEEDED": 1,
"OVERALL_FAILURE_PERCENTAGE_EXCEEDED": 2,
}
func (x ExitFailed_Reason) String() string {
@ -1032,75 +1038,78 @@ func init() {
func init() { proto.RegisterFile("gracefulexit.proto", fileDescriptor_8f0acbf2ce5fa631) }
var fileDescriptor_8f0acbf2ce5fa631 = []byte{
// 1076 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x56, 0xef, 0x6e, 0x1b, 0x45,
0x10, 0xf7, 0xb9, 0x8d, 0x13, 0x8f, 0x1d, 0xc7, 0xd9, 0xfc, 0x33, 0x6e, 0x1b, 0xbb, 0x27, 0x10,
0xae, 0x04, 0x11, 0x32, 0x50, 0x90, 0x2a, 0x81, 0x9c, 0xda, 0xb1, 0x4d, 0xc3, 0x39, 0x5d, 0x27,
0x05, 0x21, 0xc1, 0x69, 0xeb, 0x9b, 0x38, 0x27, 0xec, 0xbb, 0x63, 0x6f, 0x5d, 0x35, 0x5f, 0x78,
0x0e, 0x3e, 0xf1, 0x01, 0x5e, 0x06, 0x1e, 0x01, 0x24, 0x2a, 0xf1, 0x26, 0x68, 0xef, 0xf6, 0x2e,
0x77, 0xb6, 0x13, 0x22, 0xf5, 0x93, 0x7d, 0x33, 0xbf, 0xf9, 0xed, 0xec, 0xcc, 0x6f, 0x67, 0x17,
0xc8, 0x98, 0xb3, 0x11, 0x9e, 0xcf, 0x26, 0xf8, 0xda, 0x16, 0x07, 0x1e, 0x77, 0x85, 0x4b, 0x8a,
0x49, 0x5b, 0x15, 0xc6, 0xee, 0xd8, 0x0d, 0x3d, 0xd5, 0xd2, 0x14, 0x05, 0xb3, 0x9d, 0xf3, 0xe8,
0xbb, 0xe8, 0x72, 0x0b, 0xb9, 0x1f, 0x7e, 0xe9, 0x75, 0xd8, 0xef, 0xa2, 0x30, 0x5c, 0xa7, 0xf3,
0xda, 0x16, 0xb6, 0x33, 0x1e, 0x32, 0x81, 0x93, 0x89, 0x2d, 0xd0, 0xa7, 0xf8, 0xd3, 0x0c, 0x7d,
0xa1, 0x1f, 0xc1, 0xbd, 0xbe, 0x63, 0x0b, 0x9b, 0x09, 0xec, 0xaa, 0x35, 0x24, 0x56, 0xb9, 0xc9,
0xfb, 0xb0, 0xea, 0xb8, 0x16, 0x9a, 0xb6, 0x55, 0xd1, 0xea, 0x5a, 0xa3, 0x78, 0x58, 0xfa, 0xe3,
0x4d, 0x2d, 0xf3, 0xf7, 0x9b, 0x5a, 0xce, 0x70, 0x2d, 0xec, 0xb7, 0x69, 0x4e, 0xba, 0xfb, 0x96,
0xfe, 0x33, 0x6c, 0x2d, 0x59, 0xe6, 0xd6, 0xf1, 0xa4, 0x06, 0x05, 0xcb, 0x9d, 0x32, 0xdb, 0x31,
0x1d, 0x36, 0xc5, 0x4a, 0xb6, 0xae, 0x35, 0xf2, 0x14, 0x42, 0x93, 0xc1, 0xa6, 0x48, 0x1e, 0x00,
0xf8, 0x1e, 0x1b, 0xa1, 0x39, 0xf3, 0xd1, 0xaa, 0xdc, 0xa9, 0x6b, 0x0d, 0x8d, 0xe6, 0x03, 0xcb,
0x99, 0x8f, 0x96, 0x6e, 0x41, 0xed, 0xda, 0x9d, 0xfa, 0x9e, 0xeb, 0xf8, 0x48, 0x5a, 0x00, 0x7e,
0x6c, 0xad, 0x68, 0xf5, 0x3b, 0x8d, 0x42, 0xf3, 0xe1, 0x41, 0xaa, 0xda, 0x4b, 0xe2, 0x69, 0x22,
0x48, 0xaf, 0xc0, 0x6e, 0x17, 0x85, 0x84, 0x9c, 0x70, 0x77, 0xcc, 0xd1, 0x8f, 0xeb, 0xf8, 0x1c,
0xf6, 0x16, 0x3c, 0x6a, 0xdd, 0xc7, 0xb0, 0xe6, 0x29, 0x9b, 0x5a, 0xb5, 0x9a, 0x5e, 0x35, 0x15,
0x15, 0x63, 0xf5, 0xdf, 0x35, 0x28, 0x26, 0x5d, 0xf3, 0x35, 0xd2, 0x16, 0x6a, 0x94, 0xa8, 0x76,
0xf6, 0xc6, 0x6a, 0x3f, 0x82, 0xb2, 0x87, 0x7c, 0x84, 0x8e, 0x30, 0x47, 0xee, 0xd4, 0x9b, 0xa0,
0xc0, 0xa0, 0xa4, 0x59, 0xba, 0xa1, 0xec, 0x4f, 0x95, 0x99, 0xec, 0x03, 0xf8, 0xb3, 0xd1, 0x08,
0x7d, 0xff, 0x7c, 0x36, 0xa9, 0xdc, 0xad, 0x6b, 0x8d, 0x35, 0x9a, 0xb0, 0xe8, 0xbf, 0x65, 0x61,
0xf3, 0x94, 0x33, 0xc7, 0x3f, 0x47, 0x3e, 0x94, 0x66, 0xb4, 0xd0, 0x22, 0x6d, 0xd8, 0x76, 0xb9,
0x3d, 0xb6, 0x1d, 0x36, 0x31, 0x03, 0x45, 0x9a, 0x13, 0x7b, 0x6a, 0x8b, 0x20, 0xe7, 0x42, 0x93,
0x1c, 0x28, 0x95, 0x0e, 0xe4, 0xcf, 0xb1, 0xf4, 0x50, 0x12, 0xe1, 0xaf, 0x6c, 0xa4, 0x05, 0x5b,
0x31, 0x8b, 0x67, 0xe3, 0x08, 0xcd, 0x0b, 0xe6, 0x5f, 0x04, 0x7b, 0x2b, 0x34, 0x37, 0x23, 0x92,
0x13, 0xe9, 0xe9, 0x31, 0xff, 0x82, 0x6e, 0x46, 0xe8, 0xd8, 0x44, 0xba, 0xb0, 0xcb, 0xd1, 0x9b,
0xb0, 0x11, 0x4e, 0xe5, 0x6e, 0x13, 0x2c, 0x77, 0xae, 0x63, 0xd9, 0x4e, 0x04, 0x5c, 0x11, 0x3d,
0x81, 0xcd, 0xb9, 0x5c, 0x6c, 0x2b, 0x28, 0x47, 0xf1, 0x70, 0x43, 0x55, 0x79, 0x35, 0x40, 0xf7,
0xdb, 0x74, 0x23, 0x95, 0x47, 0xdf, 0xd2, 0xff, 0xd5, 0xa0, 0x14, 0x15, 0xe9, 0x88, 0xd9, 0x13,
0xb4, 0x96, 0xf3, 0x69, 0xb7, 0xe3, 0x23, 0x9f, 0xc3, 0x0a, 0x72, 0xee, 0xf2, 0xa0, 0x14, 0xa5,
0xa6, 0x9e, 0xd6, 0x53, 0x7a, 0xa5, 0x83, 0x8e, 0x44, 0xd2, 0x30, 0x40, 0xff, 0x16, 0x56, 0x82,
0x6f, 0xb2, 0x0e, 0x79, 0x63, 0x70, 0x6a, 0x1e, 0x0d, 0xce, 0x8c, 0x76, 0x39, 0x43, 0xee, 0x43,
0x65, 0x78, 0x3a, 0xa0, 0xad, 0x6e, 0xc7, 0x34, 0x06, 0xed, 0x8e, 0x79, 0x66, 0xb4, 0x5e, 0xb4,
0xfa, 0xc7, 0xad, 0xc3, 0xe3, 0x4e, 0x59, 0x23, 0x3b, 0xb0, 0xd9, 0x6b, 0x0d, 0x7b, 0xe6, 0x8b,
0x0e, 0xed, 0x1f, 0xf5, 0x9f, 0xb6, 0x4e, 0xfb, 0x03, 0xa3, 0x9c, 0x25, 0x05, 0x58, 0x3d, 0x33,
0x9e, 0x19, 0x83, 0x6f, 0x8c, 0x32, 0xe8, 0xbf, 0x68, 0x40, 0x86, 0xc2, 0xe5, 0x6c, 0x8c, 0x52,
0x6d, 0x5f, 0xa3, 0xef, 0xb3, 0x31, 0x92, 0x2f, 0x21, 0xef, 0x47, 0xb2, 0x50, 0xed, 0xaf, 0x2d,
0x4f, 0x37, 0x56, 0x4f, 0x2f, 0x43, 0xaf, 0x62, 0xc8, 0x63, 0xc8, 0x9d, 0x07, 0x1b, 0x51, 0x7d,
0xbf, 0x7f, 0xd3, 0x66, 0x7b, 0x19, 0xaa, 0xd0, 0x87, 0x79, 0x58, 0x55, 0x39, 0xe8, 0x00, 0x6b,
0x86, 0x2b, 0x28, 0x32, 0xeb, 0x52, 0xff, 0x4b, 0x83, 0xf5, 0x28, 0x26, 0x28, 0xe7, 0xdb, 0x76,
0xa2, 0xe0, 0x71, 0xfb, 0x15, 0x13, 0x68, 0xfe, 0x88, 0x97, 0xea, 0xd8, 0xed, 0xa9, 0xb0, 0x8d,
0x00, 0x75, 0x12, 0xfa, 0x9f, 0xe1, 0x25, 0x05, 0x2f, 0xfe, 0x4f, 0x9e, 0xc3, 0x0e, 0xb3, 0x2c,
0x79, 0xb0, 0xd1, 0x4a, 0x9d, 0x91, 0x50, 0x98, 0x0f, 0x0e, 0xe2, 0xc9, 0xde, 0x8a, 0x60, 0x89,
0xe3, 0xb2, 0xc5, 0x16, 0x8d, 0xfa, 0x57, 0x50, 0x68, 0xa3, 0x3c, 0xb5, 0x6f, 0xbf, 0x31, 0xbd,
0x0b, 0xeb, 0x72, 0xf8, 0x44, 0x73, 0x40, 0xf6, 0x61, 0x4f, 0x16, 0x3c, 0x1e, 0x18, 0xa6, 0x6f,
0x8f, 0x1d, 0x26, 0x66, 0x3c, 0x9c, 0x44, 0x45, 0xba, 0x83, 0x09, 0xfc, 0x30, 0x72, 0xea, 0xbf,
0x6a, 0x00, 0x92, 0x49, 0xe9, 0xfe, 0x13, 0xd8, 0x0d, 0x68, 0x64, 0x97, 0x66, 0x7c, 0x91, 0x65,
0x1b, 0x15, 0x76, 0xc6, 0xaf, 0x48, 0xc8, 0x67, 0x90, 0xe3, 0xc8, 0x7c, 0xd7, 0x51, 0x8a, 0xaf,
0x2d, 0x4e, 0x50, 0xa5, 0x76, 0x1a, 0xc0, 0xa8, 0x82, 0xeb, 0x0f, 0x21, 0x17, 0x5a, 0xc8, 0x1e,
0x6c, 0x25, 0xe5, 0x6b, 0x1e, 0xb5, 0xfa, 0xc7, 0x9d, 0x76, 0x39, 0xa3, 0xff, 0x93, 0x85, 0x72,
0x3c, 0xee, 0x23, 0xd9, 0x7e, 0x0a, 0x79, 0xc7, 0x15, 0x26, 0x97, 0x9a, 0x51, 0xb2, 0xdd, 0x9d,
0xbf, 0x2b, 0x42, 0x45, 0xf5, 0x32, 0x74, 0xcd, 0x51, 0xff, 0x49, 0x1b, 0x4a, 0x42, 0x89, 0x2b,
0x2c, 0xb9, 0x12, 0xed, 0xbd, 0xe5, 0xa2, 0x0d, 0xc7, 0x4b, 0x86, 0xae, 0x8b, 0x94, 0x22, 0xbf,
0x80, 0xa2, 0x15, 0xf4, 0x51, 0x71, 0x84, 0x8a, 0x78, 0x27, 0xcd, 0x91, 0xe8, 0x74, 0x2f, 0x43,
0x0b, 0x56, 0xa2, 0xf1, 0x6d, 0x28, 0xa5, 0x5a, 0x15, 0x0e, 0xaa, 0x85, 0x2c, 0x52, 0xfd, 0x95,
0x59, 0x60, 0xaa, 0xe1, 0x4f, 0xa0, 0x10, 0x77, 0x0a, 0xad, 0xca, 0x4a, 0x40, 0x51, 0xb9, 0xae,
0xf0, 0xbd, 0x0c, 0x05, 0x8c, 0xbf, 0x12, 0xa7, 0xaf, 0xf9, 0x67, 0x16, 0xca, 0x72, 0x22, 0x24,
0xdf, 0x17, 0xe4, 0x55, 0x70, 0x5f, 0x2e, 0xbb, 0xaf, 0xc9, 0x07, 0xe9, 0x25, 0x6e, 0x7e, 0xc0,
0x54, 0x3f, 0xbc, 0x25, 0x5a, 0x5d, 0xc6, 0xdf, 0xc3, 0xf6, 0xb2, 0xf7, 0x0e, 0x79, 0x94, 0xa6,
0xb9, 0xe1, 0x4d, 0x54, 0xbd, 0xe1, 0xf6, 0x26, 0x3f, 0xc0, 0xc6, 0xdc, 0x33, 0x80, 0xbc, 0xbb,
0x90, 0xe0, 0x92, 0xf7, 0x43, 0xf5, 0xbd, 0xff, 0x41, 0x85, 0xe9, 0x37, 0x2f, 0x60, 0x27, 0xde,
0x54, 0x2a, 0xff, 0x01, 0xac, 0x9e, 0x70, 0x57, 0x5e, 0xca, 0xa4, 0x9e, 0xa6, 0x5a, 0x9c, 0xc9,
0xd5, 0xfd, 0x39, 0xc4, 0x9c, 0xf8, 0x1b, 0xda, 0x47, 0xda, 0xe1, 0xdd, 0xef, 0xb2, 0xde, 0xcb,
0x97, 0xb9, 0xe0, 0x1d, 0xf9, 0xf1, 0x7f, 0x01, 0x00, 0x00, 0xff, 0xff, 0x9f, 0x3c, 0xc5, 0xf1,
0x95, 0x0a, 0x00, 0x00,
// 1128 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x56, 0xef, 0x6e, 0xe3, 0x44,
0x10, 0x8f, 0x73, 0xd7, 0xb4, 0x99, 0xa4, 0x69, 0xba, 0xfd, 0x17, 0xd2, 0xbb, 0x26, 0x18, 0xd0,
0xe5, 0x24, 0xa8, 0x50, 0x80, 0x03, 0xe9, 0x24, 0x90, 0xdb, 0x38, 0x89, 0xb9, 0xd4, 0xe9, 0x6d,
0xd2, 0x72, 0x42, 0x02, 0xcb, 0x17, 0x4f, 0x53, 0xeb, 0x12, 0x3b, 0xac, 0x9d, 0xd3, 0xf5, 0x0b,
0xcf, 0xc1, 0x67, 0x78, 0x19, 0x78, 0x04, 0x90, 0x38, 0x89, 0x6f, 0x3c, 0x06, 0x5a, 0x7b, 0xe3,
0xda, 0x49, 0x5a, 0x2a, 0xdd, 0xa7, 0xc4, 0x33, 0xbf, 0x99, 0x9d, 0x9d, 0xf9, 0xcd, 0xec, 0x00,
0x19, 0x32, 0x73, 0x80, 0x17, 0xd3, 0x11, 0xbe, 0xb1, 0xfd, 0xc3, 0x09, 0x73, 0x7d, 0x97, 0xe4,
0xe3, 0xb2, 0x32, 0x0c, 0xdd, 0xa1, 0x1b, 0x6a, 0xca, 0x85, 0x31, 0xfa, 0xa6, 0xed, 0x5c, 0xcc,
0xbe, 0xf3, 0x2e, 0xb3, 0x90, 0x79, 0xe1, 0x97, 0x5c, 0x85, 0x83, 0x16, 0xfa, 0xba, 0xeb, 0xa8,
0x6f, 0x6c, 0xdf, 0x76, 0x86, 0x3d, 0xd3, 0xc7, 0xd1, 0xc8, 0xf6, 0xd1, 0xa3, 0xf8, 0xd3, 0x14,
0x3d, 0x5f, 0x6e, 0xc2, 0xbe, 0xe6, 0xd8, 0xbe, 0x6d, 0xfa, 0xd8, 0x12, 0x67, 0x70, 0xac, 0x50,
0x93, 0x47, 0xb0, 0xea, 0xb8, 0x16, 0x1a, 0xb6, 0x55, 0x92, 0xaa, 0x52, 0x2d, 0x7f, 0x54, 0xf8,
0xfd, 0x6d, 0x25, 0xf5, 0xd7, 0xdb, 0x4a, 0x46, 0x77, 0x2d, 0xd4, 0x1a, 0x34, 0xc3, 0xd5, 0x9a,
0x25, 0xff, 0x0c, 0x5b, 0x4b, 0x8e, 0xb9, 0xb3, 0x3d, 0xa9, 0x40, 0xce, 0x72, 0xc7, 0xa6, 0xed,
0x18, 0x8e, 0x39, 0xc6, 0x52, 0xba, 0x2a, 0xd5, 0xb2, 0x14, 0x42, 0x91, 0x6e, 0x8e, 0x91, 0x3c,
0x04, 0xf0, 0x26, 0xe6, 0x00, 0x8d, 0xa9, 0x87, 0x56, 0xe9, 0x5e, 0x55, 0xaa, 0x49, 0x34, 0x1b,
0x48, 0xce, 0x3c, 0xb4, 0x64, 0x0b, 0x2a, 0x37, 0xde, 0xd4, 0x9b, 0xb8, 0x8e, 0x87, 0x44, 0x01,
0xf0, 0x22, 0x69, 0x49, 0xaa, 0xde, 0xab, 0xe5, 0xea, 0xef, 0x1f, 0x26, 0xb2, 0xbd, 0xc4, 0x9e,
0xc6, 0x8c, 0xe4, 0x12, 0xec, 0xb6, 0xd0, 0xe7, 0x90, 0x53, 0xe6, 0x0e, 0x19, 0x7a, 0x51, 0x1e,
0x9f, 0xc3, 0xde, 0x82, 0x46, 0x9c, 0xfb, 0x04, 0xd6, 0x26, 0x42, 0x26, 0x4e, 0x2d, 0x27, 0x4f,
0x4d, 0x58, 0x45, 0x58, 0xf9, 0x37, 0x09, 0xf2, 0x71, 0xd5, 0x7c, 0x8e, 0xa4, 0x85, 0x1c, 0xc5,
0xb2, 0x9d, 0xbe, 0x35, 0xdb, 0x8f, 0xa1, 0x38, 0x41, 0x36, 0x40, 0xc7, 0x37, 0x06, 0xee, 0x78,
0x32, 0x42, 0x1f, 0x83, 0x94, 0xa6, 0xe9, 0x86, 0x90, 0x1f, 0x0b, 0x31, 0x39, 0x00, 0xf0, 0xa6,
0x83, 0x01, 0x7a, 0xde, 0xc5, 0x74, 0x54, 0xba, 0x5f, 0x95, 0x6a, 0x6b, 0x34, 0x26, 0x91, 0x7f,
0x4d, 0xc3, 0x66, 0x9f, 0x99, 0x8e, 0x77, 0x81, 0xac, 0xc7, 0xc5, 0x68, 0xa1, 0x45, 0x1a, 0xb0,
0xed, 0x32, 0x7b, 0x68, 0x3b, 0xe6, 0xc8, 0x08, 0x18, 0x69, 0x8c, 0xec, 0xb1, 0xed, 0x07, 0x31,
0xe7, 0xea, 0xe4, 0x50, 0xb0, 0xb4, 0xcb, 0x7f, 0x3a, 0x5c, 0x43, 0xc9, 0x0c, 0x7f, 0x2d, 0x23,
0x0a, 0x6c, 0x45, 0x5e, 0x26, 0x36, 0x0e, 0xd0, 0xb8, 0x34, 0xbd, 0xcb, 0xe0, 0x6e, 0xb9, 0xfa,
0xe6, 0xcc, 0xc9, 0x29, 0xd7, 0xb4, 0x4d, 0xef, 0x92, 0x6e, 0xce, 0xd0, 0x91, 0x88, 0xb4, 0x60,
0x97, 0xe1, 0x64, 0x64, 0x0e, 0x70, 0xcc, 0x6f, 0x1b, 0xf3, 0x72, 0xef, 0x26, 0x2f, 0xdb, 0x31,
0x83, 0x6b, 0x47, 0x4f, 0x61, 0x73, 0x2e, 0x16, 0xdb, 0x0a, 0xd2, 0x91, 0x3f, 0xda, 0x10, 0x59,
0x5e, 0x0d, 0xd0, 0x5a, 0x83, 0x6e, 0x24, 0xe2, 0xd0, 0x2c, 0xf9, 0x1f, 0x09, 0x0a, 0xb3, 0x24,
0x35, 0x4d, 0x7b, 0x84, 0xd6, 0x72, 0x7f, 0xd2, 0xdd, 0xfc, 0x91, 0xaf, 0x60, 0x05, 0x19, 0x73,
0x59, 0x90, 0x8a, 0x42, 0x5d, 0x4e, 0xf2, 0x29, 0x79, 0xd2, 0xa1, 0xca, 0x91, 0x34, 0x34, 0x90,
0x5f, 0xc0, 0x4a, 0xf0, 0x4d, 0xd6, 0x21, 0xab, 0x77, 0xfb, 0x46, 0xb3, 0x7b, 0xa6, 0x37, 0x8a,
0x29, 0xf2, 0x00, 0x4a, 0xbd, 0x7e, 0x97, 0x2a, 0x2d, 0xd5, 0xd0, 0xbb, 0x0d, 0xd5, 0x38, 0xd3,
0x95, 0x73, 0x45, 0xeb, 0x28, 0x47, 0x1d, 0xb5, 0x28, 0x91, 0x1d, 0xd8, 0x6c, 0x2b, 0xbd, 0xb6,
0x71, 0xae, 0x52, 0xad, 0xa9, 0x1d, 0x2b, 0x7d, 0xad, 0xab, 0x17, 0xd3, 0x24, 0x07, 0xab, 0x67,
0xfa, 0x33, 0xbd, 0xfb, 0x9d, 0x5e, 0x04, 0xf9, 0x17, 0x09, 0x48, 0xcf, 0x77, 0x99, 0x39, 0x44,
0xce, 0xb6, 0x13, 0xf4, 0x3c, 0x73, 0x88, 0xe4, 0x1b, 0xc8, 0x7a, 0x33, 0x5a, 0x88, 0xf2, 0x57,
0x96, 0x87, 0x1b, 0xb1, 0xa7, 0x9d, 0xa2, 0xd7, 0x36, 0xe4, 0x09, 0x64, 0x2e, 0x82, 0x8b, 0x88,
0xba, 0x3f, 0xb8, 0xed, 0xb2, 0xed, 0x14, 0x15, 0xe8, 0xa3, 0x2c, 0xac, 0x8a, 0x18, 0x64, 0x80,
0x35, 0xdd, 0xf5, 0x29, 0x9a, 0xd6, 0x95, 0xfc, 0xa7, 0x04, 0xeb, 0x33, 0x9b, 0x20, 0x9d, 0xef,
0x5a, 0x89, 0xdc, 0x84, 0xd9, 0xaf, 0x4d, 0x1f, 0x8d, 0x57, 0x78, 0x25, 0xda, 0x6e, 0x4f, 0x98,
0x6d, 0x04, 0xa8, 0xd3, 0x50, 0xff, 0x0c, 0xaf, 0x28, 0x4c, 0xa2, 0xff, 0xe4, 0x39, 0xec, 0x98,
0x96, 0xc5, 0x1b, 0x1b, 0xad, 0x44, 0x8f, 0x84, 0xc4, 0x7c, 0x78, 0x18, 0x4d, 0x76, 0x65, 0x06,
0x8b, 0xb5, 0xcb, 0x96, 0xb9, 0x28, 0x94, 0xbf, 0x85, 0x5c, 0x03, 0x79, 0xd7, 0xbe, 0xfb, 0xc5,
0xe4, 0x16, 0xac, 0xf3, 0xe1, 0x33, 0x9b, 0x03, 0xbc, 0x0e, 0x7b, 0x3c, 0xe1, 0xd1, 0xc0, 0x30,
0x3c, 0x7b, 0xe8, 0x98, 0xfe, 0x94, 0x85, 0x93, 0x28, 0x4f, 0x77, 0x30, 0x86, 0xef, 0xcd, 0x94,
0xf2, 0xbf, 0x12, 0x00, 0xf7, 0x24, 0x78, 0xff, 0x39, 0xec, 0x06, 0x6e, 0x78, 0x95, 0xa6, 0x6c,
0xd1, 0xcb, 0x36, 0x0a, 0xec, 0x94, 0x5d, 0x3b, 0x21, 0x5f, 0x42, 0x86, 0xa1, 0xe9, 0xb9, 0x8e,
0x60, 0x7c, 0x65, 0x71, 0x82, 0x0a, 0xb6, 0xd3, 0x00, 0x46, 0x05, 0x5c, 0x7e, 0x05, 0x99, 0x50,
0x42, 0xf6, 0x60, 0x2b, 0x4e, 0x5f, 0xa3, 0xa9, 0x68, 0x1d, 0x95, 0x53, 0xbf, 0x02, 0xfb, 0x9a,
0xae, 0x1c, 0xf7, 0xb5, 0x73, 0xd5, 0xe8, 0x6b, 0x27, 0x6a, 0x93, 0x2a, 0x27, 0xaa, 0xa1, 0xbe,
0x38, 0x56, 0xd5, 0x86, 0xda, 0x28, 0x4a, 0xe4, 0x11, 0x7c, 0xd0, 0x3d, 0x57, 0xa9, 0xd2, 0xe9,
0x04, 0x46, 0x67, 0x54, 0x35, 0x4e, 0x55, 0x7a, 0xac, 0xea, 0x7d, 0xde, 0x2e, 0x11, 0x30, 0x2d,
0xff, 0x9d, 0x86, 0x62, 0xf4, 0x70, 0xcc, 0x1a, 0xe0, 0x0b, 0xc8, 0x3a, 0xae, 0x6f, 0x30, 0xce,
0x3e, 0xd1, 0x00, 0xbb, 0xf3, 0xaf, 0x4e, 0xc8, 0xcd, 0x76, 0x8a, 0xae, 0x39, 0xe2, 0x3f, 0x69,
0x40, 0xc1, 0x17, 0x34, 0x0d, 0x8b, 0x27, 0xe8, 0xbf, 0xbf, 0x9c, 0xfe, 0xe1, 0xa0, 0x4a, 0xd1,
0x75, 0x3f, 0xc1, 0xed, 0xaf, 0x21, 0x6f, 0x05, 0x8c, 0x10, 0x3e, 0x42, 0x6e, 0xbd, 0x97, 0xf4,
0x11, 0xe3, 0x4c, 0x3b, 0x45, 0x73, 0x56, 0x8c, 0x42, 0x0d, 0x28, 0x24, 0x8a, 0x1e, 0x8e, 0xbc,
0x85, 0x28, 0x12, 0x4c, 0xe1, 0x51, 0x60, 0x82, 0x3a, 0x4f, 0x21, 0x17, 0xd5, 0x1c, 0xad, 0xd2,
0x4a, 0xe0, 0xa2, 0x74, 0x53, 0x09, 0xdb, 0x29, 0x0a, 0x18, 0x7d, 0xc5, 0xfa, 0xb8, 0xfe, 0x47,
0x1a, 0x8a, 0x7c, 0xb6, 0xc4, 0x37, 0x15, 0xf2, 0x3a, 0x78, 0x79, 0x97, 0xbd, 0xfc, 0xe4, 0xe3,
0xe4, 0x11, 0xb7, 0xaf, 0x42, 0xe5, 0x4f, 0xee, 0x88, 0x16, 0xcf, 0xfa, 0x0f, 0xb0, 0xbd, 0x6c,
0x73, 0x22, 0x8f, 0x93, 0x6e, 0x6e, 0xd9, 0xae, 0xca, 0xb7, 0xec, 0x01, 0xe4, 0x47, 0xd8, 0x98,
0x5b, 0x28, 0xc8, 0x87, 0x0b, 0x01, 0x2e, 0xd9, 0x44, 0xca, 0x1f, 0xfd, 0x0f, 0x2a, 0x0c, 0xbf,
0x7e, 0x09, 0x3b, 0xd1, 0xa5, 0x12, 0xf1, 0x77, 0x61, 0xf5, 0x94, 0xb9, 0xfc, 0x79, 0x27, 0xd5,
0xa4, 0xab, 0xc5, 0xe9, 0x5e, 0x3e, 0x98, 0x43, 0xcc, 0x91, 0xbf, 0x26, 0x7d, 0x2a, 0x1d, 0xdd,
0xff, 0x3e, 0x3d, 0x79, 0xf9, 0x32, 0x13, 0x6c, 0xa4, 0x9f, 0xfd, 0x17, 0x00, 0x00, 0xff, 0xff,
0x42, 0x7c, 0x67, 0xa5, 0xdf, 0x0a, 0x00, 0x00,
}
type DRPCNodeGracefulExitClient interface {

View File

@ -103,6 +103,8 @@ message ExitCompleted {
message ExitFailed {
enum Reason {
VERIFICATION_FAILED = 0;
INACTIVE_TIMEFRAME_EXCEEDED = 1;
OVERALL_FAILURE_PERCENTAGE_EXCEEDED = 2;
}
// on failure
bytes exit_failure_signature = 1;

View File

@ -876,6 +876,14 @@
"enum_fields": [
{
"name": "VERIFICATION_FAILED"
},
{
"name": "INACTIVE_TIMEFRAME_EXCEEDED",
"integer": 1
},
{
"name": "OVERALL_FAILURE_PERCENTAGE_EXCEEDED",
"integer": 2
}
]
}

View File

@ -5,11 +5,14 @@ package gracefulexit
import (
"context"
"database/sql"
"time"
"github.com/zeebo/errs"
"go.uber.org/zap"
"storj.io/storj/internal/sync2"
"storj.io/storj/pkg/storj"
"storj.io/storj/satellite/metainfo"
"storj.io/storj/satellite/overlay"
)
@ -46,9 +49,9 @@ func (chore *Chore) Run(ctx context.Context) (err error) {
chore.log.Info("running graceful exit chore.")
exitingNodes, err := chore.overlay.GetExitingNodesLoopIncomplete(ctx)
exitingNodes, err := chore.overlay.GetExitingNodes(ctx)
if err != nil {
chore.log.Error("error retrieving nodes that have not completed the metainfo loop.", zap.Error(err))
chore.log.Error("error retrieving nodes that have not finished exiting", zap.Error(err))
return nil
}
@ -58,7 +61,47 @@ func (chore *Chore) Run(ctx context.Context) (err error) {
return nil
}
pathCollector := NewPathCollector(chore.db, exitingNodes, chore.log, chore.config.ChoreBatchSize)
exitingNodesLoopIncomplete := make(storj.NodeIDList, 0, nodeCount)
for _, node := range exitingNodes {
if node.ExitLoopCompletedAt == nil {
exitingNodesLoopIncomplete = append(exitingNodesLoopIncomplete, node.NodeID)
continue
}
progress, err := chore.db.GetProgress(ctx, node.NodeID)
if err != nil && !errs.Is(err, sql.ErrNoRows) {
chore.log.Error("error retrieving progress for node", zap.Stringer("Node ID", node.NodeID), zap.Error(err))
continue
}
lastActivityTime := *node.ExitLoopCompletedAt
if progress != nil {
lastActivityTime = progress.UpdatedAt
}
// check inactive timeframe
if lastActivityTime.Add(chore.config.MaxInactiveTimeFrame).Before(time.Now().UTC()) {
exitStatusRequest := &overlay.ExitStatusRequest{
NodeID: node.NodeID,
ExitSuccess: false,
ExitFinishedAt: time.Now().UTC(),
}
_, err = chore.overlay.UpdateExitStatus(ctx, exitStatusRequest)
if err != nil {
chore.log.Error("error updating exit status", zap.Error(err))
continue
}
// remove all items from the transfer queue
err := chore.db.DeleteTransferQueueItems(ctx, node.NodeID)
if err != nil {
chore.log.Error("error deleting node from transfer queue", zap.Error(err))
}
}
}
// Populate transfer queue for nodes that have not completed the exit loop yet
pathCollector := NewPathCollector(chore.db, exitingNodesLoopIncomplete, chore.log, chore.config.ChoreBatchSize)
err = chore.metainfoLoop.Join(ctx, pathCollector)
if err != nil {
chore.log.Error("error joining metainfo loop.", zap.Error(err))
@ -72,7 +115,7 @@ func (chore *Chore) Run(ctx context.Context) (err error) {
}
now := time.Now().UTC()
for _, nodeID := range exitingNodes {
for _, nodeID := range exitingNodesLoopIncomplete {
exitStatus := overlay.ExitStatusRequest{
NodeID: nodeID,
ExitLoopCompletedAt: now,

View File

@ -9,11 +9,13 @@ import (
"time"
"github.com/stretchr/testify/require"
"go.uber.org/zap"
"storj.io/storj/internal/memory"
"storj.io/storj/internal/testcontext"
"storj.io/storj/internal/testplanet"
"storj.io/storj/internal/testrand"
"storj.io/storj/pkg/storj"
"storj.io/storj/satellite"
"storj.io/storj/satellite/gracefulexit"
"storj.io/storj/satellite/overlay"
@ -22,10 +24,16 @@ import (
)
func TestChore(t *testing.T) {
var maximumInactiveTimeFrame = time.Second * 1
testplanet.Run(t, testplanet.Config{
SatelliteCount: 1,
StorageNodeCount: 8,
UplinkCount: 1,
Reconfigure: testplanet.Reconfigure{
Satellite: func(log *zap.Logger, index int, config *satellite.Config) {
config.GracefulExit.MaxInactiveTimeFrame = maximumInactiveTimeFrame
},
},
}, func(t *testing.T, ctx *testcontext.Context, planet *testplanet.Planet) {
uplinkPeer := planet.Uplinks[0]
satellite := planet.Satellites[0]
@ -46,16 +54,22 @@ func TestChore(t *testing.T) {
err = uplinkPeer.UploadWithConfig(ctx, satellite, rs, "testbucket", "test/path2", testrand.Bytes(5*memory.KiB))
require.NoError(t, err)
exitStatus := overlay.ExitStatusRequest{
exitStatusRequest := overlay.ExitStatusRequest{
NodeID: exitingNode.ID(),
ExitInitiatedAt: time.Now().UTC(),
}
_, err = satellite.Overlay.DB.UpdateExitStatus(ctx, &exitStatus)
_, err = satellite.Overlay.DB.UpdateExitStatus(ctx, &exitStatusRequest)
require.NoError(t, err)
nodeIDs, err := satellite.Overlay.DB.GetExitingNodesLoopIncomplete(ctx)
exitingNodes, err := satellite.Overlay.DB.GetExitingNodes(ctx)
require.NoError(t, err)
nodeIDs := make(storj.NodeIDList, 0, len(exitingNodes))
for _, exitingNode := range exitingNodes {
if exitingNode.ExitLoopCompletedAt == nil {
nodeIDs = append(nodeIDs, exitingNode.NodeID)
}
}
require.Len(t, nodeIDs, 1)
satellite.GracefulExit.Chore.Loop.TriggerWait()
@ -77,9 +91,37 @@ func TestChore(t *testing.T) {
require.Len(t, incompleteTransfers, 0)
}
nodeIDs, err = satellite.Overlay.DB.GetExitingNodesLoopIncomplete(ctx)
exitingNodes, err = satellite.Overlay.DB.GetExitingNodes(ctx)
require.NoError(t, err)
nodeIDs = make(storj.NodeIDList, 0, len(exitingNodes))
for _, exitingNode := range exitingNodes {
if exitingNode.ExitLoopCompletedAt == nil {
nodeIDs = append(nodeIDs, exitingNode.NodeID)
}
}
require.Len(t, nodeIDs, 0)
satellite.GracefulExit.Chore.Loop.Pause()
err = satellite.DB.GracefulExit().IncrementProgress(ctx, exitingNode.ID(), 0, 0, 0)
require.NoError(t, err)
incompleteTransfers, err = satellite.DB.GracefulExit().GetIncomplete(ctx, exitingNode.ID(), 20, 0)
require.NoError(t, err)
require.Len(t, incompleteTransfers, 2)
// node should fail graceful exit if it has been inactive for maximum inactive time frame since last activity
time.Sleep(maximumInactiveTimeFrame + time.Second*1)
satellite.GracefulExit.Chore.Loop.TriggerWait()
exitStatus, err := satellite.Overlay.DB.GetExitStatus(ctx, exitingNode.ID())
require.NoError(t, err)
require.False(t, exitStatus.ExitSuccess)
require.NotNil(t, exitStatus.ExitFinishedAt)
incompleteTransfers, err = satellite.DB.GracefulExit().GetIncomplete(ctx, exitingNode.ID(), 20, 0)
require.NoError(t, err)
require.Len(t, incompleteTransfers, 0)
})
}

View File

@ -25,6 +25,10 @@ type Config struct {
ChoreBatchSize int `help:"size of the buffer used to batch inserts into the transfer queue." default:"500"`
ChoreInterval time.Duration `help:"how often to run the transfer queue chore." releaseDefault:"30s" devDefault:"10s"`
EndpointBatchSize int `help:"size of the buffer used to batch transfer queue reads and sends to the storage node." default:"100"`
EndpointMaxFailures int `help:"maximum number of transfer failures per piece." default:"3"`
EndpointBatchSize int `help:"size of the buffer used to batch transfer queue reads and sends to the storage node." default:"100"`
MaxFailuresPerPiece int `help:"maximum number of transfer failures per piece." default:"3"`
// TODO: what's the default number?
OverallMaxFailuresPercentage int `help:"maximum percentage of transfer failures per node." default:"10"`
MaxInactiveTimeFrame time.Duration `help:"maximum inactive time frame of transfer activities per node." default:"500h"`
}

View File

@ -202,7 +202,7 @@ func (endpoint *Endpoint) doProcess(stream processStream) (err error) {
}
if len(incomplete) == 0 {
incomplete, err = endpoint.db.GetIncompleteFailed(ctx, nodeID, endpoint.config.EndpointMaxFailures, endpoint.config.EndpointBatchSize, 0)
incomplete, err = endpoint.db.GetIncompleteFailed(ctx, nodeID, endpoint.config.MaxFailuresPerPiece, endpoint.config.EndpointBatchSize, 0)
if err != nil {
return handleError(err)
}
@ -233,19 +233,58 @@ func (endpoint *Endpoint) doProcess(stream processStream) (err error) {
}
pendingCount := pending.length()
// if there are no more transfers and the pending queue is empty, send complete
if atomic.LoadInt32(&morePiecesFlag) == 0 && pendingCount == 0 {
// TODO check whether failure threshold is met before sending completed
// TODO needs exit signature
transferMsg := &pb.SatelliteMessage{
Message: &pb.SatelliteMessage_ExitCompleted{
ExitCompleted: &pb.ExitCompleted{},
},
exitStatusRequest := &overlay.ExitStatusRequest{
NodeID: nodeID,
ExitFinishedAt: time.Now().UTC(),
}
progress, err := endpoint.db.GetProgress(ctx, nodeID)
if err != nil {
return rpcstatus.Error(rpcstatus.Internal, err.Error())
}
var transferMsg *pb.SatelliteMessage
processed := progress.PiecesFailed + progress.PiecesTransferred
// check node's exiting progress to see if it has failed past the max failure threshold
if processed > 0 && float64(progress.PiecesFailed)/float64(processed)*100 >= float64(endpoint.config.OverallMaxFailuresPercentage) {
exitStatusRequest.ExitSuccess = false
// TODO needs signature
transferMsg = &pb.SatelliteMessage{
Message: &pb.SatelliteMessage_ExitFailed{
ExitFailed: &pb.ExitFailed{
Reason: pb.ExitFailed_OVERALL_FAILURE_PERCENTAGE_EXCEEDED,
},
},
}
} else {
exitStatusRequest.ExitSuccess = true
// TODO needs signature
transferMsg = &pb.SatelliteMessage{
Message: &pb.SatelliteMessage_ExitCompleted{
ExitCompleted: &pb.ExitCompleted{},
},
}
}
_, err = endpoint.overlaydb.UpdateExitStatus(ctx, exitStatusRequest)
if err != nil {
return rpcstatus.Error(rpcstatus.Internal, err.Error())
}
err = stream.Send(transferMsg)
if err != nil {
return Error.Wrap(err)
}
// remove remaining items from the queue after notifying nodes about their exit status
err = endpoint.db.DeleteTransferQueueItems(ctx, nodeID)
if err != nil {
return rpcstatus.Error(rpcstatus.Internal, err.Error())
}
break
}
// skip if there are none pending
@ -425,7 +464,7 @@ func (endpoint *Endpoint) handleSucceeded(ctx context.Context, pending *pendingM
}
var failed int64
if transferQueueItem.FailedCount != nil && *transferQueueItem.FailedCount > 0 {
if transferQueueItem.FailedCount != nil && *transferQueueItem.FailedCount >= endpoint.config.MaxFailuresPerPiece {
failed = -1
}
@ -476,8 +515,8 @@ func (endpoint *Endpoint) handleFailed(ctx context.Context, pending *pendingMap,
return Error.Wrap(err)
}
// only increment failed if it hasn't failed before
if failedCount == 1 {
// only increment the overall failed count once the piece's failure count has reached the threshold
if failedCount == endpoint.config.MaxFailuresPerPiece {
err = endpoint.db.IncrementProgress(ctx, nodeID, 0, 0, 1)
if err != nil {
return Error.Wrap(err)

View File

@ -123,6 +123,12 @@ func TestFailure(t *testing.T) {
case *pb.SatelliteMessage_ExitCompleted:
// TODO test completed signature stuff
break
case *pb.SatelliteMessage_ExitFailed:
status, err := satellite.DB.OverlayCache().GetExitStatus(ctx, exitingNode.ID())
require.NoError(t, err)
require.False(t, status.ExitSuccess)
require.Equal(t, m.ExitFailed.Reason, pb.ExitFailed_OVERALL_FAILURE_PERCENTAGE_EXCEEDED)
break
default:
t.FailNow()
}
@ -161,9 +167,9 @@ func testTransfers(t *testing.T, objects int, verifier func(ctx *testcontext.Con
require.NoError(t, err)
}
// check that there are no exiting nodes.
exitingNodeIDs, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
exitingNodes, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
require.NoError(t, err)
require.Len(t, exitingNodeIDs, 0)
require.Len(t, exitingNodes, 0)
exitingNode, err := findNodeToExit(ctx, planet, objects)
require.NoError(t, err)
@ -187,11 +193,11 @@ func testTransfers(t *testing.T, objects int, verifier func(ctx *testcontext.Con
switch response.GetMessage().(type) {
case *pb.SatelliteMessage_NotReady:
// now check that the exiting node is initiated.
exitingNodeIDs, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
exitingNodes, err := satellite.DB.OverlayCache().GetExitingNodes(ctx)
require.NoError(t, err)
require.Len(t, exitingNodeIDs, 1)
require.Len(t, exitingNodes, 1)
require.Equal(t, exitingNode.ID(), exitingNodeIDs[0])
require.Equal(t, exitingNode.ID(), exitingNodes[0].NodeID)
default:
t.FailNow()
}

View File

@ -70,14 +70,20 @@ func TestGetExitingNodes(t *testing.T) {
nodes, err := cache.GetExitingNodes(ctx)
require.NoError(t, err)
require.Len(t, nodes, exitingCount)
for _, id := range nodes {
require.True(t, exiting[id])
for _, node := range nodes {
require.True(t, exiting[node.NodeID])
}
nodes, err = cache.GetExitingNodesLoopIncomplete(ctx)
nodes, err = cache.GetExitingNodes(ctx)
require.NoError(t, err)
require.Len(t, nodes, exitingLoopIncompleteCount)
for _, id := range nodes {
exitingNodesLoopIncomplete := make(storj.NodeIDList, 0, len(nodes))
for _, node := range nodes {
if node.ExitLoopCompletedAt == nil {
exitingNodesLoopIncomplete = append(exitingNodesLoopIncomplete, node.NodeID)
}
}
require.Len(t, exitingNodesLoopIncomplete, exitingLoopIncompleteCount)
for _, id := range exitingNodesLoopIncomplete {
require.True(t, exitingLoopIncomplete[id])
}
})

View File

@ -74,9 +74,7 @@ type DB interface {
// UpdateExitStatus is used to update a node's graceful exit status.
UpdateExitStatus(ctx context.Context, request *ExitStatusRequest) (stats *NodeStats, err error)
// GetExitingNodes returns nodes who have initiated a graceful exit, but have not completed it.
GetExitingNodes(ctx context.Context) (exitingNodes storj.NodeIDList, err error)
// GetExitingNodesLoopIncomplete returns exiting nodes who haven't completed the metainfo loop iteration.
GetExitingNodesLoopIncomplete(ctx context.Context) (exitingNodes storj.NodeIDList, err error)
GetExitingNodes(ctx context.Context) (exitingNodes []*ExitStatus, err error)
// GetGracefulExitCompletedByTimeFrame returns nodes who have completed graceful exit within a time window (time window is around graceful exit completion).
GetGracefulExitCompletedByTimeFrame(ctx context.Context, begin, end time.Time) (exitedNodes storj.NodeIDList, err error)
// GetGracefulExitIncompleteByTimeFrame returns nodes who have initiated, but not completed graceful exit within a time window (time window is around graceful exit initiation).

View File

@ -224,7 +224,7 @@ func (db *gracefulexitDB) GetIncompleteFailed(ctx context.Context, nodeID storj.
WHERE node_id = ?
AND finished_at is NULL
AND last_failed_at is not NULL
AND failed_count <= ?
AND failed_count < ?
ORDER BY durability_ratio asc, queued_at asc LIMIT ? OFFSET ?`
rows, err := db.db.Query(db.db.Rebind(sql), nodeID.Bytes(), maxFailures, limit, offset)
if err != nil {

View File

@ -829,11 +829,11 @@ func (cache *overlaycache) UpdatePieceCounts(ctx context.Context, pieceCounts ma
}
// GetExitingNodes returns nodes who have initiated a graceful exit, but have not completed it.
func (cache *overlaycache) GetExitingNodes(ctx context.Context) (exitingNodes storj.NodeIDList, err error) {
func (cache *overlaycache) GetExitingNodes(ctx context.Context) (exitingNodes []*overlay.ExitStatus, err error) {
defer mon.Task()(&ctx)(&err)
rows, err := cache.db.Query(cache.db.Rebind(`
SELECT id FROM nodes
SELECT id, exit_initiated_at, exit_loop_completed_at, exit_finished_at, exit_success FROM nodes
WHERE exit_initiated_at IS NOT NULL
AND exit_finished_at IS NULL
`),
@ -846,41 +846,12 @@ func (cache *overlaycache) GetExitingNodes(ctx context.Context) (exitingNodes st
}()
for rows.Next() {
var id storj.NodeID
err = rows.Scan(&id)
var exitingNodeStatus overlay.ExitStatus
err = rows.Scan(&exitingNodeStatus.NodeID, &exitingNodeStatus.ExitInitiatedAt, &exitingNodeStatus.ExitLoopCompletedAt, &exitingNodeStatus.ExitFinishedAt, &exitingNodeStatus.ExitSuccess)
if err != nil {
return nil, err
}
exitingNodes = append(exitingNodes, id)
}
return exitingNodes, nil
}
// GetExitingNodesLoopIncomplete returns exiting nodes who haven't completed the metainfo loop iteration.
func (cache *overlaycache) GetExitingNodesLoopIncomplete(ctx context.Context) (exitingNodes storj.NodeIDList, err error) {
defer mon.Task()(&ctx)(&err)
rows, err := cache.db.Query(cache.db.Rebind(`
SELECT id FROM nodes
WHERE exit_initiated_at IS NOT NULL
AND exit_loop_completed_at IS NULL
AND exit_finished_at IS NULL
`),
)
if err != nil {
return nil, err
}
defer func() {
err = errs.Combine(err, rows.Close())
}()
for rows.Next() {
var id storj.NodeID
err = rows.Scan(&id)
if err != nil {
return nil, err
}
exitingNodes = append(exitingNodes, id)
exitingNodes = append(exitingNodes, &exitingNodeStatus)
}
return exitingNodes, nil
}

View File

@ -119,7 +119,13 @@ contact.external-address: ""
# graceful-exit.endpoint-batch-size: 100
# maximum number of transfer failures per piece.
# graceful-exit.endpoint-max-failures: 3
# graceful-exit.max-failures-per-piece: 3
# maximum inactive time frame of transfer activities per node.
# graceful-exit.max-inactive-time-frame: 500h0m0s
# maximum percentage of transfer failures per node.
# graceful-exit.overall-max-failures-percentage: 10
# path to the certificate chain for this identity
identity.cert-path: /root/.local/share/storj/identity/satellite/identity.cert

View File

@ -106,7 +106,7 @@ func exitSatellite(ctx context.Context, t *testing.T, planet *testplanet.Planet,
exitingNodes, err := satellite1.DB.OverlayCache().GetExitingNodes(ctx)
require.NoError(t, err)
require.Len(t, exitingNodes, 1)
require.Equal(t, exitingNode.ID(), exitingNodes[0])
require.Equal(t, exitingNode.ID(), exitingNodes[0].NodeID)
queueItems, err := satellite1.DB.GracefulExit().GetIncomplete(ctx, exitStatus.NodeID, 10, 0)
require.NoError(t, err)