all: use keyed special comment
Change-Id: I57f6af053382c638026b64c5ff77b169bd3c6c8b
parent 3ff8467878
commit 0bdb952269
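The change is mechanical: every monkit call site that carried a bare //locked comment now carries the keyed form //mon:locked, presumably so tooling that scans for the annotation can match on an unambiguous key. A minimal sketch of the convention, using a hypothetical metric name and package (the real call sites are in the hunks below):

package example

import "github.com/spacemonkeygo/monkit/v3"

var mon = monkit.Package()

func recordExample() {
	// Old form: bare special comment.
	// mon.Meter("example_event").Mark(1) //locked

	// New form: keyed special comment, as applied throughout this commit.
	mon.Meter("example_event").Mark(1) //mon:locked
}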
@@ -103,7 +103,7 @@ func cmdDetect(cmd *cobra.Command, args []string) (err error) {
 	log.Info("number of remote segments", zap.Int("segments", observer.remoteSegments))
 	log.Info("number of zombie segments", zap.Int("segments", observer.zombieSegments))
 
-	mon.IntVal("zombie_segments").Observe(int64(observer.zombieSegments)) //locked
+	mon.IntVal("zombie_segments").Observe(int64(observer.zombieSegments)) //mon:locked
 
 	return process.Report(ctx)
 }
@@ -174,15 +174,15 @@ func (service *Service) Tally(ctx context.Context) (err error) {
 			monAccounting.IntVal("bucket.remote_bytes").Observe(bucket.RemoteBytes)
 			total.Combine(bucket)
 		}
-		monAccounting.IntVal("total.objects").Observe(total.ObjectCount) //locked
+		monAccounting.IntVal("total.objects").Observe(total.ObjectCount) //mon:locked
 
-		monAccounting.IntVal("total.segments").Observe(total.Segments()) //locked
-		monAccounting.IntVal("total.inline_segments").Observe(total.InlineSegments) //locked
-		monAccounting.IntVal("total.remote_segments").Observe(total.RemoteSegments) //locked
+		monAccounting.IntVal("total.segments").Observe(total.Segments()) //mon:locked
+		monAccounting.IntVal("total.inline_segments").Observe(total.InlineSegments) //mon:locked
+		monAccounting.IntVal("total.remote_segments").Observe(total.RemoteSegments) //mon:locked
 
-		monAccounting.IntVal("total.bytes").Observe(total.Bytes()) //locked
-		monAccounting.IntVal("total.inline_bytes").Observe(total.InlineBytes) //locked
-		monAccounting.IntVal("total.remote_bytes").Observe(total.RemoteBytes) //locked
+		monAccounting.IntVal("total.bytes").Observe(total.Bytes()) //mon:locked
+		monAccounting.IntVal("total.inline_bytes").Observe(total.InlineBytes) //mon:locked
+		monAccounting.IntVal("total.remote_bytes").Observe(total.RemoteBytes) //mon:locked
 	}
 
 	// return errors if something went wrong.
@@ -235,7 +235,7 @@ func (verifier *Verifier) Verify(ctx context.Context, path storj.Path, skip map[
 			zap.Error(share.Error))
 	}
 
-	mon.IntVal("verify_shares_downloaded_successfully").Observe(int64(len(sharesToAudit))) //locked
+	mon.IntVal("verify_shares_downloaded_successfully").Observe(int64(len(sharesToAudit))) //mon:locked
 
 	required := int(pointer.Remote.Redundancy.GetMinReq())
 	total := int(pointer.Remote.Redundancy.GetTotal())
@@ -287,27 +287,27 @@ func (verifier *Verifier) Verify(ctx context.Context, path storj.Path, skip map[
 		unknownPercentage = float64(numUnknown) / float64(totalAudited)
 	}
 
-	mon.Meter("audit_success_nodes_global").Mark(numSuccessful) //locked
-	mon.Meter("audit_fail_nodes_global").Mark(numFailed) //locked
-	mon.Meter("audit_offline_nodes_global").Mark(numOffline) //locked
-	mon.Meter("audit_contained_nodes_global").Mark(numContained) //locked
-	mon.Meter("audit_unknown_nodes_global").Mark(numUnknown) //locked
-	mon.Meter("audit_total_nodes_global").Mark(totalAudited) //locked
-	mon.Meter("audit_total_pointer_nodes_global").Mark(totalInPointer) //locked
+	mon.Meter("audit_success_nodes_global").Mark(numSuccessful) //mon:locked
+	mon.Meter("audit_fail_nodes_global").Mark(numFailed) //mon:locked
+	mon.Meter("audit_offline_nodes_global").Mark(numOffline) //mon:locked
+	mon.Meter("audit_contained_nodes_global").Mark(numContained) //mon:locked
+	mon.Meter("audit_unknown_nodes_global").Mark(numUnknown) //mon:locked
+	mon.Meter("audit_total_nodes_global").Mark(totalAudited) //mon:locked
+	mon.Meter("audit_total_pointer_nodes_global").Mark(totalInPointer) //mon:locked
 
-	mon.IntVal("audit_success_nodes").Observe(int64(numSuccessful)) //locked
-	mon.IntVal("audit_fail_nodes").Observe(int64(numFailed)) //locked
-	mon.IntVal("audit_offline_nodes").Observe(int64(numOffline)) //locked
-	mon.IntVal("audit_contained_nodes").Observe(int64(numContained)) //locked
-	mon.IntVal("audit_unknown_nodes").Observe(int64(numUnknown)) //locked
-	mon.IntVal("audit_total_nodes").Observe(int64(totalAudited)) //locked
-	mon.IntVal("audit_total_pointer_nodes").Observe(int64(totalInPointer)) //locked
-	mon.FloatVal("audited_percentage").Observe(auditedPercentage) //locked
-	mon.FloatVal("audit_offline_percentage").Observe(offlinePercentage) //locked
-	mon.FloatVal("audit_successful_percentage").Observe(successfulPercentage) //locked
-	mon.FloatVal("audit_failed_percentage").Observe(failedPercentage) //locked
-	mon.FloatVal("audit_contained_percentage").Observe(containedPercentage) //locked
-	mon.FloatVal("audit_unknown_percentage").Observe(unknownPercentage) //locked
+	mon.IntVal("audit_success_nodes").Observe(int64(numSuccessful)) //mon:locked
+	mon.IntVal("audit_fail_nodes").Observe(int64(numFailed)) //mon:locked
+	mon.IntVal("audit_offline_nodes").Observe(int64(numOffline)) //mon:locked
+	mon.IntVal("audit_contained_nodes").Observe(int64(numContained)) //mon:locked
+	mon.IntVal("audit_unknown_nodes").Observe(int64(numUnknown)) //mon:locked
+	mon.IntVal("audit_total_nodes").Observe(int64(totalAudited)) //mon:locked
+	mon.IntVal("audit_total_pointer_nodes").Observe(int64(totalInPointer)) //mon:locked
+	mon.FloatVal("audited_percentage").Observe(auditedPercentage) //mon:locked
+	mon.FloatVal("audit_offline_percentage").Observe(offlinePercentage) //mon:locked
+	mon.FloatVal("audit_successful_percentage").Observe(successfulPercentage) //mon:locked
+	mon.FloatVal("audit_failed_percentage").Observe(failedPercentage) //mon:locked
+	mon.FloatVal("audit_contained_percentage").Observe(containedPercentage) //mon:locked
+	mon.FloatVal("audit_unknown_percentage").Observe(unknownPercentage) //mon:locked
 
 	pendingAudits, err := createPendingAudits(ctx, containedNodes, correctedShares, pointer, randomIndex, path)
 	if err != nil {
@@ -638,20 +638,20 @@ func (verifier *Verifier) Reverify(ctx context.Context, path storj.Path) (report
 		}
 	}
 
-	mon.Meter("reverify_successes_global").Mark(len(report.Successes)) //locked
-	mon.Meter("reverify_offlines_global").Mark(len(report.Offlines)) //locked
-	mon.Meter("reverify_fails_global").Mark(len(report.Fails)) //locked
-	mon.Meter("reverify_contained_global").Mark(len(report.PendingAudits)) //locked
-	mon.Meter("reverify_unknown_global").Mark(len(report.Unknown)) //locked
+	mon.Meter("reverify_successes_global").Mark(len(report.Successes)) //mon:locked
+	mon.Meter("reverify_offlines_global").Mark(len(report.Offlines)) //mon:locked
+	mon.Meter("reverify_fails_global").Mark(len(report.Fails)) //mon:locked
+	mon.Meter("reverify_contained_global").Mark(len(report.PendingAudits)) //mon:locked
+	mon.Meter("reverify_unknown_global").Mark(len(report.Unknown)) //mon:locked
 
-	mon.IntVal("reverify_successes").Observe(int64(len(report.Successes))) //locked
-	mon.IntVal("reverify_offlines").Observe(int64(len(report.Offlines))) //locked
-	mon.IntVal("reverify_fails").Observe(int64(len(report.Fails))) //locked
-	mon.IntVal("reverify_contained").Observe(int64(len(report.PendingAudits))) //locked
-	mon.IntVal("reverify_unknown").Observe(int64(len(report.Unknown))) //locked
+	mon.IntVal("reverify_successes").Observe(int64(len(report.Successes))) //mon:locked
+	mon.IntVal("reverify_offlines").Observe(int64(len(report.Offlines))) //mon:locked
+	mon.IntVal("reverify_fails").Observe(int64(len(report.Fails))) //mon:locked
+	mon.IntVal("reverify_contained").Observe(int64(len(report.PendingAudits))) //mon:locked
+	mon.IntVal("reverify_unknown").Observe(int64(len(report.Unknown))) //mon:locked
 
-	mon.IntVal("reverify_contained_in_segment").Observe(containedInSegment) //locked
-	mon.IntVal("reverify_total_in_segment").Observe(int64(len(pieces))) //locked
+	mon.IntVal("reverify_contained_in_segment").Observe(containedInSegment) //mon:locked
+	mon.IntVal("reverify_total_in_segment").Observe(int64(len(pieces))) //mon:locked
 
 	return report, err
 }
@@ -285,7 +285,7 @@ func (endpoint *Endpoint) Process(stream pb.DRPCSatelliteGracefulExit_ProcessStr
 			return rpcstatus.Error(rpcstatus.Internal, err.Error())
 		}
 
-		mon.Meter("graceful_exit_fail_validation").Mark(1) //locked
+		mon.Meter("graceful_exit_fail_validation").Mark(1) //mon:locked
 
 		exitStatusRequest := &overlay.ExitStatusRequest{
 			NodeID: nodeID,
@@ -510,7 +510,7 @@ func (endpoint *Endpoint) handleSucceeded(ctx context.Context, stream pb.DRPCSat
 		return Error.Wrap(err)
 	}
 
-	mon.Meter("graceful_exit_transfer_piece_success").Mark(1) //locked
+	mon.Meter("graceful_exit_transfer_piece_success").Mark(1) //mon:locked
 	return nil
 }
 
@@ -522,7 +522,7 @@ func (endpoint *Endpoint) handleFailed(ctx context.Context, pending *PendingMap,
 		zap.Stringer("nodeID", nodeID),
 		zap.Stringer("transfer error", message.Failed.GetError()),
 	)
-	mon.Meter("graceful_exit_transfer_piece_fail").Mark(1) //locked
+	mon.Meter("graceful_exit_transfer_piece_fail").Mark(1) //mon:locked
 
 	pieceID := message.Failed.OriginalPieceId
 	transfer, ok := pending.Get(pieceID)
@@ -798,10 +798,10 @@ func (endpoint *Endpoint) checkExitStatus(ctx context.Context, nodeID storj.Node
 
 		// graceful exit initiation metrics
 		age := time.Now().UTC().Sub(node.CreatedAt.UTC())
-		mon.FloatVal("graceful_exit_init_node_age_seconds").Observe(age.Seconds()) //locked
-		mon.IntVal("graceful_exit_init_node_audit_success_count").Observe(node.Reputation.AuditSuccessCount) //locked
-		mon.IntVal("graceful_exit_init_node_audit_total_count").Observe(node.Reputation.AuditCount) //locked
-		mon.IntVal("graceful_exit_init_node_piece_count").Observe(node.PieceCount) //locked
+		mon.FloatVal("graceful_exit_init_node_age_seconds").Observe(age.Seconds()) //mon:locked
+		mon.IntVal("graceful_exit_init_node_audit_success_count").Observe(node.Reputation.AuditSuccessCount) //mon:locked
+		mon.IntVal("graceful_exit_init_node_audit_total_count").Observe(node.Reputation.AuditCount) //mon:locked
+		mon.IntVal("graceful_exit_init_node_piece_count").Observe(node.PieceCount) //mon:locked
 
 		return &pb.SatelliteMessage{Message: &pb.SatelliteMessage_NotReady{NotReady: &pb.NotReady{}}}, nil
 	}
@@ -820,13 +820,13 @@ func (endpoint *Endpoint) generateExitStatusRequest(ctx context.Context, nodeID
 		return nil, exitFailedReason, rpcstatus.Error(rpcstatus.Internal, err.Error())
 	}
 
-	mon.IntVal("graceful_exit_final_pieces_failed").Observe(progress.PiecesFailed) //locked
-	mon.IntVal("graceful_exit_final_pieces_succeess").Observe(progress.PiecesTransferred) //locked
-	mon.IntVal("graceful_exit_final_bytes_transferred").Observe(progress.BytesTransferred) //locked
+	mon.IntVal("graceful_exit_final_pieces_failed").Observe(progress.PiecesFailed) //mon:locked
+	mon.IntVal("graceful_exit_final_pieces_succeess").Observe(progress.PiecesTransferred) //mon:locked
+	mon.IntVal("graceful_exit_final_bytes_transferred").Observe(progress.BytesTransferred) //mon:locked
 	processed := progress.PiecesFailed + progress.PiecesTransferred
 
 	if processed > 0 {
-		mon.IntVal("graceful_exit_successful_pieces_transfer_ratio").Observe(progress.PiecesTransferred / processed) //locked
+		mon.IntVal("graceful_exit_successful_pieces_transfer_ratio").Observe(progress.PiecesTransferred / processed) //mon:locked
 	}
 
 	exitStatusRequest := &overlay.ExitStatusRequest{
@@ -842,9 +842,9 @@ func (endpoint *Endpoint) generateExitStatusRequest(ctx context.Context, nodeID
 	}
 
 	if exitStatusRequest.ExitSuccess {
-		mon.Meter("graceful_exit_success").Mark(1) //locked
+		mon.Meter("graceful_exit_success").Mark(1) //mon:locked
 	} else {
-		mon.Meter("graceful_exit_fail_max_failures_percentage").Mark(1) //locked
+		mon.Meter("graceful_exit_fail_max_failures_percentage").Mark(1) //mon:locked
 	}
 
 	return exitStatusRequest, exitFailedReason, nil
@@ -231,7 +231,7 @@ func (endpoint *Endpoint) checkRate(ctx context.Context, projectID uuid.UUID) (e
 			zap.Stringer("projectID", projectID),
 			zap.Float64("limit", float64(limiter.(*rate.Limiter).Limit())))
 
-		mon.Event("metainfo_rate_limit_exceeded") //locked
+		mon.Event("metainfo_rate_limit_exceeded") //mon:locked
 
 		return rpcstatus.Error(rpcstatus.ResourceExhausted, "Too Many Requests")
 	}
@@ -187,7 +187,7 @@ func (service *Service) CreateGetOrderLimits(ctx context.Context, bucket metabas
 		}
 	}
 	if len(signer.AddressedLimits) < redundancy.RequiredCount() {
-		mon.Meter("download_failed_not_enough_pieces_uplink").Mark(1) //locked
+		mon.Meter("download_failed_not_enough_pieces_uplink").Mark(1) //mon:locked
 		return nil, storj.PiecePrivateKey{}, ErrDownloadFailedNotEnoughPieces.New("not enough orderlimits: got %d, required %d", len(signer.AddressedLimits), redundancy.RequiredCount())
 	}
 
@@ -138,24 +138,24 @@ func (checker *Checker) IdentifyInjuredSegments(ctx context.Context) (err error)
 		return Error.Wrap(err)
 	}
 
-	mon.IntVal("remote_files_checked").Observe(observer.monStats.objectsChecked) //locked
-	mon.IntVal("remote_segments_checked").Observe(observer.monStats.remoteSegmentsChecked) //locked
-	mon.IntVal("remote_segments_failed_to_check").Observe(observer.monStats.remoteSegmentsFailedToCheck) //locked
-	mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) //locked
-	mon.IntVal("new_remote_segments_needing_repair").Observe(observer.monStats.newRemoteSegmentsNeedingRepair) //locked
-	mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) //locked
-	mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) //locked
-	mon.IntVal("remote_segments_over_threshold_1").Observe(observer.monStats.remoteSegmentsOverThreshold[0]) //locked
-	mon.IntVal("remote_segments_over_threshold_2").Observe(observer.monStats.remoteSegmentsOverThreshold[1]) //locked
-	mon.IntVal("remote_segments_over_threshold_3").Observe(observer.monStats.remoteSegmentsOverThreshold[2]) //locked
-	mon.IntVal("remote_segments_over_threshold_4").Observe(observer.monStats.remoteSegmentsOverThreshold[3]) //locked
-	mon.IntVal("remote_segments_over_threshold_5").Observe(observer.monStats.remoteSegmentsOverThreshold[4]) //locked
-	mon.IntVal("healthy_segments_removed_from_queue").Observe(healthyDeleted) //locked
+	mon.IntVal("remote_files_checked").Observe(observer.monStats.objectsChecked) //mon:locked
+	mon.IntVal("remote_segments_checked").Observe(observer.monStats.remoteSegmentsChecked) //mon:locked
+	mon.IntVal("remote_segments_failed_to_check").Observe(observer.monStats.remoteSegmentsFailedToCheck) //mon:locked
+	mon.IntVal("remote_segments_needing_repair").Observe(observer.monStats.remoteSegmentsNeedingRepair) //mon:locked
+	mon.IntVal("new_remote_segments_needing_repair").Observe(observer.monStats.newRemoteSegmentsNeedingRepair) //mon:locked
+	mon.IntVal("remote_segments_lost").Observe(observer.monStats.remoteSegmentsLost) //mon:locked
+	mon.IntVal("remote_files_lost").Observe(int64(len(observer.monStats.remoteSegmentInfo))) //mon:locked
+	mon.IntVal("remote_segments_over_threshold_1").Observe(observer.monStats.remoteSegmentsOverThreshold[0]) //mon:locked
+	mon.IntVal("remote_segments_over_threshold_2").Observe(observer.monStats.remoteSegmentsOverThreshold[1]) //mon:locked
+	mon.IntVal("remote_segments_over_threshold_3").Observe(observer.monStats.remoteSegmentsOverThreshold[2]) //mon:locked
+	mon.IntVal("remote_segments_over_threshold_4").Observe(observer.monStats.remoteSegmentsOverThreshold[3]) //mon:locked
+	mon.IntVal("remote_segments_over_threshold_5").Observe(observer.monStats.remoteSegmentsOverThreshold[4]) //mon:locked
+	mon.IntVal("healthy_segments_removed_from_queue").Observe(healthyDeleted) //mon:locked
 
 	allUnhealthy := observer.monStats.remoteSegmentsNeedingRepair + observer.monStats.remoteSegmentsFailedToCheck
 	allChecked := observer.monStats.remoteSegmentsChecked
 	allHealthy := allChecked - allUnhealthy
-	mon.FloatVal("remote_segments_healthy_percentage").Observe(100 * float64(allHealthy) / float64(allChecked)) //locked
+	mon.FloatVal("remote_segments_healthy_percentage").Observe(100 * float64(allHealthy) / float64(allChecked)) //mon:locked
 
 	return nil
 }
@@ -281,11 +281,11 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, location metabase
 	}
 
 	numHealthy := int32(len(pieces) - len(missingPieces))
-	mon.IntVal("checker_segment_total_count").Observe(int64(len(pieces))) //locked
-	mon.IntVal("checker_segment_healthy_count").Observe(int64(numHealthy)) //locked
+	mon.IntVal("checker_segment_total_count").Observe(int64(len(pieces))) //mon:locked
+	mon.IntVal("checker_segment_healthy_count").Observe(int64(numHealthy)) //mon:locked
 
 	segmentAge := time.Since(pointer.CreationDate)
-	mon.IntVal("checker_segment_age").Observe(int64(segmentAge.Seconds())) //locked
+	mon.IntVal("checker_segment_age").Observe(int64(segmentAge.Seconds())) //mon:locked
 
 	redundancy := pointer.Remote.Redundancy
 
@@ -332,7 +332,7 @@ func (obs *checkerObserver) RemoteSegment(ctx context.Context, location metabase
 		} else {
 			segmentAge = time.Since(pointer.CreationDate)
 		}
-		mon.IntVal("checker_segment_time_until_irreparable").Observe(int64(segmentAge.Seconds())) //locked
+		mon.IntVal("checker_segment_time_until_irreparable").Observe(int64(segmentAge.Seconds())) //mon:locked
 
 		obs.monStats.remoteSegmentsLost++
 		// make an entry into the irreparable table
@@ -145,7 +145,7 @@ func (ec *ECRepairer) Get(ctx context.Context, limits []*pb.AddressedOrderLimit,
 	limiter.Wait()
 
 	if successfulPieces < es.RequiredCount() {
-		mon.Meter("download_failed_not_enough_pieces_repair").Mark(1) //locked
+		mon.Meter("download_failed_not_enough_pieces_repair").Mark(1) //mon:locked
 		return nil, failedPieces, &irreparableError{
 			path: path,
 			piecesAvailable: int32(successfulPieces),
@@ -398,10 +398,10 @@ func (ec *ECRepairer) Repair(ctx context.Context, limits []*pb.AddressedOrderLim
 		zap.Int32("Success Count", atomic.LoadInt32(&successfulCount)),
 	)
 
-	mon.IntVal("repair_segment_pieces_total").Observe(int64(pieceCount)) //locked
-	mon.IntVal("repair_segment_pieces_successful").Observe(int64(successfulCount)) //locked
-	mon.IntVal("repair_segment_pieces_failed").Observe(int64(failureCount)) //locked
-	mon.IntVal("repair_segment_pieces_canceled").Observe(int64(cancellationCount)) //locked
+	mon.IntVal("repair_segment_pieces_total").Observe(int64(pieceCount)) //mon:locked
+	mon.IntVal("repair_segment_pieces_successful").Observe(int64(successfulCount)) //mon:locked
+	mon.IntVal("repair_segment_pieces_failed").Observe(int64(failureCount)) //mon:locked
+	mon.IntVal("repair_segment_pieces_canceled").Observe(int64(cancellationCount)) //mon:locked
 
 	return successfulNodes, successfulHashes, nil
 }
@@ -191,13 +191,13 @@ func (service *Service) worker(ctx context.Context, seg *pb.InjuredSegment) (err
 
 	repairedTime := time.Now().UTC()
 	timeForRepair := repairedTime.Sub(workerStartTime)
-	mon.FloatVal("time_for_repair").Observe(timeForRepair.Seconds()) //locked
+	mon.FloatVal("time_for_repair").Observe(timeForRepair.Seconds()) //mon:locked
 
 	insertedTime := seg.GetInsertedTime()
 	// do not send metrics if segment was added before the InsertedTime field was added
 	if !insertedTime.IsZero() {
 		timeSinceQueued := workerStartTime.Sub(insertedTime)
-		mon.FloatVal("time_since_checker_queue").Observe(timeSinceQueued.Seconds()) //locked
+		mon.FloatVal("time_since_checker_queue").Observe(timeSinceQueued.Seconds()) //mon:locked
 	}
 
 	return nil
@@ -104,8 +104,8 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
 	pointer, err := repairer.metainfo.Get(ctx, metabase.SegmentKey(path))
 	if err != nil {
 		if storj.ErrObjectNotFound.Has(err) {
-			mon.Meter("repair_unnecessary").Mark(1) //locked
-			mon.Meter("segment_deleted_before_repair").Mark(1) //locked
+			mon.Meter("repair_unnecessary").Mark(1) //mon:locked
+			mon.Meter("segment_deleted_before_repair").Mark(1) //mon:locked
 			repairer.log.Debug("segment was deleted")
 			return true, nil
 		}
@@ -117,12 +117,12 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
 	}
 
 	if !pointer.ExpirationDate.IsZero() && pointer.ExpirationDate.Before(time.Now().UTC()) {
-		mon.Meter("repair_expired").Mark(1) //locked
+		mon.Meter("repair_expired").Mark(1) //mon:locked
 		return true, nil
 	}
 
-	mon.Meter("repair_attempts").Mark(1) //locked
-	mon.IntVal("repair_segment_size").Observe(pointer.GetSegmentSize()) //locked
+	mon.Meter("repair_attempts").Mark(1) //mon:locked
+	mon.IntVal("repair_segment_size").Observe(pointer.GetSegmentSize()) //mon:locked
 
 	redundancy, err := eestream.NewRedundancyStrategyFromProto(pointer.GetRemote().GetRedundancy())
 	if err != nil {
@@ -141,7 +141,7 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
 	numHealthy := len(pieces) - len(missingPieces)
 	// irreparable piece
 	if int32(numHealthy) < pointer.Remote.Redundancy.MinReq {
-		mon.Meter("repair_nodes_unavailable").Mark(1) //locked
+		mon.Meter("repair_nodes_unavailable").Mark(1) //mon:locked
 		return true, &irreparableError{
 			path: path,
 			piecesAvailable: int32(numHealthy),
@@ -157,7 +157,7 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
 
 	// repair not needed
 	if int32(numHealthy) > repairThreshold {
-		mon.Meter("repair_unnecessary").Mark(1) //locked
+		mon.Meter("repair_unnecessary").Mark(1) //mon:locked
 		repairer.log.Debug("segment above repair threshold", zap.Int("numHealthy", numHealthy), zap.Int32("repairThreshold", repairThreshold))
 		return true, nil
 	}
@@ -166,7 +166,7 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
 	if pointer.Remote.Redundancy.Total != 0 {
 		healthyRatioBeforeRepair = float64(numHealthy) / float64(pointer.Remote.Redundancy.Total)
 	}
-	mon.FloatVal("healthy_ratio_before_repair").Observe(healthyRatioBeforeRepair) //locked
+	mon.FloatVal("healthy_ratio_before_repair").Observe(healthyRatioBeforeRepair) //mon:locked
 
 	lostPiecesSet := sliceToSet(missingPieces)
 
@@ -249,7 +249,7 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
 		// gave us irreparableError, then we failed to download enough pieces and must try
 		// to wait for nodes to come back online.
 		if irreparableErr, ok := err.(*irreparableError); ok {
-			mon.Meter("repair_too_many_nodes_failed").Mark(1) //locked
+			mon.Meter("repair_too_many_nodes_failed").Mark(1) //mon:locked
 			irreparableErr.segmentInfo = pointer
 			return true, irreparableErr
 		}
@@ -288,18 +288,18 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
 		// put at least one piece, else ec.Repair() would have returned an error. So the
 		// repair "succeeded" in that the segment is now healthier than it was, but it is
 		// not as healthy as we want it to be.
-		mon.Meter("repair_failed").Mark(1) //locked
+		mon.Meter("repair_failed").Mark(1) //mon:locked
 	case healthyAfterRepair < pointer.Remote.Redundancy.SuccessThreshold:
-		mon.Meter("repair_partial").Mark(1) //locked
+		mon.Meter("repair_partial").Mark(1) //mon:locked
 	default:
-		mon.Meter("repair_success").Mark(1) //locked
+		mon.Meter("repair_success").Mark(1) //mon:locked
 	}
 
 	healthyRatioAfterRepair := 0.0
 	if pointer.Remote.Redundancy.Total != 0 {
 		healthyRatioAfterRepair = float64(healthyAfterRepair) / float64(pointer.Remote.Redundancy.Total)
 	}
-	mon.FloatVal("healthy_ratio_after_repair").Observe(healthyRatioAfterRepair) //locked
+	mon.FloatVal("healthy_ratio_after_repair").Observe(healthyRatioAfterRepair) //mon:locked
 
 	var toRemove []*pb.RemotePiece
 	if healthyAfterRepair >= pointer.Remote.Redundancy.SuccessThreshold {
@@ -335,8 +335,8 @@ func (repairer *SegmentRepairer) Repair(ctx context.Context, path storj.Path) (s
 		return false, metainfoPutError.Wrap(err)
 	}
 
-	mon.IntVal("segment_time_until_repair").Observe(int64(segmentAge.Seconds())) //locked
-	mon.IntVal("segment_repair_count").Observe(int64(pointer.RepairCount)) //locked
+	mon.IntVal("segment_time_until_repair").Observe(int64(segmentAge.Seconds())) //mon:locked
+	mon.IntVal("segment_repair_count").Observe(int64(pointer.RepairCount)) //mon:locked
 
 	return true, nil
 }
@@ -1248,11 +1248,11 @@ func (cache *overlaycache) populateUpdateNodeStats(dbNode *dbx.Node, updateReq *
 		)
 
 	}
-	mon.FloatVal("audit_reputation_alpha").Observe(auditAlpha) //locked
-	mon.FloatVal("audit_reputation_beta").Observe(auditBeta) //locked
-	mon.FloatVal("unknown_audit_reputation_alpha").Observe(unknownAuditAlpha) //locked
-	mon.FloatVal("unknown_audit_reputation_beta").Observe(unknownAuditBeta) //locked
-	mon.FloatVal("audit_online_score").Observe(auditOnlineScore) //locked
+	mon.FloatVal("audit_reputation_alpha").Observe(auditAlpha) //mon:locked
+	mon.FloatVal("audit_reputation_beta").Observe(auditBeta) //mon:locked
+	mon.FloatVal("unknown_audit_reputation_alpha").Observe(unknownAuditAlpha) //mon:locked
+	mon.FloatVal("unknown_audit_reputation_beta").Observe(unknownAuditBeta) //mon:locked
+	mon.FloatVal("audit_online_score").Observe(auditOnlineScore) //mon:locked
 
 	totalUptimeCount := dbNode.TotalUptimeCount
 	if updateReq.IsUp {
@@ -37,7 +37,7 @@ func (db *StoragenodeAccounting) SaveTallies(ctx context.Context, latestTally ti
 		totals = append(totals, total)
 		totalSum += total
 	}
-	mon.IntVal("nodetallies.totalsum").Observe(int64(totalSum)) //locked
+	mon.IntVal("nodetallies.totalsum").Observe(int64(totalSum)) //mon:locked
 
 	err = db.db.WithTx(ctx, func(ctx context.Context, tx *dbx.Tx) error {
 		_, err = tx.Tx.ExecContext(ctx, db.db.Rebind(`
@@ -26,7 +26,7 @@ var (
 	Error = errs.Class("filestore error")
 
 	mon = monkit.Package()
-	monFileInTrash = mon.Meter("open_file_in_trash") //locked
+	monFileInTrash = mon.Meter("open_file_in_trash") //mon:locked
 
 	_ storage.Blobs = (*blobStore)(nil)
 )
@@ -90,7 +90,7 @@ func (service *Service) pingSatellite(ctx context.Context, satellite storj.NodeI
 	attempts := 0
 	for {
 
-		mon.Meter("satellite_contact_request").Mark(1) //locked
+		mon.Meter("satellite_contact_request").Mark(1) //mon:locked
 
 		err := service.pingSatelliteOnce(ctx, satellite)
 		attempts++
@@ -66,7 +66,7 @@ func (chore *Chore) Run(ctx context.Context) (err error) {
 	chore.log.Debug("exiting", zap.Int("satellites", len(satellites)))
 
 	for _, satellite := range satellites {
-		mon.Meter("satellite_gracefulexit_request").Mark(1) //locked
+		mon.Meter("satellite_gracefulexit_request").Mark(1) //mon:locked
 		if satellite.FinishedAt != nil {
 			continue
 		}
@@ -202,7 +202,7 @@ func (table *Table) Count() int {
 // deleteRandomSerial deletes a random item.
 // It expects the mutex to be locked before being called.
 func (table *Table) deleteRandomSerial() error {
-	mon.Meter("delete_random_serial").Mark(1) //locked
+	mon.Meter("delete_random_serial").Mark(1) //mon:locked
	for _, satMap := range table.serials {
 		for expirationHour, serialList := range satMap {
 			if len(serialList.partialSerials) > 0 {