diff --git a/storagenode/console/service.go b/storagenode/console/service.go index 2e3db7271..ecee54616 100644 --- a/storagenode/console/service.go +++ b/storagenode/console/service.go @@ -62,7 +62,7 @@ type Service struct { startedAt time.Time versionInfo version.Info - quicEnabled bool + quicStats *contact.QUICStats configuredPort string } @@ -71,7 +71,7 @@ func NewService(log *zap.Logger, bandwidth bandwidth.DB, pieceStore *pieces.Stor allocatedDiskSpace memory.Size, walletAddress string, versionInfo version.Info, trust *trust.Pool, reputationDB reputation.DB, storageUsageDB storageusage.DB, pricingDB pricing.DB, satelliteDB satellites.DB, pingStats *contact.PingStats, contact *contact.Service, estimation *estimatedpayouts.Service, usageCache *pieces.BlobsUsageCache, - walletFeatures operator.WalletFeatures, port string, quicEnabled bool) (*Service, error) { + walletFeatures operator.WalletFeatures, port string, quicStats *contact.QUICStats) (*Service, error) { if log == nil { return nil, errs.New("log can't be nil") } @@ -123,7 +123,7 @@ func NewService(log *zap.Logger, bandwidth bandwidth.DB, pieceStore *pieces.Stor startedAt: time.Now(), versionInfo: versionInfo, walletFeatures: walletFeatures, - quicEnabled: quicEnabled, + quicStats: quicStats, configuredPort: port, }, nil } @@ -156,8 +156,9 @@ type Dashboard struct { StartedAt time.Time `json:"startedAt"` - ConfiguredPort string `json:"configuredPort"` - QUICEnabled bool `json:"quicEnabled"` + ConfiguredPort string `json:"configuredPort"` + QUICStatus string `json:"quicStatus"` + LastQUICPingedAt time.Time `json:"lastQuicPingedAt"` } // GetDashboardData returns stale dashboard data. @@ -174,7 +175,8 @@ func (s *Service) GetDashboardData(ctx context.Context) (_ *Dashboard, err error data.LastPinged = s.pingStats.WhenLastPinged() data.AllowedVersion, data.UpToDate = s.version.IsAllowed(ctx) - data.QUICEnabled = s.quicEnabled + data.QUICStatus = s.quicStats.Status() + data.LastQUICPingedAt = s.quicStats.WhenLastPinged() data.ConfiguredPort = s.configuredPort stats, err := s.reputationDB.All(ctx) @@ -476,8 +478,3 @@ func (s *Service) VerifySatelliteID(ctx context.Context, satelliteID storj.NodeI return nil } - -// SetQUICEnabled sets QUIC status for the SNO dashboard. -func (s *Service) SetQUICEnabled(enabled bool) { - s.quicEnabled = enabled -} diff --git a/storagenode/contact/contact_test.go b/storagenode/contact/contact_test.go index 0d395f30d..ea62d18ec 100644 --- a/storagenode/contact/contact_test.go +++ b/storagenode/contact/contact_test.go @@ -19,6 +19,7 @@ import ( "storj.io/common/testcontext" "storj.io/storj/private/testplanet" "storj.io/storj/satellite" + "storj.io/storj/storagenode/contact" ) func TestStoragenodeContactEndpoint(t *testing.T) { @@ -168,7 +169,8 @@ func TestServiceRequestPingMeQUIC(t *testing.T) { node := planet.StorageNodes[0] node.Contact.Chore.Pause(ctx) - err := node.Contact.Service.RequestPingMeQUIC(ctx) + quicStats, err := node.Contact.Service.RequestPingMeQUIC(ctx) require.NoError(t, err) + require.Equal(t, contact.NetworkStatusOk, quicStats.Status()) }) } diff --git a/storagenode/contact/network.go b/storagenode/contact/network.go new file mode 100644 index 000000000..bc3504c39 --- /dev/null +++ b/storagenode/contact/network.go @@ -0,0 +1,75 @@ +// Copyright (C) 2021 Storj Labs, Inc. +// See LICENSE for copying information. + +package contact + +import ( + "sync" + "time" +) + +const ( + // NetworkStatusOk represents node successfully pinged. + NetworkStatusOk = "OK" + // NetworkStatusMisconfigured means satellite could not ping + // back node due to misconfiguration on the node host. + NetworkStatusMisconfigured = "Misconfigured" + // NetworkStatusDisabled means QUIC is disabled by config. + NetworkStatusDisabled = "Disabled" + // NetworkStatusRefreshing means QUIC check is in progress. + NetworkStatusRefreshing = "Refreshing" +) + +// QUICStats contains information regarding QUIC status of the node. +type QUICStats struct { + status string + enabled bool + + mu sync.Mutex + lastPinged time.Time +} + +// NewQUICStats returns a new QUICStats. +func NewQUICStats(enabled bool) *QUICStats { + stats := &QUICStats{ + enabled: enabled, + status: NetworkStatusRefreshing, + } + + if !enabled { + stats.status = NetworkStatusDisabled + } + return stats +} + +// SetStatus sets the QUIC status during PingMe request. +func (q *QUICStats) SetStatus(pingSuccess bool) { + q.mu.Lock() + defer q.mu.Unlock() + + q.lastPinged = time.Now() + if pingSuccess { + q.status = NetworkStatusOk + return + } + + q.status = NetworkStatusMisconfigured +} + +// Status returns the quic status gathered in a PingMe request. +func (q *QUICStats) Status() string { + q.mu.Lock() + defer q.mu.Unlock() + + if !q.enabled { + return NetworkStatusDisabled + } + return q.status +} + +// WhenLastPinged returns last time someone pinged this node via QUIC. +func (q *QUICStats) WhenLastPinged() (when time.Time) { + q.mu.Lock() + defer q.mu.Unlock() + return q.lastPinged +} diff --git a/storagenode/contact/service.go b/storagenode/contact/service.go index 9d0ec4961..7caa23f4a 100644 --- a/storagenode/contact/service.go +++ b/storagenode/contact/service.go @@ -58,19 +58,21 @@ type Service struct { mu sync.Mutex self NodeInfo - trust *trust.Pool + trust *trust.Pool + quicStats *QUICStats initialized sync2.Fence } // NewService creates a new contact service. -func NewService(log *zap.Logger, dialer rpc.Dialer, self NodeInfo, trust *trust.Pool) *Service { +func NewService(log *zap.Logger, dialer rpc.Dialer, self NodeInfo, trust *trust.Pool, quicStats *QUICStats) *Service { return &Service{ - log: log, - rand: rand.New(rand.NewSource(time.Now().UnixNano())), - dialer: dialer, - trust: trust, - self: self, + log: log, + rand: rand.New(rand.NewSource(time.Now().UnixNano())), + dialer: dialer, + trust: trust, + self: self, + quicStats: quicStats, } } @@ -132,11 +134,16 @@ func (service *Service) pingSatelliteOnce(ctx context.Context, id storj.NodeID) Capacity: &self.Capacity, Operator: &self.Operator, }) + service.quicStats.SetStatus(false) if err != nil { return errPingSatellite.Wrap(err) } - if resp != nil && !resp.PingNodeSuccess { - return errPingSatellite.New("%s", resp.PingErrorMessage) + if resp != nil { + service.quicStats.SetStatus(resp.PingNodeSuccessQuic) + + if !resp.PingNodeSuccess { + return errPingSatellite.New("%s", resp.PingErrorMessage) + } } if resp.PingErrorMessage != "" { service.log.Warn("Your node is still considered to be online but encountered an error.", zap.Stringer("Satellite ID", id), zap.String("Error", resp.GetPingErrorMessage())) @@ -145,12 +152,14 @@ func (service *Service) pingSatelliteOnce(ctx context.Context, id storj.NodeID) } // RequestPingMeQUIC sends pings request to satellite for a pingBack via QUIC. -func (service *Service) RequestPingMeQUIC(ctx context.Context) (err error) { +func (service *Service) RequestPingMeQUIC(ctx context.Context) (stats *QUICStats, err error) { defer mon.Task()(&ctx)(&err) + stats = NewQUICStats(true) + satellites := service.trust.GetSatellites(ctx) if len(satellites) < 1 { - return errPingSatellite.New("no trusted satellite available") + return nil, errPingSatellite.New("no trusted satellite available") } // Shuffle the satellites @@ -166,14 +175,18 @@ func (service *Service) RequestPingMeQUIC(ctx context.Context) (err error) { for _, satellite := range satellites { err = service.requestPingMeOnce(ctx, satellite) if err != nil { + stats.SetStatus(false) // log warning and try the next trusted satellite service.log.Warn("failed PingMe request to satellite", zap.Stringer("Satellite ID", satellite), zap.Error(err)) continue } - return nil + + stats.SetStatus(true) + + return stats, nil } - return errPingSatellite.New("failed to ping storage node using QUIC: %q", err) + return stats, errPingSatellite.New("failed to ping storage node using QUIC: %q", err) } func (service *Service) requestPingMeOnce(ctx context.Context, satellite storj.NodeID) (err error) { diff --git a/storagenode/peer.go b/storagenode/peer.go index fdc39e93c..db90652cc 100644 --- a/storagenode/peer.go +++ b/storagenode/peer.go @@ -237,6 +237,7 @@ type Peer struct { Chore *contact.Chore Endpoint *contact.Endpoint PingStats *contact.PingStats + QUICStats *contact.QUICStats } Estimation struct { @@ -433,7 +434,8 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, revocationDB exten Version: *pbVersion, } peer.Contact.PingStats = new(contact.PingStats) - peer.Contact.Service = contact.NewService(peer.Log.Named("contact:service"), peer.Dialer, self, peer.Storage2.Trust) + peer.Contact.QUICStats = contact.NewQUICStats(peer.Server.IsQUICEnabled()) + peer.Contact.Service = contact.NewService(peer.Log.Named("contact:service"), peer.Dialer, self, peer.Storage2.Trust, peer.Contact.QUICStats) peer.Contact.Chore = contact.NewChore(peer.Log.Named("contact:chore"), config.Contact.Interval, peer.Contact.Service) peer.Services.Add(lifecycle.Item{ @@ -679,7 +681,7 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, revocationDB exten peer.Storage2.BlobsCache, config.Operator.WalletFeatures, port, - false, + peer.Contact.QUICStats, ) if err != nil { return nil, errs.Combine(err, peer.Close()) @@ -707,7 +709,13 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, revocationDB exten peer.Payout.Service, peer.Console.Listener, ) - // NOTE: Console service is added to peer services during peer run to allow for QUIC checkins + + // add console service to peer services + peer.Services.Add(lifecycle.Item{ + Name: "console:endpoint", + Run: peer.Console.Endpoint.Run, + Close: peer.Console.Endpoint.Close, + }) } { // setup storage inspector @@ -859,32 +867,6 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, revocationDB exten return peer, nil } -// addConsoleService completes the SNO dashboard setup and adds the console service -// to the peer services. -func (peer *Peer) addConsoleService(ctx context.Context) { - // perform QUIC checks - quicEnabled := peer.Server.IsQUICEnabled() - if quicEnabled { - if err := peer.Contact.Service.RequestPingMeQUIC(ctx); err != nil { - peer.Log.Warn("failed QUIC check", zap.Error(err)) - quicEnabled = false - } else { - peer.Log.Debug("QUIC check success") - } - } else { - peer.Log.Warn("UDP Port not configured for QUIC") - } - - peer.Console.Service.SetQUICEnabled(quicEnabled) - - // add console service to peer services - peer.Services.Add(lifecycle.Item{ - Name: "console:endpoint", - Run: peer.Console.Endpoint.Run, - Close: peer.Console.Endpoint.Close, - }) -} - // Run runs storage node until it's either closed or it errors. func (peer *Peer) Run(ctx context.Context) (err error) { defer mon.Task()(&ctx)(&err) @@ -903,9 +885,6 @@ func (peer *Peer) Run(ctx context.Context) (err error) { group, ctx := errgroup.WithContext(ctx) peer.Servers.Run(ctx, group) - // complete SNO dashboard setup and add console service to peer services - peer.addConsoleService(ctx) - // run peer services peer.Services.Run(ctx, group) return group.Wait() diff --git a/web/storagenode/src/app/components/SNOContentTitle.vue b/web/storagenode/src/app/components/SNOContentTitle.vue index 715a0a91b..42a321185 100644 --- a/web/storagenode/src/app/components/SNOContentTitle.vue +++ b/web/storagenode/src/app/components/SNOContentTitle.vue @@ -19,18 +19,29 @@
-

QUIC

-

OK

+

{{ quicStatusRefreshing }}

-
+ +
+

QUIC

+

{{ quicStatusOk }}

+
+
+

QUIC

@@ -81,7 +92,7 @@