storj/satellite/inspector/inspector.go
Jeff Wendling 7999d24f81 all: use monkit v3
this commit updates our monkit dependency to the v3 version where
it outputs in an influx style. this makes discovery much easier
as many tools are built to look at it this way.

graphite and rothko will suffer some due to no longer being a tree
based on dots. hopefully time will exist to update rothko to
index based on the new metric format.

it adds an influx output for the statreceiver so that we can
write to influxdb v1 or v2 directly.

Change-Id: Iae9f9494a6d29cfbd1f932a5e71a891b490415ff
2020-02-05 23:53:17 +00:00

188 lines
4.4 KiB
Go

// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package inspector
import (
"context"
"strconv"
"github.com/skyrings/skyring-common/tools/uuid"
"github.com/spacemonkeygo/monkit/v3"
"github.com/zeebo/errs"
"go.uber.org/zap"
"storj.io/common/pb"
"storj.io/common/storj"
"storj.io/storj/satellite/metainfo"
"storj.io/storj/satellite/overlay"
)
var (
mon = monkit.Package()
// Error wraps errors returned from Server struct methods
Error = errs.Class("Endpoint error")
)
const lastSegmentIndex = int64(-1)
// Endpoint for checking object and segment health
//
// architecture: Endpoint
type Endpoint struct {
log *zap.Logger
overlay *overlay.Service
metainfo *metainfo.Service
}
// NewEndpoint will initialize an Endpoint struct
func NewEndpoint(log *zap.Logger, cache *overlay.Service, metainfo *metainfo.Service) *Endpoint {
return &Endpoint{
log: log,
overlay: cache,
metainfo: metainfo,
}
}
// ObjectHealth will check the health of an object
func (endpoint *Endpoint) ObjectHealth(ctx context.Context, in *pb.ObjectHealthRequest) (resp *pb.ObjectHealthResponse, err error) {
defer mon.Task()(&ctx)(&err)
var segmentHealthResponses []*pb.SegmentHealth
var redundancy *pb.RedundancyScheme
limit := int64(100)
if in.GetLimit() > 0 {
limit = int64(in.GetLimit())
}
var start int64
if in.GetStartAfterSegment() > 0 {
start = in.GetStartAfterSegment() + 1
}
end := limit + start
if in.GetEndBeforeSegment() > 0 {
end = in.GetEndBeforeSegment()
}
bucket := in.GetBucket()
encryptedPath := in.GetEncryptedPath()
projectID := in.GetProjectId()
segmentIndex := start
for segmentIndex < end {
if segmentIndex-start >= limit {
break
}
segment := &pb.SegmentHealthRequest{
Bucket: bucket,
EncryptedPath: encryptedPath,
SegmentIndex: segmentIndex,
ProjectId: projectID,
}
segmentHealth, err := endpoint.SegmentHealth(ctx, segment)
if err != nil {
if segmentIndex == lastSegmentIndex {
return nil, Error.Wrap(err)
}
segmentIndex = lastSegmentIndex
continue
}
segmentHealthResponses = append(segmentHealthResponses, segmentHealth.GetHealth())
redundancy = segmentHealth.GetRedundancy()
if segmentIndex == lastSegmentIndex {
break
}
segmentIndex++
}
return &pb.ObjectHealthResponse{
Segments: segmentHealthResponses,
Redundancy: redundancy,
}, nil
}
// SegmentHealth will check the health of a segment
func (endpoint *Endpoint) SegmentHealth(ctx context.Context, in *pb.SegmentHealthRequest) (resp *pb.SegmentHealthResponse, err error) {
defer mon.Task()(&ctx)(&err)
health := &pb.SegmentHealth{}
projectID, err := uuid.Parse(string(in.GetProjectId()))
if err != nil {
return nil, Error.Wrap(err)
}
path, err := metainfo.CreatePath(ctx, *projectID, in.GetSegmentIndex(), in.GetBucket(), in.GetEncryptedPath())
if err != nil {
return nil, Error.Wrap(err)
}
pointer, err := endpoint.metainfo.Get(ctx, path)
if err != nil {
return nil, Error.Wrap(err)
}
if pointer.GetType() != pb.Pointer_REMOTE {
return nil, Error.New("cannot check health of inline segment")
}
var nodeIDs storj.NodeIDList
for _, piece := range pointer.GetRemote().GetRemotePieces() {
nodeIDs = append(nodeIDs, piece.NodeId)
}
unreliableOrOfflineNodes, err := endpoint.overlay.KnownUnreliableOrOffline(ctx, nodeIDs)
if err != nil {
return nil, Error.Wrap(err)
}
offlineNodes, err := endpoint.overlay.KnownOffline(ctx, nodeIDs)
if err != nil {
return nil, Error.Wrap(err)
}
offlineMap := make(map[storj.NodeID]bool)
for _, id := range offlineNodes {
offlineMap[id] = true
}
unreliableOfflineMap := make(map[storj.NodeID]bool)
for _, id := range unreliableOrOfflineNodes {
unreliableOfflineMap[id] = true
}
var healthyNodes storj.NodeIDList
var unhealthyNodes storj.NodeIDList
for _, id := range nodeIDs {
if offlineMap[id] {
continue
}
if unreliableOfflineMap[id] {
unhealthyNodes = append(unhealthyNodes, id)
} else {
healthyNodes = append(healthyNodes, id)
}
}
health.HealthyIds = healthyNodes
health.UnhealthyIds = unhealthyNodes
health.OfflineIds = offlineNodes
if in.GetSegmentIndex() > -1 {
health.Segment = []byte("s" + strconv.FormatInt(in.GetSegmentIndex(), 10))
} else {
health.Segment = []byte("l")
}
return &pb.SegmentHealthResponse{
Health: health,
Redundancy: pointer.GetRemote().GetRedundancy(),
}, nil
}