Commit 724bb44723

What:

• cmd/inspector/main.go: removes kad commands
• internal/testplanet/planet.go: waits for the contact chore to finish
• satellite/contact/nodesservice.go: creates an empty nodes service implementation
• satellite/contact/service.go: implements the Local and FetchInfo methods & adds an external address config value
• satellite/discovery/service.go: replaces kad.FetchInfo with contact.FetchInfo in Refresh() & removes Discover()
• satellite/peer.go: sets up the contact service and endpoints
• storagenode/console/service.go: replaces nodeID with contact.Local()
• storagenode/contact/chore.go: replaces the routing table with the contact service
• storagenode/contact/nodesservice.go: creates an empty implementation for the ping and request info nodes service & implements the RequestInfo method
• storagenode/contact/service.go: creates a service to return the local node and update its own capacity
• storagenode/monitor/monitor.go: uses the contact service in place of the routing table
• storagenode/operator.go: moves the operator config from kad into its own setup
• storagenode/peer.go: sets up the contact service, chore, pingstats, and endpoints
• satellite/overlay/config.go: changes the NodeSelectionConfig.OnlineWindow default to 4hr to allow for accurate repair selection

Removes kademlia setups in:

• cmd/storagenode/main.go
• cmd/storj-sim/network.go
• internal/testplanet/planet.go
• internal/testplanet/satellite.go
• internal/testplanet/storagenode.go
• satellite/peer.go
• scripts/test-sim-backwards.sh
• scripts/testdata/satellite-config.yaml.lock
• storagenode/inspector/inspector.go
• storagenode/peer.go
• storagenode/storagenodedb/database.go

Why: Replacing Kademlia

Please describe the tests:

• internal/testplanet/planet_test.go:
  TestBasic: asserts that the storage node can check in with the satellite without any errors
  TestContact: tests that all nodes get inserted into both satellites' overlay cache during testplanet setup
• satellite/contact/contact_test.go:
  TestFetchInfo: tests that the FetchInfo method returns the correct info
• storagenode/contact/contact_test.go:
  TestNodeInfoUpdated: tests that the contact chore updates the node information
  TestRequestInfoEndpoint: tests that the RequestInfo endpoint returns the correct info

Please describe the performance impact: Node discovery should be at least slightly more performant, since each node connects directly to each satellite and no longer needs to wait for bootstrapping. It probably won't be faster in wall-clock time on startup, since each node waits a random amount of time (less than 1 hr) before initializing its first connection (jitter).

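To make the jitter behavior concrete, here is a minimal, self-contained sketch of the idea. It is an illustration only, not the storagenode/contact/chore.go implementation: the helper name initialContactDelay and the wiring around it are assumptions for this note; only the under-one-hour random delay comes from the description above.

package main

import (
	"context"
	"fmt"
	"math/rand"
	"time"
)

// initialContactDelay picks a uniformly random delay below maxJitter so that
// many nodes restarting at once do not all contact the satellites together.
func initialContactDelay(maxJitter time.Duration) time.Duration {
	return time.Duration(rand.Int63n(int64(maxJitter)))
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// "less than 1 hr" per the performance note above
	delay := initialContactDelay(time.Hour)
	fmt.Printf("waiting %s before the first satellite contact\n", delay)

	select {
	case <-time.After(delay):
		// a real chore would ping each satellite here, then loop on an interval
	case <-ctx.Done():
	}
}

Spreading first contacts over a window like this trades a slower individual startup for not having an entire restarted fleet hit the satellites simultaneously.
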
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package main

import (
	"context"
	"encoding/csv"
	"encoding/json"
	"flag"
	"fmt"
	"os"
	"strconv"
	"strings"

	prompt "github.com/segmentio/go-prompt"
	"github.com/spf13/cobra"
	"github.com/zeebo/errs"

	"storj.io/storj/pkg/identity"
	"storj.io/storj/pkg/pb"
	"storj.io/storj/pkg/process"
	"storj.io/storj/pkg/storj"
	"storj.io/storj/pkg/transport"
	"storj.io/storj/uplink/eestream"
)

var (
	// Addr is the address of the peer to inspect, from command flags
	Addr = flag.String("address", "127.0.0.1:7778", "address of peer to inspect")

	// IdentityPath is the path to the identity the inspector should use for network communication
	IdentityPath = flag.String("identity-path", "", "path to the identity certificate for use on the network")

	// CSVPath is the csv path where command output is written
	CSVPath string

	// ErrInspectorDial is returned when dialing the inspector server fails
	ErrInspectorDial = errs.Class("error dialing inspector server:")

	// ErrRequest is for gRPC request errors after dialing
	ErrRequest = errs.Class("error processing request:")

	// ErrIdentity is for errors during identity creation for this CLI
	ErrIdentity = errs.Class("error creating identity:")

	// ErrArgs is returned when there are errors with CLI args
	ErrArgs = errs.Class("error with CLI args:")

	irreparableLimit int32

	// Commander CLI
	rootCmd = &cobra.Command{
		Use:   "inspector",
		Short: "CLI for interacting with Storj network",
	}
	statsCmd = &cobra.Command{
		Use:   "statdb",
		Short: "commands for statdb",
	}
	healthCmd = &cobra.Command{
		Use:   "health",
		Short: "commands for querying the health of stored data",
	}
	irreparableCmd = &cobra.Command{
		Use:   "irreparable",
		Short: "list segments in irreparable database",
		RunE:  getSegments,
	}
	objectHealthCmd = &cobra.Command{
		Use:   "object <project-id> <bucket> <encrypted-path>",
		Short: "Get stats about an object's health",
		Args:  cobra.MinimumNArgs(3),
		RunE:  ObjectHealth,
	}
	segmentHealthCmd = &cobra.Command{
		Use:   "segment <project-id> <segment-index> <bucket> <encrypted-path>",
		Short: "Get stats about a segment's health",
		Args:  cobra.MinimumNArgs(4),
		RunE:  SegmentHealth,
	}
)

// Inspector gives access to overlay.
type Inspector struct {
	identity      *identity.FullIdentity
	overlayclient pb.OverlayInspectorClient
	irrdbclient   pb.IrreparableInspectorClient
	healthclient  pb.HealthInspectorClient
}

// NewInspector creates a new gRPC inspector client for access to overlay.
func NewInspector(address, path string) (*Inspector, error) {
	ctx := context.Background()

	id, err := identity.Config{
		CertPath: fmt.Sprintf("%s/identity.cert", path),
		KeyPath:  fmt.Sprintf("%s/identity.key", path),
	}.Load()
	if err != nil {
		return nil, ErrIdentity.Wrap(err)
	}

	conn, err := transport.DialAddressInsecure(ctx, address)
	if err != nil {
		return &Inspector{}, ErrInspectorDial.Wrap(err)
	}

	return &Inspector{
		identity:      id,
		overlayclient: pb.NewOverlayInspectorClient(conn),
		irrdbclient:   pb.NewIrreparableInspectorClient(conn),
		healthclient:  pb.NewHealthInspectorClient(conn),
	}, nil
}

// ObjectHealth gets information about the health of an object on the network
func ObjectHealth(cmd *cobra.Command, args []string) (err error) {
	ctx := context.Background()

	i, err := NewInspector(*Addr, *IdentityPath)
	if err != nil {
		return ErrArgs.Wrap(err)
	}

	startAfterSegment := int64(0) // start from the first segment
	endBeforeSegment := int64(0)  // no end, so we stop when we've hit the limit or the last segment
	limit := int64(0)             // no limit, so we stop when we've arrived at the last segment

	// args[3:] are optional; each case parses one trailing argument and falls
	// through so that all earlier optional arguments are parsed as well
	switch len(args) {
	case 6:
		limit, err = strconv.ParseInt(args[5], 10, 64)
		if err != nil {
			return ErrRequest.Wrap(err)
		}
		fallthrough
	case 5:
		endBeforeSegment, err = strconv.ParseInt(args[4], 10, 64)
		if err != nil {
			return ErrRequest.Wrap(err)
		}
		fallthrough
	case 4:
		startAfterSegment, err = strconv.ParseInt(args[3], 10, 64)
		if err != nil {
			return ErrRequest.Wrap(err)
		}
		fallthrough
	default:
	}

	req := &pb.ObjectHealthRequest{
		ProjectId:         []byte(args[0]),
		Bucket:            []byte(args[1]),
		EncryptedPath:     []byte(args[2]),
		StartAfterSegment: startAfterSegment,
		EndBeforeSegment:  endBeforeSegment,
		Limit:             int32(limit),
	}

	resp, err := i.healthclient.ObjectHealth(ctx, req)
	if err != nil {
		return ErrRequest.Wrap(err)
	}

	f, err := csvOutput()
	if err != nil {
		return err
	}
	defer func() {
		err := f.Close()
		if err != nil {
			fmt.Printf("error closing file: %+v\n", err)
		}
	}()

	w := csv.NewWriter(f)
	defer w.Flush()

	redundancy, err := eestream.NewRedundancyStrategyFromProto(resp.GetRedundancy())
	if err != nil {
		return ErrRequest.Wrap(err)
	}

	if err := printRedundancyTable(w, redundancy); err != nil {
		return err
	}

	if err := printSegmentHealthAndNodeTables(w, redundancy, resp.GetSegments()); err != nil {
		return err
	}

	return nil
}

// SegmentHealth gets information about the health of a segment on the network
func SegmentHealth(cmd *cobra.Command, args []string) (err error) {
	ctx := context.Background()

	i, err := NewInspector(*Addr, *IdentityPath)
	if err != nil {
		return ErrArgs.Wrap(err)
	}

	segmentIndex, err := strconv.ParseInt(args[1], 10, 64)
	if err != nil {
		return ErrRequest.Wrap(err)
	}

	req := &pb.SegmentHealthRequest{
		ProjectId:     []byte(args[0]),
		SegmentIndex:  segmentIndex,
		Bucket:        []byte(args[2]),
		EncryptedPath: []byte(args[3]),
	}

	resp, err := i.healthclient.SegmentHealth(ctx, req)
	if err != nil {
		return ErrRequest.Wrap(err)
	}

	f, err := csvOutput()
	if err != nil {
		return err
	}
	defer func() {
		err := f.Close()
		if err != nil {
			fmt.Printf("error closing file: %+v\n", err)
		}
	}()

	w := csv.NewWriter(f)
	defer w.Flush()

	redundancy, err := eestream.NewRedundancyStrategyFromProto(resp.GetRedundancy())
	if err != nil {
		return ErrRequest.Wrap(err)
	}

	if err := printRedundancyTable(w, redundancy); err != nil {
		return err
	}

	if err := printSegmentHealthAndNodeTables(w, redundancy, []*pb.SegmentHealth{resp.GetHealth()}); err != nil {
		return err
	}

	return nil
}

func csvOutput() (*os.File, error) {
	if CSVPath == "stdout" {
		return os.Stdout, nil
	}

	return os.Create(CSVPath)
}

func printSegmentHealthAndNodeTables(w *csv.Writer, redundancy eestream.RedundancyStrategy, segments []*pb.SegmentHealth) error {
	segmentTableHeader := []string{
		"Segment Index", "Healthy Nodes", "Unhealthy Nodes", "Offline Nodes",
	}

	if err := w.Write(segmentTableHeader); err != nil {
		return fmt.Errorf("error writing record to csv: %s", err)
	}

	currentNodeIndex := 1                     // start at index 1 to leave the first column empty
	nodeIndices := make(map[storj.NodeID]int) // keeps track of node positions for the node table

	// add each segment to the segment table
	for _, segment := range segments {
		healthyNodes := segment.HealthyIds               // healthy nodes with pieces currently online
		unhealthyNodes := segment.UnhealthyIds           // unhealthy nodes with pieces currently online
		offlineNodes := segment.OfflineIds               // offline nodes
		segmentIndexPath := string(segment.GetSegment()) // path-formatted segment index

		row := []string{
			segmentIndexPath,
			strconv.FormatInt(int64(len(healthyNodes)), 10),
			strconv.FormatInt(int64(len(unhealthyNodes)), 10),
			strconv.FormatInt(int64(len(offlineNodes)), 10),
		}

		if err := w.Write(row); err != nil {
			return fmt.Errorf("error writing record to csv: %s", err)
		}

		allNodes := append(healthyNodes, unhealthyNodes...)
		allNodes = append(allNodes, offlineNodes...)
		for _, id := range allNodes {
			if nodeIndices[id] == 0 {
				nodeIndices[id] = currentNodeIndex
				currentNodeIndex++
			}
		}
	}

	if err := w.Write([]string{}); err != nil {
		return fmt.Errorf("error writing record to csv: %s", err)
	}

	numNodes := len(nodeIndices)
	nodeTableHeader := make([]string, numNodes+1)
	for id, i := range nodeIndices {
		nodeTableHeader[i] = id.String()
	}
	if err := w.Write(nodeTableHeader); err != nil {
		return fmt.Errorf("error writing record to csv: %s", err)
	}

	// add online/offline info to the node table
	for _, segment := range segments {
		row := make([]string, numNodes+1)
		for _, id := range segment.HealthyIds {
			i := nodeIndices[id]
			row[i] = "healthy"
		}
		for _, id := range segment.UnhealthyIds {
			i := nodeIndices[id]
			row[i] = "unhealthy"
		}
		for _, id := range segment.OfflineIds {
			i := nodeIndices[id]
			row[i] = "offline"
		}
		row[0] = string(segment.GetSegment())
		if err := w.Write(row); err != nil {
			return fmt.Errorf("error writing record to csv: %s", err)
		}
	}

	return nil
}

func printRedundancyTable(w *csv.Writer, redundancy eestream.RedundancyStrategy) error {
	total := redundancy.TotalCount()                  // total number of pieces we generated (n)
	required := redundancy.RequiredCount()            // minimum number of pieces required for reconstruction (k)
	optimalThreshold := redundancy.OptimalThreshold() // number of pieces we need to store to call the upload a success (o)
	repairThreshold := redundancy.RepairThreshold()   // if the number of healthy pieces drops to this, repair is triggered (m)

	redundancyTable := [][]string{
		{"Total Pieces (n)", "Minimum Required (k)", "Optimal Threshold (o)", "Repair Threshold (m)"},
		{strconv.Itoa(total), strconv.Itoa(required), strconv.Itoa(optimalThreshold), strconv.Itoa(repairThreshold)},
		{},
	}

	for _, row := range redundancyTable {
		if err := w.Write(row); err != nil {
			return fmt.Errorf("error writing record to csv: %s", err)
		}
	}

	return nil
}

func getSegments(cmd *cobra.Command, args []string) error {
	if irreparableLimit <= int32(0) {
		return ErrArgs.New("limit must be greater than 0")
	}

	i, err := NewInspector(*Addr, *IdentityPath)
	if err != nil {
		return ErrInspectorDial.Wrap(err)
	}

	var lastSeenSegmentPath = []byte{}

	// query the DB and paginate results
	for {
		req := &pb.ListIrreparableSegmentsRequest{
			Limit:               irreparableLimit,
			LastSeenSegmentPath: lastSeenSegmentPath,
		}
		res, err := i.irrdbclient.ListIrreparableSegments(context.Background(), req)
		if err != nil {
			return ErrRequest.Wrap(err)
		}

		if len(res.Segments) == 0 {
			break
		}
		lastSeenSegmentPath = res.Segments[len(res.Segments)-1].Path

		objects := sortSegments(res.Segments)

		// format and print segments
		enc := json.NewEncoder(os.Stdout)
		enc.SetIndent("", " ")
		err = enc.Encode(objects)
		if err != nil {
			return err
		}

		length := int32(len(res.Segments))
		if length >= irreparableLimit {
			if !prompt.Confirm("\nNext page? (y/n)") {
				break
			}
		}
	}
	return nil
}

// sortSegments by the object they belong to
func sortSegments(segments []*pb.IrreparableSegment) map[string][]*pb.IrreparableSegment {
	objects := make(map[string][]*pb.IrreparableSegment)
	for _, seg := range segments {
		pathElements := storj.SplitPath(string(seg.Path))

		// by removing the segment index, we can easily sort segments into a map of objects
		pathElements = append(pathElements[:1], pathElements[2:]...)
		objPath := strings.Join(pathElements, "/")
		objects[objPath] = append(objects[objPath], seg)
	}
	return objects
}

func init() {
	rootCmd.AddCommand(statsCmd)
	rootCmd.AddCommand(irreparableCmd)
	rootCmd.AddCommand(healthCmd)

	healthCmd.AddCommand(objectHealthCmd)
	healthCmd.AddCommand(segmentHealthCmd)

	objectHealthCmd.Flags().StringVar(&CSVPath, "csv-path", "stdout", "csv path where command output is written")

	irreparableCmd.Flags().Int32Var(&irreparableLimit, "limit", 50, "max number of results per page")

	flag.Parse()
}

func main() {
	process.Exec(rootCmd)
}
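
For reference, the commands wired up above can be invoked along these lines. These invocations are illustrative: they assume the binary is built as inspector, and use only the csv-path and limit flags registered in init:

	inspector health object <project-id> <bucket> <encrypted-path> --csv-path stdout
	inspector health segment <project-id> <segment-index> <bucket> <encrypted-path>
	inspector irreparable --limit 50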