// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.

package server

import (
	"context"
	"crypto/tls"
	"errors"
	"net"
	"net/http"
	"os"
	"runtime"
	"sync"
	"syscall"
	"time"

	"github.com/jtolio/noiseconn"
	"github.com/zeebo/errs"
	"go.uber.org/zap"
	"golang.org/x/sync/errgroup"

	"storj.io/common/errs2"
	"storj.io/common/experiment"
	"storj.io/common/identity"
	"storj.io/common/pb"
	"storj.io/common/peertls/tlsopts"
	"storj.io/common/rpc"
	"storj.io/common/rpc/noise"
	"storj.io/common/rpc/quic"
	"storj.io/common/rpc/rpctracing"
	"storj.io/drpc"
	"storj.io/drpc/drpcmigrate"
	"storj.io/drpc/drpcmux"
	"storj.io/drpc/drpcserver"
	jaeger "storj.io/monkit-jaeger"

	"storj.io/storj/private/server/debounce"
)

const (
	// tcpMaxPacketAge is the maximum amount of time we expect to worry about
	// an undelivered TCP packet lingering in the network. TCP TTL isn't
	// supposed to exceed about 4 minutes, so this is double that with
	// padding.
	tcpMaxPacketAge = 10 * time.Minute
	// debounceLimit is the number of times the server should worry about
	// debouncing incoming noise or TLS messages, per message. Debouncing
	// won't happen if the number of identical packets received is larger than
	// this.
	debounceLimit = 2
)

// Config holds server specific configuration parameters.
type Config struct {
	tlsopts.Config
	Address        string `user:"true" help:"public address to listen on" default:":7777"`
	PrivateAddress string `user:"true" help:"private address to listen on" default:"127.0.0.1:7778"`
	DisableQUIC    bool   `help:"disable QUIC listener on a server" hidden:"true" default:"false"`

	DisableTCP      bool `help:"disable TCP listener on a server" internal:"true"`
	DebugLogTraffic bool `hidden:"true" default:"false"` // Deprecated

	TCPFastOpen       bool `help:"enable support for tcp fast open" default:"true"`
	TCPFastOpenQueue  int  `help:"the size of the tcp fast open queue" default:"256"`
	DebouncingEnabled bool `help:"whether to debounce incoming messages" default:"true"`
}

// Server represents a bundle of services defined by a specific ID.
// Examples of servers are the satellite, the storagenode, and the uplink.
type Server struct {
	log        *zap.Logger
	tlsOptions *tlsopts.Options
	noiseConf  noise.Config
	config     Config
	fastOpen   bool

	publicTCPListener  net.Listener
	publicUDPConn      *net.UDPConn
	publicQUICListener net.Listener
	privateTCPListener net.Listener
	addr               net.Addr

	publicEndpointsReplaySafe *endpointCollection
	publicEndpointsAll        *endpointCollection
	privateEndpoints          *endpointCollection

	// http fallback for the public endpoint
	publicHTTP http.HandlerFunc

	mu   sync.Mutex
	wg   sync.WaitGroup
	once sync.Once
	done chan struct{}
}

// New creates a Server from the given logger, TLS options, and configuration,
// binding the public and private listeners the configuration describes.
func New(log *zap.Logger, tlsOptions *tlsopts.Options, config Config) (_ *Server, err error) {
	noiseConf, err := noise.GenerateServerConf(noise.DefaultProto, tlsOptions.Ident)
	if err != nil {
		return nil, err
	}

	server := &Server{
		log:        log,
		tlsOptions: tlsOptions,
		noiseConf:  noiseConf,
		config:     config,

		publicEndpointsReplaySafe: newEndpointCollection(),
		publicEndpointsAll:        newEndpointCollection(),
		privateEndpoints:          newEndpointCollection(),

		done: make(chan struct{}),
	}

	listenConfig := net.ListenConfig{}
	if config.TCPFastOpen {
		server.fastOpen = tryInitFastOpen(log)
		if server.fastOpen {
			listenConfig.Control = func(network, address string, c syscall.RawConn) error {
				return c.Control(func(fd uintptr) {
					err := setTCPFastOpen(fd, config.TCPFastOpenQueue)
					if err != nil {
						log.Sugar().Infof("failed to set tcp fast open for this socket: %v", err)
					}
				})
			}
		}
	}

	for retry := 0; ; retry++ {
		addr := config.Address
		if !config.DisableTCP {
			publicTCPListener, err := listenConfig.Listen(context.Background(), "tcp", addr)
			if err != nil {
				return nil, err
			}
			addr = publicTCPListener.Addr().String()
			server.publicTCPListener = wrapListener(publicTCPListener)
		}

		if !config.DisableQUIC {
			udpAddr, err := net.ResolveUDPAddr("udp", addr)
			if err != nil {
				if server.publicTCPListener != nil {
					_ = server.publicTCPListener.Close()
				}
				return nil, err
			}

			publicUDPConn, err := net.ListenUDP("udp", udpAddr)
			if err != nil {
				_, port, splitErr := net.SplitHostPort(config.Address)
				if splitErr == nil && port == "0" && retry < 10 && isErrorAddressAlreadyInUse(err) {
					// From here, we know for sure that the TCP port chosen by the
					// OS is available, but we don't know if the same port number
					// for UDP is also available.
					// If the UDP port is already in use, we close the TCP port and
					// retry to find one that is available for both UDP and TCP.
					if server.publicTCPListener != nil {
						_ = server.publicTCPListener.Close()
					}
					continue
				}
				if server.publicTCPListener != nil {
					return nil, errs.Combine(err, server.publicTCPListener.Close())
				}
				return nil, err
			}
			server.publicUDPConn = publicUDPConn
		}

		break
	}

	if server.publicTCPListener != nil {
		server.addr = server.publicTCPListener.Addr()
	} else if server.publicUDPConn != nil {
		server.addr = server.publicUDPConn.LocalAddr()
	}

	privateTCPListener, err := net.Listen("tcp", config.PrivateAddress)
	if err != nil {
		return nil, errs.Combine(err, server.Close())
	}
	server.privateTCPListener = wrapListener(privateTCPListener)

	return server, nil
}
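
// newServerSketch is a minimal, hedged usage sketch and is not called
// anywhere in this package: it assumes the caller already has a *zap.Logger
// and *tlsopts.Options and shows how New is typically wired up with the
// default public and private addresses. The helper name is illustrative only.
func newServerSketch(log *zap.Logger, tlsOptions *tlsopts.Options) (*Server, error) {
	cfg := Config{
		Address:        ":7777",          // public DRPC (TLS/Noise/QUIC) address
		PrivateAddress: "127.0.0.1:7778", // loopback-only private DRPC address
	}
	return New(log, tlsOptions, cfg)
}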

// Identity returns the server's identity.
func (p *Server) Identity() *identity.FullIdentity { return p.tlsOptions.Ident }

// Addr returns the server's public listener address.
func (p *Server) Addr() net.Addr { return p.addr }

// PrivateAddr returns the server's private listener address.
func (p *Server) PrivateAddr() net.Addr { return p.privateTCPListener.Addr() }

// DRPC returns the server's DRPC mux that supports all endpoints for
// registration purposes.
func (p *Server) DRPC() drpc.Mux {
	return p.publicEndpointsAll.mux
}

// ReplaySafeDRPC returns the server's DRPC mux that supports replay safe
// endpoints for registration purposes.
func (p *Server) ReplaySafeDRPC() drpc.Mux {
	return p.publicEndpointsReplaySafe.mux
}

// PrivateDRPC returns the server's DRPC mux for registration purposes.
func (p *Server) PrivateDRPC() drpc.Mux { return p.privateEndpoints.mux }
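
// registerPublicReplaySafeSketch is a hedged registration sketch, not used by
// the package itself: a generated DRPC registration function can be pointed
// at any of the three muxes. The Noise listener serves only the replay-safe
// collection, so a replay-safe public service is typically registered on both
// public muxes. The register callback stands in for a concrete
// pb.DRPCRegister... call and is an assumption of this sketch.
func registerPublicReplaySafeSketch(p *Server, register func(mux drpc.Mux) error) error {
	// Reachable over Noise connections (replay-safe collection).
	if err := register(p.ReplaySafeDRPC()); err != nil {
		return err
	}
	// Reachable over TLS and QUIC connections (the "all" collection).
	return register(p.DRPC())
}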

// IsQUICEnabled checks whether QUIC is enabled by config and the UDP port is open.
func (p *Server) IsQUICEnabled() bool { return !p.config.DisableQUIC && p.publicUDPConn != nil }

// NoiseKeyAttestation returns the noise key attestation for this server.
func (p *Server) NoiseKeyAttestation(ctx context.Context) (_ *pb.NoiseKeyAttestation, err error) {
	defer mon.Task()(&ctx)(&err)
	info, err := noise.ConfigToInfo(p.noiseConf)
	if err != nil {
		return nil, err
	}
	return noise.GenerateKeyAttestation(ctx, p.tlsOptions.Ident, info)
}

// DebounceLimit is the number of times the server is able to
// debounce incoming noise or TLS messages, per message.
func (p *Server) DebounceLimit() int {
	if !p.config.DebouncingEnabled {
		return 0
	}
	return debounceLimit
}

// FastOpen returns true if TCP fast open is possibly enabled; false means we
// know it is off.
func (p *Server) FastOpen() bool {
	return p.fastOpen
}

// Close shuts down the server.
func (p *Server) Close() error {
	p.mu.Lock()
	defer p.mu.Unlock()

	// Close done and wait for any Runs to exit.
	p.once.Do(func() { close(p.done) })
	p.wg.Wait()

	// Ensure the listeners are closed in case Run was never called.
	// We ignore these errors because there's not really anything to do
	// even if they happen, and they'll just be errors due to duplicate
	// closes anyway.
	if p.publicQUICListener != nil {
		_ = p.publicQUICListener.Close()
	}
	if p.publicUDPConn != nil {
		_ = p.publicUDPConn.Close()
	}
	if p.publicTCPListener != nil {
		_ = p.publicTCPListener.Close()
	}
	if p.privateTCPListener != nil {
		_ = p.privateTCPListener.Close()
	}
	return nil
}

// AddHTTPFallback adds an http fallback to the public drpc endpoint, so that
// plain HTTP requests arriving on the same port are answered by httpHandler.
func (p *Server) AddHTTPFallback(httpHandler http.HandlerFunc) {
	p.publicHTTP = httpHandler
}
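
// healthFallbackSketch is a minimal, hedged sketch of the kind of handler a
// storagenode might pass to AddHTTPFallback so that plain HTTP GETs against
// the public DRPC port can be used by external uptime monitors. The
// isHealthy callback is an assumption of this sketch, not part of this
// package.
func healthFallbackSketch(isHealthy func() bool) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodGet {
			w.WriteHeader(http.StatusMethodNotAllowed)
			return
		}
		if !isHealthy() {
			w.WriteHeader(http.StatusServiceUnavailable)
			return
		}
		w.WriteHeader(http.StatusOK)
	}
}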

// Run will run the server and all of its services.
func (p *Server) Run(ctx context.Context) (err error) {
	defer mon.Task()(&ctx)(&err)

	// Make sure the server isn't already closed. If it is, register
	// ourselves in the wait group so that Close can wait on it.
	p.mu.Lock()
	select {
	case <-p.done:
		p.mu.Unlock()
		return errs.New("server closed")
	default:
		p.wg.Add(1)
		defer p.wg.Done()
	}
	p.mu.Unlock()

	// We want to launch the muxes in a different group so that they are
	// only closed after we're sure that p.Close is called. The reason why
	// is so that we don't get "listener closed" errors because the
	// Run call exits and closes the listeners before the servers have had
	// a chance to be notified that they're done running.

	var (
		publicTLSDRPCListener   net.Listener
		publicNoiseDRPCListener net.Listener
		publicHTTPListener      net.Listener
		privateDRPCListener     net.Listener
	)

	if p.publicUDPConn != nil {
		// TODO: we goofed here. we need something like a drpcmigrate.ListenMux
		// for UDP packets.
		publicQUICListener, err := quic.NewListener(p.publicUDPConn, p.tlsOptions.ServerTLSConfig(), nil)
		if err != nil {
			return err
		}
		// TODO: this is also strange. why does (*Server).Close() need to close
		// the quic listener? Shouldn't closing p.publicUDPConn be enough?
		// We should be able to remove UDP-specific protocols from the Server
		// struct and keep them localized to (*Server).Run, akin to TLS vs
		// Noise drpcmigrate.ListenMuxed listeners over TCP.
		p.publicQUICListener = wrapListener(publicQUICListener)
	}

	// We need a new context chain because we require this context to be
	// canceled only after all of the upcoming drpc servers have fully
	// exited. The reason why is that Run closes the listener for the mux
	// when it exits, and we can only do that after all of the servers are
	// no longer accepting.
	muxCtx, muxCancel := context.WithCancel(context.Background())
	defer muxCancel()

	var muxGroup errgroup.Group

	if p.publicTCPListener != nil {
		publicLMux := drpcmigrate.NewListenMux(p.publicTCPListener, len(drpcmigrate.DRPCHeader))
		tlsMux := publicLMux.Route(drpcmigrate.DRPCHeader)
		var noiseOpts noiseconn.Options

		if p.config.DebouncingEnabled {
			debouncer := debounce.NewDebouncer(tcpMaxPacketAge, debounceLimit)
			tlsMux = tlsDebounce(tlsMux, debouncer.ResponderFirstMessageValidator)
			noiseOpts.ResponderFirstMessageValidator = debouncer.ResponderFirstMessageValidator
		}

		publicTLSDRPCListener = tls.NewListener(tlsMux, p.tlsOptions.ServerTLSConfig())
		publicNoiseDRPCListener = noiseconn.NewListenerWithOptions(
			publicLMux.Route(noise.Header),
			p.noiseConf,
			noiseOpts)
		if p.publicHTTP != nil {
			publicHTTPListener = NewPrefixedListener([]byte("GET / HT"), publicLMux.Route("GET / HT"))
		}
		muxGroup.Go(func() error {
			return publicLMux.Run(muxCtx)
		})
	}

	{
		privateLMux := drpcmigrate.NewListenMux(p.privateTCPListener, len(drpcmigrate.DRPCHeader))
		privateDRPCListener = privateLMux.Route(drpcmigrate.DRPCHeader)
		muxGroup.Go(func() error {
			return privateLMux.Run(muxCtx)
		})
	}

	// Now we launch all the stuff that uses the listeners.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	var group errgroup.Group
	group.Go(func() error {
		select {
		case <-p.done:
			cancel()
		case <-ctx.Done():
		}

		return nil
	})

	connectListenerToEndpoints := func(ctx context.Context, listener net.Listener, endpoints *endpointCollection) {
		if listener != nil {
			group.Go(func() error {
				defer cancel()
				return endpoints.drpc.Serve(ctx, listener)
			})
		}
	}

	connectListenerToEndpoints(ctx, publicTLSDRPCListener, p.publicEndpointsAll)
	connectListenerToEndpoints(ctx, p.publicQUICListener, p.publicEndpointsAll)
	connectListenerToEndpoints(ctx, publicNoiseDRPCListener, p.publicEndpointsReplaySafe)
	connectListenerToEndpoints(ctx, privateDRPCListener, p.privateEndpoints)

	if publicHTTPListener != nil {
		// This http server serves the HTTP requests filtered off of the
		// incoming public DRPC port, instead of listening on a separate port.
		httpServer := http.Server{
			Handler: p.publicHTTP,
		}

		group.Go(func() error {
			<-ctx.Done()
			return httpServer.Shutdown(context.Background())
		})
		group.Go(func() error {
			defer cancel()
			err := httpServer.Serve(publicHTTPListener)
			if errs2.IsCanceled(err) || errors.Is(err, http.ErrServerClosed) {
				err = nil
			}
			return err
		})
	}

	// Now we wait for all the stuff using the listeners to exit.
	err = group.Wait()

	// Now we close down our listeners.
	muxCancel()
	return errs.Combine(err, muxGroup.Wait())
}
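
// runServerSketch is a hedged sketch, not called by this package, of how a
// peer process typically drives Run alongside its other services: everything
// shares one cancellable context and one errgroup, so any service failing or
// the server closing shuts the rest down.
func runServerSketch(ctx context.Context, srv *Server) error {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	var group errgroup.Group
	group.Go(func() error {
		return srv.Run(ctx)
	})
	// Other long-running services would be added to the same group here.
	return group.Wait()
}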

// endpointCollection pairs a drpc mux with the drpc server that serves it.
type endpointCollection struct {
	mux  *drpcmux.Mux
	drpc *drpcserver.Server
}

// newEndpointCollection constructs an endpointCollection whose server wraps
// the mux with experiment and tracing handlers.
func newEndpointCollection() *endpointCollection {
	mux := drpcmux.New()
	return &endpointCollection{
		mux: mux,
		drpc: drpcserver.NewWithOptions(
			experiment.NewHandler(
				rpctracing.NewHandler(
					mux,
					jaeger.RemoteTraceHandler),
			),
			drpcserver.Options{
				Manager: rpc.NewDefaultManagerOptions(),
			},
		),
	}
}
// isErrorAddressAlreadyInUse checks whether the error corresponds to
// EADDRINUSE. Taken from https://stackoverflow.com/a/65865898.
func isErrorAddressAlreadyInUse(err error) bool {
	var eOsSyscall *os.SyscallError
	if !errors.As(err, &eOsSyscall) {
		return false
	}
	var errErrno syscall.Errno
	if !errors.As(eOsSyscall.Err, &errErrno) {
		return false
	}
	if errErrno == syscall.EADDRINUSE {
		return true
	}
	const WSAEADDRINUSE = 10048
	if runtime.GOOS == "windows" && errErrno == WSAEADDRINUSE {
		return true
	}
	return false
}