2019-03-18 10:55:06 +00:00
|
|
|
// Copyright (C) 2019 Storj Labs, Inc.
|
|
|
|
// See LICENSE for copying information.
|
|
|
|
|
|
|
|
package orders
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2019-12-30 20:35:26 +00:00
|
|
|
"math/rand"
|
2020-07-01 23:05:01 +01:00
|
|
|
"sync"
|
2019-03-18 10:55:06 +00:00
|
|
|
"time"
|
|
|
|
|
2019-11-08 20:40:39 +00:00
|
|
|
"github.com/spacemonkeygo/monkit/v3"
|
2019-06-04 13:31:39 +01:00
|
|
|
"github.com/zeebo/errs"
|
2019-03-18 10:55:06 +00:00
|
|
|
"go.uber.org/zap"
|
2019-03-21 13:24:26 +00:00
|
|
|
"golang.org/x/sync/errgroup"
|
2019-03-18 10:55:06 +00:00
|
|
|
|
2019-12-27 11:48:47 +00:00
|
|
|
"storj.io/common/pb"
|
|
|
|
"storj.io/common/rpc"
|
|
|
|
"storj.io/common/storj"
|
|
|
|
"storj.io/common/sync2"
|
2020-10-01 23:52:22 +01:00
|
|
|
"storj.io/storj/storagenode/orders/ordersfile"
|
2019-07-18 18:15:09 +01:00
|
|
|
"storj.io/storj/storagenode/trust"
|
2019-03-18 10:55:06 +00:00
|
|
|
)
|
|
|
|
|
2019-06-04 13:31:39 +01:00
|
|
|
var (
	// OrderError represents errors with orders.
	OrderError = errs.Class("order")
	// OrderNotFoundError is the error returned when an order is not found.
	OrderNotFoundError = errs.Class("order not found")

	// mon tracks monkit metrics for this package.
	mon = monkit.Package()
)
|
|
|
|
|
2019-03-21 13:24:26 +00:00
|
|
|
// ArchivedInfo contains full information about an archived order.
type ArchivedInfo struct {
	// Limit is the order limit the order was created against.
	Limit *pb.OrderLimit
	// Order is the signed order itself.
	Order *pb.Order

	// Status records how the satellite responded to the order.
	Status Status
	// ArchivedAt is when the order was moved into the archive.
	ArchivedAt time.Time
}
|
|
|
|
|
|
|
|
// Status is the archival status of the order.
type Status byte
|
|
|
|
|
|
|
|
// Statuses for satellite responses.
const (
	// StatusUnsent is the status of an order that has not been sent to the satellite yet.
	StatusUnsent Status = iota
	// StatusAccepted is the status of an order the satellite accepted.
	StatusAccepted
	// StatusRejected is the status of an order the satellite rejected.
	StatusRejected
)
|
|
|
|
|
2019-07-31 17:40:08 +01:00
|
|
|
// ArchiveRequest defines arguments for archiving a single order.
type ArchiveRequest struct {
	// Satellite is the satellite the order belongs to.
	Satellite storj.NodeID
	// Serial is the serial number identifying the order.
	Serial storj.SerialNumber
	// Status is the archival status to record for the order.
	Status Status
}
|
|
|
|
|
2019-03-18 10:55:06 +00:00
|
|
|
// DB implements storing orders for sending to the satellite.
//
// architecture: Database
type DB interface {
	// Enqueue inserts order to the list of orders needing to be sent to the satellite.
	Enqueue(ctx context.Context, info *ordersfile.Info) error
	// ListUnsent returns orders that haven't been sent yet.
	ListUnsent(ctx context.Context, limit int) ([]*ordersfile.Info, error)
	// ListUnsentBySatellite returns orders that haven't been sent yet grouped by satellite.
	ListUnsentBySatellite(ctx context.Context) (map[storj.NodeID][]*ordersfile.Info, error)

	// Archive marks order as being handled.
	Archive(ctx context.Context, archivedAt time.Time, requests ...ArchiveRequest) error
	// ListArchived returns orders that have been sent.
	ListArchived(ctx context.Context, limit int) ([]*ArchivedInfo, error)
	// CleanArchive deletes all entries older than the before time.
	// It returns the number of entries deleted.
	CleanArchive(ctx context.Context, deleteBefore time.Time) (int, error)
}
|
|
|
|
|
2019-08-22 15:33:14 +01:00
|
|
|
// Config defines configuration for sending orders.
type Config struct {
	// MaxSleep is the upper bound of the random jitter applied before each
	// sender/cleanup run so that nodes don't hit satellites in lockstep.
	MaxSleep       time.Duration `help:"maximum duration to wait before trying to send orders" releaseDefault:"30s" devDefault:"1s"`
	SenderInterval time.Duration `help:"duration between sending" releaseDefault:"1h0m0s" devDefault:"30s"`
	// SenderTimeout bounds a single sending pass to all satellites.
	SenderTimeout     time.Duration `help:"timeout for sending" default:"1h0m0s"`
	SenderDialTimeout time.Duration `help:"timeout for dialing satellite during sending orders" default:"1m0s"`
	CleanupInterval   time.Duration `help:"duration between archive cleanups" default:"5m0s"`
	ArchiveTTL        time.Duration `help:"length of time to archive orders before deletion" default:"168h0m0s"` // 7 days
	Path              string        `help:"path to store order limit files in" default:"$CONFDIR/orders"`
}
|
|
|
|
|
2019-08-22 15:33:14 +01:00
|
|
|
// Service sends every interval unsent orders to the satellite.
//
// architecture: Chore
type Service struct {
	log    *zap.Logger
	config Config

	dialer      rpc.Dialer
	ordersStore *FileStore
	orders      DB
	trust       *trust.Pool

	// Sender periodically sends unsent orders to satellites;
	// Cleanup periodically removes expired archived orders.
	Sender  *sync2.Cycle
	Cleanup *sync2.Cycle
}
|
|
|
|
|
2019-08-22 15:33:14 +01:00
|
|
|
// NewService creates an order service.
|
2020-07-01 23:05:01 +01:00
|
|
|
func NewService(log *zap.Logger, dialer rpc.Dialer, ordersStore *FileStore, orders DB, trust *trust.Pool, config Config) *Service {
|
2019-08-22 15:33:14 +01:00
|
|
|
return &Service{
|
2020-07-01 23:05:01 +01:00
|
|
|
log: log,
|
|
|
|
dialer: dialer,
|
|
|
|
ordersStore: ordersStore,
|
|
|
|
orders: orders,
|
|
|
|
config: config,
|
|
|
|
trust: trust,
|
2019-03-21 13:24:26 +00:00
|
|
|
|
2020-01-29 15:37:50 +00:00
|
|
|
Sender: sync2.NewCycle(config.SenderInterval),
|
|
|
|
Cleanup: sync2.NewCycle(config.CleanupInterval),
|
2019-03-18 10:55:06 +00:00
|
|
|
}
|
|
|
|
}
|
2019-03-21 13:24:26 +00:00
|
|
|
|
|
|
|
// Run sends orders on every interval to the appropriate satellites.
func (service *Service) Run(ctx context.Context) (err error) {
	defer mon.Task()(&ctx)(&err)

	var group errgroup.Group

	// Both cycles begin each iteration with a random sleep (bounded by
	// config.MaxSleep) so nodes don't contact satellites in lockstep;
	// sleep only errors when ctx is cancelled, which stops the cycle.
	service.Sender.Start(ctx, &group, func(ctx context.Context) error {
		if err := service.sleep(ctx); err != nil {
			return err
		}

		service.SendOrders(ctx, time.Now())

		return nil
	})
	service.Cleanup.Start(ctx, &group, func(ctx context.Context) error {
		if err := service.sleep(ctx); err != nil {
			return err
		}

		// Drop archived orders older than the configured TTL. Failures are
		// logged but not returned so a failed cleanup doesn't stop the cycle.
		err := service.CleanArchive(ctx, time.Now().Add(-service.config.ArchiveTTL))
		if err != nil {
			service.log.Error("clean archive failed", zap.Error(err))
		}

		return nil
	})

	return group.Wait()
}
|
|
|
|
|
storagenode: live tracking of order window usage
This change accomplishes multiple things:
1. Instead of having a max in flight time, which means
we effectively have a minimum bandwidth for uploads
and downloads, we keep track of what windows have
active requests happening in them.
2. We don't double check when we save the order to see if it
is too old: by then, it's too late. A malicious uplink
could just submit orders outside of the grace window and
receive all the data, but the node would just not commit
it, so the uplink gets free traffic. Because the endpoints
also check for the order being too old, this would be a
very tight race that depends on knowledge of the node system
clock, but best to not have the race exist. Instead, we piggy
back off of the in flight tracking and do the check when
we start to handle the order, and commit at the end.
3. Change the functions that send orders and list unsent
orders to accept a time at which that operation is
happening. This way, in tests, we can pretend we're
listing or sending far into the future after the windows
are available to send, rather than exposing test functions
to modify internal state about the grace period to get
the desired effect. This brings tests closer to actual
usage in production.
4. Change the calculation for if an order is allowed to be
enqueued due to the grace period to just look at the
order creation time, rather than some computation involving
the window it will be in. In this way, you can easily
answer the question of "will this order be accepted?" by
asking "is it older than X?" where X is the grace period.
5. Increases the frequency we check to send up orders to once
every 5 minutes instead of once every hour because we already
have hour-long buffering due to the windows. This decreases
the maximum latency that an order will be reported back to
the satellite by 55 minutes.
Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
|
|
|
// CleanArchive removes all archived orders that were archived before the deleteBefore time.
//
// Errors from either archive are logged and swallowed (nil is returned) so
// that a failing cleanup never aborts the periodic cycle that calls this.
func (service *Service) CleanArchive(ctx context.Context, deleteBefore time.Time) (err error) {
	defer mon.Task()(&ctx)(&err)
	service.log.Debug("cleaning")

	// Clean the database-backed archive first; it reports how many
	// entries were removed.
	deleted, err := service.orders.CleanArchive(ctx, deleteBefore)
	if err != nil {
		service.log.Error("cleaning DB archive", zap.Error(err))
		return nil
	}

	// Then clean the file-backed order store archive.
	err = service.ordersStore.CleanArchive(deleteBefore)
	if err != nil {
		service.log.Error("cleaning filestore archive", zap.Error(err))
		return nil
	}

	service.log.Debug("cleanup finished", zap.Int("items deleted", deleted))
	return nil
}
|
2019-03-21 13:24:26 +00:00
|
|
|
|
storagenode: live tracking of order window usage
This change accomplishes multiple things:
1. Instead of having a max in flight time, which means
we effectively have a minimum bandwidth for uploads
and downloads, we keep track of what windows have
active requests happening in them.
2. We don't double check when we save the order to see if it
is too old: by then, it's too late. A malicious uplink
could just submit orders outside of the grace window and
receive all the data, but the node would just not commit
it, so the uplink gets free traffic. Because the endpoints
also check for the order being too old, this would be a
very tight race that depends on knowledge of the node system
clock, but best to not have the race exist. Instead, we piggy
back off of the in flight tracking and do the check when
we start to handle the order, and commit at the end.
3. Change the functions that send orders and list unsent
orders to accept a time at which that operation is
happening. This way, in tests, we can pretend we're
listing or sending far into the future after the windows
are available to send, rather than exposing test functions
to modify internal state about the grace period to get
the desired effect. This brings tests closer to actual
usage in production.
4. Change the calculation for if an order is allowed to be
enqueued due to the grace period to just look at the
order creation time, rather than some computation involving
the window it will be in. In this way, you can easily
answer the question of "will this order be accepted?" by
asking "is it older than X?" where X is the grace period.
5. Increases the frequency we check to send up orders to once
every 5 minutes instead of once every hour because we already
have hour-long buffering due to the windows. This decreases
the maximum latency that an order will be reported back to
the satellite by 55 minutes.
Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
|
|
|
// SendOrders sends the orders using now as the current time.
//
// It repeatedly lists unsent order windows grouped by satellite and settles
// each group concurrently, until either no windows remain or every remaining
// satellite has already failed during this invocation. Errors are logged,
// not returned; failed windows stay unsent and will be retried on the next
// cycle iteration.
func (service *Service) SendOrders(ctx context.Context, now time.Time) {
	defer mon.Task()(&ctx)(nil)
	service.log.Debug("sending")

	// Satellites that errored during this invocation; we skip them on
	// subsequent loop iterations. Guarded by errorSatellitesMu because
	// the settle goroutines write to it concurrently.
	errorSatellites := make(map[storj.NodeID]struct{})
	var errorSatellitesMu sync.Mutex

	// Continue sending until there are no more windows to send, or all relevant satellites are offline.
	for {
		ordersBySatellite, err := service.ordersStore.ListUnsentBySatellite(ctx, now)
		if err != nil {
			// A listing error may still leave partial results; fall through
			// and process whatever we got.
			service.log.Error("listing orders", zap.Error(err))
		}
		if len(ordersBySatellite) == 0 {
			service.log.Debug("no orders to send")
			break
		}

		var group errgroup.Group
		attemptedSatellites := 0
		// Shadow ctx with a per-iteration timeout bounding the whole batch;
		// the next iteration derives a fresh timeout from the outer ctx.
		ctx, cancel := context.WithTimeout(ctx, service.config.SenderTimeout)

		for satelliteID, unsentInfo := range ordersBySatellite {
			// Capture per-iteration copies for the goroutine closure below.
			satelliteID, unsentInfo := satelliteID, unsentInfo
			if _, ok := errorSatellites[satelliteID]; ok {
				continue
			}
			attemptedSatellites++

			group.Go(func() error {
				log := service.log.Named(satelliteID.String())
				status, err := service.settleWindow(ctx, log, satelliteID, unsentInfo.InfoList)
				if err != nil {
					// satellite returned an error, but settlement was not explicitly rejected; we want to retry later
					errorSatellitesMu.Lock()
					errorSatellites[satelliteID] = struct{}{}
					errorSatellitesMu.Unlock()
					log.Error("failed to settle orders for satellite", zap.String("satellite ID", satelliteID.String()), zap.Error(err))
					return nil
				}

				// Settlement succeeded (accepted or rejected); archive the
				// window either way so it is not sent again.
				err = service.ordersStore.Archive(satelliteID, unsentInfo, time.Now().UTC(), status)
				if err != nil {
					log.Error("failed to archive orders", zap.Error(err))
					return nil
				}

				return nil
			})

		}
		_ = group.Wait() // doesn't return errors
		cancel()

		// if all satellites that orders need to be sent to are offline, exit and try again later.
		if attemptedSatellites == 0 {
			break
		}
	}
}
|
|
|
|
|
2020-10-01 23:52:22 +01:00
|
|
|
// settleWindow submits one window of orders for a single satellite over a
// SettlementWithWindow stream and returns the satellite's response status.
// All failures are wrapped in OrderError; on error the returned status is 0.
func (service *Service) settleWindow(ctx context.Context, log *zap.Logger, satelliteID storj.NodeID, orders []*ordersfile.Info) (status pb.SettlementWithWindowResponse_Status, err error) {
	defer mon.Task()(&ctx)(&err)

	log.Info("sending", zap.Int("count", len(orders)))
	defer log.Info("finished")

	// Resolve the satellite's address through the trust pool.
	nodeurl, err := service.trust.GetNodeURL(ctx, satelliteID)
	if err != nil {
		return 0, OrderError.New("unable to get satellite address: %w", err)
	}

	conn, err := service.dialer.DialNodeURL(ctx, nodeurl)
	if err != nil {
		return 0, OrderError.New("unable to connect to the satellite: %w", err)
	}
	// Fold the close error into the named return so it isn't lost.
	defer func() { err = errs.Combine(err, conn.Close()) }()

	stream, err := pb.NewDRPCOrdersClient(conn).SettlementWithWindow(ctx)
	if err != nil {
		return 0, OrderError.New("failed to start settlement: %w", err)
	}

	// Stream every (limit, order) pair in the window to the satellite.
	for _, order := range orders {
		req := pb.SettlementRequest{
			Limit: order.Limit,
			Order: order.Order,
		}
		err := stream.Send(&req)
		if err != nil {
			err = OrderError.New("sending settlement agreements returned an error: %w", err)
			log.Error("rpc client when sending new orders settlements",
				zap.Error(err),
				zap.Any("request", req),
			)
			return 0, err
		}
	}

	// Close the send side and wait for the satellite's verdict on the window.
	res, err := stream.CloseAndRecv()
	if err != nil {
		err = OrderError.New("CloseAndRecv settlement agreements returned an error: %w", err)
		log.Error("rpc client error when closing sender ", zap.Error(err))
		return 0, err
	}

	return res.Status, nil
}
|
|
|
|
|
2020-07-16 16:27:24 +01:00
|
|
|
// sleep for random interval in [0;maxSleep).
|
|
|
|
// Returns an error if context was cancelled.
|
2019-12-30 20:35:26 +00:00
|
|
|
func (service *Service) sleep(ctx context.Context) error {
|
|
|
|
if service.config.MaxSleep <= 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
jitter := time.Duration(rand.Int63n(int64(service.config.MaxSleep)))
|
|
|
|
if !sync2.Sleep(ctx, jitter) {
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-03-21 13:24:26 +00:00
|
|
|
// Close stops the sending service.
|
2019-08-22 15:33:14 +01:00
|
|
|
func (service *Service) Close() error {
|
|
|
|
service.Sender.Close()
|
|
|
|
service.Cleanup.Close()
|
2019-03-21 13:24:26 +00:00
|
|
|
return nil
|
|
|
|
}
|