storj/storagenode/orders/store_test.go

532 lines
16 KiB
Go
Raw Normal View History

// Copyright (C) 2020 Storj Labs, Inc.
// See LICENSE for copying information.
package orders_test
import (
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
"storj.io/common/pb"
"storj.io/common/storj"
"storj.io/common/testcontext"
"storj.io/common/testrand"
"storj.io/storj/storagenode/orders"
"storj.io/storj/storagenode/orders/ordersfile"
)
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
func TestOrdersStore_Enqueue_GracePeriodFailure(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
dirName := ctx.Dir("test-orders")
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
now := time.Now()
// make order limit grace period 24 hours
ordersStore, err := orders.NewFileStore(zaptest.NewLogger(t), dirName, 24*time.Hour)
require.NoError(t, err)
// adding order before grace period should result in an error
newSN := testrand.SerialNumber()
newInfo := &ordersfile.Info{
Limit: &pb.OrderLimit{
SerialNumber: newSN,
SatelliteId: testrand.NodeID(),
Action: pb.PieceAction_GET,
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
OrderCreation: now.Add(-48 * time.Hour),
OrderExpiration: now.Add(time.Hour),
},
Order: &pb.Order{
SerialNumber: newSN,
Amount: 10,
},
}
err = ordersStore.Enqueue(newInfo)
require.Error(t, err)
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
}
func TestOrdersStore_ListUnsentBySatellite(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
dirName := ctx.Dir("test-orders")
now := time.Now()
// make order limit grace period 12 hours
ordersStore, err := orders.NewFileStore(zaptest.NewLogger(t), dirName, 12*time.Hour)
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
require.NoError(t, err)
// for each satellite, make three orders from four hours ago, three from two hours ago, and three from now.
numSatellites := 3
createdTimes := []time.Time{
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
now.Add(-4 * time.Hour),
now.Add(-2 * time.Hour),
now,
}
serialsPerSatPerTime := 3
originalInfos, err := storeNewOrders(ordersStore, numSatellites, serialsPerSatPerTime, createdTimes)
require.NoError(t, err)
// 3 times:
// list unsent orders - should receive data from all satellites the first two times, and nothing the last time.
// archive unsent orders
expectedArchivedInfos := make(map[storj.SerialNumber]*orders.ArchivedInfo)
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
archiveTime1 := now.Add(-2 * time.Hour)
archiveTime2 := now
status1 := pb.SettlementWithWindowResponse_ACCEPTED
status2 := pb.SettlementWithWindowResponse_REJECTED
for i := 0; i < 3; i++ {
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
// This should return all the orders created more than 1 hour before "now".
unsentMap, err := ordersStore.ListUnsentBySatellite(now.Add(12 * time.Hour))
require.NoError(t, err)
// on last iteration, expect nothing returned
if i == 2 {
require.Len(t, unsentMap, 0)
break
}
// go through order limits and make sure information is accurate
require.Len(t, unsentMap, numSatellites)
for satelliteID, unsentSatList := range unsentMap {
require.Len(t, unsentSatList.InfoList, serialsPerSatPerTime)
for _, unsentInfo := range unsentSatList.InfoList {
// "new" orders should not be returned
require.True(t, unsentInfo.Limit.OrderCreation.Before(createdTimes[2]))
sn := unsentInfo.Limit.SerialNumber
originalInfo := originalInfos[sn]
verifyInfosEqual(t, unsentInfo, originalInfo)
// expect that creation hour is consistent with order
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
require.True(t, unsentSatList.CreatedAtHour.Equal(unsentInfo.Limit.OrderCreation.Truncate(time.Hour)))
// add to archive batch
// create
archivedAt := archiveTime1
orderStatus := orders.StatusAccepted
if i == 1 {
archivedAt = archiveTime2
orderStatus = orders.StatusRejected
}
newArchivedInfo := &orders.ArchivedInfo{
Limit: unsentInfo.Limit,
Order: unsentInfo.Order,
Status: orderStatus,
ArchivedAt: archivedAt,
}
expectedArchivedInfos[unsentInfo.Limit.SerialNumber] = newArchivedInfo
}
// archive unsent file
archivedAt := archiveTime1
status := status1
if i == 1 {
archivedAt = archiveTime2
status = status2
}
err = ordersStore.Archive(satelliteID, unsentSatList, archivedAt, status)
require.NoError(t, err)
}
}
// list archived, expect everything from first two created at time buckets
archived, err := ordersStore.ListArchived()
require.NoError(t, err)
require.Len(t, archived, numSatellites*serialsPerSatPerTime*2)
for _, archivedInfo := range archived {
sn := archivedInfo.Limit.SerialNumber
expectedInfo := expectedArchivedInfos[sn]
verifyArchivedInfosEqual(t, expectedInfo, archivedInfo)
// one of the batches should be "accepted" and the other should be "rejected"
if archivedInfo.ArchivedAt.Round(0).Equal(archiveTime2.Round(0)) {
require.Equal(t, archivedInfo.Status, orders.StatusRejected)
} else {
require.Equal(t, archivedInfo.Status, orders.StatusAccepted)
}
}
// clean archive for anything older than 30 minutes
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
err = ordersStore.CleanArchive(now.Add(-30 * time.Minute))
require.NoError(t, err)
// list archived, expect only recent archived batch (other was cleaned)
archived, err = ordersStore.ListArchived()
require.NoError(t, err)
require.Len(t, archived, numSatellites*serialsPerSatPerTime)
for _, archivedInfo := range archived {
sn := archivedInfo.Limit.SerialNumber
expectedInfo := expectedArchivedInfos[sn]
verifyArchivedInfosEqual(t, expectedInfo, archivedInfo)
require.Equal(t, archivedInfo.ArchivedAt.Round(0), archiveTime2.Round(0))
require.Equal(t, archivedInfo.Status, orders.StatusRejected)
}
// clean archive for everything before now, expect list to return nothing
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
err = ordersStore.CleanArchive(now.Add(time.Nanosecond))
require.NoError(t, err)
archived, err = ordersStore.ListArchived()
require.NoError(t, err)
require.Len(t, archived, 0)
}
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
func TestOrdersStore_ListUnsentBySatellite_Ongoing(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
dirName := ctx.Dir("test-orders")
now := time.Now()
satellite := testrand.NodeID()
tomorrow := now.Add(24 * time.Hour)
// make order limit grace period 1 hour
ordersStore, err := orders.NewFileStore(zaptest.NewLogger(t), dirName, time.Hour)
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
require.NoError(t, err)
// empty store means no orders can be listed
unsent, err := ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 0)
// store an order that can be listed
sn := testrand.SerialNumber()
require.NoError(t, ordersStore.Enqueue(&ordersfile.Info{
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
Limit: &pb.OrderLimit{
SerialNumber: sn,
SatelliteId: satellite,
Action: pb.PieceAction_GET,
OrderCreation: now,
},
Order: &pb.Order{
SerialNumber: sn,
Amount: 1,
},
}))
// check that we can list it tomorrow
unsent, err = ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 1)
// begin an enqueue in the bucket
commit, err := ordersStore.BeginEnqueue(satellite, now)
require.NoError(t, err)
// we should no longer be able to list that window
unsent, err = ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 0)
// commit the order
sn = testrand.SerialNumber()
require.NoError(t, commit(&ordersfile.Info{
storagenode: live tracking of order window usage This change accomplishes multiple things: 1. Instead of having a max in flight time, which means we effectively have a minimum bandwidth for uploads and downloads, we keep track of what windows have active requests happening in them. 2. We don't double check when we save the order to see if it is too old: by then, it's too late. A malicious uplink could just submit orders outside of the grace window and receive all the data, but the node would just not commit it, so the uplink gets free traffic. Because the endpoints also check for the order being too old, this would be a very tight race that depends on knowledge of the node system clock, but best to not have the race exist. Instead, we piggy back off of the in flight tracking and do the check when we start to handle the order, and commit at the end. 3. Change the functions that send orders and list unsent orders to accept a time at which that operation is happening. This way, in tests, we can pretend we're listing or sending far into the future after the windows are available to send, rather than exposing test functions to modify internal state about the grace period to get the desired effect. This brings tests closer to actual usage in production. 4. Change the calculation for if an order is allowed to be enqueued due to the grace period to just look at the order creation time, rather than some computation involving the window it will be in. In this way, you can easily answer the question of "will this order be accepted?" by asking "is it older than X?" where X is the grace period. 5. Increases the frequency we check to send up orders to once every 5 minutes instead of once every hour because we already have hour-long buffering due to the windows. This decreases the maximum latency that an order will be reported back to the satellite by 55 minutes. Change-Id: Ie08b90d139d45ee89b82347e191a2f8db1b88036
2020-08-12 20:01:43 +01:00
Limit: &pb.OrderLimit{
SerialNumber: sn,
SatelliteId: satellite,
Action: pb.PieceAction_GET,
OrderCreation: now,
},
Order: &pb.Order{
SerialNumber: sn,
Amount: 1,
},
}))
// check that we can list it tomorrow
unsent, err = ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 1)
}
func TestOrdersStore_CorruptUnsentV0(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
dirName := ctx.Dir("test-orders")
now := time.Now()
satellite := testrand.NodeID()
tomorrow := now.Add(24 * time.Hour)
// make order limit grace period 1 hour
ordersStore, err := orders.NewFileStore(zaptest.NewLogger(t), dirName, time.Hour)
require.NoError(t, err)
// empty store means no orders can be listed
unsent, err := ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 0)
sn := testrand.SerialNumber()
info := &ordersfile.Info{
Limit: &pb.OrderLimit{
SerialNumber: sn,
SatelliteId: satellite,
Action: pb.PieceAction_GET,
OrderCreation: now,
},
Order: &pb.Order{
SerialNumber: sn,
Amount: 1,
},
}
// store two orders for the same window using deprecated V0
unsentFileName := ordersfile.UnsentFileName(satellite, now, ordersfile.V0)
unsentDir := filepath.Join(dirName, "unsent")
unsentFilePath := filepath.Join(unsentDir, unsentFileName)
of, err := ordersfile.OpenWritableV0(unsentFilePath)
require.NoError(t, err)
require.NoError(t, of.Append(info))
require.NoError(t, of.Append(info))
require.NoError(t, of.Close())
// check that we can see both orders tomorrow
unsent, err = ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 1)
require.Len(t, unsent[satellite].InfoList, 2)
// corrupt unsent orders file by removing the last byte
err = filepath.Walk(unsentDir, func(path string, info os.FileInfo, err error) error {
require.NoError(t, err)
if info.IsDir() {
return nil
}
err = os.Truncate(path, info.Size()-1)
return err
})
require.NoError(t, err)
// add another order, which we shouldn't see for V0 since it is after the corrupted one
of, err = ordersfile.OpenWritableV0(unsentFilePath)
require.NoError(t, err)
require.NoError(t, of.Append(info))
require.NoError(t, of.Close())
// only the second order should be corrupted, so we should still see one order
unsent, err = ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 1)
require.Len(t, unsent[satellite].InfoList, 1)
}
func TestOrdersStore_CorruptUnsentV1(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
dirName := ctx.Dir("test-orders")
now := time.Now()
satellite := testrand.NodeID()
tomorrow := now.Add(24 * time.Hour)
// make order limit grace period 1 hour
ordersStore, err := orders.NewFileStore(zaptest.NewLogger(t), dirName, time.Hour)
require.NoError(t, err)
// empty store means no orders can be listed
unsent, err := ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 0)
sn1 := testrand.SerialNumber()
sn2 := testrand.SerialNumber()
sn3 := testrand.SerialNumber()
info := &ordersfile.Info{
Limit: &pb.OrderLimit{
SerialNumber: sn1,
SatelliteId: satellite,
Action: pb.PieceAction_GET,
OrderCreation: now,
},
Order: &pb.Order{
SerialNumber: sn1,
Amount: 1,
},
}
// store sn1 and sn2 in the same window
require.NoError(t, ordersStore.Enqueue(info))
info.Limit.SerialNumber = sn2
info.Order.SerialNumber = sn2
require.NoError(t, ordersStore.Enqueue(info))
// check that we can see both orders tomorrow
unsent, err = ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 1)
require.Len(t, unsent[satellite].InfoList, 2)
// corrupt unsent orders file by removing the last byte
err = filepath.Walk(filepath.Join(dirName, "unsent"), func(path string, info os.FileInfo, err error) error {
require.NoError(t, err)
if info.IsDir() {
return nil
}
err = os.Truncate(path, info.Size()-1)
return err
})
require.NoError(t, err)
// only the second order should be corrupted, so we should still see one order (sn1)
unsent, err = ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 1)
require.Len(t, unsent[satellite].InfoList, 1)
require.EqualValues(t, sn1, unsent[satellite].InfoList[0].Order.SerialNumber)
// add another order, sn3, to the same window
info.Limit.SerialNumber = sn3
info.Order.SerialNumber = sn3
require.NoError(t, ordersStore.Enqueue(info))
// only the second order should be corrupted, so we should still see first and last orders (sn1, sn3)
unsent, err = ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 1)
require.Len(t, unsent[satellite].InfoList, 2)
require.Equal(t, ordersfile.V1, unsent[satellite].Version)
require.EqualValues(t, sn1, unsent[satellite].InfoList[0].Order.SerialNumber)
require.EqualValues(t, sn3, unsent[satellite].InfoList[1].Order.SerialNumber)
}
func TestOrdersStore_V0ToV1(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
dirName := ctx.Dir("test-orders")
now := time.Now()
satellite := testrand.NodeID()
tomorrow := now.Add(24 * time.Hour)
// make order limit grace period 1 hour
ordersStore, err := orders.NewFileStore(zaptest.NewLogger(t), dirName, time.Hour)
require.NoError(t, err)
// empty store means no orders can be listed
unsent, err := ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 0)
sn1 := testrand.SerialNumber()
sn2 := testrand.SerialNumber()
info := &ordersfile.Info{
Limit: &pb.OrderLimit{
SerialNumber: sn1,
SatelliteId: satellite,
Action: pb.PieceAction_GET,
OrderCreation: now,
},
Order: &pb.Order{
SerialNumber: sn1,
Amount: 1,
},
}
// store sn1 and sn2 in the same window
// sn1 is stored with deprecated V0, so sn2 should also be stored with V0 even when Enqueue() is used
unsentFileName := ordersfile.UnsentFileName(satellite, now, ordersfile.V0)
unsentDir := filepath.Join(dirName, "unsent")
unsentFilePath := filepath.Join(unsentDir, unsentFileName)
of, err := ordersfile.OpenWritableV0(unsentFilePath)
require.NoError(t, err)
require.NoError(t, of.Append(info))
info.Limit.SerialNumber = sn2
info.Order.SerialNumber = sn2
require.NoError(t, of.Append(info))
require.NoError(t, of.Close())
// check that we can see both orders tomorrow
unsent, err = ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 1)
require.Len(t, unsent[satellite].InfoList, 2)
require.Equal(t, ordersfile.V0, unsent[satellite].Version)
// archive file to free up window
require.NoError(t, ordersStore.Archive(satellite, unsent[satellite], time.Now(), pb.SettlementWithWindowResponse_ACCEPTED))
// new file should be created with version V1
require.NoError(t, ordersStore.Enqueue(info))
unsent, err = ordersStore.ListUnsentBySatellite(tomorrow)
require.NoError(t, err)
require.Len(t, unsent, 1)
require.Len(t, unsent[satellite].InfoList, 1)
require.Equal(t, ordersfile.V1, unsent[satellite].Version)
}
func verifyInfosEqual(t *testing.T, a, b *ordersfile.Info) {
t.Helper()
require.NotNil(t, a)
require.NotNil(t, b)
require.Equal(t, a.Limit.SerialNumber, b.Limit.SerialNumber)
require.Equal(t, a.Limit.SatelliteId, b.Limit.SatelliteId)
require.Equal(t, a.Limit.OrderExpiration.UTC(), b.Limit.OrderExpiration.UTC())
require.Equal(t, a.Limit.Action, b.Limit.Action)
require.Equal(t, a.Order.Amount, b.Order.Amount)
require.Equal(t, a.Order.SerialNumber, b.Order.SerialNumber)
}
func verifyArchivedInfosEqual(t *testing.T, a, b *orders.ArchivedInfo) {
t.Helper()
require.NotNil(t, a)
require.NotNil(t, b)
require.Equal(t, a.Limit.SerialNumber, b.Limit.SerialNumber)
require.Equal(t, a.Limit.SatelliteId, b.Limit.SatelliteId)
require.Equal(t, a.Limit.OrderExpiration.UTC(), b.Limit.OrderExpiration.UTC())
require.Equal(t, a.Limit.Action, b.Limit.Action)
require.Equal(t, a.Order.Amount, b.Order.Amount)
require.Equal(t, a.Order.SerialNumber, b.Order.SerialNumber)
require.Equal(t, a.Status, b.Status)
require.Equal(t, a.ArchivedAt.UTC(), b.ArchivedAt.UTC())
}
func storeNewOrders(ordersStore *orders.FileStore, numSatellites, numOrdersPerSatPerTime int, createdAtTimes []time.Time) (map[storj.SerialNumber]*ordersfile.Info, error) {
actions := []pb.PieceAction{
pb.PieceAction_GET,
pb.PieceAction_PUT_REPAIR,
pb.PieceAction_GET_AUDIT,
}
originalInfos := make(map[storj.SerialNumber]*ordersfile.Info)
for i := 0; i < numSatellites; i++ {
satellite := testrand.NodeID()
for _, createdAt := range createdAtTimes {
for j := 0; j < numOrdersPerSatPerTime; j++ {
expiration := time.Now().Add(time.Hour)
amount := testrand.Int63n(1000)
sn := testrand.SerialNumber()
action := actions[j%len(actions)]
newInfo := &ordersfile.Info{
Limit: &pb.OrderLimit{
SerialNumber: sn,
SatelliteId: satellite,
Action: action,
OrderCreation: createdAt,
OrderExpiration: expiration,
},
Order: &pb.Order{
SerialNumber: sn,
Amount: amount,
},
}
originalInfos[sn] = newInfo
// store the new info in the orders store
err := ordersStore.Enqueue(newInfo)
if err != nil {
return originalInfos, err
}
}
}
}
return originalInfos, nil
}