move piece info into files (#2629)

Deprecate the pieceinfo database, and start storing piece info as a header to
piece files. Institute a "storage format version" concept allowing us to handle
pieces stored under multiple different types of storage. Add a piece_expirations
table which will still be used to track expiration times, so we can query it, but
which should be much smaller than the pieceinfo database would be for the
same number of pieces. (Only pieces with expiration times need to be stored in piece_expirations, and we don't need to store large byte blobs like the serialized
order limit, etc.) Use specialized names for accessing any functionality that applies
only to V0 pieces (e.g., `store.V0PieceInfo()`). Move SpaceUsed-
type functionality under the purview of the piece store. Add some generic
interfaces for traversing all blobs or all pieces. Add lots of tests.
This commit is contained in:
paul cannon 2019-08-07 20:47:30 -05:00 committed by GitHub
parent 271f9ea722
commit 17bdb5e9e5
31 changed files with 2919 additions and 556 deletions
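
As a rough, illustrative sketch of the size argument in the commit message above, the Go types below contrast a hypothetical row shape for the deprecated pieceinfo database with one for the new piece_expirations table. The field names are assumptions for illustration only; the real schemas live in the storagenode database layer and are not part of the hunks shown below.

package sketch

import "time"

// pieceInfoRow approximates what the deprecated pieceinfo database kept for
// every stored piece, including large serialized protobuf blobs.
// (Hypothetical shape, for illustration only.)
type pieceInfoRow struct {
	SatelliteID     []byte
	PieceID         []byte
	PieceSize       int64
	PieceCreation   time.Time
	PieceExpiration *time.Time // often unset; many pieces never expire
	OrderLimit      []byte     // serialized order limit blob
	UplinkPieceHash []byte     // serialized uplink piece hash blob
}

// pieceExpirationRow approximates the new piece_expirations table: only pieces
// that actually have an expiration get a row, and only the columns needed to
// find expired pieces at collection time are kept.
// (Hypothetical shape, for illustration only.)
type pieceExpirationRow struct {
	SatelliteID     []byte
	PieceID         []byte
	PieceExpiration time.Time
}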

View File

@ -26,7 +26,7 @@ type SlowDB struct {
func NewSlowDB(log *zap.Logger, db storagenode.DB) *SlowDB {
return &SlowDB{
DB: db,
blobs: NewSlowBlobs(log, db.Pieces()),
blobs: newSlowBlobs(log, db.Pieces()),
log: log,
}
}
@ -49,9 +49,9 @@ type SlowBlobs struct {
log *zap.Logger
}
// NewSlowBlobs creates a new slow blob store wrapping the provided blobs.
// newSlowBlobs creates a new slow blob store wrapping the provided blobs.
// Use SetLatency to dynamically configure the latency of all operations.
func NewSlowBlobs(log *zap.Logger, blobs storage.Blobs) *SlowBlobs {
func newSlowBlobs(log *zap.Logger, blobs storage.Blobs) *SlowBlobs {
return &SlowBlobs{
log: log,
blobs: blobs,
@ -71,18 +71,64 @@ func (slow *SlowBlobs) Open(ctx context.Context, ref storage.BlobRef) (storage.B
return slow.blobs.Open(ctx, ref)
}
// OpenWithStorageFormat opens a reader for the already-located blob, avoiding the potential need
// to check multiple storage formats to find the blob.
func (slow *SlowBlobs) OpenWithStorageFormat(ctx context.Context, ref storage.BlobRef, formatVer storage.FormatVersion) (storage.BlobReader, error) {
slow.sleep()
return slow.blobs.OpenWithStorageFormat(ctx, ref, formatVer)
}
// Delete deletes the blob with the namespace and key.
func (slow *SlowBlobs) Delete(ctx context.Context, ref storage.BlobRef) error {
slow.sleep()
return slow.blobs.Delete(ctx, ref)
}
// Stat looks up disk metadata on the blob file
func (slow *SlowBlobs) Stat(ctx context.Context, ref storage.BlobRef) (storage.BlobInfo, error) {
slow.sleep()
return slow.blobs.Stat(ctx, ref)
}
// StatWithStorageFormat looks up disk metadata for the blob file with the given storage format
// version. This avoids the potential need to check multiple storage formats for the blob
// when the format is already known.
func (slow *SlowBlobs) StatWithStorageFormat(ctx context.Context, ref storage.BlobRef, formatVer storage.FormatVersion) (storage.BlobInfo, error) {
slow.sleep()
return slow.blobs.StatWithStorageFormat(ctx, ref, formatVer)
}
// WalkNamespace executes walkFunc for each locally stored blob in the given namespace.
// If walkFunc returns a non-nil error, WalkNamespace will stop iterating and return the
// error immediately.
func (slow *SlowBlobs) WalkNamespace(ctx context.Context, namespace []byte, walkFunc func(storage.BlobInfo) error) error {
slow.sleep()
return slow.blobs.WalkNamespace(ctx, namespace, walkFunc)
}
// ListNamespaces returns all namespaces that might be storing data.
func (slow *SlowBlobs) ListNamespaces(ctx context.Context) ([][]byte, error) {
slow.sleep()
return slow.blobs.ListNamespaces(ctx)
}
// FreeSpace returns how much free space is left for writing.
func (slow *SlowBlobs) FreeSpace() (int64, error) {
slow.sleep()
return slow.blobs.FreeSpace()
}
// SpaceUsed adds up how much is used in all namespaces
func (slow *SlowBlobs) SpaceUsed(ctx context.Context) (int64, error) {
slow.sleep()
return slow.blobs.SpaceUsed(ctx)
}
// SpaceUsedInNamespace adds up how much is used in the given namespace
func (slow *SlowBlobs) SpaceUsedInNamespace(ctx context.Context, namespace []byte) (int64, error) {
slow.sleep()
return slow.blobs.SpaceUsedInNamespace(ctx, namespace)
}
// SetLatency configures the blob store to sleep for delay duration for all
// operations. A zero or negative delay means no sleep.
func (slow *SlowBlobs) SetLatency(delay time.Duration) {

View File

@ -26,6 +26,31 @@ var _ = time.Kitchen
// proto package needs to be updated.
const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package
type PieceHeader_FormatVersion int32
const (
PieceHeader_FORMAT_V0 PieceHeader_FormatVersion = 0
PieceHeader_FORMAT_V1 PieceHeader_FormatVersion = 1
)
var PieceHeader_FormatVersion_name = map[int32]string{
0: "FORMAT_V0",
1: "FORMAT_V1",
}
var PieceHeader_FormatVersion_value = map[string]int32{
"FORMAT_V0": 0,
"FORMAT_V1": 1,
}
func (x PieceHeader_FormatVersion) String() string {
return proto.EnumName(PieceHeader_FormatVersion_name, int32(x))
}
func (PieceHeader_FormatVersion) EnumDescriptor() ([]byte, []int) {
return fileDescriptor_23ff32dd550c2439, []int{8, 0}
}
// Expected order of messages from uplink:
// OrderLimit ->
// repeated
@ -523,7 +548,88 @@ func (m *RetainResponse) XXX_DiscardUnknown() {
var xxx_messageInfo_RetainResponse proto.InternalMessageInfo
// PieceHeader is used in piece storage to keep track of piece attributes.
type PieceHeader struct {
// the storage format version being used for this piece. The piece filename should agree with this.
// The inclusion of this field is intended to aid repairability when filenames are damaged.
FormatVersion PieceHeader_FormatVersion `protobuf:"varint,1,opt,name=format_version,json=formatVersion,proto3,enum=piecestore.PieceHeader_FormatVersion" json:"format_version,omitempty"`
// content hash of the piece
Hash []byte `protobuf:"bytes,2,opt,name=hash,proto3" json:"hash,omitempty"`
// timestamp when upload occurred, as given by the "timestamp" field in the original orders.PieceHash
CreationTime time.Time `protobuf:"bytes,3,opt,name=creation_time,json=creationTime,proto3,stdtime" json:"creation_time"`
// signature from uplink over the original orders.PieceHash (the corresponding PieceHashSigning
// is reconstructable using the piece id from the piecestore, the piece size from the
// filesystem (minus the piece header size), and these (hash, upload_time, signature) fields).
Signature []byte `protobuf:"bytes,4,opt,name=signature,proto3" json:"signature,omitempty"`
// the OrderLimit authorizing storage of this piece, as signed by the satellite and sent by
// the uplink
OrderLimit OrderLimit `protobuf:"bytes,5,opt,name=order_limit,json=orderLimit,proto3" json:"order_limit"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *PieceHeader) Reset() { *m = PieceHeader{} }
func (m *PieceHeader) String() string { return proto.CompactTextString(m) }
func (*PieceHeader) ProtoMessage() {}
func (*PieceHeader) Descriptor() ([]byte, []int) {
return fileDescriptor_23ff32dd550c2439, []int{8}
}
func (m *PieceHeader) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_PieceHeader.Unmarshal(m, b)
}
func (m *PieceHeader) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_PieceHeader.Marshal(b, m, deterministic)
}
func (m *PieceHeader) XXX_Merge(src proto.Message) {
xxx_messageInfo_PieceHeader.Merge(m, src)
}
func (m *PieceHeader) XXX_Size() int {
return xxx_messageInfo_PieceHeader.Size(m)
}
func (m *PieceHeader) XXX_DiscardUnknown() {
xxx_messageInfo_PieceHeader.DiscardUnknown(m)
}
var xxx_messageInfo_PieceHeader proto.InternalMessageInfo
func (m *PieceHeader) GetFormatVersion() PieceHeader_FormatVersion {
if m != nil {
return m.FormatVersion
}
return PieceHeader_FORMAT_V0
}
func (m *PieceHeader) GetHash() []byte {
if m != nil {
return m.Hash
}
return nil
}
func (m *PieceHeader) GetCreationTime() time.Time {
if m != nil {
return m.CreationTime
}
return time.Time{}
}
func (m *PieceHeader) GetSignature() []byte {
if m != nil {
return m.Signature
}
return nil
}
func (m *PieceHeader) GetOrderLimit() OrderLimit {
if m != nil {
return m.OrderLimit
}
return OrderLimit{}
}
func init() {
proto.RegisterEnum("piecestore.PieceHeader_FormatVersion", PieceHeader_FormatVersion_name, PieceHeader_FormatVersion_value)
proto.RegisterType((*PieceUploadRequest)(nil), "piecestore.PieceUploadRequest")
proto.RegisterType((*PieceUploadRequest_Chunk)(nil), "piecestore.PieceUploadRequest.Chunk")
proto.RegisterType((*PieceUploadResponse)(nil), "piecestore.PieceUploadResponse")
@ -535,45 +641,53 @@ func init() {
proto.RegisterType((*PieceDeleteResponse)(nil), "piecestore.PieceDeleteResponse")
proto.RegisterType((*RetainRequest)(nil), "piecestore.RetainRequest")
proto.RegisterType((*RetainResponse)(nil), "piecestore.RetainResponse")
proto.RegisterType((*PieceHeader)(nil), "piecestore.PieceHeader")
}
func init() { proto.RegisterFile("piecestore2.proto", fileDescriptor_23ff32dd550c2439) }
var fileDescriptor_23ff32dd550c2439 = []byte{
// 513 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0x53, 0xcd, 0x6e, 0x13, 0x3d,
0x14, 0xcd, 0xe4, 0x67, 0xd4, 0xef, 0x7e, 0x09, 0xa2, 0x86, 0xa2, 0x60, 0x09, 0x52, 0x86, 0xbf,
0xac, 0xa6, 0x28, 0xdd, 0xa1, 0x52, 0xa4, 0x92, 0x05, 0x08, 0x10, 0x95, 0xa1, 0x1b, 0x36, 0x95,
0x93, 0xb9, 0x49, 0x2d, 0x26, 0xe3, 0x61, 0xec, 0x08, 0xa9, 0xaf, 0xc0, 0x86, 0x47, 0x62, 0xc9,
0x53, 0xc0, 0x82, 0xc7, 0x60, 0x83, 0xc6, 0x3f, 0x2d, 0x93, 0x86, 0x44, 0x20, 0xb1, 0x9a, 0xb1,
0xef, 0x39, 0xf7, 0x1e, 0x9f, 0x63, 0xc3, 0x66, 0x2e, 0x70, 0x8c, 0x4a, 0xcb, 0x02, 0x07, 0x71,
0x5e, 0x48, 0x2d, 0x09, 0x9c, 0x6f, 0x51, 0x98, 0xca, 0xa9, 0xb4, 0xfb, 0xb4, 0x37, 0x95, 0x72,
0x9a, 0xe2, 0x8e, 0x59, 0x8d, 0xe6, 0x93, 0x1d, 0x2d, 0x66, 0xa8, 0x34, 0x9f, 0xe5, 0x0e, 0xd0,
0x96, 0x45, 0x82, 0x85, 0xb2, 0xab, 0xe8, 0x47, 0x00, 0xe4, 0xb0, 0xec, 0x74, 0x94, 0xa7, 0x92,
0x27, 0x0c, 0xdf, 0xcf, 0x51, 0x69, 0xd2, 0x87, 0x56, 0x2a, 0x66, 0x42, 0x77, 0x83, 0xed, 0xa0,
0xff, 0xff, 0x80, 0xc4, 0x8e, 0xf4, 0xaa, 0xfc, 0xbc, 0x28, 0x2b, 0xcc, 0x02, 0xc8, 0x6d, 0x68,
0x99, 0x5a, 0xb7, 0x6e, 0x90, 0x9d, 0x0a, 0x92, 0xd9, 0x1a, 0x79, 0x08, 0xad, 0xf1, 0xc9, 0x3c,
0x7b, 0xd7, 0x6d, 0x18, 0xd0, 0x9d, 0xf8, 0x5c, 0x7c, 0x7c, 0x71, 0x7a, 0xfc, 0xa4, 0xc4, 0x32,
0x4b, 0x21, 0x77, 0xa1, 0x99, 0xc8, 0x0c, 0xbb, 0x4d, 0x43, 0xdd, 0xf4, 0xfd, 0x0d, 0xed, 0x29,
0x57, 0x27, 0xcc, 0x94, 0xe9, 0x2e, 0xb4, 0x0c, 0x8d, 0x5c, 0x83, 0x50, 0x4e, 0x26, 0x0a, 0xad,
0xf6, 0x06, 0x73, 0x2b, 0x42, 0xa0, 0x99, 0x70, 0xcd, 0x8d, 0xce, 0x36, 0x33, 0xff, 0xd1, 0x1e,
0x5c, 0xa9, 0x8c, 0x57, 0xb9, 0xcc, 0x14, 0x9e, 0x8d, 0x0c, 0x56, 0x8e, 0x8c, 0xbe, 0x07, 0x70,
0xd5, 0xec, 0x0d, 0xe5, 0x87, 0xec, 0x1f, 0xba, 0xb7, 0x57, 0x75, 0xef, 0xde, 0x05, 0xf7, 0x16,
0xe6, 0x57, 0xfc, 0xa3, 0xfb, 0xeb, 0x8c, 0xb9, 0x01, 0x60, 0x90, 0xc7, 0x4a, 0x9c, 0xa2, 0x11,
0xd2, 0x60, 0xff, 0x99, 0x9d, 0xd7, 0xe2, 0x14, 0xa3, 0x8f, 0x01, 0x6c, 0x2d, 0x4c, 0x71, 0x36,
0x3d, 0xf2, 0xba, 0xec, 0x31, 0xef, 0xaf, 0xd0, 0x65, 0x19, 0x55, 0x61, 0x7f, 0x95, 0xd8, 0xbe,
0xbb, 0xae, 0x43, 0x4c, 0x51, 0xe3, 0x1f, 0x1b, 0x1e, 0x6d, 0xb9, 0xc4, 0x3d, 0xdf, 0x0a, 0x8b,
0x0a, 0xe8, 0x30, 0xd4, 0x5c, 0x64, 0xbe, 0xe3, 0x33, 0xe8, 0x8c, 0x0b, 0xe4, 0x5a, 0xc8, 0xec,
0x38, 0xe1, 0xda, 0xdf, 0x05, 0x1a, 0xdb, 0xe7, 0x15, 0xfb, 0xe7, 0x15, 0xbf, 0xf1, 0xcf, 0xeb,
0x60, 0xe3, 0xcb, 0xd7, 0x5e, 0xed, 0xd3, 0xb7, 0x5e, 0xc0, 0xda, 0x9e, 0x3a, 0xe4, 0x1a, 0xcb,
0xe3, 0x4d, 0x44, 0xaa, 0x5d, 0xc8, 0x6d, 0xe6, 0x56, 0xd1, 0x65, 0xb8, 0xe4, 0x67, 0x5a, 0x15,
0x83, 0xcf, 0x75, 0x80, 0xc3, 0x33, 0x0f, 0xc9, 0x4b, 0x08, 0xed, 0xc5, 0x24, 0x37, 0x57, 0x3f,
0x18, 0xda, 0xfb, 0x6d, 0xdd, 0x9d, 0xaf, 0xd6, 0x0f, 0xc8, 0x11, 0x6c, 0xf8, 0x40, 0xc8, 0xf6,
0xba, 0x3b, 0x44, 0x6f, 0xad, 0x4d, 0xb3, 0x6c, 0xfa, 0x20, 0x20, 0xcf, 0x21, 0xb4, 0x66, 0x2e,
0x51, 0x59, 0x49, 0x69, 0x89, 0xca, 0x85, 0x14, 0x6a, 0xe4, 0x31, 0x84, 0xd6, 0x13, 0x72, 0xfd,
0x57, 0x70, 0x25, 0x1b, 0x4a, 0x97, 0x95, 0x6c, 0x8b, 0x83, 0xe6, 0xdb, 0x7a, 0x3e, 0x1a, 0x85,
0x26, 0x9e, 0xdd, 0x9f, 0x01, 0x00, 0x00, 0xff, 0xff, 0x1c, 0xd5, 0x7e, 0x72, 0x38, 0x05, 0x00,
0x00,
// 634 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0x54, 0x4f, 0x6f, 0x12, 0x4f,
0x18, 0x66, 0x29, 0x90, 0xf2, 0xc2, 0x12, 0x3a, 0xbf, 0x5f, 0x0d, 0x6e, 0x54, 0xea, 0x6a, 0x95,
0x8b, 0xdb, 0x4a, 0x4f, 0x9a, 0x5a, 0x23, 0x92, 0x46, 0x63, 0x9b, 0x36, 0x63, 0xdb, 0x83, 0x17,
0x32, 0xc0, 0x2c, 0x6c, 0x84, 0x9d, 0x75, 0x67, 0xd0, 0xa4, 0x5f, 0xc1, 0x8b, 0x1f, 0xc9, 0x78,
0xf2, 0x53, 0xe8, 0xc1, 0x8f, 0xe1, 0xc5, 0xcc, 0xcc, 0x2e, 0xb0, 0x40, 0x21, 0x35, 0xf1, 0x04,
0xef, 0xff, 0x67, 0x9e, 0xf7, 0x79, 0x17, 0x36, 0x02, 0x8f, 0x76, 0x28, 0x17, 0x2c, 0xa4, 0x75,
0x27, 0x08, 0x99, 0x60, 0x08, 0x26, 0x2e, 0x0b, 0x7a, 0xac, 0xc7, 0xb4, 0xdf, 0xaa, 0xf6, 0x18,
0xeb, 0x0d, 0xe8, 0x8e, 0xb2, 0xda, 0x23, 0x77, 0x47, 0x78, 0x43, 0xca, 0x05, 0x19, 0x06, 0x51,
0x42, 0x91, 0x85, 0x5d, 0x1a, 0x72, 0x6d, 0xd9, 0xbf, 0x0d, 0x40, 0xa7, 0xb2, 0xd3, 0x79, 0x30,
0x60, 0xa4, 0x8b, 0xe9, 0x87, 0x11, 0xe5, 0x02, 0xd5, 0x20, 0x3b, 0xf0, 0x86, 0x9e, 0xa8, 0x18,
0x5b, 0x46, 0xad, 0x50, 0x47, 0x4e, 0x54, 0x74, 0x22, 0x7f, 0x8e, 0x64, 0x04, 0xeb, 0x04, 0x74,
0x0f, 0xb2, 0x2a, 0x56, 0x49, 0xab, 0x4c, 0x33, 0x91, 0x89, 0x75, 0x0c, 0x3d, 0x85, 0x6c, 0xa7,
0x3f, 0xf2, 0xdf, 0x57, 0xd6, 0x54, 0xd2, 0x7d, 0x67, 0x02, 0xde, 0x99, 0x9f, 0xee, 0xbc, 0x94,
0xb9, 0x58, 0x97, 0xa0, 0x6d, 0xc8, 0x74, 0x99, 0x4f, 0x2b, 0x19, 0x55, 0xba, 0x11, 0xf7, 0x57,
0x65, 0xaf, 0x08, 0xef, 0x63, 0x15, 0xb6, 0xf6, 0x20, 0xab, 0xca, 0xd0, 0x0d, 0xc8, 0x31, 0xd7,
0xe5, 0x54, 0x63, 0x5f, 0xc3, 0x91, 0x85, 0x10, 0x64, 0xba, 0x44, 0x10, 0x85, 0xb3, 0x88, 0xd5,
0x7f, 0x7b, 0x1f, 0xfe, 0x4b, 0x8c, 0xe7, 0x01, 0xf3, 0x39, 0x1d, 0x8f, 0x34, 0x96, 0x8e, 0xb4,
0x7f, 0x19, 0xf0, 0xbf, 0xf2, 0x35, 0xd9, 0x27, 0xff, 0x1f, 0xb2, 0xb7, 0x9f, 0x64, 0xef, 0xc1,
0x1c, 0x7b, 0x33, 0xf3, 0x13, 0xfc, 0x59, 0x07, 0xab, 0x88, 0xb9, 0x0d, 0xa0, 0x32, 0x5b, 0xdc,
0xbb, 0xa4, 0x0a, 0xc8, 0x1a, 0xce, 0x2b, 0xcf, 0x5b, 0xef, 0x92, 0xda, 0x9f, 0x0d, 0xd8, 0x9c,
0x99, 0x12, 0xd1, 0xf4, 0x2c, 0xc6, 0xa5, 0x9f, 0xf9, 0x70, 0x09, 0x2e, 0x5d, 0x91, 0x04, 0xf6,
0x57, 0x1b, 0x3b, 0x88, 0xe4, 0xda, 0xa4, 0x03, 0x2a, 0xe8, 0xb5, 0x09, 0xb7, 0x37, 0xa3, 0x8d,
0xc7, 0xf5, 0x1a, 0x98, 0x1d, 0x82, 0x89, 0xa9, 0x20, 0x9e, 0x1f, 0x77, 0x7c, 0x0d, 0x66, 0x27,
0xa4, 0x44, 0x78, 0xcc, 0x6f, 0x75, 0x89, 0x88, 0xb5, 0x60, 0x39, 0xfa, 0xbc, 0x9c, 0xf8, 0xbc,
0x9c, 0xb3, 0xf8, 0xbc, 0x1a, 0xeb, 0xdf, 0x7f, 0x54, 0x53, 0x5f, 0x7e, 0x56, 0x0d, 0x5c, 0x8c,
0x4b, 0x9b, 0x44, 0x50, 0xf9, 0x3c, 0xd7, 0x1b, 0x88, 0x68, 0xc9, 0x45, 0x1c, 0x59, 0x76, 0x19,
0x4a, 0xf1, 0xcc, 0x08, 0xc5, 0xb7, 0x34, 0x14, 0xb4, 0xc8, 0x28, 0x91, 0x8b, 0x3f, 0x82, 0x92,
0xcb, 0xc2, 0x21, 0x11, 0xad, 0x8f, 0x34, 0xe4, 0x1e, 0xf3, 0x15, 0x8a, 0x52, 0x7d, 0x7b, 0x8e,
0x69, 0x5d, 0xe0, 0x1c, 0xaa, 0xec, 0x0b, 0x9d, 0x8c, 0x4d, 0x77, 0xda, 0x94, 0x74, 0xf6, 0x09,
0xef, 0xc7, 0x74, 0xca, 0xff, 0x89, 0x67, 0xca, 0x0f, 0x45, 0x24, 0xb1, 0x6b, 0x3e, 0x53, 0x06,
0xd1, 0x2d, 0xc8, 0x73, 0xaf, 0xe7, 0x13, 0x31, 0x0a, 0xf5, 0xb1, 0x16, 0xf1, 0xc4, 0x81, 0x9e,
0x40, 0x41, 0xed, 0xa4, 0xa5, 0xf7, 0x94, 0xbd, 0x6a, 0x4f, 0x8d, 0x8c, 0x6c, 0x8f, 0x81, 0x8d,
0x3d, 0xf6, 0x23, 0x30, 0x13, 0xef, 0x42, 0x26, 0xe4, 0x0f, 0x4f, 0xf0, 0xf1, 0x8b, 0xb3, 0xd6,
0xc5, 0x6e, 0x39, 0x35, 0x6d, 0x3e, 0x2e, 0x1b, 0xf5, 0xaf, 0x69, 0x80, 0xd3, 0x31, 0x3d, 0xe8,
0x18, 0x72, 0xfa, 0xba, 0xd1, 0x9d, 0xe5, 0x5f, 0x1d, 0xab, 0x7a, 0x65, 0x3c, 0x5a, 0x4f, 0xaa,
0x66, 0xa0, 0x73, 0x58, 0x8f, 0x55, 0x8d, 0xb6, 0x56, 0x1d, 0xa2, 0x75, 0x77, 0xe5, 0x49, 0xc8,
0xa6, 0xbb, 0x06, 0x7a, 0x03, 0x39, 0xad, 0xc8, 0x05, 0x28, 0x13, 0x52, 0x5f, 0x80, 0x72, 0x46,
0xca, 0x29, 0xf4, 0x1c, 0x72, 0x5a, 0x58, 0xe8, 0xe6, 0x74, 0x72, 0x42, 0xe0, 0x96, 0xb5, 0x28,
0xa4, 0x5b, 0x34, 0x32, 0xef, 0xd2, 0x41, 0xbb, 0x9d, 0x53, 0xcb, 0xdf, 0xfb, 0x13, 0x00, 0x00,
0xff, 0xff, 0x64, 0x65, 0x86, 0x27, 0x7d, 0x06, 0x00, 0x00,
}
// Reference imports to suppress errors if they are not otherwise used.

View File

@ -90,3 +90,25 @@ message RetainRequest {
message RetainResponse {
}
// PieceHeader is used in piece storage to keep track of piece attributes.
message PieceHeader {
enum FormatVersion {
FORMAT_V0 = 0;
FORMAT_V1 = 1;
}
// the storage format version being used for this piece. The piece filename should agree with this.
// The inclusion of this field is intended to aid repairability when filenames are damaged.
FormatVersion format_version = 1;
// content hash of the piece
bytes hash = 2;
// timestamp when upload occurred, as given by the "timestamp" field in the original orders.PieceHash
google.protobuf.Timestamp creation_time = 3 [(gogoproto.stdtime) = true, (gogoproto.nullable) = false];
// signature from uplink over the original orders.PieceHash (the corresponding PieceHashSigning
// is reconstructable using the piece id from the piecestore, the piece size from the
// filesystem (minus the piece header size), and these (hash, upload_time, signature) fields).
bytes signature = 4;
// the OrderLimit authorizing storage of this piece, as signed by the satellite and sent by
// the uplink
orders.OrderLimit order_limit = 5 [(gogoproto.nullable) = false];
}
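
For orientation, a minimal sketch of building and serializing the new PieceHeader message with the generated Go bindings shown earlier. The hash, signature, and order limit values are placeholders, and the way the storage node frames the serialized header at the front of a V1 (.sj1) piece file is handled elsewhere and not shown in this diff.

package main

import (
	"fmt"
	"time"

	"github.com/gogo/protobuf/proto"

	"storj.io/storj/pkg/pb"
)

func main() {
	// Placeholder values; in real uploads these come from the uplink and the
	// signed orders.PieceHash.
	header := &pb.PieceHeader{
		FormatVersion: pb.PieceHeader_FORMAT_V1,
		Hash:          []byte("content-hash-placeholder"),
		CreationTime:  time.Now(),
		Signature:     []byte("uplink-signature-placeholder"),
		OrderLimit:    pb.OrderLimit{},
	}

	// The generated type is a gogo/protobuf message, so proto.Marshal works.
	// The framing of these bytes inside a .sj1 piece file is not part of the
	// hunks shown here.
	headerBytes, err := proto.Marshal(header)
	if err != nil {
		panic(err)
	}
	fmt.Printf("serialized PieceHeader: %d bytes\n", len(headerBytes))
}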

View File

@ -4941,6 +4941,20 @@
{
"protopath": "pkg:/:pb:/:piecestore2.proto",
"def": {
"enums": [
{
"name": "PieceHeader.FormatVersion",
"enum_fields": [
{
"name": "FORMAT_V0"
},
{
"name": "FORMAT_V1",
"integer": 1
}
]
}
],
"messages": [
{
"name": "PieceUploadRequest",
@ -5098,6 +5112,52 @@
},
{
"name": "RetainResponse"
},
{
"name": "PieceHeader",
"fields": [
{
"id": 1,
"name": "format_version",
"type": "FormatVersion"
},
{
"id": 2,
"name": "hash",
"type": "bytes"
},
{
"id": 3,
"name": "creation_time",
"type": "google.protobuf.Timestamp",
"options": [
{
"name": "(gogoproto.stdtime)",
"value": "true"
},
{
"name": "(gogoproto.nullable)",
"value": "false"
}
]
},
{
"id": 4,
"name": "signature",
"type": "bytes"
},
{
"id": 5,
"name": "order_limit",
"type": "orders.OrderLimit",
"options": [
{
"name": "(gogoproto.nullable)",
"value": "false"
}
]
}
]
}
],
"services": [

View File

@ -19,6 +19,7 @@ import (
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/storj"
"storj.io/storj/satellite"
"storj.io/storj/storage"
)
// TestGarbageCollection does the following:
@ -76,9 +77,12 @@ func TestGarbageCollection(t *testing.T) {
require.NoError(t, err)
// Check that piece of the deleted object is on the storagenode
pieceInfo, err := targetNode.DB.PieceInfo().Get(ctx, satellite.ID(), deletedPieceID)
pieceAccess, err := targetNode.DB.Pieces().Stat(ctx, storage.BlobRef{
Namespace: satellite.ID().Bytes(),
Key: deletedPieceID.Bytes(),
})
require.NoError(t, err)
require.NotNil(t, pieceInfo)
require.NotNil(t, pieceAccess)
// The pieceInfo.GetPieceIDs query converts piece creation and the filter creation timestamps
// to datetime in sql. This chops off all precision beyond seconds.
@ -91,14 +95,20 @@ func TestGarbageCollection(t *testing.T) {
gcService.Loop.TriggerWait()
// Check that piece of the deleted object is not on the storagenode
pieceInfo, err = targetNode.DB.PieceInfo().Get(ctx, satellite.ID(), deletedPieceID)
pieceAccess, err = targetNode.DB.Pieces().Stat(ctx, storage.BlobRef{
Namespace: satellite.ID().Bytes(),
Key: deletedPieceID.Bytes(),
})
require.Error(t, err)
require.Nil(t, pieceInfo)
require.Nil(t, pieceAccess)
// Check that piece of the kept object is on the storagenode
pieceInfo, err = targetNode.DB.PieceInfo().Get(ctx, satellite.ID(), keptPieceID)
pieceAccess, err = targetNode.DB.Pieces().Stat(ctx, storage.BlobRef{
Namespace: satellite.ID().Bytes(),
Key: keptPieceID.Bytes(),
})
require.NoError(t, err)
require.NotNil(t, pieceInfo)
require.NotNil(t, pieceAccess)
})
}

View File

@ -6,6 +6,7 @@ package storage
import (
"context"
"io"
"os"
"github.com/zeebo/errs"
)
@ -13,6 +14,16 @@ import (
// ErrInvalidBlobRef is returned when a blob reference is invalid
var ErrInvalidBlobRef = errs.Class("invalid blob ref")
// FormatVersion represents differing storage format version values. Different Blobs implementors
// might interpret different FormatVersion values differently, but they share a type so that there
// can be a common StorageFormatVersion() call on the interface.
//
// Changes in FormatVersion might affect how a Blobs or BlobReader or BlobWriter instance works, or
// they might only be relevant to some higher layer. A FormatVersion must be specified when writing
// a new blob, and the blob storage interface must store that value with the blob somehow, so that
// the same FormatVersion is returned later when reading that stored blob.
type FormatVersion int
// BlobRef is a reference to a blob
type BlobRef struct {
Namespace []byte
@ -32,17 +43,23 @@ type BlobReader interface {
io.Closer
// Size returns the size of the blob
Size() (int64, error)
// StorageFormatVersion returns the storage format version associated with the blob.
StorageFormatVersion() FormatVersion
}
// BlobWriter is an interface that groups Read, ReadAt, Seek and Close.
// BlobWriter defines the interface that must be satisfied for a general blob storage provider.
// BlobWriter instances are returned by the Create() method on Blobs instances.
type BlobWriter interface {
io.Writer
io.Seeker
// Cancel discards the blob.
Cancel(context.Context) error
// Commit ensures that the blob is readable by others.
Commit(context.Context) error
// Size returns the size of the blob
Size() (int64, error)
// StorageFormatVersion returns the storage format version associated with the blob.
StorageFormatVersion() FormatVersion
}
// Blobs is a blob storage interface
@ -52,8 +69,41 @@ type Blobs interface {
Create(ctx context.Context, ref BlobRef, size int64) (BlobWriter, error)
// Open opens a reader with the specified namespace and key
Open(ctx context.Context, ref BlobRef) (BlobReader, error)
// OpenWithStorageFormat opens a reader for the already-located blob, avoiding the potential
// need to check multiple storage formats to find the blob.
OpenWithStorageFormat(ctx context.Context, ref BlobRef, formatVer FormatVersion) (BlobReader, error)
// Delete deletes the blob with the namespace and key
Delete(ctx context.Context, ref BlobRef) error
// Stat looks up disk metadata on the blob file
Stat(ctx context.Context, ref BlobRef) (BlobInfo, error)
// StatWithStorageFormat looks up disk metadata for the blob file with the given storage format
// version. This avoids the potential need to check multiple storage formats for the blob
// when the format is already known.
StatWithStorageFormat(ctx context.Context, ref BlobRef, formatVer FormatVersion) (BlobInfo, error)
// FreeSpace returns how much free space is left for writing
FreeSpace() (int64, error)
// SpaceUsed adds up how much is used in all namespaces
SpaceUsed(ctx context.Context) (int64, error)
// SpaceUsedInNamespace adds up how much is used in the given namespace
SpaceUsedInNamespace(ctx context.Context, namespace []byte) (int64, error)
// ListNamespaces finds all namespaces in which keys might currently be stored.
ListNamespaces(ctx context.Context) ([][]byte, error)
// WalkNamespace executes walkFunc for each locally stored blob, stored with
// storage format V1 or greater, in the given namespace. If walkFunc returns a non-nil
// error, WalkNamespace will stop iterating and return the error immediately. The ctx
// parameter is intended to allow canceling iteration early.
WalkNamespace(ctx context.Context, namespace []byte, walkFunc func(BlobInfo) error) error
}
// BlobInfo allows lazy inspection of a blob and its underlying file during iteration with
// WalkNamespace-type methods
type BlobInfo interface {
// BlobRef returns the relevant BlobRef for the blob
BlobRef() BlobRef
// StorageFormatVersion indicates the storage format version used to store the piece
StorageFormatVersion() FormatVersion
// FullPath gives the full path to the on-disk blob file
FullPath(ctx context.Context) (string, error)
// Stat does a stat on the on-disk blob file
Stat(ctx context.Context) (os.FileInfo, error)
}
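
A minimal sketch of the call pattern these interfaces enable (the helper name is made up): Stat locates the blob, probing storage formats if necessary, and the returned BlobInfo lets the caller reopen the blob without searching the formats again.

package sketch

import (
	"context"

	"storj.io/storj/storage"
)

// openKnownFormat is a hypothetical helper: locate the blob once with Stat,
// then open it with the format version recorded in the returned BlobInfo so
// the store does not have to probe multiple storage formats a second time.
func openKnownFormat(ctx context.Context, blobs storage.Blobs, ref storage.BlobRef) (storage.BlobReader, error) {
	info, err := blobs.Stat(ctx, ref)
	if err != nil {
		return nil, err
	}
	return blobs.OpenWithStorageFormat(ctx, info.BlobRef(), info.StorageFormatVersion())
}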

View File

@ -13,13 +13,39 @@ import (
"storj.io/storj/storage"
)
const (
// FormatV0 is the identifier for storage format v0, which also corresponds to an absence of
// format version information.
FormatV0 storage.FormatVersion = 0
// FormatV1 is the identifier for storage format v1
FormatV1 storage.FormatVersion = 1
// Note: New FormatVersion values should be consecutive, as certain parts of this blob store
// iterate over them numerically and check for blobs stored with each version.
)
const (
// MaxFormatVersionSupported is the highest supported storage format version for reading, and
// the only supported storage format version for writing. If stored blobs claim a higher
// storage format version than this, or a caller requests _writing_ a storage format version
// which is not this, this software will not know how to perform the read or write and an error
// will be returned.
MaxFormatVersionSupported = FormatV1
// MinFormatVersionSupported is the lowest supported storage format version for reading. If
// stored blobs claim a lower storage format version than this, this software will not know how
// to perform the read and an error will be returned.
MinFormatVersionSupported = FormatV0
)
// blobReader implements reading blobs
type blobReader struct {
*os.File
formatVersion storage.FormatVersion
}
func newBlobReader(file *os.File) *blobReader {
return &blobReader{file}
func newBlobReader(file *os.File, formatVersion storage.FormatVersion) *blobReader {
return &blobReader{file, formatVersion}
}
// Size returns how large is the blob.
@ -31,17 +57,29 @@ func (blob *blobReader) Size() (int64, error) {
return stat.Size(), err
}
// StorageFormatVersion gets the storage format version being used by the blob.
func (blob *blobReader) StorageFormatVersion() storage.FormatVersion {
return blob.formatVersion
}
// blobWriter implements writing blobs
type blobWriter struct {
ref storage.BlobRef
store *Store
closed bool
ref storage.BlobRef
store *Store
closed bool
formatVersion storage.FormatVersion
*os.File
}
func newBlobWriter(ref storage.BlobRef, store *Store, file *os.File) *blobWriter {
return &blobWriter{ref, store, false, file}
func newBlobWriter(ref storage.BlobRef, store *Store, formatVersion storage.FormatVersion, file *os.File) *blobWriter {
return &blobWriter{
ref: ref,
store: store,
closed: false,
formatVersion: formatVersion,
File: file,
}
}
// Cancel discards the blob.
@ -63,7 +101,7 @@ func (blob *blobWriter) Commit(ctx context.Context) (err error) {
return Error.New("already closed")
}
blob.closed = true
err = blob.store.dir.Commit(ctx, blob.File, blob.ref)
err = blob.store.dir.Commit(ctx, blob.File, blob.ref, blob.formatVersion)
return Error.Wrap(err)
}
@ -75,3 +113,8 @@ func (blob *blobWriter) Size() (int64, error) {
}
return pos, err
}
// StorageFormatVersion indicates what storage format version the blob is using.
func (blob *blobWriter) StorageFormatVersion() storage.FormatVersion {
return blob.formatVersion
}

View File

@ -11,6 +11,7 @@ import (
"math"
"os"
"path/filepath"
"strings"
"sync"
"github.com/zeebo/errs"
@ -21,6 +22,10 @@ import (
const (
blobPermission = 0600
dirPermission = 0700
v0PieceFileSuffix = ""
v1PieceFileSuffix = ".sj1"
unknownPieceFileSuffix = "/..error_unknown_format../"
)
var pathEncoding = base32.NewEncoding("abcdefghijklmnopqrstuvwxyz234567").WithPadding(base32.NoPadding)
@ -81,8 +86,11 @@ func (dir *Dir) DeleteTemporary(ctx context.Context, file *os.File) (err error)
return errs.Combine(closeErr, os.Remove(file.Name()))
}
// blobToPath converts blob reference to a filepath in permanent storage
func (dir *Dir) blobToPath(ref storage.BlobRef) (string, error) {
// blobToBasePath converts a blob reference to a filepath in permanent storage. This may not be the
// entire path; blobPathForFormatVersion() must also be used. This is a separate call because this
// part of the filepath is constant, and blobPathForFormatVersion may need to be called multiple
// times with different storage.FormatVersion values.
func (dir *Dir) blobToBasePath(ref storage.BlobRef) (string, error) {
if !ref.IsValid() {
return "", storage.ErrInvalidBlobRef.New("")
}
@ -90,14 +98,27 @@ func (dir *Dir) blobToPath(ref storage.BlobRef) (string, error) {
namespace := pathEncoding.EncodeToString(ref.Namespace)
key := pathEncoding.EncodeToString(ref.Key)
if len(key) < 3 {
// ensure we always have at least
// ensure we always have enough characters to split [:2] and [2:]
key = "11" + key
}
return filepath.Join(dir.blobsdir(), namespace, key[:2], key[2:]), nil
}
// blobToTrashPath converts blob reference to a filepath in transient storage
// the files in trash are deleted in an interval (in case the initial deletion didn't work for some reason)
// blobPathForFormatVersion adjusts a bare blob path (as might have been generated by a call to
// blobToBasePath()) to what it should be for the given storage format version.
func blobPathForFormatVersion(path string, formatVersion storage.FormatVersion) string {
switch formatVersion {
case FormatV0:
return path + v0PieceFileSuffix
case FormatV1:
return path + v1PieceFileSuffix
}
return path + unknownPieceFileSuffix
}
// blobToTrashPath converts a blob reference to a filepath in transient storage.
// The files in trash are deleted on an interval (in case the initial deletion didn't work for
// some reason).
func (dir *Dir) blobToTrashPath(ref storage.BlobRef) string {
var name []byte
name = append(name, ref.Namespace...)
@ -105,8 +126,8 @@ func (dir *Dir) blobToTrashPath(ref storage.BlobRef) string {
return filepath.Join(dir.garbagedir(), pathEncoding.EncodeToString(name))
}
// Commit commits temporary file to the permanent storage
func (dir *Dir) Commit(ctx context.Context, file *os.File, ref storage.BlobRef) (err error) {
// Commit commits the temporary file to permanent storage.
func (dir *Dir) Commit(ctx context.Context, file *os.File, ref storage.BlobRef, formatVersion storage.FormatVersion) (err error) {
defer mon.Task()(&ctx)(&err)
position, seekErr := file.Seek(0, io.SeekCurrent)
truncErr := file.Truncate(position)
@ -119,11 +140,12 @@ func (dir *Dir) Commit(ctx context.Context, file *os.File, ref storage.BlobRef)
return errs.Combine(seekErr, truncErr, syncErr, chmodErr, closeErr, removeErr)
}
path, err := dir.blobToPath(ref)
path, err := dir.blobToBasePath(ref)
if err != nil {
removeErr := os.Remove(file.Name())
return errs.Combine(err, removeErr)
}
path = blobPathForFormatVersion(path, formatVersion)
mkdirErr := os.MkdirAll(filepath.Dir(path), dirPermission)
if os.IsExist(mkdirErr) {
@ -144,69 +166,158 @@ func (dir *Dir) Commit(ctx context.Context, file *os.File, ref storage.BlobRef)
return nil
}
// Open opens the file with the specified ref
func (dir *Dir) Open(ctx context.Context, ref storage.BlobRef) (_ *os.File, err error) {
// Open opens the file with the specified ref. It may need to check in more than one location in
// order to find the blob, if it was stored with an older version of the storage node software.
// In cases where the storage format version of a blob is already known, OpenWithStorageFormat()
// will generally be a better choice.
func (dir *Dir) Open(ctx context.Context, ref storage.BlobRef) (_ *os.File, _ storage.FormatVersion, err error) {
defer mon.Task()(&ctx)(&err)
path, err := dir.blobToPath(ref)
path, err := dir.blobToBasePath(ref)
if err != nil {
return nil, FormatV0, err
}
for formatVer := MaxFormatVersionSupported; formatVer >= MinFormatVersionSupported; formatVer-- {
vPath := blobPathForFormatVersion(path, formatVer)
file, err := openFileReadOnly(vPath, blobPermission)
if err == nil {
return file, formatVer, nil
}
if !os.IsNotExist(err) {
return nil, FormatV0, Error.New("unable to open %q: %v", vPath, err)
}
}
return nil, FormatV0, os.ErrNotExist
}
// OpenWithStorageFormat opens an already-located blob file with a known storage format version,
// which avoids the potential need to search through multiple storage formats to find the blob.
func (dir *Dir) OpenWithStorageFormat(ctx context.Context, blobRef storage.BlobRef, formatVer storage.FormatVersion) (_ *os.File, err error) {
defer mon.Task()(&ctx)(&err)
path, err := dir.blobToBasePath(blobRef)
if err != nil {
return nil, err
}
file, err := openFileReadOnly(path, blobPermission)
if err != nil {
if os.IsNotExist(err) {
return nil, err
}
return nil, Error.New("unable to open %q: %v", path, err)
vPath := blobPathForFormatVersion(path, formatVer)
file, err := openFileReadOnly(vPath, blobPermission)
if err == nil {
return file, nil
}
return file, nil
if os.IsNotExist(err) {
return nil, err
}
return nil, Error.New("unable to open %q: %v", vPath, err)
}
// Delete deletes file with the specified ref
// Stat looks up disk metadata on the blob file. It may need to check in more than one location
// in order to find the blob, if it was stored with an older version of the storage node software.
// In cases where the storage format version of a blob is already known, StatWithStorageFormat()
// will generally be a better choice.
func (dir *Dir) Stat(ctx context.Context, ref storage.BlobRef) (_ storage.BlobInfo, err error) {
defer mon.Task()(&ctx)(&err)
path, err := dir.blobToBasePath(ref)
if err != nil {
return nil, err
}
for formatVer := MaxFormatVersionSupported; formatVer >= MinFormatVersionSupported; formatVer-- {
vPath := blobPathForFormatVersion(path, formatVer)
stat, err := os.Stat(vPath)
if err == nil {
return newBlobInfo(ref, vPath, stat, formatVer), nil
}
if !os.IsNotExist(err) {
return nil, Error.New("unable to stat %q: %v", vPath, err)
}
}
return nil, os.ErrNotExist
}
// StatWithStorageFormat looks up disk metadata on the blob file with the given storage format
// version. This avoids the need for checking for the file in multiple different storage format
// types.
func (dir *Dir) StatWithStorageFormat(ctx context.Context, ref storage.BlobRef, formatVer storage.FormatVersion) (_ storage.BlobInfo, err error) {
defer mon.Task()(&ctx)(&err)
path, err := dir.blobToBasePath(ref)
if err != nil {
return nil, err
}
vPath := blobPathForFormatVersion(path, formatVer)
stat, err := os.Stat(vPath)
if err == nil {
return newBlobInfo(ref, vPath, stat, formatVer), nil
}
if os.IsNotExist(err) {
return nil, err
}
return nil, Error.New("unable to stat %q: %v", vPath, err)
}
// Delete deletes blobs with the specified ref (in all supported storage formats).
func (dir *Dir) Delete(ctx context.Context, ref storage.BlobRef) (err error) {
defer mon.Task()(&ctx)(&err)
path, err := dir.blobToPath(ref)
pathBase, err := dir.blobToBasePath(ref)
if err != nil {
return err
}
trashPath := dir.blobToTrashPath(ref)
// move to trash folder, this is allowed for some OS-es
moveErr := rename(path, trashPath)
var (
moveErr error
combinedErrors errs.Group
)
// ignore concurrent delete
if os.IsNotExist(moveErr) {
return nil
}
if moveErr != nil {
trashPath = path
// Try deleting all possible paths, starting with the oldest format version. It is more
// likely, in the general case, that we will find the piece with the newest format version
// instead, but if we iterate backward here then we run the risk of a race condition: the
// piece might have existed with _SomeOldVer before the Delete call, and could then have
// been updated atomically with _MaxVer concurrently while we were iterating. If we iterate
// _forwards_, this race should not occur because it is assumed that pieces are never
// rewritten with an _older_ storage format version.
for i := MinFormatVersionSupported; i <= MaxFormatVersionSupported; i++ {
verPath := blobPathForFormatVersion(pathBase, i)
// move to trash folder, this is allowed for some OS-es
moveErr = rename(verPath, trashPath)
if os.IsNotExist(moveErr) {
// no piece at that path; either it has a different storage format version or there
// was a concurrent delete. (this function is expected by callers to return a nil
// error in the case of concurrent deletes.)
continue
}
if moveErr != nil {
// piece could not be moved into the trash dir; we'll try removing it directly
trashPath = verPath
}
// try removing the file
err = os.Remove(trashPath)
// ignore concurrent deletes
if os.IsNotExist(err) {
// something is happening at the same time as this; possibly a concurrent delete,
// or possibly a rewrite of the blob. keep checking for more versions.
continue
}
// the remove may have failed because of an open file handle. put it in a queue to be
// retried later.
if err != nil {
dir.mu.Lock()
dir.deleteQueue = append(dir.deleteQueue, trashPath)
dir.mu.Unlock()
}
// ignore is-busy errors, they are still in the queue
// but no need to notify
if isBusy(err) {
err = nil
}
combinedErrors.Add(err)
}
// try removing the file
err = os.Remove(trashPath)
// ignore concurrent deletes
if os.IsNotExist(err) {
return nil
}
// this may fail, because someone might be still reading it
if err != nil {
dir.mu.Lock()
dir.deleteQueue = append(dir.deleteQueue, trashPath)
dir.mu.Unlock()
}
// ignore is busy errors, they are still in the queue
// but no need to notify
if isBusy(err) {
err = nil
}
return err
return combinedErrors.Err()
}
// GarbageCollect collects files that are pending deletion
// GarbageCollect collects files that are pending deletion.
func (dir *Dir) GarbageCollect(ctx context.Context) (err error) {
defer mon.Task()(&ctx)(&err)
offset := int(math.MaxInt32)
@ -238,6 +349,144 @@ func (dir *Dir) GarbageCollect(ctx context.Context) (err error) {
return nil
}
const nameBatchSize = 1024
// ListNamespaces finds all known namespace IDs in use in local storage. They are not
// guaranteed to contain any blobs.
func (dir *Dir) ListNamespaces(ctx context.Context) (ids [][]byte, err error) {
defer mon.Task()(&ctx)(&err)
topBlobDir := dir.blobsdir()
openDir, err := os.Open(topBlobDir)
if err != nil {
return nil, err
}
defer func() { err = errs.Combine(err, openDir.Close()) }()
for {
dirNames, err := openDir.Readdirnames(nameBatchSize)
if err != nil && err != io.EOF {
return nil, err
}
if len(dirNames) == 0 {
return ids, nil
}
for _, name := range dirNames {
namespace, err := pathEncoding.DecodeString(name)
if err != nil {
// just an invalid directory entry, and not a namespace. probably
// don't need to pass on this error
continue
}
ids = append(ids, namespace)
}
}
}
// WalkNamespace executes walkFunc for each locally stored blob, stored with storage format V1 or
// greater, in the given namespace. If walkFunc returns a non-nil error, WalkNamespace will stop
// iterating and return the error immediately. The ctx parameter is intended specifically to allow
// canceling iteration early.
func (dir *Dir) WalkNamespace(ctx context.Context, namespace []byte, walkFunc func(storage.BlobInfo) error) (err error) {
namespaceDir := pathEncoding.EncodeToString(namespace)
nsDir := filepath.Join(dir.blobsdir(), namespaceDir)
openDir, err := os.Open(nsDir)
if err != nil {
if os.IsNotExist(err) {
// job accomplished: there are no blobs in this namespace!
return nil
}
return err
}
defer func() { err = errs.Combine(err, openDir.Close()) }()
for {
// check for context done both before and after our readdir() call
if err := ctx.Err(); err != nil {
return err
}
subdirNames, err := openDir.Readdirnames(nameBatchSize)
if err != nil && err != io.EOF {
return err
}
if os.IsNotExist(err) || len(subdirNames) == 0 {
return nil
}
if err := ctx.Err(); err != nil {
return err
}
for _, keyPrefix := range subdirNames {
if len(keyPrefix) != 2 {
// just an invalid subdir; could be garbage of many kinds. probably
// don't need to pass on this error
continue
}
err := dir.walkNamespaceWithPrefix(ctx, namespace, nsDir, keyPrefix, walkFunc)
if err != nil {
return err
}
}
}
}
func decodeBlobInfo(namespace []byte, keyPrefix, keyDir string, keyInfo os.FileInfo) (info storage.BlobInfo, ok bool) {
blobFileName := keyInfo.Name()
encodedKey := keyPrefix + blobFileName
formatVer := FormatV0
if strings.HasSuffix(blobFileName, v1PieceFileSuffix) {
formatVer = FormatV1
encodedKey = encodedKey[0 : len(encodedKey)-len(v1PieceFileSuffix)]
}
key, err := pathEncoding.DecodeString(encodedKey)
if err != nil {
return nil, false
}
ref := storage.BlobRef{
Namespace: namespace,
Key: key,
}
return newBlobInfo(ref, filepath.Join(keyDir, blobFileName), keyInfo, formatVer), true
}
func (dir *Dir) walkNamespaceWithPrefix(ctx context.Context, namespace []byte, nsDir, keyPrefix string, walkFunc func(storage.BlobInfo) error) (err error) {
keyDir := filepath.Join(nsDir, keyPrefix)
openDir, err := os.Open(keyDir)
if err != nil {
return err
}
defer func() { err = errs.Combine(err, openDir.Close()) }()
for {
// check for context done both before and after our readdir() call
if err := ctx.Err(); err != nil {
return err
}
keyInfos, err := openDir.Readdir(nameBatchSize)
if err != nil && err != io.EOF {
return err
}
if os.IsNotExist(err) || len(keyInfos) == 0 {
return nil
}
if err := ctx.Err(); err != nil {
return err
}
for _, keyInfo := range keyInfos {
if keyInfo.Mode().IsDir() {
continue
}
info, ok := decodeBlobInfo(namespace, keyPrefix, keyDir, keyInfo)
if !ok {
continue
}
err = walkFunc(info)
if err != nil {
return err
}
// also check for context done between every walkFunc callback.
if err := ctx.Err(); err != nil {
return err
}
}
}
}
// removeAllContent deletes everything in the folder
func removeAllContent(ctx context.Context, path string) (err error) {
defer mon.Task()(&ctx)(&err)
@ -275,3 +524,35 @@ func (dir *Dir) Info() (DiskInfo, error) {
}
return diskInfoFromPath(path)
}
type blobInfo struct {
ref storage.BlobRef
path string
fileInfo os.FileInfo
formatVersion storage.FormatVersion
}
func newBlobInfo(ref storage.BlobRef, path string, fileInfo os.FileInfo, formatVer storage.FormatVersion) storage.BlobInfo {
return &blobInfo{
ref: ref,
path: path,
fileInfo: fileInfo,
formatVersion: formatVer,
}
}
func (info *blobInfo) BlobRef() storage.BlobRef {
return info.ref
}
func (info *blobInfo) StorageFormatVersion() storage.FormatVersion {
return info.formatVersion
}
func (info *blobInfo) Stat(ctx context.Context) (os.FileInfo, error) {
return info.fileInfo, nil
}
func (info *blobInfo) FullPath(ctx context.Context) (string, error) {
return info.path, nil
}
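
To make the resulting on-disk layout concrete, here is a small sketch of the path shape produced by blobToBasePath plus blobPathForFormatVersion; the "blobs" root and the sample namespace and key bytes are invented for illustration.

package main

import (
	"encoding/base32"
	"fmt"
	"path/filepath"
)

// Same encoding as pathEncoding in dir.go: lowercase base32, no padding.
var pathEncoding = base32.NewEncoding("abcdefghijklmnopqrstuvwxyz234567").WithPadding(base32.NoPadding)

// examplePath mirrors blobToBasePath + blobPathForFormatVersion:
// <blobs root>/<encoded namespace>/<key[:2]>/<key[2:]><suffix>
func examplePath(namespace, key []byte, suffix string) string {
	ns := pathEncoding.EncodeToString(namespace)
	k := pathEncoding.EncodeToString(key)
	if len(k) < 3 {
		k = "11" + k // same trick dir.go uses to guarantee a splittable key
	}
	return filepath.Join("blobs", ns, k[:2], k[2:]) + suffix
}

func main() {
	namespace := []byte("example-satellite-id")
	key := []byte("example-piece-id")
	fmt.Println("V0:", examplePath(namespace, key, ""))     // format v0: bare path
	fmt.Println("V1:", examplePath(namespace, key, ".sj1")) // format v1: .sj1 suffix
}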

View File

@ -8,7 +8,8 @@ import (
"os"
"github.com/zeebo/errs"
monkit "gopkg.in/spacemonkeygo/monkit.v2"
"go.uber.org/zap"
"gopkg.in/spacemonkeygo/monkit.v2"
"storj.io/storj/storage"
)
@ -25,20 +26,21 @@ var (
// Store implements a blob store
type Store struct {
dir *Dir
log *zap.Logger
}
// New creates a new disk blob store in the specified directory
func New(dir *Dir) *Store {
return &Store{dir}
func New(log *zap.Logger, dir *Dir) *Store {
return &Store{dir: dir, log: log}
}
// NewAt creates a new disk blob store in the specified directory
func NewAt(path string) (*Store, error) {
func NewAt(log *zap.Logger, path string) (*Store, error) {
dir, err := NewDir(path)
if err != nil {
return nil, Error.Wrap(err)
}
return &Store{dir}, nil
return &Store{dir: dir, log: log}, nil
}
// Close closes the store.
@ -47,14 +49,42 @@ func (store *Store) Close() error { return nil }
// Open loads blob with the specified hash
func (store *Store) Open(ctx context.Context, ref storage.BlobRef) (_ storage.BlobReader, err error) {
defer mon.Task()(&ctx)(&err)
file, err := store.dir.Open(ctx, ref)
file, formatVer, err := store.dir.Open(ctx, ref)
if err != nil {
if os.IsNotExist(err) {
return nil, err
}
return nil, Error.Wrap(err)
}
return newBlobReader(file), nil
return newBlobReader(file, formatVer), nil
}
// OpenWithStorageFormat loads the already-located blob, avoiding the potential need to check multiple
// storage formats to find the blob.
func (store *Store) OpenWithStorageFormat(ctx context.Context, blobRef storage.BlobRef, formatVer storage.FormatVersion) (_ storage.BlobReader, err error) {
defer mon.Task()(&ctx)(&err)
file, err := store.dir.OpenWithStorageFormat(ctx, blobRef, formatVer)
if err != nil {
if os.IsNotExist(err) {
return nil, err
}
return nil, Error.Wrap(err)
}
return newBlobReader(file, formatVer), nil
}
// Stat looks up disk metadata on the blob file
func (store *Store) Stat(ctx context.Context, ref storage.BlobRef) (_ storage.BlobInfo, err error) {
defer mon.Task()(&ctx)(&err)
info, err := store.dir.Stat(ctx, ref)
return info, Error.Wrap(err)
}
// StatWithStorageFormat looks up disk metadata on the blob file with the given storage format version
func (store *Store) StatWithStorageFormat(ctx context.Context, ref storage.BlobRef, formatVer storage.FormatVersion) (_ storage.BlobInfo, err error) {
defer mon.Task()(&ctx)(&err)
info, err := store.dir.StatWithStorageFormat(ctx, ref, formatVer)
return info, Error.Wrap(err)
}
// Delete deletes blobs with the specified ref
@ -79,7 +109,45 @@ func (store *Store) Create(ctx context.Context, ref storage.BlobRef, size int64)
if err != nil {
return nil, Error.Wrap(err)
}
return newBlobWriter(ref, store, file), nil
return newBlobWriter(ref, store, MaxFormatVersionSupported, file), nil
}
// SpaceUsed adds up the space used in all namespaces for blob storage
func (store *Store) SpaceUsed(ctx context.Context) (space int64, err error) {
defer mon.Task()(&ctx)(&err)
var totalSpaceUsed int64
namespaces, err := store.ListNamespaces(ctx)
if err != nil {
return 0, Error.New("failed to enumerate namespaces: %v", err)
}
for _, namespace := range namespaces {
used, err := store.SpaceUsedInNamespace(ctx, namespace)
if err != nil {
return 0, Error.New("failed to sum space used: %v", err)
}
totalSpaceUsed += used
}
return totalSpaceUsed, nil
}
// SpaceUsedInNamespace adds up how much is used in the given namespace for blob storage
func (store *Store) SpaceUsedInNamespace(ctx context.Context, namespace []byte) (int64, error) {
var totalUsed int64
err := store.WalkNamespace(ctx, namespace, func(info storage.BlobInfo) error {
statInfo, statErr := info.Stat(ctx)
if statErr != nil {
store.log.Error("failed to stat blob", zap.Binary("namespace", namespace), zap.Binary("key", info.BlobRef().Key), zap.Error(statErr))
// keep iterating; we want a best effort total here.
return nil
}
totalUsed += statInfo.Size()
return nil
})
if err != nil {
return 0, err
}
return totalUsed, nil
}
// FreeSpace returns how much space is left in the underlying directory
@ -90,3 +158,33 @@ func (store *Store) FreeSpace() (int64, error) {
}
return info.AvailableSpace, nil
}
// ListNamespaces finds all known namespace IDs in use in local storage. They are not
// guaranteed to contain any blobs.
func (store *Store) ListNamespaces(ctx context.Context) (ids [][]byte, err error) {
return store.dir.ListNamespaces(ctx)
}
// WalkNamespace executes walkFunc for each locally stored blob in the given namespace. If walkFunc
// returns a non-nil error, WalkNamespace will stop iterating and return the error immediately. The
// ctx parameter is intended specifically to allow canceling iteration early.
func (store *Store) WalkNamespace(ctx context.Context, namespace []byte, walkFunc func(storage.BlobInfo) error) (err error) {
return store.dir.WalkNamespace(ctx, namespace, walkFunc)
}
// StoreForTest is a wrapper for Store that also allows writing new V0 blobs (in order to test
// situations involving those)
type StoreForTest struct {
*Store
}
// CreateV0 creates a new V0 blob that can be written. This is only appropriate in test situations.
func (testStore *StoreForTest) CreateV0(ctx context.Context, ref storage.BlobRef) (_ storage.BlobWriter, err error) {
defer mon.Task()(&ctx)(&err)
file, err := testStore.dir.CreateTemporaryFile(ctx, -1)
if err != nil {
return nil, Error.Wrap(err)
}
return newBlobWriter(ref, testStore.Store, FormatV0, file), nil
}

View File

@ -4,21 +4,32 @@
package filestore_test
import (
"errors"
"bytes"
"context"
"io"
"io/ioutil"
"os"
"path/filepath"
"sort"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/zeebo/errs"
"go.uber.org/zap/zaptest"
"storj.io/storj/internal/memory"
"storj.io/storj/internal/testcontext"
"storj.io/storj/internal/testrand"
"storj.io/storj/storage"
"storj.io/storj/storage/filestore"
)
const (
namespaceSize = 32
keySize = 32
)
func TestStoreLoad(t *testing.T) {
const blobSize = 8 << 10
const repeatCount = 16
@ -26,8 +37,9 @@ func TestStoreLoad(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
store, err := filestore.NewAt(ctx.Dir("store"))
store, err := filestore.NewAt(zaptest.NewLogger(t), ctx.Dir("store"))
require.NoError(t, err)
ctx.Check(store.Close)
data := testrand.Bytes(blobSize)
temp := make([]byte, len(data))
@ -155,8 +167,9 @@ func TestDeleteWhileReading(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
store, err := filestore.NewAt(ctx.Dir("store"))
store, err := filestore.NewAt(zaptest.NewLogger(t), ctx.Dir("store"))
require.NoError(t, err)
ctx.Check(store.Close)
data := testrand.Bytes(blobSize)
@ -213,9 +226,301 @@ func TestDeleteWhileReading(t *testing.T) {
if info.IsDir() {
return nil
}
return errors.New("found file " + path)
return errs.New("found file %q", path)
})
if err != nil {
t.Fatal(err)
}
}
func writeABlob(ctx context.Context, t testing.TB, store *filestore.Store, blobRef storage.BlobRef, data []byte, formatVersion storage.FormatVersion) {
var (
blobWriter storage.BlobWriter
err error
)
switch formatVersion {
case filestore.FormatV0:
tStore := &filestore.StoreForTest{store}
blobWriter, err = tStore.CreateV0(ctx, blobRef)
case filestore.FormatV1:
blobWriter, err = store.Create(ctx, blobRef, int64(len(data)))
default:
t.Fatalf("please teach me how to make a V%d blob", formatVersion)
}
require.NoError(t, err)
require.Equal(t, formatVersion, blobWriter.StorageFormatVersion())
_, err = blobWriter.Write(data)
require.NoError(t, err)
size, err := blobWriter.Size()
require.NoError(t, err)
assert.Equal(t, int64(len(data)), size)
err = blobWriter.Commit(ctx)
require.NoError(t, err)
}
func verifyBlobHandle(t testing.TB, reader storage.BlobReader, expectDataLen int, expectFormat storage.FormatVersion) {
assert.Equal(t, expectFormat, reader.StorageFormatVersion())
size, err := reader.Size()
require.NoError(t, err)
assert.Equal(t, int64(expectDataLen), size)
}
func verifyBlobInfo(ctx context.Context, t testing.TB, blobInfo storage.BlobInfo, expectDataLen int, expectFormat storage.FormatVersion) {
assert.Equal(t, expectFormat, blobInfo.StorageFormatVersion())
stat, err := blobInfo.Stat(ctx)
require.NoError(t, err)
assert.Equal(t, int64(expectDataLen), stat.Size())
}
func tryOpeningABlob(ctx context.Context, t testing.TB, store *filestore.Store, blobRef storage.BlobRef, expectDataLen int, expectFormat storage.FormatVersion) {
reader, err := store.Open(ctx, blobRef)
require.NoError(t, err)
verifyBlobHandle(t, reader, expectDataLen, expectFormat)
require.NoError(t, reader.Close())
blobInfo, err := store.Stat(ctx, blobRef)
require.NoError(t, err)
verifyBlobInfo(ctx, t, blobInfo, expectDataLen, expectFormat)
blobInfo, err = store.StatWithStorageFormat(ctx, blobRef, expectFormat)
require.NoError(t, err)
verifyBlobInfo(ctx, t, blobInfo, expectDataLen, expectFormat)
reader, err = store.OpenWithStorageFormat(ctx, blobInfo.BlobRef(), blobInfo.StorageFormatVersion())
require.NoError(t, err)
verifyBlobHandle(t, reader, expectDataLen, expectFormat)
require.NoError(t, reader.Close())
}
func TestMultipleStorageFormatVersions(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
store, err := filestore.NewAt(zaptest.NewLogger(t), ctx.Dir("store"))
require.NoError(t, err)
ctx.Check(store.Close)
const blobSize = 1024
var (
data = testrand.Bytes(blobSize)
namespace = testrand.Bytes(namespaceSize)
v0BlobKey = testrand.Bytes(keySize)
v1BlobKey = testrand.Bytes(keySize)
v0Ref = storage.BlobRef{Namespace: namespace, Key: v0BlobKey}
v1Ref = storage.BlobRef{Namespace: namespace, Key: v1BlobKey}
)
// write a V0 blob
writeABlob(ctx, t, store, v0Ref, data, filestore.FormatV0)
// write a V1 blob
writeABlob(ctx, t, store, v1Ref, data, filestore.FormatV1)
// look up the different blobs with Open and Stat and OpenWithStorageFormat
tryOpeningABlob(ctx, t, store, v0Ref, len(data), filestore.FormatV0)
tryOpeningABlob(ctx, t, store, v1Ref, len(data), filestore.FormatV1)
// write a V1 blob with the same ID as the V0 blob (to simulate it being rewritten as
// V1 during a migration), with different data so we can distinguish them
differentData := make([]byte, len(data)+2)
copy(differentData, data)
copy(differentData[len(data):], "\xff\x00")
writeABlob(ctx, t, store, v0Ref, differentData, filestore.FormatV1)
// if we try to access the blob at that key, we should see only the V1 blob
tryOpeningABlob(ctx, t, store, v0Ref, len(differentData), filestore.FormatV1)
// unless we ask specifically for a V0 blob
blobInfo, err := store.StatWithStorageFormat(ctx, v0Ref, filestore.FormatV0)
require.NoError(t, err)
verifyBlobInfo(ctx, t, blobInfo, len(data), filestore.FormatV0)
reader, err := store.OpenWithStorageFormat(ctx, blobInfo.BlobRef(), blobInfo.StorageFormatVersion())
require.NoError(t, err)
verifyBlobHandle(t, reader, len(data), filestore.FormatV0)
require.NoError(t, reader.Close())
// delete the v0BlobKey; both the V0 and the V1 blobs should go away
err = store.Delete(ctx, v0Ref)
require.NoError(t, err)
reader, err = store.Open(ctx, v0Ref)
require.Error(t, err)
assert.Nil(t, reader)
}
// Check that the SpaceUsed and SpaceUsedInNamespace methods on filestore.Store
// work as expected.
func TestStoreSpaceUsed(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
store, err := filestore.NewAt(zaptest.NewLogger(t), ctx.Dir("store"))
require.NoError(t, err)
ctx.Check(store.Close)
var (
namespace = testrand.Bytes(namespaceSize)
otherNamespace = testrand.Bytes(namespaceSize)
sizesToStore = []memory.Size{4093, 0, 512, 1, memory.MB}
)
spaceUsed, err := store.SpaceUsed(ctx)
require.NoError(t, err)
assert.Equal(t, int64(0), spaceUsed)
spaceUsed, err = store.SpaceUsedInNamespace(ctx, namespace)
require.NoError(t, err)
assert.Equal(t, int64(0), spaceUsed)
spaceUsed, err = store.SpaceUsedInNamespace(ctx, otherNamespace)
require.NoError(t, err)
assert.Equal(t, int64(0), spaceUsed)
var totalSoFar memory.Size
for _, size := range sizesToStore {
contents := testrand.Bytes(size)
blobRef := storage.BlobRef{Namespace: namespace, Key: testrand.Bytes(keySize)}
blobWriter, err := store.Create(ctx, blobRef, int64(len(contents)))
require.NoError(t, err)
_, err = blobWriter.Write(contents)
require.NoError(t, err)
err = blobWriter.Commit(ctx)
require.NoError(t, err)
totalSoFar += size
spaceUsed, err := store.SpaceUsed(ctx)
require.NoError(t, err)
assert.Equal(t, int64(totalSoFar), spaceUsed)
spaceUsed, err = store.SpaceUsedInNamespace(ctx, namespace)
require.NoError(t, err)
assert.Equal(t, int64(totalSoFar), spaceUsed)
spaceUsed, err = store.SpaceUsedInNamespace(ctx, otherNamespace)
require.NoError(t, err)
assert.Equal(t, int64(0), spaceUsed)
}
}
// Check that ListNamespaces and WalkNamespace work as expected.
func TestStoreTraversals(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
store, err := filestore.NewAt(zaptest.NewLogger(t), ctx.Dir("store"))
require.NoError(t, err)
ctx.Check(store.Close)
// invent some namespaces and store stuff in them
type namespaceWithBlobs struct {
namespace []byte
blobs []storage.BlobRef
}
const numNamespaces = 4
recordsToInsert := make([]namespaceWithBlobs, numNamespaces)
var namespaceBase = testrand.Bytes(namespaceSize)
for i := range recordsToInsert {
// give each namespace a similar ID but modified in the last byte to distinguish
recordsToInsert[i].namespace = make([]byte, len(namespaceBase))
copy(recordsToInsert[i].namespace, namespaceBase)
recordsToInsert[i].namespace[len(namespaceBase)-1] = byte(i)
// put varying numbers of blobs in the namespaces
recordsToInsert[i].blobs = make([]storage.BlobRef, i+1)
for j := range recordsToInsert[i].blobs {
recordsToInsert[i].blobs[j] = storage.BlobRef{
Namespace: recordsToInsert[i].namespace,
Key: testrand.Bytes(keySize),
}
blobWriter, err := store.Create(ctx, recordsToInsert[i].blobs[j], 0)
require.NoError(t, err)
// also vary the sizes of the blobs so we can check Stat results
_, err = blobWriter.Write(testrand.Bytes(memory.Size(j)))
require.NoError(t, err)
err = blobWriter.Commit(ctx)
require.NoError(t, err)
}
}
// test ListNamespaces
gotNamespaces, err := store.ListNamespaces(ctx)
require.NoError(t, err)
sort.Slice(gotNamespaces, func(i, j int) bool {
return bytes.Compare(gotNamespaces[i], gotNamespaces[j]) < 0
})
sort.Slice(recordsToInsert, func(i, j int) bool {
return bytes.Compare(recordsToInsert[i].namespace, recordsToInsert[j].namespace) < 0
})
for i, expected := range recordsToInsert {
require.Equalf(t, expected.namespace, gotNamespaces[i], "mismatch at index %d: recordsToInsert is %+v and gotNamespaces is %v", i, recordsToInsert, gotNamespaces)
}
// test WalkNamespace
for _, expected := range recordsToInsert {
// this isn't strictly necessary, since the function closure below is not persisted
// past the end of a loop iteration, but this keeps the linter from complaining.
expected := expected
// keep track of which blobs we visit with WalkNamespace
found := make([]bool, len(expected.blobs))
err = store.WalkNamespace(ctx, expected.namespace, func(info storage.BlobInfo) error {
gotBlobRef := info.BlobRef()
assert.Equal(t, expected.namespace, gotBlobRef.Namespace)
// find which blob this is in expected.blobs
blobIdentified := -1
for i, expectedBlobRef := range expected.blobs {
if bytes.Equal(gotBlobRef.Key, expectedBlobRef.Key) {
found[i] = true
blobIdentified = i
}
}
// make sure this is a blob we actually put in
require.NotEqualf(t, -1, blobIdentified,
"WalkNamespace gave BlobRef %v, but I don't remember storing that",
gotBlobRef)
// check BlobInfo sanity
stat, err := info.Stat(ctx)
require.NoError(t, err)
nameFromStat := stat.Name()
fullPath, err := info.FullPath(ctx)
require.NoError(t, err)
basePath := filepath.Base(fullPath)
assert.Equal(t, nameFromStat, basePath)
assert.Equal(t, int64(blobIdentified), stat.Size())
assert.False(t, stat.IsDir())
return nil
})
require.NoError(t, err)
// make sure all blobs were visited
for i := range found {
assert.True(t, found[i],
"WalkNamespace never yielded blob at index %d: %v",
i, expected.blobs[i])
}
}
// test WalkNamespace on a nonexistent namespace also
namespaceBase[len(namespaceBase)-1] = byte(numNamespaces)
err = store.WalkNamespace(ctx, namespaceBase, func(info storage.BlobInfo) error {
t.Fatal("this should not have been called")
return nil
})
require.NoError(t, err)
// check that WalkNamespace stops iterating after an error return
iterations := 0
expectedErr := errs.New("an expected error")
err = store.WalkNamespace(ctx, recordsToInsert[numNamespaces-1].namespace, func(info storage.BlobInfo) error {
iterations++
if iterations == 2 {
return expectedErr
}
return nil
})
assert.Error(t, err)
assert.Equal(t, err, expectedErr)
assert.Equal(t, 2, iterations)
}

View File

@ -11,7 +11,6 @@ import (
"go.uber.org/zap"
monkit "gopkg.in/spacemonkeygo/monkit.v2"
"storj.io/storj/internal/memory"
"storj.io/storj/internal/sync2"
"storj.io/storj/storagenode/pieces"
"storj.io/storj/storagenode/piecestore"
@ -28,18 +27,16 @@ type Config struct {
type Service struct {
log *zap.Logger
pieces *pieces.Store
pieceinfos pieces.DB
usedSerials piecestore.UsedSerials
Loop sync2.Cycle
}
// NewService creates a new collector service.
func NewService(log *zap.Logger, pieces *pieces.Store, pieceinfos pieces.DB, usedSerials piecestore.UsedSerials, config Config) *Service {
func NewService(log *zap.Logger, pieces *pieces.Store, usedSerials piecestore.UsedSerials, config Config) *Service {
return &Service{
log: log,
pieces: pieces,
pieceinfos: pieceinfos,
usedSerials: usedSerials,
Loop: *sync2.NewCycle(config.Interval),
}
@ -76,15 +73,14 @@ func (service *Service) Collect(ctx context.Context, now time.Time) (err error)
const batchSize = 1000
var count int64
var bytes int64
defer func() {
if count > 0 {
service.log.Info("collect", zap.Int64("count", count), zap.Stringer("size", memory.Size(bytes)))
service.log.Info("collect", zap.Int64("count", count))
}
}()
for k := 0; k < maxBatches; k++ {
infos, err := service.pieceinfos.GetExpired(ctx, now, batchSize)
infos, err := service.pieces.GetExpired(ctx, now, batchSize)
if err != nil {
return err
}
@ -95,7 +91,7 @@ func (service *Service) Collect(ctx context.Context, now time.Time) (err error)
for _, expired := range infos {
err := service.pieces.Delete(ctx, expired.SatelliteID, expired.PieceID)
if err != nil {
errfailed := service.pieceinfos.DeleteFailed(ctx, expired.SatelliteID, expired.PieceID, now)
errfailed := service.pieces.DeleteFailed(ctx, expired, now)
if errfailed != nil {
service.log.Error("unable to update piece info", zap.Stringer("satellite id", expired.SatelliteID), zap.Stringer("piece id", expired.PieceID), zap.Error(errfailed))
}
@ -103,14 +99,7 @@ func (service *Service) Collect(ctx context.Context, now time.Time) (err error)
continue
}
err = service.pieceinfos.Delete(ctx, expired.SatelliteID, expired.PieceID)
if err != nil {
service.log.Error("unable to delete piece info", zap.Stringer("satellite id", expired.SatelliteID), zap.Stringer("piece id", expired.PieceID), zap.Error(err))
continue
}
count++
bytes += expired.PieceSize
}
}

View File

@ -51,11 +51,11 @@ func TestCollector(t *testing.T) {
// imagine we are 30 minutes in the future
for _, storageNode := range planet.StorageNodes {
pieceinfos := storageNode.DB.PieceInfo()
pieceStore := storageNode.DB.Pieces()
usedSerials := storageNode.DB.UsedSerials()
// verify that we actually have some data on storage nodes
used, err := pieceinfos.SpaceUsed(ctx)
used, err := pieceStore.SpaceUsed(ctx)
require.NoError(t, err)
if used == 0 {
// this storage node didn't get picked for storing data
@ -101,7 +101,7 @@ func TestCollector(t *testing.T) {
// imagine we are 10 days in the future
for _, storageNode := range planet.StorageNodes {
pieceinfos := storageNode.DB.PieceInfo()
pieceStore := storageNode.DB.Pieces()
usedSerials := storageNode.DB.UsedSerials()
// collect all the data
@ -109,7 +109,7 @@ func TestCollector(t *testing.T) {
require.NoError(t, err)
// verify that we deleted everything
used, err := pieceinfos.SpaceUsed(ctx)
used, err := pieceStore.SpaceUsed(ctx)
require.NoError(t, err)
require.Equal(t, int64(0), used)

View File

@ -46,7 +46,7 @@ type Service struct {
consoleDB DB
bandwidthDB bandwidth.DB
pieceInfoDB pieces.DB
pieceStore *pieces.Store
kademlia *kademlia.Kademlia
version *version.Service
nodestats *nodestats.Service
@ -59,7 +59,7 @@ type Service struct {
}
// NewService returns new instance of Service
func NewService(log *zap.Logger, consoleDB DB, bandwidth bandwidth.DB, pieceInfo pieces.DB, kademlia *kademlia.Kademlia, version *version.Service,
func NewService(log *zap.Logger, consoleDB DB, bandwidth bandwidth.DB, pieceStore *pieces.Store, kademlia *kademlia.Kademlia, version *version.Service,
nodestats *nodestats.Service, allocatedBandwidth, allocatedDiskSpace memory.Size, walletAddress string, versionInfo version.Info) (*Service, error) {
if log == nil {
return nil, errs.New("log can't be nil")
@ -73,8 +73,8 @@ func NewService(log *zap.Logger, consoleDB DB, bandwidth bandwidth.DB, pieceInfo
return nil, errs.New("bandwidth can't be nil")
}
if pieceInfo == nil {
return nil, errs.New("pieceInfo can't be nil")
if pieceStore == nil {
return nil, errs.New("pieceStore can't be nil")
}
if version == nil {
@ -89,7 +89,7 @@ func NewService(log *zap.Logger, consoleDB DB, bandwidth bandwidth.DB, pieceInfo
log: log,
consoleDB: consoleDB,
bandwidthDB: bandwidth,
pieceInfoDB: pieceInfo,
pieceStore: pieceStore,
kademlia: kademlia,
version: version,
nodestats: nodestats,
@ -146,7 +146,7 @@ func (s *Service) GetBandwidthBySatellite(ctx context.Context, satelliteID storj
func (s *Service) GetUsedStorageTotal(ctx context.Context) (_ *DiskSpaceInfo, err error) {
defer mon.Task()(&ctx)(&err)
spaceUsed, err := s.pieceInfoDB.SpaceUsed(ctx)
spaceUsed, err := s.pieceStore.SpaceUsedForPieces(ctx)
if err != nil {
return nil, err
}
@ -158,7 +158,7 @@ func (s *Service) GetUsedStorageTotal(ctx context.Context) (_ *DiskSpaceInfo, er
func (s *Service) GetUsedStorageBySatellite(ctx context.Context, satelliteID storj.NodeID) (_ *DiskSpaceInfo, err error) {
defer mon.Task()(&ctx)(&err)
spaceUsed, err := s.pieceInfoDB.SpaceUsedBySatellite(ctx, satelliteID)
spaceUsed, err := s.pieceStore.SpaceUsedBySatellite(ctx, satelliteID)
if err != nil {
return nil, err
}

View File

@ -31,10 +31,10 @@ var (
// Endpoint does inspectory things
type Endpoint struct {
log *zap.Logger
pieceInfo pieces.DB
kademlia *kademlia.Kademlia
usageDB bandwidth.DB
log *zap.Logger
pieceStore *pieces.Store
kademlia *kademlia.Kademlia
usageDB bandwidth.DB
startTime time.Time
pieceStoreConfig piecestore.OldConfig
@ -44,7 +44,7 @@ type Endpoint struct {
// NewEndpoint creates piecestore inspector instance
func NewEndpoint(
log *zap.Logger,
pieceInfo pieces.DB,
pieceStore *pieces.Store,
kademlia *kademlia.Kademlia,
usageDB bandwidth.DB,
pieceStoreConfig piecestore.OldConfig,
@ -52,7 +52,7 @@ func NewEndpoint(
return &Endpoint{
log: log,
pieceInfo: pieceInfo,
pieceStore: pieceStore,
kademlia: kademlia,
usageDB: usageDB,
pieceStoreConfig: pieceStoreConfig,
@ -65,7 +65,7 @@ func (inspector *Endpoint) retrieveStats(ctx context.Context) (_ *pb.StatSummary
defer mon.Task()(&ctx)(&err)
// Space Usage
totalUsedSpace, err := inspector.pieceInfo.SpaceUsed(ctx)
totalUsedSpace, err := inspector.pieceStore.SpaceUsedForPieces(ctx)
if err != nil {
return nil, err
}

View File

@ -38,7 +38,6 @@ type Service struct {
log *zap.Logger
routingTable *kademlia.RoutingTable
store *pieces.Store
pieceInfo pieces.DB
usageDB bandwidth.DB
allocatedDiskSpace int64
allocatedBandwidth int64
@ -49,12 +48,11 @@ type Service struct {
// TODO: should it be responsible for monitoring actual bandwidth as well?
// NewService creates a new storage node monitoring service.
func NewService(log *zap.Logger, routingTable *kademlia.RoutingTable, store *pieces.Store, pieceInfo pieces.DB, usageDB bandwidth.DB, allocatedDiskSpace, allocatedBandwidth int64, interval time.Duration, config Config) *Service {
func NewService(log *zap.Logger, routingTable *kademlia.RoutingTable, store *pieces.Store, usageDB bandwidth.DB, allocatedDiskSpace, allocatedBandwidth int64, interval time.Duration, config Config) *Service {
return &Service{
log: log,
routingTable: routingTable,
store: store,
pieceInfo: pieceInfo,
usageDB: usageDB,
allocatedDiskSpace: allocatedDiskSpace,
allocatedBandwidth: allocatedBandwidth,
@ -162,7 +160,7 @@ func (service *Service) updateNodeInformation(ctx context.Context) (err error) {
func (service *Service) usedSpace(ctx context.Context) (_ int64, err error) {
defer mon.Task()(&ctx)(&err)
usedSpace, err := service.pieceInfo.SpaceUsed(ctx)
usedSpace, err := service.store.SpaceUsedForPieces(ctx)
if err != nil {
return 0, err
}
@ -181,7 +179,7 @@ func (service *Service) usedBandwidth(ctx context.Context) (_ int64, err error)
// AvailableSpace returns available disk space for upload
func (service *Service) AvailableSpace(ctx context.Context) (_ int64, err error) {
defer mon.Task()(&ctx)(&err)
usedSpace, err := service.pieceInfo.SpaceUsed(ctx)
usedSpace, err := service.store.SpaceUsedForPieces(ctx)
if err != nil {
return 0, Error.Wrap(err)
}

View File

@ -53,7 +53,8 @@ type DB interface {
Pieces() storage.Blobs
Orders() orders.DB
PieceInfo() pieces.DB
V0PieceInfo() pieces.V0PieceInfoDB
PieceExpirationDB() pieces.PieceExpirationDB
Bandwidth() bandwidth.DB
UsedSerials() piecestore.UsedSerials
Vouchers() vouchers.DB
@ -225,13 +226,12 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config Config, ver
return nil, errs.Combine(err, peer.Close())
}
peer.Storage2.Store = pieces.NewStore(peer.Log.Named("pieces"), peer.DB.Pieces())
peer.Storage2.Store = pieces.NewStore(peer.Log.Named("pieces"), peer.DB.Pieces(), peer.DB.V0PieceInfo(), peer.DB.PieceExpirationDB())
peer.Storage2.Monitor = monitor.NewService(
log.Named("piecestore:monitor"),
peer.Kademlia.RoutingTable,
peer.Storage2.Store,
peer.DB.PieceInfo(),
peer.DB.Bandwidth(),
config.Storage.AllocatedDiskSpace.Int64(),
config.Storage.AllocatedBandwidth.Int64(),
@ -246,7 +246,6 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config Config, ver
peer.Storage2.Trust,
peer.Storage2.Monitor,
peer.Storage2.Store,
peer.DB.PieceInfo(),
peer.DB.Orders(),
peer.DB.Bandwidth(),
peer.DB.UsedSerials(),
@ -285,7 +284,7 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config Config, ver
peer.Log.Named("console:service"),
peer.DB.Console(),
peer.DB.Bandwidth(),
peer.DB.PieceInfo(),
peer.Storage2.Store,
peer.Kademlia.Service,
peer.Version,
peer.NodeStats,
@ -314,7 +313,7 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config Config, ver
{ // setup storage inspector
peer.Storage2.Inspector = inspector.NewEndpoint(
peer.Log.Named("pieces:inspector"),
peer.DB.PieceInfo(),
peer.Storage2.Store,
peer.Kademlia.Service,
peer.DB.Bandwidth(),
config.Storage,
@ -323,7 +322,7 @@ func New(log *zap.Logger, full *identity.FullIdentity, db DB, config Config, ver
pb.RegisterPieceStoreInspectorServer(peer.Server.PrivateGRPC(), peer.Storage2.Inspector)
}
peer.Collector = collector.NewService(peer.Log.Named("collector"), peer.Storage2.Store, peer.DB.PieceInfo(), peer.DB.UsedSerials(), config.Collector)
peer.Collector = collector.NewService(peer.Log.Named("collector"), peer.Storage2.Store, peer.DB.UsedSerials(), config.Collector)
peer.Bandwidth = bandwidth.NewService(peer.Log.Named("bandwidth"), peer.DB.Bandwidth(), config.Bandwidth)

View File

@ -23,12 +23,12 @@ import (
"storj.io/storj/storagenode/storagenodedb/storagenodedbtest"
)
func TestPieceInfo(t *testing.T) {
func TestV0PieceInfo(t *testing.T) {
storagenodedbtest.Run(t, func(t *testing.T, db storagenode.DB) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
pieceinfos := db.PieceInfo()
pieceinfos := db.V0PieceInfo().(pieces.V0PieceInfoDBForTest)
satellite0 := testidentity.MustPregeneratedSignedIdentity(0, storj.LatestIDVersion())
satellite1 := testidentity.MustPregeneratedSignedIdentity(1, storj.LatestIDVersion())

View File

@ -0,0 +1,91 @@
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package pieces_test
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"storj.io/storj/internal/testcontext"
"storj.io/storj/internal/testrand"
"storj.io/storj/storagenode"
"storj.io/storj/storagenode/pieces"
"storj.io/storj/storagenode/storagenodedb/storagenodedbtest"
)
func TestPieceExpirationDB(t *testing.T) {
// test GetExpired, SetExpiration, DeleteExpiration, DeleteFailed
storagenodedbtest.Run(t, func(t *testing.T, db storagenode.DB) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
expireDB := db.PieceExpirationDB()
satelliteID := testrand.NodeID()
pieceID := testrand.PieceID()
expectedExpireInfo := pieces.ExpiredInfo{
SatelliteID: satelliteID,
PieceID: pieceID,
InPieceInfo: false,
}
// GetExpired with no matches
expiredPieceIDs, err := expireDB.GetExpired(ctx, time.Now(), 1000)
require.NoError(t, err)
require.Len(t, expiredPieceIDs, 0)
// DeleteExpiration with no matches
found, err := expireDB.DeleteExpiration(ctx, satelliteID, pieceID)
require.NoError(t, err)
require.False(t, found)
// DeleteFailed with no matches
err = expireDB.DeleteFailed(ctx, satelliteID, pieceID, time.Now())
require.NoError(t, err)
expireAt := time.Now()
// SetExpiration normal usage
err = expireDB.SetExpiration(ctx, satelliteID, pieceID, expireAt)
require.NoError(t, err)
// SetExpiration duplicate
err = expireDB.SetExpiration(ctx, satelliteID, pieceID, expireAt.Add(time.Hour))
require.Error(t, err)
// GetExpired normal usage
expiredPieceIDs, err = expireDB.GetExpired(ctx, expireAt.Add(time.Microsecond), 1000)
require.NoError(t, err)
require.Len(t, expiredPieceIDs, 1)
assert.Equal(t, expiredPieceIDs[0], expectedExpireInfo)
deleteFailedAt := expireAt.Add(2 * time.Microsecond)
// DeleteFailed normal usage
err = expireDB.DeleteFailed(ctx, satelliteID, pieceID, deleteFailedAt)
require.NoError(t, err)
// GetExpired filters out rows with deletion_failed_at = t
expiredPieceIDs, err = expireDB.GetExpired(ctx, deleteFailedAt, 1000)
require.NoError(t, err)
require.Len(t, expiredPieceIDs, 0)
expiredPieceIDs, err = expireDB.GetExpired(ctx, deleteFailedAt.Add(time.Microsecond), 1000)
require.NoError(t, err)
require.Len(t, expiredPieceIDs, 1)
assert.Equal(t, expiredPieceIDs[0], expectedExpireInfo)
// DeleteExpiration normal usage
found, err = expireDB.DeleteExpiration(ctx, satelliteID, pieceID)
require.NoError(t, err)
require.True(t, found)
// Should not be there anymore
expiredPieceIDs, err = expireDB.GetExpired(ctx, expireAt.Add(365*24*time.Hour), 1000)
require.NoError(t, err)
require.Len(t, expiredPieceIDs, 0)
})
}

View File

@ -5,18 +5,61 @@ package pieces
import (
"context"
"encoding/binary"
"hash"
"io"
"github.com/gogo/protobuf/proto"
"github.com/zeebo/errs"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/pkcrypto"
"storj.io/storj/storage"
"storj.io/storj/storage/filestore"
)
const (
// V1PieceHeaderReservedArea is the amount of space to be reserved at the beginning of
// pieces stored with filestore.FormatV1 or greater. Serialized piece headers should be
// written into that space, and the remaining space afterward should be zeroes.
// V1PieceHeaderReservedArea includes the size of the framing field
// (v1PieceHeaderFramingSize). It has a constant size because:
//
// * We do not anticipate needing more than this.
// * We will be able to sum up all space used by a satellite (or all satellites) without
// opening and reading from each piece file (stat() is faster than open()).
// * This simplifies piece file writing (if we needed to know the exact header size
// before writing, then we'd need to spool the entire contents of the piece somewhere
// before we could calculate the hash and size). This way, we can simply reserve the
// header space, write the piece content as it comes in, and then seek back to the
// beginning and fill in the header.
//
// We put it at the beginning of piece files because:
//
// * If we put it at the end instead, we would have to seek to the end of a file (to find
// out the real size while avoiding race conditions with stat()) and then seek backward
// again to get the header, and then seek back to the beginning to get the content.
// Seeking on spinning platter hard drives is very slow compared to reading sequential
// bytes.
// * Putting the header in the middle of piece files might be entertaining, but it would
// also be silly.
// * If piece files are incorrectly truncated or not completely written, it will be
// much easier to identify those cases when the header is intact and findable.
//
// If more space than this is needed, we will need to use a new storage format version.
V1PieceHeaderReservedArea = 512
// v1PieceHeaderFramingSize is the size of the field used at the beginning of piece
// files to indicate the size of the marshaled piece header within the reserved header
// area (because protobufs are not self-delimiting, which is lame).
v1PieceHeaderFramingSize = 2
)
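The layout described above is easy to get subtly wrong, so here is a minimal, self-contained sketch of the framing arithmetic it implies: a 2-byte big-endian length field, followed by the marshaled header, followed by zero padding out to the 512-byte reserved area. This is an illustration only; the frameHeader/unframeHeader helpers and local constants are hypothetical, and a plain byte slice stands in for a marshaled pb.PieceHeader.
package main
import (
	"encoding/binary"
	"fmt"
)
const (
	reservedArea = 512 // mirrors V1PieceHeaderReservedArea
	framingSize  = 2   // mirrors v1PieceHeaderFramingSize
)
// frameHeader lays a marshaled header into the fixed reserved area:
// [2-byte big-endian length][header bytes][zero padding up to reservedArea].
func frameHeader(headerBytes []byte) ([]byte, error) {
	if len(headerBytes) > reservedArea-framingSize {
		return nil, fmt.Errorf("marshaled header too big: %d bytes", len(headerBytes))
	}
	area := make([]byte, reservedArea)
	binary.BigEndian.PutUint16(area[:framingSize], uint16(len(headerBytes)))
	copy(area[framingSize:], headerBytes)
	return area, nil
}
// unframeHeader uses the framing field to trim the zero padding back off.
func unframeHeader(area []byte) ([]byte, error) {
	size := binary.BigEndian.Uint16(area[:framingSize])
	if int(size) > reservedArea-framingSize {
		return nil, fmt.Errorf("framing field claims impossible size %d", size)
	}
	return area[framingSize : framingSize+int(size)], nil
}
func main() {
	framed, _ := frameHeader([]byte("stand-in for a marshaled pb.PieceHeader"))
	header, _ := unframeHeader(framed)
	fmt.Printf("recovered %d header bytes from a %d-byte reserved area\n", len(header), len(framed))
}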
// Writer implements a piece writer that writes content to blob store and calculates a hash.
type Writer struct {
hash hash.Hash
blob storage.BlobWriter
size int64
hash hash.Hash
blob storage.BlobWriter
pieceSize int64 // piece size only; i.e., not including piece header
closed bool
}
@ -24,6 +67,20 @@ type Writer struct {
// NewWriter creates a new writer for storage.BlobWriter.
func NewWriter(blob storage.BlobWriter) (*Writer, error) {
w := &Writer{}
if blob.StorageFormatVersion() >= filestore.FormatV1 {
// We skip past the reserved header area for now; we want the header to be at the
// beginning of the file, to make it quick to seek there and also to make it easier
// to identify situations where a blob file has been truncated incorrectly. And we
// don't know what exactly is going to be in the header yet--we won't know what the
// hash or size or timestamp or expiration or signature fields need to be until we
// have received the whole piece.
//
// Once the writer calls Commit() on this writer, we will seek back to the beginning
// of the file and write the header.
if _, err := blob.Seek(V1PieceHeaderReservedArea, io.SeekStart); err != nil {
return nil, Error.Wrap(err)
}
}
w.blob = blob
w.hash = pkcrypto.NewHash()
return w, nil
@ -32,7 +89,7 @@ func NewWriter(blob storage.BlobWriter) (*Writer, error) {
// Write writes data to the blob and calculates the hash.
func (w *Writer) Write(data []byte) (int, error) {
n, err := w.blob.Write(data)
w.size += int64(n)
w.pieceSize += int64(n)
_, _ = w.hash.Write(data[:n]) // guaranteed not to return an error
if err == io.EOF {
return n, err
@ -40,20 +97,78 @@ func (w *Writer) Write(data []byte) (int, error) {
return n, Error.Wrap(err)
}
// Size returns the amount of data written so far.
func (w *Writer) Size() int64 { return w.size }
// Size returns the amount of data written to the piece so far, not including the size of
// the piece header.
func (w *Writer) Size() int64 { return w.pieceSize }
// Hash returns the hash of data written so far.
func (w *Writer) Hash() []byte { return w.hash.Sum(nil) }
// Commit commits piece to permanent storage.
func (w *Writer) Commit(ctx context.Context) (err error) {
func (w *Writer) Commit(ctx context.Context, pieceHeader *pb.PieceHeader) (err error) {
defer mon.Task()(&ctx)(&err)
if w.closed {
return Error.New("already closed")
}
// point of no return: after this we definitely either commit or cancel
w.closed = true
return Error.Wrap(w.blob.Commit(ctx))
defer func() {
if err != nil {
err = Error.Wrap(errs.Combine(err, w.blob.Cancel(ctx)))
} else {
err = Error.Wrap(w.blob.Commit(ctx))
}
}()
formatVer := w.blob.StorageFormatVersion()
if formatVer == filestore.FormatV0 {
return nil
}
pieceHeader.FormatVersion = pb.PieceHeader_FormatVersion(formatVer)
headerBytes, err := proto.Marshal(pieceHeader)
if err != nil {
return err
}
mon.IntVal("storagenode_pieces_pieceheader_size").Observe(int64(len(headerBytes)))
if len(headerBytes) > (V1PieceHeaderReservedArea - v1PieceHeaderFramingSize) {
// This should never happen under normal circumstances, and it might deserve a panic(),
// but I'm not *entirely* sure this case can't be triggered by a malicious uplink. Are
// google.protobuf.Timestamp fields variable-width?
mon.Meter("storagenode_pieces_pieceheader_overflow").Mark(len(headerBytes))
return Error.New("marshaled piece header too big!")
}
size, err := w.blob.Size()
if err != nil {
return err
}
if _, err := w.blob.Seek(0, io.SeekStart); err != nil {
return err
}
// We need to store some "framing" bytes first, because protobufs are not self-delimiting.
// In cases where the serialized pieceHeader is not exactly V1PieceHeaderReservedArea bytes
// (probably _all_ cases), without this marker, we wouldn't have any way to take the
// V1PieceHeaderReservedArea bytes from a piece blob and trim off the right number of zeroes
// at the end so that the protobuf unmarshals correctly.
var framingBytes [v1PieceHeaderFramingSize]byte
binary.BigEndian.PutUint16(framingBytes[:], uint16(len(headerBytes)))
if _, err = w.blob.Write(framingBytes[:]); err != nil {
return Error.New("failed writing piece framing field at file start: %v", err)
}
// Now write the serialized header bytes.
if _, err = w.blob.Write(headerBytes); err != nil {
return Error.New("failed writing piece header at file start: %v", err)
}
// seek back to the end, as blob.Commit will truncate from the current file position.
// (don't try to seek(0, io.SeekEnd), because dir.CreateTemporaryFile preallocs space
// and the actual end of the file might be far past the intended end of the piece.)
if _, err := w.blob.Seek(size, io.SeekStart); err != nil {
return err
}
return nil
}
// Cancel deletes any temporarily written data.
@ -68,9 +183,11 @@ func (w *Writer) Cancel(ctx context.Context) (err error) {
// Reader implements a piece reader that reads content from blob store.
type Reader struct {
blob storage.BlobReader
pos int64
size int64
formatVersion storage.FormatVersion
blob storage.BlobReader
pos int64 // relative to file start; i.e., it includes piece header
pieceSize int64 // piece size only; i.e., not including piece header
}
// NewReader creates a new reader for storage.BlobReader.
@ -79,16 +196,80 @@ func NewReader(blob storage.BlobReader) (*Reader, error) {
if err != nil {
return nil, Error.Wrap(err)
}
formatVersion := blob.StorageFormatVersion()
if formatVersion >= filestore.FormatV1 {
if size < V1PieceHeaderReservedArea {
return nil, Error.New("invalid piece file for storage format version %d: too small for header (%d < %d)", formatVersion, size, V1PieceHeaderReservedArea)
}
size -= V1PieceHeaderReservedArea
}
reader := &Reader{}
reader.blob = blob
reader.size = size
reader := &Reader{
formatVersion: formatVersion,
blob: blob,
pieceSize: size,
}
return reader, nil
}
// StorageFormatVersion returns the storage format version of the piece being read.
func (r *Reader) StorageFormatVersion() storage.FormatVersion {
return r.formatVersion
}
// GetPieceHeader reads, unmarshals, and returns the piece header. It may only be called once,
// before any Read() calls. (Retrieving the header at any time could be supported, but for the sake
// of performance we need to understand why and how often that would happen.)
func (r *Reader) GetPieceHeader() (*pb.PieceHeader, error) {
if r.formatVersion < filestore.FormatV1 {
return nil, Error.New("Can't get piece header from storage format V0 reader")
}
if r.pos != 0 {
return nil, Error.New("GetPieceHeader called when not at the beginning of the blob stream")
}
// We need to read the size of the serialized header protobuf before we read the header
// itself. The headers aren't a constant size, although V1PieceHeaderReservedArea is
// constant. Without this marker, we wouldn't have any way to know how much of the
// reserved header area is supposed to make up the serialized header protobuf.
var headerBytes [V1PieceHeaderReservedArea]byte
framingBytes := headerBytes[:v1PieceHeaderFramingSize]
n, err := io.ReadFull(r.blob, framingBytes)
if err != nil {
return nil, Error.Wrap(err)
}
if n != v1PieceHeaderFramingSize {
return nil, Error.New("Could not read whole PieceHeader framing field")
}
r.pos += int64(n)
headerSize := binary.BigEndian.Uint16(framingBytes)
if headerSize > (V1PieceHeaderReservedArea - v1PieceHeaderFramingSize) {
return nil, Error.New("PieceHeader framing field claims impossible size of %d bytes", headerSize)
}
// Now we can read the actual serialized header.
pieceHeaderBytes := headerBytes[v1PieceHeaderFramingSize : v1PieceHeaderFramingSize+headerSize]
n, err = io.ReadFull(r.blob, pieceHeaderBytes)
if err != nil {
return nil, Error.Wrap(err)
}
r.pos += int64(n)
// Deserialize and return.
header := &pb.PieceHeader{}
if err := proto.Unmarshal(pieceHeaderBytes, header); err != nil {
return nil, Error.New("piece header: %v", err)
}
return header, nil
}
// Read reads data from the underlying blob, buffering as necessary.
func (r *Reader) Read(data []byte) (int, error) {
if r.formatVersion >= filestore.FormatV1 && r.pos < V1PieceHeaderReservedArea {
// should only be necessary once per reader. or zero times, if GetPieceHeader is used
if _, err := r.blob.Seek(V1PieceHeaderReservedArea, io.SeekStart); err != nil {
return 0, Error.Wrap(err)
}
}
n, err := r.blob.Read(data)
r.pos += int64(n)
if err == io.EOF {
@ -97,22 +278,37 @@ func (r *Reader) Read(data []byte) (int, error) {
return n, Error.Wrap(err)
}
// Seek seeks to the specified location.
// Seek seeks to the specified location within the piece content (ignoring the header).
func (r *Reader) Seek(offset int64, whence int) (int64, error) {
if whence == io.SeekStart && r.formatVersion >= filestore.FormatV1 {
offset += V1PieceHeaderReservedArea
}
if whence == io.SeekStart && r.pos == offset {
return r.pos, nil
}
pos, err := r.blob.Seek(offset, whence)
r.pos = pos
if r.formatVersion >= filestore.FormatV1 {
if pos < V1PieceHeaderReservedArea {
// any position within the file header should show as 0 here
pos = 0
} else {
pos -= V1PieceHeaderReservedArea
}
}
if err == io.EOF {
return pos, err
}
return pos, Error.Wrap(err)
}
// ReadAt reads data at the specified offset
// ReadAt reads data at the specified offset, which is relative to the piece content,
// not the underlying blob. The piece header is not reachable by this method.
func (r *Reader) ReadAt(data []byte, offset int64) (int, error) {
if r.formatVersion >= filestore.FormatV1 {
offset += V1PieceHeaderReservedArea
}
n, err := r.blob.ReadAt(data, offset)
if err == io.EOF {
return n, err
@ -120,8 +316,8 @@ func (r *Reader) ReadAt(data []byte, offset int64) (int, error) {
return n, Error.Wrap(err)
}
// Size returns the amount of data written so far.
func (r *Reader) Size() int64 { return r.size }
// Size returns the amount of data in the piece.
func (r *Reader) Size() int64 { return r.pieceSize }
// Close closes the reader.
func (r *Reader) Close() error {

View File

@ -6,14 +6,19 @@ package pieces_test
import (
"io"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap"
"go.uber.org/zap/zaptest"
"storj.io/storj/internal/memory"
"storj.io/storj/internal/testcontext"
"storj.io/storj/internal/testrand"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/storj"
"storj.io/storj/storage"
"storj.io/storj/storage/filestore"
"storj.io/storj/storagenode/pieces"
)
@ -24,10 +29,10 @@ func BenchmarkReadWrite(b *testing.B) {
dir, err := filestore.NewDir(ctx.Dir("pieces"))
require.NoError(b, err)
blobs := filestore.New(dir)
blobs := filestore.New(zap.NewNop(), dir)
defer ctx.Check(blobs.Close)
store := pieces.NewStore(zap.NewNop(), blobs)
store := pieces.NewStore(zap.NewNop(), blobs, nil, nil)
// setup test parameters
const blockSize = int(256 * memory.KiB)
@ -51,7 +56,7 @@ func BenchmarkReadWrite(b *testing.B) {
data = data[n:]
}
require.NoError(b, writer.Commit(ctx))
require.NoError(b, writer.Commit(ctx, &pb.PieceHeader{}))
}
})
@ -61,7 +66,7 @@ func BenchmarkReadWrite(b *testing.B) {
require.NoError(b, err)
_, err = writer.Write(source)
require.NoError(b, err)
require.NoError(b, writer.Commit(ctx))
require.NoError(b, writer.Commit(ctx, &pb.PieceHeader{}))
}
b.Run("Read", func(b *testing.B) {
@ -83,3 +88,131 @@ func BenchmarkReadWrite(b *testing.B) {
}
})
}
func readAndWritePiece(t *testing.T, content []byte) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
dir, err := filestore.NewDir(ctx.Dir("pieces"))
require.NoError(t, err)
blobs := filestore.New(zaptest.NewLogger(t), dir)
defer ctx.Check(blobs.Close)
store := pieces.NewStore(zaptest.NewLogger(t), blobs, nil, nil)
// test parameters
satelliteID := testrand.NodeID()
pieceID := testrand.PieceID()
fakeHash := testrand.Bytes(32)
creationTime := time.Unix(1564362827, 18364029)
fakeSig := testrand.Bytes(32)
expirationTime := time.Unix(1595898827, 18364029)
// write a V1 format piece
w, err := store.Writer(ctx, satelliteID, pieceID)
require.NoError(t, err)
if len(content) > 0 {
_, err = w.Write(content)
require.NoError(t, err)
}
// make sure w.Size() works
assert.Equal(t, int64(len(content)), w.Size())
// commit the writer with the piece header, and close it
err = w.Commit(ctx, &pb.PieceHeader{
Hash: fakeHash,
CreationTime: creationTime,
Signature: fakeSig,
OrderLimit: pb.OrderLimit{
PieceExpiration: expirationTime.UTC(),
},
})
require.NoError(t, err)
// open a reader
r, err := store.Reader(ctx, satelliteID, pieceID)
require.NoError(t, err)
defer ctx.Check(r.Close)
assert.Equal(t, filestore.MaxFormatVersionSupported, r.StorageFormatVersion())
// make sure r.Size() works
assert.Equal(t, int64(len(content)), r.Size())
// make sure seek-nowhere works as expected before piece header is read
pos, err := r.Seek(0, io.SeekCurrent)
require.NoError(t, err)
require.Equal(t, int64(0), pos)
// read piece header
header, err := r.GetPieceHeader()
require.NoError(t, err)
assert.Equal(t, fakeHash, header.Hash)
assert.Truef(t, header.CreationTime.Equal(creationTime),
"header.CreationTime = %s, but expected creationTime = %s", header.CreationTime, creationTime)
assert.Equal(t, fakeSig, header.Signature)
require.NotZero(t, header.OrderLimit.PieceExpiration)
assert.Truef(t, header.OrderLimit.PieceExpiration.Equal(expirationTime),
"*header.ExpirationTime = %s, but expected expirationTime = %s", header.OrderLimit.PieceExpiration, expirationTime)
assert.Equal(t, pb.OrderLimit{PieceExpiration: expirationTime.UTC()}, header.OrderLimit)
assert.Equal(t, filestore.FormatV1, storage.FormatVersion(header.FormatVersion))
// make sure seek-nowhere works as expected after piece header is read too
// (from the point of view of the piece store, the file position has not moved)
pos, err = r.Seek(0, io.SeekCurrent)
require.NoError(t, err)
assert.Equal(t, int64(0), pos)
// read piece contents
bufSize := memory.MB.Int()
if len(content) < bufSize {
bufSize = len(content)
}
buf := make([]byte, bufSize)
bytesRead, err := r.Read(buf)
require.NoError(t, err)
require.Equal(t, bufSize, bytesRead)
require.Equal(t, content[:len(buf)], buf)
// GetPieceHeader should error here now
header, err = r.GetPieceHeader()
require.Error(t, err)
assert.Truef(t, pieces.Error.Has(err), "err is not a pieces.Error: %v", err)
assert.Nil(t, header)
// check file position again
pos, err = r.Seek(0, io.SeekCurrent)
require.NoError(t, err)
require.Equal(t, int64(bufSize), pos)
const miniReadSize = 256
if len(content) > int(pos+miniReadSize) {
// Continuing to read should be ok
bytesRead, err = r.Read(buf[:miniReadSize])
require.NoError(t, err)
require.Equal(t, miniReadSize, bytesRead)
require.Equal(t, content[int(memory.MB):int(memory.MB)+miniReadSize], buf[:miniReadSize])
// Perform a Seek that actually moves the file pointer
const startReadFrom = 11
pos, err = r.Seek(startReadFrom, io.SeekStart)
require.NoError(t, err)
assert.Equal(t, int64(startReadFrom), pos)
// And make sure that Seek had an effect
bytesRead, err = r.Read(buf[:miniReadSize])
require.NoError(t, err)
require.Equal(t, miniReadSize, bytesRead)
require.Equal(t, content[startReadFrom:startReadFrom+miniReadSize], buf[:miniReadSize])
}
}
func TestReadWriteWithPieceHeader(t *testing.T) {
content := testrand.Bytes(30 * memory.MB)
readAndWritePiece(t, content)
}
func TestEmptyPiece(t *testing.T) {
var content [0]byte
readAndWritePiece(t, content[:])
}

View File

@ -10,12 +10,13 @@ import (
"github.com/zeebo/errs"
"go.uber.org/zap"
monkit "gopkg.in/spacemonkeygo/monkit.v2"
"gopkg.in/spacemonkeygo/monkit.v2"
"storj.io/storj/internal/memory"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/storj"
"storj.io/storj/storage"
"storj.io/storj/storage/filestore"
)
const (
@ -46,42 +47,108 @@ type Info struct {
type ExpiredInfo struct {
SatelliteID storj.NodeID
PieceID storj.PieceID
PieceSize int64
// This can be removed when we no longer need to support the pieceinfo db. Its only purpose
// is to keep track of whether expired entries came from piece_expirations or pieceinfo.
InPieceInfo bool
}
// DB stores meta information about a piece, the actual piece is stored in storage.Blobs
type DB interface {
// Add inserts Info to the database.
Add(context.Context, *Info) error
// PieceExpirationDB stores information about pieces with expiration dates.
type PieceExpirationDB interface {
// GetExpired gets piece IDs that expire or have expired before the given time
GetExpired(ctx context.Context, expiresBefore time.Time, limit int64) ([]ExpiredInfo, error)
// SetExpiration sets an expiration time for the given piece ID on the given satellite
SetExpiration(ctx context.Context, satellite storj.NodeID, pieceID storj.PieceID, expiresAt time.Time) error
// DeleteExpiration removes an expiration record for the given piece ID on the given satellite
DeleteExpiration(ctx context.Context, satellite storj.NodeID, pieceID storj.PieceID) (found bool, err error)
// DeleteFailed marks an expiration record as having experienced a failure in deleting the
// piece from the disk
DeleteFailed(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID, failedAt time.Time) error
}
// V0PieceInfoDB stores meta information about pieces stored with storage format V0 (where
// metadata goes in the "pieceinfo" table in the storagenodedb). The actual pieces are stored
// behind something providing the storage.Blobs interface.
type V0PieceInfoDB interface {
// Get returns Info about a piece.
Get(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID) (*Info, error)
// GetPieceIDs gets pieceIDs using the satelliteID
GetPieceIDs(ctx context.Context, satelliteID storj.NodeID, createdBefore time.Time, limit int, cursor storj.PieceID) (pieceIDs []storj.PieceID, err error)
// Delete deletes Info about a piece.
Delete(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID) error
// DeleteFailed marks piece deletion from disk failed
DeleteFailed(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID, failedAt time.Time) error
// SpaceUsed returns the in memory value for disk space used by all pieces
SpaceUsed(ctx context.Context) (int64, error)
// CalculatedSpaceUsed calculates disk space used by all pieces
CalculatedSpaceUsed(ctx context.Context) (int64, error)
// SpaceUsedBySatellite calculates disk space used by all pieces by satellite
SpaceUsedBySatellite(ctx context.Context, satelliteID storj.NodeID) (int64, error)
// GetExpired gets orders that are expired and were created before some time
// GetExpired gets piece IDs stored with storage format V0 that expire or have expired
// before the given time
GetExpired(ctx context.Context, expiredAt time.Time, limit int64) ([]ExpiredInfo, error)
// WalkSatelliteV0Pieces executes walkFunc for each locally stored piece, stored
// with storage format V0 in the namespace of the given satellite. If walkFunc returns a
// non-nil error, WalkSatelliteV0Pieces will stop iterating and return the error
// immediately. The ctx parameter is intended specifically to allow canceling iteration
// early.
WalkSatelliteV0Pieces(ctx context.Context, blobStore storage.Blobs, satellite storj.NodeID, walkFunc func(StoredPieceAccess) error) error
}
// V0PieceInfoDBForTest is like V0PieceInfoDB, but adds on the Add() method so
// that test environments with V0 piece data can be set up.
type V0PieceInfoDBForTest interface {
V0PieceInfoDB
// Add inserts Info to the database. This is only a valid thing to do, now,
// during tests, to replicate the environment of a storage node not yet fully
// migrated to V1 storage.
Add(context.Context, *Info) error
}
// StoredPieceAccess allows inspection and manipulation of a piece during iteration with
// WalkSatellitePieces-type methods.
type StoredPieceAccess interface {
storage.BlobInfo
// PieceID gives the pieceID of the piece
PieceID() storj.PieceID
// Satellite gives the nodeID of the satellite which owns the piece
Satellite() (storj.NodeID, error)
// ContentSize gives the size of the piece content (not including the piece header, if
// applicable)
ContentSize(ctx context.Context) (int64, error)
// CreationTime returns the piece creation time as given in the original PieceHash (which is
// likely not the same as the file mtime). For non-FormatV0 pieces, this requires opening
// the file and unmarshaling the piece header. If exact precision is not required, ModTime()
// may be a better solution.
CreationTime(ctx context.Context) (time.Time, error)
// ModTime returns a less-precise piece creation time than CreationTime, but is generally
// much faster. For non-FormatV0 pieces, this gets the piece creation time from the
// filesystem instead of the piece header.
ModTime(ctx context.Context) (time.Time, error)
}
// Store implements storing pieces onto a blob storage implementation.
type Store struct {
log *zap.Logger
blobs storage.Blobs
log *zap.Logger
blobs storage.Blobs
v0PieceInfo V0PieceInfoDB
expirationInfo PieceExpirationDB
// The value of reservedSpace is always added to the return value from the
// SpaceUsedForPieces() method.
// The reservedSpace field is part of an unfortunate hack that enables testing of low-space
// or no-space conditions. It is not (or should not be) used under regular operating
// conditions.
reservedSpace int64
}
// StoreForTest is a wrapper around Store to be used only in test scenarios. It enables writing
// pieces with older storage formats and allows use of the ReserveSpace() method.
type StoreForTest struct {
*Store
}
// NewStore creates a new piece store
func NewStore(log *zap.Logger, blobs storage.Blobs) *Store {
func NewStore(log *zap.Logger, blobs storage.Blobs, v0PieceInfo V0PieceInfoDB, expirationInfo PieceExpirationDB) *Store {
return &Store{
log: log,
blobs: blobs,
log: log,
blobs: blobs,
v0PieceInfo: v0PieceInfo,
expirationInfo: expirationInfo,
}
}
@ -100,6 +167,37 @@ func (store *Store) Writer(ctx context.Context, satellite storj.NodeID, pieceID
return writer, Error.Wrap(err)
}
// WriterForFormatVersion allows opening a piece writer with a specified storage format version.
// This is meant to be used externally only in test situations (thus the StoreForTest receiver
// type).
func (store StoreForTest) WriterForFormatVersion(ctx context.Context, satellite storj.NodeID, pieceID storj.PieceID, formatVersion storage.FormatVersion) (_ *Writer, err error) {
defer mon.Task()(&ctx)(&err)
blobRef := storage.BlobRef{
Namespace: satellite.Bytes(),
Key: pieceID.Bytes(),
}
var blob storage.BlobWriter
switch formatVersion {
case filestore.FormatV0:
fStore, ok := store.blobs.(*filestore.Store)
if !ok {
return nil, Error.New("can't make a WriterForFormatVersion with this blob store (%T)", store.blobs)
}
tStore := filestore.StoreForTest{Store: fStore}
blob, err = tStore.CreateV0(ctx, blobRef)
case filestore.FormatV1:
blob, err = store.blobs.Create(ctx, blobRef, preallocSize.Int64())
default:
return nil, Error.New("please teach me how to make V%d pieces", formatVersion)
}
if err != nil {
return nil, Error.Wrap(err)
}
writer, err := NewWriter(blob)
return writer, Error.Wrap(err)
}
// Reader returns a new piece reader.
func (store *Store) Reader(ctx context.Context, satellite storj.NodeID, pieceID storj.PieceID) (_ *Reader, err error) {
defer mon.Task()(&ctx)(&err)
@ -118,6 +216,23 @@ func (store *Store) Reader(ctx context.Context, satellite storj.NodeID, pieceID
return reader, Error.Wrap(err)
}
// ReaderWithStorageFormat returns a new piece reader for a located piece, which avoids the
// potential need to check multiple storage formats to find the right blob.
func (store *Store) ReaderWithStorageFormat(ctx context.Context, satellite storj.NodeID, pieceID storj.PieceID, formatVersion storage.FormatVersion) (_ *Reader, err error) {
defer mon.Task()(&ctx)(&err)
ref := storage.BlobRef{Namespace: satellite.Bytes(), Key: pieceID.Bytes()}
blob, err := store.blobs.OpenWithStorageFormat(ctx, ref, formatVersion)
if err != nil {
if os.IsNotExist(err) {
return nil, err
}
return nil, Error.Wrap(err)
}
reader, err := NewReader(blob)
return reader, Error.Wrap(err)
}
// Delete deletes the specified piece.
func (store *Store) Delete(ctx context.Context, satellite storj.NodeID, pieceID storj.PieceID) (err error) {
defer mon.Task()(&ctx)(&err)
@ -125,9 +240,164 @@ func (store *Store) Delete(ctx context.Context, satellite storj.NodeID, pieceID
Namespace: satellite.Bytes(),
Key: pieceID.Bytes(),
})
if err != nil {
return Error.Wrap(err)
}
// delete records in both the piece_expirations and pieceinfo DBs, wherever we find it.
// both of these calls should return no error if the requested record is not found.
if store.expirationInfo != nil {
_, err = store.expirationInfo.DeleteExpiration(ctx, satellite, pieceID)
}
if store.v0PieceInfo != nil {
err = errs.Combine(err, store.v0PieceInfo.Delete(ctx, satellite, pieceID))
}
return Error.Wrap(err)
}
// GetV0PieceInfoDB returns this piece-store's reference to the V0 piece info DB (or nil,
// if this piece-store does not have one). This is ONLY intended for use with testing
// functionality.
func (store *Store) GetV0PieceInfoDB() V0PieceInfoDB {
return store.v0PieceInfo
}
// WalkSatellitePieces executes walkFunc for each locally stored piece in the namespace of the
// given satellite. If walkFunc returns a non-nil error, WalkSatellitePieces will stop iterating
// and return the error immediately. The ctx parameter is intended specifically to allow canceling
// iteration early.
//
// Note that this method includes all locally stored pieces, both V0 and higher.
func (store *Store) WalkSatellitePieces(ctx context.Context, satellite storj.NodeID, walkFunc func(StoredPieceAccess) error) (err error) {
defer mon.Task()(&ctx)(&err)
// first iterate over all in V1 storage, then all in V0
err = store.blobs.WalkNamespace(ctx, satellite.Bytes(), func(blobInfo storage.BlobInfo) error {
if blobInfo.StorageFormatVersion() < filestore.FormatV1 {
// we'll address this piece while iterating over the V0 pieces below.
return nil
}
pieceAccess, err := newStoredPieceAccess(store, blobInfo)
if err != nil {
// this is not a real piece blob. the blob store can't distinguish between actual piece
// blobs and stray files whose names happen to decode as valid base32. skip this
// "blob".
return nil
}
return walkFunc(pieceAccess)
})
if err == nil && store.v0PieceInfo != nil {
err = store.v0PieceInfo.WalkSatelliteV0Pieces(ctx, store.blobs, satellite, walkFunc)
}
return err
}
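As a usage illustration only: the hypothetical helper below (not part of this commit) combines WalkSatellitePieces and StoredPieceAccess to tally V0 versus V1+ pieces for one satellite and to sum their content sizes, similar in spirit to SpaceUsedBySatellite further down, except that it stops on the first stat error instead of skipping it.
package example
import (
	"context"

	"storj.io/storj/pkg/storj"
	"storj.io/storj/storage/filestore"
	"storj.io/storj/storagenode/pieces"
)
// countPiecesByFormat walks every locally stored piece for one satellite and
// reports how many use the V0 format, how many use V1 or later, and the total
// piece content size (header space is excluded by ContentSize).
func countPiecesByFormat(ctx context.Context, store *pieces.Store, satellite storj.NodeID) (v0, v1Plus int, contentBytes int64, err error) {
	err = store.WalkSatellitePieces(ctx, satellite, func(access pieces.StoredPieceAccess) error {
		if access.StorageFormatVersion() < filestore.FormatV1 {
			v0++
		} else {
			v1Plus++
		}
		size, sizeErr := access.ContentSize(ctx)
		if sizeErr != nil {
			return sizeErr // returning an error stops the walk immediately
		}
		contentBytes += size
		return nil
	})
	return v0, v1Plus, contentBytes, err
}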
// GetExpired gets piece IDs that are expired and were created before the given time
func (store *Store) GetExpired(ctx context.Context, expiredAt time.Time, limit int64) (_ []ExpiredInfo, err error) {
defer mon.Task()(&ctx)(&err)
expired, err := store.expirationInfo.GetExpired(ctx, expiredAt, limit)
if err != nil {
return nil, err
}
if int64(len(expired)) < limit && store.v0PieceInfo != nil {
v0Expired, err := store.v0PieceInfo.GetExpired(ctx, expiredAt, limit-int64(len(expired)))
if err != nil {
return nil, err
}
expired = append(expired, v0Expired...)
}
return expired, nil
}
// SetExpiration records an expiration time for the specified piece ID owned by the specified satellite
func (store *Store) SetExpiration(ctx context.Context, satellite storj.NodeID, pieceID storj.PieceID, expiresAt time.Time) (err error) {
return store.expirationInfo.SetExpiration(ctx, satellite, pieceID, expiresAt)
}
// DeleteFailed marks piece as a failed deletion.
func (store *Store) DeleteFailed(ctx context.Context, expired ExpiredInfo, when time.Time) (err error) {
defer mon.Task()(&ctx)(&err)
if expired.InPieceInfo {
return store.v0PieceInfo.DeleteFailed(ctx, expired.SatelliteID, expired.PieceID, when)
}
return store.expirationInfo.DeleteFailed(ctx, expired.SatelliteID, expired.PieceID, when)
}
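To show how GetExpired, Delete, and DeleteFailed fit together (mirroring the collector changes earlier in this diff), here is a hedged sketch; collectExpired is a hypothetical function, not part of this commit. Note that Store.Delete already removes matching rows from both piece_expirations and pieceinfo on success, so only the failure path needs DeleteFailed, and ExpiredInfo.InPieceInfo routes that call to the right table.
package example
import (
	"context"
	"time"

	"storj.io/storj/storagenode/pieces"
)
// collectExpired deletes one batch of expired pieces, marking any failed
// deletions so their expiration records keep a deletion_failed_at timestamp.
func collectExpired(ctx context.Context, store *pieces.Store, now time.Time, batchSize int64) error {
	expired, err := store.GetExpired(ctx, now, batchSize)
	if err != nil {
		return err
	}
	for _, info := range expired {
		if err := store.Delete(ctx, info.SatelliteID, info.PieceID); err != nil {
			// remember the failure; the expiration record stays behind so the
			// deletion can be retried later (see DeleteFailed above)
			_ = store.DeleteFailed(ctx, info, now)
		}
	}
	return nil
}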
// SpaceUsedForPieces returns *an approximation of* the disk space used by all local pieces (both
// V0 and later). This is an approximation because changes may be being applied to the filestore as
// this information is collected, and because it is possible that various errors in directory
// traversal could cause this count to be undersized.
//
// Important note: this metric does not include space used by piece headers, whereas
// storj/filestore/store.(*Store).SpaceUsed() *does* include all space used by the blobs.
//
// The value of reservedSpace for this Store is added to the result, but this should only affect
// tests (reservedSpace should always be 0 in real usage).
func (store *Store) SpaceUsedForPieces(ctx context.Context) (int64, error) {
satellites, err := store.getAllStoringSatellites(ctx)
if err != nil {
return 0, err
}
var total int64
for _, satellite := range satellites {
spaceUsed, err := store.SpaceUsedBySatellite(ctx, satellite)
if err != nil {
return 0, err
}
total += spaceUsed
}
return total + store.reservedSpace, nil
}
func (store *Store) getAllStoringSatellites(ctx context.Context) ([]storj.NodeID, error) {
namespaces, err := store.blobs.ListNamespaces(ctx)
if err != nil {
return nil, err
}
satellites := make([]storj.NodeID, len(namespaces))
for i, namespace := range namespaces {
satellites[i], err = storj.NodeIDFromBytes(namespace)
if err != nil {
return nil, err
}
}
return satellites, nil
}
// SpaceUsedBySatellite calculates *an approximation of* how much disk space is used for local
// piece storage in the given satellite's namespace. This is an approximation because changes may
// be being applied to the filestore as this information is collected, and because it is possible
// that various errors in directory traversal could cause this count to be undersized.
//
// Important note: this metric does not include space used by piece headers, whereas
// storj/filestore/store.(*Store).SpaceUsedInNamespace() *does* include all space used by the
// blobs.
func (store *Store) SpaceUsedBySatellite(ctx context.Context, satelliteID storj.NodeID) (int64, error) {
var totalUsed int64
err := store.WalkSatellitePieces(ctx, satelliteID, func(access StoredPieceAccess) error {
contentSize, statErr := access.ContentSize(ctx)
if statErr != nil {
store.log.Error("failed to stat", zap.Error(statErr), zap.String("pieceID", access.PieceID().String()), zap.String("satellite", satelliteID.String()))
// keep iterating; we want a best effort total here.
return nil
}
totalUsed += contentSize
return nil
})
if err != nil {
return 0, err
}
return totalUsed, nil
}
// ReserveSpace marks some amount of free space as used, even if it's not, so that future calls
// to SpaceUsedForPieces() are raised by this amount. Calls to ReserveSpace invalidate earlier
// calls, so ReserveSpace(0) undoes all prior space reservation. This should only be used in
// test scenarios.
func (store StoreForTest) ReserveSpace(amount int64) {
store.reservedSpace = amount
}
// StorageStatus contains information about the disk space the store is using.
type StorageStatus struct {
DiskUsed int64
@ -146,3 +416,78 @@ func (store *Store) StorageStatus(ctx context.Context) (_ StorageStatus, err err
DiskFree: diskFree,
}, nil
}
type storedPieceAccess struct {
storage.BlobInfo
store *Store
pieceID storj.PieceID
}
func newStoredPieceAccess(store *Store, blobInfo storage.BlobInfo) (storedPieceAccess, error) {
pieceID, err := storj.PieceIDFromBytes(blobInfo.BlobRef().Key)
if err != nil {
return storedPieceAccess{}, err
}
return storedPieceAccess{
BlobInfo: blobInfo,
store: store,
pieceID: pieceID,
}, nil
}
// PieceID returns the piece ID of the piece
func (access storedPieceAccess) PieceID() storj.PieceID {
return access.pieceID
}
// Satellite returns the satellite ID that owns the piece
func (access storedPieceAccess) Satellite() (storj.NodeID, error) {
return storj.NodeIDFromBytes(access.BlobRef().Namespace)
}
// ContentSize gives the size of the piece content (not including the piece header, if applicable)
func (access storedPieceAccess) ContentSize(ctx context.Context) (size int64, err error) {
defer mon.Task()(&ctx)(&err)
stat, err := access.Stat(ctx)
if err != nil {
return 0, err
}
size = stat.Size()
if access.StorageFormatVersion() >= filestore.FormatV1 {
size -= V1PieceHeaderReservedArea
}
return size, nil
}
// CreationTime returns the piece creation time as given in the original PieceHash (which is likely
// not the same as the file mtime). This requires opening the file and unmarshaling the piece
// header. If exact precision is not required, ModTime() may be a better solution.
func (access storedPieceAccess) CreationTime(ctx context.Context) (cTime time.Time, err error) {
defer mon.Task()(&ctx)(&err)
satellite, err := access.Satellite()
if err != nil {
return time.Time{}, err
}
reader, err := access.store.ReaderWithStorageFormat(ctx, satellite, access.PieceID(), access.StorageFormatVersion())
if err != nil {
return time.Time{}, err
}
header, err := reader.GetPieceHeader()
if err != nil {
return time.Time{}, err
}
return header.CreationTime, nil
}
// ModTime returns a less-precise piece creation time than CreationTime, but is generally
// much faster. This gets the piece creation time from the filesystem instead of the
// piece header.
func (access storedPieceAccess) ModTime(ctx context.Context) (mTime time.Time, err error) {
defer mon.Task()(&ctx)(&err)
stat, err := access.Stat(ctx)
if err != nil {
return time.Time{}, err
}
return stat.ModTime(), nil
}

View File

@ -5,20 +5,29 @@ package pieces_test
import (
"bytes"
"context"
"io"
"io/ioutil"
"os"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
"storj.io/storj/internal/memory"
"storj.io/storj/internal/testcontext"
"storj.io/storj/internal/testidentity"
"storj.io/storj/internal/testrand"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/pkcrypto"
"storj.io/storj/pkg/storj"
"storj.io/storj/storage"
"storj.io/storj/storage/filestore"
"storj.io/storj/storagenode"
"storj.io/storj/storagenode/pieces"
"storj.io/storj/storagenode/storagenodedb/storagenodedbtest"
)
func TestPieces(t *testing.T) {
@ -28,10 +37,10 @@ func TestPieces(t *testing.T) {
dir, err := filestore.NewDir(ctx.Dir("pieces"))
require.NoError(t, err)
blobs := filestore.New(dir)
blobs := filestore.New(zaptest.NewLogger(t), dir)
defer ctx.Check(blobs.Close)
store := pieces.NewStore(zaptest.NewLogger(t), blobs)
store := pieces.NewStore(zaptest.NewLogger(t), blobs, nil, nil)
satelliteID := testidentity.MustPregeneratedSignedIdentity(0, storj.LatestIDVersion()).ID
pieceID := storj.NewPieceID()
@ -53,7 +62,7 @@ func TestPieces(t *testing.T) {
assert.Equal(t, hash.Sum(nil), writer.Hash())
// commit
require.NoError(t, writer.Commit(ctx))
require.NoError(t, writer.Commit(ctx, &pb.PieceHeader{}))
// after commit we should be able to call cancel without an error
require.NoError(t, writer.Cancel(ctx))
}
@ -120,10 +129,309 @@ func TestPieces(t *testing.T) {
// cancel writing
require.NoError(t, writer.Cancel(ctx))
// commit should fail, since writing was already cancelled
require.Error(t, writer.Commit(ctx))
require.Error(t, writer.Commit(ctx, &pb.PieceHeader{}))
// read should fail
_, err = store.Reader(ctx, satelliteID, cancelledPieceID)
assert.Error(t, err)
}
}
func writeAPiece(ctx context.Context, t testing.TB, store *pieces.Store, satelliteID storj.NodeID, pieceID storj.PieceID, data []byte, atTime time.Time, expireTime *time.Time, formatVersion storage.FormatVersion) {
tStore := &pieces.StoreForTest{store}
writer, err := tStore.WriterForFormatVersion(ctx, satelliteID, pieceID, formatVersion)
require.NoError(t, err)
_, err = writer.Write(data)
require.NoError(t, err)
size := writer.Size()
assert.Equal(t, int64(len(data)), size)
limit := pb.OrderLimit{}
if expireTime != nil {
limit.PieceExpiration = *expireTime
}
err = writer.Commit(ctx, &pb.PieceHeader{
Hash: writer.Hash(),
CreationTime: atTime,
OrderLimit: limit,
})
require.NoError(t, err)
}
func verifyPieceHandle(t testing.TB, reader *pieces.Reader, expectDataLen int, expectCreateTime time.Time, expectFormat storage.FormatVersion) {
assert.Equal(t, expectFormat, reader.StorageFormatVersion())
assert.Equal(t, int64(expectDataLen), reader.Size())
if expectFormat != filestore.FormatV0 {
pieceHeader, err := reader.GetPieceHeader()
require.NoError(t, err)
assert.Equal(t, expectFormat, storage.FormatVersion(pieceHeader.FormatVersion))
assert.Equal(t, expectCreateTime.UTC(), pieceHeader.CreationTime.UTC())
}
}
func tryOpeningAPiece(ctx context.Context, t testing.TB, store *pieces.Store, satelliteID storj.NodeID, pieceID storj.PieceID, expectDataLen int, expectTime time.Time, expectFormat storage.FormatVersion) {
reader, err := store.Reader(ctx, satelliteID, pieceID)
require.NoError(t, err)
verifyPieceHandle(t, reader, expectDataLen, expectTime, expectFormat)
require.NoError(t, reader.Close())
reader, err = store.ReaderWithStorageFormat(ctx, satelliteID, pieceID, expectFormat)
require.NoError(t, err)
verifyPieceHandle(t, reader, expectDataLen, expectTime, expectFormat)
require.NoError(t, reader.Close())
}
// Test that the piece store can still read V0 pieces that might be left over from a previous
// version, as well as V1 pieces.
func TestMultipleStorageFormatVersions(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
blobs, err := filestore.NewAt(zaptest.NewLogger(t), ctx.Dir("store"))
require.NoError(t, err)
defer ctx.Check(blobs.Close)
store := pieces.NewStore(zaptest.NewLogger(t), blobs, nil, nil)
const pieceSize = 1024
var (
data = testrand.Bytes(pieceSize)
satellite = testrand.NodeID()
v0PieceID = testrand.PieceID()
v1PieceID = testrand.PieceID()
now = time.Now().UTC()
)
// write a V0 piece
writeAPiece(ctx, t, store, satellite, v0PieceID, data, now, nil, filestore.FormatV0)
// write a V1 piece
writeAPiece(ctx, t, store, satellite, v1PieceID, data, now, nil, filestore.FormatV1)
// look up the different pieces with Reader and ReaderWithStorageFormat
tryOpeningAPiece(ctx, t, store, satellite, v0PieceID, len(data), now, filestore.FormatV0)
tryOpeningAPiece(ctx, t, store, satellite, v1PieceID, len(data), now, filestore.FormatV1)
// write a V1 piece with the same ID as the V0 piece (to simulate it being rewritten as
// V1 during a migration)
differentData := append(data, 111, 104, 97, 105)
writeAPiece(ctx, t, store, satellite, v0PieceID, differentData, now, nil, filestore.FormatV1)
// if we try to access the piece at that key, we should see only the V1 piece
tryOpeningAPiece(ctx, t, store, satellite, v0PieceID, len(differentData), now, filestore.FormatV1)
// unless we ask specifically for a V0 piece
reader, err := store.ReaderWithStorageFormat(ctx, satellite, v0PieceID, filestore.FormatV0)
require.NoError(t, err)
verifyPieceHandle(t, reader, len(data), now, filestore.FormatV0)
require.NoError(t, reader.Close())
// delete the v0PieceID; both the V0 and the V1 pieces should go away
err = store.Delete(ctx, satellite, v0PieceID)
require.NoError(t, err)
reader, err = store.Reader(ctx, satellite, v0PieceID)
require.Error(t, err)
require.True(t, os.IsNotExist(err))
assert.Nil(t, reader)
}
func TestGetExpired(t *testing.T) {
storagenodedbtest.Run(t, func(t *testing.T, db storagenode.DB) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
v0PieceInfo, ok := db.V0PieceInfo().(pieces.V0PieceInfoDBForTest)
require.True(t, ok, "V0PieceInfoDB can not satisfy V0PieceInfoDBForTest")
expirationInfo := db.PieceExpirationDB()
store := pieces.NewStore(zaptest.NewLogger(t), db.Pieces(), v0PieceInfo, expirationInfo)
now := time.Now().UTC()
testDates := []struct {
years, months, days int
}{
{-20, -1, -2},
{1, 6, 14},
{0, -1, 0},
{0, 0, 1},
}
testPieces := make([]pieces.Info, 4)
for p := range testPieces {
testPieces[p] = pieces.Info{
SatelliteID: testrand.NodeID(),
PieceID: testrand.PieceID(),
OrderLimit: &pb.OrderLimit{},
UplinkPieceHash: &pb.PieceHash{},
PieceExpiration: now.AddDate(testDates[p].years, testDates[p].months, testDates[p].days),
}
}
// put testPieces 0 and 1 in the v0 pieceinfo db
err := v0PieceInfo.Add(ctx, &testPieces[0])
require.NoError(t, err)
err = v0PieceInfo.Add(ctx, &testPieces[1])
require.NoError(t, err)
// put testPieces 2 and 3 in the piece_expirations db
err = expirationInfo.SetExpiration(ctx, testPieces[2].SatelliteID, testPieces[2].PieceID, testPieces[2].PieceExpiration)
require.NoError(t, err)
err = expirationInfo.SetExpiration(ctx, testPieces[3].SatelliteID, testPieces[3].PieceID, testPieces[3].PieceExpiration)
require.NoError(t, err)
// GetExpired with limit 0 gives empty result
expired, err := store.GetExpired(ctx, now, 0)
require.NoError(t, err)
assert.Empty(t, expired)
// GetExpired with limit 1 gives only 1 result, although there are 2 possible
expired, err = store.GetExpired(ctx, now, 1)
require.NoError(t, err)
require.Len(t, expired, 1)
assert.Equal(t, testPieces[2].PieceID, expired[0].PieceID)
assert.Equal(t, testPieces[2].SatelliteID, expired[0].SatelliteID)
assert.False(t, expired[0].InPieceInfo)
// GetExpired with 2 or more gives all expired results correctly; one from
// piece_expirations, and one from pieceinfo
expired, err = store.GetExpired(ctx, now, 1000)
require.NoError(t, err)
require.Len(t, expired, 2)
assert.Equal(t, testPieces[2].PieceID, expired[0].PieceID)
assert.Equal(t, testPieces[2].SatelliteID, expired[0].SatelliteID)
assert.False(t, expired[0].InPieceInfo)
assert.Equal(t, testPieces[0].PieceID, expired[1].PieceID)
assert.Equal(t, testPieces[0].SatelliteID, expired[1].SatelliteID)
assert.True(t, expired[1].InPieceInfo)
})
}
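For clarity, here is a minimal sketch (not part of this commit) of the combining order that TestGetExpired above relies on: the store consults the new piece_expirations table first and then tops up from the deprecated V0 pieceinfo table, so piece_expirations results come back first. The helper name getExpiredCombined and its standalone form are illustrative assumptions; only the two GetExpired methods it calls appear in this change, and the store's real internal logic may differ.

package example

import (
	"context"
	"time"

	"storj.io/storj/storagenode/pieces"
)

// getExpiredCombined queries piece_expirations first, then fills any remaining
// quota from the V0 pieceinfo table. Illustrative sketch only.
func getExpiredCombined(ctx context.Context, expirationInfo pieces.PieceExpirationDB, v0PieceInfo pieces.V0PieceInfoDB, now time.Time, limit int64) ([]pieces.ExpiredInfo, error) {
	expired, err := expirationInfo.GetExpired(ctx, now, limit)
	if err != nil {
		return nil, err
	}
	if remaining := limit - int64(len(expired)); remaining > 0 {
		fromV0, err := v0PieceInfo.GetExpired(ctx, now, remaining)
		if err != nil {
			return nil, err
		}
		expired = append(expired, fromV0...)
	}
	return expired, nil
}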
func TestOverwriteV0WithV1(t *testing.T) {
storagenodedbtest.Run(t, func(t *testing.T, db storagenode.DB) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
v0PieceInfo, ok := db.V0PieceInfo().(pieces.V0PieceInfoDBForTest)
require.True(t, ok, "V0PieceInfoDB can not satisfy V0PieceInfoDBForTest")
expirationInfo := db.PieceExpirationDB()
store := pieces.NewStore(zaptest.NewLogger(t), db.Pieces(), v0PieceInfo, expirationInfo)
satelliteID := testrand.NodeID()
pieceID := testrand.PieceID()
v0Data := testrand.Bytes(4 * memory.MiB)
v1Data := testrand.Bytes(3 * memory.MiB)
// write the piece as V0. We can't provide the expireTime via writeAPiece, because
// BlobWriter.Commit only knows how to store expiration times in piece_expirations.
v0CreateTime := time.Now().UTC()
v0ExpireTime := v0CreateTime.AddDate(5, 0, 0)
writeAPiece(ctx, t, store, satelliteID, pieceID, v0Data, v0CreateTime, nil, filestore.FormatV0)
// now put the piece in the pieceinfo db directly, because store won't do that for us.
// this is where the expireTime takes effect.
err := v0PieceInfo.Add(ctx, &pieces.Info{
SatelliteID: satelliteID,
PieceID: pieceID,
PieceSize: int64(len(v0Data)),
PieceCreation: v0CreateTime,
PieceExpiration: v0ExpireTime,
OrderLimit: &pb.OrderLimit{},
UplinkPieceHash: &pb.PieceHash{},
})
require.NoError(t, err)
// ensure we can see it via store.Reader
{
reader, err := store.Reader(ctx, satelliteID, pieceID)
require.NoError(t, err)
assert.Equal(t, int64(len(v0Data)), reader.Size())
assert.Equal(t, filestore.FormatV0, reader.StorageFormatVersion())
gotData, err := ioutil.ReadAll(reader)
require.NoError(t, err)
assert.Equal(t, v0Data, gotData)
require.NoError(t, reader.Close())
}
// ensure we can see it via WalkSatellitePieces
calledTimes := 0
err = store.WalkSatellitePieces(ctx, satelliteID, func(access pieces.StoredPieceAccess) error {
calledTimes++
require.Equal(t, 1, calledTimes)
gotCreateTime, err := access.CreationTime(ctx)
require.NoError(t, err)
assert.Equal(t, v0CreateTime, gotCreateTime)
gotSize, err := access.ContentSize(ctx)
require.NoError(t, err)
assert.Equal(t, int64(len(v0Data)), gotSize)
return nil
})
require.NoError(t, err)
// now "overwrite" the piece (write a new blob with the same id, but with V1 storage)
v1CreateTime := time.Now().UTC()
v1ExpireTime := v1CreateTime.AddDate(5, 0, 0)
writeAPiece(ctx, t, store, satelliteID, pieceID, v1Data, v1CreateTime, &v1ExpireTime, filestore.FormatV1)
// ensure we can see it (the new piece) via store.Reader
{
reader, err := store.Reader(ctx, satelliteID, pieceID)
require.NoError(t, err)
assert.Equal(t, int64(len(v1Data)), reader.Size())
assert.Equal(t, filestore.FormatV1, reader.StorageFormatVersion())
gotData, err := ioutil.ReadAll(reader)
require.NoError(t, err)
assert.Equal(t, v1Data, gotData)
require.NoError(t, reader.Close())
}
// now _both_ pieces should show up under WalkSatellitePieces. this may
// be counter-intuitive, but the V0 piece still exists for now (so we can avoid
// hitting the pieceinfo db with every new piece write). I believe this is OK, because
// (a) I don't think that writing different pieces with the same piece ID is a normal
// use case, unless we make a V0->V1 migrator tool, which should know about these
// semantics; (b) the V0 piece should not ever become visible again to the user; it
// should not be possible under normal conditions to delete one without deleting the
// other.
calledTimes = 0
err = store.WalkSatellitePieces(ctx, satelliteID, func(access pieces.StoredPieceAccess) error {
calledTimes++
switch calledTimes {
case 1:
// expect the V1 piece
assert.Equal(t, pieceID, access.PieceID())
assert.Equal(t, filestore.FormatV1, access.StorageFormatVersion())
gotCreateTime, err := access.CreationTime(ctx)
require.NoError(t, err)
assert.Equal(t, v1CreateTime, gotCreateTime)
gotSize, err := access.ContentSize(ctx)
require.NoError(t, err)
assert.Equal(t, int64(len(v1Data)), gotSize)
case 2:
// expect the V0 piece
assert.Equal(t, pieceID, access.PieceID())
assert.Equal(t, filestore.FormatV0, access.StorageFormatVersion())
gotCreateTime, err := access.CreationTime(ctx)
require.NoError(t, err)
assert.Equal(t, v0CreateTime, gotCreateTime)
gotSize, err := access.ContentSize(ctx)
require.NoError(t, err)
assert.Equal(t, int64(len(v0Data)), gotSize)
default:
t.Fatalf("calledTimes should be 1 or 2, but it is %d", calledTimes)
}
return nil
})
require.NoError(t, err)
// delete the pieceID; this should get both V0 and V1
err = store.Delete(ctx, satelliteID, pieceID)
require.NoError(t, err)
err = store.WalkSatellitePieces(ctx, satelliteID, func(access pieces.StoredPieceAccess) error {
t.Fatalf("this should not have been called. pieceID=%x, format=%d", access.PieceID(), access.StorageFormatVersion())
return nil
})
require.NoError(t, err)
})
}

View File

@ -120,7 +120,6 @@ type Endpoint struct {
monitor *monitor.Service
store *pieces.Store
pieceinfo pieces.DB
orders orders.DB
usage bandwidth.DB
usedSerials UsedSerials
@ -129,7 +128,7 @@ type Endpoint struct {
}
// NewEndpoint creates a new piecestore endpoint.
func NewEndpoint(log *zap.Logger, signer signing.Signer, trust *trust.Pool, monitor *monitor.Service, store *pieces.Store, pieceinfo pieces.DB, orders orders.DB, usage bandwidth.DB, usedSerials UsedSerials, config Config) (*Endpoint, error) {
func NewEndpoint(log *zap.Logger, signer signing.Signer, trust *trust.Pool, monitor *monitor.Service, store *pieces.Store, orders orders.DB, usage bandwidth.DB, usedSerials UsedSerials, config Config) (*Endpoint, error) {
return &Endpoint{
log: log,
config: config,
@ -139,7 +138,6 @@ func NewEndpoint(log *zap.Logger, signer signing.Signer, trust *trust.Pool, moni
monitor: monitor,
store: store,
pieceinfo: pieceinfo,
orders: orders,
usage: usage,
usedSerials: usedSerials,
@ -167,11 +165,7 @@ func (endpoint *Endpoint) Delete(ctx context.Context, delete *pb.PieceDeleteRequ
return nil, Error.Wrap(err)
}
// TODO: parallelize this and maybe return early
pieceInfoErr := endpoint.pieceinfo.Delete(ctx, delete.Limit.SatelliteId, delete.Limit.PieceId)
pieceErr := endpoint.store.Delete(ctx, delete.Limit.SatelliteId, delete.Limit.PieceId)
if err := errs.Combine(pieceInfoErr, pieceErr); err != nil {
if err := endpoint.store.Delete(ctx, delete.Limit.SatelliteId, delete.Limit.PieceId); err != nil {
// explicitly ignoring error because the errors
// TODO: add more debug info
endpoint.log.Error("delete failed", zap.Stringer("Piece ID", delete.Limit.PieceId), zap.Error(err))
@ -327,44 +321,36 @@ func (endpoint *Endpoint) Upload(stream pb.Piecestore_UploadServer) (err error)
}
if message.Done != nil {
expectedHash := pieceWriter.Hash()
if err := endpoint.VerifyPieceHash(ctx, limit, message.Done, expectedHash); err != nil {
calculatedHash := pieceWriter.Hash()
if err := endpoint.VerifyPieceHash(ctx, limit, message.Done, calculatedHash); err != nil {
return err // TODO: report grpc status internal server error
}
if message.Done.PieceSize != pieceWriter.Size() {
return ErrProtocol.New("Size of finished piece does not match size declared by uplink! %d != %d",
message.Done.GetPieceSize(), pieceWriter.Size())
message.Done.PieceSize, pieceWriter.Size())
}
if err := pieceWriter.Commit(ctx); err != nil {
return ErrInternal.Wrap(err) // TODO: report grpc status internal server error
}
// TODO: do this in a goroutine
{
// TODO: maybe this should be as a pieceWriter.Commit(ctx, info)
info := &pieces.Info{
SatelliteID: limit.SatelliteId,
PieceID: limit.PieceId,
PieceSize: pieceWriter.Size(),
PieceCreation: limit.OrderCreation,
PieceExpiration: limit.PieceExpiration,
OrderLimit: limit,
UplinkPieceHash: message.Done,
info := &pb.PieceHeader{
Hash: calculatedHash,
CreationTime: message.Done.Timestamp,
Signature: message.Done.GetSignature(),
OrderLimit: *limit,
}
if err := endpoint.pieceinfo.Add(ctx, info); err != nil {
ignoreCancelContext := context.Background()
deleteErr := endpoint.store.Delete(ignoreCancelContext, limit.SatelliteId, limit.PieceId)
return ErrInternal.Wrap(errs.Combine(err, deleteErr))
if err := pieceWriter.Commit(ctx, info); err != nil {
return ErrInternal.Wrap(err) // TODO: report grpc status internal server error
}
if !limit.PieceExpiration.IsZero() {
err := endpoint.store.SetExpiration(ctx, limit.SatelliteId, limit.PieceId, limit.PieceExpiration)
if err != nil {
return ErrInternal.Wrap(err) // TODO: report grpc status internal server error
}
}
}
storageNodeHash, err := signing.SignPieceHash(ctx, endpoint.signer, &pb.PieceHash{
PieceId: limit.PieceId,
Hash: expectedHash,
Hash: calculatedHash,
PieceSize: pieceWriter.Size(),
Timestamp: time.Now(),
})
@ -589,6 +575,53 @@ func (endpoint *Endpoint) saveOrder(ctx context.Context, limit *pb.OrderLimit, o
}
}
// ------------------------------------------------------------------------------------------------
// On the correctness of using access.ModTime() in place of the more precise access.CreationTime()
// in Retain():
// ------------------------------------------------------------------------------------------------
//
// Background: for pieces not stored with filestore.FormatV0, the access.CreationTime() value can
// only be retrieved by opening the piece file, and reading and unmarshaling the piece header.
// This is far slower than access.ModTime(), which gets the file modification time from the file
// system and only needs to do a stat(2) on the piece file. If we can make Retain() work with
// ModTime, we should.
//
// Possibility of mismatch: We do not force or require piece file modification times to be equal to
// or close to the CreationTime specified by the uplink, but we do expect that piece files will be
// written to the filesystem _after_ the CreationTime. We make the assumption already that storage
// nodes and satellites and uplinks have system clocks that are very roughly in sync (that is, they
// are out of sync with each other by less than an hour of real time, or whatever is configured as
// RetainTimeBuffer). So if an uplink is not lying about CreationTime and it uploads a piece that
// makes it to a storagenode's disk as quickly as possible, even in the worst-synchronized-clocks
// case we can assume that `ModTime > (CreationTime - RetainTimeBuffer)`. We also allow for storage
// node operators doing file system manipulations after a piece has been written. If piece files
// are copied between volumes and their attributes are not preserved, it will be possible for their
// modification times to be changed to something later in time. This still preserves the inequality
// relationship mentioned above, `ModTime > (CreationTime - RetainTimeBuffer)`. We only stipulate
// that storage node operators must not artificially change blob file modification times to be in
// the past.
//
// If there is a mismatch: in most cases, a mismatch between ModTime and CreationTime has no
// effect. In certain remaining cases, the only effect is that a piece file which _should_ be
// garbage collected survives until the next round of garbage collection. The only really
// problematic case is when there is a relatively new piece file which was created _after_ this
// node's Retain bloom filter started being built on the satellite, and is recorded in this
// storage node's blob store before the Retain operation has completed. Then, it might be possible
// for that new piece to be garbage collected incorrectly, because it does not show up in the
// bloom filter and the node incorrectly concludes that it was created before the bloom filter was
// generated. But if the uplink is not lying about CreationTime, its clock drift relative to the
// storage node is less than `RetainTimeBuffer`, and the ModTime on the blob file is correctly set
// from the storage node's system time, then it is still true that `ModTime > (CreationTime -
// RetainTimeBuffer)`. Since such a piece must have a CreationTime later than the bloom filter's
// creation date, its ModTime cannot fall before `creation date - RetainTimeBuffer`, and the
// mtime check in Retain() below will keep it from being deleted in this pass.
//
// The rule that storage node operators need to be aware of is only this: do not artificially set
// mtimes on blob files to be in the past. Let the filesystem manage mtimes. If blob files need to
// be moved or copied between locations, and this updates the mtime, that is ok. A secondary effect
// of this rule is that if the storage node's system clock needs to be changed forward by a
// nontrivial amount, mtimes on existing blobs should also be adjusted (by the same interval,
// ideally, but just running "touch" on all blobs is sufficient to avoid incorrect deletion of
// data).
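To make the rule above concrete, here is a minimal standalone sketch (not the endpoint's code; the helper name is invented) of the garbage-collection eligibility decision the comment describes: a blob is a deletion candidate only when its mtime predates `bloom filter creation time - RetainTimeBuffer` and its piece ID is absent from the bloom filter.

package example

import "time"

// eligibleForGC mirrors the Retain decision described in the comment above:
// keep anything newer than createdBefore, and among older blobs delete only
// those absent from the bloom filter. filterContains stands in for filter.Contains(pieceID).
func eligibleForGC(modTime, filterCreation time.Time, retainTimeBuffer time.Duration, filterContains bool) bool {
	createdBefore := filterCreation.Add(-retainTimeBuffer)
	if !modTime.Before(createdBefore) {
		// the blob may postdate the bloom filter; never delete it in this pass
		return false
	}
	return !filterContains
}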
// Retain keeps only piece ids specified in the request
func (endpoint *Endpoint) Retain(ctx context.Context, retainReq *pb.RetainRequest) (res *pb.RetainResponse, err error) {
defer mon.Task()(&ctx)(&err)
@ -613,48 +646,56 @@ func (endpoint *Endpoint) Retain(ctx context.Context, retainReq *pb.RetainReques
return nil, status.Error(codes.InvalidArgument, Error.Wrap(err).Error())
}
const limit = 1000
cursor := storj.PieceID{}
numDeleted := 0
hasMorePieces := true
for hasMorePieces {
// subtract some time to leave room for clock difference between the satellite and storage node
createdBefore := retainReq.GetCreationDate().Add(-endpoint.config.RetainTimeBuffer)
// subtract some time to leave room for clock difference between the satellite and storage node
createdBefore := retainReq.GetCreationDate().Add(-endpoint.config.RetainTimeBuffer)
pieceIDs, err := endpoint.pieceinfo.GetPieceIDs(ctx, peer.ID, createdBefore, limit, cursor)
if err != nil {
return nil, status.Error(codes.Internal, Error.Wrap(err).Error())
}
for _, pieceID := range pieceIDs {
cursor = pieceID
endpoint.log.Info("Prepared to run a Retain request.",
zap.Time("createdBefore", createdBefore),
zap.Int64("filterSize", filter.Size()),
zap.String("satellite", peer.ID.String()))
if !filter.Contains(pieceID) {
endpoint.log.Sugar().Debugf("About to delete piece id (%s) from satellite (%s). RetainStatus: %s", pieceID.String(), peer.ID.String(), endpoint.config.RetainStatus.String())
// if retain status is enabled, delete pieceid
if endpoint.config.RetainStatus == RetainEnabled {
if err = endpoint.store.Delete(ctx, peer.ID, pieceID); err != nil {
endpoint.log.Error("failed to delete a piece", zap.Error(err))
// continue because if we fail to delete from file system,
// we need to keep the pieceinfo so we can delete next time
continue
}
if err = endpoint.pieceinfo.Delete(ctx, peer.ID, pieceID); err != nil {
endpoint.log.Error("failed to delete piece info", zap.Error(err))
}
}
numDeleted++
}
}
hasMorePieces = (len(pieceIDs) == limit)
// We call Gosched() here because the GC process is expected to be long and we want to keep it at low priority,
err = endpoint.store.WalkSatellitePieces(ctx, peer.ID, func(access pieces.StoredPieceAccess) error {
// We call Gosched() when done because the GC process is expected to be long and we want to keep it at low priority,
// so other goroutines can continue serving requests.
runtime.Gosched()
}
defer runtime.Gosched()
// See the comment above the Retain() function for a discussion on the correctness
// of using ModTime in place of the more precise CreationTime.
mTime, err := access.ModTime(ctx)
if err != nil {
endpoint.log.Error("failed to determine mtime of blob", zap.Error(err))
// but continue iterating.
return nil
}
if !mTime.Before(createdBefore) {
return nil
}
pieceID := access.PieceID()
if !filter.Contains(pieceID) {
endpoint.log.Debug("About to delete piece id",
zap.String("satellite", peer.ID.String()),
zap.String("pieceID", pieceID.String()),
zap.String("retainStatus", endpoint.config.RetainStatus.String()))
// if retain status is enabled, delete pieceid
if endpoint.config.RetainStatus == RetainEnabled {
if err = endpoint.store.Delete(ctx, peer.ID, pieceID); err != nil {
endpoint.log.Error("failed to delete piece",
zap.String("satellite", peer.ID.String()),
zap.String("pieceID", pieceID.String()),
zap.Error(err))
return nil
}
}
numDeleted++
}
return nil
})
if err != nil {
return nil, status.Error(codes.Internal, Error.Wrap(err).Error())
}
mon.IntVal("garbage_collection_pieces_deleted").Observe(int64(numDeleted))
endpoint.log.Sugar().Debugf("Deleted %d pieces during retain. RetainStatus: %s", numDeleted, endpoint.config.RetainStatus.String())
return &pb.RetainResponse{}, nil

View File

@ -4,6 +4,7 @@
package piecestore_test
import (
"context"
"crypto/tls"
"crypto/x509"
"io"
@ -235,7 +236,7 @@ func TestDownload(t *testing.T) {
{ // should err with piece ID not specified
pieceID: storj.PieceID{2},
action: pb.PieceAction_GET,
errs: []string{"no such file or directory", "The system cannot find the path specified"},
errs: []string{"file does not exist", "The system cannot find the path specified"},
},
{ // should successfully download data
pieceID: orderLimit.PieceId,
@ -506,8 +507,7 @@ func TestRetain(t *testing.T) {
storagenodedbtest.Run(t, func(t *testing.T, db storagenode.DB) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
pieceInfos := db.PieceInfo()
store := pieces.NewStore(zaptest.NewLogger(t), db.Pieces())
store := pieces.NewStore(zaptest.NewLogger(t), db.Pieces(), db.V0PieceInfo(), db.PieceExpirationDB())
const numPieces = 1000
const numPiecesToKeep = 990
@ -532,15 +532,15 @@ func TestRetain(t *testing.T) {
require.NoError(t, err)
uplink := testidentity.MustPregeneratedSignedIdentity(3, storj.LatestIDVersion())
endpointEnabled, err := ps.NewEndpoint(zaptest.NewLogger(t), nil, trusted, nil, store, pieceInfos, nil, nil, nil, ps.Config{
endpointEnabled, err := ps.NewEndpoint(zaptest.NewLogger(t), nil, trusted, nil, store, nil, nil, nil, ps.Config{
RetainStatus: ps.RetainEnabled,
})
require.NoError(t, err)
endpointDisabled, err := ps.NewEndpoint(zaptest.NewLogger(t), nil, trusted, nil, store, pieceInfos, nil, nil, nil, ps.Config{
endpointDisabled, err := ps.NewEndpoint(zaptest.NewLogger(t), nil, trusted, nil, store, nil, nil, nil, ps.Config{
RetainStatus: ps.RetainDisabled,
})
require.NoError(t, err)
endpointDebug, err := ps.NewEndpoint(zaptest.NewLogger(t), nil, trusted, nil, store, pieceInfos, nil, nil, nil, ps.Config{
endpointDebug, err := ps.NewEndpoint(zaptest.NewLogger(t), nil, trusted, nil, store, nil, nil, nil, ps.Config{
RetainStatus: ps.RetainDebug,
})
require.NoError(t, err)
@ -597,10 +597,11 @@ func TestRetain(t *testing.T) {
OrderLimit: &pb.OrderLimit{},
}
err = pieceInfos.Add(ctx, &pieceinfo0)
v0db := store.GetV0PieceInfoDB().(pieces.V0PieceInfoDBForTest)
err = v0db.Add(ctx, &pieceinfo0)
require.NoError(t, err)
err = pieceInfos.Add(ctx, &pieceinfo1)
err = v0db.Add(ctx, &pieceinfo1)
require.NoError(t, err)
}
@ -624,11 +625,11 @@ func TestRetain(t *testing.T) {
_, err = endpointDebug.Retain(ctxSatellite0, &retainReq)
require.NoError(t, err)
satellite1Pieces, err := pieceInfos.GetPieceIDs(ctx, satellite1.ID, recentTime.Add(time.Duration(5)*time.Second), numPieces, storj.PieceID{})
satellite1Pieces, err := getAllPieceIDs(ctx, store, satellite1.ID, recentTime.Add(time.Duration(5)*time.Second))
require.NoError(t, err)
require.Equal(t, numPieces, len(satellite1Pieces))
satellite0Pieces, err := pieceInfos.GetPieceIDs(ctx, satellite0.ID, recentTime.Add(time.Duration(5)*time.Second), numPieces, storj.PieceID{})
satellite0Pieces, err := getAllPieceIDs(ctx, store, satellite0.ID, recentTime.Add(time.Duration(5)*time.Second))
require.NoError(t, err)
require.Equal(t, numPieces, len(satellite0Pieces))
@ -637,13 +638,13 @@ func TestRetain(t *testing.T) {
require.NoError(t, err)
// check we have deleted nothing for satellite1
satellite1Pieces, err = pieceInfos.GetPieceIDs(ctx, satellite1.ID, recentTime.Add(time.Duration(5)*time.Second), numPieces, storj.PieceID{})
satellite1Pieces, err = getAllPieceIDs(ctx, store, satellite1.ID, recentTime.Add(time.Duration(5)*time.Second))
require.NoError(t, err)
require.Equal(t, numPieces, len(satellite1Pieces))
// check we did not delete recent pieces or retained pieces for satellite0
// also check that we deleted the correct pieces for satellite0
satellite0Pieces, err = pieceInfos.GetPieceIDs(ctx, satellite0.ID, recentTime.Add(time.Duration(5)*time.Second), numPieces, storj.PieceID{})
satellite0Pieces, err = getAllPieceIDs(ctx, store, satellite0.ID, recentTime.Add(time.Duration(5)*time.Second))
require.NoError(t, err)
require.Equal(t, numPieces-numOldPieces, len(satellite0Pieces))
@ -661,6 +662,21 @@ func TestRetain(t *testing.T) {
})
}
func getAllPieceIDs(ctx context.Context, store *pieces.Store, satellite storj.NodeID, createdBefore time.Time) (pieceIDs []storj.PieceID, err error) {
err = store.WalkSatellitePieces(ctx, satellite, func(pieceAccess pieces.StoredPieceAccess) error {
mTime, err := pieceAccess.CreationTime(ctx)
if err != nil {
return err
}
if !mTime.Before(createdBefore) {
return nil
}
pieceIDs = append(pieceIDs, pieceAccess.PieceID())
return nil
})
return pieceIDs, err
}
// generateTestIDs generates n piece ids
func generateTestIDs(n int) []storj.PieceID {
ids := make([]storj.PieceID, n)

View File

@ -5,7 +5,6 @@ package piecestore_test
import (
"context"
"fmt"
"testing"
"time"
@ -26,7 +25,8 @@ import (
const oneWeek = 7 * 24 * time.Hour
func TestOrderLimitPutValidation(t *testing.T) {
for i, tt := range []struct {
for _, tt := range []struct {
testName string
useUnknownSatellite bool
pieceID storj.PieceID
action pb.PieceAction
@ -38,7 +38,8 @@ func TestOrderLimitPutValidation(t *testing.T) {
availableSpace int64
err string
}{
{ // unapproved satellite id
{
testName: "unapproved satellite id",
useUnknownSatellite: true,
pieceID: storj.PieceID{1},
action: pb.PieceAction_PUT,
@ -48,7 +49,8 @@ func TestOrderLimitPutValidation(t *testing.T) {
limit: memory.KiB.Int64(),
err: " is untrusted",
},
{ // approved satellite id
{
testName: "approved satellite id",
pieceID: storj.PieceID{2},
action: pb.PieceAction_PUT,
serialNumber: storj.SerialNumber{2},
@ -56,7 +58,8 @@ func TestOrderLimitPutValidation(t *testing.T) {
orderExpiration: oneWeek,
limit: 10 * memory.KiB.Int64(),
},
{ // wrong action type
{
testName: "wrong action type",
pieceID: storj.PieceID{3},
action: pb.PieceAction_GET,
serialNumber: storj.SerialNumber{3},
@ -65,7 +68,8 @@ func TestOrderLimitPutValidation(t *testing.T) {
limit: memory.KiB.Int64(),
err: "expected put or put repair action got GET",
},
{ // piece expired
{
testName: "piece expired",
pieceID: storj.PieceID{4},
action: pb.PieceAction_PUT,
serialNumber: storj.SerialNumber{4},
@ -74,7 +78,8 @@ func TestOrderLimitPutValidation(t *testing.T) {
limit: memory.KiB.Int64(),
err: "piece expired:",
},
{ // limit is negative
{
testName: "limit is negative",
pieceID: storj.PieceID{5},
action: pb.PieceAction_PUT,
serialNumber: storj.SerialNumber{5},
@ -83,7 +88,8 @@ func TestOrderLimitPutValidation(t *testing.T) {
limit: -1,
err: "order limit is negative",
},
{ // order limit expired
{
testName: "order limit expired",
pieceID: storj.PieceID{6},
action: pb.PieceAction_PUT,
serialNumber: storj.SerialNumber{6},
@ -92,7 +98,8 @@ func TestOrderLimitPutValidation(t *testing.T) {
limit: memory.KiB.Int64(),
err: "order expired:",
},
{ // allocated bandwidth limit
{
testName: "allocated bandwidth limit",
pieceID: storj.PieceID{7},
action: pb.PieceAction_PUT,
serialNumber: storj.SerialNumber{7},
@ -102,7 +109,8 @@ func TestOrderLimitPutValidation(t *testing.T) {
availableBandwidth: 5 * memory.KiB.Int64(),
err: "out of bandwidth",
},
{ // allocated space limit
{
testName: "allocated space limit",
pieceID: storj.PieceID{8},
action: pb.PieceAction_PUT,
serialNumber: storj.SerialNumber{8},
@ -113,69 +121,71 @@ func TestOrderLimitPutValidation(t *testing.T) {
err: "out of space",
},
} {
ctx := testcontext.New(t)
defer ctx.Cleanup()
tt := tt
t.Run(tt.testName, func(t *testing.T) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
planet, err := testplanet.New(t, 1, 1, 1)
require.NoError(t, err)
defer ctx.Check(planet.Shutdown)
planet.Start(ctx)
// set desirable bandwidth
setBandwidth(ctx, t, planet, tt.availableBandwidth)
// set desirable space
setSpace(ctx, t, planet, tt.availableSpace)
client, err := planet.Uplinks[0].DialPiecestore(ctx, planet.StorageNodes[0])
require.NoError(t, err)
defer ctx.Check(client.Close)
signer := signing.SignerFromFullIdentity(planet.Satellites[0].Identity)
satellite := planet.Satellites[0].Identity
if tt.useUnknownSatellite {
unapprovedSatellite, err := planet.NewIdentity()
planet, err := testplanet.New(t, 1, 1, 1)
require.NoError(t, err)
signer = signing.SignerFromFullIdentity(unapprovedSatellite)
satellite = unapprovedSatellite
}
defer ctx.Check(planet.Shutdown)
orderLimit, piecePrivateKey := GenerateOrderLimit(
t,
satellite.ID,
planet.StorageNodes[0].ID(),
tt.pieceID,
tt.action,
tt.serialNumber,
tt.pieceExpiration,
tt.orderExpiration,
tt.limit,
)
planet.Start(ctx)
orderLimit, err = signing.SignOrderLimit(ctx, signer, orderLimit)
require.NoError(t, err)
// set desirable bandwidth
setBandwidth(ctx, t, planet, tt.availableBandwidth)
// set desirable space
setSpace(ctx, t, planet, tt.availableSpace)
uploader, err := client.Upload(ctx, orderLimit, piecePrivateKey)
require.NoError(t, err)
client, err := planet.Uplinks[0].DialPiecestore(ctx, planet.StorageNodes[0])
require.NoError(t, err)
defer ctx.Check(client.Close)
var writeErr error
buffer := make([]byte, memory.KiB)
for i := 0; i < 10; i++ {
testrand.Read(buffer)
_, writeErr = uploader.Write(buffer)
if writeErr != nil {
break
signer := signing.SignerFromFullIdentity(planet.Satellites[0].Identity)
satellite := planet.Satellites[0].Identity
if tt.useUnknownSatellite {
unapprovedSatellite, err := planet.NewIdentity()
require.NoError(t, err)
signer = signing.SignerFromFullIdentity(unapprovedSatellite)
satellite = unapprovedSatellite
}
}
_, commitErr := uploader.Commit(ctx)
err = errs.Combine(writeErr, commitErr)
testIndex := fmt.Sprintf("#%d", i)
if tt.err != "" {
require.Error(t, err, testIndex)
require.Contains(t, err.Error(), tt.err, testIndex)
} else {
require.NoError(t, err, testIndex)
}
orderLimit, piecePrivateKey := GenerateOrderLimit(
t,
satellite.ID,
planet.StorageNodes[0].ID(),
tt.pieceID,
tt.action,
tt.serialNumber,
tt.pieceExpiration,
tt.orderExpiration,
tt.limit,
)
orderLimit, err = signing.SignOrderLimit(ctx, signer, orderLimit)
require.NoError(t, err)
uploader, err := client.Upload(ctx, orderLimit, piecePrivateKey)
require.NoError(t, err)
var writeErr error
buffer := make([]byte, memory.KiB)
for i := 0; i < 10; i++ {
testrand.Read(buffer)
_, writeErr = uploader.Write(buffer)
if writeErr != nil {
break
}
}
_, commitErr := uploader.Commit(ctx)
err = errs.Combine(writeErr, commitErr)
if tt.err != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.err)
} else {
require.NoError(t, err)
}
})
}
}
@ -318,17 +328,6 @@ func setSpace(ctx context.Context, t *testing.T, planet *testplanet.Planet, spac
for _, storageNode := range planet.StorageNodes {
availableSpace, err := storageNode.Storage2.Monitor.AvailableSpace(ctx)
require.NoError(t, err)
diff := (space - availableSpace) * -1
now := time.Now()
err = storageNode.DB.PieceInfo().Add(ctx, &pieces.Info{
SatelliteID: planet.Satellites[0].ID(),
PieceID: storj.PieceID{99},
PieceSize: diff,
PieceCreation: now,
PieceExpiration: time.Time{},
OrderLimit: &pb.OrderLimit{},
UplinkPieceHash: &pb.PieceHash{},
})
require.NoError(t, err)
pieces.StoreForTest{Store: storageNode.Storage2.Store}.ReserveSpace(availableSpace - space)
}
}

View File

@ -54,7 +54,7 @@ func New(log *zap.Logger, config Config) (*DB, error) {
if err != nil {
return nil, err
}
pieces := filestore.New(piecesDir)
pieces := filestore.New(log, piecesDir)
infodb, err := newInfo(config.Info2)
if err != nil {
@ -85,7 +85,7 @@ func NewTest(log *zap.Logger, storageDir string) (*DB, error) {
if err != nil {
return nil, err
}
pieces := filestore.New(piecesDir)
pieces := filestore.New(log, piecesDir)
infodb, err := NewInfoTest()
if err != nil {

View File

@ -15,7 +15,7 @@ import (
"github.com/zeebo/errs"
"go.uber.org/zap"
monkit "gopkg.in/spacemonkeygo/monkit.v2"
"gopkg.in/spacemonkeygo/monkit.v2"
"storj.io/storj/internal/dbutil"
"storj.io/storj/internal/dbutil/utccheck"
@ -52,10 +52,11 @@ type SQLDB interface {
// InfoDB implements information database for piecestore.
type InfoDB struct {
db SQLDB
bandwidthdb bandwidthdb
pieceinfo pieceinfo
location string
db SQLDB
bandwidthdb bandwidthdb
v0PieceInfo v0PieceInfo
pieceExpirationDB pieceExpirationDB
location string
}
// newInfo creates or opens InfoDB at the specified path.
@ -72,8 +73,9 @@ func newInfo(path string) (*InfoDB, error) {
dbutil.Configure(db, mon)
infoDb := &InfoDB{db: db}
infoDb.pieceinfo = pieceinfo{InfoDB: infoDb}
infoDb.v0PieceInfo = v0PieceInfo{InfoDB: infoDb}
infoDb.bandwidthdb = bandwidthdb{InfoDB: infoDb}
infoDb.pieceExpirationDB = pieceExpirationDB{InfoDB: infoDb}
infoDb.location = path
return infoDb, nil
@ -99,8 +101,9 @@ func NewInfoTest() (*InfoDB, error) {
}))
infoDb := &InfoDB{db: utccheck.New(db)}
infoDb.pieceinfo = pieceinfo{InfoDB: infoDb}
infoDb.v0PieceInfo = v0PieceInfo{InfoDB: infoDb}
infoDb.bandwidthdb = bandwidthdb{InfoDB: infoDb}
infoDb.pieceExpirationDB = pieceExpirationDB{InfoDB: infoDb}
return infoDb, nil
}
@ -404,6 +407,22 @@ func (db *InfoDB) Migration() *migrate.Migration {
return nil
}),
},
{
Description: "Start piece_expirations table, deprecate pieceinfo table",
Version: 15,
Action: migrate.SQL{
// new table to hold expiration data (and only expirations. no other pieceinfo)
`CREATE TABLE piece_expirations (
satellite_id BLOB NOT NULL,
piece_id BLOB NOT NULL,
piece_expiration TIMESTAMP NOT NULL, -- date when it can be deleted
deletion_failed_at TIMESTAMP,
PRIMARY KEY (satellite_id, piece_id)
)`,
`CREATE INDEX idx_piece_expirations_piece_expiration ON piece_expirations(piece_expiration)`,
`CREATE INDEX idx_piece_expirations_deletion_failed_at ON piece_expirations(deletion_failed_at)`,
},
},
},
}
}

View File

@ -0,0 +1,99 @@
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package storagenodedb
import (
"context"
"time"
"github.com/zeebo/errs"
"storj.io/storj/pkg/storj"
"storj.io/storj/storagenode/pieces"
)
type pieceExpirationDB struct {
*InfoDB
}
// PieceExpirationDB returns database for storing piece expiration data
func (db *DB) PieceExpirationDB() pieces.PieceExpirationDB { return db.info.PieceExpirationDB() }
// PieceExpirationDB returns database for storing piece expiration data
func (db *InfoDB) PieceExpirationDB() pieces.PieceExpirationDB { return &db.pieceExpirationDB }
// GetExpired gets piece IDs that expire or have expired before the given time
func (db *pieceExpirationDB) GetExpired(ctx context.Context, expiresBefore time.Time, limit int64) (expiredPieceIDs []pieces.ExpiredInfo, err error) {
defer mon.Task()(&ctx)(&err)
rows, err := db.db.QueryContext(ctx, `
SELECT satellite_id, piece_id
FROM piece_expirations
WHERE piece_expiration < ?
AND ((deletion_failed_at IS NULL) OR deletion_failed_at <> ?)
LIMIT ?
`, expiresBefore.UTC(), expiresBefore.UTC(), limit)
if err != nil {
return nil, ErrInfo.Wrap(err)
}
defer func() { err = errs.Combine(err, rows.Close()) }()
for rows.Next() {
var satelliteID storj.NodeID
var pieceID storj.PieceID
err = rows.Scan(&satelliteID, &pieceID)
if err != nil {
return nil, ErrInfo.Wrap(err)
}
expiredPieceIDs = append(expiredPieceIDs, pieces.ExpiredInfo{
SatelliteID: satelliteID,
PieceID: pieceID,
InPieceInfo: false,
})
}
return expiredPieceIDs, nil
}
// SetExpiration sets an expiration time for the given piece ID on the given satellite
func (db *pieceExpirationDB) SetExpiration(ctx context.Context, satellite storj.NodeID, pieceID storj.PieceID, expiresAt time.Time) (err error) {
defer mon.Task()(&ctx)(&err)
_, err = db.db.ExecContext(ctx, `
INSERT INTO piece_expirations(satellite_id, piece_id, piece_expiration)
VALUES (?,?,?)
`, satellite, pieceID, expiresAt.UTC())
return ErrInfo.Wrap(err)
}
// DeleteExpiration removes an expiration record for the given piece ID on the given satellite
func (db *pieceExpirationDB) DeleteExpiration(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID) (found bool, err error) {
defer mon.Task()(&ctx)(&err)
result, err := db.db.ExecContext(ctx, `
DELETE FROM piece_expirations
WHERE satellite_id = ? AND piece_id = ?
`, satelliteID, pieceID)
if err != nil {
return false, err
}
numRows, err := result.RowsAffected()
if err != nil {
return false, err
}
return numRows > 0, nil
}
// DeleteFailed marks an expiration record as having experienced a failure in deleting the piece
// from the disk
func (db *pieceExpirationDB) DeleteFailed(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID, when time.Time) (err error) {
defer mon.Task()(&ctx)(&err)
_, err = db.db.ExecContext(ctx, `
UPDATE piece_expirations
SET deletion_failed_at = ?
WHERE satellite_id = ?
AND piece_id = ?
`, when.UTC(), satelliteID, pieceID)
return ErrInfo.Wrap(err)
}
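A minimal usage sketch of the new expiration methods defined above, assuming the pieces.PieceExpirationDB interface exposes the same four methods as this implementation (the surrounding helper, the deletePiece callback, and the 48-hour expiration are illustrative assumptions): record an expiration at upload time, later collect expired pieces, and either clear the record after a successful delete or mark the failure so the next run retries it.

package example

import (
	"context"
	"time"

	"storj.io/storj/pkg/storj"
	"storj.io/storj/storagenode/pieces"
)

// expireAndCollect shows the intended lifecycle against PieceExpirationDB. Sketch only.
func expireAndCollect(ctx context.Context, db pieces.PieceExpirationDB, satellite storj.NodeID, piece storj.PieceID, deletePiece func(storj.NodeID, storj.PieceID) error) error {
	// at upload time: remember when the piece may be deleted
	if err := db.SetExpiration(ctx, satellite, piece, time.Now().Add(48*time.Hour)); err != nil {
		return err
	}
	// later, during collection: fetch a batch of expired pieces
	expired, err := db.GetExpired(ctx, time.Now(), 1000)
	if err != nil {
		return err
	}
	for _, info := range expired {
		if err := deletePiece(info.SatelliteID, info.PieceID); err != nil {
			// record the failure so a future run can retry this piece
			if markErr := db.DeleteFailed(ctx, info.SatelliteID, info.PieceID, time.Now()); markErr != nil {
				return markErr
			}
			continue
		}
		if _, err := db.DeleteExpiration(ctx, info.SatelliteID, info.PieceID); err != nil {
			return err
		}
	}
	return nil
}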

View File

@ -5,9 +5,7 @@ package storagenodedb
import (
"context"
"database/sql"
"sync"
"sync/atomic"
"os"
"time"
"github.com/gogo/protobuf/proto"
@ -15,25 +13,23 @@ import (
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/storj"
"storj.io/storj/storage"
"storj.io/storj/storage/filestore"
"storj.io/storj/storagenode/pieces"
)
type pieceinfo struct {
// Moved to top of struct to resolve alignment issue with atomic operations on ARM
usedSpace int64
loadSpaceOnce sync.Once
type v0PieceInfo struct {
*InfoDB
}
// PieceInfo returns database for storing piece information
func (db *DB) PieceInfo() pieces.DB { return db.info.PieceInfo() }
// V0PieceInfo returns database for storing piece information
func (db *DB) V0PieceInfo() pieces.V0PieceInfoDB { return db.info.V0PieceInfo() }
// PieceInfo returns database for storing piece information
func (db *InfoDB) PieceInfo() pieces.DB { return &db.pieceinfo }
// V0PieceInfo returns database for storing piece information
func (db *InfoDB) V0PieceInfo() pieces.V0PieceInfoDB { return &db.v0PieceInfo }
// Add inserts piece information into the database.
func (db *pieceinfo) Add(ctx context.Context, info *pieces.Info) (err error) {
func (db *v0PieceInfo) Add(ctx context.Context, info *pieces.Info) (err error) {
defer mon.Task()(&ctx)(&err)
orderLimit, err := proto.Marshal(info.OrderLimit)
@ -59,41 +55,65 @@ func (db *pieceinfo) Add(ctx context.Context, info *pieces.Info) (err error) {
VALUES (?,?,?,?,?,?,?,?)
`), info.SatelliteID, info.PieceID, info.PieceSize, info.PieceCreation.UTC(), pieceExpiration, orderLimit, uplinkPieceHash, 0)
if err == nil {
db.loadSpaceUsed(ctx)
atomic.AddInt64(&db.usedSpace, info.PieceSize)
}
return ErrInfo.Wrap(err)
}
// GetPieceIDs gets pieceIDs using the satelliteID
func (db *pieceinfo) GetPieceIDs(ctx context.Context, satelliteID storj.NodeID, createdBefore time.Time, limit int, cursor storj.PieceID) (pieceIDs []storj.PieceID, err error) {
defer mon.Task()(&ctx)(&err)
func (db *v0PieceInfo) getAllPiecesOwnedBy(ctx context.Context, blobStore storage.Blobs, satelliteID storj.NodeID) ([]v0StoredPieceAccess, error) {
rows, err := db.db.QueryContext(ctx, db.Rebind(`
SELECT piece_id
SELECT piece_id, piece_size, piece_creation, piece_expiration
FROM pieceinfo_
WHERE satellite_id = ? AND datetime(piece_creation) < datetime(?) AND piece_id > ?
WHERE satellite_id = ?
ORDER BY piece_id
LIMIT ?
`), satelliteID, createdBefore.UTC(), cursor, limit)
`), satelliteID)
if err != nil {
return nil, ErrInfo.Wrap(err)
}
defer func() { err = errs.Combine(err, rows.Close()) }()
var pieceInfos []v0StoredPieceAccess
for rows.Next() {
var pieceID storj.PieceID
err = rows.Scan(&pieceID)
pieceInfos = append(pieceInfos, v0StoredPieceAccess{
blobStore: blobStore,
satellite: satelliteID,
})
thisAccess := &pieceInfos[len(pieceInfos)-1]
err = rows.Scan(&thisAccess.pieceID, &thisAccess.pieceSize, &thisAccess.creationTime, &thisAccess.expirationTime)
if err != nil {
return pieceIDs, ErrInfo.Wrap(err)
return nil, ErrInfo.Wrap(err)
}
pieceIDs = append(pieceIDs, pieceID)
}
return pieceIDs, nil
return pieceInfos, nil
}
// WalkSatelliteV0Pieces executes walkFunc for each locally stored piece, stored with storage
// format V0 in the namespace of the given satellite. If walkFunc returns a non-nil error,
// WalkSatelliteV0Pieces will stop iterating and return the error immediately. The ctx parameter
// parameter is intended specifically to allow canceling iteration early.
//
// If blobStore is nil, the .Stat() and .FullPath() methods of the provided StoredPieceAccess
// instances will not work, but otherwise everything should be ok.
func (db *v0PieceInfo) WalkSatelliteV0Pieces(ctx context.Context, blobStore storage.Blobs, satelliteID storj.NodeID, walkFunc func(pieces.StoredPieceAccess) error) (err error) {
defer mon.Task()(&ctx)(&err)
// TODO: is it worth paging this query? we hope that SNs will not yet have too many V0 pieces.
pieceInfos, err := db.getAllPiecesOwnedBy(ctx, blobStore, satelliteID)
if err != nil {
return err
}
// note we must not keep a transaction open with the db when calling walkFunc; the callback
// might need to make db calls as well
for i := range pieceInfos {
if err := ctx.Err(); err != nil {
return err
}
if err := walkFunc(&pieceInfos[i]); err != nil {
return err
}
}
return nil
}
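A minimal usage sketch of WalkSatelliteV0Pieces (the summing helper is an invented example; it assumes the pieces.V0PieceInfoDB interface exposes this method as implemented above): total the content size of every V0 piece held for one satellite. blobStore may be nil here because the callback never calls Stat() or FullPath().

package example

import (
	"context"

	"storj.io/storj/pkg/storj"
	"storj.io/storj/storage"
	"storj.io/storj/storagenode/pieces"
)

// v0ContentSize walks all V0 pieces for a satellite and totals their content sizes.
func v0ContentSize(ctx context.Context, v0db pieces.V0PieceInfoDB, blobStore storage.Blobs, satellite storj.NodeID) (total int64, err error) {
	err = v0db.WalkSatelliteV0Pieces(ctx, blobStore, satellite, func(access pieces.StoredPieceAccess) error {
		size, sizeErr := access.ContentSize(ctx)
		if sizeErr != nil {
			return sizeErr
		}
		total += size
		return nil
	})
	return total, err
}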
// Get gets piece information by satellite id and piece id.
func (db *pieceinfo) Get(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID) (_ *pieces.Info, err error) {
func (db *v0PieceInfo) Get(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID) (_ *pieces.Info, err error) {
defer mon.Task()(&ctx)(&err)
info := &pieces.Info{}
info.SatelliteID = satelliteID
@ -132,36 +152,20 @@ func (db *pieceinfo) Get(ctx context.Context, satelliteID storj.NodeID, pieceID
}
// Delete deletes piece information.
func (db *pieceinfo) Delete(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID) (err error) {
func (db *v0PieceInfo) Delete(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID) (err error) {
defer mon.Task()(&ctx)(&err)
var pieceSize int64
err = db.db.QueryRowContext(ctx, db.Rebind(`
SELECT piece_size
FROM pieceinfo_
WHERE satellite_id = ? AND piece_id = ?
`), satelliteID, pieceID).Scan(&pieceSize)
// Ignore no rows found errors
if err != nil && err != sql.ErrNoRows {
return ErrInfo.Wrap(err)
}
_, err = db.db.ExecContext(ctx, db.Rebind(`
DELETE FROM pieceinfo_
WHERE satellite_id = ?
AND piece_id = ?
`), satelliteID, pieceID)
if pieceSize != 0 && err == nil {
db.loadSpaceUsed(ctx)
atomic.AddInt64(&db.usedSpace, -pieceSize)
}
return ErrInfo.Wrap(err)
}
// DeleteFailed marks piece as a failed deletion.
func (db *pieceinfo) DeleteFailed(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID, now time.Time) (err error) {
func (db *v0PieceInfo) DeleteFailed(ctx context.Context, satelliteID storj.NodeID, pieceID storj.PieceID, now time.Time) (err error) {
defer mon.Task()(&ctx)(&err)
_, err = db.db.ExecContext(ctx, db.Rebind(`
@ -174,12 +178,12 @@ func (db *pieceinfo) DeleteFailed(ctx context.Context, satelliteID storj.NodeID,
return ErrInfo.Wrap(err)
}
// GetExpired gets pieceinformation identites that are expired.
func (db *pieceinfo) GetExpired(ctx context.Context, expiredAt time.Time, limit int64) (infos []pieces.ExpiredInfo, err error) {
// GetExpired gets ExpiredInfo records for pieces that are expired.
func (db *v0PieceInfo) GetExpired(ctx context.Context, expiredAt time.Time, limit int64) (infos []pieces.ExpiredInfo, err error) {
defer mon.Task()(&ctx)(&err)
rows, err := db.db.QueryContext(ctx, db.Rebind(`
SELECT satellite_id, piece_id, piece_size
SELECT satellite_id, piece_id
FROM pieceinfo_
WHERE piece_expiration IS NOT NULL
AND piece_expiration < ?
@ -192,8 +196,8 @@ func (db *pieceinfo) GetExpired(ctx context.Context, expiredAt time.Time, limit
}
defer func() { err = errs.Combine(err, rows.Close()) }()
for rows.Next() {
info := pieces.ExpiredInfo{}
err = rows.Scan(&info.SatelliteID, &info.PieceID, &info.PieceSize)
info := pieces.ExpiredInfo{InPieceInfo: true}
err = rows.Scan(&info.SatelliteID, &info.PieceID)
if err != nil {
return infos, ErrInfo.Wrap(err)
}
@ -202,50 +206,83 @@ func (db *pieceinfo) GetExpired(ctx context.Context, expiredAt time.Time, limit
return infos, nil
}
// SpaceUsed returns disk space used by all pieces from cache
func (db *pieceinfo) SpaceUsed(ctx context.Context) (_ int64, err error) {
defer mon.Task()(&ctx)(&err)
db.loadSpaceUsed(ctx)
return atomic.LoadInt64(&db.usedSpace), nil
type v0StoredPieceAccess struct {
blobStore storage.Blobs
satellite storj.NodeID
pieceID storj.PieceID
pieceSize int64
creationTime time.Time
expirationTime *time.Time
blobInfo storage.BlobInfo
}
func (db *pieceinfo) loadSpaceUsed(ctx context.Context) {
defer mon.Task()(&ctx)(nil)
db.loadSpaceOnce.Do(func() {
usedSpace, _ := db.CalculatedSpaceUsed(ctx)
atomic.AddInt64(&db.usedSpace, usedSpace)
})
// PieceID returns the piece ID for the piece
func (v0Access v0StoredPieceAccess) PieceID() storj.PieceID {
return v0Access.pieceID
}
// CalculatedSpaceUsed calculates disk space used by all pieces
func (db *pieceinfo) CalculatedSpaceUsed(ctx context.Context) (_ int64, err error) {
defer mon.Task()(&ctx)(&err)
var sum sql.NullInt64
err = db.db.QueryRowContext(ctx, db.Rebind(`
SELECT SUM(piece_size)
FROM pieceinfo_
`)).Scan(&sum)
// Satellite returns the satellite ID that owns the piece
func (v0Access v0StoredPieceAccess) Satellite() (storj.NodeID, error) {
return v0Access.satellite, nil
}
if err == sql.ErrNoRows || !sum.Valid {
return 0, nil
// BlobRef returns the relevant storage.BlobRef locator for the piece
func (v0Access v0StoredPieceAccess) BlobRef() storage.BlobRef {
return storage.BlobRef{
Namespace: v0Access.satellite.Bytes(),
Key: v0Access.pieceID.Bytes(),
}
return sum.Int64, err
}
// SpaceUsed calculates disk space used by all pieces
func (db *pieceinfo) SpaceUsedBySatellite(ctx context.Context, satelliteID storj.NodeID) (_ int64, err error) {
defer mon.Task()(&ctx)(&err)
var sum sql.NullInt64
err = db.db.QueryRowContext(ctx, db.Rebind(`
SELECT SUM(piece_size)
FROM pieceinfo_
WHERE satellite_id = ?
`), satelliteID).Scan(&sum)
if err == sql.ErrNoRows || !sum.Valid {
return 0, nil
func (v0Access *v0StoredPieceAccess) fillInBlobAccess(ctx context.Context) error {
if v0Access.blobInfo == nil {
if v0Access.blobStore == nil {
return errs.New("this v0StoredPieceAccess instance has no blobStore reference, and cannot look up the relevant blob")
}
blobInfo, err := v0Access.blobStore.StatWithStorageFormat(ctx, v0Access.BlobRef(), v0Access.StorageFormatVersion())
if err != nil {
return err
}
v0Access.blobInfo = blobInfo
}
return sum.Int64, err
return nil
}
// ContentSize gives the size of the piece content (not including the piece header, if applicable)
func (v0Access v0StoredPieceAccess) ContentSize(ctx context.Context) (int64, error) {
return v0Access.pieceSize, nil
}
// CreationTime returns the piece creation time as given in the original order (which is not
// necessarily the same as the file mtime).
func (v0Access v0StoredPieceAccess) CreationTime(ctx context.Context) (time.Time, error) {
return v0Access.creationTime, nil
}
// ModTime returns the same thing as CreationTime for V0 blobs. The intent is for ModTime to
// be a little faster when CreationTime is too slow and the precision is not needed, but in
// this case we already have the exact creation time from the database.
func (v0Access v0StoredPieceAccess) ModTime(ctx context.Context) (time.Time, error) {
return v0Access.creationTime, nil
}
// FullPath gives the full path to the on-disk blob file
func (v0Access v0StoredPieceAccess) FullPath(ctx context.Context) (string, error) {
if err := v0Access.fillInBlobAccess(ctx); err != nil {
return "", err
}
return v0Access.blobInfo.FullPath(ctx)
}
// StorageFormatVersion indicates the storage format version used to store the piece
func (v0Access v0StoredPieceAccess) StorageFormatVersion() storage.FormatVersion {
return filestore.FormatV0
}
// Stat does a stat on the on-disk blob file
func (v0Access v0StoredPieceAccess) Stat(ctx context.Context) (os.FileInfo, error) {
if err := v0Access.fillInBlobAccess(ctx); err != nil {
return nil, err
}
return v0Access.blobInfo.Stat(ctx)
}

View File

@ -1,87 +0,0 @@
// Copyright (C) 2019 Storj Labs, Inc.
// See LICENSE for copying information.
package storagenodedb_test
import (
"testing"
"time"
"github.com/stretchr/testify/require"
"storj.io/storj/internal/testcontext"
"storj.io/storj/internal/testidentity"
"storj.io/storj/internal/testrand"
"storj.io/storj/pkg/pb"
"storj.io/storj/pkg/signing"
"storj.io/storj/pkg/storj"
"storj.io/storj/storagenode"
"storj.io/storj/storagenode/pieces"
"storj.io/storj/storagenode/storagenodedb/storagenodedbtest"
)
// TestGetPieceIDs does the following:
// * Create 90 pieces
// * Request 50 pieces starting from the beginning. Expect 50 pieces.
// * Request 50 pieces starting from the end of the previous request. Expect 40 pieces.
// * Request 50 pieces starting from the end of the previous request. Expect 0 pieces.
func TestGetPieceIDs(t *testing.T) {
storagenodedbtest.Run(t, func(t *testing.T, db storagenode.DB) {
ctx := testcontext.New(t)
defer ctx.Cleanup()
pieceInfos := db.PieceInfo()
satellite := testidentity.MustPregeneratedSignedIdentity(0, storj.LatestIDVersion())
uplink := testidentity.MustPregeneratedSignedIdentity(3, storj.LatestIDVersion())
totalPieces := 90
for i := 0; i < totalPieces; i++ {
newID := testrand.PieceID()
pieceHash, err := signing.SignPieceHash(ctx,
signing.SignerFromFullIdentity(uplink),
&pb.PieceHash{
PieceId: newID,
Hash: []byte{0, 2, 3, 4, 5},
})
require.NoError(t, err)
err = pieceInfos.Add(ctx, &pieces.Info{
SatelliteID: satellite.ID,
PieceSize: 4,
PieceID: newID,
PieceCreation: time.Now().Add(-time.Minute),
UplinkPieceHash: pieceHash,
OrderLimit: &pb.OrderLimit{},
})
require.NoError(t, err)
}
seen := make(map[storj.PieceID]bool)
requestSize := 50
cursor := storj.PieceID{}
pieceIDs, err := pieceInfos.GetPieceIDs(ctx, satellite.ID, time.Now(), requestSize, cursor)
require.NoError(t, err)
require.Len(t, pieceIDs, 50)
for _, id := range pieceIDs {
require.False(t, seen[id])
seen[id] = true
cursor = id
}
pieceIDs, err = pieceInfos.GetPieceIDs(ctx, satellite.ID, time.Now(), requestSize, cursor)
require.NoError(t, err)
require.Len(t, pieceIDs, 40)
for _, id := range pieceIDs {
require.False(t, seen[id])
seen[id] = true
cursor = id
}
pieceIDs, err = pieceInfos.GetPieceIDs(ctx, satellite.ID, time.Now(), requestSize, cursor)
require.NoError(t, err)
require.Len(t, pieceIDs, 0)
})
}

View File

@ -0,0 +1,151 @@
-- table for keeping serials that need to be verified against
CREATE TABLE used_serial_ (
satellite_id BLOB NOT NULL,
serial_number BLOB NOT NULL,
expiration TIMESTAMP NOT NULL
);
-- primary key on satellite id and serial number
CREATE UNIQUE INDEX pk_used_serial_ ON used_serial_(satellite_id, serial_number);
-- expiration index to allow fast deletion
CREATE INDEX idx_used_serial_ ON used_serial_(expiration);
-- certificate table for storing uplink/satellite certificates
CREATE TABLE certificate (
cert_id INTEGER
);
-- table for storing piece meta info
CREATE TABLE pieceinfo_ (
satellite_id BLOB NOT NULL,
piece_id BLOB NOT NULL,
piece_size BIGINT NOT NULL,
piece_expiration TIMESTAMP,
order_limit BLOB NOT NULL,
uplink_piece_hash BLOB NOT NULL,
uplink_cert_id INTEGER NOT NULL,
deletion_failed_at TIMESTAMP,
piece_creation TIMESTAMP NOT NULL,
FOREIGN KEY(uplink_cert_id) REFERENCES certificate(cert_id)
);
-- primary key by satellite id and piece id
CREATE UNIQUE INDEX pk_pieceinfo_ ON pieceinfo_(satellite_id, piece_id);
-- fast queries for expiration for pieces that have one
CREATE INDEX idx_pieceinfo__expiration ON pieceinfo_(piece_expiration) WHERE piece_expiration IS NOT NULL;
-- table for storing bandwidth usage
CREATE TABLE bandwidth_usage (
satellite_id BLOB NOT NULL,
action INTEGER NOT NULL,
amount BIGINT NOT NULL,
created_at TIMESTAMP NOT NULL
);
CREATE INDEX idx_bandwidth_usage_satellite ON bandwidth_usage(satellite_id);
CREATE INDEX idx_bandwidth_usage_created ON bandwidth_usage(created_at);
-- table for storing all unsent orders
CREATE TABLE unsent_order (
satellite_id BLOB NOT NULL,
serial_number BLOB NOT NULL,
order_limit_serialized BLOB NOT NULL,
order_serialized BLOB NOT NULL,
order_limit_expiration TIMESTAMP NOT NULL,
uplink_cert_id INTEGER NOT NULL,
FOREIGN KEY(uplink_cert_id) REFERENCES certificate(cert_id)
);
CREATE UNIQUE INDEX idx_orders ON unsent_order(satellite_id, serial_number);
-- table for storing all sent orders
CREATE TABLE order_archive_ (
satellite_id BLOB NOT NULL,
serial_number BLOB NOT NULL,
order_limit_serialized BLOB NOT NULL,
order_serialized BLOB NOT NULL,
uplink_cert_id INTEGER NOT NULL,
status INTEGER NOT NULL,
archived_at TIMESTAMP NOT NULL,
FOREIGN KEY(uplink_cert_id) REFERENCES certificate(cert_id)
);
-- table for storing vouchers
CREATE TABLE vouchers (
satellite_id BLOB PRIMARY KEY NOT NULL,
voucher_serialized BLOB NOT NULL,
expiration TIMESTAMP NOT NULL
);
CREATE TABLE bandwidth_usage_rollups (
interval_start TIMESTAMP NOT NULL,
satellite_id BLOB NOT NULL,
action INTEGER NOT NULL,
amount BIGINT NOT NULL,
PRIMARY KEY ( interval_start, satellite_id, action )
);
-- table to hold expiration data (and only expirations. no other pieceinfo)
CREATE TABLE piece_expirations (
satellite_id BLOB NOT NULL,
piece_id BLOB NOT NULL,
piece_expiration TIMESTAMP NOT NULL, -- date when it can be deleted
deletion_failed_at TIMESTAMP,
PRIMARY KEY ( satellite_id, piece_id )
);
CREATE INDEX idx_piece_expirations_piece_expiration ON piece_expirations(piece_expiration);
CREATE INDEX idx_piece_expirations_deletion_failed_at ON piece_expirations(deletion_failed_at);
INSERT INTO unsent_order VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',X'1eddef484b4c03f01332279032796972',X'0a101eddef484b4c03f0133227903279697212202b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf410001a201968996e7ef170a402fdfd88b6753df792c063c07c555905ffac9cd3cbd1c00022200ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac30002a20d00cf14f3c68b56321ace04902dec0484eb6f9098b22b31c6b3f82db249f191630643802420c08dfeb88e50510a8c1a5b9034a0c08dfeb88e50510a8c1a5b9035246304402204df59dc6f5d1bb7217105efbc9b3604d19189af37a81efbf16258e5d7db5549e02203bb4ead16e6e7f10f658558c22b59c3339911841e8dbaae6e2dea821f7326894',X'0a101eddef484b4c03f0133227903279697210321a47304502206d4c106ddec88140414bac5979c95bdea7de2e0ecc5be766e08f7d5ea36641a7022100e932ff858f15885ffa52d07e260c2c25d3861810ea6157956c1793ad0c906284','2019-04-01 16:01:35.9254586+00:00',1);
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',0,0,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',0,0,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',1,1,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',1,1,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',2,2,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',2,2,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',3,3,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',3,3,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',4,4,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',4,4,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',5,5,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',5,5,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',6,6,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',6,6,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',1,1,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',1,1,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',2,2,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',2,2,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',3,3,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',3,3,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',4,4,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',4,4,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',5,5,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',5,5,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',6,6,'2019-04-01 18:51:24.1074772+00:00');
INSERT INTO bandwidth_usage VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',6,6,'2019-04-01 20:51:24.1074772+00:00');
INSERT INTO vouchers VALUES(X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000', X'd5e757fd8d207d1c46583fb58330f803dc961b71147308ff75ff1e72a0df6b0b', '2019-07-04 00:00:00.000000+00:00');
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 18:00:00+00:00',X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',0,0);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 20:00:00+00:00',X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',0,0);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 18:00:00+00:00',X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',1,1);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 20:00:00+00:00',X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',1,1);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 18:00:00+00:00',X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',2,2);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 20:00:00+00:00',X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',2,2);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 18:00:00+00:00',X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',3,3);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 20:00:00+00:00',X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',3,3);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 18:00:00+00:00',X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',4,4);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 20:00:00+00:00',X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',4,4);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 18:00:00+00:00',X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',5,5);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 20:00:00+00:00',X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',5,5);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 18:00:00+00:00',X'0ed28abb2813e184a1e98b0f6605c4911ea468c7e8433eb583e0fca7ceac3000',6,6);
INSERT INTO bandwidth_usage_rollups VALUES('2019-07-12 20:00:00+00:00',X'2b3a5863a41f25408a8f5348839d7a1361dbd886d75786bb139a8ca0bdf41000',6,6);
-- NEW DATA --