private/testblobs, storage, storage/filestore: add storage dir verification to filestore

Sometimes storage node operators (SNOs) fail to properly configure their storage directory, or lose
connection to it, which can result in disqualification (DQ). This causes unnecessary repair and is unfortunate for all parties.

This change introduces the creation of a special file in the storage directory at runtime
containing the node ID. While the storage node runs, it periodically verifies that it can
find said file with the correct contents in the correct location. If not, the node will
shut down with an error message.

This change will solve the issue of nodes losing access to the storage directory, but it will not
solve the issue of nodes pointing to the wrong directory, as the identifying file is created each
time the node starts up. After this change has been the minimum version for a few releases, we will
remove the creation of the directory-identifying file from the storage node run command and add it
to the setup command.

Change-Id: Ib7b10e96ac07373219835e39239e93957e7667a4
This commit is contained in:
Cameron Ayer 2020-07-10 15:36:39 -04:00 committed by Cameron
parent 14ad7a4f1c
commit 586e6f2f13
8 changed files with 144 additions and 0 deletions

View File

@ -9,6 +9,7 @@ import (
"go.uber.org/zap"
"storj.io/common/storj"
"storj.io/storj/storage"
"storj.io/storj/storagenode"
)
@ -206,6 +207,23 @@ func (bad *BadBlobs) SpaceUsedForTrash(ctx context.Context) (int64, error) {
return bad.blobs.SpaceUsedForTrash(ctx)
}
// CreateVerificationFile creates the storage directory verification file for the given
// node ID via the wrapped blob store. If an error has been configured on the BadBlobs,
// that error is returned instead and the wrapped store is not called.
func (bad *BadBlobs) CreateVerificationFile(id storj.NodeID) error {
	if injected := bad.err; injected != nil {
		return injected
	}
	return bad.blobs.CreateVerificationFile(id)
}
// VerifyStorageDir checks, via the wrapped blob store, that the verification file exists
// in the storage directory and is valid for the given node ID. If an error has been
// configured on the BadBlobs, that error is returned instead and the wrapped store is
// not called.
func (bad *BadBlobs) VerifyStorageDir(id storj.NodeID) error {
	if injected := bad.err; injected != nil {
		return injected
	}
	return bad.blobs.VerifyStorageDir(id)
}
// SetError configures the blob store to return a specific error for all operations.
func (bad *BadBlobs) SetError(err error) {
bad.err = err

View File

@ -10,6 +10,7 @@ import (
"go.uber.org/zap"
"storj.io/common/storj"
"storj.io/storj/storage"
"storj.io/storj/storagenode"
)
@ -170,6 +171,18 @@ func (slow *SlowBlobs) SpaceUsedForTrash(ctx context.Context) (int64, error) {
return slow.blobs.SpaceUsedForTrash(ctx)
}
// CreateVerificationFile sleeps for the configured latency and then creates the storage
// directory verification file for the given node ID via the wrapped blob store.
func (slow *SlowBlobs) CreateVerificationFile(id storj.NodeID) error {
	slow.sleep()
	err := slow.blobs.CreateVerificationFile(id)
	return err
}
// VerifyStorageDir verifies that the storage directory is correct by checking for the existence and validity
// of the verification file.
//
// Like the other SlowBlobs operations, it first sleeps for the configured latency so that
// the delay applies uniformly to every call made through the wrapper.
func (slow *SlowBlobs) VerifyStorageDir(id storj.NodeID) error {
	slow.sleep()
	return slow.blobs.VerifyStorageDir(id)
}
// SetLatency configures the blob store to sleep for delay duration for all
// operations. A zero or negative delay means no sleep.
func (slow *SlowBlobs) SetLatency(delay time.Duration) {

View File

@ -10,6 +10,8 @@ import (
"time"
"github.com/zeebo/errs"
"storj.io/common/storj"
)
// ErrInvalidBlobRef is returned when a blob reference is invalid.
@ -108,6 +110,11 @@ type Blobs interface {
// error, WalkNamespace will stop iterating and return the error immediately. The ctx
// parameter is intended to allow canceling iteration early.
WalkNamespace(ctx context.Context, namespace []byte, walkFunc func(BlobInfo) error) error
// CreateVerificationFile creates a file to be used for storage directory verification.
CreateVerificationFile(id storj.NodeID) error
// VerifyStorageDir verifies that the storage directory is correct by checking for the existence and validity
// of the verification file.
VerifyStorageDir(id storj.NodeID) error
// Close closes the blob store and any resources associated with it.
Close() error
}

View File

@ -4,6 +4,7 @@
package filestore
import (
"bytes"
"context"
"encoding/base32"
"errors"
@ -19,6 +20,7 @@ import (
"github.com/zeebo/errs"
"go.uber.org/zap"
"storj.io/common/storj"
"storj.io/storj/storage"
)
@ -29,6 +31,7 @@ const (
v0PieceFileSuffix = ""
v1PieceFileSuffix = ".sj1"
unknownPieceFileSuffix = "/..error_unknown_format../"
verificationFileName = "storage-dir-verification"
)
var pathEncoding = base32.NewEncoding("abcdefghijklmnopqrstuvwxyz234567").WithPadding(base32.NoPadding)
@ -95,6 +98,37 @@ func (dir *Dir) garbagedir() string { return filepath.Join(dir.path, "garbage")
// trashdir contains files staged for deletion for a period of time.
func (dir *Dir) trashdir() string { return filepath.Join(dir.path, "trash") }
// CreateVerificationFile creates a file to be used for storage directory verification.
//
// The file is placed directly inside the storage directory and contains the raw bytes of
// id. The returned error covers creating, writing, and closing the file.
func (dir *Dir) CreateVerificationFile(id storj.NodeID) (err error) {
	f, err := os.Create(filepath.Join(dir.path, verificationFileName))
	if err != nil {
		return err
	}
	// err must be a named result: the deferred function runs after the return value has
	// been set, so only a named result lets a Close failure reach the caller.
	defer func() {
		err = errs.Combine(err, f.Close())
	}()
	_, err = f.Write(id.Bytes())
	return err
}
// Verify reads the verification file from the storage directory and compares its content
// with the bytes of the given node ID.
//
// It returns nil when they match. Otherwise it returns the read error, an error stating
// that the content is not a valid node ID, or an error naming both the stored and the
// running node's ID.
func (dir *Dir) Verify(id storj.NodeID) error {
	verificationPath := filepath.Join(dir.path, verificationFileName)

	stored, err := ioutil.ReadFile(verificationPath)
	if err != nil {
		return err
	}

	if bytes.Equal(stored, id.Bytes()) {
		return nil
	}

	// The content differs; try to decode it so the message can name the ID that was found.
	storedID, parseErr := storj.NodeIDFromBytes(stored)
	if parseErr != nil {
		return errs.New("content of file is not a valid node ID: %x", stored)
	}
	return errs.New("node ID in file (%s) does not match running node's ID (%s)", storedID, id.String())
}
// CreateTemporaryFile creates a preallocated temporary file in the temp directory
// prealloc preallocates file to make writing faster.
func (dir *Dir) CreateTemporaryFile(ctx context.Context, prealloc int64) (_ *os.File, err error) {

View File

@ -16,6 +16,7 @@ import (
"go.uber.org/zap"
"storj.io/common/memory"
"storj.io/common/storj"
"storj.io/storj/storage"
)
@ -276,3 +277,14 @@ func (store *blobStore) TestCreateV0(ctx context.Context, ref storage.BlobRef) (
}
return newBlobWriter(ref, store, FormatV0, file, store.config.WriteBufferSize.Int()), nil
}
// CreateVerificationFile creates the storage directory verification file for the given
// node ID by delegating to the store's directory.
func (store *blobStore) CreateVerificationFile(id storj.NodeID) error {
	err := store.dir.CreateVerificationFile(id)
	return err
}
// VerifyStorageDir checks that the storage directory holds a valid verification file for
// the given node ID by delegating to the store's directory.
func (store *blobStore) VerifyStorageDir(id storj.NodeID) error {
	err := store.dir.Verify(id)
	return err
}

View File

@ -20,6 +20,7 @@ import (
"github.com/zeebo/errs"
"go.uber.org/zap/zaptest"
"storj.io/common/identity/testidentity"
"storj.io/common/memory"
"storj.io/common/testcontext"
"storj.io/common/testrand"
@ -870,3 +871,50 @@ func TestBlobMemoryBuffer(t *testing.T) {
}
require.Equal(t, size, len(buf))
}
// TestStorageDirVerification exercises the storage directory verification file: a missing
// file, a matching node ID, a mismatching node ID, content that is not a valid node ID,
// and overwriting an existing file.
func TestStorageDirVerification(t *testing.T) {
	ctx := testcontext.New(t)
	defer ctx.Cleanup()

	log := zaptest.NewLogger(t)

	dir, err := filestore.NewDir(log, ctx.Dir("store"))
	require.NoError(t, err)

	ident0, err := testidentity.NewTestIdentity(ctx)
	require.NoError(t, err)

	ident1, err := testidentity.NewTestIdentity(ctx)
	require.NoError(t, err)

	store := filestore.New(log, dir, filestore.Config{
		WriteBufferSize: 1 * memory.KiB,
	})

	// test nonexistent file returns error
	require.Error(t, store.VerifyStorageDir(ident0.ID))

	require.NoError(t, dir.CreateVerificationFile(ident0.ID))

	// test correct ID returns no error
	require.NoError(t, store.VerifyStorageDir(ident0.ID))

	// test incorrect ID returns error
	err = store.VerifyStorageDir(ident1.ID)
	// Assert non-nil first so a regression fails cleanly instead of panicking on err.Error().
	require.Error(t, err)
	require.Contains(t, err.Error(), "does not match running node's ID")

	// test invalid node ID returns error
	f, err := os.Create(filepath.Join(dir.Path(), "storage-dir-verification"))
	require.NoError(t, err)
	_, err = f.Write([]byte{0, 1, 2, 3})
	// Close before asserting so the handle is released even when the write failed, and
	// before the file is read back and overwritten below.
	closeErr := f.Close()
	require.NoError(t, err)
	require.NoError(t, closeErr)

	err = store.VerifyStorageDir(ident0.ID)
	require.Error(t, err)
	require.Contains(t, err.Error(), "content of file is not a valid node ID")

	// test file overwrite returns no error
	require.NoError(t, dir.CreateVerificationFile(ident0.ID))
	require.NoError(t, store.VerifyStorageDir(ident0.ID))
}

View File

@ -107,6 +107,7 @@ func (service *Service) Run(ctx context.Context) (err error) {
service.log.Error("Total disk space less than required minimum", zap.Int64("bytes", service.Config.MinimumDiskSpace.Int64()))
return Error.New("disk space requirement not met")
}
var group errgroup.Group
group.Go(func() error {
return service.Loop.Run(ctx, func(ctx context.Context) error {

View File

@ -193,6 +193,17 @@ func NewStore(log *zap.Logger, blobs storage.Blobs, v0PieceInfo V0PieceInfoDB, e
}
}
// CreateVerificationFile creates the storage directory verification file for the given
// node ID by delegating to the underlying blob store.
func (store *Store) CreateVerificationFile(id storj.NodeID) error {
	err := store.blobs.CreateVerificationFile(id)
	return err
}
// VerifyStorageDir checks that the storage directory holds a valid verification file for
// the given node ID by delegating to the underlying blob store.
func (store *Store) VerifyStorageDir(id storj.NodeID) error {
	err := store.blobs.VerifyStorageDir(id)
	return err
}
// Writer returns a new piece writer.
func (store *Store) Writer(ctx context.Context, satellite storj.NodeID, pieceID storj.PieceID) (_ *Writer, err error) {
defer mon.Task()(&ctx)(&err)