From 2f04e20627f92ff9d0ef23aa13c90c0ce743c024 Mon Sep 17 00:00:00 2001 From: paul cannon Date: Mon, 30 Jan 2023 08:39:31 -0600 Subject: [PATCH] storage/filestore: better error message on data corruption A user on the forum was seeing the error "bad message", which was not very helpful. This came from the ext4 filesystem using the code EBADMSG to indicate it detected an invalid CRC, suggesting disk corruption. This change adds some explanatory information about probable disk corruption to all errors coming from the (*blobInfo).Stat() call, which is where storagenode fs corruption problems will usually manifest. Refs: https://github.com/storj/storj/issues/5375 Change-Id: I87f4a800236050415c4191ef1a0fc952f9def315 --- storage/filestore/dir.go | 31 +++++++++++++++++++++++++++++++ storage/filestore/errors_other.go | 23 +++++++++++++++++++++++ storage/filestore/errors_unix.go | 27 +++++++++++++++++++++++++++ storagenode/retain/retain.go | 1 + 4 files changed, 82 insertions(+) create mode 100644 storage/filestore/errors_other.go create mode 100644 storage/filestore/errors_unix.go diff --git a/storage/filestore/dir.go b/storage/filestore/dir.go index 3f15e56af..2ba6b3978 100644 --- a/storage/filestore/dir.go +++ b/storage/filestore/dir.go @@ -8,6 +8,7 @@ import ( "context" "encoding/base32" "errors" + "fmt" "io" "math" "os" @@ -871,6 +872,12 @@ func (info *blobInfo) Stat(ctx context.Context) (os.FileInfo, error) { if info.fileInfo == nil { fileInfo, err := os.Lstat(info.path) if err != nil { + if os.IsNotExist(err) { + return nil, err + } + if isLowLevelCorruptionError(err) { + return nil, &CorruptDataError{path: info.path, error: err} + } return nil, err } if fileInfo.Mode().IsDir() { @@ -884,3 +891,27 @@ func (info *blobInfo) Stat(ctx context.Context) (os.FileInfo, error) { func (info *blobInfo) FullPath(ctx context.Context) (string, error) { return info.path, nil } + +// CorruptDataError represents a filesystem or disk error which indicates data corruption. 
+// +// We use a custom error type here so that we can add explanatory information and wrap the original +// error at the same time. +type CorruptDataError struct { + path string + error error +} + +// Unwrap unwraps the error. +func (cde CorruptDataError) Unwrap() error { + return cde.error +} + +// Path returns the path at which the error was encountered. +func (cde CorruptDataError) Path() string { + return cde.path +} + +// Error returns an error string describing the condition. +func (cde CorruptDataError) Error() string { + return fmt.Sprintf("unrecoverable error accessing data on the storage file system (path=%v; error=%v). This is most likely due to disk bad sectors or a corrupted file system. Check your disk for bad sectors and integrity", cde.path, cde.error) +} diff --git a/storage/filestore/errors_other.go b/storage/filestore/errors_other.go new file mode 100644 index 000000000..52c8b7ce8 --- /dev/null +++ b/storage/filestore/errors_other.go @@ -0,0 +1,23 @@ +// Copyright (C) 2023 Storj Labs, Inc. +// See LICENSE for copying information. + +//go:build !unix +// +build !unix + +package filestore + +import ( + "errors" + "os" + "strings" +) + +func isLowLevelCorruptionError(err error) bool { + // convert to lowercase the perr.Op because Go returns inconsistently + // "lstat" in Linux and "Lstat" in Windows + var perr *os.PathError + if errors.As(err, &perr) && strings.ToLower(perr.Op) == "lstat" { + return true + } + return false +} diff --git a/storage/filestore/errors_unix.go b/storage/filestore/errors_unix.go new file mode 100644 index 000000000..ab59bbf3a --- /dev/null +++ b/storage/filestore/errors_unix.go @@ -0,0 +1,27 @@ +// Copyright (C) 2023 Storj Labs, Inc. +// See LICENSE for copying information. 
+ +//go:build unix + +package filestore + +import ( + "errors" + "os" + "syscall" +) + +func isLowLevelCorruptionError(err error) bool { + var perr *os.PathError + if errors.As(err, &perr) && perr.Op == "lstat" { + return true + } + var errnoErr syscall.Errno + if errors.As(err, &errnoErr) { + switch errnoErr { + case syscall.EBADMSG, syscall.EIO: + return true + } + } + return false +} diff --git a/storagenode/retain/retain.go b/storagenode/retain/retain.go index e02a3c032..ebd26c427 100644 --- a/storagenode/retain/retain.go +++ b/storagenode/retain/retain.go @@ -398,6 +398,7 @@ func (s *Service) retainPieces(ctx context.Context, req Request) (err error) { // piece was deleted while we were scanning. return nil } + piecesSkipped++ s.log.Warn("failed to determine mtime of blob", zap.Error(err)) // but continue iterating.