storage/filestore: better error message on data corruption
A user on the forum was seeing the error "bad message", which was not very helpful. This case came from the ext4 filesystem, which uses the code EBADMSG to indicate that it detected an invalid CRC, suggesting disk corruption. This change adds some explanatory information about probable disk corruption to all errors coming from the (*blobInfo).Stat() call, which is where storagenode fs corruption problems will usually manifest. Refs: https://github.com/storj/storj/issues/5375 Change-Id: I87f4a800236050415c4191ef1a0fc952f9def315
This commit is contained in:
parent
ed7c82439d
commit
2f04e20627
@ -8,6 +8,7 @@ import (
|
||||
"context"
|
||||
"encoding/base32"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
@ -871,6 +872,12 @@ func (info *blobInfo) Stat(ctx context.Context) (os.FileInfo, error) {
|
||||
if info.fileInfo == nil {
|
||||
fileInfo, err := os.Lstat(info.path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
if isLowLevelCorruptionError(err) {
|
||||
return nil, &CorruptDataError{path: info.path, error: err}
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
if fileInfo.Mode().IsDir() {
|
||||
@ -884,3 +891,27 @@ func (info *blobInfo) Stat(ctx context.Context) (os.FileInfo, error) {
|
||||
func (info *blobInfo) FullPath(ctx context.Context) (string, error) {
|
||||
return info.path, nil
|
||||
}
|
||||
|
||||
// CorruptDataError is returned when a filesystem or disk error points to data
// corruption.
//
// It is a dedicated type (rather than a formatted string) so that the
// explanatory message can be attached while the underlying error stays
// reachable through Unwrap.
type CorruptDataError struct {
	// path is the location that was being accessed when the error occurred.
	path string
	// error is the original, lower-level error being wrapped.
	error error
}

// Error returns a human-readable description of the condition, including the
// affected path, the wrapped error, and a hint about the probable cause.
func (e CorruptDataError) Error() string {
	const format = "unrecoverable error accessing data on the storage file system (path=%v; error=%v). This is most likely due to disk bad sectors or a corrupted file system. Check your disk for bad sectors and integrity"
	return fmt.Sprintf(format, e.path, e.error)
}

// Path returns the path at which the error was encountered.
func (e CorruptDataError) Path() string {
	return e.path
}

// Unwrap returns the wrapped lower-level error.
func (e CorruptDataError) Unwrap() error {
	return e.error
}
|
||||
|
23
storage/filestore/errors_other.go
Normal file
23
storage/filestore/errors_other.go
Normal file
@ -0,0 +1,23 @@
|
||||
// Copyright (C) 2023 Storj Labs, Inc.
|
||||
// See LICENSE for copying information.
|
||||
|
||||
//go:build !unix
|
||||
// +build !unix
|
||||
|
||||
package filestore
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// isLowLevelCorruptionError reports whether err should be treated as a sign of
// low-level filesystem or disk corruption.
//
// In this non-unix build it returns true when err's chain contains an
// *os.PathError whose operation is "lstat" (in any letter case), and false
// otherwise, including for a nil err.
func isLowLevelCorruptionError(err error) bool {
	var perr *os.PathError
	if !errors.As(err, &perr) {
		return false
	}
	// Compare the operation name case-insensitively because Go returns
	// inconsistently "lstat" in Linux and "Lstat" in Windows. EqualFold does
	// this without allocating a lowercased copy of the string.
	return strings.EqualFold(perr.Op, "lstat")
}
|
27
storage/filestore/errors_unix.go
Normal file
27
storage/filestore/errors_unix.go
Normal file
@ -0,0 +1,27 @@
|
||||
// Copyright (C) 2023 Storj Labs, Inc.
|
||||
// See LICENSE for copying information.
|
||||
|
||||
//go:build unix
|
||||
|
||||
package filestore
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// isLowLevelCorruptionError reports whether err should be treated as a sign of
// low-level filesystem or disk corruption.
//
// It returns true when err's chain contains either an *os.PathError whose
// operation is "lstat", or a syscall.Errno equal to EBADMSG or EIO.
func isLowLevelCorruptionError(err error) bool {
	var pathErr *os.PathError
	if errors.As(err, &pathErr) && pathErr.Op == "lstat" {
		return true
	}

	var errno syscall.Errno
	if !errors.As(err, &errno) {
		return false
	}
	return errno == syscall.EBADMSG || errno == syscall.EIO
}
|
@ -398,6 +398,7 @@ func (s *Service) retainPieces(ctx context.Context, req Request) (err error) {
|
||||
// piece was deleted while we were scanning.
|
||||
return nil
|
||||
}
|
||||
|
||||
piecesSkipped++
|
||||
s.log.Warn("failed to determine mtime of blob", zap.Error(err))
|
||||
// but continue iterating.
|
||||
|
Loading…
Reference in New Issue
Block a user