From 85b49bb27c36fdd625dbe1c8086dddefa29f8b73 Mon Sep 17 00:00:00 2001
From: Clement Sam
Date: Wed, 20 Oct 2021 14:07:03 +0000
Subject: [PATCH] cmd/uplink: add ranged download to uplink cli

Change-Id: Ib274df024a8ffc5db2d5c99f8f363efa3b43723f
---
 cmd/uplink/cmd/cp.go   | 52 +++++++++++++++++++++++++++++++++++-------
 scripts/test-uplink.sh | 21 +++++++++++++++++
 scripts/utils.sh       |  4 ++++
 3 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/cmd/uplink/cmd/cp.go b/cmd/uplink/cmd/cp.go
index 06f896051..8e347cbab 100644
--- a/cmd/uplink/cmd/cp.go
+++ b/cmd/uplink/cmd/cp.go
@@ -20,16 +20,18 @@ import (
 
 	"storj.io/common/fpath"
 	"storj.io/common/memory"
+	"storj.io/common/ranger/httpranger"
 	"storj.io/common/sync2"
 	"storj.io/uplink"
 	"storj.io/uplink/private/object"
 )
 
 var (
-	progress    *bool
-	expires     *string
-	metadata    *string
-	parallelism *int
+	progress     *bool
+	expires      *string
+	metadata     *string
+	parallelism  *int
+	byteRangeStr *string
 )
 
 func init() {
@@ -44,8 +46,9 @@ func init() {
 	expires = cpCmd.Flags().String("expires", "", "optional expiration date of an object. Please use format (yyyy-mm-ddThh:mm:ssZhh:mm)")
 	metadata = cpCmd.Flags().String("metadata", "", "optional metadata for the object. Please use a single level JSON object of string to string only")
 	parallelism = cpCmd.Flags().Int("parallelism", 1, "controls how many parallel uploads/downloads of a single object will be performed")
+	byteRangeStr = cpCmd.Flags().String("range", "", "Downloads the specified range bytes of an object. For more information about the HTTP Range header, see https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35")
 
-	setBasicFlags(cpCmd.Flags(), "progress", "expires", "metadata", "parallelism")
+	setBasicFlags(cpCmd.Flags(), "progress", "expires", "metadata", "parallelism", "range")
 }
 
 // upload transfers src from local machine to s3 compatible object dst.
@@ -280,6 +283,10 @@ func download(ctx context.Context, src fpath.FPath, dst fpath.FPath, showProgres
 		return fmt.Errorf("parallelism must be at least 1")
 	}
 
+	if *parallelism > 1 && *byteRangeStr != "" {
+		return fmt.Errorf("--parallelism and --range flags are mutually exclusive")
+	}
+
 	project, err := cfg.getProject(ctx, false)
 	if err != nil {
 		return err
@@ -306,8 +313,34 @@ func download(ctx context.Context, src fpath.FPath, dst fpath.FPath, showProgres
 	}
 
 	var bar *progressbar.ProgressBar
+	var contentLength int64
 	if *parallelism <= 1 {
-		download, err := project.DownloadObject(ctx, src.Bucket(), src.Path(), nil)
+		var downloadOpts *uplink.DownloadOptions
+
+		if *byteRangeStr != "" {
+			// TODO: if range option will be frequently used we may think about avoiding this call
+			statObject, err := project.StatObject(ctx, src.Bucket(), src.Path())
+			if err != nil {
+				return err
+			}
+			bRange, err := httpranger.ParseRange(*byteRangeStr, statObject.System.ContentLength)
+			if err != nil {
+				return fmt.Errorf("error parsing range: %w", err)
+			}
+			if len(bRange) == 0 {
+				return fmt.Errorf("invalid range")
+			}
+			if len(bRange) > 1 {
+				return fmt.Errorf("retrieval of multiple byte ranges of data not supported: %d provided", len(bRange))
+			}
+			downloadOpts = &uplink.DownloadOptions{
+				Offset: bRange[0].Start,
+				Length: bRange[0].Length,
+			}
+			contentLength = bRange[0].Length
+		}
+
+		download, err := project.DownloadObject(ctx, src.Bucket(), src.Path(), downloadOpts)
 		if err != nil {
 			return err
 		}
@@ -315,8 +348,11 @@ func download(ctx context.Context, src fpath.FPath, dst fpath.FPath, showProgres
 
 		var reader io.ReadCloser
 		if showProgress {
-			info := download.Info()
-			bar = progressbar.New64(info.System.ContentLength)
+			if contentLength <= 0 {
+				info := download.Info()
+				contentLength = info.System.ContentLength
+			}
+			bar = progressbar.New64(contentLength)
 			reader = bar.NewProxyReader(download)
 			bar.Start()
 		} else {
diff --git a/scripts/test-uplink.sh b/scripts/test-uplink.sh
index 4453bbf93..7193876a7 100755
--- a/scripts/test-uplink.sh
+++ b/scripts/test-uplink.sh
@@ -81,6 +81,27 @@ uplink cp "sj://$BUCKET/diff-size-segments_upl_p2" "$DST_DIR/diff-size-segmen
 
 uplink ls "sj://$BUCKET/small-upload-testfile" | grep "small-upload-testfile"
 
+# test ranged download of object
+uplink cp "sj://$BUCKET/put-file" "$DST_DIR/put-file-from-cp-range" --range bytes=0-5 --progress=false
+EXPECTED_FILE_SIZE="6"
+ACTUAL_FILE_SIZE=$(get_file_size "$DST_DIR/put-file-from-cp-range")
+if [ "$EXPECTED_FILE_SIZE" != "$ACTUAL_FILE_SIZE" ]
+then
+    echo "expected downloaded file size to be equal to $EXPECTED_FILE_SIZE, got $ACTUAL_FILE_SIZE"
+    exit 1
+fi
+
+# test that a ranged download with multiple byte ranges is rejected (stderr is captured too)
+set +e
+EXPECTED_ERROR="retrieval of multiple byte ranges of data not supported: 2 provided"
+ERROR=$(uplink cp "sj://$BUCKET/put-file" "$DST_DIR/put-file-from-cp-range" --range bytes=0-5,6-10 2>&1)
+if ! echo "$ERROR" | grep -qF "$EXPECTED_ERROR"
+then
+    echo "expected error containing '$EXPECTED_ERROR', got '$ERROR'"
+    exit 1
+fi
+set -e
+
 # test server-side move operation
 uplink mv "sj://$BUCKET/big-upload-testfile" "sj://$BUCKET/moved-big-upload-testfile"
 uplink ls "sj://$BUCKET/moved-big-upload-testfile" | grep "moved-big-upload-testfile"
diff --git a/scripts/utils.sh b/scripts/utils.sh
index 277aa723d..669d1d2ae 100644
--- a/scripts/utils.sh
+++ b/scripts/utils.sh
@@ -144,4 +144,8 @@ require_error_exit_code(){
     else
         echo "Copy file without permission: PASSED"    # Expect unsuccessful exit code
     fi
+}
+
+get_file_size() {
+    [ -f "$1" ] && ls -dnL -- "$1" | awk '{print $5;exit}' || { echo 0; return 1; }
 }
\ No newline at end of file