From b0c63ef0d1a16109d6aecdbfb164d96ba8712cc2 Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Thu, 7 Jul 2022 18:46:37 +0200 Subject: [PATCH] fetch-kde-qt.sh: get sha256 from server --- maintainers/scripts/fetch-kde-qt.sh | 164 +++++++++++++++++++++++++--- 1 file changed, 150 insertions(+), 14 deletions(-) diff --git a/maintainers/scripts/fetch-kde-qt.sh b/maintainers/scripts/fetch-kde-qt.sh index 22d78151978b..9e2348fda707 100755 --- a/maintainers/scripts/fetch-kde-qt.sh +++ b/maintainers/scripts/fetch-kde-qt.sh @@ -2,30 +2,159 @@ #! nix-shell -i bash -p coreutils findutils gnused nix wget set -efuo pipefail +export LC_COLLATE=C # fix sort order -SRCS= -if [ -d "$1" ]; then - SRCS="$(pwd)/$1/srcs.nix" - . "$1/fetch.sh" -else - SRCS="$(pwd)/$(dirname $1)/srcs.nix" - . "$1" +# parse files and folders from https://download.kde.org/ and https://download.qt.io/ +# you can override this function in fetch.sh +function PARSE_INDEX() { + cat "$1" | grep -o -E -e '\s+href="[^"]+\.tar\.xz"' -e '\s+href="[-_a-zA-Z0-9]+/"' | cut -d'"' -f2 | sort | uniq +} + +if [ $# != 1 ]; then + echo "example use:" >&2 + echo "cd nixpkgs/" >&2 + echo "./maintainers/scripts/fetch-kde-qt.sh pkgs/development/libraries/qt-5/5.12" >&2 + exit 1 fi -tmp=$(mktemp -d) -pushd $tmp >/dev/null -wget -nH -r -c --no-parent "${WGET_ARGS[@]}" >/dev/null +if ! echo "$1" | grep -q '^pkgs/'; then + echo "error: path argument must start with pkgs/" >&2 + exit 1 +fi -csv=$(mktemp) -find . -type f | while read src; do +# need absolute path for the pushd-popd block +if [ -f "$1" ]; then + echo "ok: using fetchfile $1" + fetchfilerel="$1" + fetchfile="$(readlink -f "$fetchfilerel")" # resolve absolute path + basedir="$(dirname "$fetchfile")" + basedirrel="$(dirname "$fetchfilerel")" +elif [ -d "$1" ]; then + echo "ok: using basedir $1" + basedirrel="$1" + basedir="$(readlink -f "$basedirrel")" # resolve absolute path + if ! [ -d "$basedir" ]; then + basedir="$(dirname "$basedir")" + fi + fetchfile="$basedir/fetch.sh" +else + echo 'error: $1 must be file or dir' >&2 + exit 1 +fi + +pkgname=$(basename "$basedir") +SRCS="$basedir/srcs.nix" +srcsrel="$basedirrel/srcs.nix" + +source "$fetchfile" + +if [ -n "$WGET_ARGS" ]; then # old format + BASE_URL="${WGET_ARGS[0]}" # convert to new format + # validate + if ! echo "$BASE_URL" | grep -q -E '^(http|https|ftp)://'; then + printf 'error: from WGET_ARGS, converted invalid BASE_URL: %q\n' "$BASE_URL" >&2 + exit 1 + fi + printf 'ok: from WGET_ARGS, converted BASE_URL: %q\n' "$BASE_URL" +elif [ -n "$BASE_URL" ]; then # new format + : +else + echo "error: fetch.sh must set either WGET_ARGS or BASE_URL" >&2 + exit 1 +fi + +tmptpl=tmp.fetch-kde-qt.$pkgname.XXXXXXXXXX + +tmp=$(mktemp -d $tmptpl) +pushd $tmp >/dev/null +echo "tempdir is $tmp" + +wgetargs='--quiet --show-progress' +#wgetargs='' # debug + +dirlist="$BASE_URL" +filelist="" +base_url_len=${#BASE_URL} + +clean_urls() { + # // -> / + sed -E 's,//+,/,g' | sed -E 's,^(http|https|ftp):/,&/,' +} + +while [ -n "$dirlist" ] +do + for dirurl in $dirlist + do + echo "fetching index.html from $dirurl" + relpath=$(echo "./${dirurl:$base_url_len}" | clean_urls) + mkdir -p "$relpath" + indexfile=$(echo "$relpath/index.html" | clean_urls) + wget $wgetargs -O "$indexfile" "$dirurl" + echo "parsing $indexfile" + filedirlist="$(PARSE_INDEX "$indexfile")" + filelist_next="$(echo "$filedirlist" | grep '\.tar\.xz$' | while read file; do echo "$dirurl/$file"; done)" + filelist_next="$(echo "$filelist_next" | clean_urls)" + [ -n "$filelist" ] && filelist+=$'\n' + filelist+="$filelist_next" + dirlist="$(echo "$filedirlist" | grep -v '\.tar\.xz$' | while read dir; do echo "$dirurl/$dir"; done || true)" + dirlist="$(echo "$dirlist" | clean_urls)" + done +done + +filecount=$(echo "$filelist" | wc -l) + +if [ -z "$filelist" ] +then + echo "error: no files parsed from $tmp/index.html" + exit 1 +fi + +echo "parsed $filecount tar.xz files:"; echo "$filelist" + +# most time is spent here +echo "fetching $filecount sha256 files ..." +urllist="$(echo "$filelist" | while read file; do echo "$file.sha256"; done)" +# wget -r: keep directory structure +echo "$urllist" | xargs wget $wgetargs -nH -r -c --no-parent && { + actual=$(find . -type f -name '*.sha256' | wc -l) + echo "fetching $filecount sha256 files done: got $actual files" +} || { + # workaround: in rare cases, the server does not provide the sha256 files + # for example when the release is just a few hours old + # and the servers are not yet fully synced + actual=$(find . -type f -name '*.sha256' | wc -l) + echo "fetching $filecount sha256 files failed: got only $actual files" + + # TODO fetch only missing tar.xz files + echo "fetching $filecount tar.xz files ..." + urllist="$(echo "$filelist" | while read file; do echo "$BASE_URL/$file"; done)" + echo "$urllist" | xargs wget $wgetargs -nH -r -c --no-parent + + echo "generating sha256 files ..." + find . -type f -name '*.tar.xz' | while read src; do + name=$(basename "$src") + sha256=$(sha256sum "$src" | cut -d' ' -f1) + echo "$sha256 $name" >"$src.sha256" + done +} + +csv=$(mktemp $tmptpl.csv) +echo "writing temporary file $csv ..." +find . -type f -name '*.sha256' | while read sha256file; do + src="${sha256file%.*}" # remove extension + sha256=$(cat $sha256file | cut -d' ' -f1) # base16 + sha256=$(nix-hash --type sha256 --to-base32 $sha256) # Sanitize file name filename=$(basename "$src" | tr '@' '_') nameVersion="${filename%.tar.*}" name=$(echo "$nameVersion" | sed -e 's,-[[:digit:]].*,,' | sed -e 's,-opensource-src$,,' | sed -e 's,-everywhere-src$,,') version=$(echo "$nameVersion" | sed -e 's,^\([[:alpha:]][[:alnum:]]*-\)\+,,') - echo "$name,$version,$src,$filename" >>$csv + echo "$name,$version,$src,$filename,$sha256" >>$csv done +files_before=$(grep -c 'src = ' "$SRCS") + +echo "writing output file $SRCS ..." cat >"$SRCS" <>"$SRCS" <>"$SRCS" +files_after=$(grep -c 'src = ' "$SRCS") +echo "files before: $files_before" +echo "files after: $files_after" + +echo "compare:" +echo "git diff $srcsrel" + popd >/dev/null rm -fr $tmp >/dev/null