summary refs log tree commit diff
path: root/maintainers
diff options
context:
space:
mode:
author Artturi <Artturin@artturin.com> 2022-07-17 07:55:22 +0300
committer GitHub <noreply@github.com> 2022-07-17 07:55:22 +0300
commit ff0e4cfbcd7881f50caa788ebd488e2e6f782431 (patch)
tree db91511b337eed7e547803f85ba5959e1d086709 /maintainers
parent 8d78af079998a0dcb47b277da307ec963636cf26 (diff)
parent b0c63ef0d1a16109d6aecdbfb164d96ba8712cc2 (diff)
Merge pull request #148121 from milahu/fetch-kde-qt-sh-get-sha256-from-server
Diffstat (limited to 'maintainers')
-rwxr-xr-x maintainers/scripts/fetch-kde-qt.sh 160
1 files changed, 148 insertions, 12 deletions
diff --git a/maintainers/scripts/fetch-kde-qt.sh b/maintainers/scripts/fetch-kde-qt.sh
index 22d78151978b0..9e2348fda7072 100755
--- a/maintainers/scripts/fetch-kde-qt.sh
+++ b/maintainers/scripts/fetch-kde-qt.sh
@@ -2,30 +2,159 @@
 #! nix-shell -i bash -p coreutils findutils gnused nix wget
 
 set -efuo pipefail
+export LC_COLLATE=C # fix sort order
 
-SRCS=
-if [ -d "$1" ]; then
-    SRCS="$(pwd)/$1/srcs.nix"
-    . "$1/fetch.sh"
+# PARSE_INDEX <index.html>: print the href targets that are *.tar.xz files or subfolders ("name/"), sorted and unique;
+# written for the listings of https://download.kde.org/ and https://download.qt.io/ - you can override this function in fetch.sh
+function PARSE_INDEX() {
+    grep -o -E -e '\s+href="[^"]+\.tar\.xz"' -e '\s+href="[-_a-zA-Z0-9]+/"' "$1" | cut -d'"' -f2 | sort -u
+}
+
+# usage check: exactly one argument is required
+if [[ $# -ne 1 ]]; then
+    printf '%s\n' "example use:" "cd nixpkgs/" \
+        "./maintainers/scripts/fetch-kde-qt.sh pkgs/development/libraries/qt-5/5.12" >&2
+    exit 1
+fi
+
+if ! grep -q '^pkgs/' <<<"$1"; then # the argument has to begin with pkgs/
+    echo "error: path argument must start with pkgs/" >&2
+    exit 1
+fi
+
+# need absolute path for the pushd-popd block
+if [ -f "$1" ]; then # the argument names the fetch file itself
+    echo "ok: using fetchfile $1"
+    fetchfilerel="$1"
+    fetchfile="$(readlink -f "$fetchfilerel")" # resolve absolute path
+    basedir="$(dirname "$fetchfile")" # absolute directory that contains the fetch file
+    basedirrel="$(dirname "$fetchfilerel")" # the same directory, as spelled on the command line
+elif [ -d "$1" ]; then # the argument names a directory: fetch.sh is looked up inside it
+    echo "ok: using basedir $1"
+    basedirrel="$1"
+    basedir="$(readlink -f "$basedirrel")" # resolve absolute path
+    if ! [ -d "$basedir" ]; then # NOTE(review): looks unreachable, $1 was just tested with -d - confirm
+        basedir="$(dirname "$basedir")"
+    fi
+    fetchfile="$basedir/fetch.sh"
 else
-    SRCS="$(pwd)/$(dirname $1)/srcs.nix"
-    . "$1"
+    echo 'error: $1 must be file or dir' >&2
+    exit 1
 fi
 
-tmp=$(mktemp -d)
+pkgname=$(basename "$basedir") # last path component; becomes part of the temp-dir name template
+SRCS="$basedir/srcs.nix" # absolute path of the generated output file
+srcsrel="$basedirrel/srcs.nix" # the same file as spelled on the command line; printed in the final "git diff" hint
+
+source "$fetchfile" # expected to set WGET_ARGS or BASE_URL (checked right below)
+
+if [ -n "${WGET_ARGS:-}" ]; then # old format (":-" keeps set -u from aborting when fetch.sh leaves it unset)
+    BASE_URL="${WGET_ARGS[0]}" # convert to new format: the first wget argument is taken as the URL
+    # validate
+    if ! echo "$BASE_URL" | grep -q -E '^(http|https|ftp)://'; then
+        printf 'error: from WGET_ARGS, converted invalid BASE_URL: %q\n' "$BASE_URL" >&2
+        exit 1
+    fi
+    printf 'ok: from WGET_ARGS, converted BASE_URL: %q\n' "$BASE_URL"
+elif [ -n "${BASE_URL:-}" ]; then # new format
+    : # BASE_URL is used as given, nothing to convert
+else
+    echo "error: fetch.sh must set either WGET_ARGS or BASE_URL" >&2
+    exit 1
+fi
+
+tmptpl=tmp.fetch-kde-qt.$pkgname.XXXXXXXXXX # mktemp template; it has no directory part, so it is relative
+
+tmp=$(mktemp -d $tmptpl)
 pushd $tmp >/dev/null
-wget -nH -r -c --no-parent "${WGET_ARGS[@]}" >/dev/null
+echo "tempdir is $tmp"
+
+wgetargs='--quiet --show-progress' # expanded unquoted further down so that it splits into separate wget options
+#wgetargs='' # debug
+
+dirlist="$BASE_URL" # folder URLs that still have to be crawled, starting at the root
+filelist="" # newline-separated URLs of the *.tar.xz files found so far
+base_url_len=${#BASE_URL} # used to cut the BASE_URL prefix off folder URLs
 
-csv=$(mktemp)
-find . -type f | while read src; do
+clean_urls() {
+    # filter stdin -> stdout: squeeze runs of slashes to one, then restore the "//" after the URL scheme
+    sed -E -e 's,//+,/,g' -e 's,^(http|https|ftp):/,&/,'
+}
+
+while [ -n "$dirlist" ]; do
+    # breadth-first crawl: each pass of this loop handles one folder level below BASE_URL
+    dirlist_next="" # subfolders of ALL folders on this level; dirlist itself must not be overwritten mid-pass
+    for dirurl in $dirlist; do
+        echo "fetching index.html from $dirurl"
+        relpath=$(echo "./${dirurl:$base_url_len}" | clean_urls)
+        mkdir -p "$relpath"
+        indexfile=$(echo "$relpath/index.html" | clean_urls)
+        wget $wgetargs -O "$indexfile" "$dirurl"
+        echo "parsing $indexfile"
+        filedirlist="$(PARSE_INDEX "$indexfile")"
+        # grep exits 1 when nothing matches, which is fatal under set -e with pipefail: hence the "|| true"
+        filelist_next="$(echo "$filedirlist" | grep '\.tar\.xz$' | while read -r file; do echo "$dirurl/$file"; done | clean_urls || true)"
+        if [ -n "$filelist_next" ]; then [ -z "$filelist" ] || filelist+=$'\n'; filelist+="$filelist_next"; fi
+        subdirs="$(echo "$filedirlist" | grep -v '\.tar\.xz$' | while read -r dir; do echo "$dirurl/$dir"; done | clean_urls || true)"
+        if [ -n "$subdirs" ]; then [ -z "$dirlist_next" ] || dirlist_next+=$'\n'; dirlist_next+="$subdirs"; fi
+    done
+    dirlist="$dirlist_next"
+done
+
+if [ -z "$filelist" ]
+then
+    echo "error: no files parsed from $tmp/index.html" >&2
+    exit 1
+fi
+
+# count only after the emptiness check: for an empty list "echo | wc -l" would still report 1
+filecount=$(echo "$filelist" | wc -l)
+echo "parsed $filecount tar.xz files:"; echo "$filelist"
+
+# most time is spent here
+echo "fetching $filecount sha256 files ..."
+urllist="$(echo "$filelist" | while read -r file; do echo "$file.sha256"; done)"
+# wget -r: keep directory structure
+if echo "$urllist" | xargs wget $wgetargs -nH -r -c --no-parent; then
+    actual=$(find . -type f -name '*.sha256' | wc -l)
+    echo "fetching $filecount sha256 files done: got $actual files"
+else
+    # workaround: in rare cases, the server does not provide the sha256 files
+    # for example when the release is just a few hours old
+    # and the servers are not yet fully synced
+    actual=$(find . -type f -name '*.sha256' | wc -l)
+    echo "fetching $filecount sha256 files failed: got only $actual files"
+
+    # TODO fetch only missing tar.xz files
+    echo "fetching $filecount tar.xz files ..."
+    # filelist already holds absolute URLs, so it must not be prefixed with BASE_URL again
+    echo "$filelist" | xargs wget $wgetargs -nH -r -c --no-parent
+
+    echo "generating sha256 files ..."
+    find . -type f -name '*.tar.xz' | while read -r src; do
+        name=$(basename "$src")
+        sha256=$(sha256sum "$src" | cut -d' ' -f1)
+        echo "$sha256  $name" >"$src.sha256"
+    done
+fi
+
+csv=$(mktemp "$tmptpl.csv") # one line per tarball: name,version,path,filename,sha256
+echo "writing temporary file $csv ..."
+find . -type f -name '*.sha256' | while read -r sha256file; do
+    src="${sha256file%.*}" # remove extension
+    sha256=$(cut -d' ' -f1 <"$sha256file") # base16; the hash is the first space-separated field
+    sha256=$(nix-hash --type sha256 --to-base32 "$sha256")
     # Sanitize file name
     filename=$(basename "$src" | tr '@' '_')
     nameVersion="${filename%.tar.*}"
     name=$(echo "$nameVersion" | sed -e 's,-[[:digit:]].*,,' | sed -e 's,-opensource-src$,,' | sed -e 's,-everywhere-src$,,')
     version=$(echo "$nameVersion" | sed -e 's,^\([[:alpha:]][[:alnum:]]*-\)\+,,')
-    echo "$name,$version,$src,$filename" >>$csv
+    echo "$name,$version,$src,$filename,$sha256" >>"$csv"
 done
 
+# grep -c exits non-zero when it counts 0 lines or when $SRCS does not exist yet: do not let set -e abort on that
+files_before=$(grep -c 'src = ' "$SRCS" 2>/dev/null || true); files_before=${files_before:-0}
+echo "writing output file $SRCS ..."
 cat >"$SRCS" <<EOF
 # DO NOT EDIT! This file is generated automatically.
 # Command: $0 $@
@@ -39,8 +168,8 @@ gawk -F , "{ print \$1 }" $csv | sort | uniq | while read name; do
     latestVersion=$(echo "$versions" | sort -rV | head -n 1)
     src=$(gawk -F , "/^$name,$latestVersion,/ { print \$3 }" $csv)
     filename=$(gawk -F , "/^$name,$latestVersion,/ { print \$4 }" $csv)
+    sha256=$(gawk -F , "/^$name,$latestVersion,/ { print \$5 }" $csv)
     url="${src:2}"
-    sha256=$(nix-hash --type sha256 --base32 --flat "$src")
     cat >>"$SRCS" <<EOF
   $name = {
     version = "$latestVersion";
@@ -55,6 +184,13 @@ done
 
 echo "}" >>"$SRCS"
 
+files_after=$(grep -c 'src = ' "$SRCS" || true) # grep -c exits 1 on a zero count: report it instead of aborting under set -e
+echo "files before: $files_before"
+echo "files after:  $files_after"
+
+echo "compare:"
+echo "git diff $srcsrel"
+
 popd >/dev/null
 rm -fr $tmp >/dev/null