From d38a7343feffdbf3466948d2f44adc337df0d5e6 Mon Sep 17 00:00:00 2001
From: Silvan Mosberger
Date: Mon, 27 Nov 2023 22:42:17 +0100
Subject: workflows/check-by-name: Limited and exponential retries

We've had a recent PR CI mass failure event, ultimately caused by the
mergeability check GitHub API not returning a result. But due to the
`pkgs/by-name` check workflow not backing off appropriately between
retries, it pummeled the API, resulting in exceeding the API rate limit:
https://github.com/NixOS/nixpkgs/actions/runs/7010089143/job/19069845070

This commit fixes that for the future by implementing a retry strategy
limited to three retries, with exponential backoff
---
 .github/workflows/check-by-name.yml | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to '.github')

diff --git a/.github/workflows/check-by-name.yml b/.github/workflows/check-by-name.yml
index b0282cdbc769c..cc9b51b138747 100644
--- a/.github/workflows/check-by-name.yml
+++ b/.github/workflows/check-by-name.yml
@@ -23,6 +23,11 @@ jobs:
         run: |
           # This checks for mergeability of a pull request as recommended in
           # https://docs.github.com/en/rest/guides/using-the-rest-api-to-interact-with-your-git-database?apiVersion=2022-11-28#checking-mergeability-of-pull-requests
+
+          # Retry the API query this many times
+          retryCount=3
+          # Start with 5 seconds, but double every retry
+          retryInterval=5
           while true; do
             echo "Checking whether the pull request can be merged"
             prInfo=$(gh api \
@@ -33,10 +38,19 @@ jobs:
             mergedSha=$(jq -r .merge_commit_sha <<< "$prInfo")
 
             if [[ "$mergeable" == "null" ]]; then
-              # null indicates that GitHub is still computing whether it's mergeable
-              # Wait a couple seconds before trying again
-              echo "GitHub is still computing whether this PR can be merged, waiting 5 seconds before trying again"
-              sleep 5
+              if (( retryCount == 0 )); then
+                echo "Not retrying anymore, probably GitHub is having internal issues"
+                exit 1
+              else
+                (( retryCount -= 1 )) || true
+
+                # null indicates that GitHub is still computing whether it's mergeable
+                # Wait a couple seconds before trying again
+                echo "GitHub is still computing whether this PR can be merged, waiting $retryInterval seconds before trying again ($retryCount retries left)"
+                sleep "$retryInterval"
+
+                (( retryInterval *= 2 )) || true
+              fi
             else
               break
             fi
-- 
cgit 1.4.1

From 5e0644896dce54c876df00c69fc824e3614fe61c Mon Sep 17 00:00:00 2001
From: Silvan Mosberger
Date: Mon, 27 Nov 2023 23:55:50 +0100
Subject: workflows/check-by-name: Set 10 minute timeout

Co-Authored-By: Cole Helbling
---
 .github/workflows/check-by-name.yml | 3 +++
 1 file changed, 3 insertions(+)

(limited to '.github')

diff --git a/.github/workflows/check-by-name.yml b/.github/workflows/check-by-name.yml
index cc9b51b138747..087ee57145900 100644
--- a/.github/workflows/check-by-name.yml
+++ b/.github/workflows/check-by-name.yml
@@ -16,6 +16,9 @@ jobs:
     # This is x86_64-linux, for which the tool is always prebuilt on the nixos-* channels,
     # as specified in nixos/release-combined.nix
     runs-on: ubuntu-latest
+    # This should take 1 minute at most, but let's be generous.
+    # The default of 6 hours is definitely too long
+    timeout-minutes: 10
     steps:
       - name: Resolving the merge commit
         env:
-- 
cgit 1.4.1