about summary refs log tree commit diff
path: root/nixos
diff options
context:
space:
mode:
authorRyan Lahfa <masterancpp@gmail.com>2022-12-22 02:19:19 +0100
committerGitHub <noreply@github.com>2022-12-22 02:19:19 +0100
commit0b0726ae0b889ea5e3a6b1e04d34333b13010b3b (patch)
tree0824ad39cf7e43c64dc2b7e3c8be9e1ba9fdf612 /nixos
parent250a51232c9c74e7ff5f49b0424a094a101e0c26 (diff)
parentc9a5bf4a3897f61956bf8a25b652fcf535e5e821 (diff)
Merge pull request #205983 from m1cr0man/acme-test-fix
nixos/acme: Increase number of retries in testing
Diffstat (limited to 'nixos')
-rw-r--r--nixos/tests/acme.nix62
1 files changed, 41 insertions, 21 deletions
diff --git a/nixos/tests/acme.nix b/nixos/tests/acme.nix
index 64bc99f6d325f..d62bf0c0fd92d 100644
--- a/nixos/tests/acme.nix
+++ b/nixos/tests/acme.nix
@@ -144,7 +144,11 @@
 
 in {
   name = "acme";
-  meta.maintainers = lib.teams.acme.members;
+  meta = {
+    maintainers = lib.teams.acme.members;
+    # Hard timeout in seconds. Average run time is about 7 minutes.
+    timeout = 1800;
+  };
 
   nodes = {
     # The fake ACME server which will respond to client requests
@@ -357,6 +361,30 @@ in {
       import time
 
 
+      TOTAL_RETRIES = 20
+
+
+      class BackoffTracker(object):
+          delay = 1
+          increment = 1
+
+          def handle_fail(self, retries, message) -> int:
+              assert retries < TOTAL_RETRIES, message
+
+              print(f"Retrying in {self.delay}s, {retries + 1}/{TOTAL_RETRIES}")
+              time.sleep(self.delay)
+
+              # Only increment after the first try
+              if retries == 0:
+                  self.delay += self.increment
+                  self.increment *= 2
+
+              return retries + 1
+
+
+      backoff = BackoffTracker()
+
+
       def switch_to(node, name):
           # On first switch, this will create a symlink to the current system so that we can
           # quickly switch between derivations
@@ -404,9 +432,7 @@ in {
           assert False
 
 
-      def check_connection(node, domain, retries=3):
-          assert retries >= 0, f"Failed to connect to https://{domain}"
-
+      def check_connection(node, domain, retries=0):
           result = node.succeed(
               "openssl s_client -brief -verify 2 -CAfile /tmp/ca.crt"
               f" -servername {domain} -connect {domain}:443 < /dev/null 2>&1"
@@ -414,13 +440,11 @@ in {
 
           for line in result.lower().split("\n"):
               if "verification" in line and "error" in line:
-                  time.sleep(3)
-                  return check_connection(node, domain, retries - 1)
+                  retries = backoff.handle_fail(retries, f"Failed to connect to https://{domain}")
+                  return check_connection(node, domain, retries)
 
 
-      def check_connection_key_bits(node, domain, bits, retries=3):
-          assert retries >= 0, f"Did not find expected number of bits ({bits}) in key"
-
+      def check_connection_key_bits(node, domain, bits, retries=0):
           result = node.succeed(
               "openssl s_client -CAfile /tmp/ca.crt"
               f" -servername {domain} -connect {domain}:443 < /dev/null"
@@ -429,13 +453,11 @@ in {
           print("Key type:", result)
 
           if bits not in result:
-              time.sleep(3)
-              return check_connection_key_bits(node, domain, bits, retries - 1)
-
+              retries = backoff.handle_fail(retries, f"Did not find expected number of bits ({bits}) in key")
+              return check_connection_key_bits(node, domain, bits, retries)
 
-      def check_stapling(node, domain, retries=3):
-          assert retries >= 0, "OCSP Stapling check failed"
 
+      def check_stapling(node, domain, retries=0):
           # Pebble doesn't provide a full OCSP responder, so just check the URL
           result = node.succeed(
               "openssl s_client -CAfile /tmp/ca.crt"
@@ -445,21 +467,19 @@ in {
           print("OCSP Responder URL:", result)
 
           if "${caDomain}:4002" not in result.lower():
-              time.sleep(3)
-              return check_stapling(node, domain, retries - 1)
-
+              retries = backoff.handle_fail(retries, "OCSP Stapling check failed")
+              return check_stapling(node, domain, retries)
 
-      def download_ca_certs(node, retries=5):
-          assert retries >= 0, "Failed to connect to pebble to download root CA certs"
 
+      def download_ca_certs(node, retries=0):
           exit_code, _ = node.execute("curl https://${caDomain}:15000/roots/0 > /tmp/ca.crt")
           exit_code_2, _ = node.execute(
               "curl https://${caDomain}:15000/intermediate-keys/0 >> /tmp/ca.crt"
           )
 
           if exit_code + exit_code_2 > 0:
-              time.sleep(3)
-              return download_ca_certs(node, retries - 1)
+              retries = backoff.handle_fail(retries, "Failed to connect to pebble to download root CA certs")
+              return download_ca_certs(node, retries)
 
 
       start_all()