expat: fix tests flakiness in 2.6.0 version

libexpat 2.6.0 introduced fix for performance issue on some of the inputs along with tests that check the parsing performance. Unfortunately, the first implementation of the test case relied on the clock() function to check if the performance is good enough, resulting in flakiness on some of the platforms (eg. aarch64-darwin). https://github.com/libexpat/libexpat/pull/817 fixes tests, but it is not released yet. This commit brings that PR as a patch.
author: Artem Leshchev <matshch@avride.ai> 2024-02-16 17:21:25 +0300
committer: Artem Leshchev <matshch@avride.ai> 2024-02-17 10:49:09 +0300
commit: 6d28b263decf993108cd4917323bde000e4f7646 (patch)
tree: f5ecd00545ac9b73d8d6c840fa9f2c2cf4427896 /pkgs/development/libraries/expat
parent: 50b5b5b529facc043866fc7721f50cd7a688e70a (diff)
2 files changed, 257 insertions, 0 deletions
diff --git a/pkgs/development/libraries/expat/2.6.0-fix-tests-flakiness.patch b/pkgs/development/libraries/expat/2.6.0-fix-tests-flakiness.patch
new file mode 100644
index 0000000000000..9817b1833627f
--- /dev/null
+++ b/pkgs/development/libraries/expat/2.6.0-fix-tests-flakiness.patch
@@ -0,0 +1,252 @@
+diff --git a/lib/internal.h b/lib/internal.h
+index cce71e4c..a217b3f9 100644
+--- a/lib/internal.h
++++ b/lib/internal.h
+@@ -31,7 +31,7 @@
+    Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
+    Copyright (c) 2018      Yury Gribov <tetra2005@gmail.com>
+    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
+-   Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
++   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
+    Licensed under the MIT license:
+ 
+    Permission is  hereby granted,  free of charge,  to any  person obtaining
+@@ -162,7 +162,7 @@ const char *unsignedCharToPrintable(unsigned char c);
+ #endif
+ 
+ extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
+-extern unsigned int g_parseAttempts;             // used for testing only
++extern unsigned int g_bytesScanned;              // used for testing only
+ 
+ #ifdef __cplusplus
+ }
+diff --git a/lib/xmlparse.c b/lib/xmlparse.c
+index aaf0fa9c..6de99d99 100644
+--- a/lib/xmlparse.c
++++ b/lib/xmlparse.c
+@@ -38,7 +38,7 @@
+    Copyright (c) 2022      Jann Horn <jannh@google.com>
+    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
+    Copyright (c) 2023      Owain Davies <owaind@bath.edu>
+-   Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
++   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
+    Licensed under the MIT license:
+ 
+    Permission is  hereby granted,  free of charge,  to any  person obtaining
+@@ -630,7 +630,7 @@ static unsigned long getDebugLevel(const char *variableName,
+        : ((*((pool)->ptr)++ = c), 1))
+ 
+ XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
+-unsigned int g_parseAttempts = 0;                    // used for testing only
++unsigned int g_bytesScanned = 0;                     // used for testing only
+ 
+ struct XML_ParserStruct {
+   /* The first member must be m_userData so that the XML_GetUserData
+@@ -1017,7 +1017,7 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
+       return XML_ERROR_NONE;
+     }
+   }
+-  g_parseAttempts += 1;
++  g_bytesScanned += (unsigned)have_now;
+   const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+   if (ret == XML_ERROR_NONE) {
+     // if we consumed nothing, remember what we had on this parse attempt.
+diff --git a/tests/basic_tests.c b/tests/basic_tests.c
+index 7112a440..a9cc3861 100644
+--- a/tests/basic_tests.c
++++ b/tests/basic_tests.c
+@@ -5202,13 +5202,7 @@ START_TEST(test_nested_entity_suspend) {
+ END_TEST
+ 
+ /* Regression test for quadratic parsing on large tokens */
+-START_TEST(test_big_tokens_take_linear_time) {
+-  const char *const too_slow_failure_message
+-      = "Compared to the baseline runtime of the first test, this test has a "
+-        "slowdown of more than <max_slowdown>. "
+-        "Please keep increasing the value by 1 until it reliably passes the "
+-        "test on your hardware and open a bug sharing that number with us. "
+-        "Thanks in advance!";
++START_TEST(test_big_tokens_scale_linearly) {
+   const struct {
+     const char *pre;
+     const char *post;
+@@ -5220,65 +5214,57 @@ START_TEST(test_big_tokens_take_linear_time) {
+       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
+   };
+   const int num_cases = sizeof(text) / sizeof(text[0]);
+-  // For the test we need a <max_slowdown> value that is:
+-  // (1) big enough that the test passes reliably (avoiding flaky tests), and
+-  // (2) small enough that the test actually catches regressions.
+-  const int max_slowdown = 15;
+   char aaaaaa[4096];
+   const int fillsize = (int)sizeof(aaaaaa);
+   const int fillcount = 100;
++  const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
++  const unsigned max_factor = 4;
++  const unsigned max_scanned = max_factor * approx_bytes;
+ 
+   memset(aaaaaa, 'a', fillsize);
+ 
+   if (! g_reparseDeferralEnabledDefault) {
+     return; // heuristic is disabled; we would get O(n^2) and fail.
+   }
+-#if ! defined(__linux__)
+-  if (CLOCKS_PER_SEC < 100000) {
+-    // Skip this test if clock() doesn't have reasonably good resolution.
+-    // This workaround is primarily targeting Windows and FreeBSD, since
+-    // XSI requires the value to be 1.000.000 (10x the condition here), and
+-    // we want to be very sure that at least one platform in CI can catch
+-    // regressions (through a failing test).
+-    return;
+-  }
+-#endif
+ 
+-  clock_t baseline = 0;
+   for (int i = 0; i < num_cases; ++i) {
+     XML_Parser parser = XML_ParserCreate(NULL);
+     assert_true(parser != NULL);
+     enum XML_Status status;
+-    set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
+-                text[i].pre, text[i].post);
+-    const clock_t start = clock();
++    set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
+ 
+     // parse the start text
++    g_bytesScanned = 0;
+     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
+                                      (int)strlen(text[i].pre), XML_FALSE);
+     if (status != XML_STATUS_OK) {
+       xml_failure(parser);
+     }
++
+     // parse lots of 'a', failing the test early if it takes too long
++    unsigned past_max_count = 0;
+     for (int f = 0; f < fillcount; ++f) {
+       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
+       if (status != XML_STATUS_OK) {
+         xml_failure(parser);
+       }
+-      // i == 0 means we're still calculating the baseline value
+-      if (i > 0) {
+-        const clock_t now = clock();
+-        const clock_t clocks_so_far = now - start;
+-        const int slowdown = clocks_so_far / baseline;
+-        if (slowdown >= max_slowdown) {
+-          fprintf(
+-              stderr,
+-              "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
+-              f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
+-          fail(too_slow_failure_message);
+-        }
++      if (g_bytesScanned > max_scanned) {
++        // We're not done, and have already passed the limit -- the test will
++        // definitely fail. This block allows us to save time by failing early.
++        const unsigned pushed
++            = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
++        fprintf(
++            stderr,
++            "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
++            f + 1, fillcount, pushed, g_bytesScanned,
++            g_bytesScanned / (double)pushed, max_scanned, max_factor);
++        past_max_count++;
++        // We are failing, but allow a few log prints first. If we don't reach
++        // a count of five, the test will fail after the loop instead.
++        assert_true(past_max_count < 5);
+       }
+     }
++
+     // parse the end text
+     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
+                                      (int)strlen(text[i].post), XML_TRUE);
+@@ -5286,18 +5272,14 @@ START_TEST(test_big_tokens_take_linear_time) {
+       xml_failure(parser);
+     }
+ 
+-    // how long did it take in total?
+-    const clock_t end = clock();
+-    const clock_t taken = end - start;
+-    if (i == 0) {
+-      assert_true(taken > 0); // just to make sure we don't div-by-0 later
+-      baseline = taken;
+-    }
+-    const int slowdown = taken / baseline;
+-    if (slowdown >= max_slowdown) {
+-      fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
+-              (int)taken, (int)baseline, slowdown, max_slowdown);
+-      fail(too_slow_failure_message);
++    assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
++    if (g_bytesScanned > max_scanned) {
++      fprintf(
++          stderr,
++          "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
++          g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
++          max_factor);
++      fail("scanned too many bytes");
+     }
+ 
+     XML_ParserFree(parser);
+@@ -5774,19 +5756,17 @@ START_TEST(test_varying_buffer_fills) {
+                 fillsize[2], fillsize[3]);
+     XML_Parser parser = XML_ParserCreate(NULL);
+     assert_true(parser != NULL);
+-    g_parseAttempts = 0;
+ 
+     CharData storage;
+     CharData_Init(&storage);
+     XML_SetUserData(parser, &storage);
+     XML_SetStartElementHandler(parser, start_element_event_handler);
+ 
++    g_bytesScanned = 0;
+     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
+-    int scanned_bytes = 0;   // sum of (buffered bytes at each actual parse)
+     int offset = 0;
+     while (*fillsize >= 0) {
+       assert_true(offset + *fillsize <= document_length); // or test is invalid
+-      const unsigned attempts_before = g_parseAttempts;
+       const enum XML_Status status
+           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
+       if (status != XML_STATUS_OK) {
+@@ -5796,28 +5776,20 @@ START_TEST(test_varying_buffer_fills) {
+       fillsize++;
+       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
+       worstcase_bytes += offset; // we might've tried to parse all pending bytes
+-      if (g_parseAttempts != attempts_before) {
+-        assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
+-        assert_true(offset <= INT_MAX - scanned_bytes);      // avoid overflow
+-        scanned_bytes += offset; // we *did* try to parse all pending bytes
+-      }
+     }
+     assert_true(storage.count == 1); // the big token should've been parsed
+-    assert_true(scanned_bytes > 0);  // test-the-test: does our counter work?
++    assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
+     if (g_reparseDeferralEnabledDefault) {
+       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
+-      const int max_bytes_scanned = -*fillsize;
+-      if (scanned_bytes > max_bytes_scanned) {
++      const unsigned max_bytes_scanned = -*fillsize;
++      if (g_bytesScanned > max_bytes_scanned) {
+         fprintf(stderr,
+-                "bytes scanned in parse attempts: actual=%d limit=%d \n",
+-                scanned_bytes, max_bytes_scanned);
++                "bytes scanned in parse attempts: actual=%u limit=%u \n",
++                g_bytesScanned, max_bytes_scanned);
+         fail("too many bytes scanned in parse attempts");
+       }
+-      assert_true(scanned_bytes <= worstcase_bytes);
+-    } else {
+-      // heuristic is disabled; every XML_Parse() will have reparsed
+-      assert_true(scanned_bytes == worstcase_bytes);
+     }
++    assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
+ 
+     XML_ParserFree(parser);
+   }
+@@ -6065,7 +6037,7 @@ make_basic_test_case(Suite *s) {
+   tcase_add_test__ifdef_xml_dtd(tc_basic,
+                                 test_pool_integrity_with_unfinished_attr);
+   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
+-  tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
++  tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
+   tcase_add_test(tc_basic, test_set_reparse_deferral);
+   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
+   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
diff --git a/pkgs/development/libraries/expat/default.nix b/pkgs/development/libraries/expat/default.nix
index babb19e6cfb04..74c8b0ae609a4 100644
--- a/pkgs/development/libraries/expat/default.nix
+++ b/pkgs/development/libraries/expat/default.nix
@@ -26,6 +26,11 @@ in stdenv.mkDerivation rec {
     hash = "sha256-y19ajqIR4cq9Wb4KkzpS48Aswyboak04fY0hjn7kej4=";
   };
 
+  patches = [
+    # Fix tests flakiness on some platforms (like aarch64-darwin), should be released in 2.6.1
+    ./2.6.0-fix-tests-flakiness.patch
+  ];
+
   strictDeps = true;
 
   outputs = [ "out" "dev" ]; # TODO: fix referrers
author	Artem Leshchev <matshch@avride.ai>	2024-02-16 17:21:25 +0300
committer	Artem Leshchev <matshch@avride.ai>	2024-02-17 10:49:09 +0300
commit	6d28b263decf993108cd4917323bde000e4f7646 (patch)
tree	f5ecd00545ac9b73d8d6c840fa9f2c2cf4427896 /pkgs/development/libraries/expat
parent	50b5b5b529facc043866fc7721f50cd7a688e70a (diff)