Adjust hypothesis deadlines globally and redo the profiles and slowdown factors
Marco Ricci

Marco Ricci committed on 2025-01-31 14:43:03
Showing 7 changed files with 84 additions and 105 deletions.


Previously, we would define a "worst-case" hypothesis profile for use
under instrumentation (coverage measurement), and apply it via a
decorator to every hypothesis test that we had observed running into
deadline problems.  This step was manual, and very error-prone when
adding new hypothesis tests.  Conceptually, it also suggested that
adjusted deadlines applied to some tests but not to others, even though
the deadline adjustment is actually a side effect of global
instrumentation.

We therefore now adopt a global solution, where the pytest configuration
file detects which coverage instrumentation (if any) is running, and
adjusts the default hypothesis settings appropriately.  The decorator is
removed.

In the course of this, we re-evaluated the profiles and the slowdown
factors, instead of assuming the worst-case instrumentation.  hypothesis
also defines a "ci" profile for use in continuous integration, together
with some commentary on how this profile is expected to be used.  We
cede this profile name to hypothesis (and remove it from our testing
matrix in hatch), and define our own "intense" profile for high-volume
hypothesis testing based on the "ci" profile, to be invoked manually as
a "pre-release" (or perhaps "nightly") run.

Finally, because the test suite is now rather slow when run
sequentially, we add `--dist=worksteal` to the test options to better
distribute load during parallel testing.  We choose this over other,
more "balanced" strategies because our single-test runtimes fluctuate
wildly thanks to hypothesis, so work stealing appears to be the most
reliable distribution heuristic for us.
... ...
@@ -140,26 +140,17 @@ matrix-name-format = '{variable}_{value}'
140 140
 [[tool.hatch.envs.hatch-test.matrix]]
141 141
 python = ["3.13", "3.12", "3.11", "3.10", "3.9", "pypy3.10", "pypy3.9"]
142 142
 cryptography = ["no", "yes"]
143
-hypothesis-profile = ["user-default"]
144 143
 parser-version = ["PEG"]
145 144
 
146 145
 [[tool.hatch.envs.hatch-test.matrix]]
147 146
 python = ["3.9", "pypy3.9"]
148 147
 cryptography = ["no", "yes"]
149
-hypothesis-profile = ["user-default"]
150 148
 parser-version = ["LL1"]
151 149
 
152
-[[tool.hatch.envs.hatch-test.matrix]]
153
-cryptography = ["yes"]
154
-hypothesis-profile = ["ci"]
155
-
156 150
 [tool.hatch.envs.hatch-test.overrides]
157 151
 matrix.cryptography.features = [
158 152
     { value = "export", if = ["yes"] },
159 153
 ]
160
-matrix.hypothesis-profile.env-vars = [
161
-    { key = "HYPOTHESIS_PROFILE", if = ["ci", "default", "dev", "debug"] },
162
-]
163 154
 matrix.parser-version.env-vars = [
164 155
     { key = "PYTHONOLDPARSER", value = "1", if = ["LL1"] },
165 156
 ]
... ...
@@ -195,7 +186,7 @@ sqlite_cache = true
195 186
 enable_error_code = ['ignore-without-code']
196 187
 
197 188
 [tool.pytest.ini_options]
198
-addopts = '--doctest-modules'
189
+addopts = '--doctest-modules --dist=worksteal'
199 190
 pythonpath = ['src']
200 191
 testpaths = ['src', 'tests']
201 192
 xfail_strict = true
... ...
@@ -16,7 +16,6 @@ import pathlib
16 16
 import re
17 17
 import shlex
18 18
 import stat
19
-import sys
20 19
 import tempfile
21 20
 import zipfile
22 21
 from typing import TYPE_CHECKING
... ...
@@ -1604,83 +1603,6 @@ on cryptography support being available.
1604 1603
 """
1605 1604
 
1606 1605
 
1607
-def hypothesis_settings_coverage_compatible(
1608
-    f: Any = None,
1609
-) -> Any:
1610
-    """Return (or decorate `f` with) coverage-friendly hypothesis settings.
1611
-
1612
-    Specifically, we increase the deadline 40-fold if we detect we are
1613
-    running under coverage testing, because the slow Python trace
1614
-    function (necessary on PyPy) drastically increases runtime for
1615
-    hypothesis tests.
1616
-
1617
-    In any case, we *also* reduce the state machine step count to 32
1618
-    steps per run, because the current state machines defined in the
1619
-    tests rather benefit from broad testing rather than deep testing.
1620
-
1621
-    Args:
1622
-        f:
1623
-            An optional object to decorate with these settings.
1624
-
1625
-    Returns:
1626
-        The modified hypothesis settings, as a settings object.  If
1627
-        decorating a function/class, return that function/class
1628
-        directly, after decorating.
1629
-
1630
-    """
1631
-    settings = (
1632
-        hypothesis.settings(
1633
-            # Running under coverage with the Python tracer increases
1634
-            # running times 40-fold, on my machines.  Sadly, not every
1635
-            # Python version offers the C tracer, so sometimes the Python
1636
-            # tracer is used anyway.
1637
-            deadline=(
1638
-                40 * deadline
1639
-                if (deadline := hypothesis.settings().deadline) is not None
1640
-                else None
1641
-            ),
1642
-            stateful_step_count=32,
1643
-            suppress_health_check=(hypothesis.HealthCheck.too_slow,),
1644
-        )
1645
-        if sys.gettrace() is not None
1646
-        else hypothesis.settings(
1647
-            stateful_step_count=32,
1648
-            suppress_health_check=(hypothesis.HealthCheck.too_slow,),
1649
-        )
1650
-    )
1651
-    return settings if f is None else settings(f)
1652
-
1653
-
1654
-def hypothesis_settings_coverage_compatible_with_caplog(
1655
-    f: Any = None,
1656
-) -> Any:
1657
-    """Return (or decorate `f` with) coverage-friendly hypothesis settings.
1658
-
1659
-    This variant of [`hypothesis_settings_coverage_compatible`][] does
1660
-    all the same, and additionally disables the check for function
1661
-    scoped pytest fixtures such as `caplog`.
1662
-
1663
-    Args:
1664
-        f:
1665
-            An optional object to decorate with these settings.
1666
-
1667
-    Returns:
1668
-        The modified hypothesis settings, as a settings object.  If
1669
-        decorating a function/class, return that function/class
1670
-        directly, after decorating.
1671
-
1672
-    """
1673
-    parent_settings = hypothesis_settings_coverage_compatible()
1674
-    settings = hypothesis.settings(
1675
-        parent=parent_settings,
1676
-        suppress_health_check={
1677
-            hypothesis.HealthCheck.function_scoped_fixture,
1678
-        }
1679
-        | set(parent_settings.suppress_health_check),
1680
-    )
1681
-    return settings if f is None else settings(f)
1682
-
1683
-
1684 1606
 def list_keys(self: Any = None) -> list[_types.SSHKeyCommentPair]:
1685 1607
     """Return a list of all SSH test keys, as key/comment pairs.
1686 1608
 
... ...
@@ -7,11 +7,13 @@ from __future__ import annotations
7 7
 import base64
8 8
 import contextlib
9 9
 import datetime
10
+import importlib
10 11
 import operator
11 12
 import os
12 13
 import shutil
13 14
 import socket
14 15
 import subprocess
16
+import sys
15 17
 from typing import TYPE_CHECKING, Protocol, TypeVar
16 18
 
17 19
 import hypothesis
... ...
@@ -26,15 +28,85 @@ if TYPE_CHECKING:
26 28
 
27 29
 startup_ssh_auth_sock = os.environ.get('SSH_AUTH_SOCK', None)
28 30
 
29
-# https://hypothesis.readthedocs.io/en/latest/settings.html#settings-profiles
30
-hypothesis.settings.register_profile('ci', max_examples=1000)
31
-hypothesis.settings.register_profile('dev', max_examples=10)
31
+
32
+def _hypothesis_settings_setup() -> None:
33
+    """
34
+    Ensure sensible hypothesis settings if running under coverage.
35
+
36
+    In our tests, the sys.monitoring tracer slows down execution speed
37
+    by a factor of roughly 3, the C tracer by roughly 2.5, and the
38
+    Python tracer by roughly 40.  Ensure that hypothesis default
39
+    timeouts apply relative to this *new* execution speed, not the old
40
+    one.
41
+
42
+    In any case, we *also* reduce the state machine step count to 32
43
+    steps per run, because the current state machines defined in the
44
+    tests rather benefit from broad testing rather than deep testing.
45
+
46
+    """
47
+    settings = hypothesis.settings()
48
+    slowdown: float | None = None
49
+    if (
50
+        importlib.util.find_spec('coverage') is not None
51
+        and settings.deadline is not None
52
+        and settings.deadline.total_seconds() < 1.0
53
+    ):  # pragma: no cover
54
+        ctracer_class = (
55
+            importlib.import_module('coverage.tracer').CTracer
56
+            if importlib.util.find_spec('coverage.tracer') is not None
57
+            else type(None)
58
+        )
59
+        pytracer_class = importlib.import_module('coverage.pytracer').PyTracer
60
+        if (
61
+            getattr(sys, 'monitoring', None) is not None
62
+            and sys.monitoring.get_tool(sys.monitoring.COVERAGE_ID)
63
+            == 'coverage.py'
64
+        ):
65
+            slowdown = 3.0
66
+        elif (
67
+            trace_func := getattr(sys, 'gettrace', lambda: None)()
68
+        ) is not None and isinstance(trace_func, ctracer_class):
69
+            slowdown = 2.5
70
+        elif (
71
+            trace_func is not None
72
+            and hasattr(trace_func, '__self__')
73
+            and isinstance(trace_func.__self__, pytracer_class)
74
+        ):
75
+            slowdown = 8.0
76
+    settings = hypothesis.settings(
77
+        deadline=slowdown * settings.deadline
78
+        if slowdown
79
+        else settings.deadline,
80
+        stateful_step_count=32,
81
+        suppress_health_check=(hypothesis.HealthCheck.too_slow,),
82
+    )
83
+    hypothesis.settings.register_profile('default', settings)
84
+    hypothesis.settings.register_profile(
85
+        'dev', derandomize=True, max_examples=10
86
+    )
32 87
     hypothesis.settings.register_profile(
33
-    'debug', max_examples=10, verbosity=hypothesis.Verbosity.verbose
88
+        'debug',
89
+        parent=hypothesis.settings.get_profile('dev'),
90
+        verbosity=hypothesis.Verbosity.verbose,
34 91
     )
35 92
     hypothesis.settings.register_profile(
36
-    'flaky', deadline=datetime.timedelta(milliseconds=150)
93
+        'flaky',
94
+        deadline=(
95
+            settings.deadline - settings.deadline // 4
96
+            if settings.deadline is not None
97
+            else datetime.timedelta(milliseconds=150)
98
+        ),
37 99
     )
100
+    ci_profile = hypothesis.settings.get_profile('ci')
101
+    hypothesis.settings.register_profile(
102
+        'intense',
103
+        parent=ci_profile,
104
+        derandomize=False,
105
+        max_examples=10 * ci_profile.max_examples,
106
+    )
107
+
108
+
109
+_hypothesis_settings_setup()
38 110
 
39 111
 
40 112
 # https://docs.pytest.org/en/stable/explanation/fixtures.html#a-note-about-fixture-cleanup
... ...
@@ -1275,7 +1275,12 @@ class TestCLI:
1275 1275
             map(is_harmless_config_import_warning, caplog.record_tuples)
1276 1276
         ), 'unexpected error output'
1277 1277
 
1278
-    @tests.hypothesis_settings_coverage_compatible_with_caplog
1278
+    @hypothesis.settings(
1279
+        suppress_health_check=[
1280
+            *hypothesis.settings().suppress_health_check,
1281
+            hypothesis.HealthCheck.function_scoped_fixture,
1282
+        ],
1283
+    )
1279 1284
     @hypothesis.given(
1280 1285
         conf=tests.smudged_vault_test_config(
1281 1286
             strategies.sampled_from([
... ...
@@ -3076,7 +3081,6 @@ Boo.
3076 3081
                 assert result.clean_exit()
3077 3082
             assert cli_helpers.load_config() == config
3078 3083
 
3079
-    @tests.hypothesis_settings_coverage_compatible
3080 3084
     @hypothesis.given(
3081 3085
         global_config_settable=tests.vault_full_service_config(),
3082 3086
         global_config_importable=strategies.fixed_dictionaries(
... ...
@@ -4058,7 +4062,6 @@ def vault_full_config() -> strategies.SearchStrategy[_types.VaultConfig]:
4058 4062
     return VAULT_FULL_CONFIG
4059 4063
 
4060 4064
 
4061
-@tests.hypothesis_settings_coverage_compatible
4062 4065
 class ConfigManagementStateMachine(stateful.RuleBasedStateMachine):
4063 4066
     """A state machine recording changes in the vault configuration.
4064 4067
 
... ...
@@ -605,7 +605,6 @@ class TestStoreroom:
605 605
                 ),
606 606
             )
607 607
 
608
-    @tests.hypothesis_settings_coverage_compatible
609 608
     @hypothesis.given(
610 609
         data=strategies.binary(
611 610
             min_size=storeroom.MAC_SIZE, max_size=storeroom.MAC_SIZE
... ...
@@ -66,7 +66,6 @@ def js_nested_strategy(draw: strategies.DrawFn) -> Any:
66 66
     )
67 67
 
68 68
 
69
-@tests.hypothesis_settings_coverage_compatible
70 69
 @hypothesis.given(value=js_nested_strategy())
71 70
 @hypothesis.example(float('nan'))
72 71
 def test_100_js_truthiness(value: Any) -> None:
... ...
@@ -115,7 +114,6 @@ def test_200_is_vault_config(test_config: tests.VaultTestConfig) -> None:
115 114
     )
116 115
 
117 116
 
118
-@tests.hypothesis_settings_coverage_compatible
119 117
 @hypothesis.given(
120 118
     test_config=tests.smudged_vault_test_config(
121 119
         config=strategies.sampled_from([
... ...
@@ -183,7 +181,6 @@ def test_400_validate_vault_config(test_config: tests.VaultTestConfig) -> None:
183 181
             assert not exc, 'failed to validate valid example'  # noqa: PT017
184 182
 
185 183
 
186
-@tests.hypothesis_settings_coverage_compatible
187 184
 @hypothesis.given(
188 185
     test_config=tests.smudged_vault_test_config(
189 186
         config=strategies.sampled_from([
... ...
@@ -217,7 +217,6 @@ class TestVault:
217 217
             phrase=phrase, service=services[0]
218 218
         ) != vault.Vault.create_hash(phrase=phrase, service=services[1])
219 219
 
220
-    @tests.hypothesis_settings_coverage_compatible
221 220
     @hypothesis.given(
222 221
         phrases=strategies.binary(max_size=BLOCK_SIZE // 2).flatmap(
223 222
             lambda bs: strategies.tuples(
... ...
@@ -245,7 +244,6 @@ class TestVault:
245 244
             phrase=phrases[0], service=service
246 245
         ) == vault.Vault.create_hash(phrase=phrases[1], service=service)
247 246
 
248
-    @tests.hypothesis_settings_coverage_compatible
249 247
     @hypothesis.given(
250 248
         phrases=strategies.binary(
251 249
             min_size=BLOCK_SIZE + 1, max_size=BLOCK_SIZE + 8
... ...
@@ -462,7 +460,6 @@ class TestVault:
462 460
             phrase=phrase
463 461
         ).generate(services[1])
464 462
 
465
-    @tests.hypothesis_settings_coverage_compatible
466 463
     @hypothesis.given(
467 464
         phrase=strategies.text(
468 465
             strategies.characters(min_codepoint=32, max_codepoint=126),
... ...
@@ -506,7 +503,6 @@ class TestVault:
506 503
             == b'xDFu'
507 504
         )
508 505
 
509
-    @tests.hypothesis_settings_coverage_compatible
510 506
     @hypothesis.given(
511 507
         phrase=strategies.one_of(
512 508
             strategies.binary(min_size=1, max_size=100),
... ...
@@ -592,7 +588,6 @@ class TestVault:
592 588
             == b': : fv_wqt>a-4w1S  R'
593 589
         )
594 590
 
595
-    @tests.hypothesis_settings_coverage_compatible
596 591
     @hypothesis.given(
597 592
         phrase=strategies.one_of(
598 593
             strategies.binary(min_size=1), strategies.text(min_size=1)
599 594