Marco Ricci committed on 2025-01-31 14:43:03
Zeige 7 geänderte Dateien mit 84 Einfügungen und 105 Löschungen.
Previously, we defined a "worst-case" hypothesis profile for use under instrumentation (coverage measurement), and applied it, via a decorator, to every hypothesis test that we had observed running into deadline problems. This step was manual, and very error-prone when writing new hypothesis tests. Conceptually, it also suggested that adjusted deadlines applied to some tests but not to others, even though the deadline adjustment is actually a side effect of global instrumentation. We therefore now adopt a global solution, where the pytest configuration file detects which coverage instrumentation (if any) is running, and adjusts the default hypothesis settings appropriately. The decorator is removed. In the course of this, we re-evaluated the profiles and the slowdown factors, instead of assuming the worst-case instrumentation. hypothesis also defines a "ci" profile for use in continuous integration, together with some commentary on how this profile is expected to be used. We cede this profile name to hypothesis (and remove it from our testing matrix in hatch), and define our own "intense" profile for high-volume hypothesis testing based on the "ci" profile, to be invoked manually as a "pre-release" (or perhaps "nightly") run. Finally, because the test suite is now rather slow in sequential use, we add `--dist=worksteal` to the test options to better distribute load during parallel testing. We choose this over other, more "balanced" strategies because our single-test runtimes fluctuate wildly thanks to hypothesis, so work stealing appears to be the most reliable distribution heuristic for us.
... | ... |
@@ -140,26 +140,17 @@ matrix-name-format = '{variable}_{value}' |
140 | 140 |
[[tool.hatch.envs.hatch-test.matrix]] |
141 | 141 |
python = ["3.13", "3.12", "3.11", "3.10", "3.9", "pypy3.10", "pypy3.9"] |
142 | 142 |
cryptography = ["no", "yes"] |
143 |
-hypothesis-profile = ["user-default"] |
|
144 | 143 |
parser-version = ["PEG"] |
145 | 144 |
|
146 | 145 |
[[tool.hatch.envs.hatch-test.matrix]] |
147 | 146 |
python = ["3.9", "pypy3.9"] |
148 | 147 |
cryptography = ["no", "yes"] |
149 |
-hypothesis-profile = ["user-default"] |
|
150 | 148 |
parser-version = ["LL1"] |
151 | 149 |
|
152 |
-[[tool.hatch.envs.hatch-test.matrix]] |
|
153 |
-cryptography = ["yes"] |
|
154 |
-hypothesis-profile = ["ci"] |
|
155 |
- |
|
156 | 150 |
[tool.hatch.envs.hatch-test.overrides] |
157 | 151 |
matrix.cryptography.features = [ |
158 | 152 |
{ value = "export", if = ["yes"] }, |
159 | 153 |
] |
160 |
-matrix.hypothesis-profile.env-vars = [ |
|
161 |
- { key = "HYPOTHESIS_PROFILE", if = ["ci", "default", "dev", "debug"] }, |
|
162 |
-] |
|
163 | 154 |
matrix.parser-version.env-vars = [ |
164 | 155 |
{ key = "PYTHONOLDPARSER", value = "1", if = ["LL1"] }, |
165 | 156 |
] |
... | ... |
@@ -195,7 +186,7 @@ sqlite_cache = true |
195 | 186 |
enable_error_code = ['ignore-without-code'] |
196 | 187 |
|
197 | 188 |
[tool.pytest.ini_options] |
198 |
-addopts = '--doctest-modules' |
|
189 |
+addopts = '--doctest-modules --dist=worksteal' |
|
199 | 190 |
pythonpath = ['src'] |
200 | 191 |
testpaths = ['src', 'tests'] |
201 | 192 |
xfail_strict = true |
... | ... |
@@ -16,7 +16,6 @@ import pathlib |
16 | 16 |
import re |
17 | 17 |
import shlex |
18 | 18 |
import stat |
19 |
-import sys |
|
20 | 19 |
import tempfile |
21 | 20 |
import zipfile |
22 | 21 |
from typing import TYPE_CHECKING |
... | ... |
@@ -1604,83 +1603,6 @@ on cryptography support being available. |
1604 | 1603 |
""" |
1605 | 1604 |
|
1606 | 1605 |
|
1607 |
-def hypothesis_settings_coverage_compatible( |
|
1608 |
- f: Any = None, |
|
1609 |
-) -> Any: |
|
1610 |
- """Return (or decorate `f` with) coverage-friendly hypothesis settings. |
|
1611 |
- |
|
1612 |
- Specifically, we increase the deadline 40-fold if we detect we are |
|
1613 |
- running under coverage testing, because the slow Python trace |
|
1614 |
- function (necessary on PyPy) drastically increases runtime for |
|
1615 |
- hypothesis tests. |
|
1616 |
- |
|
1617 |
- In any case, we *also* reduce the state machine step count to 32 |
|
1618 |
- steps per run, because the current state machines defined in the |
|
1619 |
- tests rather benefit from broad testing rather than deep testing. |
|
1620 |
- |
|
1621 |
- Args: |
|
1622 |
- f: |
|
1623 |
- An optional object to decorate with these settings. |
|
1624 |
- |
|
1625 |
- Returns: |
|
1626 |
- The modified hypothesis settings, as a settings object. If |
|
1627 |
- decorating a function/class, return that function/class |
|
1628 |
- directly, after decorating. |
|
1629 |
- |
|
1630 |
- """ |
|
1631 |
- settings = ( |
|
1632 |
- hypothesis.settings( |
|
1633 |
- # Running under coverage with the Python tracer increases |
|
1634 |
- # running times 40-fold, on my machines. Sadly, not every |
|
1635 |
- # Python version offers the C tracer, so sometimes the Python |
|
1636 |
- # tracer is used anyway. |
|
1637 |
- deadline=( |
|
1638 |
- 40 * deadline |
|
1639 |
- if (deadline := hypothesis.settings().deadline) is not None |
|
1640 |
- else None |
|
1641 |
- ), |
|
1642 |
- stateful_step_count=32, |
|
1643 |
- suppress_health_check=(hypothesis.HealthCheck.too_slow,), |
|
1644 |
- ) |
|
1645 |
- if sys.gettrace() is not None |
|
1646 |
- else hypothesis.settings( |
|
1647 |
- stateful_step_count=32, |
|
1648 |
- suppress_health_check=(hypothesis.HealthCheck.too_slow,), |
|
1649 |
- ) |
|
1650 |
- ) |
|
1651 |
- return settings if f is None else settings(f) |
|
1652 |
- |
|
1653 |
- |
|
1654 |
-def hypothesis_settings_coverage_compatible_with_caplog( |
|
1655 |
- f: Any = None, |
|
1656 |
-) -> Any: |
|
1657 |
- """Return (or decorate `f` with) coverage-friendly hypothesis settings. |
|
1658 |
- |
|
1659 |
- This variant of [`hypothesis_settings_coverage_compatible`][] does |
|
1660 |
- all the same, and additionally disables the check for function |
|
1661 |
- scoped pytest fixtures such as `caplog`. |
|
1662 |
- |
|
1663 |
- Args: |
|
1664 |
- f: |
|
1665 |
- An optional object to decorate with these settings. |
|
1666 |
- |
|
1667 |
- Returns: |
|
1668 |
- The modified hypothesis settings, as a settings object. If |
|
1669 |
- decorating a function/class, return that function/class |
|
1670 |
- directly, after decorating. |
|
1671 |
- |
|
1672 |
- """ |
|
1673 |
- parent_settings = hypothesis_settings_coverage_compatible() |
|
1674 |
- settings = hypothesis.settings( |
|
1675 |
- parent=parent_settings, |
|
1676 |
- suppress_health_check={ |
|
1677 |
- hypothesis.HealthCheck.function_scoped_fixture, |
|
1678 |
- } |
|
1679 |
- | set(parent_settings.suppress_health_check), |
|
1680 |
- ) |
|
1681 |
- return settings if f is None else settings(f) |
|
1682 |
- |
|
1683 |
- |
|
1684 | 1606 |
def list_keys(self: Any = None) -> list[_types.SSHKeyCommentPair]: |
1685 | 1607 |
"""Return a list of all SSH test keys, as key/comment pairs. |
1686 | 1608 |
|
... | ... |
@@ -7,11 +7,13 @@ from __future__ import annotations |
7 | 7 |
import base64 |
8 | 8 |
import contextlib |
9 | 9 |
import datetime |
10 |
+import importlib |
|
10 | 11 |
import operator |
11 | 12 |
import os |
12 | 13 |
import shutil |
13 | 14 |
import socket |
14 | 15 |
import subprocess |
16 |
+import sys |
|
15 | 17 |
from typing import TYPE_CHECKING, Protocol, TypeVar |
16 | 18 |
|
17 | 19 |
import hypothesis |
... | ... |
@@ -26,15 +28,85 @@ if TYPE_CHECKING: |
26 | 28 |
|
27 | 29 |
startup_ssh_auth_sock = os.environ.get('SSH_AUTH_SOCK', None) |
28 | 30 |
|
29 |
-# https://hypothesis.readthedocs.io/en/latest/settings.html#settings-profiles |
|
30 |
-hypothesis.settings.register_profile('ci', max_examples=1000) |
|
31 |
-hypothesis.settings.register_profile('dev', max_examples=10) |
|
31 |
+ |
|
32 |
+def _hypothesis_settings_setup() -> None: |
|
33 |
+ """ |
|
34 |
+ Ensure sensible hypothesis settings if running under coverage. |
|
35 |
+ |
|
36 |
+ In our tests, the sys.monitoring tracer slows down execution speed |
|
37 |
+ by a factor of roughly 3, the C tracer by roughly 2.5, and the |
|
38 |
+ Python tracer by roughly 40. Ensure that hypothesis default |
|
39 |
+ timeouts apply relative to this *new* execution speed, not the old |
|
40 |
+ one. |
|
41 |
+ |
|
42 |
+ In any case, we *also* reduce the state machine step count to 32 |
|
43 |
+ steps per run, because the current state machines defined in the |
|
44 |
+ tests rather benefit from broad testing rather than deep testing. |
|
45 |
+ |
|
46 |
+ """ |
|
47 |
+ settings = hypothesis.settings() |
|
48 |
+ slowdown: float | None = None |
|
49 |
+ if ( |
|
50 |
+ importlib.util.find_spec('coverage') is not None |
|
51 |
+ and settings.deadline is not None |
|
52 |
+ and settings.deadline.total_seconds() < 1.0 |
|
53 |
+ ): # pragma: no cover |
|
54 |
+ ctracer_class = ( |
|
55 |
+ importlib.import_module('coverage.tracer').CTracer |
|
56 |
+ if importlib.util.find_spec('coverage.tracer') is not None |
|
57 |
+ else type(None) |
|
58 |
+ ) |
|
59 |
+ pytracer_class = importlib.import_module('coverage.pytracer').PyTracer |
|
60 |
+ if ( |
|
61 |
+ getattr(sys, 'monitoring', None) is not None |
|
62 |
+ and sys.monitoring.get_tool(sys.monitoring.COVERAGE_ID) |
|
63 |
+ == 'coverage.py' |
|
64 |
+ ): |
|
65 |
+ slowdown = 3.0 |
|
66 |
+ elif ( |
|
67 |
+ trace_func := getattr(sys, 'gettrace', lambda: None)() |
|
68 |
+ ) is not None and isinstance(trace_func, ctracer_class): |
|
69 |
+ slowdown = 2.5 |
|
70 |
+ elif ( |
|
71 |
+ trace_func is not None |
|
72 |
+ and hasattr(trace_func, '__self__') |
|
73 |
+ and isinstance(trace_func.__self__, pytracer_class) |
|
74 |
+ ): |
|
75 |
+ slowdown = 8.0 |
|
76 |
+ settings = hypothesis.settings( |
|
77 |
+ deadline=slowdown * settings.deadline |
|
78 |
+ if slowdown |
|
79 |
+ else settings.deadline, |
|
80 |
+ stateful_step_count=32, |
|
81 |
+ suppress_health_check=(hypothesis.HealthCheck.too_slow,), |
|
82 |
+ ) |
|
83 |
+ hypothesis.settings.register_profile('default', settings) |
|
84 |
+ hypothesis.settings.register_profile( |
|
85 |
+ 'dev', derandomize=True, max_examples=10 |
|
86 |
+ ) |
|
32 | 87 |
hypothesis.settings.register_profile( |
33 |
- 'debug', max_examples=10, verbosity=hypothesis.Verbosity.verbose |
|
88 |
+ 'debug', |
|
89 |
+ parent=hypothesis.settings.get_profile('dev'), |
|
90 |
+ verbosity=hypothesis.Verbosity.verbose, |
|
34 | 91 |
) |
35 | 92 |
hypothesis.settings.register_profile( |
36 |
- 'flaky', deadline=datetime.timedelta(milliseconds=150) |
|
93 |
+ 'flaky', |
|
94 |
+ deadline=( |
|
95 |
+ settings.deadline - settings.deadline // 4 |
|
96 |
+ if settings.deadline is not None |
|
97 |
+ else datetime.timedelta(milliseconds=150) |
|
98 |
+ ), |
|
37 | 99 |
) |
100 |
+ ci_profile = hypothesis.settings.get_profile('ci') |
|
101 |
+ hypothesis.settings.register_profile( |
|
102 |
+ 'intense', |
|
103 |
+ parent=ci_profile, |
|
104 |
+ derandomize=False, |
|
105 |
+ max_examples=10 * ci_profile.max_examples, |
|
106 |
+ ) |
|
107 |
+ |
|
108 |
+ |
|
109 |
+_hypothesis_settings_setup() |
|
38 | 110 |
|
39 | 111 |
|
40 | 112 |
# https://docs.pytest.org/en/stable/explanation/fixtures.html#a-note-about-fixture-cleanup |
... | ... |
@@ -1275,7 +1275,12 @@ class TestCLI: |
1275 | 1275 |
map(is_harmless_config_import_warning, caplog.record_tuples) |
1276 | 1276 |
), 'unexpected error output' |
1277 | 1277 |
|
1278 |
- @tests.hypothesis_settings_coverage_compatible_with_caplog |
|
1278 |
+ @hypothesis.settings( |
|
1279 |
+ suppress_health_check=[ |
|
1280 |
+ *hypothesis.settings().suppress_health_check, |
|
1281 |
+ hypothesis.HealthCheck.function_scoped_fixture, |
|
1282 |
+ ], |
|
1283 |
+ ) |
|
1279 | 1284 |
@hypothesis.given( |
1280 | 1285 |
conf=tests.smudged_vault_test_config( |
1281 | 1286 |
strategies.sampled_from([ |
... | ... |
@@ -3076,7 +3081,6 @@ Boo. |
3076 | 3081 |
assert result.clean_exit() |
3077 | 3082 |
assert cli_helpers.load_config() == config |
3078 | 3083 |
|
3079 |
- @tests.hypothesis_settings_coverage_compatible |
|
3080 | 3084 |
@hypothesis.given( |
3081 | 3085 |
global_config_settable=tests.vault_full_service_config(), |
3082 | 3086 |
global_config_importable=strategies.fixed_dictionaries( |
... | ... |
@@ -4058,7 +4062,6 @@ def vault_full_config() -> strategies.SearchStrategy[_types.VaultConfig]: |
4058 | 4062 |
return VAULT_FULL_CONFIG |
4059 | 4063 |
|
4060 | 4064 |
|
4061 |
-@tests.hypothesis_settings_coverage_compatible |
|
4062 | 4065 |
class ConfigManagementStateMachine(stateful.RuleBasedStateMachine): |
4063 | 4066 |
"""A state machine recording changes in the vault configuration. |
4064 | 4067 |
|
... | ... |
@@ -66,7 +66,6 @@ def js_nested_strategy(draw: strategies.DrawFn) -> Any: |
66 | 66 |
) |
67 | 67 |
|
68 | 68 |
|
69 |
-@tests.hypothesis_settings_coverage_compatible |
|
70 | 69 |
@hypothesis.given(value=js_nested_strategy()) |
71 | 70 |
@hypothesis.example(float('nan')) |
72 | 71 |
def test_100_js_truthiness(value: Any) -> None: |
... | ... |
@@ -115,7 +114,6 @@ def test_200_is_vault_config(test_config: tests.VaultTestConfig) -> None: |
115 | 114 |
) |
116 | 115 |
|
117 | 116 |
|
118 |
-@tests.hypothesis_settings_coverage_compatible |
|
119 | 117 |
@hypothesis.given( |
120 | 118 |
test_config=tests.smudged_vault_test_config( |
121 | 119 |
config=strategies.sampled_from([ |
... | ... |
@@ -183,7 +181,6 @@ def test_400_validate_vault_config(test_config: tests.VaultTestConfig) -> None: |
183 | 181 |
assert not exc, 'failed to validate valid example' # noqa: PT017 |
184 | 182 |
|
185 | 183 |
|
186 |
-@tests.hypothesis_settings_coverage_compatible |
|
187 | 184 |
@hypothesis.given( |
188 | 185 |
test_config=tests.smudged_vault_test_config( |
189 | 186 |
config=strategies.sampled_from([ |
... | ... |
@@ -217,7 +217,6 @@ class TestVault: |
217 | 217 |
phrase=phrase, service=services[0] |
218 | 218 |
) != vault.Vault.create_hash(phrase=phrase, service=services[1]) |
219 | 219 |
|
220 |
- @tests.hypothesis_settings_coverage_compatible |
|
221 | 220 |
@hypothesis.given( |
222 | 221 |
phrases=strategies.binary(max_size=BLOCK_SIZE // 2).flatmap( |
223 | 222 |
lambda bs: strategies.tuples( |
... | ... |
@@ -245,7 +244,6 @@ class TestVault: |
245 | 244 |
phrase=phrases[0], service=service |
246 | 245 |
) == vault.Vault.create_hash(phrase=phrases[1], service=service) |
247 | 246 |
|
248 |
- @tests.hypothesis_settings_coverage_compatible |
|
249 | 247 |
@hypothesis.given( |
250 | 248 |
phrases=strategies.binary( |
251 | 249 |
min_size=BLOCK_SIZE + 1, max_size=BLOCK_SIZE + 8 |
... | ... |
@@ -462,7 +460,6 @@ class TestVault: |
462 | 460 |
phrase=phrase |
463 | 461 |
).generate(services[1]) |
464 | 462 |
|
465 |
- @tests.hypothesis_settings_coverage_compatible |
|
466 | 463 |
@hypothesis.given( |
467 | 464 |
phrase=strategies.text( |
468 | 465 |
strategies.characters(min_codepoint=32, max_codepoint=126), |
... | ... |
@@ -506,7 +503,6 @@ class TestVault: |
506 | 503 |
== b'xDFu' |
507 | 504 |
) |
508 | 505 |
|
509 |
- @tests.hypothesis_settings_coverage_compatible |
|
510 | 506 |
@hypothesis.given( |
511 | 507 |
phrase=strategies.one_of( |
512 | 508 |
strategies.binary(min_size=1, max_size=100), |
... | ... |
@@ -592,7 +588,6 @@ class TestVault: |
592 | 588 |
== b': : fv_wqt>a-4w1S R' |
593 | 589 |
) |
594 | 590 |
|
595 |
- @tests.hypothesis_settings_coverage_compatible |
|
596 | 591 |
@hypothesis.given( |
597 | 592 |
phrase=strategies.one_of( |
598 | 593 |
strategies.binary(min_size=1), strategies.text(min_size=1) |
599 | 594 |