Marco Ricci committed on 2026-01-17 19:40:14
Zeige 3 geänderte Dateien mit 147 Einfügungen und 85 Löschungen.
Give heavy-duty tests that involve spawning processes less extensive example counts, because spawning processes is expensive (especially on The Annoying OS), and because Python 3.14+ is defaulting to slow-but-safe process spawning machinery that makes these costs much more visible than before. Specifically, we introduce new hypothesis machinery for calculating a good `max_examples` count for state machines that involve spawning processes on each state transition. There is currently only one such state machine: `FakeConfigurationMutexStateMachine` from the CLI heavy-duty tests. The example count `n'` for state machines is then `sqrt(10 * n)`, where `n` is the example count for other test types. For the "dev", "default" and "intense" profiles (`n = 10`, `100` and `1000`, respectively), this translates to `n' = 10`, `31` and `100`, respectively. In particular, at "dev" they are identical, and at "intense", state machines have "default" behavior. In preparation for this commit, we noticed that the hypothesis settings profiles were not necessarily defined when the state machines query the settings. Accordingly, we moved the settings profiles setup into the `tests.machinery.hypothesis` package, made it idempotent, and ensured it would be called before accessing the profiles.
| ... | ... |
@@ -7,17 +7,12 @@ from __future__ import annotations |
| 7 | 7 |
import base64 |
| 8 | 8 |
import contextlib |
| 9 | 9 |
import ctypes |
| 10 |
-import datetime |
|
| 11 |
-import importlib |
|
| 12 |
-import importlib.util |
|
| 13 | 10 |
import os |
| 14 | 11 |
import shutil |
| 15 | 12 |
import socket |
| 16 | 13 |
import subprocess |
| 17 |
-import sys |
|
| 18 | 14 |
from typing import TYPE_CHECKING, Any, Protocol, TypeVar |
| 19 | 15 |
|
| 20 |
-import hypothesis |
|
| 21 | 16 |
import packaging.version |
| 22 | 17 |
import pytest |
| 23 | 18 |
from typing_extensions import NamedTuple |
| ... | ... |
@@ -26,6 +21,7 @@ from derivepassphrase import _types, ssh_agent |
| 26 | 21 |
from derivepassphrase.ssh_agent import socketprovider |
| 27 | 22 |
from tests import data, machinery |
| 28 | 23 |
from tests.data import callables |
| 24 |
+from tests.machinery import hypothesis as hypothesis_machinery |
|
| 29 | 25 |
from tests.machinery import pytest as pytest_machinery |
| 30 | 26 |
|
| 31 | 27 |
if TYPE_CHECKING: |
| ... | ... |
@@ -34,86 +30,6 @@ if TYPE_CHECKING: |
| 34 | 30 |
startup_ssh_auth_sock = os.environ.get("SSH_AUTH_SOCK", None)
|
| 35 | 31 |
|
| 36 | 32 |
|
| 37 |
-def _hypothesis_settings_setup() -> None: |
|
| 38 |
- """ |
|
| 39 |
- Ensure sensible hypothesis settings if running under coverage. |
|
| 40 |
- |
|
| 41 |
- In our tests, the sys.monitoring tracer slows down execution speed |
|
| 42 |
- by a factor of roughly 3, the C tracer by roughly 2.5, and the |
|
| 43 |
- Python tracer by roughly 40. Ensure that hypothesis default |
|
| 44 |
- timeouts apply relative to this *new* execution speed, not the old |
|
| 45 |
- one. |
|
| 46 |
- |
|
| 47 |
- In any case, we *also* reduce the state machine step count to 32 |
|
| 48 |
- steps per run, because the current state machines defined in the |
|
| 49 |
- tests rather benefit from broad testing rather than deep testing. |
|
| 50 |
- |
|
| 51 |
- """ |
|
| 52 |
- settings = hypothesis.settings() |
|
| 53 |
- slowdown: float | None = None |
|
| 54 |
- if ( |
|
| 55 |
- importlib.util.find_spec("coverage") is not None
|
|
| 56 |
- and settings.deadline is not None |
|
| 57 |
- and settings.deadline.total_seconds() < 1.0 |
|
| 58 |
- ): # pragma: no cover [external] |
|
| 59 |
- ctracer_class = ( |
|
| 60 |
- importlib.import_module("coverage.tracer").CTracer
|
|
| 61 |
- if importlib.util.find_spec("coverage.tracer") is not None
|
|
| 62 |
- else type(None) |
|
| 63 |
- ) |
|
| 64 |
- pytracer_class = importlib.import_module("coverage.pytracer").PyTracer
|
|
| 65 |
- if ( |
|
| 66 |
- getattr(sys, "monitoring", None) is not None |
|
| 67 |
- and sys.monitoring.get_tool(sys.monitoring.COVERAGE_ID) |
|
| 68 |
- == "coverage.py" |
|
| 69 |
- ): |
|
| 70 |
- slowdown = 3.0 |
|
| 71 |
- elif ( |
|
| 72 |
- trace_func := getattr(sys, "gettrace", lambda: None)() |
|
| 73 |
- ) is not None and isinstance(trace_func, ctracer_class): |
|
| 74 |
- slowdown = 2.5 |
|
| 75 |
- elif ( |
|
| 76 |
- trace_func is not None |
|
| 77 |
- and hasattr(trace_func, "__self__") |
|
| 78 |
- and isinstance(trace_func.__self__, pytracer_class) |
|
| 79 |
- ): |
|
| 80 |
- slowdown = 8.0 |
|
| 81 |
- settings = hypothesis.settings( |
|
| 82 |
- deadline=slowdown * settings.deadline |
|
| 83 |
- if slowdown |
|
| 84 |
- else settings.deadline, |
|
| 85 |
- stateful_step_count=32, |
|
| 86 |
- suppress_health_check=(hypothesis.HealthCheck.too_slow,), |
|
| 87 |
- ) |
|
| 88 |
- hypothesis.settings.register_profile("default", settings)
|
|
| 89 |
- hypothesis.settings.register_profile( |
|
| 90 |
- "dev", derandomize=True, max_examples=10 |
|
| 91 |
- ) |
|
| 92 |
- hypothesis.settings.register_profile( |
|
| 93 |
- "debug", |
|
| 94 |
- parent=hypothesis.settings.get_profile("dev"),
|
|
| 95 |
- verbosity=hypothesis.Verbosity.verbose, |
|
| 96 |
- ) |
|
| 97 |
- hypothesis.settings.register_profile( |
|
| 98 |
- "flaky", |
|
| 99 |
- deadline=( |
|
| 100 |
- settings.deadline - settings.deadline // 4 |
|
| 101 |
- if settings.deadline is not None |
|
| 102 |
- else datetime.timedelta(milliseconds=150) |
|
| 103 |
- ), |
|
| 104 |
- ) |
|
| 105 |
- ci_profile = hypothesis.settings.get_profile("ci")
|
|
| 106 |
- hypothesis.settings.register_profile( |
|
| 107 |
- "intense", |
|
| 108 |
- parent=ci_profile, |
|
| 109 |
- derandomize=False, |
|
| 110 |
- max_examples=10 * ci_profile.max_examples, |
|
| 111 |
- ) |
|
| 112 |
- |
|
| 113 |
- |
|
| 114 |
-_hypothesis_settings_setup() |
|
| 115 |
- |
|
| 116 |
- |
|
| 117 | 33 |
def pytest_configure(config: pytest.Config) -> None: |
| 118 | 34 |
"""Configure `pytest`: add the `heavy_duty` marker.""" |
| 119 | 35 |
config.addinivalue_line( |
| ... | ... |
@@ -123,6 +39,7 @@ def pytest_configure(config: pytest.Config) -> None: |
| 123 | 39 |
"mark test as a slow, heavy-duty test (e.g., an integration test)" |
| 124 | 40 |
), |
| 125 | 41 |
) |
| 42 |
+ hypothesis_machinery._hypothesis_settings_setup() |
|
| 126 | 43 |
|
| 127 | 44 |
|
| 128 | 45 |
# https://docs.pytest.org/en/stable/explanation/fixtures.html#a-note-about-fixture-cleanup |
| ... | ... |
@@ -17,6 +17,11 @@ All similar-minded code requiring only plain `pytest` lives in [the |
| 17 | 17 |
from __future__ import annotations |
| 18 | 18 |
|
| 19 | 19 |
import copy |
| 20 |
+import datetime |
|
| 21 |
+import importlib |
|
| 22 |
+import importlib.util |
|
| 23 |
+import math |
|
| 24 |
+import sys |
|
| 20 | 25 |
from typing import TYPE_CHECKING |
| 21 | 26 |
|
| 22 | 27 |
import hypothesis |
| ... | ... |
@@ -35,6 +40,93 @@ if TYPE_CHECKING: |
| 35 | 40 |
# ============================== |
| 36 | 41 |
|
| 37 | 42 |
|
| 43 |
+def _hypothesis_settings_setup() -> None: |
|
| 44 |
+ """ |
|
| 45 |
+ Ensure sensible hypothesis settings if running under coverage. |
|
| 46 |
+ |
|
| 47 |
+ In our tests, the sys.monitoring tracer slows down execution speed |
|
| 48 |
+ by a factor of roughly 3, the C tracer by roughly 2.5, and the |
|
| 49 |
+ Python tracer by roughly 40. Ensure that hypothesis default |
|
| 50 |
+ timeouts apply relative to this *new* execution speed, not the old |
|
| 51 |
+ one. |
|
| 52 |
+ |
|
| 53 |
+ In any case, we *also* reduce the state machine step count to 32 |
|
| 54 |
+ steps per run, because the current state machines defined in the |
|
| 55 |
+ tests rather benefit from broad testing rather than deep testing. |
|
| 56 |
+ |
|
| 57 |
+ This setup function is idempotent: if it detects that the profiles |
|
| 58 |
+ have already been registered, then it silently does nothing. |
|
| 59 |
+ |
|
| 60 |
+ """ |
|
| 61 |
+ try: |
|
| 62 |
+ hypothesis.settings.get_profile("intense")
|
|
| 63 |
+ except hypothesis.errors.InvalidArgument: # pragma: no cover [external] |
|
| 64 |
+ pass |
|
| 65 |
+ else: # pragma: no cover [external] |
|
| 66 |
+ return |
|
| 67 |
+ |
|
| 68 |
+ settings = hypothesis.settings() |
|
| 69 |
+ slowdown: float | None = None |
|
| 70 |
+ if ( |
|
| 71 |
+ importlib.util.find_spec("coverage") is not None
|
|
| 72 |
+ and settings.deadline is not None |
|
| 73 |
+ and settings.deadline.total_seconds() < 1.0 |
|
| 74 |
+ ): # pragma: no cover [external] |
|
| 75 |
+ ctracer_class = ( |
|
| 76 |
+ importlib.import_module("coverage.tracer").CTracer
|
|
| 77 |
+ if importlib.util.find_spec("coverage.tracer") is not None
|
|
| 78 |
+ else type(None) |
|
| 79 |
+ ) |
|
| 80 |
+ pytracer_class = importlib.import_module("coverage.pytracer").PyTracer
|
|
| 81 |
+ if ( |
|
| 82 |
+ getattr(sys, "monitoring", None) is not None |
|
| 83 |
+ and sys.monitoring.get_tool(sys.monitoring.COVERAGE_ID) |
|
| 84 |
+ == "coverage.py" |
|
| 85 |
+ ): |
|
| 86 |
+ slowdown = 3.0 |
|
| 87 |
+ elif ( |
|
| 88 |
+ trace_func := getattr(sys, "gettrace", lambda: None)() |
|
| 89 |
+ ) is not None and isinstance(trace_func, ctracer_class): |
|
| 90 |
+ slowdown = 2.5 |
|
| 91 |
+ elif ( |
|
| 92 |
+ trace_func is not None |
|
| 93 |
+ and hasattr(trace_func, "__self__") |
|
| 94 |
+ and isinstance(trace_func.__self__, pytracer_class) |
|
| 95 |
+ ): |
|
| 96 |
+ slowdown = 8.0 |
|
| 97 |
+ settings = hypothesis.settings( |
|
| 98 |
+ deadline=slowdown * settings.deadline |
|
| 99 |
+ if slowdown |
|
| 100 |
+ else settings.deadline, |
|
| 101 |
+ stateful_step_count=32, |
|
| 102 |
+ suppress_health_check=(hypothesis.HealthCheck.too_slow,), |
|
| 103 |
+ ) |
|
| 104 |
+ hypothesis.settings.register_profile("default", settings)
|
|
| 105 |
+ hypothesis.settings.register_profile( |
|
| 106 |
+ "dev", derandomize=True, max_examples=10 |
|
| 107 |
+ ) |
|
| 108 |
+ hypothesis.settings.register_profile( |
|
| 109 |
+ "debug", |
|
| 110 |
+ parent=hypothesis.settings.get_profile("dev"),
|
|
| 111 |
+ verbosity=hypothesis.Verbosity.verbose, |
|
| 112 |
+ ) |
|
| 113 |
+ hypothesis.settings.register_profile( |
|
| 114 |
+ "flaky", |
|
| 115 |
+ deadline=( |
|
| 116 |
+ settings.deadline - settings.deadline // 4 |
|
| 117 |
+ if settings.deadline is not None |
|
| 118 |
+ else datetime.timedelta(milliseconds=150) |
|
| 119 |
+ ), |
|
| 120 |
+ ) |
|
| 121 |
+ ci_profile = hypothesis.settings.get_profile("ci")
|
|
| 122 |
+ hypothesis.settings.register_profile( |
|
| 123 |
+ "intense", |
|
| 124 |
+ parent=ci_profile, |
|
| 125 |
+ derandomize=False, |
|
| 126 |
+ max_examples=10 * ci_profile.max_examples, |
|
| 127 |
+ ) |
|
| 128 |
+ |
|
| 129 |
+ |
|
| 38 | 130 |
def get_concurrency_step_count( |
| 39 | 131 |
settings: hypothesis.settings | None = None, |
| 40 | 132 |
) -> int: |
| ... | ... |
@@ -55,6 +147,58 @@ def get_concurrency_step_count( |
| 55 | 147 |
return min(machinery.get_concurrency_limit(), settings.stateful_step_count) |
| 56 | 148 |
|
| 57 | 149 |
|
| 150 |
+def get_process_spawning_state_machine_examples_count( |
|
| 151 |
+ settings: hypothesis.settings | None = None, |
|
| 152 |
+) -> int: |
|
| 153 |
+ """Return the examples count for process-spawning state machines. |
|
| 154 |
+ |
|
| 155 |
+ That is, return the desired `max_examples` setting for state |
|
| 156 |
+ machines that spawn processes as part of their operation. Since |
|
| 157 |
+ Python 3.14, process spawning is no longer cheap by default on *any* |
|
| 158 |
+ of the main operating systems (they all default to the "forkserver" |
|
| 159 |
+ or "spawn" startup methods), and on The Annoying OS, process |
|
| 160 |
+ spawning is inherently expensive. Therefore, we want to limit the |
|
| 161 |
+ examples count by default, and require the user to opt-in to the |
|
| 162 |
+ original naive example count explicitly. |
|
| 163 |
+ |
|
| 164 |
+ If the "intense" profile is in effect, or something with even higher |
|
| 165 |
+ `max_examples` and `stateful_step_count`, then we return the |
|
| 166 |
+ unaltered example count for the *default* profile. Otherwise, we |
|
| 167 |
+ return the square root of the `max_examples` setting (rounded down). |
|
| 168 |
+ We *never* return a value below the "dev" profile's example count: |
|
| 169 |
+ any lower computed example count is increased to the "dev" profile's |
|
| 170 |
+ example count. |
|
| 171 |
+ |
|
| 172 |
+ Args: |
|
| 173 |
+ settings: |
|
| 174 |
+ The hypothesis settings for a specific tests. If not given, |
|
| 175 |
+ then the current profile will be queried directly. |
|
| 176 |
+ |
|
| 177 |
+ """ |
|
| 178 |
+ if settings is None: # pragma: no cover |
|
| 179 |
+ settings = hypothesis.settings() |
|
| 180 |
+ |
|
| 181 |
+ # Ensure the "intense" profile exists. |
|
| 182 |
+ _hypothesis_settings_setup() |
|
| 183 |
+ |
|
| 184 |
+ these_values = (settings.max_examples, settings.stateful_step_count) |
|
| 185 |
+ intense_profile = hypothesis.settings.get_profile("intense")
|
|
| 186 |
+ intense_values = ( |
|
| 187 |
+ intense_profile.max_examples, |
|
| 188 |
+ intense_profile.stateful_step_count, |
|
| 189 |
+ ) |
|
| 190 |
+ min_count = hypothesis.settings.get_profile("dev").max_examples
|
|
| 191 |
+ high_count = hypothesis.settings.get_profile("default").max_examples
|
|
| 192 |
+ we_are_intense = ( |
|
| 193 |
+ these_values[0] >= intense_values[0] |
|
| 194 |
+ and these_values[1] >= intense_values[1] |
|
| 195 |
+ ) |
|
| 196 |
+ return max( |
|
| 197 |
+ min_count, |
|
| 198 |
+ high_count if we_are_intense else math.isqrt(settings.max_examples), |
|
| 199 |
+ ) |
|
| 200 |
+ |
|
| 201 |
+ |
|
| 58 | 202 |
# Hypothesis strategies |
| 59 | 203 |
# ===================== |
| 60 | 204 |
|
| ... | ... |
@@ -844,6 +844,7 @@ def run_actions_handler( |
| 844 | 844 |
|
| 845 | 845 |
@hypothesis.settings( |
| 846 | 846 |
stateful_step_count=hypothesis_machinery.get_concurrency_step_count(), |
| 847 |
+ max_examples=hypothesis_machinery.get_process_spawning_state_machine_examples_count(), |
|
| 847 | 848 |
deadline=None, |
| 848 | 849 |
) |
| 849 | 850 |
class FakeConfigurationMutexStateMachine(stateful.RuleBasedStateMachine): |
| 850 | 851 |