Refactor the hypothesis strategies for `vault` tests
Marco Ricci

Marco Ricci committed on 2025-08-15 06:54:29
Zeige 1 geänderte Dateien mit 176 Einfügungen und 108 Löschungen.


Collect, reorganize and reimplement the `hypothesis` strategies for
generating phrases and service names in the `vault` module tests.
Phrases come in three possible size ranges, are usually binary but
sometimes textual (if short), and at times, we explicitly want pairs
of phrases that are not interchangeable under `vault`, and at other
times, we want a second interchangeable phrase given a first one.  We
reimplement the (size-dependent) binary phrase and pair of phrases
strategies with `hypothesis.strategies.composite`, and add them to a new
namespace of strategies, similar to the `Parametrize` class.

I find the result very pleasing to read, and also much more amenable to
adjusting the strategies than when the definitions are always included
inline.
... ...
@@ -7,6 +7,7 @@
7 7
 from __future__ import annotations
8 8
 
9 9
 import array
10
+import enum
10 11
 import hashlib
11 12
 import math
12 13
 import types
... ...
@@ -15,13 +16,12 @@ from typing import TYPE_CHECKING
15 16
 import hypothesis
16 17
 import pytest
17 18
 from hypothesis import strategies
18
-from typing_extensions import TypeVar
19 19
 
20 20
 from derivepassphrase import vault
21 21
 from tests.machinery import hypothesis as hypothesis_machinery
22 22
 
23 23
 if TYPE_CHECKING:
24
-    from collections.abc import Callable, Iterator
24
+    from collections.abc import Callable
25 25
 
26 26
     from typing_extensions import Buffer
27 27
 
... ...
@@ -116,6 +116,154 @@ def phrases_are_interchangable(
116 116
     return phrase1 == phrase2
117 117
 
118 118
 
119
+class PhraseSize(str, enum.Enum):
120
+    """Size of the generated phrase.
121
+
122
+    Attributes:
123
+        SHORT: A phrase shorter than the SHA-1 block size.
124
+        FULL: A phrase exactly as long as the SHA-1 block size.
125
+        OVERLONG: A phrase longer than the SHA-1 block size.
126
+        MIXED: A `SHORT`, `FULL` or `OVERLONG` phrase.
127
+
128
+    """
129
+
130
+    SHORT = enum.auto()
131
+    """"""
132
+    FULL = enum.auto()
133
+    """"""
134
+    OVERLONG = enum.auto()
135
+    """"""
136
+    MIXED = enum.auto()
137
+    """"""
138
+
139
+
140
+class Strategies:
141
+    """Hypothesis strategies."""
142
+
143
+    @staticmethod
144
+    def text_strategy() -> strategies.SearchStrategy[str]:
145
+        """Return a strategy for textual master passphrases or service names."""
146
+        return strategies.text(
147
+            strategies.characters(min_codepoint=32, max_codepoint=126),
148
+            min_size=1,
149
+            max_size=BLOCK_SIZE // 2,
150
+        )
151
+
152
+    @strategies.composite
153
+    @staticmethod
154
+    def binary_phrase_strategy(
155
+        draw: strategies.DrawFn, size: PhraseSize = PhraseSize.MIXED
156
+    ) -> Buffer:
157
+        """Return a strategy for binary master passphrases.
158
+
159
+        Args:
160
+            draw:
161
+                The [strategy drawing
162
+                function][hypothesis.strategies.composite].
163
+            size:
164
+                The desired phrase size.
165
+
166
+        Returns:
167
+            The strategy.
168
+
169
+        """
170
+        if size == PhraseSize.MIXED:
171
+            size = draw(
172
+                strategies.sampled_from([
173
+                    PhraseSize.SHORT,
174
+                    PhraseSize.FULL,
175
+                    PhraseSize.OVERLONG,
176
+                ]),
177
+                label="concrete_size",
178
+            )
179
+        min_size, max_size = (
180
+            (1, BLOCK_SIZE // 2)
181
+            if size == PhraseSize.SHORT
182
+            else (BLOCK_SIZE, BLOCK_SIZE)
183
+            if size == PhraseSize.FULL
184
+            else (BLOCK_SIZE + 1, BLOCK_SIZE + 8)
185
+        )
186
+        return draw(
187
+            strategies.binary(min_size=min_size, max_size=max_size),
188
+            label="phrase",
189
+        )
190
+
191
+    @strategies.composite
192
+    @staticmethod
193
+    def pair_of_binary_phrases_strategy(
194
+        draw: strategies.DrawFn, size: PhraseSize = PhraseSize.MIXED
195
+    ) -> tuple[Buffer, Buffer]:
196
+        """Return a strategy for two non-interchangable binary master passphrases.
197
+
198
+        Args:
199
+            draw:
200
+                The [strategy drawing
201
+                function][hypothesis.strategies.composite].
202
+            size:
203
+                The desired phrase size.
204
+
205
+        Returns:
206
+            The strategy.
207
+
208
+        """
209
+        phrase1 = draw(
210
+            Strategies.binary_phrase_strategy(size=size), label="phrase1"
211
+        )
212
+        phrase2 = draw(
213
+            Strategies.binary_phrase_strategy(size=size).filter(
214
+                lambda p: not phrases_are_interchangable(phrase1, p)
215
+            ),
216
+            label="phrase2",
217
+        )
218
+        return (phrase1, phrase2)
219
+
220
+    @strategies.composite
221
+    @staticmethod
222
+    def make_interchangable_phrases(
223
+        draw: strategies.DrawFn, phrase: Buffer
224
+    ) -> tuple[Buffer, Buffer]:
225
+        """Transform a phrase into a pair of interchangable phrases.
226
+
227
+        For phrases of size 64 (the SHA-1 block size), [in 99.6% of the
228
+        cases][INTERCHANGABLE_PASSPHRASES], it is infeasible for us to
229
+        find a second interchangable phrase.  (It would be equivalent to
230
+        mounting a pre-image attack on an SHA-1, a cryptographically
231
+        infeasible action.)  However, in the remaining 0.4% of cases,
232
+        the phrase of size 64 is padded with NUL bytes at the end, and
233
+        we can generate the second interchangable phrase by altering the
234
+        padding.  For other phrase sizes, no such problems exist: we can
235
+        obtain interchangable phrases by adding padding (if the phrase
236
+        is shorter than 64 bytes) or by computing the SHA-1 value of the
237
+        phrase (if it is longer than 64 bytes).
238
+
239
+        [INTERCHANGABLE_PASSPHRASES]: https://the13thletter.info/derivepassphrase/0.x/explanation/faq-vault-interchangable-passphrases/ 'What are "interchangable passphrases" in `vault`, and what does that mean in practice?'
240
+
241
+        Args:
242
+            draw:
243
+                The [strategy drawing
244
+                function][hypothesis.strategies.composite].
245
+            phrase:
246
+                The first phrase.
247
+
248
+        Returns:
249
+            The strategy for two interchangable phrases.
250
+
251
+        """
252
+        p = bytes(phrase)
253
+        hypothesis.assume(p.rstrip(b"\x00") != p or len(p) != BLOCK_SIZE)
254
+        base = (
255
+            hashlib.sha1(p).digest()
256
+            if len(p) > BLOCK_SIZE
257
+            else p.rstrip(b"\x00") or b"\x00"
258
+        )
259
+        zero_filled = [
260
+            base + bytes(i)
261
+            for i in range(BLOCK_SIZE - len(base) + 1)
262
+            if base + bytes(i) != p
263
+        ]
264
+        return (p, draw(strategies.sampled_from(zero_filled)))
265
+
266
+
119 267
 class TestVault:
120 268
     """Test passphrase derivation with the "vault" scheme."""
121 269
 
... ...
@@ -126,17 +274,10 @@ class TestPhraseDependence:
126 274
     """Test the dependence of the internal hash on the master passphrase."""
127 275
 
128 276
     @hypothesis.given(
129
-        phrases=strategies.lists(
130
-            strategies.binary(min_size=1, max_size=BLOCK_SIZE // 2),
131
-            min_size=2,
132
-            max_size=2,
133
-            unique=True,
134
-        ).filter(lambda tup: not phrases_are_interchangable(*tup)),
135
-        service=strategies.text(
136
-            strategies.characters(min_codepoint=32, max_codepoint=126),
137
-            min_size=1,
138
-            max_size=BLOCK_SIZE // 2,
277
+        phrases=Strategies.pair_of_binary_phrases_strategy(
278
+            size=PhraseSize.SHORT
139 279
         ),
280
+        service=Strategies.text_strategy(),
140 281
     )
141 282
     @hypothesis.example(phrases=[b"\x00", b"\x00\x00"], service="0").xfail(
142 283
         reason="phrases are interchangable",
... ...
@@ -157,17 +298,10 @@ class TestPhraseDependence:
157 298
         ) != vault.Vault.create_hash(phrase=phrases[1], service=service)
158 299
 
159 300
     @hypothesis.given(
160
-        phrases=strategies.lists(
161
-            strategies.binary(min_size=BLOCK_SIZE, max_size=BLOCK_SIZE),
162
-            min_size=2,
163
-            max_size=2,
164
-            unique=True,
165
-        ).filter(lambda tup: not phrases_are_interchangable(*tup)),
166
-        service=strategies.text(
167
-            strategies.characters(min_codepoint=32, max_codepoint=126),
168
-            min_size=1,
169
-            max_size=BLOCK_SIZE // 2,
301
+        phrases=Strategies.pair_of_binary_phrases_strategy(
302
+            size=PhraseSize.FULL
170 303
         ),
304
+        service=Strategies.text_strategy(),
171 305
     )
172 306
     def test_medium(
173 307
         self,
... ...
@@ -184,19 +318,10 @@ class TestPhraseDependence:
184 318
         ) != vault.Vault.create_hash(phrase=phrases[1], service=service)
185 319
 
186 320
     @hypothesis.given(
187
-        phrases=strategies.lists(
188
-            strategies.binary(
189
-                min_size=BLOCK_SIZE + 1, max_size=BLOCK_SIZE + 8
190
-            ),
191
-            min_size=2,
192
-            max_size=2,
193
-            unique=True,
194
-        ).filter(lambda tup: not phrases_are_interchangable(*tup)),
195
-        service=strategies.text(
196
-            strategies.characters(min_codepoint=32, max_codepoint=126),
197
-            min_size=1,
198
-            max_size=BLOCK_SIZE // 2,
321
+        phrases=Strategies.pair_of_binary_phrases_strategy(
322
+            size=PhraseSize.OVERLONG
199 323
         ),
324
+        service=Strategies.text_strategy(),
200 325
     )
201 326
     def test_large(
202 327
         self,
... ...
@@ -213,23 +338,10 @@ class TestPhraseDependence:
213 338
         ) != vault.Vault.create_hash(phrase=phrases[1], service=service)
214 339
 
215 340
     @hypothesis.given(
216
-        phrases=strategies.lists(
217
-            strategies.one_of(
218
-                strategies.binary(min_size=1, max_size=BLOCK_SIZE // 2),
219
-                strategies.binary(min_size=BLOCK_SIZE, max_size=BLOCK_SIZE),
220
-                strategies.binary(
221
-                    min_size=BLOCK_SIZE + 1, max_size=BLOCK_SIZE + 8
222
-                ),
223
-            ),
224
-            min_size=2,
225
-            max_size=2,
226
-            unique=True,
227
-        ).filter(lambda tup: not phrases_are_interchangable(*tup)),
228
-        service=strategies.text(
229
-            strategies.characters(min_codepoint=32, max_codepoint=126),
230
-            min_size=1,
231
-            max_size=BLOCK_SIZE // 2,
341
+        phrases=Strategies.pair_of_binary_phrases_strategy(
342
+            size=PhraseSize.MIXED
232 343
         ),
344
+        service=Strategies.text_strategy(),
233 345
     )
234 346
     @hypothesis.example(
235 347
         phrases=[
... ...
@@ -267,13 +379,9 @@ class TestServiceNameDependence:
267 379
     """Test the dependence of the internal hash on the service name."""
268 380
 
269 381
     @hypothesis.given(
270
-        phrase=strategies.text(
271
-            strategies.characters(min_codepoint=32, max_codepoint=126),
272
-            min_size=1,
273
-            max_size=32,
274
-        ),
382
+        phrase=Strategies.text_strategy(),
275 383
         services=strategies.lists(
276
-            strategies.binary(min_size=1, max_size=32),
384
+            Strategies.text_strategy(),
277 385
             min_size=2,
278 386
             max_size=2,
279 387
             unique=True,
... ...
@@ -294,20 +402,10 @@ class TestInterchangablePhrases:
294 402
     """Test the interchangability of certain master passphrases."""
295 403
 
296 404
     @hypothesis.given(
297
-        phrases=strategies.binary(max_size=BLOCK_SIZE // 2).flatmap(
298
-            lambda bs: strategies.tuples(
299
-                strategies.just(bs),
300
-                strategies.integers(
301
-                    min_value=1,
302
-                    max_value=BLOCK_SIZE - len(bs),
303
-                ).map(lambda num: bs + b"\x00" * num),
304
-            )
305
-        ),
306
-        service=strategies.text(
307
-            strategies.characters(min_codepoint=32, max_codepoint=126),
308
-            min_size=1,
309
-            max_size=32,
310
-        ),
405
+        phrases=Strategies.binary_phrase_strategy(
406
+            size=PhraseSize.SHORT
407
+        ).flatmap(Strategies.make_interchangable_phrases),
408
+        service=Strategies.text_strategy(),
311 409
     )
312 410
     def test_small(
313 411
         self,
... ...
@@ -321,24 +419,10 @@ class TestInterchangablePhrases:
321 419
         ) == vault.Vault.create_hash(phrase=phrases[1], service=service)
322 420
 
323 421
     @hypothesis.given(
324
-        phrases=strategies.binary(
325
-            min_size=BLOCK_SIZE + 1, max_size=BLOCK_SIZE + 8
326
-        ).flatmap(
327
-            lambda bs: strategies.tuples(
328
-                strategies.just(bs),
329
-                strategies.just(hashlib.sha1(bs).digest()).flatmap(
330
-                    lambda h: strategies.integers(
331
-                        min_value=1,
332
-                        max_value=BLOCK_SIZE - DIGEST_SIZE,
333
-                    ).map(lambda num: h + b"\x00" * num)
334
-                ),
335
-            )
336
-        ),
337
-        service=strategies.text(
338
-            strategies.characters(min_codepoint=32, max_codepoint=126),
339
-            min_size=1,
340
-            max_size=32,
341
-        ),
422
+        phrases=Strategies.binary_phrase_strategy(
423
+            size=PhraseSize.OVERLONG,
424
+        ).flatmap(Strategies.make_interchangable_phrases),
425
+        service=Strategies.text_strategy(),
342 426
     )
343 427
     def test_large(
344 428
         self,
... ...
@@ -397,16 +481,8 @@ class TestStringAndBinaryExchangability(TestVault):
397 481
         ) == vault.Vault(phrase=self.phrase).generate(memoryview(b"google"))
398 482
 
399 483
     @hypothesis.given(
400
-        phrase=strategies.text(
401
-            strategies.characters(min_codepoint=32, max_codepoint=126),
402
-            min_size=1,
403
-            max_size=32,
404
-        ),
405
-        service=strategies.text(
406
-            strategies.characters(min_codepoint=32, max_codepoint=126),
407
-            min_size=1,
408
-            max_size=32,
409
-        ),
484
+        phrase=Strategies.text_strategy(),
485
+        service=Strategies.text_strategy(),
410 486
     )
411 487
     def test_binary_phrases(
412 488
         self,
... ...
@@ -433,16 +509,8 @@ class TestStringAndBinaryExchangability(TestVault):
433 509
             )
434 510
 
435 511
     @hypothesis.given(
436
-        phrase=strategies.text(
437
-            strategies.characters(min_codepoint=32, max_codepoint=126),
438
-            min_size=1,
439
-            max_size=32,
440
-        ),
441
-        service=strategies.text(
442
-            strategies.characters(min_codepoint=32, max_codepoint=126),
443
-            min_size=1,
444
-            max_size=32,
445
-        ),
512
+        phrase=Strategies.text_strategy(),
513
+        service=Strategies.text_strategy(),
446 514
     )
447 515
     def test_binary_service_name(
448 516
         self,
449 517