Add hypothesis-based tests (and bugfix) for the vault derivation scheme
Marco Ricci

Marco Ricci committed on 2024-09-27 19:05:16
Zeige 2 geänderte Dateien mit 205 Einfügungen und 1 Löschungen.


There are currently three such property-based tests: a test for general
adherence to constraints (with small numbers), and two tests
specifically for length and repeat constraints (with larger numbers).
Combining these into a single test caused the test runs to vary wildly
in execution time, which is undesirable from a configuration point of
view (and also causes hypothesis to complain).

The hypothesis tests actually uncovered a bug in the API specification
of the vault derivation scheme: during generation, we may run out of
possible characters for the next letter due to repeat constraints, but
we only realize this at that point, not at construction time.  This
already generated a `ValueError`, but with
a confusing error message, and the documentation seemed to suggest that
all unsatisfiable constraints are already detected at construction time.
So now explicitly mention that constraint unsatisfiability may be
detected both in the `Vault` constructor and in `Vault.generate`, and
handle lower-level `ValueError`s in `Vault.generate` explicitly too.
... ...
@@ -119,6 +119,11 @@ class Vault:
119 119
                 Conflicting passphrase constraints.  Permit more
120 120
                 characters, or increase the desired passphrase length.
121 121
 
122
+        Warning:
123
+            Because of repetition constraints, it is not always possible
124
+            to detect conflicting passphrase constraints at construction
125
+            time.
126
+
122 127
         """
123 128
         self._phrase = self._get_binary_string(phrase)
124 129
         self._length = length
... ...
@@ -325,6 +330,11 @@ class Vault:
325 330
         Returns:
326 331
             The service passphrase.
327 332
 
333
+        Raises:
334
+            ValueError:
335
+                Conflicting passphrase constraints.  Permit more
336
+                characters, or increase the desired passphrase length.
337
+
328 338
         Examples:
329 339
             >>> phrase = b'She cells C shells bye the sea shoars'
330 340
             >>> # Using default options in constructor.
... ...
@@ -334,6 +344,29 @@ class Vault:
334 344
             >>> Vault().generate(b'google', phrase=phrase)
335 345
             b': 4TVH#5:aZl8LueOT\\{'
336 346
 
347
+            Conflicting constraints are sometimes only found during
348
+            generation.
349
+
350
+            >>> # Note: no error here...
351
+            >>> v = Vault(
352
+            ...     lower=0,
353
+            ...     upper=0,
354
+            ...     number=0,
355
+            ...     space=2,
356
+            ...     dash=0,
357
+            ...     symbol=1,
358
+            ...     repeat=2,
359
+            ...     length=3,
360
+            ... )
361
+            >>> # ... but here.
362
+            >>> v.generate(
363
+            ...     '0', phrase=b'\x00'
364
+            ... )  # doctest: +IGNORE_EXCEPTION_DETAIL
365
+            Traceback (most recent call last):
366
+                ...
367
+            ValueError: no allowed characters left
368
+
369
+
337 370
         """
338 371
         hash_length = self._estimate_sufficient_hash_length()
339 372
         assert hash_length >= 1
... ...
@@ -376,6 +409,9 @@ class Vault:
376 409
                             )
377 410
                     pos = seq.generate(len(charset))
378 411
                     result.extend(charset[pos : pos + 1])
412
+            except ValueError as exc:
413
+                msg = 'no allowed characters left'
414
+                raise ValueError(msg) from exc
379 415
             except sequin.SequinExhaustedError:
380 416
                 hash_length *= 2
381 417
             else:
... ...
@@ -7,12 +7,18 @@
7 7
 from __future__ import annotations
8 8
 
9 9
 import math
10
+from typing import TYPE_CHECKING, TypeAlias, TypeVar
10 11
 
12
+import hypothesis
11 13
 import pytest
14
+from hypothesis import strategies
12 15
 
13 16
 import derivepassphrase
14 17
 
15
-Vault = derivepassphrase.vault.Vault
18
+if TYPE_CHECKING:
19
+    from collections.abc import Iterator
20
+
21
+Vault: TypeAlias = derivepassphrase.vault.Vault
16 22
 
17 23
 
18 24
 class TestVault:
... ...
@@ -258,3 +264,165 @@ class TestVault:
258 264
             TypeError, match='invalid safety factor: not a float'
259 265
         ):
260 266
             assert v._estimate_sufficient_hash_length(None)  # type: ignore[arg-type]
267
+
268
+
269
+@strategies.composite
270
+def vault_config(draw: strategies.DrawFn) -> dict[str, int]:
271
+    lower = draw(strategies.integers(min_value=0, max_value=10))
272
+    upper = draw(strategies.integers(min_value=0, max_value=10))
273
+    number = draw(strategies.integers(min_value=0, max_value=10))
274
+    space = draw(strategies.integers(min_value=0, max_value=10))
275
+    dash = draw(strategies.integers(min_value=0, max_value=10))
276
+    symbol = draw(strategies.integers(min_value=0, max_value=10))
277
+    repeat = draw(strategies.integers(min_value=0, max_value=10))
278
+    length = draw(
279
+        strategies.integers(
280
+            min_value=max(1, lower + upper + number + space + dash + symbol),
281
+            max_value=70,
282
+        )
283
+    )
284
+    hypothesis.assume(lower + upper + number + dash + symbol > 0)
285
+    hypothesis.assume(lower + upper + number + space + symbol > 0)
286
+    hypothesis.assume(repeat >= space)
287
+    return {
288
+        'lower': lower,
289
+        'upper': upper,
290
+        'number': number,
291
+        'space': space,
292
+        'dash': dash,
293
+        'symbol': symbol,
294
+        'repeat': repeat,
295
+        'length': length,
296
+    }
297
+
298
+
299
+class TestHypotheses:
300
+    @hypothesis.given(
301
+        phrase=strategies.one_of(
302
+            strategies.binary(min_size=1), strategies.text(min_size=1)
303
+        ),
304
+        config=vault_config(),
305
+        service=strategies.text(min_size=1),
306
+    )
307
+    # regression test
308
+    @hypothesis.example(
309
+        phrase=b'\x00',
310
+        config={
311
+            'lower': 0,
312
+            'upper': 0,
313
+            'number': 0,
314
+            'space': 2,
315
+            'dash': 0,
316
+            'symbol': 1,
317
+            'repeat': 2,
318
+            'length': 3,
319
+        },
320
+        service='0',
321
+    )
322
+    # regression test
323
+    @hypothesis.example(
324
+        phrase=b'\x00',
325
+        config={
326
+            'lower': 0,
327
+            'upper': 0,
328
+            'number': 0,
329
+            'space': 1,
330
+            'dash': 0,
331
+            'symbol': 0,
332
+            'repeat': 9,
333
+            'length': 5,
334
+        },
335
+        service='0',
336
+    )
337
+    def test_100_all_length_character_and_occurrence_constraints_satisfied(
338
+        self,
339
+        phrase: str | bytes,
340
+        config: dict[str, int],
341
+        service: str,
342
+    ) -> None:
343
+        try:
344
+            password = Vault(phrase=phrase, **config).generate(service)
345
+        except ValueError as exc:
346
+            if 'no allowed characters left' in exc.args:
347
+                return
348
+            raise  # pragma: no cover
349
+        n = len(password)
350
+        assert n == config['length'], 'Password has wrong length.'
351
+        for key in ('lower', 'upper', 'number', 'space', 'dash', 'symbol'):
352
+            if config[key] > 0:
353
+                assert (
354
+                    sum(c in Vault._CHARSETS[key] for c in password)
355
+                    >= config[key]
356
+                ), (
357
+                    'Password does not satisfy '
358
+                    'character occurrence constraints.'
359
+                )
360
+            elif key in {'dash', 'symbol'}:
361
+                # Character classes overlap, so "forbidden" characters may
362
+                # appear via the other character class.
363
+                assert True
364
+            else:
365
+                assert (
366
+                    sum(c in Vault._CHARSETS[key] for c in password) == 0
367
+                ), 'Password does not satisfy character ban constraints.'
368
+
369
+        T = TypeVar('T', str, bytes)
370
+
371
+        def length_r_substrings(string: T, *, r: int) -> Iterator[T]:
372
+            for i in range(len(string) - (r - 1)):
373
+                yield string[i : i + r]
374
+
375
+        repeat = config['repeat']
376
+        if repeat:
377
+            for snippet in length_r_substrings(password, r=(repeat + 1)):
378
+                assert (
379
+                    len(set(snippet)) > 1
380
+                ), 'Password does not satisfy character repeat constraints.'
381
+
382
+    @hypothesis.given(
383
+        phrase=strategies.one_of(
384
+            strategies.binary(min_size=1),
385
+            strategies.text(
386
+                min_size=1,
387
+                alphabet=strategies.characters(max_codepoint=255),
388
+            ),
389
+        ),
390
+        length=strategies.integers(min_value=1, max_value=1000),
391
+        service=strategies.text(min_size=1),
392
+    )
393
+    def test_101_password_with_length(
394
+        self,
395
+        phrase: str | bytes,
396
+        length: int,
397
+        service: str,
398
+    ) -> None:
399
+        password = Vault(phrase=phrase, length=length).generate(service)
400
+        assert len(password) == length
401
+
402
+    # This test has time complexity `O(length * repeat)`, both of which
403
+    # are chosen by hypothesis.  So disable the deadline.
404
+    @hypothesis.settings(deadline=None)
405
+    @hypothesis.given(
406
+        phrase=strategies.one_of(
407
+            strategies.binary(min_size=1),
408
+            strategies.text(
409
+                min_size=1,
410
+                alphabet=strategies.characters(max_codepoint=255),
411
+            ),
412
+        ),
413
+        length=strategies.integers(min_value=2, max_value=1000),
414
+        repeat=strategies.integers(min_value=1, max_value=1000),
415
+        service=strategies.text(min_size=1),
416
+    )
417
+    def test_102_password_with_repeat(
418
+        self,
419
+        phrase: str | bytes,
420
+        length: int,
421
+        repeat: int,
422
+        service: str,
423
+    ) -> None:
424
+        password = Vault(phrase=phrase, length=length, repeat=repeat).generate(
425
+            service
426
+        )
427
+        for i in range((length + 1) - (repeat + 1)):
428
+            assert len(set(password[i : i + repeat + 1])) > 1
261 429