Add hypothesis tests for the `sequin` module
Marco Ricci

Marco Ricci committed on 2025-01-25 23:28:10
Zeige 1 geänderte Dateien mit 428 Einfügungen und 57 Löschungen.


Add hypothesis tests for big endian number parsing, the sequin
constructor, the generation and the bit shifting steps, each via their
own parameter object and corresponding hypothesis strategy, and convert
all existing explicit parametrized tests to hypothesis examples.  (The
strategy may be trivial, however.)  Besides the existing helper function
`bitseq`, we add a new `bits` helper function, and add hypothesis tests
for both of these helper functions as well.
... ...
@@ -7,11 +7,47 @@
7 7
 from __future__ import annotations
8 8
 
9 9
 import collections
10
+import contextlib
11
+import functools
12
+import math
13
+import operator
14
+from typing import TYPE_CHECKING, NamedTuple
10 15
 
16
+import hypothesis
11 17
 import pytest
18
+from hypothesis import strategies
12 19
 
13 20
 from derivepassphrase import sequin
14 21
 
22
+if TYPE_CHECKING:
23
+    from collections.abc import Sequence
24
+
25
+
26
def bits(num: int, /, byte_width: int | None = None) -> list[int]:
    """Return the list of bits of an integer, in big endian order.

    Args:
        num:
            The number whose bits are to be returned.  Must be
            non-negative.
        byte_width:
            Pad the returned list of bits with leading zeros to the
            given byte width if given, else to the number's natural
            byte width (at least one byte).  A width too small to hold
            `num` never truncates; the bits are then returned unpadded.

    Returns:
        The bits of `num`, most significant bit first.

    Raises:
        NotImplementedError:
            `num` is negative.

    """
    if num < 0:  # pragma: no cover
        err_msg = 'Negative numbers are unsupported'
        raise NotImplementedError(err_msg)
    bit_length = num.bit_length()
    if byte_width is None:
        # Integer arithmetic only: a `math.log2`-based width undercounts
        # for 1 and for exact powers of 256, and floats lose precision
        # for large numbers.
        byte_width = max(1, (bit_length + 7) // 8)
    seq = [(num >> shift) & 1 for shift in reversed(range(bit_length))]
    missing_bit_count = 8 * byte_width - len(seq)
    # A non-positive count multiplies to the empty list, i.e. no padding.
    return [0] * missing_bit_count + seq
50
+
15 51
 
16 52
 def bitseq(string: str) -> list[int]:
17 53
     """Convert a 0/1-string into a list of bits."""
... ...
@@ -21,24 +57,120 @@ def bitseq(string: str) -> list[int]:
21 57
 class TestStaticFunctionality:
22 58
     """Test the static functionality in the `sequin` module."""
23 59
 
24
-    @pytest.mark.parametrize(
25
-        ['sequence', 'base', 'expected'],
26
-        [
27
-            ([1, 2, 3, 4, 5, 6], 10, 123456),
28
-            ([1, 2, 3, 4, 5, 6], 100, 10203040506),
29
-            ([0, 0, 1, 4, 9, 7], 10, 1497),
30
-            ([1, 0, 0, 1, 0, 0, 0, 0], 2, 144),
31
-            ([1, 7, 5, 5], 8, 0o1755),
32
-        ],
60
+    @hypothesis.given(
61
+        num=strategies.integers(min_value=0, max_value=0xFFFFFFFFFFFFFFFF),
62
+    )
63
+    def test_100_bits(self, num: int) -> None:
64
+        """Extract the bits from a number in big-endian format."""
65
+        seq1 = bits(num)
66
+        n = len(seq1)
67
+        seq2 = bits(num, byte_width=8)
68
+        m = len(seq2)
69
+        assert m == 64
70
+        assert seq2[-n:] == seq1
71
+        assert seq2[: m - n] == [0] * (m - n)
72
+        text1 = ''.join(str(bit) for bit in seq1)
73
+        text2 = ''.join(str(bit) for bit in seq2)
74
+        assert text1.lstrip('0') == (f'{num:b}' if num else '')
75
+        assert text2 == f'{num:064b}'
76
+
77
+    @hypothesis.given(
78
+        num=strategies.integers(min_value=0, max_value=0xFFFFFFFFFFFFFFFF),
79
+    )
80
+    def test_101_bits(self, num: int) -> None:
81
+        """Extract the bits from a number in big-endian format."""
82
+        text1 = f'{num:064b}'
83
+        seq1 = bitseq(text1)
84
+        seq2 = bits(num, byte_width=8)
85
+        assert seq1 == seq2
86
+        text2 = ''.join(str(bit) for bit in seq1)
87
+        assert int(text2, 2) == num
88
+
89
+    class BigEndianNumberTest(NamedTuple):
90
+        """Test data for
91
+        [`TestStaticFunctionality.test_200_big_endian_number`][].
92
+
93
+        Attributes:
94
+            sequence: A sequence of integers.
95
+            base: The numeric base.
96
+            expected: The expected result.
97
+
98
+        """
99
+
100
+        sequence: list[int]
101
+        """"""
102
+        base: int
103
+        """"""
104
+        expected: int
105
+        """"""
106
+
107
+        @strategies.composite
108
+        @staticmethod
109
+        def strategy(
110
+            draw: strategies.DrawFn,
111
+            *,
112
+            base: int | None = None,
113
+            max_size: int | None = None,
114
+        ) -> TestStaticFunctionality.BigEndianNumberTest:
115
+            """Return a sample BigEndianNumberTest.
116
+
117
+            Args:
118
+                draw:
119
+                    The `draw` function, as provided for by hypothesis.
120
+                base:
121
+                    The numeric base, an integer between 2 and 65536 (inclusive).
122
+                max_size:
123
+                    The maximum size of the sequence, up to 128.
124
+
125
+            Raises:
126
+                AssertionError:
127
+                    `base` or `max_size` are invalid.
128
+
129
+            """
130
+            if base is None:  # pragma: no cover
131
+                base = 256
132
+            assert isinstance(base, int)
133
+            assert base in range(2, 65537)
134
+            if max_size is None:  # pragma: no cover
135
+                max_size = 128
136
+            assert isinstance(max_size, int)
137
+            assert max_size in range(129)
138
+            sequence = draw(
139
+                strategies.lists(
140
+                    strategies.integers(min_value=0, max_value=(base - 1)),
141
+                    max_size=max_size,
142
+                ),
143
+            )
144
+            value = functools.reduce(lambda x, y: x * base + y, sequence, 0)
145
+            return TestStaticFunctionality.BigEndianNumberTest(
146
+                sequence, base, value
147
+            )
148
+
149
+    @hypothesis.given(test_case=BigEndianNumberTest.strategy())
150
+    @hypothesis.example(
151
+        BigEndianNumberTest([1, 2, 3, 4, 5, 6], 10, 123456)
152
+    ).via('manual decimal example')
153
+    @hypothesis.example(
154
+        BigEndianNumberTest([1, 2, 3, 4, 5, 6], 100, 10203040506)
155
+    ).via('manual decimal example in different base')
156
+    @hypothesis.example(BigEndianNumberTest([0, 0, 1, 4, 9, 7], 10, 1497)).via(
157
+        'manual example with leading zeroes'
158
+    )
159
+    @hypothesis.example(
160
+        BigEndianNumberTest([1, 0, 0, 1, 0, 0, 0, 0], 2, 144)
161
+    ).via('manual binary example')
162
+    @hypothesis.example(BigEndianNumberTest([1, 7, 5, 5], 8, 0o1755)).via(
163
+        'manual octal example'
33 164
     )
34 165
     def test_200_big_endian_number(
35
-        self, sequence: list[int], base: int, expected: int
166
+        self, test_case: BigEndianNumberTest
36 167
     ) -> None:
37 168
         """Conversion to big endian numbers in any base works.
38 169
 
39 170
         See [`sequin.Sequin.generate`][] for where this is used.
40 171
 
41 172
         """
173
+        sequence, base, expected = test_case
42 174
         assert (
43 175
             sequin.Sequin._big_endian_number(sequence, base=base)
44 176
         ) == expected
... ...
@@ -70,88 +202,327 @@ class TestStaticFunctionality:
70 202
 class TestSequin:
71 203
     """Test the `Sequin` class."""
72 204
 
73
-    @pytest.mark.parametrize(
74
-        ['sequence', 'is_bitstring', 'expected'],
75
-        [
76
-            (
205
+    class ConstructorTestCase(NamedTuple):
206
+        """A test case for the constructor.
207
+
208
+        Attributes:
209
+            sequence:
210
+                A sequence of ints, bits, or Latin1 characters.
211
+            is_bitstring:
212
+                True if and only if `sequence` denotes bits.
213
+            expected:
214
+                The expected bit sequence of the internal entropy pool.
215
+
216
+        """
217
+
218
+        sequence: Sequence[int] | str
219
+        """"""
220
+        is_bitstring: bool
221
+        """"""
222
+        expected: Sequence[int]
223
+
224
+        @strategies.composite
225
+        @staticmethod
226
+        def strategy(
227
+            draw: strategies.DrawFn,
228
+            *,
229
+            max_entropy: int | None = None,
230
+        ) -> TestSequin.ConstructorTestCase:
231
+            """Return a constructor test case.
232
+
233
+            Args:
234
+                max_entropy:
235
+                    The maximum entropy, in bits.  Must be between 0 and
236
+                    256, inclusive.
237
+
238
+            Raises:
239
+                AssertionError:
240
+                    `max_entropy` is invalid.
241
+
242
+            """
243
+            if max_entropy is None:  # pragma: no branch
244
+                max_entropy = 256
245
+            assert max_entropy in range(257)
246
+            is_bytecount = max_entropy % 8 == 0
247
+            is_bitstring = (
248
+                draw(strategies.randoms()).choice([False, True])
249
+                if is_bytecount
250
+                else True
251
+            )
252
+            sequence: Sequence[int] | str
253
+            expected: Sequence[int]
254
+            if is_bitstring:
255
+                sequence = draw(
256
+                    strategies.lists(
257
+                        strategies.integers(min_value=0, max_value=1),
258
+                        max_size=max_entropy,
259
+                    )
260
+                )
261
+                expected = sequence
262
+            else:
263
+                bytecount = max_entropy // 8
264
+                raw_sequence = draw(strategies.binary(max_size=bytecount))
265
+                sequence_format = draw(strategies.randoms()).choice([
266
+                    'bytes',
267
+                    'ints',
268
+                    'text',
269
+                ])
270
+                if sequence_format == 'bytes':
271
+                    sequence = raw_sequence
272
+                elif sequence_format == 'ints':
273
+                    sequence = list(raw_sequence)
274
+                else:
275
+                    sequence = raw_sequence.decode('latin1')
276
+                bytestring = (
277
+                    sequence.encode('latin1')
278
+                    if isinstance(sequence, str)
279
+                    else bytes(sequence)
280
+                )
281
+                expected = []
282
+                for byte in bytestring:
283
+                    expected.extend(bits(byte, byte_width=1))
284
+            return TestSequin.ConstructorTestCase(
285
+                sequence, is_bitstring, expected
286
+            )
287
+
288
+    @hypothesis.given(test_case=ConstructorTestCase.strategy())
289
+    @hypothesis.example(
290
+        ConstructorTestCase([1, 0, 0, 1, 0, 1], True, [1, 0, 0, 1, 0, 1])
291
+    ).via('manual example bitstring')
292
+    @hypothesis.example(
293
+        ConstructorTestCase(
77 294
             [1, 0, 0, 1, 0, 1],
78 295
             False,
79 296
             bitseq('000000010000000000000000000000010000000000000001'),
80
-            ),
81
-            ([1, 0, 0, 1, 0, 1], True, [1, 0, 0, 1, 0, 1]),
82
-            (b'OK', False, bitseq('0100111101001011')),
83
-            ('OK', False, bitseq('0100111101001011')),
84
-        ],
85 297
         )
298
+    ).via('manual example bitstring as byte string')
299
+    @hypothesis.example(
300
+        ConstructorTestCase(b'OK', False, bitseq('0100111101001011'))
301
+    ).via('manual example true byte string')
302
+    @hypothesis.example(
303
+        ConstructorTestCase('OK', False, bitseq('0100111101001011'))
304
+    ).via('manual example latin1 text')
86 305
     def test_200_constructor(
87 306
         self,
88
-        sequence: str | bytes | bytearray | list[int],
89
-        is_bitstring: bool,
90
-        expected: list[int],
307
+        test_case: ConstructorTestCase,
91 308
     ) -> None:
92 309
         """The constructor handles both bit and integer sequences."""
310
+        sequence, is_bitstring, expected = test_case
93 311
         seq = sequin.Sequin(sequence, is_bitstring=is_bitstring)
94 312
         assert seq.bases == {2: collections.deque(expected)}
95 313
 
96
-    def test_201_generating(self) -> None:
314
+    class GenerationSequence(NamedTuple):
315
+        """A sequence of generation results.
316
+
317
+        Attributes:
318
+            bit_sequence:
319
+                The input bit sequence.
320
+            steps:
321
+                A sequence of generation steps.  Each step details
322
+                a requested number base, and the respective result (a
323
+                number, or [`sequin.SequinExhaustedError`][]).
324
+
325
+        """
326
+
327
+        bit_sequence: Sequence[int]
328
+        """"""
329
+        steps: Sequence[tuple[int, int | type[sequin.SequinExhaustedError]]]
330
+        """"""
331
+
332
+        @strategies.composite
333
+        @staticmethod
334
+        def strategy(draw: strategies.DrawFn) -> TestSequin.GenerationSequence:
335
+            """Return a generation sequence."""
336
+            # Signal that there is only one value.
337
+            draw(strategies.just(None))
338
+            return TestSequin.GenerationSequence(
339
+                bitseq('110101011111001'),
340
+                [
341
+                    (1, 0),
342
+                    (5, 3),
343
+                    (5, 3),
344
+                    (5, 1),
345
+                    (5, sequin.SequinExhaustedError),
346
+                    (1, sequin.SequinExhaustedError),
347
+                ],
348
+            )
349
+
350
+    @hypothesis.example(
351
+        GenerationSequence(
352
+            bitseq('110101011111001'),
353
+            [
354
+                (1, 0),
355
+                (5, 3),
356
+                (5, 3),
357
+                (5, 1),
358
+                (5, sequin.SequinExhaustedError),
359
+                (1, sequin.SequinExhaustedError),
360
+            ],
361
+        )
362
+    ).via('manual, pre-hypothesis parametrization value')
363
+    @hypothesis.given(sequence=GenerationSequence.strategy())
364
+    def test_201_generating(self, sequence: GenerationSequence) -> None:
97 365
         """The sequin generates deterministic sequences."""
98
-        seq = sequin.Sequin(
99
-            [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1], is_bitstring=True
366
+        seq = sequin.Sequin(sequence.bit_sequence, is_bitstring=True)
367
+        for i, (num, result) in enumerate(sequence.steps, start=1):
368
+            if isinstance(result, int):
369
+                assert seq.generate(num) == result, (
370
+                    f'Failed to generate {result:d} in step {i}'
100 371
                 )
101
-        assert seq.generate(1) == 0
102
-        assert seq.generate(5) == 3
103
-        assert seq.generate(5) == 3
104
-        assert seq.generate(5) == 1
105
-        with pytest.raises(sequin.SequinExhaustedError):
106
-            seq.generate(5)
107
-        with pytest.raises(sequin.SequinExhaustedError):
108
-            seq.generate(1)
372
+            else:
373
+                # Can't use pytest.raises here, because the assertion error
374
+                # message is not customizable and we would lose information
375
+                # about which step we're executing.
376
+                with contextlib.suppress(sequin.SequinExhaustedError):
377
+                    result2 = seq.generate(num)
378
+                    pytest.fail(
379
+                        f'Expected to be exhausted in step {i}, '
380
+                        f'but generated {result2:d} instead'
381
+                    )
382
+
383
+    def test_201a_generating_errors(self) -> None:
384
+        """The sequin errors deterministically when generating sequences."""
109 385
         seq = sequin.Sequin(
110 386
             [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1], is_bitstring=True
111 387
         )
112 388
         with pytest.raises(ValueError, match='invalid target range'):
113 389
             seq.generate(0)
114 390
 
115
-    def test_210_internal_generating(self) -> None:
391
+    @hypothesis.example(
392
+        GenerationSequence(
393
+            bitseq('110101011111001'),
394
+            [
395
+                (1, 0),
396
+                (5, 3),
397
+                (5, 3),
398
+                (5, 1),
399
+                (5, sequin.SequinExhaustedError),
400
+                (1, sequin.SequinExhaustedError),
401
+            ],
402
+        )
403
+    ).via('manual, pre-hypothesis parametrization value')
404
+    @hypothesis.given(sequence=GenerationSequence.strategy())
405
+    def test_210_internal_generating(
406
+        self, sequence: GenerationSequence
407
+    ) -> None:
116 408
         """The sequin internals generate deterministic sequences."""
117
-        seq = sequin.Sequin(
118
-            [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1], is_bitstring=True
409
+        seq = sequin.Sequin(sequence.bit_sequence, is_bitstring=True)
410
+        for i, (num, result) in enumerate(sequence.steps, start=1):
411
+            if num == 1:
412
+                assert seq._generate_inner(num) == 0, (
413
+                    f'Failed to generate {result:d} in step {i}'
119 414
                 )
120
-        assert seq._generate_inner(5) == 3
121
-        assert seq._generate_inner(5) == 3
122
-        assert seq._generate_inner(5) == 1
123
-        assert seq._generate_inner(5) == 5
124
-        assert seq._generate_inner(1) == 0
415
+            elif isinstance(result, int):
416
+                assert seq._generate_inner(num) == result, (
417
+                    f'Failed to generate {result:d} in step {i}'
418
+                )
419
+            else:
420
+                result2 = seq._generate_inner(num)
421
+                assert result2 == num, (
422
+                    f'Expected to be exhausted in step {i}, '
423
+                    f'but generated {result2:d} instead'
424
+                )
425
+
426
+    def test_210a_internal_generating_errors(self) -> None:
427
+        """The sequin generation internals error deterministically."""
125 428
         seq = sequin.Sequin(
126 429
             [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1], is_bitstring=True
127 430
         )
128
-        assert seq._generate_inner(1) == 0
129 431
         with pytest.raises(ValueError, match='invalid target range'):
130 432
             seq._generate_inner(0)
131 433
         with pytest.raises(ValueError, match='invalid base:'):
132 434
             seq._generate_inner(16, base=1)
133 435
 
134
-    def test_211_shifting(self) -> None:
135
-        """The sequin manages the pool of remaining entropy for each base.
436
+    class ShiftSequence(NamedTuple):
437
+        """A sequence of bit sequence shift operations.
136 438
 
137
-        Specifically, the sequin implements all-or-nothing fixed-length
138
-        draws from the entropy pool.
439
+        Attributes:
440
+            bit_sequence:
441
+                The input bit sequence.
442
+            steps:
443
+                A sequence of shift steps.  Each step details
444
+                a requested shift size, the respective result, and the
445
+                bit sequence status afterward.
139 446
 
140 447
         """
141
-        seq = sequin.Sequin([1, 0, 1, 0, 0, 1, 0, 0, 0, 1], is_bitstring=True)
142
-        assert seq.bases == {
143
-            2: collections.deque([1, 0, 1, 0, 0, 1, 0, 0, 0, 1])
144
-        }
145 448
 
146
-        assert seq._all_or_nothing_shift(3) == (1, 0, 1)
147
-        assert seq._all_or_nothing_shift(3) == (0, 0, 1)
148
-        assert seq.bases[2] == collections.deque([0, 0, 0, 1])
449
+        bit_sequence: Sequence[int]
450
+        """"""
451
+        steps: Sequence[tuple[int, Sequence[int], Sequence[int]]]
452
+        """"""
149 453
 
150
-        assert seq._all_or_nothing_shift(5) == ()
151
-        assert seq.bases[2] == collections.deque([0, 0, 0, 1])
454
+        @strategies.composite
455
+        @staticmethod
456
+        def strategy(draw: strategies.DrawFn) -> TestSequin.ShiftSequence:
457
+            """Return a generation sequence."""
458
+            no_op_counts_strategy = strategies.lists(
459
+                strategies.integers(min_value=0, max_value=0),
460
+                min_size=3,
461
+                max_size=3,
462
+            )
463
+            true_counts_strategy = strategies.lists(
464
+                strategies.integers(min_value=1, max_value=5),
465
+                min_size=3,
466
+                max_size=10,
467
+            ).map(sorted)
468
+            bits_strategy = strategies.integers(min_value=0, max_value=1)
469
+            counts = draw(
470
+                strategies.builds(
471
+                    operator.add,
472
+                    no_op_counts_strategy,
473
+                    true_counts_strategy,
474
+                ).flatmap(strategies.permutations)
475
+            )
476
+            bit_sequence: list[int] = []
477
+            steps: list[tuple[int, Sequence[int], list[int]]] = []
478
+            for i, count in enumerate(counts):
479
+                shift_result = draw(
480
+                    strategies.lists(
481
+                        bits_strategy, min_size=count, max_size=count
482
+                    )
483
+                )
484
+                for step in steps[:i]:
485
+                    step[2].extend(shift_result)
486
+                bit_sequence.extend(shift_result)
487
+                steps.append((count, shift_result, []))
488
+            return TestSequin.ShiftSequence(bit_sequence, steps)
152 489
 
153
-        assert seq._all_or_nothing_shift(4), (0, 0, 0, 1)
154
-        assert 2 not in seq.bases
490
+    @hypothesis.given(sequence=ShiftSequence.strategy())
491
+    @hypothesis.example(
492
+        ShiftSequence(
493
+            bitseq('1010010001'),
494
+            [
495
+                (3, bitseq('101'), bitseq('0010001')),
496
+                (3, bitseq('001'), bitseq('0001')),
497
+                (5, bitseq(''), bitseq('0001')),
498
+                (4, bitseq('0001'), bitseq('')),
499
+            ],
500
+        )
501
+    )
502
+    def test_211_shifting(self, sequence: ShiftSequence) -> None:
503
+        """The sequin manages the pool of remaining entropy for each base.
504
+
505
+        Specifically, the sequin implements all-or-nothing fixed-length
506
+        draws from the entropy pool.
507
+
508
+        """
509
+        seq = sequin.Sequin(sequence.bit_sequence, is_bitstring=True)
510
+        assert seq.bases == {2: collections.deque(sequence.bit_sequence)}
511
+        for i, (count, result, remaining) in enumerate(
512
+            sequence.steps, start=1
513
+        ):
514
+            actual_result = seq._all_or_nothing_shift(count)
515
+            assert actual_result == tuple(result), (
516
+                f'At step {i}, the shifting result differs'
517
+            )
518
+            if remaining:
519
+                assert seq.bases[2] == collections.deque(remaining), (
520
+                    f'After step {i}, the remaining bit sequence differs'
521
+                )
522
+            else:
523
+                assert 2 not in seq.bases, (
524
+                    f'After step {i}, the bit sequence is not exhausted yet'
525
+                )
155 526
 
156 527
     @pytest.mark.parametrize(
157 528
         ['sequence', 'is_bitstring', 'exc_type', 'exc_pattern'],
158 529