5403acefe8c2e2872ab3f884401115ab2419caf5
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

1) # SPDX-FileCopyrightText: 2024 Marco Ricci <m@the13thletter.info>
2) #
3) # SPDX-License-Identifier: MIT
4) 
5) """A Python reimplementation of James Coglan's "sequin" Node.js module.
6) 
7) James Coglan's "sequin" Node.js module provides a pseudorandom number
8) generator (using rejection sampling on a stream of input numbers) that
9) attempts to minimize the amount of information it throws away:
10) (non-degenerate) rejected samples are fed into a stream of higher-order
11) numbers from which the next random number generation request will be
12) served.  The sequin module is used in Coglan's "vault" module (a
13) deterministic, stateless password manager that recomputes passwords
14) instead of storing them), and this reimplementation is used for
15) a similar purpose.
16) 
17) The main API is the [`Sequin`] [sequin.Sequin] class, which is
18) thoroughly documented.
19) 
20) """
21) 
22) from __future__ import annotations
23) 
24) import collections
25) import math
26) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

27) from collections.abc import Iterator, MutableSequence, Sequence
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

28) from typing import assert_type, Literal, TypeAlias
29) 
30) __all__ = ('Sequin', 'SequinExhaustedException')
31) 
32) class Sequin:
33)     """Generate pseudorandom non-negative numbers in different ranges.
34) 
35)     Given a (presumed high-quality) uniformly random sequence of input
36)     bits, generate pseudorandom non-negative integers in a certain range
37)     on each call of the `generate` method.  (It is permissible to
38)     specify a different range per call to `generate`; this is the main
39)     use case.)  We use a modified version of rejection sampling, where
40)     rejected values are stored in "rejection queues" if possible, and
41)     these rejection queues re-seed the next round of rejection sampling.
42) 
43)     This is a Python reimplementation of James Coglan's [Node.js sequin
44)     module][JS_SEQUIN], as introduced in [his blog post][BLOG_POST].  It
45)     uses a [technique by Christian Lawson-Perfect][SEQUIN_TECHNIQUE].
46)     I do not know why the original module is called "sequin"; I presume
47)     it to be a pun on "sequence".
48) 
49)     [JS_SEQUIN]: https://www.npmjs.com/package/sequin
50)     [BLOG_POST]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/
51)     [SEQUIN_TECHNIQUE]: https://checkmyworking.com/2012/06/converting-a-stream-of-binary-digits-to-a-stream-of-base-n-digits/
52) 
53)     """
54)     def __init__(
55)         self,
56)         sequence: str | bytes | bytearray | Sequence[int],
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

57)         /, *, is_bitstring: bool = False
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

58)     ):
59)         """Initialize the Sequin.
60) 
61)         Args:
62)             sequence:
63)                 A sequence of bits, or things convertible to bits, to
64)                 seed the pseudorandom number generator.  Byte and text
65)                 strings are converted to 8-bit integer sequences.
66)                 (Conversion will fail if the text string contains
67)                 non-ISO-8859-1 characters.)  The numbers are then
68)                 converted to bits.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

69)             is_bitstring:
70)                 If true, treat the input as a bitstring.  By default,
71)                 the input is treated as a string of 8-bit integers, from
72)                 which the individual bits must still be extracted.
73) 
74)         Raises:
75)             ValueError:
76)                 The sequence contains values outside the permissible
77)                 range.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

78) 
79)         """
80)         def uint8_to_bits(value):
81)             """Yield individual bits of an 8-bit number, MSB first."""
82)             for i in (0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01):
83)                 yield 1 if value | i == value else 0
84)         if isinstance(sequence, str):
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

85)             try:
86)                 sequence = tuple(sequence.encode('iso-8859-1'))
87)             except UnicodeError as e:
88)                 raise ValueError('sequence item out of range') from e
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

89)         else:
90)             sequence = tuple(sequence)
91)         assert_type(sequence, tuple[int, ...])
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

92)         self.bases: dict[int, collections.deque[int]] = {}
93)         def gen() -> Iterator[int]:
94)             for num in sequence:
95)                 if num not in range(2 if is_bitstring else 256):
96)                     raise ValueError('sequence item out of range')
97)                 if is_bitstring:
98)                     yield num
99)                 else:
100)                     yield from uint8_to_bits(num)
101)         self.bases[2] = collections.deque(gen())
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

102) 
103)     def _all_or_nothing_shift(
104)         self, count: int, /, *, base: int = 2
105)     ) -> Sequence[int]:
106)         """Shift and return items if and only if there are enough.
107) 
108)         Args:
109)             count: Number of items to shift/consume.
110)             base: Use the base `base` sequence.
111) 
112)         Returns:
113)             If there are sufficient items in the sequence left, then
114)             consume them from the sequence and return them.  Otherwise,
115)             consume nothing, and return nothing.
116) 
117)         Notes:
118)             We currently remove now-empty sequences from the registry of
119)             sequences.
120) 
121)         Examples:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

122)             >>> seq = Sequin([1, 0, 1, 0, 0, 1, 0, 0, 0, 1],
123)             ...              is_bitstring=True)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

124)             >>> seq.bases
125)             {2: deque([1, 0, 1, 0, 0, 1, 0, 0, 0, 1])}
126)             >>> seq._all_or_nothing_shift(3)
127)             (1, 0, 1)
128)             >>> seq._all_or_nothing_shift(3)
129)             (0, 0, 1)
130)             >>> seq.bases[2]
131)             deque([0, 0, 0, 1])
132)             >>> seq._all_or_nothing_shift(5)
133)             ()
134)             >>> seq.bases[2]
135)             deque([0, 0, 0, 1])
136)             >>> seq._all_or_nothing_shift(4)
137)             (0, 0, 0, 1)
138)             >>> 2 in seq.bases  # now-empty sequences are removed
139)             False
140) 
141)         """
142)         try:
143)             seq = self.bases[base]
144)         except KeyError:
145)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

146)         stash: collections.deque[int] = collections.deque()
147)         try:
148)             for i in range(count):
149)                 stash.append(seq.popleft())
150)         except IndexError:
151)             seq.extendleft(reversed(stash))
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

152)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

153)         # Clean up queues.
154)         if not seq:
155)             del self.bases[base]
156)         return tuple(stash)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

157) 
158)     @staticmethod
159)     def _big_endian_number(
160)         digits: Sequence[int], /, *, base: int = 2
161)     ) -> int:
162)         """Evaluate the given integer sequence as a big endian number.
163) 
164)         Args:
165)             digits: A sequence of integers to evaluate.
166)             base: The number base to evaluate those integers in.
167) 
168)         Raises:
169)             ValueError: `base` is an invalid base.
170)             ValueError: Not all integers are valid base `base` digits.
171) 
172)         Examples:
173)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=10)
174)             12345678
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

175)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=100)
176)             102030405060708
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

177)             >>> Sequin._big_endian_number([0, 0, 0, 0, 1, 4, 9, 7], base=10)
178)             1497
179)             >>> Sequin._big_endian_number([1, 0, 0, 1, 0, 0, 0, 0], base=2)
180)             144
181)             >>> Sequin._big_endian_number([1, 7, 5, 5], base=8) == 0o1755
182)             True
183) 
184)         """
185)         if base < 2:
186)             raise ValueError(f'invalid base: {base!r}')
187)         ret = 0
188)         allowed_range = range(base)
189)         n = len(digits)
190)         for i in range(n):
191)             i2 = (n - 1) - i
192)             x = digits[i]
193)             if not isinstance(x, int):
194)                 raise TypeError(f'not an integer: {x!r}')
195)             if x not in allowed_range:
196)                 raise ValueError(f'invalid base {base!r} digit: {x!r}')
197)             ret += (base ** i2) * x
198)         return ret
199) 
200)     def generate(self, n: int, /) -> int:
201)         """Generate a base `n` non-negative integer.
202) 
203)         We attempt to generate a value using rejection sampling.  If the
204)         generated sample is outside the desired range (i.e., is
205)         rejected), then attempt to reuse the sample by seeding
206)         a "higher-order" input sequence of uniformly random numbers (for
207)         a different base).
208) 
209)         Args:
210)             n:
211)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

212)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

213) 
214)         Returns:
215)             A pseudorandom number in the range 0, ..., `n` - 1.
216) 
217)         Raises:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

218)             ValueError:
219)                 The range is empty.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

220)             SequinExhaustedException:
221)                 The sequin is exhausted.
222) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

223)         Examples:
224)             >>> seq = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
225)             ...              is_bitstring=True)
226)             >>> seq2 = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
227)             ...               is_bitstring=True)
228)             >>> seq.generate(5)
229)             3
230)             >>> seq.generate(5)
231)             3
232)             >>> seq.generate(5)
233)             1
234)             >>> seq.generate(5)    # doctest: +IGNORE_EXCEPTION_DETAIL
235)             Traceback (most recent call last):
236)                 ...
237)             SequinExhaustedException: Sequin is exhausted
238) 
239)             Using `n = 1` does not actually consume input bits:
240) 
241)             >>> seq2.generate(1)
242)             0
243) 
244)             But it still won't work on exhausted sequins:
245) 
246)             >>> seq.generate(1)    # doctest: +IGNORE_EXCEPTION_DETAIL
247)             Traceback (most recent call last):
248)                 ...
249)             SequinExhaustedException: Sequin is exhausted
250) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

251)         """
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

252)         if 2 not in self.bases:
253)             raise SequinExhaustedException('Sequin is exhausted')
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

254)         value = self._generate_inner(n, base=2)
255)         if value == n:
256)             raise SequinExhaustedException('Sequin is exhausted')
257)         return value
258) 
259)     def _generate_inner(
260)         self, n: int, /, *, base: int = 2
261)     ) -> int:
262)         """Recursive call to generate a base `n` non-negative integer.
263) 
264)         We first determine the correct exponent `k` to generate base `n`
265)         numbers from a stream of base `base` numbers, then attempt to
266)         take `k` numbers from the base `base` sequence (or bail if not
267)         possible).  If the resulting number `v` is out of range for
268)         base `n`, then push `v - n` onto the rejection queue for
269)         base `r` = `base` ** `k` - `n`, and attempt to generate the
270)         requested base `n` integer from the sequence of base `r` numbers
271)         next.  (This recursion is not attempted if `r` = 1.)  Otherwise,
272)         return the number.
273) 
274)         Args:
275)             n:
276)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

277)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

278)             base:
279)                 Use the base `base` sequence as a source for
280)                 pseudorandom numbers.
281) 
282)         Returns:
283)             A pseudorandom number in the range 0, ..., `n` - 1 if
284)             possible, or `n` if the stream is exhausted.
285) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

286)         Raises:
287)             ValueError:
288)                 The range is empty.
289) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

290)         Examples:
291)             >>> seq = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
292)             ...              is_bitstring=True)
293)             >>> seq2 = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
294)             ...               is_bitstring=True)
295)             >>> seq._generate_inner(5)
296)             3
297)             >>> seq._generate_inner(5)
298)             3
299)             >>> seq._generate_inner(5)
300)             1
301)             >>> seq._generate_inner(5)  # error condition: sequin exhausted
302)             5
303) 
304)             Using `n = 1` does not actually consume input bits, and
305)             always works, regardless of sequin exhaustion:
306) 
307)             >>> seq2._generate_inner(1)
308)             0
309)             >>> seq._generate_inner(1)
310)             0
311) 
312)             Using an unsuitable range will raise:
313) 
314)             >>> seq2._generate_inner(0)    # doctest: +IGNORE_EXCEPTION_DETAIL
315)             Traceback (most recent call last):
316)                 ...
317)             ValueError: invalid target range
318) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

319)         """
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

320)         if n < 1:
321)             raise ValueError('invalid target range')
322)         if base < 2:
323)             raise ValueError(f'invalid base: {base!r}')
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

324)         # p = base ** k, where k is the smallest integer such that
325)         # p >= n.  We determine p and k inductively.
326)         p = 1
327)         k = 0
328)         while p < n:
329)             p *= base
330)             k += 1
331)         # The remainder r of p and n is used as the base for rejection
332)         # queue.
333)         r = p - n
334)         # The generated number v is initialized to n because of the
335)         # while loop below.
336)         v = n
337)         while v > n - 1:
338)             list_slice = self._all_or_nothing_shift(k, base=base)
339)             if not list_slice:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

340)                 if n != 1:
341)                     return n
342)                 else:
343)                     v = 0