50749ca2a633eb4d8275de7e9dff586417c79af6
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

1) # SPDX-FileCopyrightText: 2024 Marco Ricci <m@the13thletter.info>
2) #
3) # SPDX-License-Identifier: MIT
4) 
5) """A Python reimplementation of James Coglan's "sequin" Node.js module.
6) 
7) James Coglan's "sequin" Node.js module provides a pseudorandom number
8) generator (using rejection sampling on a stream of input numbers) that
9) attempts to minimize the amount of information it throws away:
10) (non-degenerate) rejected samples are fed into a stream of higher-order
11) numbers from which the next random number generation request will be
12) served.  The sequin module is used in Coglan's "vault" module (a
13) deterministic, stateless password manager that recomputes passwords
14) instead of storing them), and this reimplementation is used for
15) a similar purpose.
16) 
17) The main API is the [`Sequin`] [sequin.Sequin] class, which is
18) thoroughly documented.
19) 
20) """
21) 
22) from __future__ import annotations
23) 
24) import collections
25) import math
26) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

27) from collections.abc import Iterator, MutableSequence, Sequence
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

28) from typing import assert_type, Literal, TypeAlias
29) 
30) __all__ = ('Sequin', 'SequinExhaustedException')
31) 
32) class Sequin:
33)     """Generate pseudorandom non-negative numbers in different ranges.
34) 
35)     Given a (presumed high-quality) uniformly random sequence of input
36)     bits, generate pseudorandom non-negative integers in a certain range
37)     on each call of the `generate` method.  (It is permissible to
38)     specify a different range per call to `generate`; this is the main
39)     use case.)  We use a modified version of rejection sampling, where
40)     rejected values are stored in "rejection queues" if possible, and
41)     these rejection queues re-seed the next round of rejection sampling.
42) 
43)     This is a Python reimplementation of James Coglan's [Node.js sequin
44)     module][JS_SEQUIN], as introduced in [his blog post][BLOG_POST].  It
45)     uses a [technique by Christian Lawson-Perfect][SEQUIN_TECHNIQUE].
46)     I do not know why the original module is called "sequin"; I presume
47)     it to be a pun on "sequence".
48) 
49)     [JS_SEQUIN]: https://www.npmjs.com/package/sequin
50)     [BLOG_POST]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/
51)     [SEQUIN_TECHNIQUE]: https://checkmyworking.com/2012/06/converting-a-stream-of-binary-digits-to-a-stream-of-base-n-digits/
52) 
53)     """
54)     def __init__(
55)         self,
56)         sequence: str | bytes | bytearray | Sequence[int],
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

57)         /, *, is_bitstring: bool = False
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

58)     ):
59)         """Initialize the Sequin.
60) 
61)         Args:
62)             sequence:
63)                 A sequence of bits, or things convertible to bits, to
64)                 seed the pseudorandom number generator.  Byte and text
65)                 strings are converted to 8-bit integer sequences.
66)                 (Conversion will fail if the text string contains
67)                 non-ISO-8859-1 characters.)  The numbers are then
68)                 converted to bits.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

69)             is_bitstring:
70)                 If true, treat the input as a bitstring.  By default,
71)                 the input is treated as a string of 8-bit integers, from
72)                 which the individual bits must still be extracted.
73) 
74)         Raises:
75)             ValueError:
76)                 The sequence contains values outside the permissible
77)                 range.
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

78) 
79)         """
80)         def uint8_to_bits(value):
81)             """Yield individual bits of an 8-bit number, MSB first."""
82)             for i in (0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01):
83)                 yield 1 if value | i == value else 0
84)         if isinstance(sequence, str):
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

85)             try:
86)                 sequence = tuple(sequence.encode('iso-8859-1'))
87)             except UnicodeError as e:
88)                 raise ValueError('sequence item out of range') from e
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

89)         else:
90)             sequence = tuple(sequence)
91)         assert_type(sequence, tuple[int, ...])
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

92)         self.bases: dict[int, collections.deque[int]] = {}
93)         def gen() -> Iterator[int]:
94)             for num in sequence:
95)                 if num not in range(2 if is_bitstring else 256):
96)                     raise ValueError('sequence item out of range')
97)                 if is_bitstring:
98)                     yield num
99)                 else:
100)                     yield from uint8_to_bits(num)
101)         self.bases[2] = collections.deque(gen())
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

102) 
103)     def _all_or_nothing_shift(
104)         self, count: int, /, *, base: int = 2
105)     ) -> Sequence[int]:
106)         """Shift and return items if and only if there are enough.
107) 
108)         Args:
109)             count: Number of items to shift/consume.
110)             base: Use the base `base` sequence.
111) 
112)         Returns:
113)             If there are sufficient items in the sequence left, then
114)             consume them from the sequence and return them.  Otherwise,
115)             consume nothing, and return nothing.
116) 
117)         Notes:
118)             We currently remove now-empty sequences from the registry of
119)             sequences.
120) 
121)         Examples:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

122)             >>> seq = Sequin([1, 0, 1, 0, 0, 1, 0, 0, 0, 1],
123)             ...              is_bitstring=True)
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

124)             >>> seq.bases
125)             {2: deque([1, 0, 1, 0, 0, 1, 0, 0, 0, 1])}
126)             >>> seq._all_or_nothing_shift(3)
127)             (1, 0, 1)
128)             >>> seq._all_or_nothing_shift(3)
129)             (0, 0, 1)
130)             >>> seq.bases[2]
131)             deque([0, 0, 0, 1])
132)             >>> seq._all_or_nothing_shift(5)
133)             ()
134)             >>> seq.bases[2]
135)             deque([0, 0, 0, 1])
136)             >>> seq._all_or_nothing_shift(4)
137)             (0, 0, 0, 1)
138)             >>> 2 in seq.bases  # now-empty sequences are removed
139)             False
140) 
141)         """
142)         try:
143)             seq = self.bases[base]
144)         except KeyError:
145)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

146)         stash: collections.deque[int] = collections.deque()
147)         try:
148)             for i in range(count):
149)                 stash.append(seq.popleft())
150)         except IndexError:
151)             seq.extendleft(reversed(stash))
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

152)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

153)         # Clean up queues.
154)         if not seq:
155)             del self.bases[base]
156)         return tuple(stash)
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

157) 
158)     @staticmethod
159)     def _big_endian_number(
160)         digits: Sequence[int], /, *, base: int = 2
161)     ) -> int:
162)         """Evaluate the given integer sequence as a big endian number.
163) 
164)         Args:
165)             digits: A sequence of integers to evaluate.
166)             base: The number base to evaluate those integers in.
167) 
168)         Raises:
169)             ValueError: `base` is an invalid base.
170)             ValueError: Not all integers are valid base `base` digits.
171) 
172)         Examples:
173)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=10)
174)             12345678
175)             >>> Sequin._big_endian_number([0, 0, 0, 0, 1, 4, 9, 7], base=10)
176)             1497
177)             >>> Sequin._big_endian_number([1, 0, 0, 1, 0, 0, 0, 0], base=2)
178)             144
179)             >>> Sequin._big_endian_number([1, 7, 5, 5], base=8) == 0o1755
180)             True
181)             >>> Sequin._big_endian_number([-1], base=3)  # doctest: +ELLIPSIS
182)             Traceback (most recent call last):
183)                 ...
184)             ValueError: ...
185)             >>> Sequin._big_endian_number([0], base=1)  # doctest: +ELLIPSIS
186)             Traceback (most recent call last):
187)                 ...
188)             ValueError: ...
189) 
190)         """
191)         if base < 2:
192)             raise ValueError(f'invalid base: {base!r}')
193)         ret = 0
194)         allowed_range = range(base)
195)         n = len(digits)
196)         for i in range(n):
197)             i2 = (n - 1) - i
198)             x = digits[i]
199)             if not isinstance(x, int):
200)                 raise TypeError(f'not an integer: {x!r}')
201)             if x not in allowed_range:
202)                 raise ValueError(f'invalid base {base!r} digit: {x!r}')
203)             ret += (base ** i2) * x
204)         return ret
205) 
206)     def generate(self, n: int, /) -> int:
207)         """Generate a base `n` non-negative integer.
208) 
209)         We attempt to generate a value using rejection sampling.  If the
210)         generated sample is outside the desired range (i.e., is
211)         rejected), then attempt to reuse the sample by seeding
212)         a "higher-order" input sequence of uniformly random numbers (for
213)         a different base).
214) 
215)         Args:
216)             n:
217)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

218)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

219) 
220)         Returns:
221)             A pseudorandom number in the range 0, ..., `n` - 1.
222) 
223)         Raises:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

224)             ValueError:
225)                 The range is empty.
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

226)             SequinExhaustedException:
227)                 The sequin is exhausted.
228) 
229)         """
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

230)         if 2 not in self.bases:
231)             raise SequinExhaustedException('Sequin is exhausted')
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

232)         value = self._generate_inner(n, base=2)
233)         if value == n:
234)             raise SequinExhaustedException('Sequin is exhausted')
235)         return value
236) 
237)     def _generate_inner(
238)         self, n: int, /, *, base: int = 2
239)     ) -> int:
240)         """Recursive call to generate a base `n` non-negative integer.
241) 
242)         We first determine the correct exponent `k` to generate base `n`
243)         numbers from a stream of base `base` numbers, then attempt to
244)         take `k` numbers from the base `base` sequence (or bail if not
245)         possible).  If the resulting number `v` is out of range for
246)         base `n`, then push `v - n` onto the rejection queue for
247)         base `r` = `base` ** `k` - `n`, and attempt to generate the
248)         requested base `n` integer from the sequence of base `r` numbers
249)         next.  (This recursion is not attempted if `r` = 1.)  Otherwise,
250)         return the number.
251) 
252)         Args:
253)             n:
254)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

255)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

256)             base:
257)                 Use the base `base` sequence as a source for
258)                 pseudorandom numbers.
259) 
260)         Returns:
261)             A pseudorandom number in the range 0, ..., `n` - 1 if
262)             possible, or `n` if the stream is exhausted.
263) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

264)         Raises:
265)             ValueError:
266)                 The range is empty.
267) 
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

268)         """
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

269)         if n < 1:
270)             raise ValueError('invalid target range')
271)         if base < 2:
272)             raise ValueError(f'invalid base: {base!r}')
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

273)         # p = base ** k, where k is the smallest integer such that
274)         # p >= n.  We determine p and k inductively.
275)         p = 1
276)         k = 0
277)         while p < n:
278)             p *= base
279)             k += 1
280)         # The remainder r of p and n is used as the base for rejection
281)         # queue.
282)         r = p - n
283)         # The generated number v is initialized to n because of the
284)         # while loop below.
285)         v = n
286)         while v > n - 1:
287)             list_slice = self._all_or_nothing_shift(k, base=base)
288)             if not list_slice:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

289)                 if n != 1:
290)                     return n
291)                 else:
292)                     v = 0