56ea02d5de542487633a1444cc55ccfb2269a498
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

1) # SPDX-FileCopyrightText: 2024 Marco Ricci <m@the13thletter.info>
2) #
3) # SPDX-License-Identifier: MIT
4) 
5) """A Python reimplementation of James Coglan's "sequin" Node.js module.
6) 
7) James Coglan's "sequin" Node.js module provides a pseudorandom number
8) generator (using rejection sampling on a stream of input numbers) that
9) attempts to minimize the amount of information it throws away:
10) (non-degenerate) rejected samples are fed into a stream of higher-order
11) numbers from which the next random number generation request will be
12) served.  The sequin module is used in Coglan's "vault" module (a
13) deterministic, stateless password manager that recomputes passwords
14) instead of storing them), and this reimplementation is used for
15) a similar purpose.
16) 
17) The main API is the [`Sequin`] [sequin.Sequin] class, which is
18) thoroughly documented.
19) 
20) """
21) 
22) from __future__ import annotations
23) 
24) import collections
25) import math
26) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

27) from collections.abc import Iterator, MutableSequence, Sequence
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

28) from typing import assert_type, Literal, TypeAlias
29) 
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

30) __all__ = ('Sequin', 'SequinExhaustedError')
Marco Ricci Remove __about__.py files,...

Marco Ricci authored 3 months ago

31) __author__ = 'Marco Ricci <m@the13thletter.info>'
32) __version__ = "0.1.0"
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

33) 
34) class Sequin:
35)     """Generate pseudorandom non-negative numbers in different ranges.
36) 
37)     Given a (presumed high-quality) uniformly random sequence of input
38)     bits, generate pseudorandom non-negative integers in a certain range
39)     on each call of the `generate` method.  (It is permissible to
40)     specify a different range per call to `generate`; this is the main
41)     use case.)  We use a modified version of rejection sampling, where
42)     rejected values are stored in "rejection queues" if possible, and
43)     these rejection queues re-seed the next round of rejection sampling.
44) 
45)     This is a Python reimplementation of James Coglan's [Node.js sequin
46)     module][JS_SEQUIN], as introduced in [his blog post][BLOG_POST].  It
47)     uses a [technique by Christian Lawson-Perfect][SEQUIN_TECHNIQUE].
48)     I do not know why the original module is called "sequin"; I presume
49)     it to be a pun on "sequence".
50) 
51)     [JS_SEQUIN]: https://www.npmjs.com/package/sequin
52)     [BLOG_POST]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/
53)     [SEQUIN_TECHNIQUE]: https://checkmyworking.com/2012/06/converting-a-stream-of-binary-digits-to-a-stream-of-base-n-digits/
54) 
55)     """
56)     def __init__(
57)         self,
58)         sequence: str | bytes | bytearray | Sequence[int],
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

59)         /, *, is_bitstring: bool = False
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

60)     ):
61)         """Initialize the Sequin.
62) 
63)         Args:
64)             sequence:
65)                 A sequence of bits, or things convertible to bits, to
66)                 seed the pseudorandom number generator.  Byte and text
67)                 strings are converted to 8-bit integer sequences.
68)                 (Conversion will fail if the text string contains
69)                 non-ISO-8859-1 characters.)  The numbers are then
70)                 converted to bits.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

71)             is_bitstring:
72)                 If true, treat the input as a bitstring.  By default,
73)                 the input is treated as a string of 8-bit integers, from
74)                 which the individual bits must still be extracted.
75) 
76)         Raises:
77)             ValueError:
78)                 The sequence contains values outside the permissible
79)                 range.
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

80) 
81)         """
82)         def uint8_to_bits(value):
83)             """Yield individual bits of an 8-bit number, MSB first."""
84)             for i in (0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01):
85)                 yield 1 if value | i == value else 0
86)         if isinstance(sequence, str):
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

87)             try:
88)                 sequence = tuple(sequence.encode('iso-8859-1'))
89)             except UnicodeError as e:
90)                 raise ValueError('sequence item out of range') from e
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

91)         else:
92)             sequence = tuple(sequence)
93)         assert_type(sequence, tuple[int, ...])
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

94)         self.bases: dict[int, collections.deque[int]] = {}
95)         def gen() -> Iterator[int]:
96)             for num in sequence:
97)                 if num not in range(2 if is_bitstring else 256):
98)                     raise ValueError('sequence item out of range')
99)                 if is_bitstring:
100)                     yield num
101)                 else:
102)                     yield from uint8_to_bits(num)
103)         self.bases[2] = collections.deque(gen())
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

104) 
105)     def _all_or_nothing_shift(
106)         self, count: int, /, *, base: int = 2
107)     ) -> Sequence[int]:
108)         """Shift and return items if and only if there are enough.
109) 
110)         Args:
111)             count: Number of items to shift/consume.
112)             base: Use the base `base` sequence.
113) 
114)         Returns:
115)             If there are sufficient items in the sequence left, then
116)             consume them from the sequence and return them.  Otherwise,
117)             consume nothing, and return nothing.
118) 
119)         Notes:
120)             We currently remove now-empty sequences from the registry of
121)             sequences.
122) 
123)         Examples:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

124)             >>> seq = Sequin([1, 0, 1, 0, 0, 1, 0, 0, 0, 1],
125)             ...              is_bitstring=True)
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

126)             >>> seq.bases
127)             {2: deque([1, 0, 1, 0, 0, 1, 0, 0, 0, 1])}
128)             >>> seq._all_or_nothing_shift(3)
129)             (1, 0, 1)
130)             >>> seq._all_or_nothing_shift(3)
131)             (0, 0, 1)
132)             >>> seq.bases[2]
133)             deque([0, 0, 0, 1])
134)             >>> seq._all_or_nothing_shift(5)
135)             ()
136)             >>> seq.bases[2]
137)             deque([0, 0, 0, 1])
138)             >>> seq._all_or_nothing_shift(4)
139)             (0, 0, 0, 1)
140)             >>> 2 in seq.bases  # now-empty sequences are removed
141)             False
142) 
143)         """
144)         try:
145)             seq = self.bases[base]
146)         except KeyError:
147)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

148)         stash: collections.deque[int] = collections.deque()
149)         try:
150)             for i in range(count):
151)                 stash.append(seq.popleft())
152)         except IndexError:
153)             seq.extendleft(reversed(stash))
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

154)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

155)         # Clean up queues.
156)         if not seq:
157)             del self.bases[base]
158)         return tuple(stash)
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

159) 
160)     @staticmethod
161)     def _big_endian_number(
162)         digits: Sequence[int], /, *, base: int = 2
163)     ) -> int:
164)         """Evaluate the given integer sequence as a big endian number.
165) 
166)         Args:
167)             digits: A sequence of integers to evaluate.
168)             base: The number base to evaluate those integers in.
169) 
170)         Raises:
171)             ValueError: `base` is an invalid base.
172)             ValueError: Not all integers are valid base `base` digits.
173) 
174)         Examples:
175)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=10)
176)             12345678
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 3 months ago

177)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=100)
178)             102030405060708
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

179)             >>> Sequin._big_endian_number([0, 0, 0, 0, 1, 4, 9, 7], base=10)
180)             1497
181)             >>> Sequin._big_endian_number([1, 0, 0, 1, 0, 0, 0, 0], base=2)
182)             144
183)             >>> Sequin._big_endian_number([1, 7, 5, 5], base=8) == 0o1755
184)             True
185) 
186)         """
187)         if base < 2:
188)             raise ValueError(f'invalid base: {base!r}')
189)         ret = 0
190)         allowed_range = range(base)
191)         n = len(digits)
192)         for i in range(n):
193)             i2 = (n - 1) - i
194)             x = digits[i]
195)             if not isinstance(x, int):
196)                 raise TypeError(f'not an integer: {x!r}')
197)             if x not in allowed_range:
198)                 raise ValueError(f'invalid base {base!r} digit: {x!r}')
199)             ret += (base ** i2) * x
200)         return ret
201) 
202)     def generate(self, n: int, /) -> int:
203)         """Generate a base `n` non-negative integer.
204) 
205)         We attempt to generate a value using rejection sampling.  If the
206)         generated sample is outside the desired range (i.e., is
207)         rejected), then attempt to reuse the sample by seeding
208)         a "higher-order" input sequence of uniformly random numbers (for
209)         a different base).
210) 
211)         Args:
212)             n:
213)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

214)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

215) 
216)         Returns:
217)             A pseudorandom number in the range 0, ..., `n` - 1.
218) 
219)         Raises:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

220)             ValueError:
221)                 The range is empty.
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

222)             SequinExhaustedError:
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

223)                 The sequin is exhausted.
224) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 3 months ago

225)         Examples:
226)             >>> seq = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
227)             ...              is_bitstring=True)
228)             >>> seq2 = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
229)             ...               is_bitstring=True)
230)             >>> seq.generate(5)
231)             3
232)             >>> seq.generate(5)
233)             3
234)             >>> seq.generate(5)
235)             1
236)             >>> seq.generate(5)    # doctest: +IGNORE_EXCEPTION_DETAIL
237)             Traceback (most recent call last):
238)                 ...
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

239)             SequinExhaustedError: Sequin is exhausted
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 3 months ago

240) 
241)             Using `n = 1` does not actually consume input bits:
242) 
243)             >>> seq2.generate(1)
244)             0
245) 
246)             But it still won't work on exhausted sequins:
247) 
248)             >>> seq.generate(1)    # doctest: +IGNORE_EXCEPTION_DETAIL
249)             Traceback (most recent call last):
250)                 ...
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

251)             SequinExhaustedError: Sequin is exhausted
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 3 months ago

252) 
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

253)         """
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

254)         if 2 not in self.bases:
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

255)             raise SequinExhaustedError('Sequin is exhausted')
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

256)         value = self._generate_inner(n, base=2)
257)         if value == n:
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

258)             raise SequinExhaustedError('Sequin is exhausted')
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

259)         return value
260) 
261)     def _generate_inner(
262)         self, n: int, /, *, base: int = 2
263)     ) -> int:
264)         """Recursive call to generate a base `n` non-negative integer.
265) 
266)         We first determine the correct exponent `k` to generate base `n`
267)         numbers from a stream of base `base` numbers, then attempt to
268)         take `k` numbers from the base `base` sequence (or bail if not
269)         possible).  If the resulting number `v` is out of range for
270)         base `n`, then push `v - n` onto the rejection queue for
271)         base `r` = `base` ** `k` - `n`, and attempt to generate the
272)         requested base `n` integer from the sequence of base `r` numbers
273)         next.  (This recursion is not attempted if `r` = 1.)  Otherwise,
274)         return the number.
275) 
276)         Args:
277)             n:
278)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

279)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

280)             base:
281)                 Use the base `base` sequence as a source for
282)                 pseudorandom numbers.
283) 
284)         Returns:
285)             A pseudorandom number in the range 0, ..., `n` - 1 if
286)             possible, or `n` if the stream is exhausted.
287) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

288)         Raises:
289)             ValueError:
290)                 The range is empty.
291) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 3 months ago

292)         Examples:
293)             >>> seq = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
294)             ...              is_bitstring=True)
295)             >>> seq2 = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
296)             ...               is_bitstring=True)
297)             >>> seq._generate_inner(5)
298)             3
299)             >>> seq._generate_inner(5)
300)             3
301)             >>> seq._generate_inner(5)
302)             1
303)             >>> seq._generate_inner(5)  # error condition: sequin exhausted
304)             5
305) 
306)             Using `n = 1` does not actually consume input bits, and
307)             always works, regardless of sequin exhaustion:
308) 
309)             >>> seq2._generate_inner(1)
310)             0
311)             >>> seq._generate_inner(1)
312)             0
313) 
314)             Using an unsuitable range will raise:
315) 
316)             >>> seq2._generate_inner(0)    # doctest: +IGNORE_EXCEPTION_DETAIL
317)             Traceback (most recent call last):
318)                 ...
319)             ValueError: invalid target range
320) 
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

321)         """
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

322)         if n < 1:
323)             raise ValueError('invalid target range')
324)         if base < 2:
325)             raise ValueError(f'invalid base: {base!r}')
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

326)         # p = base ** k, where k is the smallest integer such that
327)         # p >= n.  We determine p and k inductively.
328)         p = 1
329)         k = 0
330)         while p < n:
331)             p *= base
332)             k += 1
333)         # The remainder r of p and n is used as the base for rejection
334)         # queue.
335)         r = p - n
336)         # The generated number v is initialized to n because of the
337)         # while loop below.
338)         v = n
339)         while v > n - 1:
340)             list_slice = self._all_or_nothing_shift(k, base=base)
341)             if not list_slice:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

342)                 if n != 1:
343)                     return n
344)                 else:
345)                     v = 0
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

346)             v = self._big_endian_number(list_slice, base=base)
347)             if v > n - 1:
348)                 # If r is 0, then p == n, so v < n, or rather
349)                 # v <= n - 1.
350)                 assert r > 0
351)                 if r == 1:
352)                     continue
353)                 self._stash(v - n, base=r)
354)                 v = self._generate_inner(n, base=r)
355)         return v
356) 
357)     def _stash(self, value: int, /, *, base: int = 2) -> None:
358)         """Stash `value` on the base `base` sequence."""
359)         if base not in self.bases:
360)             self.bases[base] = collections.deque()
361)         self.bases[base].append(value)
362) 
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

363) class SequinExhaustedError(Exception):