565dd46f8506662fd4abcc8e7f9c7a99ae15958f
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

1) # SPDX-FileCopyrightText: 2024 Marco Ricci <m@the13thletter.info>
2) #
3) # SPDX-License-Identifier: MIT
4) 
5) """A Python reimplementation of James Coglan's "sequin" Node.js module.
6) 
7) James Coglan's "sequin" Node.js module provides a pseudorandom number
8) generator (using rejection sampling on a stream of input numbers) that
9) attempts to minimize the amount of information it throws away:
10) (non-degenerate) rejected samples are fed into a stream of higher-order
11) numbers from which the next random number generation request will be
12) served.  The sequin module is used in Coglan's "vault" module (a
13) deterministic, stateless password manager that recomputes passwords
14) instead of storing them), and this reimplementation is used for
15) a similar purpose.
16) 
17) The main API is the [`Sequin`] [sequin.Sequin] class, which is
18) thoroughly documented.
19) 
20) """
21) 
22) from __future__ import annotations
23) 
24) import collections
25) 
Marco Ricci Support Python 3.10 and PyP...

Marco Ricci authored 3 months ago

26) from collections.abc import Iterator, Sequence
27) from typing_extensions import assert_type
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

28) 
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

29) __all__ = ('Sequin', 'SequinExhaustedError')
Marco Ricci Remove __about__.py files,...

Marco Ricci authored 5 months ago

30) __author__ = 'Marco Ricci <m@the13thletter.info>'
Marco Ricci Set project version statica...

Marco Ricci authored 3 months ago

31) # Use importlib.metadata.version(...) to query the version.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

32) 
33) class Sequin:
34)     """Generate pseudorandom non-negative numbers in different ranges.
35) 
36)     Given a (presumed high-quality) uniformly random sequence of input
37)     bits, generate pseudorandom non-negative integers in a certain range
38)     on each call of the `generate` method.  (It is permissible to
39)     specify a different range per call to `generate`; this is the main
40)     use case.)  We use a modified version of rejection sampling, where
41)     rejected values are stored in "rejection queues" if possible, and
42)     these rejection queues re-seed the next round of rejection sampling.
43) 
44)     This is a Python reimplementation of James Coglan's [Node.js sequin
45)     module][JS_SEQUIN], as introduced in [his blog post][BLOG_POST].  It
46)     uses a [technique by Christian Lawson-Perfect][SEQUIN_TECHNIQUE].
47)     I do not know why the original module is called "sequin"; I presume
48)     it to be a pun on "sequence".
49) 
50)     [JS_SEQUIN]: https://www.npmjs.com/package/sequin
51)     [BLOG_POST]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/
52)     [SEQUIN_TECHNIQUE]: https://checkmyworking.com/2012/06/converting-a-stream-of-binary-digits-to-a-stream-of-base-n-digits/
53) 
54)     """
55)     def __init__(
56)         self,
57)         sequence: str | bytes | bytearray | Sequence[int],
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

58)         /, *, is_bitstring: bool = False
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

59)     ):
60)         """Initialize the Sequin.
61) 
62)         Args:
63)             sequence:
64)                 A sequence of bits, or things convertible to bits, to
65)                 seed the pseudorandom number generator.  Byte and text
66)                 strings are converted to 8-bit integer sequences.
67)                 (Conversion will fail if the text string contains
68)                 non-ISO-8859-1 characters.)  The numbers are then
69)                 converted to bits.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

70)             is_bitstring:
71)                 If true, treat the input as a bitstring.  By default,
72)                 the input is treated as a string of 8-bit integers, from
73)                 which the individual bits must still be extracted.
74) 
75)         Raises:
76)             ValueError:
77)                 The sequence contains values outside the permissible
78)                 range.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

79) 
80)         """
81)         def uint8_to_bits(value):
82)             """Yield individual bits of an 8-bit number, MSB first."""
83)             for i in (0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01):
84)                 yield 1 if value | i == value else 0
85)         if isinstance(sequence, str):
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

86)             try:
87)                 sequence = tuple(sequence.encode('iso-8859-1'))
88)             except UnicodeError as e:
89)                 raise ValueError('sequence item out of range') from e
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

90)         else:
91)             sequence = tuple(sequence)
92)         assert_type(sequence, tuple[int, ...])
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

93)         self.bases: dict[int, collections.deque[int]] = {}
94)         def gen() -> Iterator[int]:
95)             for num in sequence:
96)                 if num not in range(2 if is_bitstring else 256):
97)                     raise ValueError('sequence item out of range')
98)                 if is_bitstring:
99)                     yield num
100)                 else:
101)                     yield from uint8_to_bits(num)
102)         self.bases[2] = collections.deque(gen())
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

103) 
104)     def _all_or_nothing_shift(
105)         self, count: int, /, *, base: int = 2
106)     ) -> Sequence[int]:
107)         """Shift and return items if and only if there are enough.
108) 
109)         Args:
110)             count: Number of items to shift/consume.
111)             base: Use the base `base` sequence.
112) 
113)         Returns:
114)             If there are sufficient items in the sequence left, then
115)             consume them from the sequence and return them.  Otherwise,
116)             consume nothing, and return nothing.
117) 
118)         Notes:
119)             We currently remove now-empty sequences from the registry of
120)             sequences.
121) 
122)         Examples:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

123)             >>> seq = Sequin([1, 0, 1, 0, 0, 1, 0, 0, 0, 1],
124)             ...              is_bitstring=True)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

125)             >>> seq.bases
126)             {2: deque([1, 0, 1, 0, 0, 1, 0, 0, 0, 1])}
127)             >>> seq._all_or_nothing_shift(3)
128)             (1, 0, 1)
129)             >>> seq._all_or_nothing_shift(3)
130)             (0, 0, 1)
131)             >>> seq.bases[2]
132)             deque([0, 0, 0, 1])
133)             >>> seq._all_or_nothing_shift(5)
134)             ()
135)             >>> seq.bases[2]
136)             deque([0, 0, 0, 1])
137)             >>> seq._all_or_nothing_shift(4)
138)             (0, 0, 0, 1)
139)             >>> 2 in seq.bases  # now-empty sequences are removed
140)             False
141) 
142)         """
143)         try:
144)             seq = self.bases[base]
145)         except KeyError:
146)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

147)         stash: collections.deque[int] = collections.deque()
148)         try:
149)             for i in range(count):
150)                 stash.append(seq.popleft())
151)         except IndexError:
152)             seq.extendleft(reversed(stash))
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

153)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

154)         # Clean up queues.
155)         if not seq:
156)             del self.bases[base]
157)         return tuple(stash)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

158) 
159)     @staticmethod
160)     def _big_endian_number(
161)         digits: Sequence[int], /, *, base: int = 2
162)     ) -> int:
163)         """Evaluate the given integer sequence as a big endian number.
164) 
165)         Args:
166)             digits: A sequence of integers to evaluate.
167)             base: The number base to evaluate those integers in.
168) 
169)         Raises:
170)             ValueError: `base` is an invalid base.
171)             ValueError: Not all integers are valid base `base` digits.
172) 
173)         Examples:
174)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=10)
175)             12345678
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

176)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=100)
177)             102030405060708
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

178)             >>> Sequin._big_endian_number([0, 0, 0, 0, 1, 4, 9, 7], base=10)
179)             1497
180)             >>> Sequin._big_endian_number([1, 0, 0, 1, 0, 0, 0, 0], base=2)
181)             144
182)             >>> Sequin._big_endian_number([1, 7, 5, 5], base=8) == 0o1755
183)             True
184) 
185)         """
186)         if base < 2:
187)             raise ValueError(f'invalid base: {base!r}')
188)         ret = 0
189)         allowed_range = range(base)
190)         n = len(digits)
191)         for i in range(n):
192)             i2 = (n - 1) - i
193)             x = digits[i]
194)             if not isinstance(x, int):
195)                 raise TypeError(f'not an integer: {x!r}')
196)             if x not in allowed_range:
197)                 raise ValueError(f'invalid base {base!r} digit: {x!r}')
198)             ret += (base ** i2) * x
199)         return ret
200) 
201)     def generate(self, n: int, /) -> int:
202)         """Generate a base `n` non-negative integer.
203) 
204)         We attempt to generate a value using rejection sampling.  If the
205)         generated sample is outside the desired range (i.e., is
206)         rejected), then attempt to reuse the sample by seeding
207)         a "higher-order" input sequence of uniformly random numbers (for
208)         a different base).
209) 
210)         Args:
211)             n:
212)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

213)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

214) 
215)         Returns:
216)             A pseudorandom number in the range 0, ..., `n` - 1.
217) 
218)         Raises:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

219)             ValueError:
220)                 The range is empty.
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

221)             SequinExhaustedError:
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

222)                 The sequin is exhausted.
223) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

224)         Examples:
225)             >>> seq = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
226)             ...              is_bitstring=True)
227)             >>> seq2 = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
228)             ...               is_bitstring=True)
229)             >>> seq.generate(5)
230)             3
231)             >>> seq.generate(5)
232)             3
233)             >>> seq.generate(5)
234)             1
235)             >>> seq.generate(5)    # doctest: +IGNORE_EXCEPTION_DETAIL
236)             Traceback (most recent call last):
237)                 ...
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

238)             SequinExhaustedError: Sequin is exhausted
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

239) 
240)             Using `n = 1` does not actually consume input bits:
241) 
242)             >>> seq2.generate(1)
243)             0
244) 
245)             But it still won't work on exhausted sequins:
246) 
247)             >>> seq.generate(1)    # doctest: +IGNORE_EXCEPTION_DETAIL
248)             Traceback (most recent call last):
249)                 ...
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

250)             SequinExhaustedError: Sequin is exhausted
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

251) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

252)         """
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

253)         if 2 not in self.bases:
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

254)             raise SequinExhaustedError('Sequin is exhausted')
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

255)         value = self._generate_inner(n, base=2)
256)         if value == n:
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

257)             raise SequinExhaustedError('Sequin is exhausted')
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

258)         return value
259) 
260)     def _generate_inner(
261)         self, n: int, /, *, base: int = 2
262)     ) -> int:
263)         """Recursive call to generate a base `n` non-negative integer.
264) 
265)         We first determine the correct exponent `k` to generate base `n`
266)         numbers from a stream of base `base` numbers, then attempt to
267)         take `k` numbers from the base `base` sequence (or bail if not
268)         possible).  If the resulting number `v` is out of range for
269)         base `n`, then push `v - n` onto the rejection queue for
270)         base `r` = `base` ** `k` - `n`, and attempt to generate the
271)         requested base `n` integer from the sequence of base `r` numbers
272)         next.  (This recursion is not attempted if `r` = 1.)  Otherwise,
273)         return the number.
274) 
275)         Args:
276)             n:
277)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

278)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

279)             base:
280)                 Use the base `base` sequence as a source for
281)                 pseudorandom numbers.
282) 
283)         Returns:
284)             A pseudorandom number in the range 0, ..., `n` - 1 if
285)             possible, or `n` if the stream is exhausted.
286) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

287)         Raises:
288)             ValueError:
289)                 The range is empty.
290) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

291)         Examples:
292)             >>> seq = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
293)             ...              is_bitstring=True)
294)             >>> seq2 = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
295)             ...               is_bitstring=True)
296)             >>> seq._generate_inner(5)
297)             3
298)             >>> seq._generate_inner(5)
299)             3
300)             >>> seq._generate_inner(5)
301)             1
302)             >>> seq._generate_inner(5)  # error condition: sequin exhausted
303)             5
304) 
305)             Using `n = 1` does not actually consume input bits, and
306)             always works, regardless of sequin exhaustion:
307) 
308)             >>> seq2._generate_inner(1)
309)             0
310)             >>> seq._generate_inner(1)
311)             0
312) 
313)             Using an unsuitable range will raise:
314) 
315)             >>> seq2._generate_inner(0)    # doctest: +IGNORE_EXCEPTION_DETAIL
316)             Traceback (most recent call last):
317)                 ...
318)             ValueError: invalid target range
319) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

320)         """
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

321)         if n < 1:
322)             raise ValueError('invalid target range')
323)         if base < 2:
324)             raise ValueError(f'invalid base: {base!r}')
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

325)         # p = base ** k, where k is the smallest integer such that
326)         # p >= n.  We determine p and k inductively.
327)         p = 1
328)         k = 0
329)         while p < n:
330)             p *= base
331)             k += 1
332)         # The remainder r of p and n is used as the base for rejection
333)         # queue.
334)         r = p - n
335)         # The generated number v is initialized to n because of the
336)         # while loop below.
337)         v = n
338)         while v > n - 1:
339)             list_slice = self._all_or_nothing_shift(k, base=base)
340)             if not list_slice:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

341)                 if n != 1:
342)                     return n
343)                 else:
344)                     v = 0
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

345)             v = self._big_endian_number(list_slice, base=base)
346)             if v > n - 1:
347)                 # If r is 0, then p == n, so v < n, or rather
348)                 # v <= n - 1.
349)                 assert r > 0
350)                 if r == 1:
351)                     continue
352)                 self._stash(v - n, base=r)
353)                 v = self._generate_inner(n, base=r)
354)         return v
355) 
356)     def _stash(self, value: int, /, *, base: int = 2) -> None:
357)         """Stash `value` on the base `base` sequence."""
358)         if base not in self.bases:
359)             self.bases[base] = collections.deque()
360)         self.bases[base].append(value)
361) 
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

362) class SequinExhaustedError(Exception):