a101d1f604a3ad6de242989f7a4887b78ab012a1
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

1) # SPDX-FileCopyrightText: 2024 Marco Ricci <m@the13thletter.info>
2) #
3) # SPDX-License-Identifier: MIT
4) 
5) """A Python reimplementation of James Coglan's "sequin" Node.js module.
6) 
7) James Coglan's "sequin" Node.js module provides a pseudorandom number
8) generator (using rejection sampling on a stream of input numbers) that
9) attempts to minimize the amount of information it throws away:
10) (non-degenerate) rejected samples are fed into a stream of higher-order
11) numbers from which the next random number generation request will be
12) served.  The sequin module is used in Coglan's "vault" module (a
13) deterministic, stateless password manager that recomputes passwords
14) instead of storing them), and this reimplementation is used for
15) a similar purpose.
16) 
17) The main API is the [`Sequin`] [sequin.Sequin] class, which is
18) thoroughly documented.
19) 
20) """
21) 
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

22) # ruff: noqa: RUF002,RUF003
23) 
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

24) from __future__ import annotations
25) 
26) import collections
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

27) from typing import TYPE_CHECKING
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

28) 
Marco Ricci Support Python 3.10 and PyP...

Marco Ricci authored 2 months ago

29) from typing_extensions import assert_type
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

30) 
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

31) if TYPE_CHECKING:
32)     from collections.abc import Iterator, Sequence
33) 
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

34) __all__ = ('Sequin', 'SequinExhaustedError')
Marco Ricci Restore __version__ attribu...

Marco Ricci authored 2 months ago

35) __author__ = "Marco Ricci <m@the13thletter.info>"
Marco Ricci Release 0.1.1

Marco Ricci authored 2 months ago

36) __version__ = "0.1.1"
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

37) 
38) class Sequin:
39)     """Generate pseudorandom non-negative numbers in different ranges.
40) 
41)     Given a (presumed high-quality) uniformly random sequence of input
42)     bits, generate pseudorandom non-negative integers in a certain range
43)     on each call of the `generate` method.  (It is permissible to
44)     specify a different range per call to `generate`; this is the main
45)     use case.)  We use a modified version of rejection sampling, where
46)     rejected values are stored in "rejection queues" if possible, and
47)     these rejection queues re-seed the next round of rejection sampling.
48) 
49)     This is a Python reimplementation of James Coglan's [Node.js sequin
50)     module][JS_SEQUIN], as introduced in [his blog post][BLOG_POST].  It
51)     uses a [technique by Christian Lawson-Perfect][SEQUIN_TECHNIQUE].
52)     I do not know why the original module is called "sequin"; I presume
53)     it to be a pun on "sequence".
54) 
55)     [JS_SEQUIN]: https://www.npmjs.com/package/sequin
56)     [BLOG_POST]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/
57)     [SEQUIN_TECHNIQUE]: https://checkmyworking.com/2012/06/converting-a-stream-of-binary-digits-to-a-stream-of-base-n-digits/
58) 
59)     """
60)     def __init__(
61)         self,
62)         sequence: str | bytes | bytearray | Sequence[int],
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

63)         /, *, is_bitstring: bool = False
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

64)     ):
65)         """Initialize the Sequin.
66) 
67)         Args:
68)             sequence:
69)                 A sequence of bits, or things convertible to bits, to
70)                 seed the pseudorandom number generator.  Byte and text
71)                 strings are converted to 8-bit integer sequences.
72)                 (Conversion will fail if the text string contains
73)                 non-ISO-8859-1 characters.)  The numbers are then
74)                 converted to bits.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

75)             is_bitstring:
76)                 If true, treat the input as a bitstring.  By default,
77)                 the input is treated as a string of 8-bit integers, from
78)                 which the individual bits must still be extracted.
79) 
80)         Raises:
81)             ValueError:
82)                 The sequence contains values outside the permissible
83)                 range.
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

84) 
85)         """
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

86)         msg = 'sequence item out of range'
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

87)         def uint8_to_bits(value):
88)             """Yield individual bits of an 8-bit number, MSB first."""
89)             for i in (0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01):
90)                 yield 1 if value | i == value else 0
91)         if isinstance(sequence, str):
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

92)             try:
93)                 sequence = tuple(sequence.encode('iso-8859-1'))
94)             except UnicodeError as e:
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

95)                 raise ValueError(msg) from e
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

96)         else:
97)             sequence = tuple(sequence)
98)         assert_type(sequence, tuple[int, ...])
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

99)         self.bases: dict[int, collections.deque[int]] = {}
100)         def gen() -> Iterator[int]:
101)             for num in sequence:
102)                 if num not in range(2 if is_bitstring else 256):
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

103)                     raise ValueError(msg)
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

104)                 if is_bitstring:
105)                     yield num
106)                 else:
107)                     yield from uint8_to_bits(num)
108)         self.bases[2] = collections.deque(gen())
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

109) 
110)     def _all_or_nothing_shift(
111)         self, count: int, /, *, base: int = 2
112)     ) -> Sequence[int]:
113)         """Shift and return items if and only if there are enough.
114) 
115)         Args:
116)             count: Number of items to shift/consume.
117)             base: Use the base `base` sequence.
118) 
119)         Returns:
120)             If there are sufficient items in the sequence left, then
121)             consume them from the sequence and return them.  Otherwise,
122)             consume nothing, and return nothing.
123) 
124)         Notes:
125)             We currently remove now-empty sequences from the registry of
126)             sequences.
127) 
128)         Examples:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

129)             >>> seq = Sequin([1, 0, 1, 0, 0, 1, 0, 0, 0, 1],
130)             ...              is_bitstring=True)
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

131)             >>> seq.bases
132)             {2: deque([1, 0, 1, 0, 0, 1, 0, 0, 0, 1])}
133)             >>> seq._all_or_nothing_shift(3)
134)             (1, 0, 1)
135)             >>> seq._all_or_nothing_shift(3)
136)             (0, 0, 1)
137)             >>> seq.bases[2]
138)             deque([0, 0, 0, 1])
139)             >>> seq._all_or_nothing_shift(5)
140)             ()
141)             >>> seq.bases[2]
142)             deque([0, 0, 0, 1])
143)             >>> seq._all_or_nothing_shift(4)
144)             (0, 0, 0, 1)
145)             >>> 2 in seq.bases  # now-empty sequences are removed
146)             False
147) 
148)         """
149)         try:
150)             seq = self.bases[base]
151)         except KeyError:
152)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

153)         stash: collections.deque[int] = collections.deque()
154)         try:
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

155)             for _ in range(count):
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

156)                 stash.append(seq.popleft())
157)         except IndexError:
158)             seq.extendleft(reversed(stash))
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

159)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

160)         # Clean up queues.
161)         if not seq:
162)             del self.bases[base]
163)         return tuple(stash)
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

164) 
165)     @staticmethod
166)     def _big_endian_number(
167)         digits: Sequence[int], /, *, base: int = 2
168)     ) -> int:
169)         """Evaluate the given integer sequence as a big endian number.
170) 
171)         Args:
172)             digits: A sequence of integers to evaluate.
173)             base: The number base to evaluate those integers in.
174) 
175)         Raises:
176)             ValueError: `base` is an invalid base.
177)             ValueError: Not all integers are valid base `base` digits.
178) 
179)         Examples:
180)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=10)
181)             12345678
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 3 months ago

182)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=100)
183)             102030405060708
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

184)             >>> Sequin._big_endian_number([0, 0, 0, 0, 1, 4, 9, 7], base=10)
185)             1497
186)             >>> Sequin._big_endian_number([1, 0, 0, 1, 0, 0, 0, 0], base=2)
187)             144
188)             >>> Sequin._big_endian_number([1, 7, 5, 5], base=8) == 0o1755
189)             True
190) 
191)         """
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

192)         if base < 2:  # noqa: PLR2004
193)             msg = f'invalid base: {base!r}'
194)             raise ValueError(msg)
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

195)         ret = 0
196)         allowed_range = range(base)
197)         n = len(digits)
198)         for i in range(n):
199)             i2 = (n - 1) - i
200)             x = digits[i]
201)             if not isinstance(x, int):
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

202)                 msg = f'not an integer: {x!r}'
203)                 raise TypeError(msg)
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

204)             if x not in allowed_range:
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

205)                 msg = f'invalid base {base!r} digit: {x!r}'
206)                 raise ValueError(msg)
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

207)             ret += (base ** i2) * x
208)         return ret
209) 
210)     def generate(self, n: int, /) -> int:
211)         """Generate a base `n` non-negative integer.
212) 
213)         We attempt to generate a value using rejection sampling.  If the
214)         generated sample is outside the desired range (i.e., is
215)         rejected), then attempt to reuse the sample by seeding
216)         a "higher-order" input sequence of uniformly random numbers (for
217)         a different base).
218) 
219)         Args:
220)             n:
221)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

222)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

223) 
224)         Returns:
225)             A pseudorandom number in the range 0, ..., `n` - 1.
226) 
227)         Raises:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

228)             ValueError:
229)                 The range is empty.
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

230)             SequinExhaustedError:
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

231)                 The sequin is exhausted.
232) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 3 months ago

233)         Examples:
234)             >>> seq = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
235)             ...              is_bitstring=True)
236)             >>> seq2 = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
237)             ...               is_bitstring=True)
238)             >>> seq.generate(5)
239)             3
240)             >>> seq.generate(5)
241)             3
242)             >>> seq.generate(5)
243)             1
244)             >>> seq.generate(5)    # doctest: +IGNORE_EXCEPTION_DETAIL
245)             Traceback (most recent call last):
246)                 ...
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

247)             SequinExhaustedError: Sequin is exhausted
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 3 months ago

248) 
249)             Using `n = 1` does not actually consume input bits:
250) 
251)             >>> seq2.generate(1)
252)             0
253) 
254)             But it still won't work on exhausted sequins:
255) 
256)             >>> seq.generate(1)    # doctest: +IGNORE_EXCEPTION_DETAIL
257)             Traceback (most recent call last):
258)                 ...
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

259)             SequinExhaustedError: Sequin is exhausted
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 3 months ago

260) 
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

261)         """
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

262)         if 2 not in self.bases:  # noqa: PLR2004
263)             raise SequinExhaustedError
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

264)         value = self._generate_inner(n, base=2)
265)         if value == n:
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

266)             raise SequinExhaustedError
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

267)         return value
268) 
269)     def _generate_inner(
270)         self, n: int, /, *, base: int = 2
271)     ) -> int:
272)         """Recursive call to generate a base `n` non-negative integer.
273) 
274)         We first determine the correct exponent `k` to generate base `n`
275)         numbers from a stream of base `base` numbers, then attempt to
276)         take `k` numbers from the base `base` sequence (or bail if not
277)         possible).  If the resulting number `v` is out of range for
278)         base `n`, then push `v - n` onto the rejection queue for
279)         base `r` = `base` ** `k` - `n`, and attempt to generate the
280)         requested base `n` integer from the sequence of base `r` numbers
281)         next.  (This recursion is not attempted if `r` = 1.)  Otherwise,
282)         return the number.
283) 
284)         Args:
285)             n:
286)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

287)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

288)             base:
289)                 Use the base `base` sequence as a source for
290)                 pseudorandom numbers.
291) 
292)         Returns:
293)             A pseudorandom number in the range 0, ..., `n` - 1 if
294)             possible, or `n` if the stream is exhausted.
295) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

296)         Raises:
297)             ValueError:
298)                 The range is empty.
299) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 3 months ago

300)         Examples:
301)             >>> seq = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
302)             ...              is_bitstring=True)
303)             >>> seq2 = Sequin([1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
304)             ...               is_bitstring=True)
305)             >>> seq._generate_inner(5)
306)             3
307)             >>> seq._generate_inner(5)
308)             3
309)             >>> seq._generate_inner(5)
310)             1
311)             >>> seq._generate_inner(5)  # error condition: sequin exhausted
312)             5
313) 
314)             Using `n = 1` does not actually consume input bits, and
315)             always works, regardless of sequin exhaustion:
316) 
317)             >>> seq2._generate_inner(1)
318)             0
319)             >>> seq._generate_inner(1)
320)             0
321) 
322)             Using an unsuitable range will raise:
323) 
324)             >>> seq2._generate_inner(0)    # doctest: +IGNORE_EXCEPTION_DETAIL
325)             Traceback (most recent call last):
326)                 ...
327)             ValueError: invalid target range
328) 
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

329)         """
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

330)         if n < 1:
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

331)             msg = 'invalid target range'
332)             raise ValueError(msg)
333)         if base < 2:  # noqa: PLR2004
334)             msg = f'invalid base: {base!r}'
335)             raise ValueError(msg)
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

336)         # p = base ** k, where k is the smallest integer such that
337)         # p >= n.  We determine p and k inductively.
338)         p = 1
339)         k = 0
340)         while p < n:
341)             p *= base
342)             k += 1
343)         # The remainder r of p and n is used as the base for rejection
344)         # queue.
345)         r = p - n
346)         # The generated number v is initialized to n because of the
347)         # while loop below.
348)         v = n
349)         while v > n - 1:
350)             list_slice = self._all_or_nothing_shift(k, base=base)
351)             if not list_slice:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 3 months ago

352)                 if n != 1:
353)                     return n
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 1 month ago

354)                 v = 0
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

355)             v = self._big_endian_number(list_slice, base=base)
356)             if v > n - 1:
357)                 # If r is 0, then p == n, so v < n, or rather
358)                 # v <= n - 1.
359)                 assert r > 0
360)                 if r == 1:
361)                     continue
362)                 self._stash(v - n, base=r)
363)                 v = self._generate_inner(n, base=r)
364)         return v
365) 
366)     def _stash(self, value: int, /, *, base: int = 2) -> None:
367)         """Stash `value` on the base `base` sequence."""
368)         if base not in self.bases:
369)             self.bases[base] = collections.deque()
370)         self.bases[base].append(value)
371) 
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 2 months ago

372) class SequinExhaustedError(Exception):
Marco Ricci Add prototype implementation

Marco Ricci authored 4 months ago

373)     """The sequin is exhausted.
374) 
375)     No more values can be generated from this sequin.
376) 
377)     """