99863c4d7b2a3ee8f7b0d0d3e3f924afe7d40fb8
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

1) # SPDX-FileCopyrightText: 2024 Marco Ricci <m@the13thletter.info>
2) #
3) # SPDX-License-Identifier: MIT
4) 
5) """A Python reimplementation of James Coglan's "sequin" Node.js module.
6) 
7) James Coglan's "sequin" Node.js module provides a pseudorandom number
8) generator (using rejection sampling on a stream of input numbers) that
9) attempts to minimize the amount of information it throws away:
10) (non-degenerate) rejected samples are fed into a stream of higher-order
11) numbers from which the next random number generation request will be
12) served.  The sequin module is used in Coglan's "vault" module (a
13) deterministic, stateless password manager that recomputes passwords
14) instead of storing them), and this reimplementation is used for
15) a similar purpose.
16) 
17) The main API is the [`Sequin`] [sequin.Sequin] class, which is
18) thoroughly documented.
19) 
20) """
21) 
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

22) # ruff: noqa: RUF002,RUF003
23) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

24) from __future__ import annotations
25) 
26) import collections
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

27) from typing import TYPE_CHECKING
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

28) 
Marco Ricci Support Python 3.10 and PyP...

Marco Ricci authored 3 months ago

29) from typing_extensions import assert_type
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

30) 
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

31) if TYPE_CHECKING:
32)     from collections.abc import Iterator, Sequence
33) 
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

34) __all__ = ('Sequin', 'SequinExhaustedError')
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

35) __author__ = 'Marco Ricci <m@the13thletter.info>'
36) __version__ = '0.1.1'
37) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

38) 
39) class Sequin:
40)     """Generate pseudorandom non-negative numbers in different ranges.
41) 
42)     Given a (presumed high-quality) uniformly random sequence of input
43)     bits, generate pseudorandom non-negative integers in a certain range
44)     on each call of the `generate` method.  (It is permissible to
45)     specify a different range per call to `generate`; this is the main
46)     use case.)  We use a modified version of rejection sampling, where
47)     rejected values are stored in "rejection queues" if possible, and
48)     these rejection queues re-seed the next round of rejection sampling.
49) 
50)     This is a Python reimplementation of James Coglan's [Node.js sequin
51)     module][JS_SEQUIN], as introduced in [his blog post][BLOG_POST].  It
52)     uses a [technique by Christian Lawson-Perfect][SEQUIN_TECHNIQUE].
53)     I do not know why the original module is called "sequin"; I presume
54)     it to be a pun on "sequence".
55) 
56)     [JS_SEQUIN]: https://www.npmjs.com/package/sequin
57)     [BLOG_POST]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/
58)     [SEQUIN_TECHNIQUE]: https://checkmyworking.com/2012/06/converting-a-stream-of-binary-digits-to-a-stream-of-base-n-digits/
59) 
60)     """
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

61) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

62)     def __init__(
63)         self,
64)         sequence: str | bytes | bytearray | Sequence[int],
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

65)         /,
66)         *,
67)         is_bitstring: bool = False,
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

68)     ):
69)         """Initialize the Sequin.
70) 
71)         Args:
72)             sequence:
73)                 A sequence of bits, or things convertible to bits, to
74)                 seed the pseudorandom number generator.  Byte and text
75)                 strings are converted to 8-bit integer sequences.
76)                 (Conversion will fail if the text string contains
77)                 non-ISO-8859-1 characters.)  The numbers are then
78)                 converted to bits.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

79)             is_bitstring:
80)                 If true, treat the input as a bitstring.  By default,
81)                 the input is treated as a string of 8-bit integers, from
82)                 which the individual bits must still be extracted.
83) 
84)         Raises:
85)             ValueError:
86)                 The sequence contains values outside the permissible
87)                 range.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

88) 
89)         """
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

90)         msg = 'sequence item out of range'
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

91) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

92)         def uint8_to_bits(value):
93)             """Yield individual bits of an 8-bit number, MSB first."""
94)             for i in (0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01):
95)                 yield 1 if value | i == value else 0
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

96) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

97)         if isinstance(sequence, str):
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

98)             try:
99)                 sequence = tuple(sequence.encode('iso-8859-1'))
100)             except UnicodeError as e:
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

101)                 raise ValueError(msg) from e
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

102)         else:
103)             sequence = tuple(sequence)
104)         assert_type(sequence, tuple[int, ...])
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

105)         self.bases: dict[int, collections.deque[int]] = {}
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

106) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

107)         def gen() -> Iterator[int]:
108)             for num in sequence:
109)                 if num not in range(2 if is_bitstring else 256):
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

110)                     raise ValueError(msg)
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

111)                 if is_bitstring:
112)                     yield num
113)                 else:
114)                     yield from uint8_to_bits(num)
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

115) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

116)         self.bases[2] = collections.deque(gen())
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

117) 
118)     def _all_or_nothing_shift(
119)         self, count: int, /, *, base: int = 2
120)     ) -> Sequence[int]:
121)         """Shift and return items if and only if there are enough.
122) 
123)         Args:
124)             count: Number of items to shift/consume.
125)             base: Use the base `base` sequence.
126) 
127)         Returns:
128)             If there are sufficient items in the sequence left, then
129)             consume them from the sequence and return them.  Otherwise,
130)             consume nothing, and return nothing.
131) 
132)         Notes:
133)             We currently remove now-empty sequences from the registry of
134)             sequences.
135) 
136)         Examples:
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

137)             >>> seq = Sequin([1, 0, 1, 0, 0, 1, 0, 0, 0, 1], is_bitstring=True)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

138)             >>> seq.bases
139)             {2: deque([1, 0, 1, 0, 0, 1, 0, 0, 0, 1])}
140)             >>> seq._all_or_nothing_shift(3)
141)             (1, 0, 1)
142)             >>> seq._all_or_nothing_shift(3)
143)             (0, 0, 1)
144)             >>> seq.bases[2]
145)             deque([0, 0, 0, 1])
146)             >>> seq._all_or_nothing_shift(5)
147)             ()
148)             >>> seq.bases[2]
149)             deque([0, 0, 0, 1])
150)             >>> seq._all_or_nothing_shift(4)
151)             (0, 0, 0, 1)
152)             >>> 2 in seq.bases  # now-empty sequences are removed
153)             False
154) 
155)         """
156)         try:
157)             seq = self.bases[base]
158)         except KeyError:
159)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

160)         stash: collections.deque[int] = collections.deque()
161)         try:
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

162)             for _ in range(count):
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

163)                 stash.append(seq.popleft())
164)         except IndexError:
165)             seq.extendleft(reversed(stash))
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

166)             return ()
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

167)         # Clean up queues.
168)         if not seq:
169)             del self.bases[base]
170)         return tuple(stash)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

171) 
172)     @staticmethod
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

173)     def _big_endian_number(digits: Sequence[int], /, *, base: int = 2) -> int:
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

174)         """Evaluate the given integer sequence as a big endian number.
175) 
176)         Args:
177)             digits: A sequence of integers to evaluate.
178)             base: The number base to evaluate those integers in.
179) 
180)         Raises:
181)             ValueError: `base` is an invalid base.
182)             ValueError: Not all integers are valid base `base` digits.
183) 
184)         Examples:
185)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=10)
186)             12345678
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

187)             >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=100)
188)             102030405060708
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

189)             >>> Sequin._big_endian_number([0, 0, 0, 0, 1, 4, 9, 7], base=10)
190)             1497
191)             >>> Sequin._big_endian_number([1, 0, 0, 1, 0, 0, 0, 0], base=2)
192)             144
193)             >>> Sequin._big_endian_number([1, 7, 5, 5], base=8) == 0o1755
194)             True
195) 
196)         """
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

197)         if base < 2:  # noqa: PLR2004
198)             msg = f'invalid base: {base!r}'
199)             raise ValueError(msg)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

200)         ret = 0
201)         allowed_range = range(base)
202)         n = len(digits)
203)         for i in range(n):
204)             i2 = (n - 1) - i
205)             x = digits[i]
206)             if not isinstance(x, int):
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

207)                 msg = f'not an integer: {x!r}'
208)                 raise TypeError(msg)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

209)             if x not in allowed_range:
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

210)                 msg = f'invalid base {base!r} digit: {x!r}'
211)                 raise ValueError(msg)
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

212)             ret += (base**i2) * x
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

213)         return ret
214) 
215)     def generate(self, n: int, /) -> int:
216)         """Generate a base `n` non-negative integer.
217) 
218)         We attempt to generate a value using rejection sampling.  If the
219)         generated sample is outside the desired range (i.e., is
220)         rejected), then attempt to reuse the sample by seeding
221)         a "higher-order" input sequence of uniformly random numbers (for
222)         a different base).
223) 
224)         Args:
225)             n:
226)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

227)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

228) 
229)         Returns:
230)             A pseudorandom number in the range 0, ..., `n` - 1.
231) 
232)         Raises:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

233)             ValueError:
234)                 The range is empty.
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

235)             SequinExhaustedError:
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

236)                 The sequin is exhausted.
237) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

238)         Examples:
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

239)             >>> seq = Sequin(
240)             ...     [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
241)             ...     is_bitstring=True,
242)             ... )
243)             >>> seq2 = Sequin(
244)             ...     [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
245)             ...     is_bitstring=True,
246)             ... )
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

247)             >>> seq.generate(5)
248)             3
249)             >>> seq.generate(5)
250)             3
251)             >>> seq.generate(5)
252)             1
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

253)             >>> seq.generate(5)  # doctest: +IGNORE_EXCEPTION_DETAIL
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

254)             Traceback (most recent call last):
255)                 ...
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

256)             SequinExhaustedError: Sequin is exhausted
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

257) 
258)             Using `n = 1` does not actually consume input bits:
259) 
260)             >>> seq2.generate(1)
261)             0
262) 
263)             But it still won't work on exhausted sequins:
264) 
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

265)             >>> seq.generate(1)  # doctest: +IGNORE_EXCEPTION_DETAIL
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

266)             Traceback (most recent call last):
267)                 ...
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

268)             SequinExhaustedError: Sequin is exhausted
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

269) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

270)         """
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

271)         if 2 not in self.bases:  # noqa: PLR2004
272)             raise SequinExhaustedError
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

273)         value = self._generate_inner(n, base=2)
274)         if value == n:
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

275)             raise SequinExhaustedError
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

276)         return value
277) 
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

278)     def _generate_inner(self, n: int, /, *, base: int = 2) -> int:
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

279)         """Recursive call to generate a base `n` non-negative integer.
280) 
281)         We first determine the correct exponent `k` to generate base `n`
282)         numbers from a stream of base `base` numbers, then attempt to
283)         take `k` numbers from the base `base` sequence (or bail if not
284)         possible).  If the resulting number `v` is out of range for
285)         base `n`, then push `v - n` onto the rejection queue for
286)         base `r` = `base` ** `k` - `n`, and attempt to generate the
287)         requested base `n` integer from the sequence of base `r` numbers
288)         next.  (This recursion is not attempted if `r` = 1.)  Otherwise,
289)         return the number.
290) 
291)         Args:
292)             n:
293)                 Generate numbers in the range 0, ..., `n` - 1.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

294)                 (Inclusive.)  Must be larger than 0.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

295)             base:
296)                 Use the base `base` sequence as a source for
297)                 pseudorandom numbers.
298) 
299)         Returns:
300)             A pseudorandom number in the range 0, ..., `n` - 1 if
301)             possible, or `n` if the stream is exhausted.
302) 
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

303)         Raises:
304)             ValueError:
305)                 The range is empty.
306) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

307)         Examples:
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

308)             >>> seq = Sequin(
309)             ...     [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
310)             ...     is_bitstring=True,
311)             ... )
312)             >>> seq2 = Sequin(
313)             ...     [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1],
314)             ...     is_bitstring=True,
315)             ... )
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

316)             >>> seq._generate_inner(5)
317)             3
318)             >>> seq._generate_inner(5)
319)             3
320)             >>> seq._generate_inner(5)
321)             1
322)             >>> seq._generate_inner(5)  # error condition: sequin exhausted
323)             5
324) 
325)             Using `n = 1` does not actually consume input bits, and
326)             always works, regardless of sequin exhaustion:
327) 
328)             >>> seq2._generate_inner(1)
329)             0
330)             >>> seq._generate_inner(1)
331)             0
332) 
333)             Using an unsuitable range will raise:
334) 
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

335)             >>> seq2._generate_inner(0)  # doctest: +IGNORE_EXCEPTION_DETAIL
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

336)             Traceback (most recent call last):
337)                 ...
338)             ValueError: invalid target range
339) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

340)         """
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

341)         if n < 1:
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

342)             msg = 'invalid target range'
343)             raise ValueError(msg)
344)         if base < 2:  # noqa: PLR2004
345)             msg = f'invalid base: {base!r}'
346)             raise ValueError(msg)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

347)         # p = base ** k, where k is the smallest integer such that
348)         # p >= n.  We determine p and k inductively.
349)         p = 1
350)         k = 0
351)         while p < n:
352)             p *= base
353)             k += 1
354)         # The remainder r of p and n is used as the base for rejection
355)         # queue.
356)         r = p - n
357)         # The generated number v is initialized to n because of the
358)         # while loop below.
359)         v = n
360)         while v > n - 1:
361)             list_slice = self._all_or_nothing_shift(k, base=base)
362)             if not list_slice:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

363)                 if n != 1:
364)                     return n
Marco Ricci Fix style issues with ruff...

Marco Ricci authored 3 months ago

365)                 v = 0
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

366)             v = self._big_endian_number(list_slice, base=base)
367)             if v > n - 1:
368)                 # If r is 0, then p == n, so v < n, or rather
369)                 # v <= n - 1.
370)                 assert r > 0
371)                 if r == 1:
372)                     continue
373)                 self._stash(v - n, base=r)
374)                 v = self._generate_inner(n, base=r)
375)         return v
376) 
377)     def _stash(self, value: int, /, *, base: int = 2) -> None:
378)         """Stash `value` on the base `base` sequence."""
379)         if base not in self.bases:
380)             self.bases[base] = collections.deque()
381)         self.bases[base].append(value)
382) 
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

383) 
Marco Ricci Rename SequinExhaustedExcep...

Marco Ricci authored 4 months ago

384) class SequinExhaustedError(Exception):
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

385)     """The sequin is exhausted.
386) 
387)     No more values can be generated from this sequin.
388) 
389)     """
Marco Ricci Reformat everything with ruff

Marco Ricci authored 3 months ago

390)