# SPDX-FileCopyrightText: 2024 Marco Ricci # # SPDX-License-Identifier: MIT """A Python reimplementation of James Coglan's "sequin" Node.js module. James Coglan's "sequin" Node.js module provides a pseudorandom number generator (using rejection sampling on a stream of input numbers) that attempts to minimize the amount of information it throws away: (non-degenerate) rejected samples are fed into a stream of higher-order numbers from which the next random number generation request will be served. The sequin module is used in Coglan's "vault" module (a deterministic, stateless password manager that recomputes passwords instead of storing them), and this reimplementation is used for a similar purpose. The main API is the [`Sequin`] [sequin.Sequin] class, which is thoroughly documented. """ from __future__ import annotations import collections import math from collections.abc import Sequence, MutableSequence from typing import assert_type, Literal, TypeAlias __all__ = ('Sequin', 'SequinExhaustedException') class Sequin: """Generate pseudorandom non-negative numbers in different ranges. Given a (presumed high-quality) uniformly random sequence of input bits, generate pseudorandom non-negative integers in a certain range on each call of the `generate` method. (It is permissible to specify a different range per call to `generate`; this is the main use case.) We use a modified version of rejection sampling, where rejected values are stored in "rejection queues" if possible, and these rejection queues re-seed the next round of rejection sampling. This is a Python reimplementation of James Coglan's [Node.js sequin module][JS_SEQUIN], as introduced in [his blog post][BLOG_POST]. It uses a [technique by Christian Lawson-Perfect][SEQUIN_TECHNIQUE]. I do not know why the original module is called "sequin"; I presume it to be a pun on "sequence". [JS_SEQUIN]: https://www.npmjs.com/package/sequin [BLOG_POST]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/ [SEQUIN_TECHNIQUE]: https://checkmyworking.com/2012/06/converting-a-stream-of-binary-digits-to-a-stream-of-base-n-digits/ """ def __init__( self, sequence: str | bytes | bytearray | Sequence[int], ): """Initialize the Sequin. Args: sequence: A sequence of bits, or things convertible to bits, to seed the pseudorandom number generator. Byte and text strings are converted to 8-bit integer sequences. (Conversion will fail if the text string contains non-ISO-8859-1 characters.) The numbers are then converted to bits. """ def uint8_to_bits(value): """Yield individual bits of an 8-bit number, MSB first.""" for i in (0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01): yield 1 if value | i == value else 0 if isinstance(sequence, str): sequence = tuple(sequence.encode('iso-8859-1')) else: sequence = tuple(sequence) assert_type(sequence, tuple[int, ...]) self.bases: dict[int, MutableSequence[int]] = {} gen = (bit for num in sequence for bit in uint8_to_bits(num)) self.bases[2] = collections.deque(gen) def _all_or_nothing_shift( self, count: int, /, *, base: int = 2 ) -> Sequence[int]: """Shift and return items if and only if there are enough. Args: count: Number of items to shift/consume. base: Use the base `base` sequence. Returns: If there are sufficient items in the sequence left, then consume them from the sequence and return them. Otherwise, consume nothing, and return nothing. Notes: We currently remove now-empty sequences from the registry of sequences. Examples: >>> seq = Sequin([1, 0, 1, 0, 0, 1, 0, 0, 0, 1]) >>> seq.bases {2: deque([1, 0, 1, 0, 0, 1, 0, 0, 0, 1])} >>> seq._all_or_nothing_shift(3) (1, 0, 1) >>> seq._all_or_nothing_shift(3) (0, 0, 1) >>> seq.bases[2] deque([0, 0, 0, 1]) >>> seq._all_or_nothing_shift(5) () >>> seq.bases[2] deque([0, 0, 0, 1]) >>> seq._all_or_nothing_shift(4) (0, 0, 0, 1) >>> 2 in seq.bases # now-empty sequences are removed False """ try: seq = self.bases[base] except KeyError: return () else: chunk = tuple(seq[:count]) if len(chunk) == count: del seq[:count] # Clean up queues. if not seq: del self.bases[base] return chunk return () @staticmethod def _big_endian_number( digits: Sequence[int], /, *, base: int = 2 ) -> int: """Evaluate the given integer sequence as a big endian number. Args: digits: A sequence of integers to evaluate. base: The number base to evaluate those integers in. Raises: ValueError: `base` is an invalid base. ValueError: Not all integers are valid base `base` digits. Examples: >>> Sequin._big_endian_number([1, 2, 3, 4, 5, 6, 7, 8], base=10) 12345678 >>> Sequin._big_endian_number([0, 0, 0, 0, 1, 4, 9, 7], base=10) 1497 >>> Sequin._big_endian_number([1, 0, 0, 1, 0, 0, 0, 0], base=2) 144 >>> Sequin._big_endian_number([1, 7, 5, 5], base=8) == 0o1755 True >>> Sequin._big_endian_number([-1], base=3) # doctest: +ELLIPSIS Traceback (most recent call last): ... ValueError: ... >>> Sequin._big_endian_number([0], base=1) # doctest: +ELLIPSIS Traceback (most recent call last): ... ValueError: ... """ if base < 2: raise ValueError(f'invalid base: {base!r}') ret = 0 allowed_range = range(base) n = len(digits) for i in range(n): i2 = (n - 1) - i x = digits[i] if not isinstance(x, int): raise TypeError(f'not an integer: {x!r}') if x not in allowed_range: raise ValueError(f'invalid base {base!r} digit: {x!r}') ret += (base ** i2) * x return ret def generate(self, n: int, /) -> int: """Generate a base `n` non-negative integer. We attempt to generate a value using rejection sampling. If the generated sample is outside the desired range (i.e., is rejected), then attempt to reuse the sample by seeding a "higher-order" input sequence of uniformly random numbers (for a different base). Args: n: Generate numbers in the range 0, ..., `n` - 1. (Inclusive.) Returns: A pseudorandom number in the range 0, ..., `n` - 1. Raises: SequinExhaustedException: The sequin is exhausted. """ value = self._generate_inner(n, base=2) if value == n: raise SequinExhaustedException('Sequin is exhausted') return value def _generate_inner( self, n: int, /, *, base: int = 2 ) -> int: """Recursive call to generate a base `n` non-negative integer. We first determine the correct exponent `k` to generate base `n` numbers from a stream of base `base` numbers, then attempt to take `k` numbers from the base `base` sequence (or bail if not possible). If the resulting number `v` is out of range for base `n`, then push `v - n` onto the rejection queue for base `r` = `base` ** `k` - `n`, and attempt to generate the requested base `n` integer from the sequence of base `r` numbers next. (This recursion is not attempted if `r` = 1.) Otherwise, return the number. Args: n: Generate numbers in the range 0, ..., `n` - 1. (Inclusive.) base: Use the base `base` sequence as a source for pseudorandom numbers. Returns: A pseudorandom number in the range 0, ..., `n` - 1 if possible, or `n` if the stream is exhausted. """ # p = base ** k, where k is the smallest integer such that # p >= n. We determine p and k inductively. p = 1 k = 0 while p < n: p *= base k += 1 # The remainder r of p and n is used as the base for rejection # queue. r = p - n # The generated number v is initialized to n because of the # while loop below. v = n while v > n - 1: list_slice = self._all_or_nothing_shift(k, base=base) if not list_slice: return n v = self._big_endian_number(list_slice, base=base) if v > n - 1: # If r is 0, then p == n, so v < n, or rather # v <= n - 1. assert r > 0 if r == 1: continue self._stash(v - n, base=r) v = self._generate_inner(n, base=r) return v def _stash(self, value: int, /, *, base: int = 2) -> None: """Stash `value` on the base `base` sequence.""" if base not in self.bases: self.bases[base] = collections.deque() self.bases[base].append(value) class SequinExhaustedException(Exception): """The sequin is exhausted. No more values can be generated from this sequin. """