26249bcee644fc26eae214f29539cb539f32b879
Marco Ricci Change the author e-mail ad...

Marco Ricci authored 3 months ago

1) # SPDX-FileCopyrightText: 2024 Marco Ricci <software@the13thletter.info>
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

2) #
3) # SPDX-License-Identifier: MIT
4) 
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 3 months ago

5) """Python port of the vault(1) password generation scheme."""
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

6) 
7) from __future__ import annotations
8) 
9) import base64
10) import collections
11) import hashlib
12) import math
Marco Ricci Fix miscellaneous small doc...

Marco Ricci authored 3 months ago

13) import types
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

14) from collections.abc import Callable
15) from typing import TypeAlias
16) 
17) from typing_extensions import assert_type
18) 
19) from derivepassphrase import sequin, ssh_agent
20) 
Marco Ricci Change the author e-mail ad...

Marco Ricci authored 3 months ago

21) __author__ = 'Marco Ricci <software@the13thletter.info>'
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

22) 
23) 
24) class Vault:
25)     """A work-alike of James Coglan's vault.
26) 
27)     Store settings for generating (actually: deriving) passphrases for
28)     named services, with various constraints, given only a master
29)     passphrase.  Also, actually generate the passphrase.  The derivation
30)     is deterministic and non-secret; only the master passphrase need be
31)     kept secret.  The implementation is compatible with [vault][].
32) 
33)     [James Coglan explains the passphrase derivation algorithm in great
34)     detail][ALGORITHM] in his blog post on said topic: A principally
35)     infinite bit stream is obtained by running a key-derivation function
36)     on the master passphrase and the service name, then this bit stream
Marco Ricci Update documentation to use...

Marco Ricci authored 4 months ago

37)     is fed into a [Sequin][derivepassphrase.sequin.Sequin] to generate
38)     random numbers in the correct range, and finally these random
39)     numbers select passphrase characters until the desired length is
40)     reached.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

41) 
Marco Ricci Update all URLs to stable a...

Marco Ricci authored 3 months ago

42)     [vault]: https://www.npmjs.com/package/vault
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

43)     [ALGORITHM]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/
44) 
45)     """
46) 
    # Fixed byte tag; `create_hash` appends it to the service name to
    # form the PBKDF2 salt.
    _UUID = b'e87eb0f4-34cb-46b9-93ad-766c5ab063e7'
    """A tag used by vault in the bit stream generation."""
    # Read-only view (MappingProxyType) over an ordered mapping from
    # character class name to the bytes making up that class.  The
    # composite classes ('alpha', 'alphanum', 'all') are spelled out as
    # adjacent bytes literals because `_CHARSETS` is not yet bound while
    # this expression is being evaluated; the inline comments name the
    # class each literal duplicates.
    _CHARSETS = types.MappingProxyType(
        collections.OrderedDict([
            ('lower', b'abcdefghijklmnopqrstuvwxyz'),
            ('upper', b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
            (
                'alpha',
                (
                    # _CHARSETS['lower']
                    b'abcdefghijklmnopqrstuvwxyz'
                    # _CHARSETS['upper']
                    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                ),
            ),
            ('number', b'0123456789'),
            (
                'alphanum',
                (
                    # _CHARSETS['lower']
                    b'abcdefghijklmnopqrstuvwxyz'
                    # _CHARSETS['upper']
                    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                    # _CHARSETS['number']
                    b'0123456789'
                ),
            ),
            ('space', b' '),
            ('dash', b'-_'),
            # Note: 'symbol' ends with the two 'dash' characters and
            # contains no backquote.
            ('symbol', b'!"#$%&\'()*+,./:;<=>?@[\\]^{|}~-_'),
            (
                'all',
                (
                    # _CHARSETS['lower']
                    b'abcdefghijklmnopqrstuvwxyz'
                    # _CHARSETS['upper']
                    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                    # _CHARSETS['number']
                    b'0123456789'
                    # _CHARSETS['space']
                    b' '
                    # _CHARSETS['symbol']
                    b'!"#$%&\'()*+,./:;<=>?@[\\]^{|}~-_'
                ),
            ),
        ])
    )
    """
        Known character sets from which to draw passphrase characters.
        Relies on a certain, fixed order for their definition and their
        contents.

    """
100) 
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 3 months ago

101)     def __init__(  # noqa: PLR0913
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

102)         self,
103)         *,
104)         phrase: bytes | bytearray | str = b'',
105)         length: int = 20,
106)         repeat: int = 0,
107)         lower: int | None = None,
108)         upper: int | None = None,
109)         number: int | None = None,
110)         space: int | None = None,
111)         dash: int | None = None,
112)         symbol: int | None = None,
113)     ) -> None:
114)         """Initialize the Vault object.
115) 
116)         Args:
117)             phrase:
118)                 The master passphrase from which to derive the service
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

119)                 passphrases.  If a string, then the UTF-8 encoding of
120)                 the string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

121)             length:
122)                 Desired passphrase length.
123)             repeat:
124)                 The maximum number of immediate character repetitions
125)                 allowed in the passphrase.  Disabled if set to 0.
126)             lower:
127)                 Optional constraint on ASCII lowercase characters.  If
128)                 positive, include this many lowercase characters
129)                 somewhere in the passphrase.  If 0, avoid lowercase
130)                 characters altogether.
131)             upper:
132)                 Same as `lower`, but for ASCII uppercase characters.
133)             number:
134)                 Same as `lower`, but for ASCII digits.
135)             space:
136)                 Same as `lower`, but for the space character.
137)             dash:
138)                 Same as `lower`, but for the hyphen-minus and underscore
139)                 characters.
140)             symbol:
141)                 Same as `lower`, but for all other hitherto unlisted
142)                 ASCII printable characters (except backquote).
143) 
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 3 months ago

144)         Raises:
145)             ValueError:
146)                 Conflicting passphrase constraints.  Permit more
147)                 characters, or increase the desired passphrase length.
148) 
Marco Ricci Add hypothesis-based tests...

Marco Ricci authored 2 months ago

149)         Warning:
150)             Because of repetition constraints, it is not always possible
151)             to detect conflicting passphrase constraints at construction
152)             time.
153) 
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

154)         """
155)         self._phrase = self._get_binary_string(phrase)
156)         self._length = length
157)         self._repeat = repeat
158)         self._allowed = bytearray(self._CHARSETS['all'])
159)         self._required: list[bytes] = []
160) 
161)         def subtract_or_require(
162)             count: int | None, characters: bytes | bytearray
163)         ) -> None:
164)             if not isinstance(count, int):
165)                 return
166)             if count <= 0:
167)                 self._allowed = self._subtract(characters, self._allowed)
168)             else:
169)                 for _ in range(count):
170)                     self._required.append(characters)
171) 
172)         subtract_or_require(lower, self._CHARSETS['lower'])
173)         subtract_or_require(upper, self._CHARSETS['upper'])
174)         subtract_or_require(number, self._CHARSETS['number'])
175)         subtract_or_require(space, self._CHARSETS['space'])
176)         subtract_or_require(dash, self._CHARSETS['dash'])
177)         subtract_or_require(symbol, self._CHARSETS['symbol'])
178)         if len(self._required) > self._length:
179)             msg = 'requested passphrase length too short'
180)             raise ValueError(msg)
181)         if not self._allowed:
182)             msg = 'no allowed characters left'
183)             raise ValueError(msg)
184)         for _ in range(len(self._required), self._length):
185)             self._required.append(bytes(self._allowed))
186) 
187)     def _entropy(self) -> float:
188)         """Estimate the passphrase entropy, given the current settings.
189) 
190)         The entropy is the base 2 logarithm of the amount of
191)         possibilities.  We operate directly on the logarithms, and use
192)         sorting and [`math.fsum`][] to keep high accuracy.
193) 
194)         Note:
195)             We actually overestimate the entropy here because of poor
196)             handling of character repetitions.  In the extreme, assuming
197)             that only one character were allowed, then because there is
198)             only one possible string of each given length, the entropy
199)             of that string `s` is always be zero.  However, we calculate
200)             the entropy as `math.log2(math.factorial(len(s)))`, i.e. we
201)             assume the characters at the respective string position are
202)             distinguishable from each other.
203) 
204)         Returns:
205)             A valid (and somewhat close) upper bound to the entropy.
206) 
207)         """
208)         factors: list[int] = []
209)         if not self._required or any(not x for x in self._required):
210)             return float('-inf')
211)         for i, charset in enumerate(self._required):
212)             factors.extend([i + 1, len(charset)])
213)         factors.sort()
214)         return math.fsum(math.log2(f) for f in factors)
215) 
216)     def _estimate_sufficient_hash_length(
217)         self,
218)         safety_factor: float = 2.0,
219)     ) -> int:
220)         """Estimate the sufficient hash length, given the current settings.
221) 
222)         Using the entropy (via `_entropy`) and a safety factor, give an
223)         initial estimate of the length to use for `create_hash` such
224)         that using a `Sequin` with this hash will not exhaust it during
225)         passphrase generation.
226) 
227)         Args:
228)             safety_factor: The safety factor.  Must be at least 1.
229) 
230)         Returns:
231)             The estimated sufficient hash length.
232) 
233)         Warning:
234)             This is a heuristic, not an exact computation; it may
235)             underestimate the true necessary hash length.  It is
236)             intended as a starting point for searching for a sufficient
237)             hash length, usually by doubling the hash length each time
238)             it does not yet prove so.
239) 
240)         """
241)         try:
242)             safety_factor = float(safety_factor)
243)         except TypeError as e:
244)             msg = f'invalid safety factor: not a float: {safety_factor!r}'
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 3 months ago

245)             raise TypeError(msg) from e  # noqa: DOC501
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

246)         if not math.isfinite(safety_factor) or safety_factor < 1.0:
247)             msg = f'invalid safety factor {safety_factor!r}'
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 3 months ago

248)             raise ValueError(msg)  # noqa: DOC501
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

249)         # Ensure the bound is strictly positive.
250)         entropy_bound = max(1, self._entropy())
251)         return int(math.ceil(safety_factor * entropy_bound / 8))
252) 
253)     @staticmethod
254)     def _get_binary_string(s: bytes | bytearray | str, /) -> bytes:
255)         """Convert the input string to a read-only, binary string.
256) 
Marco Ricci Allow all textual strings,...

Marco Ricci authored 3 months ago

257)         If it is a text string, return the string's UTF-8
258)         representation.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

259) 
260)         Args:
261)             s: The string to (check and) convert.
262) 
263)         Returns:
264)             A read-only, binary copy of the string.
265) 
266)         """
267)         if isinstance(s, str):
268)             return s.encode('UTF-8')
269)         return bytes(s)
270) 
271)     @classmethod
272)     def create_hash(
273)         cls,
274)         phrase: bytes | bytearray | str,
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

275)         service: bytes | bytearray | str,
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

276)         *,
277)         length: int = 32,
278)     ) -> bytes:
279)         r"""Create a pseudorandom byte stream from phrase and service.
280) 
281)         Create a pseudorandom byte stream from `phrase` and `service` by
282)         feeding them into the key-derivation function PBKDF2
283)         (8 iterations, using SHA-1).
284) 
285)         Args:
286)             phrase:
287)                 A master passphrase, or sometimes an SSH signature.
288)                 Used as the key for PBKDF2, the underlying cryptographic
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

289)                 primitive.  If a string, then the UTF-8 encoding of the
290)                 string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

291)             service:
292)                 A vault service name.  Will be suffixed with
293)                 `Vault._UUID`, and then used as the salt value for
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

294)                 PBKDF2.  If a string, then the UTF-8 encoding of the
295)                 string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

296)             length:
297)                 The length of the byte stream to generate.
298) 
299)         Returns:
300)             A pseudorandom byte string of length `length`.
301) 
302)         Note:
303)             Shorter values returned from this method (with the same key
304)             and message) are prefixes of longer values returned from
305)             this method.  (This property is inherited from the
306)             underlying PBKDF2 function.)  It is thus safe (if slow) to
307)             call this method with the same input with ever-increasing
308)             target lengths.
309) 
310)         Examples:
311)             >>> # See also Vault.phrase_from_key examples.
312)             >>> phrase = bytes.fromhex('''
313)             ... 00 00 00 0b 73 73 68 2d 65 64 32 35 35 31 39
314)             ... 00 00 00 40
315)             ... f0 98 19 80 6c 1a 97 d5 26 03 6e cc e3 65 8f 86
316)             ... 66 07 13 19 13 09 21 33 33 f9 e4 36 53 1d af fd
317)             ... 0d 08 1f ec f8 73 9b 8c 5f 55 39 16 7c 53 54 2c
318)             ... 1e 52 bb 30 ed 7f 89 e2 2f 69 51 55 d8 9e a6 02
319)             ... ''')
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

320)             >>> Vault.create_hash(phrase, 'some_service', length=4)
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

321)             b'M\xb1<S'
322)             >>> Vault.create_hash(phrase, b'some_service', length=16)
323)             b'M\xb1<S\x827E\xd1M\xaf\xf8~\xc8n\x10\xcc'
324)             >>> Vault.create_hash(phrase, b'NOSUCHSERVICE', length=16)
325)             b'\x1c\xc3\x9c\xd9\xb6\x1a\x99CS\x07\xc41\xf4\x85#s'
326) 
327)         """
328)         phrase = cls._get_binary_string(phrase)
329)         assert not isinstance(phrase, str)
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

330)         salt = cls._get_binary_string(service) + cls._UUID
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

331)         return hashlib.pbkdf2_hmac(
332)             hash_name='sha1',
333)             password=phrase,
334)             salt=salt,
335)             iterations=8,
336)             dklen=length,
337)         )
338) 
339)     def generate(
340)         self,
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

341)         service_name: bytes | bytearray | str,
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

342)         /,
343)         *,
344)         phrase: bytes | bytearray | str = b'',
345)     ) -> bytes:
346)         r"""Generate a service passphrase.
347) 
348)         Args:
349)             service_name:
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

350)                 The service name.  If a string, then the UTF-8 encoding
351)                 of the string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

352)             phrase:
353)                 If given, override the passphrase given during
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

354)                 construction.  If a string, then the UTF-8 encoding of
355)                 the string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

356) 
357)         Returns:
358)             The service passphrase.
359) 
Marco Ricci Add hypothesis-based tests...

Marco Ricci authored 2 months ago

360)         Raises:
361)             ValueError:
362)                 Conflicting passphrase constraints.  Permit more
363)                 characters, or increase the desired passphrase length.
364) 
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

365)         Examples:
366)             >>> phrase = b'She cells C shells bye the sea shoars'
367)             >>> # Using default options in constructor.
368)             >>> Vault(phrase=phrase).generate(b'google')
369)             b': 4TVH#5:aZl8LueOT\\{'
370)             >>> # Also possible:
371)             >>> Vault().generate(b'google', phrase=phrase)
372)             b': 4TVH#5:aZl8LueOT\\{'
373) 
Marco Ricci Add hypothesis-based tests...

Marco Ricci authored 2 months ago

374)             Conflicting constraints are sometimes only found during
375)             generation.
376) 
377)             >>> # Note: no error here...
378)             >>> v = Vault(
379)             ...     lower=0,
380)             ...     upper=0,
381)             ...     number=0,
382)             ...     space=2,
383)             ...     dash=0,
384)             ...     symbol=1,
385)             ...     repeat=2,
386)             ...     length=3,
387)             ... )
388)             >>> # ... but here.
389)             >>> v.generate(
390)             ...     '0', phrase=b'\x00'
391)             ... )  # doctest: +IGNORE_EXCEPTION_DETAIL
392)             Traceback (most recent call last):
393)                 ...
394)             ValueError: no allowed characters left
395) 
396) 
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

397)         """
398)         hash_length = self._estimate_sufficient_hash_length()
399)         assert hash_length >= 1
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

400)         # Ensure the phrase and the service name are bytes objects.
401)         # This is needed later for safe concatenation.
402)         service_name = self._get_binary_string(service_name)
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

403)         assert_type(service_name, bytes)
404)         if not phrase:
405)             phrase = self._phrase
406)         phrase = self._get_binary_string(phrase)
Marco Ricci Support text string service...

Marco Ricci authored 3 months ago

407)         assert_type(phrase, bytes)
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 4 months ago

408)         # Repeat the passphrase generation with ever-increasing hash
409)         # lengths, until the passphrase can be formed without exhausting
410)         # the sequin.  See the guarantee in the create_hash method for
411)         # why this works.
412)         while True:
413)             try:
414)                 required = self._required[:]
415)                 seq = sequin.Sequin(
416)                     self.create_hash(
417)                         phrase=phrase, service=service_name, length=hash_length
418)                     )
419)                 )
420)                 result = bytearray()
421)                 while len(result) < self._length:
422)                     pos = seq.generate(len(required))
423)                     charset = required.pop(pos)
424)                     # Determine if an unlucky choice right now might
425)                     # violate the restriction on repeated characters.
426)                     # That is, check if the current partial passphrase
427)                     # ends with r - 1 copies of the same character
428)                     # (where r is the repeat limit that must not be
429)                     # reached), and if so, remove this same character
430)                     # from the current character's allowed set.
431)                     if self._repeat and result:
432)                         bad_suffix = bytes(result[-1:]) * (self._repeat - 1)
433)                         if result.endswith(bad_suffix):
434)                             charset = self._subtract(
435)                                 bytes(result[-1:]), charset
436)                             )
437)                     pos = seq.generate(len(charset))
438)                     result.extend(charset[pos : pos + 1])
Marco Ricci Add hypothesis-based tests...

Marco Ricci authored 2 months ago

439)             except ValueError as exc:
440)                 msg = 'no allowed characters left'
441)                 raise ValueError(msg) from exc