eea542a5c4507c46f795152b3e6577b1408df4bd
Marco Ricci Change the author e-mail ad...

Marco Ricci authored 2 months ago

1) # SPDX-FileCopyrightText: 2024 Marco Ricci <software@the13thletter.info>
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

2) #
3) # SPDX-License-Identifier: MIT
4) 
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 2 months ago

5) """Python port of the vault(1) password generation scheme."""
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

6) 
7) from __future__ import annotations
8) 
9) import base64
10) import collections
11) import hashlib
12) import math
Marco Ricci Fix miscellaneous small doc...

Marco Ricci authored 2 months ago

13) import types
Marco Ricci Add support for Python 3.9

Marco Ricci authored 1 month ago

14) from typing import TYPE_CHECKING
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

15) 
Marco Ricci Add support for Python 3.9

Marco Ricci authored 1 month ago

16) from typing_extensions import TypeAlias, assert_type
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

17) 
18) from derivepassphrase import sequin, ssh_agent
19) 
Marco Ricci Add support for Python 3.9

Marco Ricci authored 1 month ago

20) if TYPE_CHECKING:
21)     from collections.abc import Callable
22) 
Marco Ricci Change the author e-mail ad...

Marco Ricci authored 2 months ago

23) __author__ = 'Marco Ricci <software@the13thletter.info>'
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

24) 
25) 
26) class Vault:
27)     """A work-alike of James Coglan's vault.
28) 
29)     Store settings for generating (actually: deriving) passphrases for
30)     named services, with various constraints, given only a master
31)     passphrase.  Also, actually generate the passphrase.  The derivation
32)     is deterministic and non-secret; only the master passphrase need be
33)     kept secret.  The implementation is compatible with [vault][].
34) 
35)     [James Coglan explains the passphrase derivation algorithm in great
36)     detail][ALGORITHM] in his blog post on said topic: A principally
37)     infinite bit stream is obtained by running a key-derivation function
38)     on the master passphrase and the service name, then this bit stream
Marco Ricci Generate nicer documentatio...

Marco Ricci authored 1 month ago

39)     is fed into a [sequin.Sequin][] to generate random numbers in the
40)     correct range, and finally these random numbers select passphrase
41)     characters until the desired length is reached.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

42) 
Marco Ricci Update all URLs to stable a...

Marco Ricci authored 1 month ago

43)     [vault]: https://www.npmjs.com/package/vault
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

44)     [ALGORITHM]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/
45) 
46)     """
47) 
    # Fixed tag appended to the service name to form the PBKDF2 salt (see
    # `create_hash`).
    _UUID = b'e87eb0f4-34cb-46b9-93ad-766c5ab063e7'
    """A tag used by vault in the bit stream generation."""
    # Read-only view (MappingProxyType) over an ordered mapping from
    # character set name to the bytes making up that set.  The composite
    # sets ('alpha', 'alphanum', 'all') spell out their parts again as
    # adjacent bytes literals, because the mapping under construction
    # cannot be referenced from within its own definition; the inline
    # comments name the set each literal duplicates.
    _CHARSETS = types.MappingProxyType(
        collections.OrderedDict([
            ('lower', b'abcdefghijklmnopqrstuvwxyz'),
            ('upper', b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
            (
                'alpha',
                (
                    # _CHARSETS['lower']
                    b'abcdefghijklmnopqrstuvwxyz'
                    # _CHARSETS['upper']
                    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                ),
            ),
            ('number', b'0123456789'),
            (
                'alphanum',
                (
                    # _CHARSETS['lower']
                    b'abcdefghijklmnopqrstuvwxyz'
                    # _CHARSETS['upper']
                    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                    # _CHARSETS['number']
                    b'0123456789'
                ),
            ),
            ('space', b' '),
            ('dash', b'-_'),
            # Note: 'symbol' ends with the two 'dash' characters, and does
            # not contain the backquote.
            ('symbol', b'!"#$%&\'()*+,./:;<=>?@[\\]^{|}~-_'),
            (
                'all',
                (
                    # _CHARSETS['lower']
                    b'abcdefghijklmnopqrstuvwxyz'
                    # _CHARSETS['upper']
                    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                    # _CHARSETS['number']
                    b'0123456789'
                    # _CHARSETS['space']
                    b' '
                    # _CHARSETS['symbol']
                    b'!"#$%&\'()*+,./:;<=>?@[\\]^{|}~-_'
                ),
            ),
        ])
    )
    """
        Known character sets from which to draw passphrase characters.
        Relies on a certain, fixed order for their definition and their
        contents.

    """
101) 
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 2 months ago

102)     def __init__(  # noqa: PLR0913
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

103)         self,
104)         *,
105)         phrase: bytes | bytearray | str = b'',
106)         length: int = 20,
107)         repeat: int = 0,
108)         lower: int | None = None,
109)         upper: int | None = None,
110)         number: int | None = None,
111)         space: int | None = None,
112)         dash: int | None = None,
113)         symbol: int | None = None,
114)     ) -> None:
115)         """Initialize the Vault object.
116) 
117)         Args:
118)             phrase:
119)                 The master passphrase from which to derive the service
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

120)                 passphrases.  If a string, then the UTF-8 encoding of
121)                 the string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

122)             length:
123)                 Desired passphrase length.
124)             repeat:
125)                 The maximum number of immediate character repetitions
126)                 allowed in the passphrase.  Disabled if set to 0.
127)             lower:
128)                 Optional constraint on ASCII lowercase characters.  If
129)                 positive, include this many lowercase characters
130)                 somewhere in the passphrase.  If 0, avoid lowercase
131)                 characters altogether.
132)             upper:
133)                 Same as `lower`, but for ASCII uppercase characters.
134)             number:
135)                 Same as `lower`, but for ASCII digits.
136)             space:
137)                 Same as `lower`, but for the space character.
138)             dash:
139)                 Same as `lower`, but for the hyphen-minus and underscore
140)                 characters.
141)             symbol:
142)                 Same as `lower`, but for all other hitherto unlisted
143)                 ASCII printable characters (except backquote).
144) 
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 2 months ago

145)         Raises:
146)             ValueError:
147)                 Conflicting passphrase constraints.  Permit more
148)                 characters, or increase the desired passphrase length.
149) 
Marco Ricci Add hypothesis-based tests...

Marco Ricci authored 1 month ago

150)         Warning:
151)             Because of repetition constraints, it is not always possible
152)             to detect conflicting passphrase constraints at construction
153)             time.
154) 
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

155)         """
156)         self._phrase = self._get_binary_string(phrase)
157)         self._length = length
158)         self._repeat = repeat
159)         self._allowed = bytearray(self._CHARSETS['all'])
160)         self._required: list[bytes] = []
161) 
162)         def subtract_or_require(
163)             count: int | None, characters: bytes | bytearray
164)         ) -> None:
165)             if not isinstance(count, int):
166)                 return
167)             if count <= 0:
168)                 self._allowed = self._subtract(characters, self._allowed)
169)             else:
170)                 for _ in range(count):
171)                     self._required.append(characters)
172) 
173)         subtract_or_require(lower, self._CHARSETS['lower'])
174)         subtract_or_require(upper, self._CHARSETS['upper'])
175)         subtract_or_require(number, self._CHARSETS['number'])
176)         subtract_or_require(space, self._CHARSETS['space'])
177)         subtract_or_require(dash, self._CHARSETS['dash'])
178)         subtract_or_require(symbol, self._CHARSETS['symbol'])
179)         if len(self._required) > self._length:
180)             msg = 'requested passphrase length too short'
181)             raise ValueError(msg)
182)         if not self._allowed:
183)             msg = 'no allowed characters left'
184)             raise ValueError(msg)
185)         for _ in range(len(self._required), self._length):
186)             self._required.append(bytes(self._allowed))
187) 
188)     def _entropy(self) -> float:
189)         """Estimate the passphrase entropy, given the current settings.
190) 
191)         The entropy is the base 2 logarithm of the amount of
192)         possibilities.  We operate directly on the logarithms, and use
193)         sorting and [`math.fsum`][] to keep high accuracy.
194) 
195)         Note:
196)             We actually overestimate the entropy here because of poor
197)             handling of character repetitions.  In the extreme, assuming
198)             that only one character were allowed, then because there is
199)             only one possible string of each given length, the entropy
200)             of that string `s` is always be zero.  However, we calculate
201)             the entropy as `math.log2(math.factorial(len(s)))`, i.e. we
202)             assume the characters at the respective string position are
203)             distinguishable from each other.
204) 
205)         Returns:
206)             A valid (and somewhat close) upper bound to the entropy.
207) 
208)         """
209)         factors: list[int] = []
210)         if not self._required or any(not x for x in self._required):
211)             return float('-inf')
212)         for i, charset in enumerate(self._required):
213)             factors.extend([i + 1, len(charset)])
214)         factors.sort()
215)         return math.fsum(math.log2(f) for f in factors)
216) 
217)     def _estimate_sufficient_hash_length(
218)         self,
219)         safety_factor: float = 2.0,
220)     ) -> int:
221)         """Estimate the sufficient hash length, given the current settings.
222) 
Marco Ricci Generate nicer documentatio...

Marco Ricci authored 1 month ago

223)         Using the entropy (via [`_entropy`][]) and a safety factor, give
224)         an initial estimate of the length to use for [`create_hash`][]
225)         such that using a [`sequin.Sequin`][] with this hash will not
226)         exhaust it during passphrase generation.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

227) 
228)         Args:
229)             safety_factor: The safety factor.  Must be at least 1.
230) 
231)         Returns:
232)             The estimated sufficient hash length.
233) 
234)         Warning:
235)             This is a heuristic, not an exact computation; it may
236)             underestimate the true necessary hash length.  It is
237)             intended as a starting point for searching for a sufficient
238)             hash length, usually by doubling the hash length each time
239)             it does not yet prove so.
240) 
241)         """
242)         try:
243)             safety_factor = float(safety_factor)
244)         except TypeError as e:
245)             msg = f'invalid safety factor: not a float: {safety_factor!r}'
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 2 months ago

246)             raise TypeError(msg) from e  # noqa: DOC501
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

247)         if not math.isfinite(safety_factor) or safety_factor < 1.0:
248)             msg = f'invalid safety factor {safety_factor!r}'
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 2 months ago

249)             raise ValueError(msg)  # noqa: DOC501
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

250)         # Ensure the bound is strictly positive.
251)         entropy_bound = max(1, self._entropy())
252)         return int(math.ceil(safety_factor * entropy_bound / 8))
253) 
254)     @staticmethod
255)     def _get_binary_string(s: bytes | bytearray | str, /) -> bytes:
256)         """Convert the input string to a read-only, binary string.
257) 
Marco Ricci Allow all textual strings,...

Marco Ricci authored 2 months ago

258)         If it is a text string, return the string's UTF-8
259)         representation.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

260) 
261)         Args:
262)             s: The string to (check and) convert.
263) 
264)         Returns:
265)             A read-only, binary copy of the string.
266) 
267)         """
268)         if isinstance(s, str):
269)             return s.encode('UTF-8')
270)         return bytes(s)
271) 
272)     @classmethod
273)     def create_hash(
274)         cls,
275)         phrase: bytes | bytearray | str,
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

276)         service: bytes | bytearray | str,
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

277)         *,
278)         length: int = 32,
279)     ) -> bytes:
280)         r"""Create a pseudorandom byte stream from phrase and service.
281) 
282)         Create a pseudorandom byte stream from `phrase` and `service` by
283)         feeding them into the key-derivation function PBKDF2
284)         (8 iterations, using SHA-1).
285) 
286)         Args:
287)             phrase:
288)                 A master passphrase, or sometimes an SSH signature.
289)                 Used as the key for PBKDF2, the underlying cryptographic
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

290)                 primitive.  If a string, then the UTF-8 encoding of the
291)                 string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

292)             service:
293)                 A vault service name.  Will be suffixed with
294)                 `Vault._UUID`, and then used as the salt value for
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

295)                 PBKDF2.  If a string, then the UTF-8 encoding of the
296)                 string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

297)             length:
298)                 The length of the byte stream to generate.
299) 
300)         Returns:
301)             A pseudorandom byte string of length `length`.
302) 
303)         Note:
304)             Shorter values returned from this method (with the same key
305)             and message) are prefixes of longer values returned from
306)             this method.  (This property is inherited from the
307)             underlying PBKDF2 function.)  It is thus safe (if slow) to
308)             call this method with the same input with ever-increasing
309)             target lengths.
310) 
311)         Examples:
312)             >>> # See also Vault.phrase_from_key examples.
313)             >>> phrase = bytes.fromhex('''
314)             ... 00 00 00 0b 73 73 68 2d 65 64 32 35 35 31 39
315)             ... 00 00 00 40
316)             ... f0 98 19 80 6c 1a 97 d5 26 03 6e cc e3 65 8f 86
317)             ... 66 07 13 19 13 09 21 33 33 f9 e4 36 53 1d af fd
318)             ... 0d 08 1f ec f8 73 9b 8c 5f 55 39 16 7c 53 54 2c
319)             ... 1e 52 bb 30 ed 7f 89 e2 2f 69 51 55 d8 9e a6 02
320)             ... ''')
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

321)             >>> Vault.create_hash(phrase, 'some_service', length=4)
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

322)             b'M\xb1<S'
323)             >>> Vault.create_hash(phrase, b'some_service', length=16)
324)             b'M\xb1<S\x827E\xd1M\xaf\xf8~\xc8n\x10\xcc'
325)             >>> Vault.create_hash(phrase, b'NOSUCHSERVICE', length=16)
326)             b'\x1c\xc3\x9c\xd9\xb6\x1a\x99CS\x07\xc41\xf4\x85#s'
327) 
328)         """
329)         phrase = cls._get_binary_string(phrase)
330)         assert not isinstance(phrase, str)
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

331)         salt = cls._get_binary_string(service) + cls._UUID
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

332)         return hashlib.pbkdf2_hmac(
333)             hash_name='sha1',
334)             password=phrase,
335)             salt=salt,
336)             iterations=8,
337)             dklen=length,
338)         )
339) 
340)     def generate(
341)         self,
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

342)         service_name: bytes | bytearray | str,
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

343)         /,
344)         *,
345)         phrase: bytes | bytearray | str = b'',
346)     ) -> bytes:
347)         r"""Generate a service passphrase.
348) 
349)         Args:
350)             service_name:
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

351)                 The service name.  If a string, then the UTF-8 encoding
352)                 of the string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

353)             phrase:
354)                 If given, override the passphrase given during
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

355)                 construction.  If a string, then the UTF-8 encoding of
356)                 the string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

357) 
358)         Returns:
359)             The service passphrase.
360) 
Marco Ricci Add hypothesis-based tests...

Marco Ricci authored 1 month ago

361)         Raises:
362)             ValueError:
363)                 Conflicting passphrase constraints.  Permit more
364)                 characters, or increase the desired passphrase length.
365) 
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

366)         Examples:
367)             >>> phrase = b'She cells C shells bye the sea shoars'
368)             >>> # Using default options in constructor.
369)             >>> Vault(phrase=phrase).generate(b'google')
370)             b': 4TVH#5:aZl8LueOT\\{'
371)             >>> # Also possible:
372)             >>> Vault().generate(b'google', phrase=phrase)
373)             b': 4TVH#5:aZl8LueOT\\{'
374) 
Marco Ricci Add hypothesis-based tests...

Marco Ricci authored 1 month ago

375)             Conflicting constraints are sometimes only found during
376)             generation.
377) 
378)             >>> # Note: no error here...
379)             >>> v = Vault(
380)             ...     lower=0,
381)             ...     upper=0,
382)             ...     number=0,
383)             ...     space=2,
384)             ...     dash=0,
385)             ...     symbol=1,
386)             ...     repeat=2,
387)             ...     length=3,
388)             ... )
389)             >>> # ... but here.
390)             >>> v.generate(
391)             ...     '0', phrase=b'\x00'
392)             ... )  # doctest: +IGNORE_EXCEPTION_DETAIL
393)             Traceback (most recent call last):
394)                 ...
395)             ValueError: no allowed characters left
396) 
397) 
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

398)         """
399)         hash_length = self._estimate_sufficient_hash_length()
400)         assert hash_length >= 1
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

401)         # Ensure the phrase and the service name are bytes objects.
402)         # This is needed later for safe concatenation.
403)         service_name = self._get_binary_string(service_name)
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

404)         assert_type(service_name, bytes)
405)         if not phrase:
406)             phrase = self._phrase
407)         phrase = self._get_binary_string(phrase)
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

408)         assert_type(phrase, bytes)
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

409)         # Repeat the passphrase generation with ever-increasing hash
410)         # lengths, until the passphrase can be formed without exhausting
411)         # the sequin.  See the guarantee in the create_hash method for
412)         # why this works.
413)         while True:
414)             try:
415)                 required = self._required[:]
416)                 seq = sequin.Sequin(
417)                     self.create_hash(
418)                         phrase=phrase, service=service_name, length=hash_length
419)                     )
420)                 )
421)                 result = bytearray()
422)                 while len(result) < self._length:
423)                     pos = seq.generate(len(required))
424)                     charset = required.pop(pos)
425)                     # Determine if an unlucky choice right now might
426)                     # violate the restriction on repeated characters.
427)                     # That is, check if the current partial passphrase
428)                     # ends with r - 1 copies of the same character
429)                     # (where r is the repeat limit that must not be
430)                     # reached), and if so, remove this same character
431)                     # from the current character's allowed set.
432)                     if self._repeat and result:
433)                         bad_suffix = bytes(result[-1:]) * (self._repeat - 1)
434)                         if result.endswith(bad_suffix):
435)                             charset = self._subtract(
436)                                 bytes(result[-1:]), charset
437)                             )
438)                     pos = seq.generate(len(charset))
439)                     result.extend(charset[pos : pos + 1])
Marco Ricci Add hypothesis-based tests...

Marco Ricci authored 1 month ago

440)             except ValueError as exc:
441)                 msg = 'no allowed characters left'
442)                 raise ValueError(msg) from exc
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

443)             except sequin.SequinExhaustedError:
444)                 hash_length *= 2
445)             else:
446)                 return bytes(result)
447) 
448)     @staticmethod
449)     def _is_suitable_ssh_key(key: bytes | bytearray, /) -> bool:
450)         """Check whether the key is suitable for passphrase derivation.
451) 
452)         Currently, this only checks whether signatures with this key
453)         type are deterministic.
454) 
455)         Args:
456)             key: SSH public key to check.
457) 
458)         Returns:
459)             True if and only if the key is suitable for use in deriving
460)             a passphrase deterministically.
461) 
462)         """
Marco Ricci Add support for Python 3.9

Marco Ricci authored 1 month ago

463)         TestFunc: TypeAlias = 'Callable[[bytes | bytearray], bool]'