e8f3ec854c425cc36565a40adbf00d22a2febeec
Marco Ricci Change the author e-mail ad...

Marco Ricci authored 2 months ago

1) # SPDX-FileCopyrightText: 2024 Marco Ricci <software@the13thletter.info>
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

2) #
3) # SPDX-License-Identifier: MIT
4) 
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 2 months ago

5) """Python port of the vault(1) password generation scheme."""
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

6) 
7) from __future__ import annotations
8) 
9) import base64
10) import collections
11) import hashlib
12) import math
13) from collections.abc import Callable
14) from typing import TypeAlias
15) 
16) from typing_extensions import assert_type
17) 
18) from derivepassphrase import sequin, ssh_agent
19) 
Marco Ricci Change the author e-mail ad...

Marco Ricci authored 2 months ago

20) __author__ = 'Marco Ricci <software@the13thletter.info>'
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

21) 
22) 
# Named character classes from which passphrase characters are drawn.
# The composite classes ('alpha', 'alphanum', 'all') are derived from
# the elementary ones and are inserted at fixed positions, because the
# `Vault` class documents that it relies on both the order and the
# contents of this table.
_CHARSETS = collections.OrderedDict()
_CHARSETS['lower'] = b'abcdefghijklmnopqrstuvwxyz'
_CHARSETS['upper'] = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
_CHARSETS['alpha'] = _CHARSETS['lower'] + _CHARSETS['upper']
_CHARSETS['number'] = b'0123456789'
_CHARSETS['alphanum'] = _CHARSETS['alpha'] + _CHARSETS['number']
_CHARSETS['space'] = b' '
_CHARSETS['dash'] = b'-_'
_CHARSETS['symbol'] = b'!"#$%&\'()*+,./:;<=>?@[\\]^{|}~-_'
_CHARSETS['all'] = (
    _CHARSETS['alphanum'] + _CHARSETS['space'] + _CHARSETS['symbol']
)
39) 
40) 
41) class Vault:
42)     """A work-alike of James Coglan's vault.
43) 
44)     Store settings for generating (actually: deriving) passphrases for
45)     named services, with various constraints, given only a master
46)     passphrase.  Also, actually generate the passphrase.  The derivation
47)     is deterministic and non-secret; only the master passphrase need be
48)     kept secret.  The implementation is compatible with [vault][].
49) 
50)     [James Coglan explains the passphrase derivation algorithm in great
51)     detail][ALGORITHM] in his blog post on said topic: A principally
52)     infinite bit stream is obtained by running a key-derivation function
53)     on the master passphrase and the service name, then this bit stream
Marco Ricci Update documentation to use...

Marco Ricci authored 3 months ago

54)     is fed into a [Sequin][derivepassphrase.sequin.Sequin] to generate
55)     random numbers in the correct range, and finally these random
56)     numbers select passphrase characters until the desired length is
57)     reached.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

58) 
59)     [vault]: https://getvau.lt
60)     [ALGORITHM]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/
61) 
62)     """
63) 
64)     _UUID = b'e87eb0f4-34cb-46b9-93ad-766c5ab063e7'
65)     """A tag used by vault in the bit stream generation."""
66)     _CHARSETS = _CHARSETS
67)     """
68)         Known character sets from which to draw passphrase characters.
69)         Relies on a certain, fixed order for their definition and their
70)         contents.
71) 
72)     """
73) 
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 2 months ago

74)     def __init__(  # noqa: PLR0913
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

75)         self,
76)         *,
77)         phrase: bytes | bytearray | str = b'',
78)         length: int = 20,
79)         repeat: int = 0,
80)         lower: int | None = None,
81)         upper: int | None = None,
82)         number: int | None = None,
83)         space: int | None = None,
84)         dash: int | None = None,
85)         symbol: int | None = None,
86)     ) -> None:
87)         """Initialize the Vault object.
88) 
89)         Args:
90)             phrase:
91)                 The master passphrase from which to derive the service
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

92)                 passphrases.  If a string, then the UTF-8 encoding of
93)                 the string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

94)             length:
95)                 Desired passphrase length.
96)             repeat:
97)                 The maximum number of immediate character repetitions
98)                 allowed in the passphrase.  Disabled if set to 0.
99)             lower:
100)                 Optional constraint on ASCII lowercase characters.  If
101)                 positive, include this many lowercase characters
102)                 somewhere in the passphrase.  If 0, avoid lowercase
103)                 characters altogether.
104)             upper:
105)                 Same as `lower`, but for ASCII uppercase characters.
106)             number:
107)                 Same as `lower`, but for ASCII digits.
108)             space:
109)                 Same as `lower`, but for the space character.
110)             dash:
111)                 Same as `lower`, but for the hyphen-minus and underscore
112)                 characters.
113)             symbol:
114)                 Same as `lower`, but for all other hitherto unlisted
115)                 ASCII printable characters (except backquote).
116) 
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 2 months ago

117)         Raises:
118)             ValueError:
119)                 Conflicting passphrase constraints.  Permit more
120)                 characters, or increase the desired passphrase length.
121) 
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

122)         """
123)         self._phrase = self._get_binary_string(phrase)
124)         self._length = length
125)         self._repeat = repeat
126)         self._allowed = bytearray(self._CHARSETS['all'])
127)         self._required: list[bytes] = []
128) 
129)         def subtract_or_require(
130)             count: int | None, characters: bytes | bytearray
131)         ) -> None:
132)             if not isinstance(count, int):
133)                 return
134)             if count <= 0:
135)                 self._allowed = self._subtract(characters, self._allowed)
136)             else:
137)                 for _ in range(count):
138)                     self._required.append(characters)
139) 
140)         subtract_or_require(lower, self._CHARSETS['lower'])
141)         subtract_or_require(upper, self._CHARSETS['upper'])
142)         subtract_or_require(number, self._CHARSETS['number'])
143)         subtract_or_require(space, self._CHARSETS['space'])
144)         subtract_or_require(dash, self._CHARSETS['dash'])
145)         subtract_or_require(symbol, self._CHARSETS['symbol'])
146)         if len(self._required) > self._length:
147)             msg = 'requested passphrase length too short'
148)             raise ValueError(msg)
149)         if not self._allowed:
150)             msg = 'no allowed characters left'
151)             raise ValueError(msg)
152)         for _ in range(len(self._required), self._length):
153)             self._required.append(bytes(self._allowed))
154) 
155)     def _entropy(self) -> float:
156)         """Estimate the passphrase entropy, given the current settings.
157) 
158)         The entropy is the base 2 logarithm of the amount of
159)         possibilities.  We operate directly on the logarithms, and use
160)         sorting and [`math.fsum`][] to keep high accuracy.
161) 
162)         Note:
163)             We actually overestimate the entropy here because of poor
164)             handling of character repetitions.  In the extreme, assuming
165)             that only one character were allowed, then because there is
166)             only one possible string of each given length, the entropy
167)             of that string `s` is always be zero.  However, we calculate
168)             the entropy as `math.log2(math.factorial(len(s)))`, i.e. we
169)             assume the characters at the respective string position are
170)             distinguishable from each other.
171) 
172)         Returns:
173)             A valid (and somewhat close) upper bound to the entropy.
174) 
175)         """
176)         factors: list[int] = []
177)         if not self._required or any(not x for x in self._required):
178)             return float('-inf')
179)         for i, charset in enumerate(self._required):
180)             factors.extend([i + 1, len(charset)])
181)         factors.sort()
182)         return math.fsum(math.log2(f) for f in factors)
183) 
184)     def _estimate_sufficient_hash_length(
185)         self,
186)         safety_factor: float = 2.0,
187)     ) -> int:
188)         """Estimate the sufficient hash length, given the current settings.
189) 
190)         Using the entropy (via `_entropy`) and a safety factor, give an
191)         initial estimate of the length to use for `create_hash` such
192)         that using a `Sequin` with this hash will not exhaust it during
193)         passphrase generation.
194) 
195)         Args:
196)             safety_factor: The safety factor.  Must be at least 1.
197) 
198)         Returns:
199)             The estimated sufficient hash length.
200) 
201)         Warning:
202)             This is a heuristic, not an exact computation; it may
203)             underestimate the true necessary hash length.  It is
204)             intended as a starting point for searching for a sufficient
205)             hash length, usually by doubling the hash length each time
206)             it does not yet prove so.
207) 
208)         """
209)         try:
210)             safety_factor = float(safety_factor)
211)         except TypeError as e:
212)             msg = f'invalid safety factor: not a float: {safety_factor!r}'
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 2 months ago

213)             raise TypeError(msg) from e  # noqa: DOC501
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

214)         if not math.isfinite(safety_factor) or safety_factor < 1.0:
215)             msg = f'invalid safety factor {safety_factor!r}'
Marco Ricci Apply new ruff ruleset to c...

Marco Ricci authored 2 months ago

216)             raise ValueError(msg)  # noqa: DOC501
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

217)         # Ensure the bound is strictly positive.
218)         entropy_bound = max(1, self._entropy())
219)         return int(math.ceil(safety_factor * entropy_bound / 8))
220) 
221)     @staticmethod
222)     def _get_binary_string(s: bytes | bytearray | str, /) -> bytes:
223)         """Convert the input string to a read-only, binary string.
224) 
Marco Ricci Allow all textual strings,...

Marco Ricci authored 2 months ago

225)         If it is a text string, return the string's UTF-8
226)         representation.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

227) 
228)         Args:
229)             s: The string to (check and) convert.
230) 
231)         Returns:
232)             A read-only, binary copy of the string.
233) 
234)         """
235)         if isinstance(s, str):
236)             return s.encode('UTF-8')
237)         return bytes(s)
238) 
239)     @classmethod
240)     def create_hash(
241)         cls,
242)         phrase: bytes | bytearray | str,
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

243)         service: bytes | bytearray | str,
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

244)         *,
245)         length: int = 32,
246)     ) -> bytes:
247)         r"""Create a pseudorandom byte stream from phrase and service.
248) 
249)         Create a pseudorandom byte stream from `phrase` and `service` by
250)         feeding them into the key-derivation function PBKDF2
251)         (8 iterations, using SHA-1).
252) 
253)         Args:
254)             phrase:
255)                 A master passphrase, or sometimes an SSH signature.
256)                 Used as the key for PBKDF2, the underlying cryptographic
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

257)                 primitive.  If a string, then the UTF-8 encoding of the
258)                 string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

259)             service:
260)                 A vault service name.  Will be suffixed with
261)                 `Vault._UUID`, and then used as the salt value for
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

262)                 PBKDF2.  If a string, then the UTF-8 encoding of the
263)                 string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

264)             length:
265)                 The length of the byte stream to generate.
266) 
267)         Returns:
268)             A pseudorandom byte string of length `length`.
269) 
270)         Note:
271)             Shorter values returned from this method (with the same key
272)             and message) are prefixes of longer values returned from
273)             this method.  (This property is inherited from the
274)             underlying PBKDF2 function.)  It is thus safe (if slow) to
275)             call this method with the same input with ever-increasing
276)             target lengths.
277) 
278)         Examples:
279)             >>> # See also Vault.phrase_from_key examples.
280)             >>> phrase = bytes.fromhex('''
281)             ... 00 00 00 0b 73 73 68 2d 65 64 32 35 35 31 39
282)             ... 00 00 00 40
283)             ... f0 98 19 80 6c 1a 97 d5 26 03 6e cc e3 65 8f 86
284)             ... 66 07 13 19 13 09 21 33 33 f9 e4 36 53 1d af fd
285)             ... 0d 08 1f ec f8 73 9b 8c 5f 55 39 16 7c 53 54 2c
286)             ... 1e 52 bb 30 ed 7f 89 e2 2f 69 51 55 d8 9e a6 02
287)             ... ''')
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

288)             >>> Vault.create_hash(phrase, 'some_service', length=4)
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

289)             b'M\xb1<S'
290)             >>> Vault.create_hash(phrase, b'some_service', length=16)
291)             b'M\xb1<S\x827E\xd1M\xaf\xf8~\xc8n\x10\xcc'
292)             >>> Vault.create_hash(phrase, b'NOSUCHSERVICE', length=16)
293)             b'\x1c\xc3\x9c\xd9\xb6\x1a\x99CS\x07\xc41\xf4\x85#s'
294) 
295)         """
296)         phrase = cls._get_binary_string(phrase)
297)         assert not isinstance(phrase, str)
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

298)         salt = cls._get_binary_string(service) + cls._UUID
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

299)         return hashlib.pbkdf2_hmac(
300)             hash_name='sha1',
301)             password=phrase,
302)             salt=salt,
303)             iterations=8,
304)             dklen=length,
305)         )
306) 
307)     def generate(
308)         self,
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

309)         service_name: bytes | bytearray | str,
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

310)         /,
311)         *,
312)         phrase: bytes | bytearray | str = b'',
313)     ) -> bytes:
314)         r"""Generate a service passphrase.
315) 
316)         Args:
317)             service_name:
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

318)                 The service name.  If a string, then the UTF-8 encoding
319)                 of the string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

320)             phrase:
321)                 If given, override the passphrase given during
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

322)                 construction.  If a string, then the UTF-8 encoding of
323)                 the string is used.
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

324) 
325)         Returns:
326)             The service passphrase.
327) 
328)         Examples:
329)             >>> phrase = b'She cells C shells bye the sea shoars'
330)             >>> # Using default options in constructor.
331)             >>> Vault(phrase=phrase).generate(b'google')
332)             b': 4TVH#5:aZl8LueOT\\{'
333)             >>> # Also possible:
334)             >>> Vault().generate(b'google', phrase=phrase)
335)             b': 4TVH#5:aZl8LueOT\\{'
336) 
337)         """
338)         hash_length = self._estimate_sufficient_hash_length()
339)         assert hash_length >= 1
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

340)         # Ensure the phrase and the service name are bytes objects.
341)         # This is needed later for safe concatenation.
342)         service_name = self._get_binary_string(service_name)
Marco Ricci Move `sequin` and `ssh_agen...

Marco Ricci authored 3 months ago

343)         assert_type(service_name, bytes)
344)         if not phrase:
345)             phrase = self._phrase
346)         phrase = self._get_binary_string(phrase)
Marco Ricci Support text string service...

Marco Ricci authored 2 months ago

347)         assert_type(phrase, bytes)