5403acefe8c2e2872ab3f884401115ab2419caf5
Marco Ricci Import initial project files

Marco Ricci authored 6 months ago

1) # SPDX-FileCopyrightText: 2024 Marco Ricci <m@the13thletter.info>
2) #
3) # SPDX-License-Identifier: MIT
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

4) 
5) """Work-alike of vault(1) – a deterministic, stateless password manager
6) 
7) """
8) 
9) from __future__ import annotations
10) 
11) import collections
12) import hashlib
13) import math
14) import warnings
15) 
16) from typing import assert_type, reveal_type
17) 
18) import sequin
19) import ssh_agent_client
20) 
21) class Vault:
22)     """A work-alike of James Coglan's vault.
23) 
24)     Store settings for generating (actually: deriving) passphrases for
25)     named services, with various constraints, given only a master
26)     passphrase.  Also, actually generate the passphrase.  The derivation
27)     is deterministic and non-secret; only the master passphrase need be
28)     kept secret.  The implementation is compatible with [vault][].
29) 
30)     [James Coglan explains the passphrase derivation algorithm in great
31)     detail][ALGORITHM] in his blog post on said topic: A principally
32)     infinite bit stream is obtained by running a key-derivation function
33)     on the master passphrase and the service name, then this bit stream
34)     is fed into a [sequin][] to generate random numbers in the correct
35)     range, and finally these random numbers select passphrase characters
36)     until the desired length is reached.
37) 
38)     [vault]: https://getvau.lt
39)     [ALGORITHM]: https://blog.jcoglan.com/2012/07/16/designing-vaults-generator-algorithm/
40) 
41)     """
42)     _UUID = b'e87eb0f4-34cb-46b9-93ad-766c5ab063e7'
43)     """A tag used by vault in the bit stream generation."""
44)     _CHARSETS: collections.OrderedDict[str, bytes]
45)     """
46)         Known character sets from which to draw passphrase characters.
47)         Relies on a certain, fixed order for their definition and their
48)         contents.
49) 
50)     """
51)     _CHARSETS = collections.OrderedDict([
52)         ('lower', b'abcdefghijklmnopqrstuvwxyz'),
53)         ('upper', b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
54)         ('alpha', b''),  # Placeholder.
55)         ('number', b'0123456789'),
56)         ('alphanum', b''),  # Placeholder.
57)         ('space', b' '),
58)         ('dash', b'-_'),
59)         ('symbol', b'!"#$%&\'()*+,./:;<=>?@[\\]^{|}~-_'),
60)         ('all', b''),  # Placeholder.
61)     ])
62)     _CHARSETS['alpha'] = _CHARSETS['lower'] + _CHARSETS['upper']
63)     _CHARSETS['alphanum'] = _CHARSETS['alpha'] + _CHARSETS['number']
64)     _CHARSETS['all'] = (_CHARSETS['alphanum'] + _CHARSETS['space']
65)                         + _CHARSETS['symbol'])
66) 
67)     def __init__(
68)         self, *, phrase: bytes | bytearray = b'', length: int = 20,
69)         repeat: int = 0, lower: int | None = None,
70)         upper: int | None = None, number: int | None = None,
71)         space: int | None = None, dash: int | None = None,
72)         symbol: int | None = None,
73)     ) -> None:
74)         """Initialize the Vault object.
75) 
76)         Args:
77)             phrase:
78)                 The master passphrase from which to derive the service
79)                 passphrases.
80)             length:
81)                 Desired passphrase length.
82)             repeat:
83)                 The maximum number of immediate character repetitions
84)                 allowed in the passphrase.  Disabled if set to 0.
85)             lower:
86)                 Optional constraint on lowercase characters.  If
87)                 positive, include this many lowercase characters
88)                 somewhere in the passphrase.  If 0, avoid lowercase
89)                 characters altogether.
90)             upper:
91)                 Same as `lower`, but for uppercase characters.
92)             number:
93)                 Same as `lower`, but for ASCII digits.
94)             space:
95)                 Same as `lower`, but for the space character.
96)             dash:
97)                 Same as `lower`, but for the hyphen-minus and underscore
98)                 characters.
99)             symbol:
100)                 Same as `lower`, but for all other hitherto unlisted
101)                 ASCII printable characters (except backquote).
102) 
103)         """
104)         self._phrase = bytes(phrase)
105)         self._length = length
106)         self._repeat = repeat
107)         self._allowed = bytearray(self._CHARSETS['all'])
108)         self._required: list[bytes] = []
109)         def subtract_or_require(
110)             count: int | None, characters: bytes | bytearray
111)         ) -> None:
112)             if not isinstance(count, int):
113)                 return
114)             elif count <= 0:
115)                 self._allowed = self._subtract(characters, self._allowed)
116)             else:
117)                 for _ in range(count):
118)                     self._required.append(characters)
119)         subtract_or_require(lower, self._CHARSETS['lower'])
120)         subtract_or_require(upper, self._CHARSETS['upper'])
121)         subtract_or_require(number, self._CHARSETS['number'])
122)         subtract_or_require(space, self._CHARSETS['space'])
123)         subtract_or_require(dash, self._CHARSETS['dash'])
124)         subtract_or_require(symbol, self._CHARSETS['symbol'])
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

125)         if len(self._required) > self._length:
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

126)             raise ValueError('requested passphrase length too short')
127)         if not self._allowed:
128)             raise ValueError('no allowed characters left')
129)         for _ in range(len(self._required), self._length):
130)             self._required.append(bytes(self._allowed))
131) 
132)     def _entropy_upper_bound(self) -> int:
133)         """Estimate the passphrase entropy, given the current settings.
134) 
135)         The entropy is the base 2 logarithm of the amount of
136)         possibilities.  We operate directly on the logarithms, and round
137)         each summand up, overestimating the true entropy.
138) 
139)         """
140)         factors: list[int] = []
141)         for i, charset in enumerate(self._required):
142)             factors.append(i + 1)
143)             factors.append(len(charset))
144)         return sum(int(math.ceil(math.log2(f))) for f in factors)
145) 
146)     @classmethod
147)     def create_hash(
148)         cls, key: bytes | bytearray, message: bytes | bytearray, *,
149)         length: int = 32,
150)     ) -> bytes:
151)         """Create a pseudorandom byte stream from key and message.
152) 
153)         Args:
154)             key:
155)                 A cryptographic key.  Typically a master passphrase, or
156)                 an SSH signature.
157)             message:
158)                 A message.  Typically a vault service name.
159)             length:
160)                 The length of the byte stream to generate.
161) 
162)         Returns:
163)             A pseudorandom byte string of length `length`.
164) 
165)         Note:
166)             Shorter values returned from this method (with the same key
167)             and message) are prefixes of longer values returned from
168)             this method.  (This property is inherited from the
169)             underlying PBKDF2 function.)  It is thus safe (if slow) to
170)             call this method with the same input with ever-increasing
171)             target lengths.
172) 
173)         """
174)         return hashlib.pbkdf2_hmac(hash_name='sha1', password=key,
175)                                    salt=message, iterations=8, dklen=length)
176) 
177)     def generate(
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

178)         self, service_name: str | bytes | bytearray, /, *,
179)         phrase: bytes | bytearray = b'',
180)     ) -> bytes:
181)         r"""Generate a service passphrase.
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

182) 
183)         Args:
184)             service_name:
185)                 The service name.
186)             phrase:
187)                 If given, override the passphrase given during
188)                 construction.
189) 
Marco Ricci Add unit tests, both new an...

Marco Ricci authored 5 months ago

190)         Examples:
191)             >>> phrase = b'She cells C shells bye the sea shoars'
192)             >>> # Using default options in constructor.
193)             >>> Vault(phrase=phrase).generate(b'google')
194)             b': 4TVH#5:aZl8LueOT\\{'
195)             >>> # Also possible:
196)             >>> Vault().generate(b'google', phrase=phrase)
197)             b': 4TVH#5:aZl8LueOT\\{'
198) 
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

199)         """
200)         entropy_bound = self._entropy_upper_bound()
201)         # Use a safety factor, because a sequin will potentially throw
202)         # bits away and we cannot rely on having generated a hash of
203)         # exactly the right length.
204)         safety_factor = 2
205)         hash_length = int(math.ceil(safety_factor * entropy_bound / 8))
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

206)         # Ensure the phrase is a bytes object.  Needed later for safe
207)         # concatenation.
208)         if isinstance(service_name, str):
209)             service_name = service_name.encode('utf-8')
210)         elif not isinstance(service_name, bytes):
211)             service_name = bytes(service_name)
212)         assert_type(service_name, bytes)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

213)         if not phrase:
214)             phrase = self._phrase
215)         # Repeat the passphrase generation with ever-increasing hash
216)         # lengths, until the passphrase can be formed without exhausting
217)         # the sequin.  See the guarantee in the create_hash method for
218)         # why this works.
219)         while True:
220)             try:
221)                 required = self._required[:]
222)                 seq = sequin.Sequin(self.create_hash(
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

223)                     key=phrase, message=(service_name + self._UUID),
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

224)                     length=hash_length))
225)                 result = bytearray()
226)                 while len(result) < self._length:
227)                     pos = seq.generate(len(required))
228)                     charset = required.pop(pos)
229)                     # Determine if an unlucky choice right now might
230)                     # violate the restriction on repeated characters.
231)                     # That is, check if the current partial passphrase
232)                     # ends with r - 1 copies of the same character
233)                     # (where r is the repeat limit that must not be
234)                     # reached), and if so, remove this same character
235)                     # from the current character's allowed set.
236)                     previous = result[-1] if result else None
237)                     i = self._repeat - 1
238)                     same = (i >= 0) if previous is not None else False
239)                     while same and i > 0:
240)                         i -= 1
241)                         if same:
242)                             other_pos = -(self._repeat - i)
243)                             same = (result[other_pos] == previous)
244)                     if same:
245)                         assert previous is not None  # for the type checker
246)                         charset = self._subtract(bytes([previous]), charset)
247)                     # End checking for repeated characters.
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

248)                     pos = seq.generate(len(charset))
249)                     result.extend(charset[pos:pos+1])
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

250)             except sequin.SequinExhaustedException:
251)                 hash_length *= 2
252)             else:
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

253)                 return bytes(result)
Marco Ricci Add prototype implementation

Marco Ricci authored 5 months ago

254) 
255)     @classmethod
256)     def phrase_from_signature(
257)         cls, key: bytes | bytearray, /
258)     ) -> bytes | bytearray:
259)         """Obtain the master passphrase from a configured SSH key.
260) 
261)         vault allows the usage of certain SSH keys to derive a master
262)         passphrase, by signing the vault UUID with the SSH key.  The key
263)         type must ensure that signatures are deterministic.
264) 
265)         Args:
266)             key: The (public) SSH key to use for signing.
267) 
268)         Returns:
269)             The signature of the vault UUID under this key.
270) 
271)         Raises:
272)             ValueError:
273)                 The SSH key is principally unsuitable for this use case.
274)                 Usually this means that the signature is not
275)                 deterministic.
276) 
277)         """
278)         deterministic_signature_types = {
279)             'ssh-ed25519':
280)                 lambda k: k.startswith(b'\x00\x00\x00\x0bssh-ed25519'),
281)             'ssh-rsa':
282)                 lambda k: k.startswith(b'\x00\x00\x00\x07ssh-rsa'),
283)         }
284)         if not any(v(key) for v in deterministic_signature_types.values()):
285)             raise ValueError(
286)                 'unsuitable SSH key: bad key, or signature not deterministic')
287)         with ssh_agent_client.SSHAgentClient() as client:
288)             ret = client.sign(key, cls._UUID)
289)         return ret
290) 
291)     def _subtract(
292)         self, charset: bytes | bytearray, allowed: bytes | bytearray,
293)     ) -> bytearray:
294)         """Remove the characters in charset from allowed.
295) 
296)         This preserves the relative order of characters in `allowed`.
297) 
298)         Args:
299)             charset: Characters to remove.
300)             allowed: Character set to remove the other characters from.
301) 
302)         Returns:
303)             The pruned character set.
304) 
305)         Raises:
306)             ValueError: `charset` contained duplicate characters.
307) 
308)         """
309)         allowed = (allowed if isinstance(allowed, bytearray)
310)                    else bytearray(allowed))
311)         assert_type(allowed, bytearray)
312)         if len(frozenset(charset)) != len(charset):
313)             raise ValueError('duplicate characters in set')
314)         for c in charset:
315)             try:
316)                 pos = allowed.index(c)
Marco Ricci Fix numerous argument type...

Marco Ricci authored 5 months ago

317)             except ValueError: