Marco Ricci committed on 2024-06-22 21:19:30
Zeige 1 geänderte Datei mit 70 Einfügungen und 9 Löschungen.
Some level of Unicode awareness is necessary to support passphrases stored as text in (JSON) config files. We reject a text passphrase if the same text has multiple Unicode representations (and thus multiple UTF-8 byte representations) under different normalizations, i.e. if its NFC- and NFD-normalized forms differ. In such a case, a byte string must be used instead, and the value cannot currently be stored in the JSON config file.
... | ... |
@@ -8,9 +8,11 @@ |
8 | 8 |
|
9 | 9 |
from __future__ import annotations |
10 | 10 |
|
11 |
+import base64 |
|
11 | 12 |
import collections |
12 | 13 |
import hashlib |
13 | 14 |
import math |
15 |
+import unicodedata |
|
14 | 16 |
import warnings |
15 | 17 |
|
16 | 18 |
from typing import assert_type, reveal_type |
... | ... |
@@ -21,6 +23,9 @@ import ssh_agent_client |
21 | 23 |
__author__ = "Marco Ricci <m@the13thletter.info>" |
22 | 24 |
__version__ = "0.1.0" |
23 | 25 |
|
26 |
+class AmbiguousByteRepresentationError(ValueError): |
|
27 |
+ """The object has an ambiguous byte representation.""" |
|
28 |
+ |
|
24 | 29 |
class Vault: |
25 | 30 |
"""A work-alike of James Coglan's vault. |
26 | 31 |
|
... | ... |
@@ -68,8 +73,8 @@ class Vault: |
68 | 73 |
+ _CHARSETS['symbol']) |
69 | 74 |
|
70 | 75 |
def __init__( |
71 |
- self, *, phrase: bytes | bytearray = b'', length: int = 20, |
|
72 |
- repeat: int = 0, lower: int | None = None, |
|
76 |
+ self, *, phrase: bytes | bytearray | str = b'', |
|
77 |
+ length: int = 20, repeat: int = 0, lower: int | None = None, |
|
73 | 78 |
upper: int | None = None, number: int | None = None, |
74 | 79 |
space: int | None = None, dash: int | None = None, |
75 | 80 |
symbol: int | None = None, |
... | ... |
@@ -79,19 +84,20 @@ class Vault: |
79 | 84 |
Args: |
80 | 85 |
phrase: |
81 | 86 |
The master passphrase from which to derive the service |
82 |
- passphrases. |
|
87 |
+ passphrases. If a text string, then the byte |
|
88 |
+ representation must be unique. |
|
83 | 89 |
length: |
84 | 90 |
Desired passphrase length. |
85 | 91 |
repeat: |
86 | 92 |
The maximum number of immediate character repetitions |
87 | 93 |
allowed in the passphrase. Disabled if set to 0. |
88 | 94 |
lower: |
89 |
- Optional constraint on lowercase characters. If |
|
95 |
+ Optional constraint on ASCII lowercase characters. If |
|
90 | 96 |
positive, include this many lowercase characters |
91 | 97 |
somewhere in the passphrase. If 0, avoid lowercase |
92 | 98 |
characters altogether. |
93 | 99 |
upper: |
94 |
- Same as `lower`, but for uppercase characters. |
|
100 |
+ Same as `lower`, but for ASCII uppercase characters. |
|
95 | 101 |
number: |
96 | 102 |
Same as `lower`, but for ASCII digits. |
97 | 103 |
space: |
... | ... |
@@ -103,8 +109,13 @@ class Vault: |
103 | 109 |
Same as `lower`, but for all other hitherto unlisted |
104 | 110 |
ASCII printable characters (except backquote). |
105 | 111 |
|
112 |
+ Raises: |
|
113 |
+ AmbiguousByteRepresentationError: |
|
114 |
+ The phrase is a text string with differing NFC- and |
|
115 |
+ NFD-normalized UTF-8 byte representations. |
|
116 |
+ |
|
106 | 117 |
""" |
107 |
- self._phrase = bytes(phrase) |
|
118 |
+ self._phrase = self._get_binary_string(phrase) |
|
108 | 119 |
self._length = length |
109 | 120 |
self._repeat = repeat |
110 | 121 |
self._allowed = bytearray(self._CHARSETS['all']) |
... | ... |
@@ -197,10 +208,38 @@ class Vault: |
197 | 208 |
entropy_bound = max(1, self._entropy()) |
198 | 209 |
return int(math.ceil(safety_factor * entropy_bound / 8)) |
199 | 210 |
|
211 |
+ @staticmethod |
|
212 |
+ def _get_binary_string(s: bytes | bytearray | str, /) -> bytes: |
|
213 |
+ """Convert the input string to a read-only, binary string. |
|
214 |
+ |
|
215 |
+ If it is a text string, then test for an unambiguous UTF-8 |
|
216 |
+ representation, otherwise abort. (That is, check whether the |
|
217 |
+ NFC and NFD forms of the string coincide.) |
|
218 |
+ |
|
219 |
+ Args: |
|
220 |
+ s: The string to (check and) convert. |
|
221 |
+ |
|
222 |
+ Returns: |
|
223 |
+ A read-only, binary copy of the string. |
|
224 |
+ |
|
225 |
+ Raises: |
|
226 |
+ AmbiguousByteRepresentationError: |
|
227 |
+ The text string has differing NFC- and NFD-normalized |
|
228 |
+ UTF-8 byte representations. |
|
229 |
+ |
|
230 |
+ """ |
|
231 |
+ if isinstance(s, str): |
|
232 |
+ norm = unicodedata.normalize |
|
233 |
+ if norm('NFC', s) != norm('NFD', s): |
|
234 |
+ raise AmbiguousByteRepresentationError( |
|
235 |
+ 'text string has ambiguous byte representation') |
|
236 |
+ return s.encode('UTF-8') |
|
237 |
+ return bytes(s) |
|
238 |
+ |
|
200 | 239 |
@classmethod |
201 | 240 |
def create_hash( |
202 |
- cls, phrase: bytes | bytearray, service: bytes | bytearray, *, |
|
203 |
- length: int = 32, |
|
241 |
+ cls, phrase: bytes | bytearray | str, |
|
242 |
+ service: bytes | bytearray, *, length: int = 32, |
|
204 | 243 |
) -> bytes: |
205 | 244 |
r"""Create a pseudorandom byte stream from phrase and service. |
206 | 245 |
|
... | ... |
@@ -213,6 +252,9 @@ class Vault: |
213 | 252 |
A master passphrase, or sometimes an SSH signature. |
214 | 253 |
Used as the key for PBKDF2, the underlying cryptographic |
215 | 254 |
primitive. |
255 |
+ |
|
256 |
+ If a text string, then the byte representation must be |
|
257 |
+ unique. |
|
216 | 258 |
service: |
217 | 259 |
A vault service name. Will be suffixed with |
218 | 260 |
`Vault._UUID`, and then used as the salt value for |
... | ... |
@@ -223,6 +265,11 @@ class Vault: |
223 | 265 |
Returns: |
224 | 266 |
A pseudorandom byte string of length `length`. |
225 | 267 |
|
268 |
+ Raises: |
|
269 |
+ AmbiguousByteRepresentationError: |
|
270 |
+ The phrase is a text string with differing NFC- and |
|
271 |
+ NFD-normalized UTF-8 byte representations. |
|
272 |
+ |
|
226 | 273 |
Note: |
227 | 274 |
Shorter values returned from this method (with the same key |
228 | 275 |
and message) are prefixes of longer values returned from |
... | ... |
@@ -249,13 +296,15 @@ class Vault: |
249 | 296 |
b'\x1c\xc3\x9c\xd9\xb6\x1a\x99CS\x07\xc41\xf4\x85#s' |
250 | 297 |
|
251 | 298 |
""" |
299 |
+ phrase = cls._get_binary_string(phrase) |
|
300 |
+ assert not isinstance(phrase, str) |
|
252 | 301 |
salt = bytes(service) + cls._UUID |
253 | 302 |
return hashlib.pbkdf2_hmac(hash_name='sha1', password=phrase, |
254 | 303 |
salt=salt, iterations=8, dklen=length) |
255 | 304 |
|
256 | 305 |
def generate( |
257 | 306 |
self, service_name: str | bytes | bytearray, /, *, |
258 |
- phrase: bytes | bytearray = b'', |
|
307 |
+ phrase: bytes | bytearray | str = b'', |
|
259 | 308 |
) -> bytes: |
260 | 309 |
r"""Generate a service passphrase. |
261 | 310 |
|
... | ... |
@@ -266,6 +315,17 @@ class Vault: |
266 | 315 |
If given, override the passphrase given during |
267 | 316 |
construction. |
268 | 317 |
|
318 |
+ If a text string, then the byte representation must be |
|
319 |
+ unique. |
|
320 |
+ |
|
321 |
+ Returns: |
|
322 |
+ The service passphrase. |
|
323 |
+ |
|
324 |
+ Raises: |
|
325 |
+ AmbiguousByteRepresentationError: |
|
326 |
+ The phrase is a text string with differing NFC- and |
|
327 |
+ NFD-normalized UTF-8 byte representations. |
|
328 |
+ |
|
269 | 329 |
Examples: |
270 | 330 |
>>> phrase = b'She cells C shells bye the sea shoars' |
271 | 331 |
>>> # Using default options in constructor. |
... | ... |
@@ -287,6 +347,7 @@ class Vault: |
287 | 347 |
assert_type(service_name, bytes) |
288 | 348 |
if not phrase: |
289 | 349 |
phrase = self._phrase |
350 |
+ phrase = self._get_binary_string(phrase) |
|
290 | 351 |
# Repeat the passphrase generation with ever-increasing hash |
291 | 352 |
# lengths, until the passphrase can be formed without exhausting |
292 | 353 |
# the sequin. See the guarantee in the create_hash method for |
293 | 354 |