Marco Ricci commited on 2024-09-01 00:32:49
Zeige 6 geänderte Dateien mit 181 Einfügungen und 61 Löschungen.
Remove the "check if Unicode string has unambiguous byte encoding" misfeature, which is based on an incorrect understanding of Unicode normalization forms. Also remove the `AmbiguousByteRepresentationError` class. Now we allow all textual strings as master passphrases. When storing a (master) passphrase to the configuration or when interactively generating a passphrase, we emit a warning if we detect that the master passphrase is not normalized (with the configured normalization form, stored in `.global.unicode_normalization_form`).
... | ... |
@@ -7,7 +7,7 @@ |
7 | 7 |
from __future__ import annotations |
8 | 8 |
|
9 | 9 |
import enum |
10 |
-from typing import NamedTuple, TypeGuard |
|
10 |
+from typing import Literal, NamedTuple, TypeGuard |
|
11 | 11 |
|
12 | 12 |
from typing_extensions import ( |
13 | 13 |
Any, |
... | ... |
@@ -34,11 +34,18 @@ class VaultConfigGlobalSettings(TypedDict, total=False): |
34 | 34 |
master passphrase. Optional. |
35 | 35 |
phrase: |
36 | 36 |
The master passphrase. Optional. |
37 |
+ unicode_normalization_form: |
|
38 |
+ The preferred Unicode normalization form; we warn the user |
|
39 |
+ if textual passphrases do not match their normalized forms. |
|
40 |
+ Optional, and a `derivepassphrase` extension. |
|
37 | 41 |
|
38 | 42 |
""" |
39 | 43 |
|
40 | 44 |
key: NotRequired[str] |
41 | 45 |
phrase: NotRequired[str] |
46 |
+ unicode_normalization_form: NotRequired[ |
|
47 |
+ Literal['NFC', 'NFD', 'NFKC', 'NFKD'] |
|
48 |
+ ] |
|
42 | 49 |
|
43 | 50 |
|
44 | 51 |
class VaultConfigServicesSettings(VaultConfigGlobalSettings, total=False): |
... | ... |
@@ -123,7 +130,7 @@ def is_vault_config(obj: Any) -> TypeGuard[VaultConfig]: |
123 | 130 |
o_global = obj['global'] |
124 | 131 |
if not isinstance(o_global, dict): |
125 | 132 |
return False |
126 |
- for key in ('key', 'phrase'): |
|
133 |
+ for key in ('key', 'phrase', 'unicode_normalization_form'): |
|
127 | 134 |
if key in o_global and not isinstance(o_global[key], str): |
128 | 135 |
return False |
129 | 136 |
if 'key' in o_global and 'phrase' in o_global: |
... | ... |
@@ -14,8 +14,10 @@ import inspect |
14 | 14 |
import json |
15 | 15 |
import os |
16 | 16 |
import socket |
17 |
+import unicodedata |
|
17 | 18 |
from typing import ( |
18 | 19 |
TYPE_CHECKING, |
20 |
+ Literal, |
|
19 | 21 |
NoReturn, |
20 | 22 |
TextIO, |
21 | 23 |
cast, |
... | ... |
@@ -358,6 +360,34 @@ def _prompt_for_passphrase() -> str: |
358 | 360 |
) |
359 | 361 |
|
360 | 362 |
|
363 |
+def _check_for_misleading_passphrase( |
|
364 |
+ key: tuple[str, ...], |
|
365 |
+ value: dict[str, Any], |
|
366 |
+ *, |
|
367 |
+ form: Literal['NFC', 'NFD', 'NFKC', 'NFKD'] = 'NFC', |
|
368 |
+) -> None: |
|
369 |
+ def is_json_identifier(x: str) -> bool: |
|
370 |
+ return not x.startswith(tuple('0123456789')) and not any( |
|
371 |
+ c.lower() not in set('0123456789abcdefghijklmnopqrstuvwxyz_') |
|
372 |
+ for c in x |
|
373 |
+ ) |
|
374 |
+ |
|
375 |
+ if 'phrase' in value: |
|
376 |
+ phrase = value['phrase'] |
|
377 |
+ if not unicodedata.is_normalized(form, phrase): |
|
378 |
+ key_path = '.'.join( |
|
379 |
+ x if is_json_identifier(x) else repr(x) for x in key |
|
380 |
+ ) |
|
381 |
+ click.echo( |
|
382 |
+ ( |
|
383 |
+ f'{PROG_NAME}: Warning: the {key_path} passphrase ' |
|
384 |
+ f'is not {form}-normalized. Make sure to double-check ' |
|
385 |
+ f'this is really the passphrase you want.' |
|
386 |
+ ), |
|
387 |
+ err=True, |
|
388 |
+ ) |
|
389 |
+ |
|
390 |
+ |
|
361 | 391 |
class OptionGroupOption(click.Option): |
362 | 392 |
"""A [`click.Option`][] with an associated group name and group epilog. |
363 | 393 |
|
... | ... |
@@ -978,6 +1008,24 @@ def derivepassphrase( |
978 | 1008 |
except OSError as e: |
979 | 1009 |
err(f'Cannot load config: {e.strerror}: {e.filename!r}') |
980 | 1010 |
if _types.is_vault_config(maybe_config): |
1011 |
+ form = cast( |
|
1012 |
+ Literal['NFC', 'NFD', 'NFKC', 'NFKD'], |
|
1013 |
+ maybe_config.get('global', {}).get( |
|
1014 |
+ 'unicode_normalization_form', 'NFC' |
|
1015 |
+ ), |
|
1016 |
+ ) |
|
1017 |
+ assert form in {'NFC', 'NFD', 'NFKC', 'NFKD'} |
|
1018 |
+ _check_for_misleading_passphrase( |
|
1019 |
+ ('global',), |
|
1020 |
+ cast(dict[str, Any], maybe_config.get('global', {})), |
|
1021 |
+ form=form, |
|
1022 |
+ ) |
|
1023 |
+ for key, value in maybe_config['services'].items(): |
|
1024 |
+ _check_for_misleading_passphrase( |
|
1025 |
+ ('services', key), |
|
1026 |
+ cast(dict[str, Any], value), |
|
1027 |
+ form=form, |
|
1028 |
+ ) |
|
981 | 1029 |
put_config(maybe_config) |
982 | 1030 |
else: |
983 | 1031 |
err(f'Cannot load config: {_INVALID_VAULT_CONFIG}') |
... | ... |
@@ -1064,6 +1112,10 @@ def derivepassphrase( |
1064 | 1112 |
for m in view.maps: |
1065 | 1113 |
m.pop('phrase', '') |
1066 | 1114 |
elif use_phrase: |
1115 |
+ _check_for_misleading_passphrase( |
|
1116 |
+ ('services', service) if service else ('global',), |
|
1117 |
+ {'phrase': phrase}, |
|
1118 |
+ ) |
|
1067 | 1119 |
view['phrase'] = phrase |
1068 | 1120 |
for m in view.maps: |
1069 | 1121 |
m.pop('key', '') |
... | ... |
@@ -1099,6 +1151,18 @@ def derivepassphrase( |
1099 | 1151 |
base64.standard_b64decode(key) |
1100 | 1152 |
) |
1101 | 1153 |
|
1154 |
+ if use_phrase: |
|
1155 |
+ form = cast( |
|
1156 |
+ Literal['NFC', 'NFD', 'NFKC', 'NFKD'], |
|
1157 |
+ configuration.get('global', {}).get( |
|
1158 |
+ 'unicode_normalization_form', 'NFC' |
|
1159 |
+ ), |
|
1160 |
+ ) |
|
1161 |
+ assert form in {'NFC', 'NFD', 'NFKC', 'NFKD'} |
|
1162 |
+ _check_for_misleading_passphrase( |
|
1163 |
+ ('interactive',), {'phrase': phrase}, form=form |
|
1164 |
+ ) |
|
1165 |
+ |
|
1102 | 1166 |
# If either --key or --phrase are given, use that setting. |
1103 | 1167 |
# Otherwise, if both key and phrase are set in the config, |
1104 | 1168 |
# one must be global (ignore it) and one must be |
... | ... |
@@ -10,7 +10,6 @@ import base64 |
10 | 10 |
import collections |
11 | 11 |
import hashlib |
12 | 12 |
import math |
13 |
-import unicodedata |
|
14 | 13 |
from collections.abc import Callable |
15 | 14 |
from typing import TypeAlias |
16 | 15 |
|
... | ... |
@@ -21,13 +20,6 @@ from derivepassphrase import sequin, ssh_agent |
21 | 20 |
__author__ = 'Marco Ricci <m@the13thletter.info>' |
22 | 21 |
|
23 | 22 |
|
24 |
-class AmbiguousByteRepresentationError(ValueError): |
|
25 |
- """The object has an ambiguous byte representation.""" |
|
26 |
- |
|
27 |
- def __init__(self) -> None: |
|
28 |
- super().__init__('text string has ambiguous byte representation') |
|
29 |
- |
|
30 |
- |
|
31 | 23 |
_CHARSETS = collections.OrderedDict([ |
32 | 24 |
('lower', b'abcdefghijklmnopqrstuvwxyz'), |
33 | 25 |
('upper', b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'), |
... | ... |
@@ -97,8 +89,7 @@ class Vault: |
97 | 89 |
Args: |
98 | 90 |
phrase: |
99 | 91 |
The master passphrase from which to derive the service |
100 |
- passphrases. If a text string, then the byte |
|
101 |
- representation must be unique. |
|
92 |
+ passphrases. |
|
102 | 93 |
length: |
103 | 94 |
Desired passphrase length. |
104 | 95 |
repeat: |
... | ... |
@@ -122,11 +113,6 @@ class Vault: |
122 | 113 |
Same as `lower`, but for all other hitherto unlisted |
123 | 114 |
ASCII printable characters (except backquote). |
124 | 115 |
|
125 |
- Raises: |
|
126 |
- AmbiguousByteRepresentationError: |
|
127 |
- The phrase is a text string with differing NFC- and |
|
128 |
- NFD-normalized UTF-8 byte representations. |
|
129 |
- |
|
130 | 116 |
""" |
131 | 117 |
self._phrase = self._get_binary_string(phrase) |
132 | 118 |
self._length = length |
... | ... |
@@ -230,9 +216,8 @@ class Vault: |
230 | 216 |
def _get_binary_string(s: bytes | bytearray | str, /) -> bytes: |
231 | 217 |
"""Convert the input string to a read-only, binary string. |
232 | 218 |
|
233 |
- If it is a text string, then test for an unambiguous UTF-8 |
|
234 |
- representation, otherwise abort. (That is, check whether the |
|
235 |
- NFC and NFD forms of the string coincide.) |
|
219 |
+ If it is a text string, return the string's UTF-8 |
|
220 |
+ representation. |
|
236 | 221 |
|
237 | 222 |
Args: |
238 | 223 |
s: The string to (check and) convert. |
... | ... |
@@ -240,16 +225,8 @@ class Vault: |
240 | 225 |
Returns: |
241 | 226 |
A read-only, binary copy of the string. |
242 | 227 |
|
243 |
- Raises: |
|
244 |
- AmbiguousByteRepresentationError: |
|
245 |
- The text string has differing NFC- and NFD-normalized |
|
246 |
- UTF-8 byte representations. |
|
247 |
- |
|
248 | 228 |
""" |
249 | 229 |
if isinstance(s, str): |
250 |
- norm = unicodedata.normalize |
|
251 |
- if norm('NFC', s) != norm('NFD', s): |
|
252 |
- raise AmbiguousByteRepresentationError |
|
253 | 230 |
return s.encode('UTF-8') |
254 | 231 |
return bytes(s) |
255 | 232 |
|
... | ... |
@@ -272,9 +249,6 @@ class Vault: |
272 | 249 |
A master passphrase, or sometimes an SSH signature. |
273 | 250 |
Used as the key for PBKDF2, the underlying cryptographic |
274 | 251 |
primitive. |
275 |
- |
|
276 |
- If a text string, then the byte representation must be |
|
277 |
- unique. |
|
278 | 252 |
service: |
279 | 253 |
A vault service name. Will be suffixed with |
280 | 254 |
`Vault._UUID`, and then used as the salt value for |
... | ... |
@@ -285,11 +259,6 @@ class Vault: |
285 | 259 |
Returns: |
286 | 260 |
A pseudorandom byte string of length `length`. |
287 | 261 |
|
288 |
- Raises: |
|
289 |
- AmbiguousByteRepresentationError: |
|
290 |
- The phrase is a text string with differing NFC- and |
|
291 |
- NFD-normalized UTF-8 byte representations. |
|
292 |
- |
|
293 | 262 |
Note: |
294 | 263 |
Shorter values returned from this method (with the same key |
295 | 264 |
and message) are prefixes of longer values returned from |
... | ... |
@@ -343,17 +312,9 @@ class Vault: |
343 | 312 |
If given, override the passphrase given during |
344 | 313 |
construction. |
345 | 314 |
|
346 |
- If a text string, then the byte representation must be |
|
347 |
- unique. |
|
348 |
- |
|
349 | 315 |
Returns: |
350 | 316 |
The service passphrase. |
351 | 317 |
|
352 |
- Raises: |
|
353 |
- AmbiguousByteRepresentationError: |
|
354 |
- The phrase is a text string with differing NFC- and |
|
355 |
- NFD-normalized UTF-8 byte representations. |
|
356 |
- |
|
357 | 318 |
Examples: |
358 | 319 |
>>> phrase = b'She cells C shells bye the sea shoars' |
359 | 320 |
>>> # Using default options in constructor. |
... | ... |
@@ -205,27 +205,19 @@ class TestVault: |
205 | 205 |
assert v.generate(service) == expected |
206 | 206 |
|
207 | 207 |
@pytest.mark.parametrize( |
208 |
- ['s', 'raises'], |
|
208 |
+ 's', |
|
209 | 209 |
[ |
210 |
- ('ñ', True), |
|
211 |
- ('Düsseldorf', True), |
|
212 |
- ('liberté, egalité, fraternité', True), |
|
213 |
- ('ASCII', False), |
|
214 |
- (b'D\xc3\xbcsseldorf', False), |
|
215 |
- (bytearray([2, 3, 5, 7, 11, 13]), False), |
|
210 |
+ 'ñ', |
|
211 |
+ 'Düsseldorf', |
|
212 |
+ 'liberté, egalité, fraternité', |
|
213 |
+ 'ASCII', |
|
214 |
+ b'D\xc3\xbcsseldorf', |
|
215 |
+ bytearray([2, 3, 5, 7, 11, 13]), |
|
216 | 216 |
], |
217 | 217 |
) |
218 |
- def test_224_binary_strings( |
|
219 |
- self, s: str | bytes | bytearray, raises: bool |
|
220 |
- ) -> None: |
|
218 |
+ def test_224_binary_strings(self, s: str | bytes | bytearray) -> None: |
|
221 | 219 |
binstr = Vault._get_binary_string |
222 |
- AmbiguousByteRepresentationError = ( # noqa: N806 |
|
223 |
- derivepassphrase.vault.AmbiguousByteRepresentationError |
|
224 |
- ) |
|
225 |
- if raises: |
|
226 |
- with pytest.raises(AmbiguousByteRepresentationError): |
|
227 |
- binstr(s) |
|
228 |
- elif isinstance(s, str): |
|
220 |
+ if isinstance(s, str): |
|
229 | 221 |
assert binstr(s) == s.encode('UTF-8') |
230 | 222 |
assert binstr(binstr(s)) == s.encode('UTF-8') |
231 | 223 |
else: |
... | ... |
@@ -1237,6 +1237,101 @@ contents go here |
1237 | 1237 |
b'Cannot store config' in result.stderr_bytes |
1238 | 1238 |
), 'program unexpectedly failed?!' |
1239 | 1239 |
|
1240 |
+ @pytest.mark.parametrize( |
|
1241 |
+ ['command_line', 'input', 'error_message'], |
|
1242 |
+ [ |
|
1243 |
+ pytest.param( |
|
1244 |
+ ['--import', '-'], |
|
1245 |
+ json.dumps({ |
|
1246 |
+ 'global': {'phrase': 'Du\u0308sseldorf'}, |
|
1247 |
+ 'services': {}, |
|
1248 |
+ }), |
|
1249 |
+ 'the global passphrase is not NFC-normalized', |
|
1250 |
+ id='global-NFC', |
|
1251 |
+ ), |
|
1252 |
+ pytest.param( |
|
1253 |
+ ['--import', '-'], |
|
1254 |
+ json.dumps({ |
|
1255 |
+ 'services': { |
|
1256 |
+ DUMMY_SERVICE: DUMMY_CONFIG_SETTINGS.copy(), |
|
1257 |
+ 'weird entry name': {'phrase': 'Du\u0308sseldorf'}, |
|
1258 |
+ } |
|
1259 |
+ }), |
|
1260 |
+ ( |
|
1261 |
+ "the services.'weird entry name' passphrase " |
|
1262 |
+ 'is not NFC-normalized' |
|
1263 |
+ ), |
|
1264 |
+ id='service-weird-name-NFC', |
|
1265 |
+ ), |
|
1266 |
+ pytest.param( |
|
1267 |
+ ['--config', '-p', DUMMY_SERVICE], |
|
1268 |
+ 'Du\u0308sseldorf', |
|
1269 |
+ ( |
|
1270 |
+ f'the services.{DUMMY_SERVICE} passphrase ' |
|
1271 |
+ f'is not NFC-normalized' |
|
1272 |
+ ), |
|
1273 |
+ id='config-NFC', |
|
1274 |
+ ), |
|
1275 |
+ pytest.param( |
|
1276 |
+ ['-p', DUMMY_SERVICE], |
|
1277 |
+ 'Du\u0308sseldorf', |
|
1278 |
+ 'the interactive passphrase is not NFC-normalized', |
|
1279 |
+ id='direct-input-NFC', |
|
1280 |
+ ), |
|
1281 |
+ pytest.param( |
|
1282 |
+ ['--import', '-'], |
|
1283 |
+ json.dumps({ |
|
1284 |
+ 'global': { |
|
1285 |
+ 'unicode_normalization_form': 'NFD', |
|
1286 |
+ 'phrase': 'D\u00fcsseldorf', |
|
1287 |
+ }, |
|
1288 |
+ 'services': {}, |
|
1289 |
+ }), |
|
1290 |
+ 'the global passphrase is not NFD-normalized', |
|
1291 |
+ id='global-NFD', |
|
1292 |
+ ), |
|
1293 |
+ pytest.param( |
|
1294 |
+ ['--import', '-'], |
|
1295 |
+ json.dumps({ |
|
1296 |
+ 'global': { |
|
1297 |
+ 'unicode_normalization_form': 'NFD', |
|
1298 |
+ }, |
|
1299 |
+ 'services': { |
|
1300 |
+ DUMMY_SERVICE: DUMMY_CONFIG_SETTINGS.copy(), |
|
1301 |
+ 'weird entry name': {'phrase': 'D\u00fcsseldorf'}, |
|
1302 |
+ } |
|
1303 |
+ }), |
|
1304 |
+ ( |
|
1305 |
+ "the services.'weird entry name' passphrase " |
|
1306 |
+ 'is not NFD-normalized' |
|
1307 |
+ ), |
|
1308 |
+ id='service-weird-name-NFD', |
|
1309 |
+ ), |
|
1310 |
+ ], |
|
1311 |
+ ) |
|
1312 |
+ def test_300_unicode_normalization_form_warning( |
|
1313 |
+ self, |
|
1314 |
+ monkeypatch: Any, |
|
1315 |
+ command_line: list[str], |
|
1316 |
+ input: str | None, |
|
1317 |
+ error_message: str, |
|
1318 |
+ ) -> None: |
|
1319 |
+ runner = click.testing.CliRunner(mix_stderr=False) |
|
1320 |
+ with tests.isolated_config( |
|
1321 |
+ monkeypatch=monkeypatch, |
|
1322 |
+ runner=runner, |
|
1323 |
+ config={'services': {DUMMY_SERVICE: DUMMY_CONFIG_SETTINGS.copy()}}, |
|
1324 |
+ ): |
|
1325 |
+ result = runner.invoke( |
|
1326 |
+ cli.derivepassphrase, |
|
1327 |
+ command_line, |
|
1328 |
+ catch_exceptions=False, |
|
1329 |
+ input=input, |
|
1330 |
+ ) |
|
1331 |
+ assert (result.exception, result.exit_code) == (None, 0) |
|
1332 |
+ assert result.stderr_bytes is not None |
|
1333 |
+ assert error_message in result.stderr |
|
1334 |
+ |
|
1240 | 1335 |
|
1241 | 1336 |
class TestCLIUtils: |
1242 | 1337 |
def test_100_save_bad_config(self, monkeypatch: Any) -> None: |