Allow all textual strings, but warn on unnormalized ones
Marco Ricci

Marco Ricci commited on 2024-09-01 00:32:49
Zeige 6 geänderte Dateien mit 181 Einfügungen und 61 Löschungen.


Remove the "check if Unicode string has unambiguous byte encoding"
misfeature, which is based on an incorrect understanding of Unicode
normalization forms.  Also remove the `AmbiguousByteRepresentationError`
class.  Now we allow all textual strings as master passphrases.

When storing a (master) passphrase to the configuration or when
interactively generating a passphrase, we emit a warning if we detect
that the master passphrase is not normalized (with the configured
normalization form, stored in `.global.unicode_normalization_form`).
... ...
@@ -7,7 +7,7 @@
7 7
 from __future__ import annotations
8 8
 
9 9
 import enum
10
-from typing import NamedTuple, TypeGuard
10
+from typing import Literal, NamedTuple, TypeGuard
11 11
 
12 12
 from typing_extensions import (
13 13
     Any,
... ...
@@ -34,11 +34,18 @@ class VaultConfigGlobalSettings(TypedDict, total=False):
34 34
             master passphrase. Optional.
35 35
         phrase:
36 36
             The master passphrase. Optional.
37
+        unicode_normalization_form:
38
+            The preferred Unicode normalization form; we warn the user
39
+            if textual passphrases do not match their normalized forms.
40
+            Optional, and a `derivepassphrase` extension.
37 41
 
38 42
     """
39 43
 
40 44
     key: NotRequired[str]
41 45
     phrase: NotRequired[str]
46
+    unicode_normalization_form: NotRequired[
47
+        Literal['NFC', 'NFD', 'NFKC', 'NFKD']
48
+    ]
42 49
 
43 50
 
44 51
 class VaultConfigServicesSettings(VaultConfigGlobalSettings, total=False):
... ...
@@ -123,7 +130,7 @@ def is_vault_config(obj: Any) -> TypeGuard[VaultConfig]:
123 130
         o_global = obj['global']
124 131
         if not isinstance(o_global, dict):
125 132
             return False
126
-        for key in ('key', 'phrase'):
133
+        for key in ('key', 'phrase', 'unicode_normalization_form'):
127 134
             if key in o_global and not isinstance(o_global[key], str):
128 135
                 return False
129 136
         if 'key' in o_global and 'phrase' in o_global:
... ...
@@ -14,8 +14,10 @@ import inspect
14 14
 import json
15 15
 import os
16 16
 import socket
17
+import unicodedata
17 18
 from typing import (
18 19
     TYPE_CHECKING,
20
+    Literal,
19 21
     NoReturn,
20 22
     TextIO,
21 23
     cast,
... ...
@@ -358,6 +360,34 @@ def _prompt_for_passphrase() -> str:
358 360
     )
359 361
 
360 362
 
363
+def _check_for_misleading_passphrase(
364
+    key: tuple[str, ...],
365
+    value: dict[str, Any],
366
+    *,
367
+    form: Literal['NFC', 'NFD', 'NFKC', 'NFKD'] = 'NFC',
368
+) -> None:
369
+    def is_json_identifier(x: str) -> bool:
370
+        return not x.startswith(tuple('0123456789')) and not any(
371
+            c.lower() not in set('0123456789abcdefghijklmnopqrstuvwxyz_')
372
+            for c in x
373
+        )
374
+
375
+    if 'phrase' in value:
376
+        phrase = value['phrase']
377
+        if not unicodedata.is_normalized(form, phrase):
378
+            key_path = '.'.join(
379
+                x if is_json_identifier(x) else repr(x) for x in key
380
+            )
381
+            click.echo(
382
+                (
383
+                    f'{PROG_NAME}: Warning: the {key_path} passphrase '
384
+                    f'is not {form}-normalized. Make sure to double-check '
385
+                    f'this is really the passphrase you want.'
386
+                ),
387
+                err=True,
388
+            )
389
+
390
+
361 391
 class OptionGroupOption(click.Option):
362 392
     """A [`click.Option`][] with an associated group name and group epilog.
363 393
 
... ...
@@ -978,6 +1008,24 @@ def derivepassphrase(
978 1008
         except OSError as e:
979 1009
             err(f'Cannot load config: {e.strerror}: {e.filename!r}')
980 1010
         if _types.is_vault_config(maybe_config):
1011
+            form = cast(
1012
+                Literal['NFC', 'NFD', 'NFKC', 'NFKD'],
1013
+                maybe_config.get('global', {}).get(
1014
+                    'unicode_normalization_form', 'NFC'
1015
+                ),
1016
+            )
1017
+            assert form in {'NFC', 'NFD', 'NFKC', 'NFKD'}
1018
+            _check_for_misleading_passphrase(
1019
+                ('global',),
1020
+                cast(dict[str, Any], maybe_config.get('global', {})),
1021
+                form=form,
1022
+            )
1023
+            for key, value in maybe_config['services'].items():
1024
+                _check_for_misleading_passphrase(
1025
+                    ('services', key),
1026
+                    cast(dict[str, Any], value),
1027
+                    form=form,
1028
+                )
981 1029
             put_config(maybe_config)
982 1030
         else:
983 1031
             err(f'Cannot load config: {_INVALID_VAULT_CONFIG}')
... ...
@@ -1064,6 +1112,10 @@ def derivepassphrase(
1064 1112
                 for m in view.maps:
1065 1113
                     m.pop('phrase', '')
1066 1114
             elif use_phrase:
1115
+                _check_for_misleading_passphrase(
1116
+                    ('services', service) if service else ('global',),
1117
+                    {'phrase': phrase},
1118
+                )
1067 1119
                 view['phrase'] = phrase
1068 1120
                 for m in view.maps:
1069 1121
                     m.pop('key', '')
... ...
@@ -1099,6 +1151,18 @@ def derivepassphrase(
1099 1151
                     base64.standard_b64decode(key)
1100 1152
                 )
1101 1153
 
1154
+            if use_phrase:
1155
+                form = cast(
1156
+                    Literal['NFC', 'NFD', 'NFKC', 'NFKD'],
1157
+                    configuration.get('global', {}).get(
1158
+                        'unicode_normalization_form', 'NFC'
1159
+                    ),
1160
+                )
1161
+                assert form in {'NFC', 'NFD', 'NFKC', 'NFKD'}
1162
+                _check_for_misleading_passphrase(
1163
+                    ('interactive',), {'phrase': phrase}, form=form
1164
+                )
1165
+
1102 1166
             # If either --key or --phrase are given, use that setting.
1103 1167
             # Otherwise, if both key and phrase are set in the config,
1104 1168
             # one must be global (ignore it) and one must be
... ...
@@ -10,7 +10,6 @@ import base64
10 10
 import collections
11 11
 import hashlib
12 12
 import math
13
-import unicodedata
14 13
 from collections.abc import Callable
15 14
 from typing import TypeAlias
16 15
 
... ...
@@ -21,13 +20,6 @@ from derivepassphrase import sequin, ssh_agent
21 20
 __author__ = 'Marco Ricci <m@the13thletter.info>'
22 21
 
23 22
 
24
-class AmbiguousByteRepresentationError(ValueError):
25
-    """The object has an ambiguous byte representation."""
26
-
27
-    def __init__(self) -> None:
28
-        super().__init__('text string has ambiguous byte representation')
29
-
30
-
31 23
 _CHARSETS = collections.OrderedDict([
32 24
     ('lower', b'abcdefghijklmnopqrstuvwxyz'),
33 25
     ('upper', b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
... ...
@@ -97,8 +89,7 @@ class Vault:
97 89
         Args:
98 90
             phrase:
99 91
                 The master passphrase from which to derive the service
100
-                passphrases.  If a text string, then the byte
101
-                representation must be unique.
92
+                passphrases.
102 93
             length:
103 94
                 Desired passphrase length.
104 95
             repeat:
... ...
@@ -122,11 +113,6 @@ class Vault:
122 113
                 Same as `lower`, but for all other hitherto unlisted
123 114
                 ASCII printable characters (except backquote).
124 115
 
125
-        Raises:
126
-            AmbiguousByteRepresentationError:
127
-                The phrase is a text string with differing NFC- and
128
-                NFD-normalized UTF-8 byte representations.
129
-
130 116
         """
131 117
         self._phrase = self._get_binary_string(phrase)
132 118
         self._length = length
... ...
@@ -230,9 +216,8 @@ class Vault:
230 216
     def _get_binary_string(s: bytes | bytearray | str, /) -> bytes:
231 217
         """Convert the input string to a read-only, binary string.
232 218
 
233
-        If it is a text string, then test for an unambiguous UTF-8
234
-        representation, otherwise abort.  (That is, check whether the
235
-        NFC and NFD forms of the string coincide.)
219
+        If it is a text string, return the string's UTF-8
220
+        representation.
236 221
 
237 222
         Args:
238 223
             s: The string to (check and) convert.
... ...
@@ -240,16 +225,8 @@ class Vault:
240 225
         Returns:
241 226
             A read-only, binary copy of the string.
242 227
 
243
-        Raises:
244
-            AmbiguousByteRepresentationError:
245
-                The text string has differing NFC- and NFD-normalized
246
-                UTF-8 byte representations.
247
-
248 228
         """
249 229
         if isinstance(s, str):
250
-            norm = unicodedata.normalize
251
-            if norm('NFC', s) != norm('NFD', s):
252
-                raise AmbiguousByteRepresentationError
253 230
             return s.encode('UTF-8')
254 231
         return bytes(s)
255 232
 
... ...
@@ -272,9 +249,6 @@ class Vault:
272 249
                 A master passphrase, or sometimes an SSH signature.
273 250
                 Used as the key for PBKDF2, the underlying cryptographic
274 251
                 primitive.
275
-
276
-                If a text string, then the byte representation must be
277
-                unique.
278 252
             service:
279 253
                 A vault service name.  Will be suffixed with
280 254
                 `Vault._UUID`, and then used as the salt value for
... ...
@@ -285,11 +259,6 @@ class Vault:
285 259
         Returns:
286 260
             A pseudorandom byte string of length `length`.
287 261
 
288
-        Raises:
289
-            AmbiguousByteRepresentationError:
290
-                The phrase is a text string with differing NFC- and
291
-                NFD-normalized UTF-8 byte representations.
292
-
293 262
         Note:
294 263
             Shorter values returned from this method (with the same key
295 264
             and message) are prefixes of longer values returned from
... ...
@@ -343,17 +312,9 @@ class Vault:
343 312
                 If given, override the passphrase given during
344 313
                 construction.
345 314
 
346
-                If a text string, then the byte representation must be
347
-                unique.
348
-
349 315
         Returns:
350 316
             The service passphrase.
351 317
 
352
-        Raises:
353
-            AmbiguousByteRepresentationError:
354
-                The phrase is a text string with differing NFC- and
355
-                NFD-normalized UTF-8 byte representations.
356
-
357 318
         Examples:
358 319
             >>> phrase = b'She cells C shells bye the sea shoars'
359 320
             >>> # Using default options in constructor.
... ...
@@ -205,27 +205,19 @@ class TestVault:
205 205
         assert v.generate(service) == expected
206 206
 
207 207
     @pytest.mark.parametrize(
208
-        ['s', 'raises'],
208
+        's',
209 209
         [
210
-            ('ñ', True),
211
-            ('Düsseldorf', True),
212
-            ('liberté, egalité, fraternité', True),
213
-            ('ASCII', False),
214
-            (b'D\xc3\xbcsseldorf', False),
215
-            (bytearray([2, 3, 5, 7, 11, 13]), False),
210
+            'ñ',
211
+            'Düsseldorf',
212
+            'liberté, egalité, fraternité',
213
+            'ASCII',
214
+            b'D\xc3\xbcsseldorf',
215
+            bytearray([2, 3, 5, 7, 11, 13]),
216 216
         ],
217 217
     )
218
-    def test_224_binary_strings(
219
-        self, s: str | bytes | bytearray, raises: bool
220
-    ) -> None:
218
+    def test_224_binary_strings(self, s: str | bytes | bytearray) -> None:
221 219
         binstr = Vault._get_binary_string
222
-        AmbiguousByteRepresentationError = (  # noqa: N806
223
-            derivepassphrase.vault.AmbiguousByteRepresentationError
224
-        )
225
-        if raises:
226
-            with pytest.raises(AmbiguousByteRepresentationError):
227
-                binstr(s)
228
-        elif isinstance(s, str):
220
+        if isinstance(s, str):
229 221
             assert binstr(s) == s.encode('UTF-8')
230 222
             assert binstr(binstr(s)) == s.encode('UTF-8')
231 223
         else:
... ...
@@ -1237,6 +1237,101 @@ contents go here
1237 1237
                 b'Cannot store config' in result.stderr_bytes
1238 1238
             ), 'program unexpectedly failed?!'
1239 1239
 
1240
+    @pytest.mark.parametrize(
1241
+        ['command_line', 'input', 'error_message'],
1242
+        [
1243
+            pytest.param(
1244
+                ['--import', '-'],
1245
+                json.dumps({
1246
+                    'global': {'phrase': 'Du\u0308sseldorf'},
1247
+                    'services': {},
1248
+                }),
1249
+                'the global passphrase is not NFC-normalized',
1250
+                id='global-NFC',
1251
+            ),
1252
+            pytest.param(
1253
+                ['--import', '-'],
1254
+                json.dumps({
1255
+                    'services': {
1256
+                        DUMMY_SERVICE: DUMMY_CONFIG_SETTINGS.copy(),
1257
+                        'weird entry name': {'phrase': 'Du\u0308sseldorf'},
1258
+                    }
1259
+                }),
1260
+                (
1261
+                    "the services.'weird entry name' passphrase "
1262
+                    'is not NFC-normalized'
1263
+                ),
1264
+                id='service-weird-name-NFC',
1265
+            ),
1266
+            pytest.param(
1267
+                ['--config', '-p', DUMMY_SERVICE],
1268
+                'Du\u0308sseldorf',
1269
+                (
1270
+                    f'the services.{DUMMY_SERVICE} passphrase '
1271
+                    f'is not NFC-normalized'
1272
+                ),
1273
+                id='config-NFC',
1274
+            ),
1275
+            pytest.param(
1276
+                ['-p', DUMMY_SERVICE],
1277
+                'Du\u0308sseldorf',
1278
+                'the interactive passphrase is not NFC-normalized',
1279
+                id='direct-input-NFC',
1280
+            ),
1281
+            pytest.param(
1282
+                ['--import', '-'],
1283
+                json.dumps({
1284
+                    'global': {
1285
+                        'unicode_normalization_form': 'NFD',
1286
+                        'phrase': 'D\u00fcsseldorf',
1287
+                    },
1288
+                    'services': {},
1289
+                }),
1290
+                'the global passphrase is not NFD-normalized',
1291
+                id='global-NFD',
1292
+            ),
1293
+            pytest.param(
1294
+                ['--import', '-'],
1295
+                json.dumps({
1296
+                    'global': {
1297
+                        'unicode_normalization_form': 'NFD',
1298
+                    },
1299
+                    'services': {
1300
+                        DUMMY_SERVICE: DUMMY_CONFIG_SETTINGS.copy(),
1301
+                        'weird entry name': {'phrase': 'D\u00fcsseldorf'},
1302
+                    }
1303
+                }),
1304
+                (
1305
+                    "the services.'weird entry name' passphrase "
1306
+                    'is not NFD-normalized'
1307
+                ),
1308
+                id='service-weird-name-NFD',
1309
+            ),
1310
+        ],
1311
+    )
1312
+    def test_300_unicode_normalization_form_warning(
1313
+        self,
1314
+        monkeypatch: Any,
1315
+        command_line: list[str],
1316
+        input: str | None,
1317
+        error_message: str,
1318
+    ) -> None:
1319
+        runner = click.testing.CliRunner(mix_stderr=False)
1320
+        with tests.isolated_config(
1321
+            monkeypatch=monkeypatch,
1322
+            runner=runner,
1323
+            config={'services': {DUMMY_SERVICE: DUMMY_CONFIG_SETTINGS.copy()}},
1324
+        ):
1325
+            result = runner.invoke(
1326
+                cli.derivepassphrase,
1327
+                command_line,
1328
+                catch_exceptions=False,
1329
+                input=input,
1330
+            )
1331
+            assert (result.exception, result.exit_code) == (None, 0)
1332
+            assert result.stderr_bytes is not None
1333
+            assert error_message in result.stderr
1334
+
1240 1335
 
1241 1336
 class TestCLIUtils:
1242 1337
     def test_100_save_bad_config(self, monkeypatch: Any) -> None:
... ...
@@ -0,0 +1 @@
1
+Warn the user when entering (directly, or via configuration editing/importing) a passphrase that is not in the configured Unicode normalization form. (But don't otherwise reject any textual master passphrases.)
0 2