Signal and list falsy value cleanup steps that were actually performed
Marco Ricci

Marco Ricci commited on 2024-10-09 16:20:12
Zeige 5 geänderte Dateien mit 217 Einfügungen und 80 Löschungen.


Signal whether cleanup was actually perfomed on the requested object or
not, and if yes, list the actual cleanup steps undertaken.  When
importing a vault configuration on the command-line, issue a warning for
each cleaning step.

Since the warning messages in both the cleanup steps and the check for
non-normalized passphrases report on "paths" in a JSON object, implement
a fully general JSONPath formatting function (single item selection from
the root only).  This harmonizes the warnings output, but also causes
changes in the test cases and expected output.  Additionally, the
JSONPath function name clashes with a common local variable name,
necessitating renaming, and control flow for the validation function and
the vault configuration import action have changed somewhat; the former
to impose a consistent validation order (global first, service-specific
next), the latter to avoid extraneous else-branches.

As a result of all this, this patch is somewhat larger and less concise
than it should be, given the modest magnitude of changes it actually
introduces.
... ...
@@ -5,12 +5,15 @@
5 5
     matter the level.
6 6
 
7 7
     This is a command-line only change.
8
-  - In `derivepassphrase vault`, accept falsy values everywhere `vault` does:
9
-    depending on the setting, they are equivalent to zero, the empty string, or
10
-    "not set". ([#17])
11 8
 
12
-    This is a command-line only change.  The API provides a new function to
13
-    normalize falsy settings, but still otherwise requires settings to be of
14
-    the correct type.
9
+  - In `derivepassphrase vault`, when importing settings, accept falsy values
10
+    everywhere `vault` does, with a warning.  Depending on the setting, they
11
+    are equivalent to zero, the empty string, or "not set".  ([#17])
12
+
13
+    This is a command-line only change, and only affects importing.  The API
14
+    provides a new function to normalize falsy settings, but still otherwise
15
+    requires settings to be of the correct type.  Storing a malformed
16
+    configuration with such falsy values will still generate errors when
17
+    `derivepassphrase vault` loads the settings from disk.
15 18
 
16 19
 [#17]: https://github.com/the-13th-letter/derivepassphrase/issues/17
... ...
@@ -6,7 +6,9 @@
6 6
 
7 7
 from __future__ import annotations
8 8
 
9
+import collections
9 10
 import enum
11
+import json
10 12
 import math
11 13
 from typing import TYPE_CHECKING
12 14
 
... ...
@@ -17,7 +19,7 @@ from typing_extensions import (
17 19
 )
18 20
 
19 21
 if TYPE_CHECKING:
20
-    from collections.abc import Sequence
22
+    from collections.abc import MutableSequence, Sequence
21 23
     from typing import Literal
22 24
 
23 25
     from typing_extensions import (
... ...
@@ -136,6 +138,58 @@ class VaultConfig(TypedDict, _VaultConfig, total=False):
136 138
     services: Required[dict[str, VaultConfigServicesSettings]]
137 139
 
138 140
 
141
+def json_path(path: Sequence[str | int], /) -> str:
142
+    r"""Transform a series of keys and indices into a JSONPath selector.
143
+
144
+    The resulting JSONPath selector conforms to RFC 9535, is always
145
+    rooted at the JSON root node (i.e., starts with `$`), and only
146
+    contains name and index selectors (in shorthand dot notation, where
147
+    possible).
148
+
149
+    Args:
150
+        path:
151
+            A sequence of object keys or array indices to navigate to
152
+            the desired JSON value, starting from the root node.
153
+
154
+    Returns:
155
+        A valid JSONPath selector (a string) identifying the desired
156
+        JSON value.
157
+
158
+    Examples:
159
+        >>> json_path(['global', 'phrase'])
160
+        '$.global.phrase'
161
+        >>> json_path(['services', 'service name with spaces', 'length'])
162
+        '$.services["service name with spaces"].length'
163
+        >>> json_path(['services', 'special\u000acharacters', 'notes'])
164
+        '$.services["special\\ncharacters"].notes'
165
+        >>> print(json_path(['services', 'special\u000acharacters', 'notes']))
166
+        $.services["special\ncharacters"].notes
167
+        >>> json_path(['custom_array', 2, 0])
168
+        '$.custom_array[2][0]'
169
+
170
+    """
171
+
172
+    def needs_longhand(x: str | int) -> bool:
173
+        initial = (
174
+            frozenset('abcdefghijklmnopqrstuvwxyz')
175
+            | frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
176
+            | frozenset('_')
177
+        )
178
+        chars = initial | frozenset('0123456789')
179
+        return not (
180
+            isinstance(x, str)
181
+            and x
182
+            and set(x).issubset(chars)
183
+            and x[:1] in initial
184
+        )
185
+
186
+    chunks = ['$']
187
+    chunks.extend(
188
+        f'[{json.dumps(x)}]' if needs_longhand(x) else f'.{x}' for x in path
189
+    )
190
+    return ''.join(chunks)
191
+
192
+
139 193
 def validate_vault_config(  # noqa: C901,PLR0912,PLR0915
140 194
     obj: Any,  # noqa: ANN401
141 195
     /,
... ...
@@ -162,54 +216,34 @@ def validate_vault_config(  # noqa: C901,PLR0912,PLR0915
162 216
             disallowed value.
163 217
 
164 218
     """
165
-
166
-    def maybe_quote(x: str) -> str:
167
-        chars = (
168
-            frozenset('abcdefghijklmnopqrstuvwxyz')
169
-            | frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
170
-            | frozenset('0123456789')
171
-            | frozenset('_')
172
-        )
173
-        initial = (
174
-            frozenset('abcdefghijklmnopqrstuvwxyz')
175
-            | frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
176
-            | frozenset('_')
177
-        )
178
-        return (
179
-            x if x and set(x).issubset(chars) and x[:1] in initial else repr(x)
180
-        )
181
-
182
-    def as_json_path_string(json_path: Sequence[str], /) -> str:
183
-        return ''.join('.' + maybe_quote(x) for x in json_path)
184
-
185 219
     err_obj_not_a_dict = 'vault config is not a dict'
186 220
     err_non_str_service_name = (
187 221
         'vault config contains non-string service name {!r}'
188 222
     )
189 223
 
190
-    def err_not_a_dict(json_path: Sequence[str], /) -> str:
191
-        json_path_str = as_json_path_string(json_path)
224
+    def err_not_a_dict(path: Sequence[str], /) -> str:
225
+        json_path_str = json_path(path)
192 226
         return f'vault config entry {json_path_str} is not a dict'
193 227
 
194
-    def err_not_a_string(json_path: Sequence[str], /) -> str:
195
-        json_path_str = as_json_path_string(json_path)
228
+    def err_not_a_string(path: Sequence[str], /) -> str:
229
+        json_path_str = json_path(path)
196 230
         return f'vault config entry {json_path_str} is not a string'
197 231
 
198
-    def err_not_an_int(json_path: Sequence[str], /) -> str:
199
-        json_path_str = as_json_path_string(json_path)
232
+    def err_not_an_int(path: Sequence[str], /) -> str:
233
+        json_path_str = json_path(path)
200 234
         return f'vault config entry {json_path_str} is not an integer'
201 235
 
202 236
     def err_derivepassphrase_extension(
203
-        key: str, json_path: Sequence[str], /
237
+        key: str, path: Sequence[str], /
204 238
     ) -> str:
205
-        json_path_str = as_json_path_string(json_path)
239
+        json_path_str = json_path(path)
206 240
         return (
207 241
             f'vault config entry {json_path_str} uses '
208 242
             f'`derivepassphrase` extension {key!r}'
209 243
         )
210 244
 
211
-    def err_unknown_setting(key: str, json_path: Sequence[str], /) -> str:
212
-        json_path_str = as_json_path_string(json_path)
245
+    def err_unknown_setting(key: str, path: Sequence[str], /) -> str:
246
+        json_path_str = json_path(path)
213 247
         return (
214 248
             f'vault config entry {json_path_str} uses '
215 249
             f'unknown setting {key!r}'
... ...
@@ -217,12 +251,12 @@ def validate_vault_config(  # noqa: C901,PLR0912,PLR0915
217 251
 
218 252
     def err_bad_number(
219 253
         key: str,
220
-        json_path: Sequence[str],
254
+        path: Sequence[str],
221 255
         /,
222 256
         *,
223 257
         strictly_positive: bool = False,
224 258
     ) -> str:
225
-        json_path_str = as_json_path_string((*json_path, key))
259
+        json_path_str = json_path((*path, key))
226 260
         return f'vault config entry {json_path_str} is ' + (
227 261
             'not positive' if strictly_positive else 'negative'
228 262
         )
... ...
@@ -344,7 +378,36 @@ def js_truthiness(value: Any, /) -> bool:  # noqa: ANN401
344 378
     return not (isinstance(value, float) and math.isnan(value))
345 379
 
346 380
 
347
-def clean_up_falsy_vault_config_values(obj: Any) -> None:  # noqa: ANN401,C901,PLR0912
381
+class CleanupStep(NamedTuple):
382
+    """A single executed step during vault config cleanup.
383
+
384
+    Attributes:
385
+        path:
386
+            A sequence of object keys or array indices to navigate to
387
+            the JSON value that was cleaned up.
388
+        old_value:
389
+            The old value.
390
+        action:
391
+            Either `'replace'` if `old_value` was replaced with
392
+            `new_value`, or `'remove'` if `old_value` was removed.
393
+        new_value:
394
+            The new value.
395
+
396
+    """
397
+
398
+    path: Sequence[str | int]
399
+    """"""
400
+    old_value: Any
401
+    """"""
402
+    action: Literal['replace', 'remove']
403
+    """"""
404
+    new_value: Any
405
+    """"""
406
+
407
+
408
+def clean_up_falsy_vault_config_values(  # noqa: C901,PLR0912
409
+    obj: Any,  # noqa: ANN401
410
+) -> Sequence[CleanupStep] | None:
348 411
     """Convert falsy values in a vault config to correct types, in-place.
349 412
 
350 413
     Needed for compatibility with vault(1), which sometimes uses only
... ...
@@ -358,6 +421,19 @@ def clean_up_falsy_vault_config_values(obj: Any) -> None:  # noqa: ANN401,C901,P
358 421
             A presumed valid vault configuration save for using falsy
359 422
             values of the wrong type.
360 423
 
424
+    Returns:
425
+        A list of 4-tuples `(key_tup, old_value, action, new_value)`,
426
+        indicating the cleanup actions performed.  `key_tup` is
427
+        a sequence of object keys and/or array indices indicating the
428
+        JSON path to the leaf value that was cleaned up, `old_value` is
429
+        the old value, `new_value` is the new value, and `action` is
430
+        either `replace` (`old_value` was replaced with `new_value`) or
431
+        `remove` (`old_value` was removed, and `new_value` is
432
+        meaningless).
433
+
434
+        If cleanup was never attempted because of an obviously invalid
435
+        vault configuration, then `None` is returned, directly.
436
+
361 437
     """
362 438
     if (  # pragma: no cover
363 439
         not isinstance(obj, dict)
... ...
@@ -365,27 +441,43 @@ def clean_up_falsy_vault_config_values(obj: Any) -> None:  # noqa: ANN401,C901,P
365 441
         or not isinstance(obj['services'], dict)
366 442
     ):
367 443
         # config is invalid
368
-        return
369
-    service_objects = list(obj['services'].values())
370
-    if not all(  # pragma: no cover
371
-        isinstance(service_obj, dict) for service_obj in service_objects
372
-    ):
373
-        # config is invalid
374
-        return
444
+        return None
445
+    service_objects: MutableSequence[
446
+        tuple[Sequence[str | int], dict[str, Any]]
447
+    ] = collections.deque()
375 448
     if 'global' in obj:
376 449
         if isinstance(obj['global'], dict):
377
-            service_objects.append(obj['global'])
450
+            service_objects.append((['global'], obj['global']))
378 451
         else:  # pragma: no cover
379 452
             # config is invalid
380
-            return
381
-    for service_obj in service_objects:
453
+            return None
454
+    service_objects.extend(
455
+        (['services', sv], val) for sv, val in obj['services'].items()
456
+    )
457
+    if not all(  # pragma: no cover
458
+        isinstance(service_obj, dict) for _, service_obj in service_objects
459
+    ):
460
+        # config is invalid
461
+        return None
462
+    cleanup_completed: MutableSequence[CleanupStep] = collections.deque()
463
+    for path, service_obj in service_objects:
382 464
         for key, value in list(service_obj.items()):
383 465
             # Use match/case here once Python 3.9 becomes unsupported.
384 466
             if key == 'phrase':
385
-                if not js_truthiness(value):
467
+                if not js_truthiness(value) and value != '':  # noqa: PLC1901
468
+                    cleanup_completed.append(
469
+                        CleanupStep(
470
+                            (*path, key), service_obj[key], 'replace', ''
471
+                        )
472
+                    )
386 473
                     service_obj[key] = ''
387 474
             elif key in {'notes', 'key', 'length', 'repeat'}:
388 475
                 if not js_truthiness(value):
476
+                    cleanup_completed.append(
477
+                        CleanupStep(
478
+                            (*path, key), service_obj[key], 'remove', None
479
+                        )
480
+                    )
389 481
                     service_obj.pop(key)
390 482
             elif key in {  # noqa: SIM102
391 483
                 'lower',
... ...
@@ -396,7 +488,13 @@ def clean_up_falsy_vault_config_values(obj: Any) -> None:  # noqa: ANN401,C901,P
396 488
                 'symbol',
397 489
             }:
398 490
                 if not js_truthiness(value) and value != 0:
491
+                    cleanup_completed.append(
492
+                        CleanupStep(
493
+                            (*path, key), service_obj[key], 'replace', 0
494
+                        )
495
+                    )
399 496
                     service_obj.pop(key)
497
+    return cleanup_completed
400 498
 
401 499
 
402 500
 class KeyCommentPair(NamedTuple):
... ...
@@ -10,6 +10,7 @@ import base64
10 10
 import collections
11 11
 import contextlib
12 12
 import copy
13
+import enum
13 14
 import importlib
14 15
 import inspect
15 16
 import json
... ...
@@ -716,29 +717,29 @@ def _prompt_for_passphrase() -> str:
716 717
     )
717 718
 
718 719
 
720
+class _ORIGIN(enum.Enum):
721
+    INTERACTIVE: str = 'interactive'
722
+
723
+
719 724
 def _check_for_misleading_passphrase(
720
-    key: tuple[str, ...],
725
+    key: tuple[str, ...] | _ORIGIN,
721 726
     value: dict[str, Any],
722 727
     *,
723 728
     form: Literal['NFC', 'NFD', 'NFKC', 'NFKD'] = 'NFC',
724 729
 ) -> None:
725
-    def is_json_identifier(x: str) -> bool:
726
-        return not x.startswith(tuple('0123456789')) and not any(
727
-            c.lower() not in set('0123456789abcdefghijklmnopqrstuvwxyz_')
728
-            for c in x
729
-        )
730
-
731 730
     if 'phrase' in value:
732 731
         phrase = value['phrase']
733 732
         if not unicodedata.is_normalized(form, phrase):
734
-            key_path = '.'.join(
735
-                x if is_json_identifier(x) else repr(x) for x in key
733
+            formatted_key = (
734
+                key.value
735
+                if isinstance(key, _ORIGIN)
736
+                else _types.json_path(key)
736 737
             )
737 738
             click.echo(
738 739
                 (
739
-                    f'{PROG_NAME}: Warning: the {key_path} passphrase '
740
-                    f'is not {form}-normalized. Make sure to double-check '
741
-                    f'this is really the passphrase you want.'
740
+                    f'{PROG_NAME}: Warning: the {formatted_key} '
741
+                    f'passphrase is not {form}-normalized. Make sure to '
742
+                    f'double-check this is really the passphrase you want.'
742 743
                 ),
743 744
                 err=True,
744 745
             )
... ...
@@ -1410,8 +1411,28 @@ def derivepassphrase_vault(  # noqa: C901,PLR0912,PLR0913,PLR0914,PLR0915
1410 1411
             err(f'Cannot load config: cannot decode JSON: {e}')
1411 1412
         except OSError as e:
1412 1413
             err(f'Cannot load config: {e.strerror}: {e.filename!r}')
1413
-        _types.clean_up_falsy_vault_config_values(maybe_config)
1414
-        if _types.is_vault_config(maybe_config):
1414
+        cleaned = _types.clean_up_falsy_vault_config_values(maybe_config)
1415
+        if not _types.is_vault_config(maybe_config):
1416
+            err(f'Cannot load config: {_INVALID_VAULT_CONFIG}')
1417
+        assert cleaned is not None
1418
+        for step in cleaned:
1419
+            # These are never fatal errors, because the semantics of
1420
+            # vault upon encountering these settings are ill-specified,
1421
+            # but not ill-defined.
1422
+            if step.action == 'replace':
1423
+                err_msg = (
1424
+                    f'{PROG_NAME}: Warning: Replacing invalid value '
1425
+                    f'{json.dumps(step.old_value)} for key '
1426
+                    f'{_types.json_path(step.path)} with '
1427
+                    f'{json.dumps(step.new_value)}.'
1428
+                )
1429
+            else:
1430
+                err_msg = (
1431
+                    f'{PROG_NAME}: Warning: Removing ineffective setting '
1432
+                    f'{_types.json_path(step.path)} = '
1433
+                    f'{json.dumps(step.old_value)}.'
1434
+                )
1435
+            click.echo(err_msg, err=True)
1415 1436
         form = cast(
1416 1437
             Literal['NFC', 'NFD', 'NFKC', 'NFKD'],
1417 1438
             maybe_config.get('global', {}).get(
... ...
@@ -1431,8 +1452,6 @@ def derivepassphrase_vault(  # noqa: C901,PLR0912,PLR0913,PLR0914,PLR0915
1431 1452
                 form=form,
1432 1453
             )
1433 1454
         put_config(maybe_config)
1434
-        else:
1435
-            err(f'Cannot load config: {_INVALID_VAULT_CONFIG}')
1436 1455
     elif export_settings:
1437 1456
         configuration = get_config()
1438 1457
         try:
... ...
@@ -1570,7 +1589,7 @@ def derivepassphrase_vault(  # noqa: C901,PLR0912,PLR0913,PLR0914,PLR0915
1570 1589
                 )
1571 1590
                 assert form in {'NFC', 'NFD', 'NFKC', 'NFKD'}
1572 1591
                 _check_for_misleading_passphrase(
1573
-                    ('interactive',), {'phrase': phrase}, form=form
1592
+                    _ORIGIN.INTERACTIVE, {'phrase': phrase}, form=form
1574 1593
                 )
1575 1594
 
1576 1595
             # If either --key or --phrase are given, use that setting.
... ...
@@ -642,8 +642,19 @@ class TestCLI:
642 642
             ) as infile:
643 643
                 config3 = json.load(infile)
644 644
         result = tests.ReadableResult.parse(_result)
645
-        assert result.clean_exit(empty_stderr=True), 'expected clean exit'
645
+
646
+        def expected_warning_line(line: str) -> bool:
647
+            return (
648
+                ' Warning: Replacing invalid value ' in line
649
+                or ' Warning: Removing ineffective setting ' in line
650
+            )
651
+
652
+        assert result.clean_exit(empty_stderr=False), 'expected clean exit'
646 653
         assert config3 == config2, 'config not imported correctly'
654
+        assert not result.stderr or all(
655
+            expected_warning_line(line)
656
+            for line in result.stderr.splitlines(True)
657
+        ), 'unexpected error output'
647 658
 
648 659
     def test_213b_import_bad_config_not_vault_config(
649 660
         self,
... ...
@@ -1277,7 +1288,7 @@ contents go here
1277 1288
                     'global': {'phrase': 'Du\u0308sseldorf'},
1278 1289
                     'services': {},
1279 1290
                 }),
1280
-                'the global passphrase is not NFC-normalized',
1291
+                'the $.global passphrase is not NFC-normalized',
1281 1292
                 id='global-NFC',
1282 1293
             ),
1283 1294
             pytest.param(
... ...
@@ -1289,7 +1300,7 @@ contents go here
1289 1300
                     }
1290 1301
                 }),
1291 1302
                 (
1292
-                    "the services.'weird entry name' passphrase "
1303
+                    'the $.services["weird entry name"] passphrase '
1293 1304
                     'is not NFC-normalized'
1294 1305
                 ),
1295 1306
                 id='service-weird-name-NFC',
... ...
@@ -1298,7 +1309,7 @@ contents go here
1298 1309
                 ['--config', '-p', DUMMY_SERVICE],
1299 1310
                 'Du\u0308sseldorf',
1300 1311
                 (
1301
-                    f'the services.{DUMMY_SERVICE} passphrase '
1312
+                    f'the $.services.{DUMMY_SERVICE} passphrase '
1302 1313
                     f'is not NFC-normalized'
1303 1314
                 ),
1304 1315
                 id='config-NFC',
... ...
@@ -1318,7 +1329,7 @@ contents go here
1318 1329
                     },
1319 1330
                     'services': {},
1320 1331
                 }),
1321
-                'the global passphrase is not NFD-normalized',
1332
+                'the $.global passphrase is not NFD-normalized',
1322 1333
                 id='global-NFD',
1323 1334
             ),
1324 1335
             pytest.param(
... ...
@@ -1333,7 +1344,7 @@ contents go here
1333 1344
                     },
1334 1345
                 }),
1335 1346
                 (
1336
-                    "the services.'weird entry name' passphrase "
1347
+                    'the $.services["weird entry name"] passphrase '
1337 1348
                     'is not NFD-normalized'
1338 1349
                 ),
1339 1350
                 id='service-weird-name-NFD',
... ...
@@ -99,14 +99,17 @@ def test_200_is_vault_config(test_config: tests.VaultTestConfig) -> None:
99 99
 def test_200a_is_vault_config_smudged(
100 100
     test_config: tests.VaultTestConfig,
101 101
 ) -> None:
102
-    obj, comment, _ = test_config
103
-    obj = copy.deepcopy(obj)
104
-    _types.clean_up_falsy_vault_config_values(obj)
102
+    _obj, comment, _ = test_config
103
+    obj = copy.deepcopy(_obj)
104
+    did_cleanup = _types.clean_up_falsy_vault_config_values(obj)
105 105
     assert _types.is_vault_config(obj) == (not comment), (
106 106
         'failed to complain about: ' + comment
107 107
         if comment
108 108
         else 'failed on valid example'
109 109
     )
110
+    assert did_cleanup is None or bool(did_cleanup) == (
111
+        obj != _obj
112
+    ), 'mismatched report on cleanup work'
110 113
 
111 114
 
112 115
 @pytest.mark.parametrize(
... ...
@@ -148,12 +151,12 @@ def test_400_validate_vault_config(test_config: tests.VaultTestConfig) -> None:
148 151
 def test_400a_validate_vault_config_smudged(
149 152
     test_config: tests.VaultTestConfig,
150 153
 ) -> None:
151
-    obj, comment, validation_settings = test_config
154
+    _obj, comment, validation_settings = test_config
152 155
     allow_unknown_settings, allow_derivepassphrase_extensions = (
153 156
         validation_settings or (True, True)
154 157
     )
155
-    obj = copy.deepcopy(obj)
156
-    _types.clean_up_falsy_vault_config_values(obj)
158
+    obj = copy.deepcopy(_obj)
159
+    did_cleanup = _types.clean_up_falsy_vault_config_values(obj)
157 160
     if comment:
158 161
         with pytest.raises((TypeError, ValueError)):
159 162
             _types.validate_vault_config(
... ...
@@ -170,3 +173,6 @@ def test_400a_validate_vault_config_smudged(
170 173
             )
171 174
         except (TypeError, ValueError) as exc:  # pragma: no cover
172 175
             assert not exc, 'failed to validate valid example'  # noqa: PT017
176
+    assert did_cleanup is None or bool(did_cleanup) == (
177
+        obj != _obj
178
+    ), 'mismatched report on cleanup work'
173 179