Add vault_native exporter function and module docstrings
Marco Ricci

Marco Ricci committed on 2024-09-01 16:13:48
Zeige 4 geänderte Dateien mit 192 Einfügungen und 12 Löschungen.


Add an all-in-one exporter function to the
`derivepassphrase.exporter.vault_native` module, similar to the function
in the `derivepassphrase.exporter.storeroom` module. These two functions
now define the public API of the respective module, and the other
visible names are considered non-public (though documented, because they
explain the inner workings of the respective exporters).  The module
docstrings have been appropriately expanded, and the tests have been
adapted and expanded for the new function.
... ...
@@ -47,7 +47,9 @@ def _load_data(
47 47
                 raise ModuleNotFoundError
48 48
             with open(path, 'rb') as infile:
49 49
                 contents = base64.standard_b64decode(infile.read())
50
-            return module.VaultNativeV02ConfigParser(contents, key)()
50
+            return module.export_vault_native_data(
51
+                contents, key, try_formats=['v0.2']
52
+            )
51 53
         case 'v0.3':
52 54
             module = importlib.import_module(
53 55
                 'derivepassphrase.exporter.vault_native'
... ...
@@ -56,7 +58,9 @@ def _load_data(
56 58
                 raise ModuleNotFoundError
57 59
             with open(path, 'rb') as infile:
58 60
                 contents = base64.standard_b64decode(infile.read())
59
-            return module.VaultNativeV03ConfigParser(contents, key)()
61
+            return module.export_vault_native_data(
62
+                contents, key, try_formats=['v0.3']
63
+            )
60 64
         case 'storeroom':
61 65
             module = importlib.import_module(
62 66
                 'derivepassphrase.exporter.storeroom'
... ...
@@ -2,7 +2,24 @@
2 2
 #
3 3
 # SPDX-License-Identifier: MIT
4 4
 
5
-"""Exporter for the vault "storeroom" configuration format."""
5
+"""Exporter for the vault "storeroom" configuration format.
6
+
7
+The "storeroom" format is the experimental format used in alpha and beta
8
+versions of vault beyond v0.3.0.  The configuration is stored as
9
+a separate directory, which acts like a hash table (i.e. has named
10
+slots) and provides an impure quasi-filesystem interface.  Each hash
11
+table entry is separately encrypted and authenticated.  James Coglan
12
+designed this format to avoid concurrent write issues when updating or
13
+synchronizing the vault configuration with e.g. a cloud service.
14
+
15
+The public interface is the
16
+[`derivepassphrase.exporter.storeroom.export_storeroom_data`][]
17
+function.  Multiple *non-public* functions are additionally documented
18
+here for didactical and educational reasons, but they are not part of
19
+the module API, are subject to change without notice (including
20
+removal), and should *not* be used or relied on.
21
+
22
+"""
6 23
 
7 24
 from __future__ import annotations
8 25
 
... ...
@@ -58,6 +75,8 @@ KEY_SIZE = MAC_SIZE = 32
58 75
 ENCRYPTED_KEYPAIR_SIZE = 128
59 76
 VERSION_SIZE = 1
60 77
 
78
+__all__ = ('export_storeroom_data',)
79
+
61 80
 logger = logging.getLogger(__name__)
62 81
 
63 82
 
... ...
@@ -119,6 +138,11 @@ def derive_master_keys_keys(password: str | bytes, iterations: int) -> KeyPair:
119 138
         A 2-tuple of keys, the encryption key and the signing key, to
120 139
         decrypt and verify the master keys data with.
121 140
 
141
+    Warning:
142
+        Non-public function, provided for didactical and educational
143
+        purposes only.  Subject to change without notice, including
144
+        removal.
145
+
122 146
     """
123 147
     if isinstance(password, str):
124 148
         password = password.encode('ASCII')
... ...
@@ -195,6 +219,11 @@ def decrypt_master_keys_data(data: bytes, keys: KeyPair) -> MasterKeys:
195 219
             example, it contains an unsupported version marker, or
196 220
             unexpected extra contents, or invalid padding.)
197 221
 
222
+    Warning:
223
+        Non-public function, provided for didactical and educational
224
+        purposes only.  Subject to change without notice, including
225
+        removal.
226
+
198 227
     """
199 228
     ciphertext, claimed_mac = struct.unpack(
200 229
         f'{len(data) - MAC_SIZE}s {MAC_SIZE}s', data
... ...
@@ -285,6 +314,11 @@ def decrypt_session_keys(data: bytes, master_keys: MasterKeys) -> KeyPair:
285 314
             example, it contains an unsupported version marker, or
286 315
             unexpected extra contents, or invalid padding.)
287 316
 
317
+    Warning:
318
+        Non-public function, provided for didactical and educational
319
+        purposes only.  Subject to change without notice, including
320
+        removal.
321
+
288 322
     """
289 323
     ciphertext, claimed_mac = struct.unpack(
290 324
         f'{len(data) - MAC_SIZE}s {MAC_SIZE}s', data
... ...
@@ -391,6 +425,11 @@ def decrypt_contents(data: bytes, session_keys: KeyPair) -> bytes:
391 425
             example, it contains an unsupported version marker, or
392 426
             unexpected extra contents, or invalid padding.)
393 427
 
428
+    Warning:
429
+        Non-public function, provided for didactical and educational
430
+        purposes only.  Subject to change without notice, including
431
+        removal.
432
+
394 433
     """
395 434
     ciphertext, claimed_mac = struct.unpack(
396 435
         f'{len(data) - MAC_SIZE}s {MAC_SIZE}s', data
... ...
@@ -466,6 +505,11 @@ def decrypt_bucket_item(bucket_item: bytes, master_keys: MasterKeys) -> bytes:
466 505
             example, it contains an unsupported version marker, or
467 506
             unexpected extra contents, or invalid padding.)
468 507
 
508
+    Warning:
509
+        Non-public function, provided for didactical and educational
510
+        purposes only.  Subject to change without notice, including
511
+        removal.
512
+
469 513
     """
470 514
     logger.debug(
471 515
         (
... ...
@@ -497,7 +541,7 @@ def decrypt_bucket_file(
497 541
     *,
498 542
     root_dir: str | bytes | os.PathLike = '.',
499 543
 ) -> Iterator[bytes]:
500
-    """Decrypt a bucket item.
544
+    """Decrypt a complete bucket.
501 545
 
502 546
     Args:
503 547
         filename:
... ...
@@ -524,6 +568,11 @@ def decrypt_bucket_file(
524 568
             example, it contains an unsupported version marker, or
525 569
             unexpected extra contents, or invalid padding.)
526 570
 
571
+    Warning:
572
+        Non-public function, provided for didactical and educational
573
+        purposes only.  Subject to change without notice, including
574
+        removal.
575
+
527 576
     """
528 577
     with open(
529 578
         os.path.join(os.fsdecode(root_dir), filename), 'rb'
... ...
@@ -543,7 +592,7 @@ def decrypt_bucket_file(
543 592
             )
544 593
 
545 594
 
546
-def store(config: dict[str, Any], path: str, json_contents: bytes) -> None:
595
+def _store(config: dict[str, Any], path: str, json_contents: bytes) -> None:
547 596
     """Store the JSON contents at path in the config structure.
548 597
 
549 598
     Traverse the config structure according to path, and set the value
... ...
@@ -669,14 +718,14 @@ def export_storeroom_data(  # noqa: C901,PLR0912,PLR0914,PLR0915
669 718
             logger.debug(
670 719
                 'Setting contents (empty directory): %s -> %s', path, '{}'
671 720
             )
672
-            store(config_structure, path, b'{}')
721
+            _store(config_structure, path, b'{}')
673 722
         else:
674 723
             logger.debug(
675 724
                 'Setting contents: %s -> %s',
676 725
                 path,
677 726
                 json_content.decode('utf-8'),
678 727
             )
679
-            store(config_structure, path, json_content)
728
+            _store(config_structure, path, json_content)
680 729
     for _dir, namelist in dirs_to_check.items():
681 730
         namelist = [x.rstrip('/') for x in namelist]  # noqa: PLW2901
682 731
         try:
... ...
@@ -2,7 +2,25 @@
2 2
 #
3 3
 # SPDX-License-Identifier: MIT
4 4
 
5
-"""Exporter for the vault native configuration format (v0.2 or v0.3)."""
5
+"""Exporter for the vault native configuration format (v0.2 or v0.3).
6
+
7
+The vault native formats are the configuration formats used by vault
8
+v0.2 and v0.3.  The configuration is stored as a single file,
9
+which is encrypted and authenticated.  v0.2 and v0.3 differ in some
10
+details concerning key derivation and expected format of internal
11
+structures, so they are *not* compatible.  v0.2 additionally contains
12
+cryptographic weaknesses (API misuse of a key derivation function, and
13
+a low-entropy method of generating initialization vectors for CBC block
14
+encryption mode) and should thus be avoided if possible.
15
+
16
+The public interface is the
17
+[`derivepassphrase.exporter.vault_native.export_vault_native_data`][]
18
+function.  Multiple *non-public* classes are additionally documented
19
+here for didactical and educational reasons, but they are not part of
20
+the module API, are subject to change without notice (including
21
+removal), and should *not* be used or relied on.
22
+
23
+"""
6 24
 
7 25
 from __future__ import annotations
8 26
 
... ...
@@ -16,6 +34,7 @@ from typing import TYPE_CHECKING
16 34
 from derivepassphrase import exporter, vault
17 35
 
18 36
 if TYPE_CHECKING:
37
+    from collections.abc import Sequence
19 38
     from typing import Any
20 39
 
21 40
     from typing_extensions import Buffer
... ...
@@ -57,6 +76,8 @@ else:
57 76
     else:
58 77
         STUBBED = False
59 78
 
79
+__all__ = ('export_vault_native_data',)
80
+
60 81
 logger = logging.getLogger(__name__)
61 82
 
62 83
 
... ...
@@ -93,6 +114,11 @@ class VaultNativeConfigParser(abc.ABC):
93 114
                 If this is a text string, then the UTF-8 encoding of the
94 115
                 string is used as the binary password.
95 116
 
117
+        Warning:
118
+            Non-public class, provided for didactical and educational
119
+            purposes only. Subject to change without notice, including
120
+            removal.
121
+
96 122
         """
97 123
         if not password:
98 124
             msg = 'Password must not be empty'
... ...
@@ -237,16 +263,21 @@ class VaultNativeV03ConfigParser(VaultNativeConfigParser):
237 263
 
238 264
     This is the modern, pre-storeroom configuration format.
239 265
 
266
+    Warning:
267
+        Non-public class, provided for didactical and educational
268
+        purposes only. Subject to change without notice, including
269
+        removal.
270
+
240 271
     """
241 272
 
242 273
     KEY_SIZE = 32
243 274
 
244
-    def __init__(self, *args: Any, **kwargs: Any) -> None:  # noqa: ANN401,D107
275
+    def __init__(self, *args: Any, **kwargs: Any) -> None:  # noqa: ANN401
245 276
         super().__init__(*args, **kwargs)
246 277
         self._iv_size = 16
247 278
         self._mac_size = 32
248 279
 
249
-    def __call__(self) -> Any:  # noqa: ANN401,D102
280
+    def __call__(self) -> Any:  # noqa: ANN401
250 281
         if self._data is self._sentinel:
251 282
             logger.info('Attempting to parse as v0.3 configuration')
252 283
             return super().__call__()
... ...
@@ -277,14 +308,19 @@ class VaultNativeV02ConfigParser(VaultNativeConfigParser):
277 308
     v0.2 configurations should be upgraded to at least v0.3 as soon as
278 309
     possible.
279 310
 
311
+    Warning:
312
+        Non-public class, provided for didactical and educational
313
+        purposes only. Subject to change without notice, including
314
+        removal.
315
+
280 316
     """
281 317
 
282
-    def __init__(self, *args: Any, **kwargs: Any) -> None:  # noqa: ANN401,D107
318
+    def __init__(self, *args: Any, **kwargs: Any) -> None:  # noqa: ANN401
283 319
         super().__init__(*args, **kwargs)
284 320
         self._iv_size = 16
285 321
         self._mac_size = 64
286 322
 
287
-    def __call__(self) -> Any:  # noqa: ANN401,D102
323
+    def __call__(self) -> Any:  # noqa: ANN401
288 324
         if self._data is self._sentinel:
289 325
             logger.info('Attempting to parse as v0.2 configuration')
290 326
             return super().__call__()
... ...
@@ -355,6 +391,84 @@ class VaultNativeV02ConfigParser(VaultNativeConfigParser):
355 391
         ).decryptor()
356 392
 
357 393
 
394
+def export_vault_native_data(
395
+    contents: Buffer | None = None,
396
+    key: str | Buffer | None = None,
397
+    *,
398
+    try_formats: Sequence[str] = ('v0.3', 'v0.2'),
399
+) -> Any:  # noqa: ANN401
400
+    """Export the full configuration stored in vault native format.
401
+
402
+    Args:
403
+        contents:
404
+            The binary encrypted contents of the vault configuration
405
+            file.  If not given, then query
406
+            [`derivepassphrase.exporter.get_vault_path`][] for the
407
+            correct filename and read the contents from there.
408
+
409
+            Note: On disk, these are usually stored in base64-encoded
410
+            form, not in the "raw" form as needed here.
411
+        key:
412
+            Encryption key/password for the configuration file, usually
413
+            the username, or passed via the `VAULT_KEY` environment
414
+            variable.  If not given, then query
415
+            [`derivepassphrase.exporter.get_vault_key`][] for the value.
416
+        try_formats:
417
+            A sequence of formats to try out, in order.  Each format must
418
+            be one of `v0.2` or `v0.3`.
419
+
420
+    Returns:
421
+        The vault configuration, as recorded in the configuration file.
422
+
423
+        This may or may not be a valid configuration according to vault
424
+        or derivepassphrase.
425
+
426
+    Raises:
427
+        RuntimeError:
428
+            Something went wrong during data collection, e.g. we
429
+            encountered unsupported or corrupted data in the vault file.
430
+        json.JSONDecodeError:
431
+            An internal JSON data structure failed to parse from disk.
432
+            The configuration file is probably corrupted.
433
+        ValueError:
434
+            The requested formats to try out are invalid, or the
435
+            encrypted contents aren't in any of the attempted
436
+            configuration formats.
437
+
438
+    """
439
+    if contents is None:
440
+        with open(exporter.get_vault_path(), 'rb') as infile:
441
+            contents = base64.standard_b64decode(infile.read())
442
+    if key is None:
443
+        key = exporter.get_vault_key()
444
+    stored_exception: Exception | None = None
445
+    for config_format in try_formats:
446
+        match config_format:
447
+            case 'v0.2':
448
+                try:
449
+                    return VaultNativeV02ConfigParser(contents, key)()
450
+                except ValueError as exc:
451
+                    exc.__context__ = stored_exception
452
+                    stored_exception = exc
453
+            case 'v0.3':
454
+                try:
455
+                    return VaultNativeV03ConfigParser(contents, key)()
456
+                except ValueError as exc:
457
+                    exc.__context__ = stored_exception
458
+                    stored_exception = exc
459
+            case _:  # pragma: no cover
460
+                msg = (
461
+                    f'Invalid vault native configuration format: '
462
+                    f'{config_format!r}'
463
+                )
464
+                raise ValueError(msg)
465
+    msg = (
466
+        f'Not a valid vault native configuration. '
467
+        f'(We tried: {try_formats!r}.)'
468
+    )
469
+    raise stored_exception or ValueError(msg)
470
+
471
+
358 472
 if __name__ == '__main__':
359 473
     import os
360 474
 
... ...
@@ -323,6 +323,19 @@ class TestVaultNativeConfig:
323 323
             == result
324 324
         )
325 325
 
326
+    def test_201_export_vault_native_data_no_arguments(
327
+        self, monkeypatch: pytest.MonkeyPatch
328
+    ) -> None:
329
+        runner = click.testing.CliRunner(mix_stderr=False)
330
+        with tests.isolated_vault_exporter_config(
331
+            monkeypatch=monkeypatch,
332
+            runner=runner,
333
+            vault_config=tests.VAULT_V03_CONFIG,
334
+            vault_key=tests.VAULT_MASTER_KEY,
335
+        ):
336
+            parsed_config = vault_native.export_vault_native_data(None)
337
+        assert parsed_config == tests.VAULT_V03_CONFIG_DATA
338
+
326 339
     @pytest.mark.parametrize(
327 340
         ['parser_class', 'config', 'result'],
328 341
         [
329 342