Add an actual storeroom exporter, not just a reader
Marco Ricci

Marco Ricci committed on 2024-08-16 22:49:06
Showing 1 changed file with 136 additions and 19 deletions.


Remove the script-like main function, which wrote decrypted files to the
current directory, in favor of an exporter which actually synthesizes
the configuration from the storeroom data files.
... ...
@@ -9,12 +9,15 @@ import logging
9 9
 import os
10 10
 import os.path
11 11
 import struct
12
-from typing import TypedDict
12
+from typing import TYPE_CHECKING, Any, TypedDict
13 13
 
14 14
 from cryptography.hazmat.primitives import ciphers, hashes, hmac, padding
15 15
 from cryptography.hazmat.primitives.ciphers import algorithms, modes
16 16
 from cryptography.hazmat.primitives.kdf import pbkdf2
17 17
 
18
+if TYPE_CHECKING:
19
+    from collections.abc import Iterator
20
+
18 21
 STOREROOM_MASTER_KEYS_UUID = b'35b7c7ed-f71e-4adf-9051-02fb0f1e0e17'
19 22
 VAULT_CIPHER_UUID = b'73e69e8a-cb05-4b50-9f42-59d76a511299'
20 23
 IV_SIZE = 16
... ...
@@ -27,9 +30,11 @@ MASTER_KEYS_KEY = (
27 30
     or os.getenv('USER')
28 31
     or os.getenv('USERNAME')
29 32
 )
33
+VAULT_PATH = os.path.join(
34
+    os.path.expanduser('~'), os.getenv('VAULT_PATH', '.vault')
35
+)
30 36
 
31
-logging.basicConfig(level=('DEBUG' if os.getenv('DEBUG') else 'WARNING'))
32
-logger = logging.getLogger('derivepassphrase.exporter.vault_storeroom')
37
+logger = logging.getLogger(__name__)
33 38
 
34 39
 
35 40
 class KeyPair(TypedDict):
... ...
@@ -388,11 +393,10 @@ def decrypt_bucket_item(bucket_item: bytes, master_keys: MasterKeys) -> bytes:
388 393
     return decrypt_contents(data_contents, session_keys)
389 394
 
390 395
 
391
-def decrypt_bucket_file(filename: str, master_keys: MasterKeys) -> None:
392
-    with (
393
-        open(filename, 'rb') as bucket_file,
394
-        open(filename + '.decrypted', 'wb') as decrypted_file,
395
-    ):
396
+def decrypt_bucket_file(
397
+    filename: str, master_keys: MasterKeys
398
+) -> Iterator[bytes]:
399
+    with open(filename, 'rb') as bucket_file:
396 400
         header_line = bucket_file.readline()
397 401
         try:
398 402
             header = json.loads(header_line)
... ...
@@ -402,19 +406,78 @@ def decrypt_bucket_file(filename: str, master_keys: MasterKeys) -> None:
402 406
         if header != {'version': 1}:
403 407
             msg = f'Invalid bucket file: {filename}'
404 408
             raise RuntimeError(msg) from None
405
-        decrypted_file.write(header_line)
406 409
         for line in bucket_file:
407
-            decrypted_contents = (
408
-                decrypt_bucket_item(
410
+            yield decrypt_bucket_item(
409 411
                 base64.standard_b64decode(line), master_keys
410
-                ).removesuffix(b'\n')
411
-                + b'\n'
412 412
             )
413
-            decrypted_file.write(decrypted_contents)
414 413
 
415 414
 
416
-def main() -> None:
417
-    with open('.keys', encoding='utf-8') as master_keys_file:
415
+def store(config: dict[str, Any], path: str, json_contents: bytes) -> None:
416
+    """Store the JSON contents at path in the config structure.
417
+
418
+    Traverse the config structure according to path, and set the value
419
+    of the leaf to the decoded JSON contents.
420
+
421
+    A path `/foo/bar/xyz` translates to the JSON structure
422
+    `{"foo": {"bar": {"xyz": ...}}}`.
423
+
424
+    Args:
425
+        config:
426
+            The (top-level) configuration structure to update.
427
+        path:
428
+            The path within the configuration structure to traverse.
429
+        json_contents:
430
+            The contents to set the item to, after JSON-decoding.
431
+
432
+    Raises:
433
+        json.JSONDecodeError:
434
+            There was an error parsing the JSON contents.
435
+
436
+    """
437
+    contents = json.loads(json_contents)
438
+    path_parts = [part for part in path.split('/') if part]
439
+    for part in path_parts[:-1]:
440
+        config = config.setdefault(part, {})
441
+    if path_parts:
442
+        config[path_parts[-1]] = contents
443
+
444
+
445
+def export_storeroom_data(
446
+    storeroom_path: str | bytes | os.PathLike = VAULT_PATH,
447
+    master_keys_key: str | bytes | None = MASTER_KEYS_KEY,
448
+) -> dict[str, Any]:
449
+    """Export the full configuration stored in the storeroom.
450
+
451
+    Args:
452
+        storeroom_path:
453
+            Path to the storeroom; usually `~/.vault`.
454
+        master_keys_key:
455
+            Encryption key/password for the master keys.  If not set via
456
+            the `VAULT_KEY` environment variable, this usually is the
457
+            user's username.
458
+
459
+    Returns:
460
+        The full configuration, as stored in the storeroom.
461
+
462
+        This may or may not be a valid configuration according to vault
463
+        or derivepassphrase.
464
+
465
+    Raises:
466
+        RuntimeError:
467
+            Something went wrong during data collection, e.g. we
468
+            encountered unsupported or corrupted data in the storeroom.
469
+        json.JSONDecodeError:
470
+            An internal JSON data structure failed to parse from disk.
471
+            The storeroom is probably corrupted.
472
+
473
+    """
474
+
475
+    if master_keys_key is None:
476
+        msg = 'Cannot determine master key; please set VAULT_KEY'
477
+        raise RuntimeError(msg)
478
+    with open(
479
+        os.path.join(os.fsdecode(storeroom_path), '.keys'), encoding='utf-8'
480
+    ) as master_keys_file:
418 481
         header = json.loads(master_keys_file.readline())
419 482
         if header != {'version': 1}:
420 483
             msg = 'bad or unsupported keys version header'
... ...
@@ -432,13 +495,67 @@ def main() -> None:
432 495
         raise RuntimeError(msg)
433 496
     encrypted_keys_iterations = 2 ** (10 + (encrypted_keys_params & 0x0F))
434 497
     master_keys_keys = derive_master_keys_keys(
435
-        MASTER_KEYS_KEY, encrypted_keys_iterations
498
+        master_keys_key, encrypted_keys_iterations
436 499
     )
437 500
     master_keys = decrypt_master_keys_data(encrypted_keys, master_keys_keys)
438 501
 
502
+    config_structure: dict[str, Any] = {}
503
+    json_contents: dict[str, bytes] = {}
439 504
     for file in glob.glob('[01][0-9a-f]'):
440
-        decrypt_bucket_file(file, master_keys)
505
+        bucket_contents = list(decrypt_bucket_file(file, master_keys))
506
+        bucket_index = json.loads(bucket_contents.pop(0))
507
+        for pos, item in enumerate(bucket_index):
508
+            json_contents[item] = bucket_contents[pos]
509
+            logger.debug(
510
+                'Found bucket item: %s -> %s', item, bucket_contents[pos]
511
+            )
512
+    dirs_to_check: dict[str, list[str]] = {}
513
+    json_payload: Any
514
+    for path, json_content in sorted(json_contents.items()):
515
+        if path.endswith('/'):
516
+            logger.debug(
517
+                'Postponing dir check: %s -> %s',
518
+                path,
519
+                json_content.decode('utf-8'),
520
+            )
521
+            json_payload = json.loads(json_content)
522
+            if not isinstance(json_payload, list) or any(
523
+                not isinstance(x, str) for x in json_payload
524
+            ):
525
+                msg = (
526
+                    f'Directory index is not actually an index: '
527
+                    f'{json_content!r}'
528
+                )
529
+                raise RuntimeError(msg)
530
+            dirs_to_check[path] = json_payload
531
+            logger.debug(
532
+                'Setting contents (empty directory): %s -> %s', path, '{}'
533
+            )
534
+            store(config_structure, path, b'{}')
535
+        else:
536
+            logger.debug(
537
+                'Setting contents: %s -> %s',
538
+                path,
539
+                json_content.decode('utf-8'),
540
+            )
541
+            store(config_structure, path, json_content)
542
+    for _dir, namelist in dirs_to_check.items():
543
+        namelist = [x.rstrip('/') for x in namelist]  # noqa: PLW2901
544
+        try:
545
+            obj = config_structure
546
+            for part in _dir.split('/'):
547
+                if part:
548
+                    obj = obj[part]
549
+        except KeyError as exc:
550
+            msg = f'Cannot traverse storage path: {_dir!r}'
551
+            raise RuntimeError(msg) from exc
552
+        if set(obj.keys()) != set(namelist):
553
+            msg = f'Object key mismatch for path {_dir!r}'
554
+            raise RuntimeError(msg)
555
+    return config_structure
441 556
 
442 557
 
443 558
 if __name__ == '__main__':
444
-    main()
559
+    logging.basicConfig(level=('DEBUG' if os.getenv('DEBUG') else 'WARNING'))
560
+    config_structure = export_storeroom_data()
561
+    print(json.dumps(config_structure, indent=2, sort_keys=True))  # noqa: T201
445 562