Document internal functions of the vault config exporter
Marco Ricci

Marco Ricci committed on 2025-01-19 21:10:38
Zeige 2 geänderte Dateien mit 251 Einfügungen und 0 Löschungen.


Add missing docstrings, and make the (private) methods visible.
... ...
@@ -6,3 +6,15 @@
6 6
 
7 7
 ::: derivepassphrase.exporter.vault_native
8 8
     heading_level: 1
9
+    filters:
10
+      - "^[A-Za-z0-9]"
11
+      - "^__[a-zA-Z0-9_-]+__"
12
+      - "^_pbkdf2$"
13
+      - "^_parse_contents$"
14
+      - "^_derive_keys$"
15
+      - "^_generate_keys$"
16
+      - "^_check_signature$"
17
+      - "^_hmac_input$"
18
+      - "^_decrypt_payload$"
19
+      - "^_make_decryptor$"
20
+      - "^_evp_bytestokey_md5_one_iteration_no_salt$"
... ...
@@ -170,6 +170,34 @@ class VaultNativeConfigParser(abc.ABC):
170 170
     def _pbkdf2(
171 171
         password: str | Buffer, key_size: int, iterations: int
172 172
     ) -> bytes:
173
+        """Generate a key from a password.
174
+
175
+        Uses PBKDF2 with HMAC-SHA1, with the vault UUID as a fixed salt
176
+        value.
177
+
178
+        Args:
179
+            password:
180
+                The password from which to derive the key.
181
+            key_size:
182
+                The size of the output string.  The effective key size
183
+                (in bytes) is thus half of this output string size.
184
+            iterations:
185
+                The PBKDF2 iteration count.
186
+
187
+        Returns:
188
+            The PBKDF2-derived key, encoded as a lowercase ASCII
189
+            hexadecimal string.
190
+
191
+        Danger: Insecure use of cryptography
192
+            This function is insecure because it uses a fixed salt
193
+            value, which is not secure against rainbow tables.  It is
194
+            further difficult to use because the effective key size is
195
+            only half as large as the "size" parameter (output string
196
+            size).  Finally, though the use of SHA-1 in HMAC per se is
197
+            not known to be insecure, SHA-1 is known not to be
198
+            collision-resistant.
199
+
200
+        """
173 201
         if isinstance(password, str):
174 202
             password = password.encode('utf-8')
175 203
         raw_key = pbkdf2.PBKDF2HMAC(
... ...
@@ -194,6 +222,16 @@ class VaultNativeConfigParser(abc.ABC):
194 222
         return result_key
195 223
 
196 224
     def _parse_contents(self) -> None:
225
+        """Parse the contents into IV, payload and MAC.
226
+
227
+        This operates on, and sets, multiple internal attributes of the
228
+        parser.
229
+
230
+        Raises:
231
+            ValueError:
232
+                The configuration file contents are clearly truncated.
233
+
234
+        """
197 235
         logger.info(
198 236
             _msg.TranslatedString(
199 237
                 _msg.InfoMsgTemplate.VAULT_NATIVE_PARSING_IV_PAYLOAD_MAC,
... ...
@@ -224,6 +262,12 @@ class VaultNativeConfigParser(abc.ABC):
224 262
         )
225 263
 
226 264
     def _derive_keys(self) -> None:
265
+        """Derive the signing and encryption keys.
266
+
267
+        This is a bookkeeping method.  The actual work is done in
268
+        [`_generate_keys`][].
269
+
270
+        """
227 271
         logger.info(
228 272
             _msg.TranslatedString(
229 273
                 _msg.InfoMsgTemplate.VAULT_NATIVE_DERIVING_KEYS,
... ...
@@ -239,9 +283,29 @@ class VaultNativeConfigParser(abc.ABC):
239 283
 
240 284
     @abc.abstractmethod
241 285
     def _generate_keys(self) -> None:
286
+        """Derive the signing and encryption keys, and set the key sizes.
287
+
288
+        Subclasses must override this, as the derivation system is
289
+        version-specific.  The default implementation raises an error.
290
+
291
+        Raises:
292
+            AssertionError:
293
+                There is no default implementation.
294
+
295
+        """
242 296
         raise AssertionError
243 297
 
244 298
     def _check_signature(self) -> None:
299
+        """Check for a valid MAC on the encrypted vault configuration.
300
+
301
+        The MAC uses HMAC-SHA256, and thus is 32 bytes long, before
302
+        encoding.
303
+
304
+        Raises:
305
+            ValueError:
306
+                The MAC is invalid.
307
+
308
+        """
245 309
         logger.info(
246 310
             _msg.TranslatedString(
247 311
                 _msg.InfoMsgTemplate.VAULT_NATIVE_CHECKING_MAC,
... ...
@@ -265,9 +329,26 @@ class VaultNativeConfigParser(abc.ABC):
265 329
 
266 330
     @abc.abstractmethod
267 331
     def _hmac_input(self) -> bytes:
332
+        """Return the input the MAC is supposed to verify.
333
+
334
+        Subclasses must override this, as the MAC-attested data is
335
+        version-specific.  The default implementation raises an error.
336
+
337
+        Raises:
338
+            AssertionError:
339
+                There is no default implementation.
340
+
341
+        """
268 342
         raise AssertionError
269 343
 
270 344
     def _decrypt_payload(self) -> Any:  # noqa: ANN401
345
+        """Return the decrypted vault configuration.
346
+
347
+        Requires [`_parse_contents`][] and [`_derive_keys`][] to have
348
+        run, and relies on [`_check_signature`][] for tampering
349
+        detection.
350
+
351
+        """
271 352
         logger.info(
272 353
             _msg.TranslatedString(
273 354
                 _msg.InfoMsgTemplate.VAULT_NATIVE_DECRYPTING_CONTENTS,
... ...
@@ -297,6 +378,16 @@ class VaultNativeConfigParser(abc.ABC):
297 378
 
298 379
     @abc.abstractmethod
299 380
     def _make_decryptor(self) -> ciphers.CipherContext:
381
+        """Return the cipher context object used for decryption.
382
+
383
+        Subclasses must override this, as the cipher setup is
384
+        version-specific.  The default implementation raises an error.
385
+
386
+        Raises:
387
+            AssertionError:
388
+                There is no default implementation.
389
+
390
+        """
300 391
         raise AssertionError
301 392
 
302 393
 
... ...
@@ -313,6 +404,11 @@ class VaultNativeV03ConfigParser(VaultNativeConfigParser):
313 404
     """
314 405
 
315 406
     KEY_SIZE = 32
407
+    """
408
+    Key size for both the encryption and the signing key, including the
409
+    encoding as a hexadecimal string.  (The effective cryptographic
410
+    strength is half of this value.)
411
+    """
316 412
 
317 413
     def __init__(self, *args: Any, **kwargs: Any) -> None:  # noqa: ANN401
318 414
         super().__init__(*args, **kwargs)
... ...
@@ -320,14 +416,45 @@ class VaultNativeV03ConfigParser(VaultNativeConfigParser):
320 416
         self._mac_size = 32
321 417
 
322 418
     def _generate_keys(self) -> None:
419
+        """Derive the signing and encryption keys, and set the key sizes.
420
+
421
+        Version 0.3 vault configurations use a constant key size; see
422
+        [`KEY_SIZE`][].  The encryption and signing keys differ in how
423
+        many rounds of PBKDF2 they use (100 and 200, respectively).
424
+
425
+        Danger: Insecure use of cryptography
426
+            This function makes use of the insecure function
427
+            [`VaultNativeConfigParser._pbkdf2`][], without any attempts
428
+            at mitigating its insecurity.  It further uses `_pbkdf2`
429
+            with the low iteration count of 100 and 200 rounds, which is
430
+            *drastically* insufficient to defend against password
431
+            guessing attacks using GPUs or ASICs.  We provide this
432
+            function for the purpose of interoperability with existing
433
+            vault installations.  Do not rely on this system to keep
434
+            your vault configuration secure against access by even
435
+            moderately determined attackers!
436
+
437
+        """
323 438
         self._encryption_key = self._pbkdf2(self._password, self.KEY_SIZE, 100)
324 439
         self._signing_key = self._pbkdf2(self._password, self.KEY_SIZE, 200)
325 440
         self._encryption_key_size = self._signing_key_size = self.KEY_SIZE
326 441
 
327 442
     def _hmac_input(self) -> bytes:
443
+        """Return the input the MAC is supposed to verify.
444
+
445
+        This includes hexadecimal encoding of the message payload.
446
+
447
+        """
328 448
         return self._message.hex().lower().encode('ASCII')
329 449
 
330 450
     def _make_decryptor(self) -> ciphers.CipherContext:
451
+        """Return the cipher context object used for decryption.
452
+
453
+        This is a standard AES256-CBC cipher context using the
454
+        previously derived encryption key and the IV declared in the
455
+        (MAC-verified) message payload.
456
+
457
+        """
331 458
         return ciphers.Cipher(
332 459
             algorithms.AES256(self._encryption_key), modes.CBC(self._iv)
333 460
         ).decryptor()
... ...
@@ -357,6 +484,22 @@ class VaultNativeV02ConfigParser(VaultNativeConfigParser):
357 484
         self._mac_size = 64
358 485
 
359 486
     def _parse_contents(self) -> None:
487
+        """Parse the contents into IV, payload and MAC.
488
+
489
+        Like the base class implementation, this operates on, and sets,
490
+        multiple internal attributes of the parser.  In version 0.2
491
+        vault configurations, the payload is encoded in base64 and the
492
+        message tag (MAC) is encoded in hexadecimal, so unlike the base
493
+        class implementation, we additionally decode the payload and the
494
+        MAC.
495
+
496
+        Raises:
497
+            ValueError:
498
+                The configuration file contents are clearly truncated,
499
+                or the payload or the message tag cannot be decoded
500
+                properly.
501
+
502
+        """
360 503
         super()._parse_contents()
361 504
         self._payload = base64.standard_b64decode(self._payload)
362 505
         self._message_tag = bytes.fromhex(self._message_tag.decode('ASCII'))
... ...
@@ -369,18 +512,114 @@ class VaultNativeV02ConfigParser(VaultNativeConfigParser):
369 512
         )
370 513
 
371 514
     def _generate_keys(self) -> None:
515
+        """Derive the signing and encryption keys, and set the key sizes.
516
+
517
+        Version 0.2 vault configurations use 8-byte encryption keys and
518
+        16-byte signing keys, including the hexadecimal encoding.  They
519
+        both use 16 rounds of PBKDF2.  This is due to an oversight in
520
+        vault, where the author mistakenly supplied the intended
521
+        iteration count as the key size, and the key size as the
522
+        iteration count.
523
+
524
+        Danger: Insecure use of cryptography
525
+            This function makes use of the insecure function
526
+            [`VaultNativeConfigParser._pbkdf2`][], without any attempts
527
+            at mitigating its insecurity.  It further uses `_pbkdf2`
528
+            with the low iteration count of 16 rounds, which is
529
+            *drastically* insufficient to defend against password
530
+            guessing attacks using GPUs or ASICs, and generates the
531
+            encryption key as a truncation of the signing key.  We
532
+            provide this function for the purpose of interoperability
533
+            with existing vault installations.  Do not rely on this
534
+            system to keep your vault configuration secure against
535
+            access by even moderately determined attackers!
536
+
537
+        """
372 538
         self._encryption_key = self._pbkdf2(self._password, 8, 16)
373 539
         self._signing_key = self._pbkdf2(self._password, 16, 16)
374 540
         self._encryption_key_size = 8
375 541
         self._signing_key_size = 16
376 542
 
377 543
     def _hmac_input(self) -> bytes:
544
+        """Return the input the MAC is supposed to verify.
545
+
546
+        This includes base64 encoding of the message payload.
547
+
548
+        """
378 549
         return base64.standard_b64encode(self._message)
379 550
 
380 551
     def _make_decryptor(self) -> ciphers.CipherContext:
552
+        """Return the cipher context object used for decryption.
553
+
554
+        This is a standard AES256-CBC cipher context. The encryption key
555
+        and the IV are derived via the OpenSSL `EVP_BytesToKey` function
556
+        (using MD5, no salt, and one iteration).  This is what the
557
+        Node.js `crypto` library (v21 series and older) used in its
558
+        implementation of `crypto.createCipher("aes256", password)`.
559
+
560
+        Danger: Insecure use of cryptography
561
+            This function makes use of (an implementation of) the
562
+            OpenSSL function `EVP_BytesToKey`, which generates
563
+            cryptographically weak keys, without any attempts at
564
+            mitigating its insecurity.  We provide this function for the
565
+            purpose of interoperability with existing vault
566
+            installations.  Do not rely on this system to keep your
567
+            vault configuration secure against access by even moderately
568
+            determined attackers!
569
+
570
+        """
571
+
381 572
         def evp_bytestokey_md5_one_iteration_no_salt(
382 573
             data: bytes, key_size: int, iv_size: int
383 574
         ) -> tuple[bytes, bytes]:
575
+            """Reimplement OpenSSL's `EVP_BytesToKey` with fixed parameters.
576
+
577
+            `EVP_BytesToKey` in general is a key derivation function,
578
+            i.e., a function that derives key material from an input
579
+            byte string.  `EVP_BytesToKey` conceptually splits the
580
+            derived key material into an encryption key and an
581
+            initialization vector (IV).
582
+
583
+            Note: Algorithm description
584
+                `EVP_BytesToKey` takes an input byte string, two output
585
+                sizes (encryption key size and IV size), a message digest
586
+                function, a salt value and an iteration count.  The
587
+                derived key material is calculated in blocks, each of
588
+                which is the output of (iterated application of) the
589
+                message digest function.  The input to the message
590
+                digest function is the concatenation of the previous
591
+                block (if any) with the input byte string and the salt
592
+                value (if any):
593
+
594
+                ~~~~ python
595
+
596
+                data = block_input = b''.join([
597
+                    previous_block, input_string, salt
598
+                ])
599
+                for i in range(iteration_count):
600
+                    data = message_digest(data)
601
+                block = data
602
+
603
+                ~~~~
604
+
605
+                We use as many blocks as are necessary to cover the
606
+                total output byte string size.  The first few bytes
607
+                (dictated by the encryption key size) form the
608
+                encryption key, the other bytes (dictated by the IV
609
+                size) form the IV.
610
+
611
+            We implement exactly the subset of `EVP_BytesToKey` that the
612
+            Node.js `crypto` library (v21 series and older) uses in its
613
+            implementation of `crypto.createCipher("aes256", password)`.
614
+            Specifically, the message digest function is fixed to MD5,
615
+            the salt is always empty, and the iteration count is fixed
616
+            at one.
617
+
618
+            Returns:
619
+                A 2-tuple containing the derived encryption key and the
620
+                derived initialization vector.
621
+
622
+            """
384 623
             total_size = key_size + iv_size
385 624
             buffer = bytearray()
386 625
             last_block = b''
387 626