Detect (and test for) pairs of interchangeable vault passphrases
Marco Ricci

Marco Ricci committed on 2025-01-27 15:36:38
Zeige 2 geänderte Dateien mit 354 Einfügungen und 0 Löschungen.


The "vault" derivation scheme internally uses PBKDF2-HMAC-SHA1 to
generate a pool of random bits from which the derived passphrase is
constructed.  Here, the master passphrase is passed directly to
HMAC-SHA1 as the key; see [RFC 2898][].  By construction, HMAC-SHA1
requires keys to be exactly 64 bytes long (the internal block size of
SHA1), so keys larger or smaller than 64 bytes are mapped to equivalent
64-byte keys.  This keyspace reduction means that master passphrases
under vault are not unique, and since the mapping is simple enough,
"master passphrase collisions" have actually been observed during
hypothesis testing.

Therefore, we implement a new interface to query whether two master
passphrases are interchangeable under vault.  We also test expected
interchangeability and non-interchangeability of master passphrases
explicitly with hypothesis-based tests, both for mixed passphrase
categories (larger than, smaller than, or exactly 64 bytes) and for each
category separately.

[RFC 2898]: https://datatracker.ietf.org/doc/html/rfc2898  "See Section 5.2 and Appendix B.1.1."
... ...
@@ -9,6 +9,7 @@ from __future__ import annotations
9 9
 import base64
10 10
 import collections
11 11
 import hashlib
12
+import hmac
12 13
 import math
13 14
 import types
14 15
 from typing import TYPE_CHECKING
... ...
@@ -589,6 +590,83 @@ class Vault:
589 590
         signature_blob = ssh_agent.SSHAgentClient.unstring(trailer)
590 591
         return bytes(base64.standard_b64encode(signature_blob))
591 592
 
593
+    @classmethod
594
+    def phrases_are_interchangable(
595
+        cls,
596
+        phrase1: bytes | bytearray,
597
+        phrase2: bytes | bytearray,
598
+        /,
599
+    ) -> bool:
600
+        """Return true if the passphrases are interchangeable to Vault.
601
+
602
+        Vault internally passes the passphrase as the key to HMAC-SHA1.
603
+        HMAC requires keys to have a certain fixed length, and therefore
604
+        transforms keys of other lengths suitably.  Because of this, in
605
+        general, there exist multiple passphrases that behave
606
+        identically under Vault.
607
+
608
+        Note: HMAC key transformation
609
+            Keys strictly larger than the SHA1 block size (64 bytes) are
610
+            first hashed with SHA1, then the digest is used in place of
611
+            the original key.  Then, any keys/digests smaller than the
612
+            block size are padded with NUL bytes on the right, up to the
613
+            block size.
614
+
615
+            As a result, keys smaller than the block size are padded,
616
+            keys larger than the block size are hashed and then padded,
617
+            and keys exactly as large as the block size are used as-is.
618
+
619
+        Args:
620
+            phrase1:
621
+                A passphrase to compare.  Must be a binary string to
622
+                mitigate timing attacks.
623
+            phrase2:
624
+                A passphrase to compare.  Must be a binary string to
625
+                mitigate timing attacks.
626
+
627
+        Warning: Likely non-resistant to timing attacks
628
+            This method makes some effort to be resistant to timing
629
+            attacks, but cannot guarantee that Python
630
+            micro-optimizations, version or platform differences do
631
+            not undermine the effectiveness of these efforts.
632
+
633
+            Callers can definitely observe timing differences due to the
634
+            length of the passphrase passed in.
635
+
636
+        """
637
+        to_key = cls._phrase_to_hmac_key  # maps a phrase to its HMAC key
638
+        return hmac.compare_digest(to_key(phrase1), to_key(phrase2))
639
+
640
+    @classmethod
641
+    def _phrase_to_hmac_key(
642
+        cls,
643
+        phrase: bytes | bytearray | str,
644
+        /,
645
+    ) -> bytes:
646
+        r"""Return the HMAC key belonging to a passphrase.
647
+
648
+        This is the actual HMAC key this passphrase would be transformed
649
+        into when used within Vault.
650
+
651
+        See [`phrases_are_interchangable`][] for further explanations
652
+        and warnings about timing attack resistance.
653
+
654
+        Args:
655
+            phrase:
656
+                The passphrase to convert.  Should be a binary string
657
+                to mitigate timing attacks.
658
+
659
+        """
660
+        phrase = cls._get_binary_string(phrase)
661
+        h = hashlib.sha1(phrase, usedforsecurity=False)  # always computed
662
+        try:
663
+            key = bytearray(h.block_size)  # NUL-filled, block-sized buffer
664
+            for i, byte in enumerate(phrase):
665
+                key[i] = byte  # IndexError past the block size
666
+            return bytes(key)
667
+        except IndexError:  # over-long phrase: use the padded digest
668
+            return h.digest() + b'\x00' * (h.block_size - h.digest_size)
669
+
592 670
     @staticmethod
593 671
     def _subtract(
594 672
         charset: bytes | bytearray,
... ...
@@ -6,6 +6,7 @@
6 6
 
7 7
 from __future__ import annotations
8 8
 
9
+import hashlib
9 10
 import math
10 11
 from typing import TYPE_CHECKING
11 12
 
... ...
@@ -22,6 +23,43 @@ if TYPE_CHECKING:
22 23
 
23 24
 Vault: TypeAlias = derivepassphrase.vault.Vault
24 25
 
26
+BLOCK_SIZE = hashlib.sha1().block_size  # SHA-1 block size, in bytes
27
+DIGEST_SIZE = hashlib.sha1().digest_size  # SHA-1 digest size, in bytes
28
+
29
+
30
+def phrases_are_interchangable(
31
+    phrase1: bytes | bytearray | str,
32
+    phrase2: bytes | bytearray | str,
33
+    /,
34
+) -> bool:
35
+    """Work-alike of [`Vault.phrases_are_interchangable`][].
36
+
37
+    This version is not resistant to timing attacks, but faster, and
38
+    supports strings directly.
39
+
40
+    Args:
41
+        phrase1:
42
+            A passphrase to compare.
43
+        phrase2:
44
+            A passphrase to compare.
45
+
46
+    Returns:
47
+        True if the phrases behave identically under [`Vault`][],
48
+        false otherwise.
49
+
50
+    """
51
+
52
+    def canon(bs: bytes, /) -> bytes:
53
+        return (
54
+            hashlib.sha1(bs).digest() + b'\x00' * (BLOCK_SIZE - DIGEST_SIZE)
55
+            if len(bs) > BLOCK_SIZE
56
+            else bs.rstrip(b'\x00')
57
+        )
58
+
59
+    phrase1 = canon(Vault._get_binary_string(phrase1))
60
+    phrase2 = canon(Vault._get_binary_string(phrase2))
61
+    return phrase1 == phrase2
62
+
25 63
 
26 64
 class TestVault:
27 65
     """Test passphrase derivation with the "vault" scheme."""
... ...
@@ -39,6 +77,130 @@ class TestVault:
39 77
     <i>vault</i>(1)'s test suite.
40 78
     """
41 79
 
80
+    @hypothesis.given(
81
+        phrases=strategies.lists(
82
+            strategies.binary(min_size=1, max_size=BLOCK_SIZE // 2),
83
+            min_size=2,
84
+            max_size=2,
85
+            unique=True,
86
+        ).filter(
87
+            lambda tup: not phrases_are_interchangable(*tup)
88
+        ),
89
+        service=strategies.text(
90
+            strategies.characters(min_codepoint=32, max_codepoint=126),
91
+            min_size=1,
92
+            max_size=BLOCK_SIZE // 2,
93
+        ),
94
+    )
95
+    def test_100a_create_hash_phrase_dependence_small(
96
+        self,
97
+        phrases: list[bytes],
98
+        service: str,
99
+    ) -> None:
100
+        """The internal hash depends on the (short) master passphrase.
101
+
102
+        We filter out interchangeable passphrases during generation.
103
+
104
+        """
105
+        assert Vault.create_hash(
106
+            phrase=phrases[0], service=service
107
+        ) != Vault.create_hash(phrase=phrases[1], service=service)
108
+
109
+    @hypothesis.given(
110
+        phrases=strategies.lists(
111
+            strategies.binary(min_size=BLOCK_SIZE, max_size=BLOCK_SIZE),
112
+            min_size=2,
113
+            max_size=2,
114
+            unique=True,
115
+        ).filter(
116
+            lambda tup: not phrases_are_interchangable(*tup)
117
+        ),
118
+        service=strategies.text(
119
+            strategies.characters(min_codepoint=32, max_codepoint=126),
120
+            min_size=1,
121
+            max_size=BLOCK_SIZE // 2,
122
+        ),
123
+    )
124
+    def test_100b_create_hash_phrase_dependence_medium(
125
+        self,
126
+        phrases: list[bytes],
127
+        service: str,
128
+    ) -> None:
129
+        """The internal hash depends on the (block-sized) master passphrase.
130
+
131
+        We filter out interchangeable passphrases during generation.
132
+
133
+        """
134
+        assert Vault.create_hash(
135
+            phrase=phrases[0], service=service
136
+        ) != Vault.create_hash(phrase=phrases[1], service=service)
137
+
138
+    @hypothesis.given(
139
+        phrases=strategies.lists(
140
+            strategies.binary(
141
+                min_size=BLOCK_SIZE + 1, max_size=BLOCK_SIZE + 8
142
+            ),
143
+            min_size=2,
144
+            max_size=2,
145
+            unique=True,
146
+        ).filter(
147
+            lambda tup: not phrases_are_interchangable(*tup)
148
+        ),
149
+        service=strategies.text(
150
+            strategies.characters(min_codepoint=32, max_codepoint=126),
151
+            min_size=1,
152
+            max_size=BLOCK_SIZE // 2,
153
+        ),
154
+    )
155
+    def test_100c_create_hash_phrase_dependence_large(
156
+        self,
157
+        phrases: list[bytes],
158
+        service: str,
159
+    ) -> None:
160
+        """The internal hash depends on the (over-long) master passphrase.
161
+
162
+        We filter out interchangeable passphrases during generation.
163
+
164
+        """
165
+        assert Vault.create_hash(
166
+            phrase=phrases[0], service=service
167
+        ) != Vault.create_hash(phrase=phrases[1], service=service)
168
+
169
+    @hypothesis.given(
170
+        phrases=strategies.lists(
171
+            strategies.one_of(
172
+                strategies.binary(min_size=1, max_size=BLOCK_SIZE // 2),
173
+                strategies.binary(min_size=BLOCK_SIZE, max_size=BLOCK_SIZE),
174
+                strategies.binary(
175
+                    min_size=BLOCK_SIZE + 1, max_size=BLOCK_SIZE + 8
176
+                ),
177
+            ),
178
+            min_size=2,
179
+            max_size=2,
180
+            unique=True,
181
+        ).filter(
182
+            lambda tup: not phrases_are_interchangable(*tup)
183
+        ),
184
+        service=strategies.text(
185
+            strategies.characters(min_codepoint=32, max_codepoint=126),
186
+            min_size=1,
187
+            max_size=BLOCK_SIZE // 2,
188
+        ),
189
+    )
190
+    def test_100d_create_hash_phrase_dependence_mixed(
191
+        self,
192
+        phrases: list[bytes],
193
+        service: str,
194
+    ) -> None:
195
+        """The internal hash depends on the (mixed-length) master passphrase.
196
+
197
+        We filter out interchangeable passphrases during generation.
198
+
199
+        """
200
+        assert Vault.create_hash(
201
+            phrase=phrases[0], service=service
202
+        ) != Vault.create_hash(phrase=phrases[1], service=service)
203
+
42 204
     @hypothesis.given(
43 205
         phrase=strategies.text(
44 206
             strategies.characters(min_codepoint=32, max_codepoint=126),
... ...
@@ -62,6 +224,66 @@ class TestVault:
62 224
             phrase=phrase, service=services[0]
63 225
         ) != Vault.create_hash(phrase=phrase, service=services[1])
64 226
 
227
+    @tests.hypothesis_settings_coverage_compatible
228
+    @hypothesis.given(
229
+        phrases=strategies.binary(max_size=BLOCK_SIZE // 2).flatmap(
230
+            lambda bs: strategies.tuples(
231
+                strategies.just(bs),
232
+                strategies.integers(
233
+                    min_value=1,
234
+                    max_value=BLOCK_SIZE - len(bs),  # stay within one block
235
+                ).map(lambda num: bs + b'\x00' * num)  # NUL-padded twin
236
+            )
237
+        ),
238
+        service=strategies.text(
239
+            strategies.characters(min_codepoint=32, max_codepoint=126),
240
+            min_size=1,
241
+            max_size=32,
242
+        ),
243
+    )
244
+    def test_102a_interchangable_phrases_small(
245
+        self,
246
+        phrases: tuple[bytes, bytes],
247
+        service: str,
248
+    ) -> None:
249
+        """Claimed interchangeable passphrases are actually interchangeable."""
250
+        assert Vault.phrases_are_interchangable(*phrases)
251
+        assert Vault.create_hash(
252
+            phrase=phrases[0], service=service
253
+        ) == Vault.create_hash(phrase=phrases[1], service=service)
254
+
255
+    @tests.hypothesis_settings_coverage_compatible
256
+    @hypothesis.given(
257
+        phrases=strategies.binary(
258
+            min_size=BLOCK_SIZE + 1, max_size=BLOCK_SIZE + 8
259
+        ).flatmap(
260
+            lambda bs: strategies.tuples(
261
+                strategies.just(bs),
262
+                strategies.just(hashlib.sha1(bs).digest()).flatmap(
263
+                    lambda h: strategies.integers(
264
+                        min_value=1,
265
+                        max_value=BLOCK_SIZE - DIGEST_SIZE,
266
+                    ).map(lambda num: h + b'\x00' * num)  # NUL-padded digest
267
+                ),
268
+            )
269
+        ),
270
+        service=strategies.text(
271
+            strategies.characters(min_codepoint=32, max_codepoint=126),
272
+            min_size=1,
273
+            max_size=32,
274
+        ),
275
+    )
276
+    def test_102b_interchangable_phrases_large(
277
+        self,
278
+        phrases: tuple[bytes, bytes],
279
+        service: str,
280
+    ) -> None:
281
+        """Claimed interchangeable passphrases are actually interchangeable."""
282
+        assert Vault.phrases_are_interchangable(*phrases)
283
+        assert Vault.create_hash(
284
+            phrase=phrases[0], service=service
285
+        ) == Vault.create_hash(phrase=phrases[1], service=service)
286
+
65 287
     @pytest.mark.parametrize(
66 288
         ['service', 'expected'],
67 289
         [
... ...
@@ -82,6 +304,60 @@ class TestVault:
82 304
             == b'n+oIz6sL>K*lTEWYRO%7'
83 305
         )
84 306
 
307
+    @hypothesis.given(
308
+        phrases=strategies.lists(
309
+            strategies.binary(min_size=1, max_size=32),
310
+            min_size=2,
311
+            max_size=2,
312
+            unique=True,
313
+        ).filter(
314
+            lambda tup: not phrases_are_interchangable(*tup)
315
+        ),
316
+        service=strategies.text(
317
+            strategies.characters(min_codepoint=32, max_codepoint=126),
318
+            min_size=1,
319
+            max_size=32,
320
+        ),
321
+    )
322
+    @hypothesis.example(phrases=[b'\x00', b'\x00\x00'], service='0').xfail(
323
+        reason='phrases are interchangable',
324
+        raises=AssertionError,
325
+    )
326
+    @hypothesis.example(
327
+        phrases=[
328
+            (
329
+                b'plnlrtfpijpuhqylxbgqiiyipieyxvfs'
330
+                b'avzgxbbcfusqkozwpngsyejqlmjsytrmd'
331
+            ),
332
+            b"eBkXQTfuBqp'cTcar&g*",  # SHA-1 digest of the phrase above
333
+        ],
334
+        service='any service name here',
335
+    ).xfail(
336
+        reason=(
337
+            'phrases are interchangable (Wikipedia example:'
338
+            'https://en.wikipedia.org/w/index.php?title=PBKDF2&oldid=1264881215#HMAC_collisions'
339
+            ')'
340
+        ),
341
+        raises=AssertionError,
342
+    )
343
+    def test_201a_phrase_dependence(
344
+        self,
345
+        phrases: list[bytes],
346
+        service: str,
347
+    ) -> None:
348
+        """The derived passphrase is dependent on the master passphrase.
349
+
350
+        Certain pairs of master passphrases are known to be
351
+        interchangeable; see [`Vault.phrases_are_interchangable`][].
352
+        These are excluded from consideration by the hypothesis
353
+        strategy.
354
+
355
+        """
356
+        # See test_100_create_hash_phrase_dependence for context.
357
+        assert Vault(phrase=phrases[0]).generate(
358
+            service
359
+        ) != Vault(phrase=phrases[1]).generate(service)
360
+
85 361
     def test_202a_reproducibility_and_bytes_service_name(self) -> None:
86 362
         """Deriving a passphrase works equally for byte strings."""
87 363
         assert Vault(phrase=self.phrase).generate(b'google') == Vault(
88 364