Support textual passphrases, if it is safe
Marco Ricci

Marco Ricci committed on 2024-06-22 21:19:30
Zeige 1 geänderte Datei mit 70 Einfügungen und 9 Löschungen.


Some awareness of Unicode normalization is necessary to support
passphrases stored as text in (JSON) config files.  We reject a text
passphrase if its NFC and NFD normalizations differ, because the same
text would then have multiple Unicode representations (and thus
multiple UTF-8 representations).  In such a case, a byte string
must be used, and the value cannot currently be stored in the JSON
config file.
... ...
@@ -8,9 +8,11 @@
8 8
 
9 9
 from __future__ import annotations
10 10
 
11
+import base64
11 12
 import collections
12 13
 import hashlib
13 14
 import math
15
+import unicodedata
14 16
 import warnings
15 17
 
16 18
 from typing import assert_type, reveal_type
... ...
@@ -21,6 +23,9 @@ import ssh_agent_client
21 23
 __author__ = "Marco Ricci <m@the13thletter.info>"
22 24
 __version__ = "0.1.0"
23 25
 
26
+class AmbiguousByteRepresentationError(ValueError):
27
+    """The object has an ambiguous byte representation."""
28
+
24 29
 class Vault:
25 30
     """A work-alike of James Coglan's vault.
26 31
 
... ...
@@ -68,8 +73,8 @@ class Vault:
68 73
                         + _CHARSETS['symbol'])
69 74
 
70 75
     def __init__(
71
-        self, *, phrase: bytes | bytearray = b'', length: int = 20,
72
-        repeat: int = 0, lower: int | None = None,
76
+        self, *, phrase: bytes | bytearray | str = b'',
77
+        length: int = 20, repeat: int = 0, lower: int | None = None,
73 78
         upper: int | None = None, number: int | None = None,
74 79
         space: int | None = None, dash: int | None = None,
75 80
         symbol: int | None = None,
... ...
@@ -79,19 +84,20 @@ class Vault:
79 84
         Args:
80 85
             phrase:
81 86
                 The master passphrase from which to derive the service
82
-                passphrases.
87
+                passphrases.  If a text string, then the byte
88
+                representation must be unique.
83 89
             length:
84 90
                 Desired passphrase length.
85 91
             repeat:
86 92
                 The maximum number of immediate character repetitions
87 93
                 allowed in the passphrase.  Disabled if set to 0.
88 94
             lower:
89
-                Optional constraint on lowercase characters.  If
95
+                Optional constraint on ASCII lowercase characters.  If
90 96
                 positive, include this many lowercase characters
91 97
                 somewhere in the passphrase.  If 0, avoid lowercase
92 98
                 characters altogether.
93 99
             upper:
94
-                Same as `lower`, but for uppercase characters.
100
+                Same as `lower`, but for ASCII uppercase characters.
95 101
             number:
96 102
                 Same as `lower`, but for ASCII digits.
97 103
             space:
... ...
@@ -103,8 +109,13 @@ class Vault:
103 109
                 Same as `lower`, but for all other hitherto unlisted
104 110
                 ASCII printable characters (except backquote).
105 111
 
112
+        Raises:
113
+            AmbiguousByteRepresentationError:
114
+                The phrase is a text string with differing NFC- and
115
+                NFD-normalized UTF-8 byte representations.
116
+
106 117
         """
107
-        self._phrase = bytes(phrase)
118
+        self._phrase = self._get_binary_string(phrase)
108 119
         self._length = length
109 120
         self._repeat = repeat
110 121
         self._allowed = bytearray(self._CHARSETS['all'])
... ...
@@ -197,10 +208,38 @@ class Vault:
197 208
         entropy_bound = max(1, self._entropy())
198 209
         return int(math.ceil(safety_factor * entropy_bound / 8))
199 210
 
211
+    @staticmethod
212
+    def _get_binary_string(s: bytes | bytearray | str, /) -> bytes:
213
+        """Convert the input string to a read-only, binary string.
214
+
215
+        If it is a text string, then test for an unambiguous UTF-8
216
+        representation, otherwise abort.  (That is, check whether the
217
+        NFC and NFD forms of the string coincide.)
218
+
219
+        Args:
220
+            s: The string to (check and) convert.
221
+
222
+        Returns:
223
+            A read-only, binary copy of the string.
224
+
225
+        Raises:
226
+            AmbiguousByteRepresentationError:
227
+                The text string has differing NFC- and NFD-normalized
228
+                UTF-8 byte representations.
229
+
230
+        """
231
+        if isinstance(s, str):
232
+            norm = unicodedata.normalize
233
+            if norm('NFC', s) != norm('NFD', s):
234
+                raise AmbiguousByteRepresentationError(
235
+                    'text string has ambiguous byte representation')
236
+            return s.encode('UTF-8')
237
+        return bytes(s)
238
+
200 239
     @classmethod
201 240
     def create_hash(
202
-        cls, phrase: bytes | bytearray, service: bytes | bytearray, *,
203
-        length: int = 32,
241
+        cls, phrase: bytes | bytearray | str,
242
+        service: bytes | bytearray, *, length: int = 32,
204 243
     ) -> bytes:
205 244
         r"""Create a pseudorandom byte stream from phrase and service.
206 245
 
... ...
@@ -213,6 +252,9 @@ class Vault:
213 252
                 A master passphrase, or sometimes an SSH signature.
214 253
                 Used as the key for PBKDF2, the underlying cryptographic
215 254
                 primitive.
255
+
256
+                If a text string, then the byte representation must be
257
+                unique.
216 258
             service:
217 259
                 A vault service name.  Will be suffixed with
218 260
                 `Vault._UUID`, and then used as the salt value for
... ...
@@ -223,6 +265,11 @@ class Vault:
223 265
         Returns:
224 266
             A pseudorandom byte string of length `length`.
225 267
 
268
+        Raises:
269
+            AmbiguousByteRepresentationError:
270
+                The phrase is a text string with differing NFC- and
271
+                NFD-normalized UTF-8 byte representations.
272
+
226 273
         Note:
227 274
             Shorter values returned from this method (with the same key
228 275
             and message) are prefixes of longer values returned from
... ...
@@ -249,13 +296,15 @@ class Vault:
249 296
             b'\x1c\xc3\x9c\xd9\xb6\x1a\x99CS\x07\xc41\xf4\x85#s'
250 297
 
251 298
         """
299
+        phrase = cls._get_binary_string(phrase)
300
+        assert not isinstance(phrase, str)
252 301
         salt = bytes(service) + cls._UUID
253 302
         return hashlib.pbkdf2_hmac(hash_name='sha1', password=phrase,
254 303
                                    salt=salt, iterations=8, dklen=length)
255 304
 
256 305
     def generate(
257 306
         self, service_name: str | bytes | bytearray, /, *,
258
-        phrase: bytes | bytearray = b'',
307
+        phrase: bytes | bytearray | str = b'',
259 308
     ) -> bytes:
260 309
         r"""Generate a service passphrase.
261 310
 
... ...
@@ -266,6 +315,17 @@ class Vault:
266 315
                 If given, override the passphrase given during
267 316
                 construction.
268 317
 
318
+                If a text string, then the byte representation must be
319
+                unique.
320
+
321
+        Returns:
322
+            The service passphrase.
323
+
324
+        Raises:
325
+            AmbiguousByteRepresentationError:
326
+                The phrase is a text string with differing NFC- and
327
+                NFD-normalized UTF-8 byte representations.
328
+
269 329
         Examples:
270 330
             >>> phrase = b'She cells C shells bye the sea shoars'
271 331
             >>> # Using default options in constructor.
... ...
@@ -287,6 +347,7 @@ class Vault:
287 347
         assert_type(service_name, bytes)
288 348
         if not phrase:
289 349
             phrase = self._phrase
350
+        phrase = self._get_binary_string(phrase)
290 351
         # Repeat the passphrase generation with ever-increasing hash
291 352
         # lengths, until the passphrase can be formed without exhausting
292 353
         # the sequin.  See the guarantee in the create_hash method for
293 354