Add docstrings and better variable names on storeroom exporter
Marco Ricci

Marco Ricci committed on 2024-08-16 22:48:08
Zeige 1 geänderte Datei mit 139 Einfügungen und 21 Löschungen.

... ...
@@ -1,5 +1,7 @@
1 1
 #!/usr/bin/python3
2 2
 
3
+from __future__ import annotations
4
+
3 5
 import base64
4 6
 import glob
5 7
 import json
... ...
@@ -42,11 +44,33 @@ class MasterKeys(TypedDict):
42 44
 
43 45
 
44 46
 def derive_master_keys_keys(password: str | bytes, iterations: int) -> KeyPair:
47
+    """Derive encryption and signing keys for the master keys data.
48
+
49
+    The master password is run through a key derivation function to
50
+    obtain a 64-byte string, which is then split to yield two 32-byte
51
+    keys.  The key derivation function is PBKDF2, using HMAC-SHA1 and
52
+    salted with the storeroom master keys UUID.
53
+
54
+    Args:
55
+        password:
56
+            A master password for the storeroom instance.  Usually read
57
+            from the `VAULT_KEY` environment variable, otherwise
58
+            defaults to the username.
59
+        iterations:
60
+            A count of rounds for the underlying key derivation
61
+            function.  Usually stored as a setting next to the encrypted
62
+            master keys data.
63
+
64
+    Returns:
65
+        A 2-tuple of keys, the encryption key and the signing key, to
66
+        decrypt and verify the master keys data with.
67
+
68
+    """
45 69
     if isinstance(password, str):
46 70
         password = password.encode('ASCII')
47 71
     master_keys_keys_blob = pbkdf2.PBKDF2HMAC(
48 72
         algorithm=hashes.SHA1(),  # noqa: S303
49
-        length=64,
73
+        length=2 * KEY_SIZE,
50 74
         salt=STOREROOM_MASTER_KEYS_UUID,
51 75
         iterations=iterations,
52 76
     ).derive(password)
... ...
@@ -76,6 +100,39 @@ def derive_master_keys_keys(password: str | bytes, iterations: int) -> KeyPair:
76 100
 
77 101
 
78 102
 def decrypt_master_keys_data(data: bytes, keys: KeyPair) -> MasterKeys:
103
+    """Decrypt the master keys data.
104
+
105
+    The master keys data contains:
106
+
107
+    - a 16-byte IV,
108
+    - a 96-byte AES256-CBC-encrypted payload (using PKCS7 padding on the
109
+      inside), and
110
+    - a 32-byte MAC of the preceding 112 bytes.
111
+
112
+    The decrypted payload itself consists of three 32-byte keys: the
113
+    hashing, encryption and signing keys, in that order.
114
+
115
+    The encrypted payload is encrypted with the encryption key, and the
116
+    MAC is created based on the signing key.  As per standard
117
+    cryptographic procedure, the MAC can be verified before attempting
118
+    to decrypt the payload.
119
+
120
+    Because the payload size is both fixed and a multiple of the
121
+    cipher blocksize, in this case, the PKCS7 padding is a no-op.
122
+
123
+    Args:
124
+        data:
125
+            The encrypted master keys data.
126
+        keys:
127
+            The encryption and signing keys for the master keys data.
128
+            These should have previously been derived via the
129
+            [`derivepassphrase.exporter.storeroom.derive_master_keys_keys`][]
130
+            function.
131
+
132
+    Returns:
133
+        The master encryption, signing and hashing keys.
134
+
135
+    """
79 136
     ciphertext, claimed_mac = struct.unpack(
80 137
         f'{len(data) - MAC_SIZE}s {MAC_SIZE}s', data
81 138
     )
... ...
@@ -124,21 +181,54 @@ def decrypt_master_keys_data(data: bytes, keys: KeyPair) -> MasterKeys:
124 181
     }
125 182
 
126 183
 
127
-def decrypt_session_keys(data: bytes, keys: MasterKeys) -> KeyPair:
184
+def decrypt_session_keys(data: bytes, master_keys: MasterKeys) -> KeyPair:
185
+    """Decrypt the bucket item's session keys.
186
+
187
+    The bucket item's session keys are single-use keys for encrypting
188
+    and signing a single item in the storage bucket.  The encrypted
189
+    session key data consists of:
190
+
191
+    - a 16-byte IV,
192
+    - a 64-byte AES256-CBC-encrypted payload (using PKCS7 padding on the
193
+      inside), and
194
+    - a 32-byte MAC of the preceding 80 bytes.
195
+
196
+    The encrypted payload is encrypted with the master encryption key,
197
+    and the MAC is created with the master signing key.  As per standard
198
+    cryptographic procedure, the MAC can be verified before attempting
199
+    to decrypt the payload.
200
+
201
+    Because the payload size is both fixed and a multiple of the
202
+    cipher blocksize, in this case, the PKCS7 padding is a no-op.
203
+
204
+    Args:
205
+        data:
206
+            The encrypted bucket item session key data.
207
+        master_keys:
208
+            The master keys.  Presumably these have previously been
209
+            obtained via the
210
+            [`derivepassphrase.exporter.storeroom.decrypt_master_keys_data`][]
211
+            function.
212
+
213
+    Returns:
214
+        The bucket item's encryption and signing keys.
215
+
216
+    """
217
+
128 218
     ciphertext, claimed_mac = struct.unpack(
129 219
         f'{len(data) - MAC_SIZE}s {MAC_SIZE}s', data
130 220
     )
131
-    actual_mac = hmac.HMAC(keys['signing_key'], hashes.SHA256())
221
+    actual_mac = hmac.HMAC(master_keys['signing_key'], hashes.SHA256())
132 222
     actual_mac.update(ciphertext)
133 223
     logger.debug(
134 224
         (
135
-            'decrypt_bucket_line (session_keys): '
225
+            'decrypt_bucket_item (session_keys): '
136 226
             'mac_key = bytes.fromhex(%s) (master), '
137 227
             'hashed_content = bytes.fromhex(%s), '
138 228
             'claimed_mac = bytes.fromhex(%s), '
139 229
             'actual_mac = bytes.fromhex(%s)'
140 230
         ),
141
-        repr(keys['signing_key'].hex(' ')),
231
+        repr(master_keys['signing_key'].hex(' ')),
142 232
         repr(ciphertext.hex(' ')),
143 233
         repr(claimed_mac.hex(' ')),
144 234
         repr(actual_mac.copy().finalize().hex(' ')),
... ...
@@ -149,7 +239,7 @@ def decrypt_session_keys(data: bytes, keys: MasterKeys) -> KeyPair:
149 239
         f'{IV_SIZE}s {len(ciphertext) - IV_SIZE}s', ciphertext
150 240
     )
151 241
     decryptor = ciphers.Cipher(
152
-        algorithms.AES256(keys['encryption_key']), modes.CBC(iv)
242
+        algorithms.AES256(master_keys['encryption_key']), modes.CBC(iv)
153 243
     ).decryptor()
154 244
     padded_plaintext = bytearray()
155 245
     padded_plaintext.extend(decryptor.update(payload))
... ...
@@ -170,14 +260,14 @@ def decrypt_session_keys(data: bytes, keys: MasterKeys) -> KeyPair:
170 260
 
171 261
     logger.debug(
172 262
         (
173
-            'decrypt_bucket_line (session_keys): '
263
+            'decrypt_bucket_item (session_keys): '
174 264
             'decrypt_aes256_cbc_and_unpad(key=bytes.fromhex(%s), '
175 265
             'iv=bytes.fromhex(%s))(bytes.fromhex(%s)) '
176 266
             '= bytes.fromhex(%s) '
177 267
             '= {"encryption_key": bytes.fromhex(%s), '
178 268
             '"signing_key": bytes.fromhex(%s)}'
179 269
         ),
180
-        repr(keys['encryption_key'].hex(' ')),
270
+        repr(master_keys['encryption_key'].hex(' ')),
181 271
         repr(iv.hex(' ')),
182 272
         repr(payload.hex(' ')),
183 273
         repr(plaintext.hex(' ')),
... ...
@@ -194,21 +284,49 @@ def decrypt_session_keys(data: bytes, keys: MasterKeys) -> KeyPair:
194 284
     return session_keys
195 285
 
196 286
 
197
-def decrypt_contents(data: bytes, keys: KeyPair) -> bytes:
287
+def decrypt_contents(data: bytes, session_keys: KeyPair) -> bytes:
288
+    """Decrypt the bucket item's contents.
289
+
290
+    The data consists of:
291
+
292
+    - a 16-byte IV,
293
+    - a variable-sized AES256-CBC-encrypted payload (using PKCS7 padding
294
+      on the inside), and
295
+    - a 32-byte MAC of the preceding IV and encrypted payload.
296
+
297
+    The encrypted payload is encrypted with the bucket item's session
298
+    encryption key, and the MAC is created with the bucket item's
299
+    session signing key.  As per standard cryptographic procedure, the
300
+    MAC can be verified before attempting to decrypt the payload.
301
+
302
+    Args:
303
+        data:
304
+            The encrypted bucket item payload data.
305
+        session_keys:
306
+            The bucket item's session keys.  Presumably these have
307
+            previously been obtained via the
308
+            [`derivepassphrase.exporter.storeroom.decrypt_session_keys`][]
309
+            function.
310
+
311
+    Returns:
312
+        The bucket item's payload.
313
+
314
+    """
315
+
198 316
     ciphertext, claimed_mac = struct.unpack(
199 317
         f'{len(data) - MAC_SIZE}s {MAC_SIZE}s', data
200 318
     )
201
-    actual_mac = hmac.HMAC(keys['signing_key'], hashes.SHA256())
319
+    actual_mac = hmac.HMAC(session_keys['signing_key'], hashes.SHA256())
202 320
     actual_mac.update(ciphertext)
203 321
     logger.debug(
204 322
         (
205
-            'decrypt_bucket_line (contents): '
323
+            'decrypt_bucket_item (contents): '
206 324
             'mac_key = bytes.fromhex(%s), '
207 325
             'hashed_content = bytes.fromhex(%s), '
208 326
             'claimed_mac = bytes.fromhex(%s), '
209 327
             'actual_mac = bytes.fromhex(%s)'
210 328
         ),
211
-        repr(keys['signing_key'].hex(' ')),
329
+        repr(session_keys['signing_key'].hex(' ')),
212 330
         repr(ciphertext.hex(' ')),
213 331
         repr(claimed_mac.hex(' ')),
214 332
         repr(actual_mac.copy().finalize().hex(' ')),
... ...
@@ -219,7 +337,7 @@ def decrypt_contents(data: bytes, keys: KeyPair) -> bytes:
219 337
         f'{IV_SIZE}s {len(ciphertext) - IV_SIZE}s', ciphertext
220 338
     )
221 339
     decryptor = ciphers.Cipher(
222
-        algorithms.AES256(keys['encryption_key']), modes.CBC(iv)
340
+        algorithms.AES256(session_keys['encryption_key']), modes.CBC(iv)
223 341
     ).decryptor()
224 342
     padded_plaintext = bytearray()
225 343
     padded_plaintext.extend(decryptor.update(payload))
... ...
@@ -231,12 +349,12 @@ def decrypt_contents(data: bytes, keys: KeyPair) -> bytes:
231 349
 
232 350
     logger.debug(
233 351
         (
234
-            'decrypt_bucket_line (contents): '
352
+            'decrypt_bucket_item (contents): '
235 353
             'decrypt_aes256_cbc_and_unpad(key=bytes.fromhex(%s), '
236 354
             'iv=bytes.fromhex(%s))(bytes.fromhex(%s)) '
237 355
             '= bytes.fromhex(%s)'
238 356
         ),
239
-        repr(keys['encryption_key'].hex(' ')),
357
+        repr(session_keys['encryption_key'].hex(' ')),
240 358
         repr(iv.hex(' ')),
241 359
         repr(payload.hex(' ')),
242 360
         repr(plaintext.hex(' ')),
... ...
@@ -245,23 +363,23 @@ def decrypt_contents(data: bytes, keys: KeyPair) -> bytes:
245 363
     return plaintext
246 364
 
247 365
 
248
-def decrypt_bucket_line(bucket_line: bytes, master_keys: MasterKeys) -> bytes:
366
+def decrypt_bucket_item(bucket_item: bytes, master_keys: MasterKeys) -> bytes:
249 367
     logger.debug(
250 368
         (
251
-            'decrypt_bucket_line: data = bytes.fromhex(%s), '
369
+            'decrypt_bucket_item: data = bytes.fromhex(%s), '
252 370
             'encryption_key = bytes.fromhex(%s), '
253 371
             'signing_key = bytes.fromhex(%s)'
254 372
         ),
255
-        repr(bucket_line.hex(' ')),
373
+        repr(bucket_item.hex(' ')),
256 374
         repr(master_keys['encryption_key'].hex(' ')),
257 375
         repr(master_keys['signing_key'].hex(' ')),
258 376
     )
259 377
     data_version, encrypted_session_keys, data_contents = struct.unpack(
260 378
         (
261 379
             f'B {ENCRYPTED_KEYPAIR_SIZE}s '
262
-            f'{len(bucket_line) - 1 - ENCRYPTED_KEYPAIR_SIZE}s'
380
+            f'{len(bucket_item) - 1 - ENCRYPTED_KEYPAIR_SIZE}s'
263 381
         ),
264
-        bucket_line,
382
+        bucket_item,
265 383
     )
266 384
     if data_version != 1:
267 385
         msg = f'Cannot handle version {data_version} encrypted data'
... ...
@@ -287,7 +405,7 @@ def decrypt_bucket_file(filename: str, master_keys: MasterKeys) -> None:
287 405
         decrypted_file.write(header_line)
288 406
         for line in bucket_file:
289 407
             decrypted_contents = (
290
-                decrypt_bucket_line(
408
+                decrypt_bucket_item(
291 409
                     base64.standard_b64decode(line), master_keys
292 410
                 ).removesuffix(b'\n')
293 411
                 + b'\n'
294 412