Document internal functions of the vault config exporter (51223f0) - derivepassphrase.git

src/derivepassphrase/exporter/vault_native.py

...	...	@@ -170,6 +170,34 @@ class VaultNativeConfigParser(abc.ABC):
170	170	def _pbkdf2(
171	171	password: str | Buffer, key_size: int, iterations: int
172	172	) -> bytes:
	173	+ """Generate a key from a password.
	174	+
	175	+ Uses PBKDF2 with HMAC-SHA1, with the vault UUID as a fixed salt
	176	+ value.
	177	+
	178	+ Args:
	179	+ password:
	180	+ The password from which to derive the key.
	181	+ key_size:
	182	+ The size of the output string. The effective key size
	183	+ (in bytes) is thus half of this output string size.
	184	+ iterations:
	185	+ The PBKDF2 iteration count.
	186	+
	187	+ Returns:
	188	+ The PBKDF2-derived key, encoded as a lowercase ASCII
	189	+ hexadecimal string.
	190	+
	191	+ Danger: Insecure use of cryptography
	192	+ This function is insecure because it uses a fixed salt
	193	+ value, which is not secure against rainbow tables. It is
	194	+ further difficult to use because the effective key size is
	195	+ only half as large as the "size" parameter (output string
	196	+ size). Finally, though the use of SHA-1 in HMAC per se is
	197	+ not known to be insecure, SHA-1 is known not to be
	198	+ collision-resistant.
	199	+
	200	+ """
173	201	if isinstance(password, str):
174	202	password = password.encode('utf-8')
175	203	raw_key = pbkdf2.PBKDF2HMAC(
...	...	@@ -194,6 +222,16 @@ class VaultNativeConfigParser(abc.ABC):
194	222	return result_key
195	223
196	224	def _parse_contents(self) -> None:
	225	+ """Parse the contents into IV, payload and MAC.
	226	+
	227	+ This operates on, and sets, multiple internal attributes of the
	228	+ parser.
	229	+
	230	+ Raises:
	231	+ ValueError:
	232	+ The configuration file contents are clearly truncated.
	233	+
	234	+ """
197	235	logger.info(
198	236	_msg.TranslatedString(
199	237	_msg.InfoMsgTemplate.VAULT_NATIVE_PARSING_IV_PAYLOAD_MAC,
...	...	@@ -224,6 +262,12 @@ class VaultNativeConfigParser(abc.ABC):
224	262	)
225	263
226	264	def _derive_keys(self) -> None:
	265	+ """Derive the signing and encryption keys.
	266	+
	267	+ This is a bookkeeping method. The actual work is done in
	268	+ [`_generate_keys`][].
	269	+
	270	+ """
227	271	logger.info(
228	272	_msg.TranslatedString(
229	273	_msg.InfoMsgTemplate.VAULT_NATIVE_DERIVING_KEYS,
...	...	@@ -239,9 +283,29 @@ class VaultNativeConfigParser(abc.ABC):
239	283
240	284	@abc.abstractmethod
241	285	def _generate_keys(self) -> None:
	286	+ """Derive the signing and encryption keys, and set the key sizes.
	287	+
	288	+ Subclasses must override this, as the derivation system is
	289	+ version-specific. The default implementation raises an error.
	290	+
	291	+ Raises:
	292	+ AssertionError:
	293	+ There is no default implementation.
	294	+
	295	+ """
242	296	raise AssertionError
243	297
244	298	def _check_signature(self) -> None:
	299	+ """Check for a valid MAC on the encrypted vault configuration.
	300	+
	301	+ The MAC uses HMAC-SHA256, and thus is 32 bytes long, before
	302	+ encoding.
	303	+
	304	+ Raises:
	305	+ ValueError:
	306	+ The MAC is invalid.
	307	+
	308	+ """
245	309	logger.info(
246	310	_msg.TranslatedString(
247	311	_msg.InfoMsgTemplate.VAULT_NATIVE_CHECKING_MAC,
...	...	@@ -265,9 +329,26 @@ class VaultNativeConfigParser(abc.ABC):
265	329
266	330	@abc.abstractmethod
267	331	def _hmac_input(self) -> bytes:
	332	+ """Return the input the MAC is supposed to verify.
	333	+
	334	+ Subclasses must override this, as the MAC-attested data is
	335	+ version-specific. The default implementation raises an error.
	336	+
	337	+ Raises:
	338	+ AssertionError:
	339	+ There is no default implementation.
	340	+
	341	+ """
268	342	raise AssertionError
269	343
270	344	def _decrypt_payload(self) -> Any: # noqa: ANN401
	345	+ """Return the decrypted vault configuration.
	346	+
	347	+ Requires [`_parse_contents`][] and [`_derive_keys`][] to have
	348	+ run, and relies on [`_check_signature`][] for tampering
	349	+ detection.
	350	+
	351	+ """
271	352	logger.info(
272	353	_msg.TranslatedString(
273	354	_msg.InfoMsgTemplate.VAULT_NATIVE_DECRYPTING_CONTENTS,
...	...	@@ -297,6 +378,16 @@ class VaultNativeConfigParser(abc.ABC):
297	378
298	379	@abc.abstractmethod
299	380	def _make_decryptor(self) -> ciphers.CipherContext:
	381	+ """Return the cipher context object used for decryption.
	382	+
	383	+ Subclasses must override this, as the cipher setup is
	384	+ version-specific. The default implementation raises an error.
	385	+
	386	+ Raises:
	387	+ AssertionError:
	388	+ There is no default implementation.
	389	+
	390	+ """
300	391	raise AssertionError
301	392
302	393
...	...	@@ -313,6 +404,11 @@ class VaultNativeV03ConfigParser(VaultNativeConfigParser):
313	404	"""
314	405
315	406	KEY_SIZE = 32
	407	+ """
	408	+ Key size for both the encryption and the signing key, including the
	409	+ encoding as a hexadecimal string. (The effective cryptographic
	410	+ strength is half of this value.)
	411	+ """
316	412
317	413	def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: ANN401
318	414	super().__init__(*args, **kwargs)
...	...	@@ -320,14 +416,45 @@ class VaultNativeV03ConfigParser(VaultNativeConfigParser):
320	416	self._mac_size = 32
321	417
322	418	def _generate_keys(self) -> None:
	419	+ """Derive the signing and encryption keys, and set the key sizes.
	420	+
	421	+ Version 0.3 vault configurations use a constant key size; see
	422	+ [`KEY_SIZE`][]. The encryption and signing keys differ in how
	423	+ many rounds of PBKDF2 they use (100 and 200, respectively).
	424	+
	425	+ Danger: Insecure use of cryptography
	426	+ This function makes use of the insecure function
	427	+ [`VaultNativeConfigParser._pbkdf2`][], without any attempts
	428	+ at mitigating its insecurity. It further uses `_pbkdf2`
	429	+ with the low iteration count of 100 and 200 rounds, which is
	430	+ drastically insufficient to defend against password
	431	+ guessing attacks using GPUs or ASICs. We provide this
	432	+ function for the purpose of interoperability with existing
	433	+ vault installations. Do not rely on this system to keep
	434	+ your vault configuration secure against access by even
	435	+ moderately determined attackers!
	436	+
	437	+ """
323	438	self._encryption_key = self._pbkdf2(self._password, self.KEY_SIZE, 100)
324	439	self._signing_key = self._pbkdf2(self._password, self.KEY_SIZE, 200)
325	440	self._encryption_key_size = self._signing_key_size = self.KEY_SIZE
326	441
327	442	def _hmac_input(self) -> bytes:
	443	+ """Return the input the MAC is supposed to verify.
	444	+
	445	+ This includes hexadecimal encoding of the message payload.
	446	+
	447	+ """
328	448	return self._message.hex().lower().encode('ASCII')
329	449
330	450	def _make_decryptor(self) -> ciphers.CipherContext:
	451	+ """Return the cipher context object used for decryption.
	452	+
	453	+ This is a standard AES256-CBC cipher context using the
	454	+ previously derived encryption key and the IV declared in the
	455	+ (MAC-verified) message payload.
	456	+
	457	+ """
331	458	return ciphers.Cipher(
332	459	algorithms.AES256(self._encryption_key), modes.CBC(self._iv)
333	460	).decryptor()
...	...	@@ -357,6 +484,22 @@ class VaultNativeV02ConfigParser(VaultNativeConfigParser):
357	484	self._mac_size = 64
358	485
359	486	def _parse_contents(self) -> None:
	487	+ """Parse the contents into IV, payload and MAC.
	488	+
	489	+ Like the base class implementation, this operates on, and sets,
	490	+ multiple internal attributes of the parser. In version 0.2
	491	+ vault configurations, the payload is encoded in base64 and the
	492	+ message tag (MAC) is encoded in hexadecimal, so unlike the base
	493	+ class implementation, we additionally decode the payload and the
	494	+ MAC.
	495	+
	496	+ Raises:
	497	+ ValueError:
	498	+ The configuration file contents are clearly truncated,
	499	+ or the payload or the message tag cannot be decoded
	500	+ properly.
	501	+
	502	+ """
360	503	super()._parse_contents()
361	504	self._payload = base64.standard_b64decode(self._payload)
362	505	self._message_tag = bytes.fromhex(self._message_tag.decode('ASCII'))
...	...	@@ -369,18 +512,114 @@ class VaultNativeV02ConfigParser(VaultNativeConfigParser):
369	512	)
370	513
371	514	def _generate_keys(self) -> None:
	515	+ """Derive the signing and encryption keys, and set the key sizes.
	516	+
	517	+ Version 0.2 vault configurations use 8-byte encryption keys and
	518	+ 16-byte signing keys, including the hexadecimal encoding. They
	519	+ both use 16 rounds of PBKDF2. This is due to an oversight in
	520	+ vault, where the author mistakenly supplied the intended
	521	+ iteration count as the key size, and the key size as the
	522	+ iteration count.
	523	+
	524	+ Danger: Insecure use of cryptography
	525	+ This function makes use of the insecure function
	526	+ [`VaultNativeConfigParser._pbkdf2`][], without any attempts
	527	+ at mitigating its insecurity. It further uses `_pbkdf2`
	528	+ with the low iteration count of 16 rounds, which is
	529	+ drastically insufficient to defend against password
	530	+ guessing attacks using GPUs or ASICs, and generates the
	531	+ encryption key as a truncation of the signing key. We
	532	+ provide this function for the purpose of interoperability
	533	+ with existing vault installations. Do not rely on this
	534	+ system to keep your vault configuration secure against
	535	+ access by even moderately determined attackers!
	536	+
	537	+ """
372	538	self._encryption_key = self._pbkdf2(self._password, 8, 16)
373	539	self._signing_key = self._pbkdf2(self._password, 16, 16)
374	540	self._encryption_key_size = 8
375	541	self._signing_key_size = 16
376	542
377	543	def _hmac_input(self) -> bytes:
	544	+ """Return the input the MAC is supposed to verify.
	545	+
	546	+ This includes base64 encoding of the message payload.
	547	+
	548	+ """
378	549	return base64.standard_b64encode(self._message)
379	550
380	551	def _make_decryptor(self) -> ciphers.CipherContext:
	552	+ """Return the cipher context object used for decryption.
	553	+
	554	+ This is a standard AES256-CBC cipher context. The encryption key
	555	+ and the IV are derived via the OpenSSL `EVP_BytesToKey` function
	556	+ (using MD5, no salt, and one iteration). This is what the
	557	+ Node.js `crypto` library (v21 series and older) used in its
	558	+ implementation of `crypto.createCipher("aes256", password)`.
	559	+
	560	+ Danger: Insecure use of cryptography
	561	+ This function makes use of (an implementation of) the
	562	+ OpenSSL function `EVP_BytesToKey`, which generates
	563	+ cryptographically weak keys, without any attempts at
	564	+ mitigating its insecurity. We provide this function for the
	565	+ purpose of interoperability with existing vault
	566	+ installations. Do not rely on this system to keep your
	567	+ vault configuration secure against access by even moderately
	568	+ determined attackers!
	569	+
	570	+ """
	571	+
381	572	def evp_bytestokey_md5_one_iteration_no_salt(
382	573	data: bytes, key_size: int, iv_size: int
383	574	) -> tuple[bytes, bytes]:
	575	+ """Reimplement OpenSSL's `EVP_BytesToKey` with fixed parameters.
	576	+
	577	+ `EVP_BytesToKey` in general is a key derivation function,
	578	+ i.e., a function that derives key material from an input
	579	+ byte string. `EVP_BytesToKey` conceptually splits the
	580	+ derived key material into an encryption key and an
	581	+ initialization vector (IV).
	582	+
	583	+ Note: Algorithm description
	584	+ `EVP_BytesToKey` takes an input byte string, two output
	585	+ sizes (encryption key size and IV size), a message digest
	586	+ function, a salt value and an iteration count. The
	587	+ derived key material is calculated in blocks, each of
	588	+ which is the output of (iterated application of) the
	589	+ message digest function. The input to the message
	590	+ digest function is the concatenation of the previous
	591	+ block (if any) with the input byte string and the salt
	592	+ value (if any):
	593	+
	594	+ ~~~~ python
	595	+
	596	+ data = block_input = b''.join([
	597	+ previous_block, input_string, salt
	598	+ ])
	599	+ for i in range(iteration_count):
	600	+ data = message_digest(data)
	601	+ block = data
	602	+
	603	+ ~~~~
	604	+
	605	+ We use as many blocks as are necessary to cover the
	606	+ total output byte string size. The first few bytes
	607	+ (dictated by the encryption key size) form the
	608	+ encryption key, the other bytes (dictated by the IV
	609	+ size) form the IV.
	610	+
	611	+ We implement exactly the subset of `EVP_BytesToKey` that the
	612	+ Node.js `crypto` library (v21 series and older) uses in its
	613	+ implementation of `crypto.createCipher("aes256", password)`.
	614	+ Specifically, the message digest function is fixed to MD5,
	615	+ the salt is always empty, and the iteration count is fixed
	616	+ at one.
	617	+
	618	+ Returns:
	619	+ A 2-tuple containing the derived encryption key and the
	620	+ derived initialization vector.
	621	+
	622	+ """
384	623	total_size = key_size + iv_size
385	624	buffer = bytearray()
386	625	last_block = b''
387	626

...	...	@@ -6,3 +6,15 @@
6	6
7	7	::: derivepassphrase.exporter.vault_native
8	8	heading_level: 1
	9	+ filters:
	10	+ - "^[A-Za-z0-9]"
	11	+ - "^__[a-zA-Z0-9_-]+__"
	12	+ - "^_pbkdf2$"
	13	+ - "^_parse_contents$"
	14	+ - "^_derive_keys$"
	15	+ - "^_generate_keys$"
	16	+ - "^_check_signature$"
	17	+ - "^_hmac_input$"
	18	+ - "^_decrypt_payload$"
	19	+ - "^_make_decryptor$"
	20	+ - "^_evp_bytestokey_md5_one_iteration_no_salt$"