Marco Ricci committed on 2024-06-22 21:19:30
Zeige 2 geänderte Dateien mit 161 Einfügungen und 24 Löschungen.
Expose some functionality from `derivepassphrase.Vault` as internal methods, to facilitate testing and to avoid reimplementing the same functionality again in the command-line interface. This includes hash length estimation and SSH key suitability checking.
| ... | ... |
@@ -132,19 +132,70 @@ class Vault: |
| 132 | 132 |
for _ in range(len(self._required), self._length): |
| 133 | 133 |
self._required.append(bytes(self._allowed)) |
| 134 | 134 |
|
| 135 |
- def _entropy_upper_bound(self) -> int: |
|
| 135 |
+ def _entropy(self) -> float: |
|
| 136 | 136 |
"""Estimate the passphrase entropy, given the current settings. |
| 137 | 137 |
|
| 138 | 138 |
The entropy is the base 2 logarithm of the amount of |
| 139 |
- possibilities. We operate directly on the logarithms, and round |
|
| 140 |
- each summand up, overestimating the true entropy. |
|
| 139 |
+ possibilities. We operate directly on the logarithms, and use |
|
| 140 |
+ sorting and [`math.fsum`][] to keep high accuracy. |
|
| 141 |
+ |
|
| 142 |
+ Note: |
|
| 143 |
+ We actually overestimate the entropy here because of poor |
|
| 144 |
+ handling of character repetitions. In the extreme, assuming |
|
| 145 |
+ that only one character were allowed, then because there is |
|
| 146 |
+ only one possible string of each given length, the entropy |
|
| 147 |
+ of that string `s` is always zero. However, we calculate |
|
| 148 |
+ the entropy as `math.log2(math.factorial(len(s)))`, i.e. we |
|
| 149 |
+ assume the characters at the respective string position are |
|
| 150 |
+ distinguishable from each other. |
|
| 151 |
+ |
|
| 152 |
+ Returns: |
|
| 153 |
+ A valid (and somewhat close) upper bound to the entropy. |
|
| 141 | 154 |
|
| 142 | 155 |
""" |
| 143 | 156 |
factors: list[int] = [] |
| 157 |
+ if not self._required or any(not x for x in self._required): |
|
| 158 |
+ return float('-inf')
|
|
| 144 | 159 |
for i, charset in enumerate(self._required): |
| 145 | 160 |
factors.append(i + 1) |
| 146 | 161 |
factors.append(len(charset)) |
| 147 |
- return sum(int(math.ceil(math.log2(f))) for f in factors) |
|
| 162 |
+ factors.sort() |
|
| 163 |
+ return math.fsum(math.log2(f) for f in factors) |
|
| 164 |
+ |
|
| 165 |
+ def _estimate_sufficient_hash_length( |
|
| 166 |
+ self, safety_factor: float = 2.0, |
|
| 167 |
+ ) -> int: |
|
| 168 |
+ """Estimate the sufficient hash length, given the current settings. |
|
| 169 |
+ |
|
| 170 |
+ Using the entropy (via `_entropy`) and a safety factor, give an |
|
| 171 |
+ initial estimate of the length to use for `create_hash` such |
|
| 172 |
+ that using a `Sequin` with this hash will not exhaust it during |
|
| 173 |
+ passphrase generation. |
|
| 174 |
+ |
|
| 175 |
+ Args: |
|
| 176 |
+ safety_factor: The safety factor. Must be at least 1. |
|
| 177 |
+ |
|
| 178 |
+ Returns: |
|
| 179 |
+ The estimated sufficient hash length. |
|
| 180 |
+ |
|
| 181 |
+ Warning: |
|
| 182 |
+ This is a heuristic, not an exact computation; it may |
|
| 183 |
+ underestimate the true necessary hash length. It is |
|
| 184 |
+ intended as a starting point for searching for a sufficient |
|
| 185 |
+ hash length, usually by doubling the hash length each time |
|
| 186 |
+ it does not yet prove so. |
|
| 187 |
+ |
|
| 188 |
+ """ |
|
| 189 |
+ try: |
|
| 190 |
+ safety_factor = float(safety_factor) |
|
| 191 |
+ except TypeError as e: |
|
| 192 |
+ raise TypeError(f'invalid safety factor: not a float: ' |
|
| 193 |
+ f'{safety_factor!r}') from e
|
|
| 194 |
+ if not math.isfinite(safety_factor) or safety_factor < 1.0: |
|
| 195 |
+ raise ValueError(f'invalid safety factor {safety_factor!r}')
|
|
| 196 |
+ # Ensure the bound is strictly positive. |
|
| 197 |
+ entropy_bound = max(1, self._entropy()) |
|
| 198 |
+ return int(math.ceil(safety_factor * entropy_bound / 8)) |
|
| 148 | 199 |
|
| 149 | 200 |
@classmethod |
| 150 | 201 |
def create_hash( |
| ... | ... |
@@ -225,12 +276,8 @@ class Vault: |
| 225 | 276 |
b': 4TVH#5:aZl8LueOT\\{'
|
| 226 | 277 |
|
| 227 | 278 |
""" |
| 228 |
- entropy_bound = self._entropy_upper_bound() |
|
| 229 |
- # Use a safety factor, because a sequin will potentially throw |
|
| 230 |
- # bits away and we cannot rely on having generated a hash of |
|
| 231 |
- # exactly the right length. |
|
| 232 |
- safety_factor = 2 |
|
| 233 |
- hash_length = int(math.ceil(safety_factor * entropy_bound / 8)) |
|
| 279 |
+ hash_length = self._estimate_sufficient_hash_length() |
|
| 280 |
+ assert hash_length >= 1 |
|
| 234 | 281 |
# Ensure the phrase is a bytes object. Needed later for safe |
| 235 | 282 |
# concatenation. |
| 236 | 283 |
if isinstance(service_name, str): |
| ... | ... |
@@ -267,11 +314,36 @@ class Vault: |
| 267 | 314 |
charset) |
| 268 | 315 |
pos = seq.generate(len(charset)) |
| 269 | 316 |
result.extend(charset[pos:pos+1]) |
| 270 |
- except sequin.SequinExhaustedException: # pragma: no cover |
|
| 317 |
+ except sequin.SequinExhaustedException: |
|
| 271 | 318 |
hash_length *= 2 |
| 272 | 319 |
else: |
| 273 | 320 |
return bytes(result) |
| 274 | 321 |
|
| 322 |
+ @staticmethod |
|
| 323 |
+ def _is_suitable_ssh_key(key: bytes | bytearray, /) -> bool: |
|
| 324 |
+ """Check whether the key is suitable for passphrase derivation. |
|
| 325 |
+ |
|
| 326 |
+ Currently, this only checks whether signatures with this key |
|
| 327 |
+ type are deterministic. |
|
| 328 |
+ |
|
| 329 |
+ Args: |
|
| 330 |
+ key: SSH public key to check. |
|
| 331 |
+ |
|
| 332 |
+ Returns: |
|
| 333 |
+ True if and only if the key is suitable for use in deriving |
|
| 334 |
+ a passphrase deterministically. |
|
| 335 |
+ |
|
| 336 |
+ """ |
|
| 337 |
+ deterministic_signature_types = {
|
|
| 338 |
+ 'ssh-ed25519': |
|
| 339 |
+ lambda k: k.startswith(b'\x00\x00\x00\x0bssh-ed25519'), |
|
| 340 |
+ 'ssh-ed448': |
|
| 341 |
+ lambda k: k.startswith(b'\x00\x00\x00\x09ssh-ed448'), |
|
| 342 |
+ 'ssh-rsa': |
|
| 343 |
+ lambda k: k.startswith(b'\x00\x00\x00\x07ssh-rsa'), |
|
| 344 |
+ } |
|
| 345 |
+ return any(v(key) for v in deterministic_signature_types.values()) |
|
| 346 |
+ |
|
| 275 | 347 |
@classmethod |
| 276 | 348 |
def phrase_from_signature( |
| 277 | 349 |
cls, key: bytes | bytearray, / |
| ... | ... |
@@ -314,15 +386,7 @@ class Vault: |
| 314 | 386 |
True |
| 315 | 387 |
|
| 316 | 388 |
""" |
| 317 |
- deterministic_signature_types = {
|
|
| 318 |
- 'ssh-ed25519': |
|
| 319 |
- lambda k: k.startswith(b'\x00\x00\x00\x0bssh-ed25519'), |
|
| 320 |
- 'ssh-ed448': |
|
| 321 |
- lambda k: k.startswith(b'\x00\x00\x00\x09ssh-ed448'), |
|
| 322 |
- 'ssh-rsa': |
|
| 323 |
- lambda k: k.startswith(b'\x00\x00\x00\x07ssh-rsa'), |
|
| 324 |
- } |
|
| 325 |
- if not any(v(key) for v in deterministic_signature_types.values()): |
|
| 389 |
+ if not cls._is_suitable_ssh_key(key): |
|
| 326 | 390 |
raise ValueError( |
| 327 | 391 |
'unsuitable SSH key: bad key, or signature not deterministic') |
| 328 | 392 |
with ssh_agent_client.SSHAgentClient() as client: |
| ... | ... |
@@ -4,17 +4,22 @@ |
| 4 | 4 |
|
| 5 | 5 |
"""Test passphrase generation via derivepassphrase.Vault.""" |
| 6 | 6 |
|
| 7 |
-import pytest |
|
| 7 |
+from __future__ import annotations |
|
| 8 |
+ |
|
| 9 |
+import math |
|
| 8 | 10 |
|
| 9 | 11 |
import derivepassphrase |
| 10 | 12 |
import sequin |
| 13 |
+import pytest |
|
| 11 | 14 |
|
| 12 | 15 |
Vault = derivepassphrase.Vault |
| 13 | 16 |
phrase = b'She cells C shells bye the sea shoars' |
| 17 |
+google_phrase = rb': 4TVH#5:aZl8LueOT\{'
|
|
| 18 |
+twitter_phrase = rb"[ (HN_N:lI&<ro=)3'g9" |
|
| 14 | 19 |
|
| 15 |
-@pytest.mark.parametrize('service,expected', [
|
|
| 16 |
- (b'google', rb': 4TVH#5:aZl8LueOT\{'),
|
|
| 17 |
- ('twitter', rb"[ (HN_N:lI&<ro=)3'g9"),
|
|
| 20 |
+@pytest.mark.parametrize(['service', 'expected'], [ |
|
| 21 |
+ (b'google', google_phrase), |
|
| 22 |
+ ('twitter', twitter_phrase),
|
|
| 18 | 23 |
]) |
| 19 | 24 |
def test_200_basic_configuration(service, expected): |
| 20 | 25 |
assert Vault(phrase=phrase).generate(service) == expected |
| ... | ... |
@@ -122,3 +127,71 @@ def test_301_character_set_subtraction_duplicate(): |
| 122 | 127 |
Vault._subtract(b'abcdef', b'aabbccddeeff') |
| 123 | 128 |
with pytest.raises(ValueError, match='duplicate characters'): |
| 124 | 129 |
Vault._subtract(b'aabbccddeeff', b'abcdef') |
| 130 |
+ |
|
| 131 |
+@pytest.mark.parametrize(['length', 'settings', 'entropy'], [ |
|
| 132 |
+ (20, {}, math.log2(math.factorial(20)) + 20 * math.log2(94)),
|
|
| 133 |
+ ( |
|
| 134 |
+ 20, |
|
| 135 |
+ {'upper': 0, 'number': 0, 'space': 0, 'symbol': 0},
|
|
| 136 |
+ math.log2(math.factorial(20)) + 20 * math.log2(26) |
|
| 137 |
+ ), |
|
| 138 |
+ (0, {}, float('-inf')),
|
|
| 139 |
+ (0, {'lower': 0, 'number': 0, 'space': 0, 'symbol': 0}, float('-inf')),
|
|
| 140 |
+ (1, {}, math.log2(94)),
|
|
| 141 |
+ (1, {'upper': 0, 'lower': 0, 'number': 0, 'symbol': 0}, 0.0),
|
|
| 142 |
+]) |
|
| 143 |
+def test_400_entropy( |
|
| 144 |
+ length: int, settings: dict[str, int], entropy: int |
|
| 145 |
+) -> None: |
|
| 146 |
+ v = Vault(length=length, **settings) |
|
| 147 |
+ assert math.isclose(v._entropy(), entropy) |
|
| 148 |
+ assert v._estimate_sufficient_hash_length() > 0 |
|
| 149 |
+ if math.isfinite(entropy) and entropy: |
|
| 150 |
+ assert v._estimate_sufficient_hash_length(1.0) == math.ceil(entropy / 8) |
|
| 151 |
+ assert v._estimate_sufficient_hash_length(8.0) >= entropy |
|
| 152 |
+ |
|
| 153 |
+def test_401_hash_length_estimation( |
|
| 154 |
+) -> None: |
|
| 155 |
+ v = Vault(phrase=phrase) |
|
| 156 |
+ with pytest.raises(ValueError, |
|
| 157 |
+ match='invalid safety factor'): |
|
| 158 |
+ assert v._estimate_sufficient_hash_length(-1.0) |
|
| 159 |
+ with pytest.raises(TypeError, |
|
| 160 |
+ match='invalid safety factor: not a float'): |
|
| 161 |
+ assert v._estimate_sufficient_hash_length(None) # type: ignore |
|
| 162 |
+ v2 = Vault(phrase=phrase, lower=0, upper=0, number=0, symbol=0, |
|
| 163 |
+ space=1, length=1) |
|
| 164 |
+ assert v2._entropy() == 0.0 |
|
| 165 |
+ assert v2._estimate_sufficient_hash_length() > 0 |
|
| 166 |
+ |
|
| 167 |
+@pytest.mark.parametrize(['service', 'expected'], [ |
|
| 168 |
+ (b'google', google_phrase), |
|
| 169 |
+ ('twitter', twitter_phrase),
|
|
| 170 |
+]) |
|
| 171 |
+def test_402_hash_length_expansion( |
|
| 172 |
+ monkeypatch: Any, service: str | bytes, expected: bytes |
|
| 173 |
+) -> None: |
|
| 174 |
+ v = Vault(phrase=phrase) |
|
| 175 |
+ monkeypatch.setattr(v, |
|
| 176 |
+ '_estimate_sufficient_hash_length', |
|
| 177 |
+ lambda *args, **kwargs: 1) |
|
| 178 |
+ assert v._estimate_sufficient_hash_length |
|
| 179 |
+ assert v.generate(service) == expected |
|
| 180 |
+ |
|
| 181 |
+@pytest.mark.parametrize(['s', 'raises'], [ |
|
| 182 |
+ ('ñ', True), ('Düsseldorf', True),
|
|
| 183 |
+ ('liberté, egalité, fraternité', True), ('ASCII', False),
|
|
| 184 |
+ ('Düsseldorf'.encode('UTF-8'), False),
|
|
| 185 |
+ (bytearray([2, 3, 5, 7, 11, 13]), False), |
|
| 186 |
+]) |
|
| 187 |
+def test_403_binary_strings(s: str | bytes | bytearray, raises: bool) -> None: |
|
| 188 |
+ binstr = derivepassphrase.Vault._get_binary_string |
|
| 189 |
+ if raises: |
|
| 190 |
+ with pytest.raises(derivepassphrase.AmbiguousByteRepresentationError): |
|
| 191 |
+ binstr(s) |
|
| 192 |
+ elif isinstance(s, str): |
|
| 193 |
+ assert binstr(s) == s.encode('UTF-8')
|
|
| 194 |
+ assert binstr(binstr(s)) == s.encode('UTF-8')
|
|
| 195 |
+ else: |
|
| 196 |
+ assert binstr(s) == bytes(s) |
|
| 197 |
+ assert binstr(binstr(s)) == bytes(s) |
|
| 125 | 198 |