Add quality control scripts
Marco Ricci

Marco Ricci committed on 2025-02-09 20:47:17
Zeige 2 geänderte Dateien mit 370 Einfügungen und 0 Löschungen.


Add a quality control script `qc_auto.py` that calls the linter, the
formatter, the type checker, the test suite and the documentation
builder, depending on what branch we are on.  It is intended to be
usable as a pre-commit hook.

Add another quality control script `man_diagnostics.py` that checks that
the diagnostics documented in the manpages are complete, and match the
enum values within `derivepassphrase`.  It relies on annotations
(comments) in the manpages to map description texts to the respective
enum values (the mapping is not one-to-one).  It also does
a (rudimentary) check that every warning and error message is mentioned
somewhere in the source tree (besides the messages module that defines
those messages).
... ...
@@ -0,0 +1,283 @@
1
+#!/usr/bin/python3
2
+# SPDX-FileCopyrightText: 2025 Marco Ricci <software@the13thletter.info>
3
+#
4
+# SPDX-License-Identifier: Zlib
5
+
6
+"""Check for diagnostic messages not documented in the manpages."""
7
+
8
+from __future__ import annotations
9
+
10
+import pathlib
11
+import re
12
+import sys
13
+from typing import TYPE_CHECKING, Literal, NewType, cast
14
+
15
+sys.path.append(str(pathlib.Path(sys.argv[0]).resolve().parent.parent / 'src'))
16
+from derivepassphrase._internals import cli_messages  # noqa: PLC2701
17
+
18
+if TYPE_CHECKING:
19
+    from collections.abc import Iterator
20
+
21
+    EnumName = NewType('EnumName', str)
22
+    DiagnosticText = NewType('DiagnosticText', str)
23
+
24
# Name-to-member mappings of the error and warning message template
# enums; the names found in the manpage annotations are looked up here.
known_errors, known_warnings = (
    cli_messages.ErrMsgTemplate.__members__,
    cli_messages.WarnMsgTemplate.__members__,
)
26
+
27
+
28
def _replace_known_metavars(string: str) -> str:
    """Fill in the known format placeholders of a message template.

    Only the three placeholders listed below are substituted; any other
    text in the string is left untouched.

    Args:
        string: The message template text.

    Returns:
        The text with every known placeholder replaced by its value.

    """
    substitutions = (
        (
            '{service_metavar!s}',
            cli_messages.Label.VAULT_METAVAR_SERVICE.value.singular,
        ),
        ('{PROG_NAME!s}', cli_messages.PROG_NAME),
        ('{settings_type!s}', 'global/service-specific settings'),
    )
    result = string
    # Apply the substitutions one after another, in the listed order.
    for placeholder, replacement in substitutions:
        result = result.replace(placeholder, replacement)
    return result
37
+
38
+
39
# This predicate is phrased as an error condition ("mismatches"), and
# callers assert its negation.  An enum name that doesn't exist (e.g.
# because the manpage is outdated) has no text that could mismatch, so
# it is reported as "no mismatch" rather than erroneously claiming that
# a missing text matches the pattern.
def _mismatches_text(
    pattern: re.Pattern[str],
    enum_name: EnumName,
    name_type: Literal['warning', 'error'],
) -> bool:
    """Tell whether a message template fails to match a pattern.

    Args:
        pattern: The compiled pattern derived from the documented text.
        enum_name: The enum name, optionally qualified with dotted
            prefixes; only the part after the last dot is looked up.
        name_type: Whether to look the name up among the known errors
            or among the known warnings.

    Returns:
        True if any non-empty text (singular or plural) of the named
        template does not match the pattern at its start, after the
        known placeholders have been filled in.  False if all texts
        match, or if the name is unknown.

    """
    # Keep only the component after the last dot.
    bare_name = enum_name.rpartition('.')[2]
    templates = known_errors if name_type == 'error' else known_warnings
    try:
        template = templates[bare_name].value
    except KeyError:
        # No text, so no mismatch.
        return False
    candidates = {template.singular, template.plural}
    candidates.discard('')
    for candidate in candidates:
        if pattern.match(_replace_known_metavars(candidate)) is None:
            return True
    return False
62
+
63
+
64
def _entries_from_text(
    text: DiagnosticText,
    enum_names: set[EnumName],
) -> Iterator[
    tuple[
        Literal['warning', 'error'],
        tuple[DiagnosticText, EnumName],
    ]
]:
    """Yield one entry per enum name annotated on a documented text.

    The documented text is turned into a regular expression in which
    every `%s` matches arbitrary text and everything else matches
    literally.  The message template of each named enum value is then
    checked against that expression.

    Args:
        text: The diagnostic text as documented, with `%s` marking the
            variable parts.
        enum_names: The qualified enum names (`WarnMsgTemplate.NAME` or
            `ErrMsgTemplate.NAME`) annotated on this text.  Names with
            any other class prefix yield nothing.

    Yields:
        Pairs of the diagnostic type (`'warning'` or `'error'`) and
        a tuple of the documented text and the unqualified enum name.

    Raises:
        AssertionError: An enum name is malformed, or the message
            template of a named enum value doesn't match the text.

    """
    # All checks below raise explicitly instead of using `assert`, so
    # that they stay active when Python runs with `-O`.
    #
    # NOTE(review): both mappings are keyed by enum name, not by
    # diagnostic text, so this membership test compares the text against
    # enum names.  The test is kept as is; confirm whether a check
    # against the recorded texts was intended instead.
    for recorded in (manpage_documented_warnings, manpage_documented_errors):
        if text in recorded:
            msg = f'Diagnostic text {text!r} clashes with a recorded name'
            raise AssertionError(msg)
    pattern = re.compile(
        ''.join(
            '.*' if part == '%s' else re.escape(part)
            for part in re.split(r'(%s)', text)
        )
    )
    for name in enum_names:
        _class_name, dot, enum_entry = name.partition('.')
        if dot != '.':
            msg = f'Invalid enum name {name!r}'
            raise AssertionError(msg)
        if '.' in enum_entry:
            msg = f'Unsupported enum name {name!r}'
            raise AssertionError(msg)
        if name.startswith('WarnMsgTemplate.'):
            if _mismatches_text(pattern, enum_name=name, name_type='warning'):
                msg = (
                    f"Warning text for {name} doesn't match the manpage: "
                    f'{text!r} -> {pattern.pattern!r}'
                )
                raise AssertionError(msg)
            yield ('warning', (text, cast('EnumName', enum_entry)))
        elif name.startswith('ErrMsgTemplate.'):
            if _mismatches_text(pattern, enum_name=name, name_type='error'):
                msg = (
                    f"Error text for {name} doesn't match the manpage: "
                    f'{text!r} -> {pattern.pattern!r}'
                )
                raise AssertionError(msg)
            yield ('error', (text, cast('EnumName', enum_entry)))
100
+
101
+
102
def _check_manpage(
    path: pathlib.Path,
) -> Iterator[
    tuple[
        Literal['warning', 'error'],
        tuple[DiagnosticText, EnumName],
    ]
]:
    """Yield the diagnostics documented in a *roff manpage.

    Enum names are collected from `.\\" Message-ID: NAME` comment lines
    and attached to the next `.It ` line, whose remainder is taken as
    the diagnostic text.  `.\\" Message-ID (mark only): NAME` comment
    lines yield an entry directly, without any text check.

    Args:
        path: The manpage file to read (as UTF-8).

    Yields:
        Pairs of the diagnostic type (`'warning'` or `'error'`) and
        a tuple of the documented text and the unqualified enum name.

    """
    pending_names: set[EnumName] = set()
    mark_only_prefix = r'.\" Message-ID (mark only):'
    message_id_prefix = r'.\" Message-ID:'

    contents = path.read_text(encoding='UTF-8')
    for line in contents.splitlines():
        if pending_names and line.startswith('.It '):
            # Some *roff escape sequences need to be undone.  This is not
            # an exhaustive list; new entries will be added based on the
            # actual manpages as the need arises.
            unescaped = line.removeprefix('.It ')
            unescaped = unescaped.replace('"', '')
            unescaped = unescaped.replace(r'\-', '-')
            yield from _entries_from_text(
                text=cast('DiagnosticText', unescaped),
                enum_names=pending_names,
            )
            pending_names.clear()
            continue
        if line.startswith(mark_only_prefix):
            # The name is everything after the fourth whitespace-separated
            # word of the comment line.
            marked_name = line.split(None, 4)[4]
            yield from _entries_from_mark_only(cast('EnumName', marked_name))
        elif line.startswith(message_id_prefix):
            # The name is everything after the second word.
            pending_names.add(cast('EnumName', line.split(None, 2)[2]))
129
+
130
+
131
def _entries_from_mark_only(
    name: EnumName,
) -> Iterator[
    tuple[
        Literal['warning', 'error'],
        tuple[DiagnosticText, EnumName],
    ]
]:
    """Yield the entry for an enum name that is only marked as documented.

    No diagnostic text is associated with such a name, so the
    placeholder text `<mark only>` is used instead.

    Args:
        name: The qualified enum name (`WarnMsgTemplate.NAME` or
            `ErrMsgTemplate.NAME`).  Names with any other class prefix
            yield nothing.

    Yields:
        A pair of the diagnostic type (`'warning'` or `'error'`) and
        a tuple of the placeholder text and the unqualified enum name.

    Raises:
        AssertionError: The name has no class prefix, or contains more
            than one dot.

    """
    text = cast('DiagnosticText', '<mark only>')
    _class_name, dot, enum_entry = name.partition('.')
    # Raise explicitly instead of using `assert`, so that the validation
    # stays active when Python runs with `-O`.
    if dot != '.':
        msg = f'Invalid enum name {name!r}'
        raise AssertionError(msg)
    if '.' in enum_entry:
        msg = f'Unsupported enum name {name!r}'
        raise AssertionError(msg)
    if name.startswith('WarnMsgTemplate.'):
        yield ('warning', (text, cast('EnumName', enum_entry)))
    elif name.startswith('ErrMsgTemplate.'):
        yield ('error', (text, cast('EnumName', enum_entry)))
147
+
148
+
149
def _check_manpagedoc(
    path: pathlib.Path,
) -> Iterator[
    tuple[
        Literal['warning', 'error'],
        tuple[DiagnosticText, EnumName],
    ]
]:
    """Yield the diagnostics documented in a Markdown manpage rendition.

    Enum names are collected from `<!-- Message-ID: NAME -->` comment
    lines and attached to the next `??? failure ` or `??? warning `
    line, whose remainder (wrapped in double quotes and backticks) is
    taken as the diagnostic text.  `<!-- Message-ID (mark only): NAME
    -->` comment lines yield an entry directly, without any text check.

    Args:
        path: The Markdown file to read (as UTF-8).

    Yields:
        Pairs of the diagnostic type (`'warning'` or `'error'`) and
        a tuple of the documented text and the unqualified enum name.

    Raises:
        AssertionError: A diagnostic text is not wrapped in double
            quotes and, inside those, in backticks.

    """
    enum_names: set[EnumName] = set()

    for line in path.read_text(encoding='UTF-8').splitlines(keepends=False):
        if enum_names and line.startswith(('??? failure ', '??? warning ')):
            text = cast('DiagnosticText', line.split(None, 2)[2])
            # Peel off the double quotes first, then the backticks.
            # Raise explicitly instead of using `assert`, so that the
            # check stays active when Python runs with `-O`.
            for ch in ['"', '`']:
                if not (text.startswith(ch) and text.endswith(ch)):
                    msg = f'Diagnostic text {text!r} is not wrapped in {ch!r}'
                    raise AssertionError(msg)
                text = cast('DiagnosticText', text[1:-1])
            yield from _entries_from_text(text=text, enum_names=enum_names)
            enum_names.clear()
        elif line.startswith('<!-- Message-ID (mark only):') and line.endswith(
            '-->'
        ):
            name = cast(
                'EnumName',
                line.removeprefix('<!-- Message-ID (mark only):')
                .removesuffix('-->')
                .strip(),
            )
            yield from _entries_from_mark_only(name)
        elif line.startswith('<!-- Message-ID:') and line.endswith('-->'):
            name = cast(
                'EnumName',
                line.removeprefix('<!-- Message-ID:')
                .removesuffix('-->')
                .strip(),
            )
            enum_names.add(name)
186
+
187
+
188
# The parent of the directory containing this script; the `man`, `docs`
# and `src` directories are looked up relative to it.
base = pathlib.Path(sys.argv[0]).resolve().parent.parent
# Documented diagnostics, keyed by unqualified enum name, per document set.
manpage_documented_errors: dict[EnumName, DiagnosticText] = {}
manpage_documented_warnings: dict[EnumName, DiagnosticText] = {}
manpagedoc_documented_errors: dict[EnumName, DiagnosticText] = {}
manpagedoc_documented_warnings: dict[EnumName, DiagnosticText] = {}
for set_name, globs, errors, warnings in [
    (
        'manpages',
        sorted(pathlib.Path(base, 'man').glob('derivepassphrase*.1')),
        manpage_documented_errors,
        manpage_documented_warnings,
    ),
    (
        'manpage-ish docs',
        sorted(
            pathlib.Path(base, 'docs', 'reference').glob(
                'derivepassphrase*.1.md'
            )
        ),
        manpagedoc_documented_errors,
        manpagedoc_documented_warnings,
    ),
]:
    checker = _check_manpage if set_name == 'manpages' else _check_manpagedoc
    for path in globs:
        print(f'Checking manpage {path}', file=sys.stderr)
        for diagnostic_type, (text, name) in checker(path):
            if diagnostic_type == 'warning':
                warnings[name] = text
                print(
                    f'Found warning message {name!r} with {text!r} in manpage.',  # noqa: E501
                    file=sys.stderr,
                )
            else:
                errors[name] = text
                print(
                    f'Found error message {name!r} with {text!r} in manpage.',
                    file=sys.stderr,
                )
    # The documented names must be exactly the known names.  Raise
    # explicitly instead of using `assert`, so that these checks (the
    # whole point of this script) stay active when Python runs with `-O`.
    undocumented_errors = set(known_errors) - set(errors)
    if undocumented_errors:
        raise AssertionError(
            f"Some error messages aren't documented in the {set_name}: "
            + repr(undocumented_errors)
        )
    undocumented_warnings = set(known_warnings) - set(warnings)
    if undocumented_warnings:
        raise AssertionError(
            f"Some warning messages aren't documented in the {set_name}: "
            + repr(undocumented_warnings)
        )
    unknown_errors = set(errors) - set(known_errors)  # type: ignore[arg-type]
    if unknown_errors:
        raise AssertionError(
            f'Some unknown error messages are documented in the {set_name}: '
            + repr(unknown_errors)
        )
    unknown_warnings = set(warnings) - set(known_warnings)  # type: ignore[arg-type]
    if unknown_warnings:
        raise AssertionError(
            f'Some unknown warning messages are documented in the {set_name}: '
            + repr(unknown_warnings)
        )
245
+
246
# Rudimentary usage check: collect every error and warning message name
# mentioned in the package sources, then report (without failing) the
# known names that are never mentioned.
py_file_errors: set[EnumName] = set()
py_file_warnings: set[EnumName] = set()
match_errors_warnings = re.compile(
    r'\b(?:cli_messages|msg|_msg)\.(Err|Warn)MsgTemplate\.([A-Z0-9_]+)'
)
# The messages module defines the messages, so mentions there don't count.
messages_module = pathlib.Path(
    base, 'src', 'derivepassphrase', '_internals', 'cli_messages.py'
)
for path in pathlib.Path(base, 'src', 'derivepassphrase').glob('**/*.py'):
    if path == messages_module:
        continue
    filecontents = path.read_text(encoding='UTF-8')
    for match in match_errors_warnings.finditer(filecontents):
        message_type, symbol = match.group(1, 2)
        if message_type == 'Err':
            py_file_errors.add(cast('EnumName', symbol))
            print(
                f'Found mention of error message {symbol} '
                f'in source file {path!r}.',
                file=sys.stderr,
            )
        elif message_type == 'Warn':
            py_file_warnings.add(cast('EnumName', symbol))
            print(
                f'Found mention of warning message {symbol} '
                f'in source file {path!r}.',
                file=sys.stderr,
            )
# Report only when there really are unused names.  Comparing the sets for
# inequality would also trigger on mentions of unknown names, and then
# print an empty set of supposedly unused messages.
unused_errors = set(known_errors) - py_file_errors
if unused_errors:
    print(
        "Some error messages aren't in use: " + repr(unused_errors),
        file=sys.stderr,
    )
unused_warnings = set(known_warnings) - py_file_warnings
if unused_warnings:
    print(
        "Some warning messages aren't in use: " + repr(unused_warnings),
        file=sys.stderr,
    )
... ...
@@ -0,0 +1,87 @@
1
+#!/usr/bin/python3
2
+# SPDX-FileCopyrightText: 2025 Marco Ricci <software@the13thletter.info>
3
+#
4
+# SPDX-License-Identifier: Zlib
5
+
6
+# ruff: noqa: S404,S603,S607
7
+
8
+"""Run various quality control checks automatically.
9
+
10
+Distinguish between the master branch and other branches: run the full
11
+test suite and build the documentation only on the master branch,
12
+otherwise use only a reduced set of test environments and don't build
13
+the documentation at all.  In both cases, run the linter, the formatter,
14
+and the type checker.
15
+
16
+If we are currently in a Stacked Git patch queue, do not run any tests,
17
+do not run the type checker and do not build the documentation.  These
18
+all slow down patch refreshing to a grinding halt, and will be checked
19
+afterwards anyway when merging the patch queue back into the master
20
+branch.  Stick to formatting and linting only.
21
+
22
+"""
23
+
24
+import os
25
+import subprocess
26
+import sys
27
+
28
# Reduced set of test environments, used on branches other than master.
envs = ['3.9', '3.11', '3.13', 'pypy3.10']
opts = ['-py', ','.join(envs)]

branch_query = subprocess.run(
    ['git', 'branch', '--show-current'],
    capture_output=True,
    text=True,
    check=False,
)
current_branch = branch_query.stdout.strip()
# We use rev-parse to check for Stacked Git's metadata tracking branch,
# instead of checking `stg top` or similar, because we also want the
# first `stg new` or `stg import` to correctly detect that we are
# working on a patch queue.
stack_ref_query = subprocess.run(
    [
        'git',
        'rev-parse',
        '--verify',
        '--end-of-options',
        f'refs/stacks/{current_branch}',
    ],
    capture_output=True,
    check=False,
)
# Any output at all means the ref exists.
is_stgit_patch = bool(stack_ref_query.stdout)

on_master = current_branch == 'master'
type_check_command = ['hatch', 'env', 'run', '-e', 'types', '--', 'check']

try:
    # Linting and formatting always run.
    subprocess.run(['hatch', 'fmt', '-l'], check=True)
    subprocess.run(['hatch', 'fmt', '-f'], check=True)
    # The type checker runs everywhere except on non-master patch queues.
    if on_master or not is_stgit_patch:
        subprocess.run(type_check_command, check=True)
    if on_master:
        # Full test suite, then the documentation build.
        subprocess.run(
            ['hatch', 'test', '-acpqr', '--', '--maxfail', '1'],
            check=True,
        )
        # fmt: off
        subprocess.run(
            [
                'hatch', 'env', 'run', '-e', 'docs', '--',
                'build', '-f', 'mkdocs_devsetup.yaml',
            ],
            check=True,
        )
        # fmt: on
    elif not is_stgit_patch:
        # Reduced test environments only, with the `dev` hypothesis profile.
        subprocess.run(
            ['hatch', 'test', '-cpqr', *opts, '--', '--maxfail', '1'],
            env={**os.environ, 'HYPOTHESIS_PROFILE': 'dev'},
            check=True,
        )
except subprocess.CalledProcessError as exc:
    # Pass the exit status of the failed check on to our caller.
    sys.exit(getattr(exc, 'returncode', 1))
except KeyboardInterrupt:
    sys.exit(1)
0 88