Generate debug translations automatically, in code and on the command-line
Marco Ricci

Marco Ricci committed on 2025-01-13 14:19:27
Showing 1 changed file with 243 additions and 28 deletions.


Introduce a `DebugTranslations` object that returns the enum name of the
given message as its translation, including the parameters to be
interpolated.

Rename `_write_pot_file` to `_write_po_file` and expand it to emit
either a template file or a debug translation `.po` file.  The two
outputs differ slightly in the header, and in whether the translations
are empty by default or filled in with the enum name.  The ad-hoc
command-line interface now accepts options to select the template or the
debug translation, and an option to explicitly override the declared
software version written to the `.po` header.

Because of negative experiences with the Poedit translation editor,
the message ID (enum name) is no longer embedded as a (pseudo-)location
(`#:`) of the message string, but rather as a translator comment (`#.`).
... ...
@@ -12,17 +12,18 @@ import enum
12 12
 import gettext
13 13
 import inspect
14 14
 import os
15
+import string
15 16
 import sys
16 17
 import textwrap
17 18
 import types
18 19
 from typing import TYPE_CHECKING, NamedTuple, TextIO, Union, cast
19 20
 
20
-from typing_extensions import TypeAlias
21
+from typing_extensions import TypeAlias, override
21 22
 
22 23
 import derivepassphrase as dpp
23 24
 
24 25
 if TYPE_CHECKING:
25
-    from collections.abc import Iterable, Mapping, Sequence
26
+    from collections.abc import Iterable, Iterator, Mapping, Sequence
26 27
 
27 28
     from typing_extensions import Any, Self
28 29
 
... ...
@@ -88,6 +89,112 @@ def load_translations(
88 89
 
89 90
 
90 91
 translation = load_translations()
92
+_debug_translation_message_cache: dict[tuple[str, str], MsgTemplate] = {}
93
+
94
+
95
+class DebugTranslations(gettext.NullTranslations):
96
+    """A debug object indicating which known message is being requested.
97
+
98
+    Each call to the `*gettext` methods will return the enum name if the
99
+    message is a known translatable message for the `derivepassphrase`
100
+    command-line interface, or the message itself otherwise.
101
+
102
+    """
103
+
104
+    @staticmethod
105
+    def _load_cache() -> None:
106
+        cache = _debug_translation_message_cache
107
+        for enum_class in MSG_TEMPLATE_CLASSES:
108
+            for member in enum_class.__members__.values():
109
+                value = cast('TranslatableString', member.value)
110
+                singular = value.singular
111
+                plural = value.plural
112
+                context = value.l10n_context
113
+                cache.setdefault((context, singular), member)
114
+                if plural:
115
+                    cache.setdefault((context, plural), member)
116
+
117
+    @classmethod
118
+    def _locate_message(
119
+        cls,
120
+        message: str,
121
+        /,
122
+        *,
123
+        context: str = '',
124
+        message_plural: str = '',
125
+        n: int = 1,
126
+    ) -> str:
127
+        try:
128
+            enum_value = _debug_translation_message_cache[context, message]
129
+        except KeyError:
130
+            return message if not message_plural or n == 1 else message_plural
131
+        return cls._format_enum_name_maybe_with_fields(
132
+            enum_name=str(enum_value),
133
+            ts=cast('TranslatableString', enum_value.value),
134
+        )
135
+
136
+    @staticmethod
137
+    def _format_enum_name_maybe_with_fields(
138
+        enum_name: str,
139
+        ts: TranslatableString,
140
+    ) -> str:
141
+        formatter = string.Formatter()
142
+        fields: dict[str, int] = {}
143
+        for _lit, field, _spec, _conv in formatter.parse(ts.singular):
144
+            if field is not None and field not in fields:
145
+                fields[field] = len(fields)
146
+        sorted_fields = [
147
+            f'{field}={{{field}!r}}'
148
+            for field in sorted(fields.keys(), key=fields.__getitem__)
149
+        ]
150
+        return (
151
+            '{!s}({})'.format(enum_name, ', '.join(sorted_fields))
152
+            if sorted_fields
153
+            else str(enum_name)
154
+        )
155
+
156
+    @override
157
+    def gettext(
158
+        self,
159
+        message: str,
160
+        /,
161
+    ) -> str:  # pragma: no cover
162
+        return self._locate_message(message)
163
+
164
+    @override
165
+    def ngettext(
166
+        self,
167
+        msgid1: str,
168
+        msgid2: str,
169
+        n: int,
170
+        /,
171
+    ) -> str:  # pragma: no cover
172
+        return self._locate_message(msgid1, message_plural=msgid2, n=n)
173
+
174
+    @override
175
+    def pgettext(
176
+        self,
177
+        context: str,
178
+        message: str,
179
+        /,
180
+    ) -> str:
181
+        return self._locate_message(message, context=context)
182
+
183
+    @override
184
+    def npgettext(
185
+        self,
186
+        context: str,
187
+        msgid1: str,
188
+        msgid2: str,
189
+        n: int,
190
+        /,
191
+    ) -> str:  # pragma: no cover
192
+        return self._locate_message(
193
+            msgid1,
194
+            context=context,
195
+            message_plural=msgid2,
196
+            n=n,
197
+        )
91 198
 
92 199
 
93 200
 class TranslatableString(NamedTuple):
... ...
@@ -1678,9 +1785,18 @@ MSG_TEMPLATE_CLASSES = (
1678 1785
     ErrMsgTemplate,
1679 1786
 )
1680 1787
 
1788
+DebugTranslations._load_cache()  # noqa: SLF001
1789
+
1790
+
1681 1791
 
1682
-def _write_pot_file(fileobj: TextIO) -> None:  # pragma: no cover
1683
-    r"""Write a .po template to the given file object.
1792
+def _write_po_file(  # noqa: C901
1793
+    fileobj: TextIO,
1794
+    /,
1795
+    *,
1796
+    is_template: bool = True,
1797
+    version: str = __version__,
1798
+) -> None:  # pragma: no cover
1799
+    r"""Write a .po file to the given file object.
1684 1800
 
1685 1801
     Assumes the file object is opened for writing and accepts string
1686 1802
     inputs.  The file will *not* be closed when writing is complete.
... ...
@@ -1695,8 +1811,9 @@ def _write_pot_file(fileobj: TextIO) -> None:  # pragma: no cover
1695 1811
     entries: dict[str, dict[str, MsgTemplate]] = {}
1696 1812
     for enum_class in MSG_TEMPLATE_CLASSES:
1697 1813
         for member in enum_class.__members__.values():
1698
-            ctx = member.value.l10n_context
1699
-            msg = member.value.singular
1814
+            value = cast('TranslatableString', member.value)
1815
+            ctx = value.l10n_context
1816
+            msg = value.singular
1700 1817
             if (
1701 1818
                 msg in entries.setdefault(ctx, {})
1702 1819
                 and entries[ctx][msg] != member
... ...
@@ -1706,49 +1823,113 @@ def _write_pot_file(fileobj: TextIO) -> None:  # pragma: no cover
1706 1823
                     f'{entries[ctx][msg]!r} and {member!r}'
1707 1824
                 )
1708 1825
             entries[ctx][msg] = member
1709
-    now = datetime.datetime.now().astimezone()
1826
+    build_time = datetime.datetime.now().astimezone()
1827
+    if is_template:
1710 1828
         header = (
1711 1829
             inspect.cleandoc(rf"""
1712 1830
             # English translation for {PROG_NAME!s}.
1713
-        # Copyright (C) {now.strftime('%Y')} AUTHOR
1831
+            # Copyright (C) {build_time.strftime('%Y')} AUTHOR
1832
+            # This file is distributed under the same license as {PROG_NAME!s}.
1833
+            # AUTHOR <someone@example.com>, {build_time.strftime('%Y')}.
1834
+            #
1835
+            msgid ""
1836
+            msgstr ""
1837
+            """).removesuffix('\n')
1838
+            + '\n'
1839
+        )
1840
+    else:
1841
+        header = (
1842
+            inspect.cleandoc(rf"""
1843
+            # English debug translation for {PROG_NAME!s}.
1844
+            # Copyright (C) {build_time.strftime('%Y')} {__author__}
1714 1845
             # This file is distributed under the same license as {PROG_NAME!s}.
1715
-        # AUTHOR <someone@example.com>, {now.strftime('%Y')}.
1716 1846
             #
1717 1847
             msgid ""
1718 1848
             msgstr ""
1719
-        "Project-Id-Version: {PROG_NAME!s} {__version__!s}\n"
1720
-        "Report-Msgid-Bugs-To: software@the13thletter.info\n"
1721
-        "POT-Creation-Date: {now.strftime('%Y-%m-%d %H:%M%z')}\n"
1722
-        "PO-Revision-Date: {now.strftime('%Y-%m-%d %H:%M%z')}\n"
1723
-        "Last-Translator: AUTHOR <someone@example.com>\n"
1724
-        "Language: en\n"
1725
-        "MIME-Version: 1.0\n"
1726
-        "Content-Type: text/plain; charset=UTF-8\n"
1727
-        "Content-Transfer-Encoding: 8bit\n"
1728
-        "Plural-Forms: nplurals=2; plural=(n != 1);\n"
1729 1849
             """).removesuffix('\n')
1730 1850
             + '\n'
1731 1851
         )
1732 1852
     fileobj.write(header)
1853
+    po_info = {
1854
+        'Project-Id-Version': f'{PROG_NAME} {version}',
1855
+        'Report-Msgid-Bugs-To': 'software@the13thletter.info',
1856
+        'PO-Revision-Date': build_time.strftime('%Y-%m-%d %H:%M%z'),
1857
+        'MIME-Version': '1.0',
1858
+        'Content-Type': 'text/plain; charset=UTF-8',
1859
+        'Content-Transfer-Encoding': '8bit',
1860
+        'Plural-Forms': 'nplurals=2; plural=(n != 1);',
1861
+    }
1862
+    if is_template:
1863
+        po_info.update({
1864
+            'POT-Creation-Date': build_time.strftime('%Y-%m-%d %H:%M%z'),
1865
+            'Last-Translator': 'AUTHOR <someone@example.com>',
1866
+            'Language': 'en',
1867
+            'Language-Team': 'English',
1868
+        })
1869
+    else:
1870
+        po_info.update({
1871
+            'Last-Translator': __author__,
1872
+            'Language': 'en_DEBUG',
1873
+            'Language-Team': 'English',
1874
+        })
1875
+    print(*_format_po_info(po_info), sep='\n', end='\n', file=fileobj)
1733 1876
     for _ctx, subdict in sorted(entries.items()):
1734 1877
         for _msg, enum_value in sorted(
1735 1878
             subdict.items(),
1736 1879
             key=lambda kv: str(kv[1]),
1737 1880
         ):
1738
-            fileobj.writelines(_format_po_entry(enum_value))
1881
+            fileobj.writelines(
1882
+                _format_po_entry(
1883
+                    enum_value, is_debug_translation=not is_template
1884
+                )
1885
+            )
1886
+
1887
+
1888
+def _format_po_info(
1889
+    data: Mapping[str, Any],
1890
+    /,
1891
+) -> Iterator[str]:  # pragma: no cover
1892
+    sortorder = [
1893
+        'project-id-version',
1894
+        'report-msgid-bugs-to',
1895
+        'pot-creation-date',
1896
+        'po-revision-date',
1897
+        'last-translator',
1898
+        'language',
1899
+        'language-team',
1900
+        'mime-version',
1901
+        'content-type',
1902
+        'content-transfer-encoding',
1903
+        'plural-forms',
1904
+    ]
1905
+
1906
+    def _sort_position(s: str, /) -> int:
1907
+        n = len(sortorder)
1908
+        for i, x in enumerate(sortorder):
1909
+            if s.lower().rstrip(':') == x:
1910
+                return i
1911
+        return n
1912
+
1913
+    for key in sorted(data.keys(), key=_sort_position):
1914
+        value = data[key]
1915
+        line = f"{key}: {value}\n"
1916
+        yield _cstr(line)
1739 1917
 
1740 1918
 
1741 1919
 def _format_po_entry(
1742 1920
     enum_value: MsgTemplate,
1921
+    /,
1922
+    *,
1923
+    is_debug_translation: bool = False,
1743 1924
 ) -> tuple[str, ...]:  # pragma: no cover
1744 1925
     ret: list[str] = ['\n']
1745 1926
     ts = enum_value.value
1746 1927
     if ts.translator_comments:
1747
-        ret.extend(
1748
-            f'#. {line}\n'
1749
-            for line in ts.translator_comments.splitlines(False)  # noqa: FBT003
1750
-        )
1751
-    ret.append(f'#: derivepassphrase/_cli_msg.py:{enum_value}\n')
1928
+        comments = ts.translator_comments.splitlines(False)  # noqa: FBT003
1929
+        comments.extend(['', f'Message-ID: {enum_value}'])
1930
+    else:
1931
+        comments = [f'TRANSLATORS: Message-ID: {enum_value}']
1932
+    ret.extend(f'#. {line}\n' for line in comments)
1752 1933
     if ts.flags:
1753 1934
         ret.append(f'#, {", ".join(sorted(ts.flags))}\n')
1754 1935
     if ts.l10n_context:
... ...
@@ -1756,7 +1937,12 @@ def _format_po_entry(
1756 1937
     ret.append(f'msgid {_cstr(ts.singular)}\n')
1757 1938
     if ts.plural:
1758 1939
         ret.append(f'msgid_plural {_cstr(ts.plural)}\n')
1759
-    ret.append('msgstr ""\n')
1940
+    value = (
1941
+        DebugTranslations().pgettext(ts.l10n_context, ts.singular)
1942
+        if is_debug_translation
1943
+        else ''
1944
+    )
1945
+    ret.append(f'msgstr {_cstr(value)}\n')
1760 1946
     return tuple(ret)
1761 1947
 
1762 1948
 
... ...
@@ -1786,9 +1972,38 @@ def _cstr(s: str) -> str:  # pragma: no cover
1786 1972
 
1787 1973
     return '\n'.join(
1788 1974
         f'"{escape(line)}"'
1789
-        for line in s.splitlines(True)  # noqa: FBT003
1975
+        for line in s.splitlines(True) or ['']  # noqa: FBT003
1790 1976
     )
1791 1977
 
1792 1978
 
1793 1979
 if __name__ == '__main__':
1794
-    _write_pot_file(sys.stdout)
1980
+    import argparse
1981
+    ap = argparse.ArgumentParser()
1982
+    ex = ap.add_mutually_exclusive_group()
1983
+    ex.add_argument(
1984
+        '--template',
1985
+        action='store_true',
1986
+        dest='is_template',
1987
+        default=True,
1988
+        help='Generate a template file (default)',
1989
+    )
1990
+    ex.add_argument(
1991
+        '--debug-translation',
1992
+        action='store_false',
1993
+        dest='is_template',
1994
+        default=True,
1995
+        help='Generate a "debug" translation file',
1996
+    )
1997
+    ap.add_argument(
1998
+        '--set-version',
1999
+        action='store',
2000
+        dest='version',
2001
+        default=__version__,
2002
+        help='Override declared software version',
2003
+    )
2004
+    args = ap.parse_args()
2005
+    _write_po_file(
2006
+        sys.stdout,
2007
+        version=args.version,
2008
+        is_template=args.is_template,
2009
+    )
1795 2010