145
|
1 #!/usr/bin/env python3
|
|
2 #
|
|
3 # Check gcc.pot file for stylistic issues as described in
|
|
4 # https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html,
|
|
5 # especially in gcc-internal-format messages.
|
|
6 #
|
|
7 # This file is part of GCC.
|
|
8 #
|
|
9 # GCC is free software; you can redistribute it and/or modify it under
|
|
10 # the terms of the GNU General Public License as published by the Free
|
|
11 # Software Foundation; either version 3, or (at your option) any later
|
|
12 # version.
|
|
13 #
|
|
14 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
16 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
17 # for more details.
|
|
18 #
|
|
19 # You should have received a copy of the GNU General Public License
|
|
20 # along with GCC; see the file COPYING3. If not see
|
|
21 # <http://www.gnu.org/licenses/>.
|
|
22
|
|
23 import argparse
|
|
24 import re
|
|
25 from collections import Counter
|
|
26 from typing import Dict, Match
|
|
27
|
|
28 import polib
|
|
29
|
|
# Tally of emitted diagnostics, keyed by the diagnostic text.  warn()
# increments it for every warning that is not suppressed, and main()
# prints it as the closing summary.
seen_warnings = Counter()
|
|
31
|
|
32
|
|
def location(msg: polib.POEntry):
    """
    Returns the first source occurrence of the message formatted as
    'file:line', or '<unknown location>' if the message records no
    occurrences.
    """

    if not msg.occurrences:
        return '<unknown location>'

    first = msg.occurrences[0]
    return '{}:{}'.format(first[0], first[1])
|
|
38
|
|
39
|
|
def warn(msg: polib.POEntry,
         diagnostic_id: str, diagnostic: str, include_msgid=True):
    """
    Prints one diagnostic for the given message, prefixed with the
    message location, and counts it in seen_warnings.  The msgid is
    appended to the output unless include_msgid is false.

    To suppress a warning for a particular message,
    add a line "#, gcclint:ignore:{diagnostic_id}" to the message.
    """

    suppression_flag = f'gcclint:ignore:{diagnostic_id}'
    if suppression_flag in msg.flags:
        return

    seen_warnings[diagnostic] += 1

    text = f'{location(msg)}: {diagnostic}'
    if include_msgid:
        text += f' in {repr(msg.msgid)}'
    print(text)
|
|
56
|
|
57
|
|
def lint_gcc_internal_format(msg: polib.POEntry):
    """
    Checks a single message that has the gcc-internal-format. These
    messages use a variety of placeholders like %qs, %<quotes%> and
    %q#E.

    Every finding is reported through warn(); nothing is returned.
    """

    msgid: str = msg.msgid

    def outside_quotes(m: Match[str]):
        """
        Tells whether the match starts outside of %<quotes%>, which is
        the case when the text before the match contains as many %<
        as %>.
        """
        before = msgid[:m.start(0)]
        return before.count("%<") == before.count("%>")

    def lint_matching_placeholders():
        """
        Warns when literal values in placeholders are not exactly equal
        in the translation. This can happen when doing copy-and-paste
        translations of similar messages.

        To avoid these mismatches in the first place,
        structurally equal messages are found by
        lint_diagnostics_differing_only_in_placeholders.

        This check only applies when checking a finished translation
        such as de.po, not gcc.pot.
        """

        if not msg.translated():
            return

        in_msgid = re.findall('%<[^%]+%>', msgid)
        in_msgstr = re.findall('%<[^%]+%>', msg.msgstr)

        # Compared as sets: order and repetition of the quoted literals
        # may legitimately differ between the languages.
        if set(in_msgid) != set(in_msgstr):
            warn(msg,
                 'placeholder-mismatch',
                 f'placeholder mismatch: msgid has {in_msgid}, '
                 f'msgstr has {in_msgstr}',
                 include_msgid=False)

    def lint_option_outside_quotes():
        """
        Warns about whitespace-separated words outside of %<quotes%>
        that look like a command line option (a dash followed by a
        letter, except for -INF) or that start with __builtin_.
        """

        for match in re.finditer(r'\S+', msgid):
            part = match.group()
            if not outside_quotes(match):
                continue

            if part.startswith('-'):
                # '-INF' is the only dash-plus-letter word that is
                # exempt from the option check.
                if part[1:2].isalpha() and part != '-INF':
                    warn(msg,
                         'option-outside-quotes',
                         'command line option outside %<quotes%>')
            elif part.startswith('__builtin_'):
                warn(msg,
                     'builtin-outside-quotes',
                     'builtin function outside %<quotes%>')

    def lint_plain_apostrophe():
        """
        Warns about each apostrophe that is neither preceded by % nor
        located inside %<quotes%>.
        """

        # The negative lookbehind keeps the match anchored on the
        # apostrophe itself.  Consuming the preceding character (as in
        # "[^%]'") would miss an apostrophe at the very start of the
        # message, and it would make outside_quotes() cut the prefix one
        # character early, splitting a directly preceding %< or %> in
        # half and thus misjudging whether the apostrophe is quoted.
        for match in re.finditer(r"(?<!%)'", msgid):
            if outside_quotes(match):
                warn(msg, 'apostrophe', 'apostrophe without leading %')

    def lint_space_before_quote():
        """
        A missing space before %< is often the result of string
        literals that are joined by the C compiler and neither literal
        has a space to separate the words.
        """

        for match in re.finditer("(.?[a-zA-Z0-9])%<", msgid):
            # A %s placeholder directly followed by %< is not a word
            # and therefore not reported.
            if match.group(1) != '%s':
                warn(msg,
                     'no-space-before-quote',
                     '%< directly following a letter or digit')

    def lint_underscore_outside_quotes():
        """
        An underscore outside of quotes is used in several contexts,
        and many of them violate the GCC Guidelines for Diagnostics:

        * names of GCC-internal compiler functions
        * names of GCC-internal data structures
        * static_cast and the like (which are legitimate)
        """

        for match in re.finditer("_", msgid):
            if outside_quotes(match):
                warn(msg,
                     'underscore-outside-quotes',
                     'underscore outside of %<quotes%>')
                # One report per message is enough.
                return

    def lint_may_not():
        """
        The term "may not" may either mean "it could be the case"
        or "should not". These two different meanings are sometimes
        hard to tell apart.
        """

        if re.search(r'\bmay not\b', msgid):
            warn(msg,
                 'ambiguous-may-not',
                 'the term "may not" is ambiguous')

    def lint_unbalanced_quotes():
        """
        Warns when the number of %< differs from the number of %>,
        in the msgid and, for translated messages, in the msgstr.
        """

        if msgid.count("%<") != msgid.count("%>"):
            warn(msg,
                 'unbalanced-quotes',
                 'unbalanced %< and %> quotes')

        if msg.translated():
            if msg.msgstr.count("%<") != msg.msgstr.count("%>"):
                warn(msg,
                     'unbalanced-quotes',
                     'unbalanced %< and %> quotes')

    def lint_single_space_after_sentence():
        """
        After a sentence there should be two spaces.
        """

        if re.search(r'[.] [A-Z]', msgid):
            warn(msg,
                 'single-space-after-sentence',
                 'single space after sentence')

    def lint_non_canonical_quotes():
        """
        Catches %<%s%>, which can be written in the shorter form %qs.
        The same goes for %s in plain single, double or `...' quotes.
        """
        match = re.search("%<%s%>|'%s'|\"%s\"|`%s'", msgid)
        if match:
            warn(msg,
                 'non-canonical-quotes',
                 f'placeholder {match.group()} should be written as %qs')

    lint_option_outside_quotes()
    lint_plain_apostrophe()
    lint_space_before_quote()
    lint_underscore_outside_quotes()
    lint_may_not()
    lint_unbalanced_quotes()
    lint_matching_placeholders()
    lint_single_space_after_sentence()
    lint_non_canonical_quotes()
|
|
206
|
|
207
|
|
def lint_diagnostics_differing_only_in_placeholders(po: polib.POFile):
    """
    Detects messages that are structurally the same, except that they
    use different plain strings inside %<quotes%>. These messages can
    be merged in order to prevent copy-and-paste mistakes by the
    translators.

    Each message is reduced to a pattern by replacing every
    %<literal%> with %qs. The first message having a given pattern is
    remembered, and every later message with the same pattern is
    reported together with the location of that first message.

    See bug 90119.
    """

    seen: Dict[str, polib.POEntry] = {}

    for msg in po:
        msg: polib.POEntry

        # Obsolete entries are skipped, as in lint_file: reporting them
        # (or reporting a live message against them) is only noise.
        if msg.obsolete:
            continue

        msgid = msg.msgid
        normalized = re.sub('%<[^%]+%>', '%qs', msgid)

        prev = seen.get(normalized)
        if prev is None:
            # Only the normalized form is ever looked up, so it is the
            # only key that needs to be stored.
            seen[normalized] = msg
            continue

        warn(msg,
             'same-pattern',
             f'same pattern for {repr(msgid)} and '
             f'{repr(prev.msgid)} in {location(prev)}',
             include_msgid=False)
|
|
236
|
|
237
|
|
def lint_file(po: polib.POFile):
    """
    Runs all checks on the given file: the per-message checks on every
    message that is flagged gcc-internal-format and is neither obsolete
    nor fuzzy, followed by the check that compares the messages of the
    whole file with each other.
    """

    for entry in po:
        entry: polib.POEntry

        if entry.obsolete or entry.fuzzy:
            continue
        if 'gcc-internal-format' not in entry.flags:
            continue

        lint_gcc_internal_format(entry)

    lint_diagnostics_differing_only_in_placeholders(po)
|
|
247
|
|
248
|
|
def main():
    """
    Command line entry point: lints the file named by the single
    positional argument and then prints a summary, most frequent
    first, of the diagnostics that were emitted more than once.
    """

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('file', help='pot file')
    options = parser.parse_args()

    lint_file(polib.pofile(options.file))

    print()
    print('summary:')
    for diagnostic, count in seen_warnings.most_common():
        if count > 1:
            print(f'{count}\t{diagnostic}')
|
|
263
|
|
264
|
|
# Run the linter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
|