-
Notifications
You must be signed in to change notification settings - Fork 245
Translation linter work - Rework of #3597 #3683
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
f38d03c
4024c00
eedebbb
f56b17e
064481c
eca14ce
b9fad6c
6fd3a76
9cdeff0
9e7aace
4027304
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,286 @@ | ||
| #!/usr/bin/env python3 | ||
| # | ||
| ############################################################################## | ||
| # Copyright (c) 2026 | ||
| # | ||
| # Author(s): | ||
| # ChatGPT | ||
| # ann0see | ||
|
JaminShanti marked this conversation as resolved.
|
||
| # JaminShanti | ||
| # Gemini | ||
| # The Jamulus Development Team | ||
| # | ||
| ############################################################################## | ||
| # | ||
| # This program is free software; you can redistribute it and/or modify it under | ||
| # the terms of the GNU General Public License as published by the Free Software | ||
| # Foundation; either version 2 of the License, or (at your option) any later | ||
| # version. | ||
| # | ||
| # This program is distributed in the hope that it will be useful, but WITHOUT | ||
| # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | ||
| # details. | ||
| # | ||
| # You should have received a copy of the GNU General Public License along with | ||
| # this program; if not, write to the Free Software Foundation, Inc., | ||
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA | ||
| # | ||
| ############################################################################## | ||
|
|
||
| """ | ||
| Qt TS translation checker. | ||
|
|
||
| This tool validates Qt `.ts` translation files according to Qt Linguist | ||
| semantics. | ||
| Warnings are reported with best-effort line numbers. In strict mode, the | ||
| presence of any warning results in a non-zero exit code to allow CI failure. | ||
| """ | ||
|
|
||
| import argparse | ||
| import re | ||
| import sys | ||
| import xml.etree.ElementTree as ET | ||
| from collections import defaultdict, Counter | ||
| from dataclasses import dataclass | ||
| from enum import IntEnum | ||
| from pathlib import Path | ||
|
|
||
# Regex helpers
# Numbered placeholders: a "%" followed by one or more digits (e.g. %1, %12).
PLACEHOLDER_RE = re.compile(r"%\d+")
# Tag-like markup: "<", at least one character that is not ">", then ">".
HTML_TAG_RE = re.compile(r"<[^>]+>")
|
|
||
# ANSI escape codes
# Terminal escape sequences used to style the printed report; RESET ends a
# styled span.
BOLD = "\033[1m"
CYAN = "\033[36m"
YELLOW = "\033[33m"
RED = "\033[31m"
RESET = "\033[0m"
|
|
||
|
|
||
| # Severity Enum | ||
class Severity(IntEnum):
    """Severity level attached to every reported finding.

    The values are ordered integers, so ``SEVERE`` compares greater than
    ``WARNING``.
    """

    WARNING = 1
    SEVERE = 2
|
|
||
|
|
||
| # Data structures | ||
@dataclass(frozen=True)
class MessageContext:
    """Immutable bundle of the data the per-message checks operate on."""

    ts_file: Path  # TS file the message was read from
    line: int  # approximate line number of the <message> element
    lang: str  # language code of the file being checked
    source: str  # text of the <source> element
    translation: str  # translation text being checked
    tr_type: str  # "type" attribute of <translation>, "" when absent
    excerpt: str  # shortened single-line form of the source for messages
|
|
||
|
|
||
| @dataclass(frozen=True) | ||
class WarningItem:
    """One finding produced by a check, ready to be grouped and printed."""

    ts_file: Path  # file the finding belongs to
    line: int  # line shown in the report; 0 is used for file-level findings
    lang: str  # language code used for the per-language summary
    message: str  # headline, optionally followed by "\n"-separated detail lines
    severity: Severity  # WARNING or SEVERE
|
|
||
|
|
||
| # Helpers | ||
def approximate_message_lines(text: str):
    """Yield an approximate 1-based line number per ``<message`` occurrence.

    One number is produced for every occurrence of the literal ``<message``
    in *text*, in document order.  Several occurrences on the same physical
    line all report that line, so later numbers are never shifted.

    The match is purely textual (no XML parsing), which is why the result is
    only approximate: an occurrence inside a comment or CDATA section is
    counted as well.

    TODO: obtain real positions from the XML parser so that no approximation
    is needed.
    """
    for line_no, line in enumerate(text.splitlines(), start=1):
        # Emit the line once per occurrence; a closing "</message>" does not
        # contain the literal "<message" and is therefore not counted.
        for _ in range(line.count("<message")):
            yield line_no
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe add a TODO to think about how to avoid having to approximate the line numbers. I know it's from the original version, and ChatGPT came up with it as a hack.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tried something different. See if you think it's an improvement; otherwise I can revert. |
||
|
|
||
|
|
||
| # Checks | ||
def check_language_header(ts_file: Path, root, file_lang: str):
    """Compare the root element's ``language`` attribute with *file_lang*.

    Args:
        ts_file: TS file being checked (used only for reporting).
        root: parsed root element; its ``language`` attribute is read and a
            missing attribute is treated as the empty string.
        file_lang: language code expected for this file.

    Returns:
        An empty list when both values are equal, otherwise a list with one
        WARNING item reported at line 0.
    """
    declared = root.attrib.get("language", "")
    if declared == file_lang:
        return []

    text = f"Language header mismatch '{declared}' != '{file_lang}'"
    return [WarningItem(ts_file, 0, file_lang, text, Severity.WARNING)]
|
|
||
|
|
||
def check_empty_translation(ctx: MessageContext):
    """Flag a translation that is empty or whitespace-only.

    Messages whose translation type is ``"unfinished"`` are skipped.

    Returns:
        A list with one SEVERE item, or an empty list when nothing is wrong.
    """
    has_text = bool(ctx.translation.strip())
    if has_text or ctx.tr_type == "unfinished":
        return []

    text = f"Empty translation for '{ctx.excerpt}'"
    return [WarningItem(ctx.ts_file, ctx.line, ctx.lang, text, Severity.SEVERE)]
|
|
||
|
|
||
def check_placeholders(ctx: MessageContext):
    """Flag a translation whose numbered placeholders differ from the source.

    Placeholders are compared as multisets, so both the set of placeholders
    and how often each one occurs must match.  Messages whose translation
    type is ``"unfinished"`` are skipped.

    Returns:
        A list with one WARNING item (headline plus source/translation detail
        lines), or an empty list when the placeholders agree.
    """
    if ctx.tr_type == "unfinished":
        return []

    expected = Counter(PLACEHOLDER_RE.findall(ctx.source))
    found = Counter(PLACEHOLDER_RE.findall(ctx.translation))
    if expected == found:
        return []

    msg = (f"Placeholder mismatch for '{ctx.excerpt}'\n"
           f"Source: {ctx.source}\n"
           f"Trans: {ctx.translation}")
    return [WarningItem(ctx.ts_file, ctx.line, ctx.lang, msg, Severity.WARNING)]
|
|
||
|
|
||
def check_html(ctx: MessageContext):
    """Flag a translation that dropped all markup present in the source.

    A finding is produced only when the source contains at least one tag-like
    sequence and the translation contains none.  Messages whose translation
    type is ``"unfinished"`` are skipped.

    Returns:
        A list with one WARNING item (headline plus source/translation detail
        lines), or an empty list.
    """
    if ctx.tr_type == "unfinished":
        return []

    source_has_tags = HTML_TAG_RE.search(ctx.source) is not None
    translation_has_tags = HTML_TAG_RE.search(ctx.translation) is not None
    if translation_has_tags or not source_has_tags:
        return []

    msg = (f"HTML missing for '{ctx.excerpt}'\n"
           f"Source: {ctx.source}\n"
           f"Trans: {ctx.translation}")
    return [WarningItem(ctx.ts_file, ctx.line, ctx.lang, msg, Severity.WARNING)]
|
|
||
|
|
||
def check_whitespace(ctx: MessageContext):
    """Flag a mismatch in leading/trailing whitespace presence.

    Only the *presence* of whitespace at each end is compared, not its exact
    content.  Empty translations and messages whose translation type is
    ``"unfinished"`` are skipped.

    Returns:
        A list with one WARNING item, or an empty list.
    """
    if ctx.tr_type == "unfinished" or not ctx.translation:
        return []

    def padded_ends(value):
        """Return ``(has_leading_ws, has_trailing_ws)`` for *value*."""
        return value != value.lstrip(), value != value.rstrip()

    if padded_ends(ctx.source) == padded_ends(ctx.translation):
        return []

    text = f"Leading/trailing whitespace mismatch for '{ctx.excerpt}'"
    return [WarningItem(ctx.ts_file, ctx.line, ctx.lang, text, Severity.WARNING)]
|
|
||
|
|
||
def check_newline_consistency(ctx: MessageContext):
    """Flag a translation whose trailing newline differs from the source.

    A finding is produced when exactly one of source and translation ends
    with ``"\\n"``.  Messages whose translation type is ``"unfinished"`` are
    skipped, as in the other per-message checks; otherwise every untranslated
    string whose source ends in a newline would be reported.

    Returns:
        A list with one WARNING item, or an empty list.
    """
    if ctx.tr_type == "unfinished":
        return []

    if ctx.source.endswith("\n") == ctx.translation.endswith("\n"):
        return []

    return [WarningItem(ctx.ts_file, ctx.line, ctx.lang,
                        f"Newline mismatch for '{ctx.excerpt}'",
                        Severity.WARNING)]
|
|
||
|
|
||
| # Detect warnings | ||
def _message_translations(message):
    """Return ``(tr_type, texts)`` for one ``<message>`` element.

    ``texts`` holds one string per ``<numerusform>`` child for plural
    messages, otherwise a single string with the full translation text.  A
    message without a ``<translation>`` element yields ``("", [""])``.
    """
    tr_elem = message.find("translation")
    if tr_elem is None:
        return "", [""]

    tr_type = tr_elem.attrib.get("type", "")
    forms = tr_elem.findall("numerusform")
    if forms:
        return tr_type, ["".join(form.itertext()) for form in forms]
    return tr_type, ["".join(tr_elem.itertext())]


def detect_warnings(ts_file: Path, file_lang: str):
    """Run every check against one TS file and collect the findings.

    Args:
        ts_file: path of the ``.ts`` file to read (decoded as UTF-8).
        file_lang: language code expected for this file.

    Returns:
        A list of WarningItem objects.  If the file cannot be read, decoded
        or parsed, the list contains a single SEVERE item at line 0 and no
        further checks are run.
    """
    try:
        text = ts_file.read_text(encoding="utf-8")
        root = ET.fromstring(text)
    except (OSError, UnicodeDecodeError, ET.ParseError) as exc:
        # UnicodeDecodeError is not an OSError; without it a badly encoded
        # file would abort the whole run instead of being reported.
        return [WarningItem(ts_file, 0, file_lang,
                            f"Error reading or parsing XML: {exc}",
                            Severity.SEVERE)]

    findings = []
    findings.extend(check_language_header(ts_file, root, file_lang))

    # Shared across all contexts so numbering continues from one <context>
    # to the next.
    message_lines = approximate_message_lines(text)

    per_message_checks = (
        check_empty_translation,
        check_placeholders,
        check_html,
        check_whitespace,
        check_newline_consistency,
    )

    for context in root.findall("context"):
        # The message list must stay the first zip() argument: zip stops as
        # soon as it is exhausted, so no line number is consumed and lost at
        # the end of a context.
        for message, line in zip(context.findall("message"), message_lines):
            source_elem = message.find("source")
            source = "".join(source_elem.itertext()) if source_elem is not None else ""

            tr_type, translations = _message_translations(message)

            # Single-line excerpt; "..." is appended only when text was cut.
            source_clean = source.strip().replace("\n", " ")
            excerpt = source_clean[:30] + ("..." if len(source_clean) > 30 else "")

            # Each plural form is checked on its own.  Joining the forms into
            # one string would count a placeholder once per form and report a
            # false mismatch against the single source string.
            for translation in translations:
                ctx = MessageContext(ts_file, line, file_lang, source,
                                     translation, tr_type, excerpt)
                for check in per_message_checks:
                    findings.extend(check(ctx))

    return findings
|
|
||
|
|
||
| # CLI | ||
def main():
    """Command-line entry point: check every ``translation_*.ts`` file.

    Prints the findings grouped by file, then a per-language summary.

    Returns:
        The process exit code: 2 when the directory does not exist or holds
        no matching files, 1 when any severe finding exists (or, with
        ``--strict``, any warning), otherwise 0.
    """
    parser = argparse.ArgumentParser(description="Qt TS translation checker with extended rules")
    parser.add_argument(
        "--ts-dir",
        type=Path,
        default=Path("../src/translation"),
        help="Directory containing translation_*.ts files",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit non-zero if any warning is found",
    )
    args = parser.parse_args()

    ts_dir = args.ts_dir
    if not ts_dir.exists():
        print(f"Directory not found: {ts_dir}", file=sys.stderr)
        return 2

    ts_files = sorted(ts_dir.glob("translation_*.ts"))
    if not ts_files:
        print(f"No TS files found in {ts_dir}", file=sys.stderr)
        return 2

    findings = []
    failures_by_language = defaultdict(lambda: {"severe": 0, "warning": 0})

    for ts_file in ts_files:
        lang = ts_file.stem.replace("translation_", "")
        # Touch the key so languages without findings still appear in the
        # summary with zero counts.
        failures_by_language[lang]
        findings.extend(detect_warnings(ts_file, lang))

    # Group by file for printing and tally per language in the same pass.
    by_file = defaultdict(list)
    for item in findings:
        by_file[item.ts_file].append(item)
        bucket = "severe" if item.severity == Severity.SEVERE else "warning"
        failures_by_language[item.lang][bucket] += 1

    # Detailed column output, one section per file, ordered by line number.
    for path in sorted(by_file):
        print(f"\n{BOLD}File: {path.name}{RESET}")

        for item in sorted(by_file[path], key=lambda entry: entry.line):
            if item.severity == Severity.SEVERE:
                color, sev_text = RED, "SEVERE "
            else:
                color, sev_text = YELLOW, "WARNING"

            headline, *extras = item.message.split("\n")
            print(f" {CYAN}Line {item.line:<4}{RESET} | {color}{sev_text}{RESET} | {headline}")

            # Detail rows (source/translation context) below the headline.
            for extra_line in extras:
                print(f" | | {extra_line}")

    print("\n== Test Summary ==")
    total_severe = 0
    total_warning = 0
    for lang in sorted(failures_by_language):
        counts = failures_by_language[lang]
        print(f"{BOLD}[{lang}]{RESET} Severe: {counts['severe']}, Warnings: {counts['warning']}")
        total_severe += counts["severe"]
        total_warning += counts["warning"]

    print(f"\nTotal Severe: {total_severe}, Total Warnings: {total_warning}")

    if total_severe > 0 or (args.strict and total_warning > 0):
        return 1

    return 0
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| sys.exit(main()) | ||
Uh oh!
There was an error while loading. Please reload this page.