diff --git a/src/textual/drivers/linux_driver.py b/src/textual/drivers/linux_driver.py index 98bf632ff7..48b71e0e6e 100644 --- a/src/textual/drivers/linux_driver.py +++ b/src/textual/drivers/linux_driver.py @@ -412,7 +412,7 @@ def run_input_thread(self) -> None: feed = parser.feed tick = parser.tick - utf8_decoder = getincrementaldecoder("utf-8")().decode + utf8_decoder = getincrementaldecoder("utf-8")(errors="replace").decode decode = utf8_decoder read = os.read diff --git a/src/textual/drivers/linux_inline_driver.py b/src/textual/drivers/linux_inline_driver.py index 14aa61fba0..16060654ab 100644 --- a/src/textual/drivers/linux_inline_driver.py +++ b/src/textual/drivers/linux_inline_driver.py @@ -130,7 +130,7 @@ def run_input_thread(self) -> None: feed = parser.feed tick = parser.tick - utf8_decoder = getincrementaldecoder("utf-8")().decode + utf8_decoder = getincrementaldecoder("utf-8")(errors="replace").decode decode = utf8_decoder read = os.read diff --git a/src/textual/drivers/web_driver.py b/src/textual/drivers/web_driver.py index f21d19ed38..fcb5d4d73f 100644 --- a/src/textual/drivers/web_driver.py +++ b/src/textual/drivers/web_driver.py @@ -185,7 +185,7 @@ def run_input_thread(self) -> None: """Wait for input and dispatch events.""" input_reader = self._input_reader parser = XTermParser(debug=self._debug) - utf8_decoder = getincrementaldecoder("utf-8")().decode + utf8_decoder = getincrementaldecoder("utf-8")(errors="replace").decode decode = utf8_decoder # The server sends us a stream of bytes, which contains the equivalent of stdin, plus # in band data packets. diff --git a/tests/test_utf8_decode_resilience.py b/tests/test_utf8_decode_resilience.py new file mode 100644 index 0000000000..20c8012247 --- /dev/null +++ b/tests/test_utf8_decode_resilience.py @@ -0,0 +1,63 @@ +"""Regression test for https://github.com/Textualize/textual/issues/6456 + +Verify that the UTF-8 incremental decoders used in drivers are configured +with ``errors="replace"`` so that invalid byte sequences produce U+FFFD +instead of raising ``UnicodeDecodeError`` and crashing the input thread. + +This test inspects the driver source code to ensure the fix is in place. +Without errors="replace", invalid UTF-8 input would crash the input thread. +""" + +import re +from pathlib import Path + + +def _get_driver_files() -> dict[str, Path]: + """Get the paths to the three drivers that were modified.""" + drivers_dir = Path(__file__).parent.parent / "src" / "textual" / "drivers" + return { + "linux_driver": drivers_dir / "linux_driver.py", + "linux_inline_driver": drivers_dir / "linux_inline_driver.py", + "web_driver": drivers_dir / "web_driver.py", + } + + +def _check_driver_decoder_config(driver_path: Path) -> bool: + """Check if driver uses getincrementaldecoder with errors='replace'.""" + if not driver_path.exists(): + raise FileNotFoundError(f"Driver file not found: {driver_path}") + + source = driver_path.read_text(encoding="utf-8") + + # Look for the pattern: getincrementaldecoder("utf-8")(errors="replace") + # This regex matches the decoder instantiation with the replace error handler + pattern = r'getincrementaldecoder\s*\(\s*["\']utf-8["\']\s*\)\s*\(\s*errors\s*=\s*["\']replace["\']\s*\)' + + return bool(re.search(pattern, source)) + + +def test_linux_driver_uses_replace_errors() -> None: + """Linux driver must use errors='replace' for UTF-8 decoder.""" + drivers = _get_driver_files() + assert _check_driver_decoder_config(drivers["linux_driver"]), ( + "linux_driver.py must use getincrementaldecoder('utf-8')(errors='replace'). " + "Without this, invalid UTF-8 bytes will crash the input thread." + ) + + +def test_linux_inline_driver_uses_replace_errors() -> None: + """Linux inline driver must use errors='replace' for UTF-8 decoder.""" + drivers = _get_driver_files() + assert _check_driver_decoder_config(drivers["linux_inline_driver"]), ( + "linux_inline_driver.py must use getincrementaldecoder('utf-8')(errors='replace'). " + "Without this, invalid UTF-8 bytes will crash the input thread." + ) + + +def test_web_driver_uses_replace_errors() -> None: + """Web driver must use errors='replace' for UTF-8 decoder.""" + drivers = _get_driver_files() + assert _check_driver_decoder_config(drivers["web_driver"]), ( + "web_driver.py must use getincrementaldecoder('utf-8')(errors='replace'). " + "Without this, invalid UTF-8 bytes will crash the input thread." + )