From 21aa70a8c589ec2476313df29e88e92e7b11ff14 Mon Sep 17 00:00:00 2001 From: hamza-mobeen Date: Fri, 17 Apr 2026 17:35:43 +0100 Subject: [PATCH 1/2] Fix UnicodeDecodeError crash on invalid UTF-8 input (#6456) --- src/textual/drivers/linux_driver.py | 2 +- src/textual/drivers/linux_inline_driver.py | 2 +- src/textual/drivers/web_driver.py | 2 +- tests/test_utf8_decode_resilience.py | 32 ++++++++++++++++++++++ 4 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 tests/test_utf8_decode_resilience.py diff --git a/src/textual/drivers/linux_driver.py b/src/textual/drivers/linux_driver.py index 98bf632ff7..48b71e0e6e 100644 --- a/src/textual/drivers/linux_driver.py +++ b/src/textual/drivers/linux_driver.py @@ -412,7 +412,7 @@ def run_input_thread(self) -> None: feed = parser.feed tick = parser.tick - utf8_decoder = getincrementaldecoder("utf-8")().decode + utf8_decoder = getincrementaldecoder("utf-8")(errors="replace").decode decode = utf8_decoder read = os.read diff --git a/src/textual/drivers/linux_inline_driver.py b/src/textual/drivers/linux_inline_driver.py index 14aa61fba0..16060654ab 100644 --- a/src/textual/drivers/linux_inline_driver.py +++ b/src/textual/drivers/linux_inline_driver.py @@ -130,7 +130,7 @@ def run_input_thread(self) -> None: feed = parser.feed tick = parser.tick - utf8_decoder = getincrementaldecoder("utf-8")().decode + utf8_decoder = getincrementaldecoder("utf-8")(errors="replace").decode decode = utf8_decoder read = os.read diff --git a/src/textual/drivers/web_driver.py b/src/textual/drivers/web_driver.py index f21d19ed38..fcb5d4d73f 100644 --- a/src/textual/drivers/web_driver.py +++ b/src/textual/drivers/web_driver.py @@ -185,7 +185,7 @@ def run_input_thread(self) -> None: """Wait for input and dispatch events.""" input_reader = self._input_reader parser = XTermParser(debug=self._debug) - utf8_decoder = getincrementaldecoder("utf-8")().decode + utf8_decoder = getincrementaldecoder("utf-8")(errors="replace").decode decode = utf8_decoder # The server sends us a stream of bytes, which contains the equivalent of stdin, plus # in band data packets. diff --git a/tests/test_utf8_decode_resilience.py b/tests/test_utf8_decode_resilience.py new file mode 100644 index 0000000000..51d38da401 --- /dev/null +++ b/tests/test_utf8_decode_resilience.py @@ -0,0 +1,32 @@ +"""Regression test for https://github.com/Textualize/textual/issues/6456 + +Verify that the UTF-8 incremental decoders used in drivers are configured +with ``errors="replace"`` so that invalid byte sequences produce U+FFFD +instead of raising ``UnicodeDecodeError`` and crashing the input thread. +""" + +from codecs import getincrementaldecoder + + +def test_utf8_decoder_replace_mode() -> None: + """The decoder must not raise on invalid UTF-8 bytes.""" + decoder = getincrementaldecoder("utf-8")(errors="replace") + # 0xFF is never valid in UTF-8 + result = decoder.decode(b"\xff") + assert result == "\ufffd" + + +def test_utf8_decoder_replace_mixed() -> None: + """Valid bytes surrounding an invalid byte must decode correctly.""" + decoder = getincrementaldecoder("utf-8")(errors="replace") + result = decoder.decode(b"hello\xffworld") + assert result == "hello\ufffdworld" + + +def test_utf8_decoder_replace_truncated_multibyte() -> None: + """A truncated multi-byte sequence at end of chunk must not raise.""" + decoder = getincrementaldecoder("utf-8")(errors="replace") + # \xc3 is the start of a 2-byte sequence; passing final=True forces + # the decoder to flush, which would raise under strict mode. + result = decoder.decode(b"\xc3", final=True) + assert result == "\ufffd" From 766e268ca608d7a5fbb947cc66199b8b1c74626a Mon Sep 17 00:00:00 2001 From: hamza-mobeen Date: Mon, 20 Apr 2026 10:43:45 +0100 Subject: [PATCH 2/2] Fix UTF-8 decoder regression test --- tests/test_utf8_decode_resilience.py | 81 +++++++++++++++++++--------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/tests/test_utf8_decode_resilience.py b/tests/test_utf8_decode_resilience.py index 51d38da401..20c8012247 100644 --- a/tests/test_utf8_decode_resilience.py +++ b/tests/test_utf8_decode_resilience.py @@ -3,30 +3,61 @@ Verify that the UTF-8 incremental decoders used in drivers are configured with ``errors="replace"`` so that invalid byte sequences produce U+FFFD instead of raising ``UnicodeDecodeError`` and crashing the input thread. -""" - -from codecs import getincrementaldecoder - - -def test_utf8_decoder_replace_mode() -> None: - """The decoder must not raise on invalid UTF-8 bytes.""" - decoder = getincrementaldecoder("utf-8")(errors="replace") - # 0xFF is never valid in UTF-8 - result = decoder.decode(b"\xff") - assert result == "\ufffd" - - -def test_utf8_decoder_replace_mixed() -> None: - """Valid bytes surrounding an invalid byte must decode correctly.""" - decoder = getincrementaldecoder("utf-8")(errors="replace") - result = decoder.decode(b"hello\xffworld") - assert result == "hello\ufffdworld" +This test inspects the driver source code to ensure the fix is in place. +Without errors="replace", invalid UTF-8 input would crash the input thread. +""" -def test_utf8_decoder_replace_truncated_multibyte() -> None: - """A truncated multi-byte sequence at end of chunk must not raise.""" - decoder = getincrementaldecoder("utf-8")(errors="replace") - # \xc3 is the start of a 2-byte sequence; passing final=True forces - # the decoder to flush, which would raise under strict mode. - result = decoder.decode(b"\xc3", final=True) - assert result == "\ufffd" +import re +from pathlib import Path + + +def _get_driver_files() -> dict[str, Path]: + """Get the paths to the three drivers that were modified.""" + drivers_dir = Path(__file__).parent.parent / "src" / "textual" / "drivers" + return { + "linux_driver": drivers_dir / "linux_driver.py", + "linux_inline_driver": drivers_dir / "linux_inline_driver.py", + "web_driver": drivers_dir / "web_driver.py", + } + + +def _check_driver_decoder_config(driver_path: Path) -> bool: + """Check if driver uses getincrementaldecoder with errors='replace'.""" + if not driver_path.exists(): + raise FileNotFoundError(f"Driver file not found: {driver_path}") + + source = driver_path.read_text(encoding="utf-8") + + # Look for the pattern: getincrementaldecoder("utf-8")(errors="replace") + # This regex matches the decoder instantiation with the replace error handler + pattern = r'getincrementaldecoder\s*\(\s*["\']utf-8["\']\s*\)\s*\(\s*errors\s*=\s*["\']replace["\']\s*\)' + + return bool(re.search(pattern, source)) + + +def test_linux_driver_uses_replace_errors() -> None: + """Linux driver must use errors='replace' for UTF-8 decoder.""" + drivers = _get_driver_files() + assert _check_driver_decoder_config(drivers["linux_driver"]), ( + "linux_driver.py must use getincrementaldecoder('utf-8')(errors='replace'). " + "Without this, invalid UTF-8 bytes will crash the input thread." + ) + + +def test_linux_inline_driver_uses_replace_errors() -> None: + """Linux inline driver must use errors='replace' for UTF-8 decoder.""" + drivers = _get_driver_files() + assert _check_driver_decoder_config(drivers["linux_inline_driver"]), ( + "linux_inline_driver.py must use getincrementaldecoder('utf-8')(errors='replace'). " + "Without this, invalid UTF-8 bytes will crash the input thread." + ) + + +def test_web_driver_uses_replace_errors() -> None: + """Web driver must use errors='replace' for UTF-8 decoder.""" + drivers = _get_driver_files() + assert _check_driver_decoder_config(drivers["web_driver"]), ( + "web_driver.py must use getincrementaldecoder('utf-8')(errors='replace'). " + "Without this, invalid UTF-8 bytes will crash the input thread." + )