From 6138a7822c28183b751faf46831ad49b2b44ec7d Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Thu, 23 Apr 2026 13:05:18 +0800 Subject: [PATCH 1/5] gh-148535: Fix heap buffer overflow in pyexpat CharacterDataHandler --- .../next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst | 2 ++ Modules/pyexpat.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst diff --git a/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst b/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst new file mode 100644 index 00000000000000..55bdb3e5a48015 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst @@ -0,0 +1,2 @@ +Fix heap buffer overflow in pyexpat CharacterDataHandler, which is caused by +two signed intergets added up. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 0f0afe17513ef1..c01f7babe74527 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -393,7 +393,7 @@ my_CharacterDataHandler(void *userData, const XML_Char *data, int len) if (self->buffer == NULL) call_character_handler(self, data, len); else { - if ((self->buffer_used + len) > self->buffer_size) { + if (len > (self->buffer_size - self->buffer_used)) { if (flush_character_buffer(self) < 0) return; /* handler might have changed; drop the rest on the floor From 8f8884b144d3cf3817143d2fd8536bb7049c142f Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Thu, 23 Apr 2026 13:15:38 +0800 Subject: [PATCH 2/5] Add test for heap overflow in expat parser's CharacterDataHandler --- Lib/test/test_pyexpat.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index aaa91aca36e3c4..5b50fc3c3f9595 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -712,6 +712,20 @@ def test_change_size_2(self): parser.Parse(xml2, True) self.assertEqual(self.n, 4) + 
@support.requires_resource('cpu') + @support.requires_resource('walltime') + def test_heap_overflow(self): + # See https://github.com/python/cpython/issues/148441 + parser = expat.ParserCreate() + parser.buffer_text = True + parser.buffer_size = 2**31 - 1 # INT_MAX + def handler(text): + pass + N = 2049 * (1 << 20) - 3 # 2,148,532,221 bytes of character data + parser.CharacterDataHandler = handler + xml_data = b"<a>" + b"A" * N + b"</a>" + self.assertEqual(parser.Parse(xml_data, True), 1) + class ElementDeclHandlerTest(unittest.TestCase): def test_trigger_leak(self): # Unfixed, this test would leak the memory of the so-called From 588fa762ecbb68877b1ae66a0194298abf1d5b7c Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 25 Apr 2026 20:38:31 +0800 Subject: [PATCH 3/5] Update Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- .../Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst b/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst index 55bdb3e5a48015..f51fbdcd99ac9b 100644 --- a/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst +++ b/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst @@ -1,2 +1,4 @@ -Fix heap buffer overflow in pyexpat CharacterDataHandler, which is caused by -two signed intergets added up. +:mod:`xml.parsers.expat`: Fix a heap buffer overflow in +:meth:`~xml.parsers.expat.xmlparser.CharacterDataHandler` +when the character data size exceeds the parser's +:attr:`buffer size <xml.parsers.expat.xmlparser.buffer_size>`. 
From 2e070e977f31d47b15997058ac6744f2b7b20563 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 25 Apr 2026 20:38:40 +0800 Subject: [PATCH 4/5] Update Lib/test/test_pyexpat.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_pyexpat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 5b50fc3c3f9595..b507655ecd3278 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -718,7 +718,7 @@ def test_heap_overflow(self): # See https://github.com/python/cpython/issues/148441 parser = expat.ParserCreate() parser.buffer_text = True - parser.buffer_size = 2**31 - 1 # INT_MAX + parser.buffer_size = 2**31 - 1 # INT_MAX def handler(text): pass N = 2049 * (1 << 20) - 3 # 2,148,532,221 bytes of character data From 22edd46542dc0cd04b54d51db8e0978d9874fdd7 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 25 Apr 2026 21:26:52 +0800 Subject: [PATCH 5/5] Update test_pyexpat.py --- Lib/test/test_pyexpat.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index b507655ecd3278..0b95547b63edcf 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -714,15 +714,14 @@ def test_change_size_2(self): @support.requires_resource('cpu') @support.requires_resource('walltime') - def test_heap_overflow(self): + def test_large_character_data_no_buffer_overflow(self): # See https://github.com/python/cpython/issues/148441 parser = expat.ParserCreate() parser.buffer_text = True parser.buffer_size = 2**31 - 1 # INT_MAX - def handler(text): - pass - N = 2049 * (1 << 20) - 3 # 2,148,532,221 bytes of character data - parser.CharacterDataHandler = handler + N = 2049 * (1 << 20) - 3 # Character data greater than INT_MAX + self.assertGreater(N, parser.buffer_size) + parser.CharacterDataHandler 
= lambda text: None xml_data = b"<a>" + b"A" * N + b"</a>" self.assertEqual(parser.Parse(xml_data, True), 1)