collective · ale-rt · Apr 10, 2026 · Apr 10, 2026
diff --git a/HISTORY.md b/HISTORY.md
@@ -5,6 +5,9 @@
 
 - Declare support for Python 3.10 - 3.14
   [ale-rt]
+- Do not break `title` and `textarea` in page templates.
+  (Fixes #198)
+  [ale-rt]
 
 
 ## 3.1.1 (2025-06-23)

diff --git a/zpretty/elements.py b/zpretty/elements.py
@@ -77,7 +77,19 @@ class PrettyElement:
     )
     escaper = EntitySubstitution()
     preserve_text_whitespace_elements = ["pre"]
-    skip_text_escaping_elements = ["script", "style"]
+    skip_text_escaping_elements = [
+        # Do not fiddle with the content of script tags,
+        # as it may contain html entities that we do not want to be escaped
+        "script",
+        # Do not fiddle with the content of style tags,
+        # as it may contain html entities that we do not want to be escaped
+        "style",
+        # The title and textarea tags may contain markup-like text
+        # that is escaped into text by the HTML parser,
+        # but that we want to be rendered as markup in page templates.
+        "title",
+        "textarea",
+    ]
 
     def __init__(self, context, level=0):
         """Take something a (bs4) element and an indentation level"""

diff --git a/zpretty/prettifier.py b/zpretty/prettifier.py
@@ -1,6 +1,7 @@
 from bs4 import BeautifulSoup
 from bs4.element import Doctype
 from bs4.element import ProcessingInstruction
+from bs4.element import Tag
 from logging import getLogger
 from uuid import uuid4
 from zpretty.elements import PrettyElement
@@ -26,6 +27,7 @@ class ZPrettifier:
     _doctype_pattern = re.compile(
         r"(<!DOCTYPE[^>[]*(\[[^]]*\])?>)", re.IGNORECASE | re.DOTALL
     )
+    _rcdata_tags = ("title", "textarea")
     _cdatas = []
     _doctype = None
 
@@ -52,6 +54,12 @@ def __init__(self, filename="", text="", encoding="utf8"):
                 if self._ampersand_marker in value:
                     attrs[key] = value.replace(self._ampersand_marker, "&")
 
+        if self.parser == "html.parser":
+            # Page templates are parsed with the html.parser,
+            # but can contain invalid markup inside RCDATA tags,
+            # see https://github.com/collective/zpretty/issues/198
+            self.fix_rcdata_markup(soup)
+
         self.soup = soup
 
         # Cleanup all spurious self._newlines_marker attributes, see #35
@@ -61,6 +69,43 @@ def __init__(self, filename="", text="", encoding="utf8"):
 
         self.root = self.pretty_element(self.soup, -1)
 
+    def fix_rcdata_markup(self, soup):
+        """Re-parse markup-like text inside RCDATA tags into child nodes.
+
+        Page templates may contain elements inside <title> or <textarea>.
+        The html.parser used by BeautifulSoup escapes the markup inside these
+        tags and does not parse it as tags, but we want to prettify it as well.
+
+        As a workaround, the content of each of these tags is wrapped in a
+        null tag and parsed again with ``self.parser``. If the result contains
+        at least one tag, the parsed nodes replace the original content;
+        otherwise the tag is left untouched. ``soup`` is modified in place.
+
+        Then the rcdata elements content will be rendered as it is.
+        """
+        for tag in soup.find_all(self._rcdata_tags):
+            raw_content = "".join(str(node) for node in tag.contents)
+
+            null_tag_name = self.pretty_element.null_tag_name
+            fragment_soup = BeautifulSoup(
+                f"<{null_tag_name}>{raw_content}</{null_tag_name}>",
+                self.parser,
+            )
+            fragment_root = getattr(fragment_soup, null_tag_name, None)
+            if not fragment_root:
+                continue
+
+            parsed_children = list(fragment_root.children)
+            # Check whether the re-parsed content contains any tag:
+            # if it does not, skip it to avoid messing with the content
+            if not any(isinstance(child, Tag) for child in parsed_children):
+                continue
+
+            # Replace the content of the tag with the re-parsed children
+            tag.clear()
+            for child in parsed_children:
+                tag.append(child)
+
     def _prepare_text(self):
         """This tweaks the text passed to the prettifier
         to overcome some limitations of the BeautifulSoup parser

diff --git a/zpretty/tests/original/sample_pt.pt b/zpretty/tests/original/sample_pt.pt
@@ -7,6 +7,16 @@
       xml:lang="en"
       i18n:domain="plone"
 ><![CDATA[ <>& ]]>
+  <head>
+    <title>
+      <!--! Check https://github.com/collective/zpretty/issues/198 -->
+      &lt;i18n:translate&gt;
+        Should stay like this
+      &lt;/i18n:translate&gt;
+
+      <i18n:translate>Sample Page</i18n:translate>
+    </title>
+  </head>
   <body>
     <metal:main fill-slot="content-core">
       <metal:content-core define-macro="content-core">
@@ -29,6 +39,21 @@
           Foo
             Bar
         </pre>
+        <form action="#"
+              method="post"
+        >
+          <textarea name="foo">
+            <!--! Check https://github.com/collective/zpretty/issues/198 -->
+            &lt;i18n:translate&gt;
+              Should stay like this
+            &lt;/i18n:translate&gt;
+            <tal:content define="
+                           foo python: 'foo';
+                         "
+                         replace="foo"
+            />
+          </textarea>
+        </form>
         <span tal:define="
                 foo python: 1 &gt; 2 and 1 &lt; 2 and &amp;;
                 bar python: 1 > 2 and 1 < 2 and &;

diff --git a/zpretty/tests/original/sample_xml.xml b/zpretty/tests/original/sample_xml.xml
@@ -4,6 +4,10 @@
   <!ENTITY another "value 2">
 ]>
 <root>
+  <title>
+    This is a title with an entity reference: &name;
+    <subtitle>This is a subtitle with another entity reference: &another;</subtitle>
+  </title>
   <pretty a="b">1</pretty>
   <closeme hidden="" />
   <preserve_space> </preserve_space>

diff --git a/zpretty/tests/test_zpretty.py b/zpretty/tests/test_zpretty.py
@@ -140,6 +140,18 @@ def test_fix_self_closing(self):
         self.assertPrettified("<input><a /><b />", "<input /><a></a><b></b>\n")
         self.assertPrettified("<input><a /><b /></input>", "<input /><a></a><b></b>\n")
 
+    def test_title_prettifies_markup_like_text(self):  # see issue #198
+        self.assertPrettified(
+            "<title> <i18n:translate>Sample Page</i18n:translate> </title>",
+            "<title>\n  <i18n:translate>Sample Page</i18n:translate>\n</title>\n",
+        )
+
+    def test_textarea_prettifies_markup_like_text(self):  # see issue #198
+        self.assertPrettified(
+            '<textarea> <tal:content replace="structure view/value" /> </textarea>',
+            '<textarea>\n  <tal:content replace="structure view/value" />\n</textarea>\n',  # noqa: E501
+        )
+
     def test_element_repr(self):
         prettifier = ZPrettifier(text="")
         self.assertEqual(repr(prettifier.root), "<pretty:-1:null_tag_name />")