Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

- Declare support for Python 3.10 - 3.14
[ale-rt]
- Do not break `title` and `textarea` in page templates.
(Fixes #198)
[ale-rt]


## 3.1.1 (2025-06-23)
Expand Down
14 changes: 13 additions & 1 deletion zpretty/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,19 @@ class PrettyElement:
)
escaper = EntitySubstitution()
preserve_text_whitespace_elements = ["pre"]
skip_text_escaping_elements = ["script", "style"]
# Tags whose text content should be left unescaped (see the per-tag notes).
skip_text_escaping_elements = [
# Do not fiddle with the content of script tags,
# as it may contain html entities that we do not want to be escaped
"script",
# Do not fiddle with the content of style tags,
# as it may contain html entities that we do not want to be escaped
"style",
# The title and textarea tags may contain markup-like text
# that the HTML parser escapes into plain text,
# but that we want to be rendered as markup in page templates.
"title",
"textarea",
]

def __init__(self, context, level=0):
"""Take something a (bs4) element and an indentation level"""
Expand Down
45 changes: 45 additions & 0 deletions zpretty/prettifier.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from bs4 import BeautifulSoup
from bs4.element import Doctype
from bs4.element import ProcessingInstruction
from bs4.element import Tag
from logging import getLogger
from uuid import uuid4
from zpretty.elements import PrettyElement
Expand All @@ -26,6 +27,7 @@ class ZPrettifier:
_doctype_pattern = re.compile(
r"(<!DOCTYPE[^>[]*(\[[^]]*\])?>)", re.IGNORECASE | re.DOTALL
)
_rcdata_tags = ("title", "textarea")
_cdatas = []
_doctype = None

Expand All @@ -52,6 +54,12 @@ def __init__(self, filename="", text="", encoding="utf8"):
if self._ampersand_marker in value:
attrs[key] = value.replace(self._ampersand_marker, "&")

if self.parser == "html.parser":
# Page templates are parsed with the html.parser,
# but can contain invalid markup inside RCDATA tags,
# see https://github.com/collective/zpretty/issues/198
self.fix_rcdata_markup(soup)

self.soup = soup

# Cleanup all spurious self._newlines_marker attributes, see #35
Expand All @@ -61,6 +69,43 @@ def __init__(self, filename="", text="", encoding="utf8"):

self.root = self.pretty_element(self.soup, -1)

def fix_rcdata_markup(self, soup):
    """Turn markup-like text inside RCDATA tags into real child nodes.

    In page templates we might have elements inside RCDATA fields,
    e.g. inside a <title> or a <textarea>.
    The html.parser used by BeautifulSoup may hand the content of these
    tags back as plain text instead of parsing it as tags,
    but we want to prettify it as well.

    As a workaround, the content of every tag listed in
    ``self._rcdata_tags`` is serialized, wrapped in the null tag,
    parsed again with ``self.parser`` and, when the result contains at
    least one tag, used to replace the original content of the element.

    Elements whose content does not contain markup are left untouched.

    :param soup: the parsed document; it is modified in place.
    """
    # The wrapper tag name does not depend on the element being fixed
    null_tag_name = self.pretty_element.null_tag_name

    for tag in soup.find_all(self._rcdata_tags):
        # Serialize the children back to markup.
        # str() on a bs4 special string (e.g. a Comment) returns only its
        # text, dropping the ``<!--``/``-->`` delimiters: this would turn
        # an already parsed comment into plain text (or even markup).
        # output_ready(formatter=None) keeps the delimiters and does not
        # escape anything, so for ordinary text it is the same as str().
        raw_content = "".join(
            str(node) if isinstance(node, Tag) else node.output_ready(formatter=None)
            for node in tag.contents
        )

        fragment_soup = BeautifulSoup(
            f"<{null_tag_name}>{raw_content}</{null_tag_name}>",
            self.parser,
        )
        fragment_root = fragment_soup.find(null_tag_name)
        if fragment_root is None:
            continue

        # Take a snapshot: appending a child elsewhere detaches it
        # from fragment_root while we are iterating.
        parsed_children = list(fragment_root.children)

        # Check if the tag contains some markup-like text,
        # if not we can skip it and avoid messing with the content
        if not any(isinstance(child, Tag) for child in parsed_children):
            continue

        # Replace the content of the tag with the parsed children
        tag.clear()
        for child in parsed_children:
            tag.append(child)

def _prepare_text(self):
"""This tweaks the text passed to the prettifier
to overcome some limitations of the BeautifulSoup parser
Expand Down
25 changes: 25 additions & 0 deletions zpretty/tests/original/sample_pt.pt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
xml:lang="en"
i18n:domain="plone"
><![CDATA[ <>& ]]>
<head>
<title>
<!--! Check https://github.com/collective/zpretty/issues/198 -->
&lt;i18n:translate&gt;
Should stay like this
&lt;/i18n:translate&gt;

<i18n:translate>Sample Page</i18n:translate>
</title>
</head>
<body>
<metal:main fill-slot="content-core">
<metal:content-core define-macro="content-core">
Expand All @@ -29,6 +39,21 @@
Foo
Bar
</pre>
<form action="#"
method="post"
>
<textarea name="foo">
<!--! Check https://github.com/collective/zpretty/issues/198 -->
&lt;i18n:translate&gt;
Should stay like this
&lt;/i18n:translate&gt;
<tal:content define="
foo python: 'foo';
"
replace="foo"
/>
</textarea>
</form>
<span tal:define="
foo python: 1 &gt; 2 and 1 &lt; 2 and &amp;;
bar python: 1 > 2 and 1 < 2 and &;
Expand Down
4 changes: 4 additions & 0 deletions zpretty/tests/original/sample_xml.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
<!ENTITY another "value 2">
]>
<root>
<title>
This is a title with an entity reference: &name;
<subtitle>This is a subtitle with another entity reference: &another;</subtitle>
</title>
<pretty a="b">1</pretty>
<closeme hidden="" />
<preserve_space> </preserve_space>
Expand Down
12 changes: 12 additions & 0 deletions zpretty/tests/test_zpretty.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,18 @@ def test_fix_self_closing(self):
self.assertPrettified("<input><a /><b />", "<input /><a></a><b></b>\n")
self.assertPrettified("<input><a /><b /></input>", "<input /><a></a><b></b>\n")

def test_title_prettifies_markup_like_text(self):
    """Markup found inside a <title> is prettified like regular markup."""
    source = "<title> <i18n:translate>Sample Page</i18n:translate> </title>"
    expected = "<title>\n <i18n:translate>Sample Page</i18n:translate>\n</title>\n"
    self.assertPrettified(source, expected)

def test_textarea_prettifies_markup_like_text(self):
    """Markup found inside a <textarea> is prettified like regular markup."""
    source = '<textarea> <tal:content replace="structure view/value" /> </textarea>'
    expected = (
        "<textarea>\n"
        ' <tal:content replace="structure view/value" />\n'
        "</textarea>\n"
    )
    self.assertPrettified(source, expected)

def test_element_repr(self):
prettifier = ZPrettifier(text="")
self.assertEqual(repr(prettifier.root), "<pretty:-1:null_tag_name />")
Expand Down
Loading