Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright (c) 2026, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# The Universal Permissive License (UPL), Version 1.0
#
# Subject to the condition set forth below, permission is hereby granted to any
# person obtaining a copy of this software, associated documentation and/or
# data (collectively the "Software"), free of charge and under any and all
# copyright rights in the Software, and any and all patent rights owned or
# freely licensable by each licensor hereunder covering either (i) the
# unmodified Software as contributed to or provided by such licensor, or (ii)
# the Larger Works (as defined below), to deal in both
#
# (a) the Software, and
#
# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
# one is included with the Software each a "Larger Work" to which the Software
# is contributed by such licensors),
#
# without restriction, including without limitation the rights to copy, create
# derivative works of, display, perform, and distribute the Software and make,
# use, sell, offer for sale, import, export, have made, and have sold the
# Software and the Larger Work(s), and to sublicense the foregoing rights on
# either these or other terms.
#
# This license is subject to the following condition:
#
# The above copyright notice and either this complete permission notice or at a
# minimum a reference to the UPL must be included in all copies or substantial
# portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import subprocess
import sys
import textwrap
import unittest


def entity_expansion_payload(levels=5, fanout=8):
    """Return an XML document with nested internal entities as bytes.

    The internal DTD subset declares ``e0`` as the literal ``x`` and each
    ``e<k>`` (for ``k`` in ``1..levels``) as ``fanout`` references to
    ``e<k-1>``.  The document body references ``e<levels>``, so fully
    expanding it produces ``fanout ** levels`` characters.

    Args:
        levels: Number of nested entity levels declared on top of ``e0``.
        fanout: Number of references to the previous level in each entity.

    Returns:
        The encoded document (``str.encode()`` default encoding).
    """
    entities = ['<!ENTITY e0 "x">']
    for level in range(1, levels + 1):
        value = f"&e{level - 1};" * fanout
        entities.append(f'<!ENTITY e{level} "{value}">')
    dtd = "\n".join(entities)
    return f"<!DOCTYPE doc [\n{dtd}\n]><doc>&e{levels};</doc>".encode()


class PyExpatEntityExpansionLimitTest(unittest.TestCase):
    """Checks that the Java pyexpat backend bounds internal entity expansion."""

    # The launcher flags used below (--vm.D..., --python.PyExpatModuleBackend)
    # are only understood by GraalPy; any other interpreter would reject them
    # and the test would fail for an unrelated reason.
    @unittest.skipUnless(sys.implementation.name == "graalpy",
                         "requires GraalPy launcher options and the Java pyexpat backend")
    def test_java_backend_internal_entity_expansion_is_limited(self):
        """Parse a small and a large entity payload in a fresh interpreter.

        The child process must parse the ``levels=6`` payload (8**6 expanded
        characters) without error and must reject the ``levels=7`` payload
        (8**7 expanded characters) with the amplification-limit error code
        and message.  It exits with status 0 only in that case; any other
        outcome exits with a diagnostic message, which is reported through
        the assertion below.
        """
        code = textwrap.dedent(f"""
            from xml.parsers import expat

            expat.ParserCreate().Parse({entity_expansion_payload(levels=6)!r}, True)

            parser = expat.ParserCreate()
            try:
                parser.Parse({entity_expansion_payload(levels=7)!r}, True)
            except expat.ExpatError as e:
                expected_code = expat.errors.codes[expat.errors.XML_ERROR_AMPLIFICATION_LIMIT_BREACH]
                if e.code != expected_code:
                    raise SystemExit(f"unexpected error code: {{e.code}} != {{expected_code}}")
                if expat.ErrorString(e.code) != expat.errors.XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
                    raise SystemExit(f"unexpected error string: {{expat.ErrorString(e.code)!r}}")
                raise SystemExit(0)
            raise SystemExit("entity expansion was not limited")
        """)

        # The process-wide jdk.xml.* limits are passed as 0 on purpose: the
        # parser is expected to enforce a limit of its own regardless.
        result = subprocess.run(
            [
                sys.executable,
                "--vm.Djdk.xml.entityExpansionLimit=0",
                "--vm.Djdk.xml.totalEntitySizeLimit=0",
                "--vm.Djdk.xml.entityReplacementLimit=0",
                "--python.PyExpatModuleBackend=java",
                "-c",
                code,
            ],
            capture_output=True,
            text=True,
        )

        # Include the child's output so a failure explains itself.
        self.assertEqual(0, result.returncode, result.stdout + result.stderr)


if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ public final class PXMLParser extends PythonBuiltinObject {
public static final int XML_ERROR_FINISHED = 1;
public static final int XML_ERROR_SYNTAX = 2;
public static final int XML_ERROR_UNCLOSED_TOKEN = 3;
// Error code used when the parser hits one of its entity-expansion limits.
// NOTE(review): 43 presumably mirrors libexpat's XML_ERROR_AMPLIFICATION_LIMIT_BREACH value — confirm.
public static final int XML_ERROR_AMPLIFICATION_LIMIT_BREACH = 43;
// Human-readable message paired with XML_ERROR_AMPLIFICATION_LIMIT_BREACH.
public static final String XML_ERROR_AMPLIFICATION_LIMIT_BREACH_MESSAGE = "limit on input amplification factor (from DTD and entities) breached";

private final TruffleString namespaceSeparator;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ Interface to pyexpat parser (Java backend).
addError(errors, codes, messages, "XML_ERROR_FINISHED", "parsing finished", PXMLParser.XML_ERROR_FINISHED);
addError(errors, codes, messages, "XML_ERROR_SYNTAX", "syntax error", PXMLParser.XML_ERROR_SYNTAX);
addError(errors, codes, messages, "XML_ERROR_UNCLOSED_TOKEN", "unclosed token", PXMLParser.XML_ERROR_UNCLOSED_TOKEN);
addError(errors, codes, messages, "XML_ERROR_AMPLIFICATION_LIMIT_BREACH", PXMLParser.XML_ERROR_AMPLIFICATION_LIMIT_BREACH_MESSAGE,
PXMLParser.XML_ERROR_AMPLIFICATION_LIMIT_BREACH);
errors.setAttribute(T_CODES, codes);
errors.setAttribute(T_MESSAGES, messages);
addBuiltinConstant("errors", errors);
Expand Down Expand Up @@ -141,12 +143,14 @@ abstract static class ErrorStringNode extends PythonBuiltinNode {
private static final TruffleString T_PARSING_FINISHED = tsLiteral("parsing finished");
private static final TruffleString T_UNCLOSED_TOKEN = tsLiteral("unclosed token");
private static final TruffleString T_SYNTAX_ERROR = tsLiteral("syntax error");
private static final TruffleString T_AMPLIFICATION_LIMIT_BREACH = tsLiteral(PXMLParser.XML_ERROR_AMPLIFICATION_LIMIT_BREACH_MESSAGE);

@Specialization
static TruffleString doIt(int code) {
    // Codes without a dedicated message fall back to the generic "syntax error" text.
    switch (code) {
        case PXMLParser.XML_ERROR_FINISHED:
            return T_PARSING_FINISHED;
        case PXMLParser.XML_ERROR_UNCLOSED_TOKEN:
            return T_UNCLOSED_TOKEN;
        case PXMLParser.XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
            return T_AMPLIFICATION_LIMIT_BREACH;
        default:
            return T_SYNTAX_ERROR;
    }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
import java.util.Map;
import java.util.Set;

import javax.xml.XMLConstants;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
Expand Down Expand Up @@ -124,6 +126,28 @@
public final class XMLParserBuiltins extends PythonBuiltins {
public static final TpSlots SLOTS = XMLParserBuiltinsSlotsGen.SLOTS;

/*
* JAXP exposes entity-expansion protection through separate per-parser properties. The matching jdk.xml.* system
* properties are read once into static limit values so stricter process-wide limits remain effective without
* re-reading properties for every parse. The SAX feature URIs below disable external entity and DTD loading where
* the selected SAX implementation supports them.
*/
// Default limits; getParserLimit falls back to these unless a stricter positive value is configured.
private static final int ENTITY_EXPANSION_LIMIT = 1_000_000;
private static final int TOTAL_ENTITY_SIZE_LIMIT = 1_000_000;
private static final int ENTITY_REPLACEMENT_LIMIT = 1_000_000;
// Names of the process-wide system properties consulted for each limit.
private static final String JDK_ENTITY_EXPANSION_LIMIT_PROPERTY = "jdk.xml.entityExpansionLimit";
private static final String JDK_TOTAL_ENTITY_SIZE_LIMIT_PROPERTY = "jdk.xml.totalEntitySizeLimit";
private static final String JDK_ENTITY_REPLACEMENT_LIMIT_PROPERTY = "jdk.xml.entityReplacementLimit";
// Per-parser JAXP property names the resolved limits are set under.
private static final String JAXP_ENTITY_EXPANSION_LIMIT = "http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit";
private static final String JAXP_TOTAL_ENTITY_SIZE_LIMIT = "http://www.oracle.com/xml/jaxp/properties/totalEntitySizeLimit";
private static final String JAXP_ENTITY_REPLACEMENT_LIMIT = "http://www.oracle.com/xml/jaxp/properties/entityReplacementLimit";
// Effective limits as strings, resolved once by these static initializers.
private static final String ENTITY_EXPANSION_LIMIT_VALUE = getParserLimit(JDK_ENTITY_EXPANSION_LIMIT_PROPERTY, ENTITY_EXPANSION_LIMIT);
private static final String TOTAL_ENTITY_SIZE_LIMIT_VALUE = getParserLimit(JDK_TOTAL_ENTITY_SIZE_LIMIT_PROPERTY, TOTAL_ENTITY_SIZE_LIMIT);
private static final String ENTITY_REPLACEMENT_LIMIT_VALUE = getParserLimit(JDK_ENTITY_REPLACEMENT_LIMIT_PROPERTY, ENTITY_REPLACEMENT_LIMIT);
// Reader feature URIs that are switched off on a best-effort basis.
private static final String SAX_EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities";
private static final String SAX_EXTERNAL_PARAMETER_ENTITIES = "http://xml.org/sax/features/external-parameter-entities";
private static final String SAX_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

@Override
protected List<? extends NodeFactory<? extends PythonBuiltinBaseNode>> getNodeFactories() {
return XMLParserBuiltinsFactory.getFactories();
Expand Down Expand Up @@ -1187,11 +1211,16 @@ private Object elementModel(String model) {
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(parser.getNamespaceSeparator() != null);
XMLReader reader = factory.newSAXParser().getXMLReader();
try {
reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
} catch (Exception ignored) {
}
factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
SAXParser saxParser = factory.newSAXParser();
saxParser.setProperty(JAXP_ENTITY_EXPANSION_LIMIT, ENTITY_EXPANSION_LIMIT_VALUE);
saxParser.setProperty(JAXP_TOTAL_ENTITY_SIZE_LIMIT, TOTAL_ENTITY_SIZE_LIMIT_VALUE);
saxParser.setProperty(JAXP_ENTITY_REPLACEMENT_LIMIT, ENTITY_REPLACEMENT_LIMIT_VALUE);
saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
XMLReader reader = saxParser.getXMLReader();
setFeatureIfSupported(reader, SAX_EXTERNAL_GENERAL_ENTITIES, false);
setFeatureIfSupported(reader, SAX_EXTERNAL_PARAMETER_ENTITIES, false);
setFeatureIfSupported(reader, SAX_LOAD_EXTERNAL_DTD, false);
reader.setEntityResolver(new EntityResolver2() {
@Override
public InputSource getExternalSubset(String name, String baseURI) {
Expand Down Expand Up @@ -1257,6 +1286,28 @@ public InputSource resolveEntity(String publicId, String systemId) {
}
}

/**
 * Resolves the effective value for one parser limit.
 *
 * @param property name of the system property that may carry a user-configured limit
 * @param defaultLimit limit to use when the property is absent, malformed, non-positive or not stricter
 * @return the configured property text when it parses to a positive integer below {@code defaultLimit},
 *         otherwise {@code defaultLimit} rendered as a decimal string
 */
private static String getParserLimit(String property, int defaultLimit) {
    String fallback = Integer.toString(defaultLimit);
    String raw = System.getProperty(property);
    if (raw == null) {
        return fallback;
    }
    int parsed;
    try {
        parsed = Integer.parseInt(raw);
    } catch (NumberFormatException malformed) {
        // Unparseable values are treated the same as an unset property.
        return fallback;
    }
    // Only a stricter (smaller, positive) process-wide limit overrides the default.
    boolean stricter = parsed > 0 && parsed < defaultLimit;
    return stricter ? raw : fallback;
}

/**
 * Sets a feature on the reader on a best-effort basis.
 *
 * @param reader reader to configure
 * @param feature URI of the feature to set
 * @param value desired feature state
 */
private static void setFeatureIfSupported(XMLReader reader, String feature, boolean value) {
    try {
        reader.setFeature(feature, value);
    } catch (Exception unsupported) {
        // Deliberately ignored: a reader that rejects the feature is left as it is.
    }
}

@FunctionalInterface
private interface ByteIndexSupplier {
int get();
Expand Down Expand Up @@ -1452,6 +1503,9 @@ private static String formatErrorMessage(SAXParseException e) {
if (message == null) {
return "syntax error";
}
if (isEntityAmplificationLimitError(message)) {
return PXMLParser.XML_ERROR_AMPLIFICATION_LIMIT_BREACH_MESSAGE + ": line " + e.getLineNumber() + ", column " + Math.max(0, e.getColumnNumber() - 1);
}
if (message.contains("entity") && message.contains("not declared")) {
int firstQuote = message.indexOf('"');
int secondQuote = firstQuote >= 0 ? message.indexOf('"', firstQuote + 1) : -1;
Expand All @@ -1466,10 +1520,18 @@ private static String formatErrorMessage(SAXParseException e) {
/**
 * Maps a SAX parse failure to one of the PXMLParser error codes based on its message text.
 *
 * @param e the parse exception to classify
 * @return the amplification-limit code, the unclosed-token code, or the generic syntax-error code
 */
private static int mapErrorCode(SAXParseException e) {
    String text = e.getMessage();
    if (text == null) {
        return PXMLParser.XML_ERROR_SYNTAX;
    }
    // Limit breaches are checked first so they are never reported as a generic error.
    if (isEntityAmplificationLimitError(text)) {
        return PXMLParser.XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
    boolean unclosed = text.contains("start and end within the same entity") || text.contains("premature end of file") || text.contains("must be terminated");
    return unclosed ? PXMLParser.XML_ERROR_UNCLOSED_TOKEN : PXMLParser.XML_ERROR_SYNTAX;
}

/**
 * Tells whether a parser error message refers to one of the entity limits.
 *
 * @param message non-null error message text
 * @return {@code true} if the message contains any of the known limit markers
 */
private static boolean isEntityAmplificationLimitError(String message) {
    // NOTE(review): the JAXP000100xx entries are presumably the JDK message IDs for the three limits — confirm.
    String[] markers = {"JAXP00010001", "JAXP00010004", "JAXP00010007", "entityExpansionLimit", "totalEntitySizeLimit", "entityReplacementLimit"};
    for (String marker : markers) {
        if (message.contains(marker)) {
            return true;
        }
    }
    return false;
}
}
Loading