From a52a3dd2a5e0eda9cc5000fb4e84268b4e036c55 Mon Sep 17 00:00:00 2001 From: Rodrigo Nogueira Date: Tue, 21 Apr 2026 13:23:31 -0300 Subject: [PATCH 1/2] Reject URLs with multiple brackets in host component Fixes host-confusion parsing where URLs containing multiple bracket characters in the authority (e.g. http://[:localhost[]].google:80) were silently canonicalized to an unintended host. Both split_url() and split_netloc() now raise ValueError when: - more than one '[' or ']' appears in the netloc/hostinfo, or - '[' does not start the host subcomponent (split_netloc() only; per RFC 3986 IP-literal) Adds 7 regression tests covering the affected code paths. --- CHANGES/1661.bugfix.rst | 7 +++++++ tests/test_url.py | 31 +++++++++++++++++++++++++++++++ yarl/_parse.py | 10 ++++++++++ 3 files changed, 48 insertions(+) create mode 100644 CHANGES/1661.bugfix.rst diff --git a/CHANGES/1661.bugfix.rst b/CHANGES/1661.bugfix.rst new file mode 100644 index 000000000..6329c9f4d --- /dev/null +++ b/CHANGES/1661.bugfix.rst @@ -0,0 +1,7 @@ +Fixed a host-confusion parsing bug where URLs containing multiple bracket +characters in the host component (e.g. ``http://[:localhost[]].google:80``) +were silently canonicalized to an unintended host. Both ``split_url()`` and +``split_netloc()`` now raise :exc:`ValueError` when more than one ``[`` or +``]`` is found in the authority, or when ``[`` does not appear at the start of +the host subcomponent (checked by ``split_netloc()`` only), in compliance with :rfc:`3986` -- by +:user:`rodrigobnogueira`.
diff --git a/tests/test_url.py b/tests/test_url.py index 37871fedb..5fb66c56b 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -365,6 +365,37 @@ def test_ipfuture_brackets_not_allowed() -> None: URL("http://[v10]/") +@pytest.mark.parametrize( + "url", + ( + "http://[:localhost[]].google:80", + "http://[:localhost[]].google", + "http://[:attacker.com[]]:80", + "http://[:evil.com[]].bank.com:443", + "http://[:127.0.0.1[]]:80", + "http://[v1.:attacker[]].bank.com:80", + ), + ids=( + "host-confusion-with-port", + "host-confusion-without-port", + "attacker-host-injection", + "domain-allowlist-bypass", + "private-ip-injection", + "ipvfuture-bracket-abuse", + ), +) +def test_malformed_bracketed_host_rejected(url: str) -> None: + """Reject URLs with multiple brackets to prevent host confusion (SSRF).""" + with pytest.raises(ValueError, match="Invalid IPv6 URL"): + URL(url) + + +def test_malformed_bracketed_host_in_authority() -> None: + """Reject malformed brackets via URL.build(authority=...) path.""" + with pytest.raises(ValueError, match="Invalid IPv6 URL"): + URL.build(scheme="http", authority="[:localhost[]].google:80") + + def test_ipv4_zone() -> None: # I'm unsure if it is correct. 
url = URL("http://1.2.3.4%тест%42:123") diff --git a/yarl/_parse.py b/yarl/_parse.py index bb64165c7..6ec649ffa 100644 --- a/yarl/_parse.py +++ b/yarl/_parse.py @@ -63,6 +63,10 @@ def split_url(url: str) -> SplitURLType: has_right_bracket and not has_left_bracket ): raise ValueError("Invalid IPv6 URL") + if has_left_bracket and ( + netloc.count("[") != 1 or netloc.count("]") != 1 + ): + raise ValueError("Invalid IPv6 URL") if has_left_bracket: bracketed_host = netloc.partition("[")[2].partition("]")[0] # Valid bracketed hosts are defined in @@ -120,6 +124,12 @@ def split_netloc( password = None if "[" in hostinfo: + if ( + hostinfo.count("[") != 1 + or hostinfo.count("]") != 1 + or not hostinfo.startswith("[") + ): + raise ValueError("Invalid IPv6 URL") _, _, bracketed = hostinfo.partition("[") hostname, _, port_str = bracketed.partition("]") _, _, port_str = port_str.partition(":") From 9584179444e735dcb310fab27cfbf1c855f27083 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Apr 2026 16:37:09 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- yarl/_parse.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/yarl/_parse.py b/yarl/_parse.py index 6ec649ffa..3f7b9ebf5 100644 --- a/yarl/_parse.py +++ b/yarl/_parse.py @@ -63,9 +63,7 @@ def split_url(url: str) -> SplitURLType: has_right_bracket and not has_left_bracket ): raise ValueError("Invalid IPv6 URL") - if has_left_bracket and ( - netloc.count("[") != 1 or netloc.count("]") != 1 - ): + if has_left_bracket and (netloc.count("[") != 1 or netloc.count("]") != 1): raise ValueError("Invalid IPv6 URL") if has_left_bracket: bracketed_host = netloc.partition("[")[2].partition("]")[0]