Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/http_client.jl
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,7 @@ function _do_incoming!(
previous_secure = current_secure
previous_address = current_address
previous_target = current_request.target
next_address, next_secure, next_target = _resolve_redirect_target(current_address, current_secure, location, current_request.target)
next_address, next_secure, next_target, next_host_header = _resolve_redirect_target(current_address, current_secure, location, current_request.target, current_request.host)
_emit_trace(
trace,
RedirectEvent(
Expand Down Expand Up @@ -940,7 +940,7 @@ function _do_incoming!(
cookies = Cookie[]
end
end
current_request.host = current_address
current_request.host = next_host_header
break
end
end
Expand Down Expand Up @@ -1944,7 +1944,7 @@ function request(
parsed.target;
headers=req_headers,
body=normalized_body.body,
host=parsed.address,
host=parsed.host_header,
content_length=normalized_body.content_length,
context=req_context,
)
Expand Down
23 changes: 19 additions & 4 deletions src/http_client_redirect.jl
Original file line number Diff line number Diff line change
Expand Up @@ -199,19 +199,34 @@ function _normalize_redirect_authority(authority::String, secure::Bool)::String
return HostResolvers.join_host_port(authority, secure ? 443 : 80)
end

function _resolve_redirect_target(current_address::String, current_secure::Bool, location::String, current_target::String)
# Returns `(address, secure, target, host_header)` for the next hop. `address`
# is the dial target (always carries a port); `host_header` is the authority as
# written for the `Host` header (default port never synthesized) — see
# `_urlparts_host_header!`. For a host-changing redirect both come from the
# parsed Location; for a same-authority relative redirect the host is unchanged,
# so `current_host_header` is carried through verbatim.
#
# `current_host_header` is the current hop's `request.host`, which is `nothing`
# for low-level `do!` callers that pin `Host` only in the request headers. The
# header is stripped on each hop by `_prepare_request_for_redirect`, so the next
# `Host` is taken from this returned value; on a relative redirect with no parsed
# host we fall back to the dial `address` (carrying the port, as before this
# change) rather than returning `nothing`, which would drop the `Host` header
# entirely. An absolute redirect always yields a proper parsed `host_header`.
function _resolve_redirect_target(current_address::String, current_secure::Bool, location::String, current_target::String, current_host_header::Union{Nothing,String})
scheme_match = match(r"^([A-Za-z][A-Za-z0-9+\\.-]*):", location)
if scheme_match !== nothing
scheme = lowercase(String(scheme_match.captures[1]))
(scheme == "http" || scheme == "https") || throw(ProtocolError("unsupported redirect location scheme '$scheme'"))
parsed = _parse_http_url(location)
return parsed.address, parsed.secure, parsed.target
return parsed.address, parsed.secure, parsed.target, parsed.host_header
end
if startswith(location, "//")
parsed = _parse_http_url(string(current_secure ? "https:" : "http:", location))
return parsed.address, parsed.secure, parsed.target
return parsed.address, parsed.secure, parsed.target, parsed.host_header
end
return current_address, current_secure, _resolve_relative_redirect_request_target(current_target, location)
next_host_header = current_host_header === nothing ? current_address : current_host_header
return current_address, current_secure, _resolve_relative_redirect_request_target(current_target, location), next_host_header
end

function _rewrite_method_for_redirect(method::String, status::Int, policy::_RedirectPolicy)::String
Expand Down
28 changes: 27 additions & 1 deletion src/http_client_url.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,29 @@ function _urlparts_url!(parts::_URLParts)::String
return url
end

# Authority for the `Host` header. This mirrors the URL as written: an explicit
# port is preserved, but the scheme's default port is never synthesized. The
# connection address (`_urlparts_address!`) always carries a port for dialing,
# so the two intentionally differ for a default-port URL.
#
# Synthesizing the default port into `Host` (e.g. `s3.amazonaws.com:443`) is
# legal per RFC 9110 but breaks any server that treats the Host verbatim. The
# motivating case is AWS SigV4: it signs the canonical host (`s3.amazonaws.com`)
# and rejects a request whose `Host` carries `:443`. Go's net/http, curl and
# HTTP.jl 1.x all keep the Host authority as written; this restores that.
#
# For an explicit port we use the authority directly. For a default-port URL we
# strip the synthesized `:<default_port>` off `address` rather than reusing
# `server_name`, because `server_name` unwraps IPv6 brackets (`2001:db8::1`)
# whereas a `Host` header requires them (`[2001:db8::1]`); `address` keeps the
# brackets (`[2001:db8::1]:443`), so trimming the port yields the correct form.
function _urlparts_host_header!(parts::_URLParts)::String
getfield(parts, :has_explicit_port) && return _urlparts_address!(parts)
address = _urlparts_address!(parts)
suffix = string(':', Int(getfield(parts, :default_port)))
return endswith(address, suffix) ? address[1:(end - length(suffix))] : address
end

function _urlparts_authorization!(parts::_URLParts)::Union{Nothing,String}
getfield(parts, :has_userinfo) || return nothing
cached = getfield(parts, :authorization_cache)
Expand All @@ -166,6 +189,8 @@ function Base.getproperty(parts::_URLParts, sym::Symbol)
return _urlparts_target!(parts)
elseif sym === :server_name
return _urlparts_server_name!(parts)
elseif sym === :host_header
return _urlparts_host_header!(parts)
elseif sym === :url
return _urlparts_url!(parts)
elseif sym === :authorization
Expand All @@ -175,7 +200,8 @@ function Base.getproperty(parts::_URLParts, sym::Symbol)
end

function Base.propertynames(::_URLParts, private::Bool=false)
return private ? fieldnames(_URLParts) : (:secure, :address, :target, :server_name, :url, :authorization)
return private ? fieldnames(_URLParts) :
(:secure, :address, :target, :server_name, :host_header, :url, :authorization)
end

@inline function _request_url(secure::Bool, address::String, target::String)::String
Expand Down
2 changes: 1 addition & 1 deletion src/http_stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ function _client_stream_request(
copy(headers),
Headers(),
EmptyBody(),
parsed.address,
parsed.host_header,
Int64(0),
UInt8(1),
UInt8(1),
Expand Down
7 changes: 4 additions & 3 deletions src/http_websockets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -900,7 +900,7 @@ function _open_client_websocket(
pmce_offer = compress ? _pmce_client_offer_header() : nothing
key = ws_random_handshake_key()
_apply_websocket_request_headers!(req_headers, key, subprotocols, pmce_offer)
request = Request("GET", parsed.target; headers=req_headers, host=parsed.address, body=EmptyBody(), content_length=0)
request = Request("GET", parsed.target; headers=req_headers, host=parsed.host_header, body=EmptyBody(), content_length=0)
request_timeout_ns, timeout_config = _resolve_request_timeout_settings(
request_timeout,
connect_timeout,
Expand Down Expand Up @@ -990,17 +990,18 @@ function _open_client_websocket(
previous_secure = current_secure
previous_address = current_address
previous_target = current_request.target
current_address, current_secure, next_target = _resolve_redirect_target(
current_address, current_secure, next_target, next_host_header = _resolve_redirect_target(
current_address,
current_secure,
_normalize_websocket_redirect_location(location::String),
current_request.target,
current_request.host,
)
current_server_name = _host_for_sni(current_address)
current_request = _prepare_request_for_redirect(current_request, response.status, next_target, redirect_policy)
key = ws_random_handshake_key()
_apply_websocket_request_headers!(current_request.headers, key, subprotocols, pmce_offer)
current_request.host = current_address
current_request.host = next_host_header
next_ref = _redirect_referer(previous_secure, previous_address, previous_target, current_secure, header(current_request.headers, "Referer", nothing))
if next_ref === nothing
removeheader(current_request.headers, "Referer")
Expand Down
138 changes: 131 additions & 7 deletions test/http_client_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -777,37 +777,140 @@ end
end

@testset "HTTP client redirect absolute location default ports" begin
address_h2, secure_h2, target_h2 = HT._resolve_redirect_target("origin.com:443", true, "https://www.google.com/search", "/")
# `_resolve_redirect_target` returns `(address, secure, target, host_header)`.
# `address` keeps the dial port; `host_header` mirrors the next hop's authority
# as written (default port never synthesized), so it feeds `request.host` on a
# redirect just as `parsed.host_header` does on the initial request.
address_h2, secure_h2, target_h2, host_h2 = HT._resolve_redirect_target("origin.com:443", true, "https://www.google.com/search", "/", "origin.com")
@test address_h2 == "www.google.com:443"
@test secure_h2
@test target_h2 == "/search"
@test host_h2 == "www.google.com"

address_h1, secure_h1, target_h1 = HT._resolve_redirect_target("origin.com:80", false, "http://example.com/next", "/")
address_h1, secure_h1, target_h1, host_h1 = HT._resolve_redirect_target("origin.com:80", false, "http://example.com/next", "/", "origin.com")
@test address_h1 == "example.com:80"
@test !secure_h1
@test target_h1 == "/next"
@test host_h1 == "example.com"

address_rel, secure_rel, target_rel = HT._resolve_redirect_target("origin.com:443", true, "//cdn.example.com/assets", "/")
# An explicit default port in the Location is preserved in the Host header.
address_exp, secure_exp, target_exp, host_exp = HT._resolve_redirect_target("origin.com:443", true, "https://example.com:443/next", "/", "origin.com")
@test address_exp == "example.com:443"
@test host_exp == "example.com:443"

address_rel, secure_rel, target_rel, host_rel = HT._resolve_redirect_target("origin.com:443", true, "//cdn.example.com/assets", "/", "origin.com")
@test address_rel == "cdn.example.com:443"
@test secure_rel
@test target_rel == "/assets"
@test host_rel == "cdn.example.com"

address_dot, secure_dot, target_dot = HT._resolve_redirect_target("origin.com:80", false, "../next", "/a/b/c")
# A same-authority relative redirect carries the current host header through
# verbatim (it must not regress a bare host back to a default-port form).
address_dot, secure_dot, target_dot, host_dot = HT._resolve_redirect_target("origin.com:80", false, "../next", "/a/b/c", "origin.com")
@test address_dot == "origin.com:80"
@test !secure_dot
@test target_dot == "/a/next"
@test host_dot == "origin.com"

address_query, secure_query, target_query = HT._resolve_redirect_target("origin.com:80", false, "?q=1", "/a/b/c")
address_query, secure_query, target_query, host_query = HT._resolve_redirect_target("origin.com:80", false, "?q=1", "/a/b/c", "origin.com")
@test address_query == "origin.com:80"
@test !secure_query
@test target_query == "/a/b/c?q=1"
@test host_query == "origin.com"

address_frag, secure_frag, target_frag = HT._resolve_redirect_target("origin.com:80", false, "#frag", "/a/b/c?x=1")
address_frag, secure_frag, target_frag, host_frag = HT._resolve_redirect_target("origin.com:80", false, "#frag", "/a/b/c?x=1", "origin.com")
@test address_frag == "origin.com:80"
@test !secure_frag
@test target_frag == "/a/b/c?x=1"
@test host_frag == "origin.com"

@test_throws HT.ProtocolError HT._resolve_redirect_target("origin.com:80", false, "ftp://example.com/file", "/", "origin.com")

# Low-level `do!` callers may pin `Host` only in headers, leaving
# `request.host === nothing`. `_resolve_redirect_target` must accept a
# `nothing` current host (not throw a MethodError on the `::String` arg) and
# never propagate `nothing` into `request.host` — otherwise the redirected
# request would carry no `Host` header at all.
address_nh, secure_nh, target_nh, host_nh = HT._resolve_redirect_target("origin.com:443", true, "https://www.google.com/search", "/", nothing)
@test address_nh == "www.google.com:443"
@test host_nh == "www.google.com" # absolute redirect: from parsed Location
address_nr, secure_nr, target_nr, host_nr = HT._resolve_redirect_target("origin.com:443", true, "../next", "/a/b/c", nothing)
@test address_nr == "origin.com:443"
@test host_nr == "origin.com:443" # relative redirect, no parsed host: falls back to dial address
end

@test_throws HT.ProtocolError HT._resolve_redirect_target("origin.com:80", false, "ftp://example.com/file", "/")
@testset "do! redirect with header-only Host (request.host === nothing)" begin
# Low-level `do!` callers may set `Host` only in the request headers, leaving
# `request.host === nothing`. A relative redirect must still be followed: the
# threaded `current_host_header` is `nothing` here, and `_resolve_redirect_target`
# must accept it (not throw a MethodError) and re-establish a `Host` for the
# next hop rather than dropping it. Regression for PR #1318 review follow-up.
listener = ND.listen("tcp", "127.0.0.1:0"; backlog = 8)
laddr = NC.addr(listener)::NC.SocketAddrV4
address = ND.join_host_port("127.0.0.1", Int(laddr.port))
seen_host_hop1 = Ref{Union{Nothing, String}}(nothing)
seen_host_hop2 = Ref{Union{Nothing, String}}(nothing)
server_task = errormonitor(Threads.@spawn begin
# Hop 1: /start -> 302 to a *relative* Location on the same authority.
conn1 = NC.accept(listener)
try
req1 = HT.read_request(HT._ConnReader(conn1))
seen_host_hop1[] = HT.header(req1.headers, "Host", nothing)
headers = HT.Headers()
HT.setheader(headers, "Location", "/final")
HT.setheader(headers, "Connection", "close")
_send_response_client!(conn1, req1; status = 302, reason = "Found", headers = headers, close_conn = true)
finally
HTTP.@try_ignore NC.close(conn1)
end
# Hop 2: /final -> 200; capture the Host the client sent after redirect.
conn2 = NC.accept(listener)
try
req2 = HT.read_request(HT._ConnReader(conn2))
seen_host_hop2[] = HT.header(req2.headers, "Host", nothing)
_send_response_client!(conn2, req2; body_text = "ok", close_conn = true)
finally
HTTP.@try_ignore NC.close(conn2)
end
return nothing
end)
client = HT.Client(transport = HT.Transport(max_idle_per_host = 4, max_idle_total = 4), cookiejar = nothing)
try
headers = HT.Headers()
HT.setheader(headers, "Host", address)
req = HT.Request("GET", "/start"; headers = headers, body = HT.EmptyBody(), content_length = 0)
@test req.host === nothing
response = HT.do!(client, address, req; redirect_limit = 1, protocol = :h1)
@test response.status == 200
@test String(_read_all_body_bytes_client(response.body)) == "ok"
_wait_task_client!(server_task)
@test seen_host_hop1[] == address
# The redirected request still carries a Host header (re-established from
# the dial address since the caller provided no parsed host).
@test seen_host_hop2[] == address
finally
close(client.transport)
HTTP.@try_ignore NC.close(listener)
end
end

@testset "streaming request Host mirrors URL authority as written" begin
# `HTTP.open`/streaming builds its `Request` from `_client_stream_request`,
# which must use `host_header` (authority as written), not the dial
# `address` (which always carries a port). Otherwise a default-port URL
# leaks `Host: example.com:443` and breaks AWS SigV4, exactly as the
# high-level path did.
bare = HT._client_stream_request("PUT", HT._parse_http_url("https://example.com/upload"), HT.Headers(), Int64(0), nothing)
@test bare.host == "example.com"

explicit = HT._client_stream_request("PUT", HT._parse_http_url("https://example.com:443/upload"), HT.Headers(), Int64(0), nothing)
@test explicit.host == "example.com:443"

custom = HT._client_stream_request("PUT", HT._parse_http_url("http://minio:9000/bucket/key"), HT.Headers(), Int64(0), nothing)
@test custom.host == "minio:9000"

ipv6 = HT._client_stream_request("GET", HT._parse_http_url("https://[2001:db8::1]/x"), HT.Headers(), Int64(0), nothing)
@test ipv6.host == "[2001:db8::1]"
end

@testset "HTTP high-level redirect derives TLS server name per hop" begin
Expand Down Expand Up @@ -1281,6 +1384,24 @@ end
@test parsed_query.server_name == "example.com"
@test parsed_query.url == "https://example.com:443/?x=1&y=2"
@test parsed_query.authorization === nothing
# `host_header` mirrors the URL authority: the synthesized default port
# is never added (so the `Host` header stays `example.com`, matching the
# bare host servers like AWS SigV4 sign over), while `address` keeps the
# port for dialing.
@test parsed_query.host_header == "example.com"

parsed_default_port = HT._parse_http_url("https://example.com:443/explicit")
@test parsed_default_port.address == "example.com:443"
# An explicitly-written default port is preserved verbatim (as in Go).
@test parsed_default_port.host_header == "example.com:443"

parsed_http_default = HT._parse_http_url("http://example.com/plain")
@test parsed_http_default.address == "example.com:80"
@test parsed_http_default.host_header == "example.com"

parsed_custom_port = HT._parse_http_url("http://minio:9000/bucket/key")
@test parsed_custom_port.address == "minio:9000"
@test parsed_custom_port.host_header == "minio:9000"

parsed_query_uri = HT._parse_http_url(HT.URI("https://example.com?x=1"), Dict("y" => 2))
@test parsed_query_uri.secure
Expand All @@ -1302,11 +1423,14 @@ end
@test parsed_ipv6.server_name == "2001:db8::1"
@test parsed_ipv6.url == "https://[2001:db8::1]:443/ipv6"
@test parsed_ipv6.authorization === nothing
# IPv6 Host headers keep their brackets (unlike `server_name`).
@test parsed_ipv6.host_header == "[2001:db8::1]"

parsed_ipv6_port = HT._parse_http_url("https://[2001:db8::1]:8443/ipv6")
@test parsed_ipv6_port.address == "[2001:db8::1]:8443"
@test parsed_ipv6_port.server_name == "2001:db8::1"
@test parsed_ipv6_port.url == "https://[2001:db8::1]:8443/ipv6"
@test parsed_ipv6_port.host_header == "[2001:db8::1]:8443"

parsed_ipv6_uri = HT._parse_http_url(HT.URI("https://[2001:db8::1]/ipv6"))
@test parsed_ipv6_uri.address == "[2001:db8::1]:443"
Expand Down
Loading
Loading