diff --git a/src/http_client.jl b/src/http_client.jl index 8a29f07d9..4549e6f78 100644 --- a/src/http_client.jl +++ b/src/http_client.jl @@ -895,7 +895,7 @@ function _do_incoming!( previous_secure = current_secure previous_address = current_address previous_target = current_request.target - next_address, next_secure, next_target = _resolve_redirect_target(current_address, current_secure, location, current_request.target) + next_address, next_secure, next_target, next_host_header = _resolve_redirect_target(current_address, current_secure, location, current_request.target, current_request.host) _emit_trace( trace, RedirectEvent( @@ -940,7 +940,7 @@ function _do_incoming!( cookies = Cookie[] end end - current_request.host = current_address + current_request.host = next_host_header break end end @@ -1944,7 +1944,7 @@ function request( parsed.target; headers=req_headers, body=normalized_body.body, - host=parsed.address, + host=parsed.host_header, content_length=normalized_body.content_length, context=req_context, ) diff --git a/src/http_client_redirect.jl b/src/http_client_redirect.jl index 28c8fb0d5..040988276 100644 --- a/src/http_client_redirect.jl +++ b/src/http_client_redirect.jl @@ -199,19 +199,34 @@ function _normalize_redirect_authority(authority::String, secure::Bool)::String return HostResolvers.join_host_port(authority, secure ? 443 : 80) end -function _resolve_redirect_target(current_address::String, current_secure::Bool, location::String, current_target::String) +# Returns `(address, secure, target, host_header)` for the next hop. `address` +# is the dial target (always carries a port); `host_header` is the authority as +# written for the `Host` header (default port never synthesized) — see +# `_urlparts_host_header!`. For a host-changing redirect both come from the +# parsed Location; for a same-authority relative redirect the host is unchanged, +# so `current_host_header` is carried through verbatim. +# +# `current_host_header` is the current hop's `request.host`, which is `nothing` +# for low-level `do!` callers that pin `Host` only in the request headers. The +# header is stripped on each hop by `_prepare_request_for_redirect`, so the next +# `Host` is taken from this returned value; on a relative redirect with no parsed +# host we fall back to the dial `address` (carrying the port, as before this +# change) rather than returning `nothing`, which would drop the `Host` header +# entirely. An absolute redirect always yields a proper parsed `host_header`. +function _resolve_redirect_target(current_address::String, current_secure::Bool, location::String, current_target::String, current_host_header::Union{Nothing,String}) scheme_match = match(r"^([A-Za-z][A-Za-z0-9+\\.-]*):", location) if scheme_match !== nothing scheme = lowercase(String(scheme_match.captures[1])) (scheme == "http" || scheme == "https") || throw(ProtocolError("unsupported redirect location scheme '$scheme'")) parsed = _parse_http_url(location) - return parsed.address, parsed.secure, parsed.target + return parsed.address, parsed.secure, parsed.target, parsed.host_header end if startswith(location, "//") parsed = _parse_http_url(string(current_secure ? "https:" : "http:", location)) - return parsed.address, parsed.secure, parsed.target + return parsed.address, parsed.secure, parsed.target, parsed.host_header end - return current_address, current_secure, _resolve_relative_redirect_request_target(current_target, location) + next_host_header = current_host_header === nothing ? current_address : current_host_header + return current_address, current_secure, _resolve_relative_redirect_request_target(current_target, location), next_host_header end function _rewrite_method_for_redirect(method::String, status::Int, policy::_RedirectPolicy)::String diff --git a/src/http_client_url.jl b/src/http_client_url.jl index 148012120..24f91e0ce 100644 --- a/src/http_client_url.jl +++ b/src/http_client_url.jl @@ -147,6 +147,29 @@ function _urlparts_url!(parts::_URLParts)::String return url end +# Authority for the `Host` header. This mirrors the URL as written: an explicit +# port is preserved, but the scheme's default port is never synthesized. The +# connection address (`_urlparts_address!`) always carries a port for dialing, +# so the two intentionally differ for a default-port URL. +# +# Synthesizing the default port into `Host` (e.g. `s3.amazonaws.com:443`) is +# legal per RFC 9110 but breaks any server that treats the Host verbatim. The +# motivating case is AWS SigV4: it signs the canonical host (`s3.amazonaws.com`) +# and rejects a request whose `Host` carries `:443`. Go's net/http, curl and +# HTTP.jl 1.x all keep the Host authority as written; this restores that. +# +# For an explicit port we use the authority directly. For a default-port URL we +# strip the synthesized `:` off `address` rather than reusing +# `server_name`, because `server_name` unwraps IPv6 brackets (`2001:db8::1`) +# whereas a `Host` header requires them (`[2001:db8::1]`); `address` keeps the +# brackets (`[2001:db8::1]:443`), so trimming the port yields the correct form. +function _urlparts_host_header!(parts::_URLParts)::String + getfield(parts, :has_explicit_port) && return _urlparts_address!(parts) + address = _urlparts_address!(parts) + suffix = string(':', Int(getfield(parts, :default_port))) + return endswith(address, suffix) ? address[1:(end - length(suffix))] : address +end + function _urlparts_authorization!(parts::_URLParts)::Union{Nothing,String} getfield(parts, :has_userinfo) || return nothing cached = getfield(parts, :authorization_cache) @@ -166,6 +189,8 @@ function Base.getproperty(parts::_URLParts, sym::Symbol) return _urlparts_target!(parts) elseif sym === :server_name return _urlparts_server_name!(parts) + elseif sym === :host_header + return _urlparts_host_header!(parts) elseif sym === :url return _urlparts_url!(parts) elseif sym === :authorization @@ -175,7 +200,8 @@ function Base.getproperty(parts::_URLParts, sym::Symbol) end function Base.propertynames(::_URLParts, private::Bool=false) - return private ? fieldnames(_URLParts) : (:secure, :address, :target, :server_name, :url, :authorization) + return private ? fieldnames(_URLParts) : + (:secure, :address, :target, :server_name, :host_header, :url, :authorization) end @inline function _request_url(secure::Bool, address::String, target::String)::String diff --git a/src/http_stream.jl b/src/http_stream.jl index 6b0154809..da9c538bc 100644 --- a/src/http_stream.jl +++ b/src/http_stream.jl @@ -20,7 +20,7 @@ function _client_stream_request( copy(headers), Headers(), EmptyBody(), - parsed.address, + parsed.host_header, Int64(0), UInt8(1), UInt8(1), diff --git a/src/http_websockets.jl b/src/http_websockets.jl index a590e88da..b9517ffdc 100644 --- a/src/http_websockets.jl +++ b/src/http_websockets.jl @@ -900,7 +900,7 @@ function _open_client_websocket( pmce_offer = compress ? _pmce_client_offer_header() : nothing key = ws_random_handshake_key() _apply_websocket_request_headers!(req_headers, key, subprotocols, pmce_offer) - request = Request("GET", parsed.target; headers=req_headers, host=parsed.address, body=EmptyBody(), content_length=0) + request = Request("GET", parsed.target; headers=req_headers, host=parsed.host_header, body=EmptyBody(), content_length=0) request_timeout_ns, timeout_config = _resolve_request_timeout_settings( request_timeout, connect_timeout, @@ -990,17 +990,18 @@ function _open_client_websocket( previous_secure = current_secure previous_address = current_address previous_target = current_request.target - current_address, current_secure, next_target = _resolve_redirect_target( + current_address, current_secure, next_target, next_host_header = _resolve_redirect_target( current_address, current_secure, _normalize_websocket_redirect_location(location::String), current_request.target, + current_request.host, ) current_server_name = _host_for_sni(current_address) current_request = _prepare_request_for_redirect(current_request, response.status, next_target, redirect_policy) key = ws_random_handshake_key() _apply_websocket_request_headers!(current_request.headers, key, subprotocols, pmce_offer) - current_request.host = current_address + current_request.host = next_host_header next_ref = _redirect_referer(previous_secure, previous_address, previous_target, current_secure, header(current_request.headers, "Referer", nothing)) if next_ref === nothing removeheader(current_request.headers, "Referer") diff --git a/test/http_client_tests.jl b/test/http_client_tests.jl index 8180156f0..6eb7c7edb 100644 --- a/test/http_client_tests.jl +++ b/test/http_client_tests.jl @@ -777,37 +777,140 @@ end end @testset "HTTP client redirect absolute location default ports" begin - address_h2, secure_h2, target_h2 = HT._resolve_redirect_target("origin.com:443", true, "https://www.google.com/search", "/") + # `_resolve_redirect_target` returns `(address, secure, target, host_header)`. + # `address` keeps the dial port; `host_header` mirrors the next hop's authority + # as written (default port never synthesized), so it feeds `request.host` on a + # redirect just as `parsed.host_header` does on the initial request. + address_h2, secure_h2, target_h2, host_h2 = HT._resolve_redirect_target("origin.com:443", true, "https://www.google.com/search", "/", "origin.com") @test address_h2 == "www.google.com:443" @test secure_h2 @test target_h2 == "/search" + @test host_h2 == "www.google.com" - address_h1, secure_h1, target_h1 = HT._resolve_redirect_target("origin.com:80", false, "http://example.com/next", "/") + address_h1, secure_h1, target_h1, host_h1 = HT._resolve_redirect_target("origin.com:80", false, "http://example.com/next", "/", "origin.com") @test address_h1 == "example.com:80" @test !secure_h1 @test target_h1 == "/next" + @test host_h1 == "example.com" - address_rel, secure_rel, target_rel = HT._resolve_redirect_target("origin.com:443", true, "//cdn.example.com/assets", "/") + # An explicit default port in the Location is preserved in the Host header. + address_exp, secure_exp, target_exp, host_exp = HT._resolve_redirect_target("origin.com:443", true, "https://example.com:443/next", "/", "origin.com") + @test address_exp == "example.com:443" + @test host_exp == "example.com:443" + + address_rel, secure_rel, target_rel, host_rel = HT._resolve_redirect_target("origin.com:443", true, "//cdn.example.com/assets", "/", "origin.com") @test address_rel == "cdn.example.com:443" @test secure_rel @test target_rel == "/assets" + @test host_rel == "cdn.example.com" - address_dot, secure_dot, target_dot = HT._resolve_redirect_target("origin.com:80", false, "../next", "/a/b/c") + # A same-authority relative redirect carries the current host header through + # verbatim (it must not regress a bare host back to a default-port form). + address_dot, secure_dot, target_dot, host_dot = HT._resolve_redirect_target("origin.com:80", false, "../next", "/a/b/c", "origin.com") @test address_dot == "origin.com:80" @test !secure_dot @test target_dot == "/a/next" + @test host_dot == "origin.com" - address_query, secure_query, target_query = HT._resolve_redirect_target("origin.com:80", false, "?q=1", "/a/b/c") + address_query, secure_query, target_query, host_query = HT._resolve_redirect_target("origin.com:80", false, "?q=1", "/a/b/c", "origin.com") @test address_query == "origin.com:80" @test !secure_query @test target_query == "/a/b/c?q=1" + @test host_query == "origin.com" - address_frag, secure_frag, target_frag = HT._resolve_redirect_target("origin.com:80", false, "#frag", "/a/b/c?x=1") + address_frag, secure_frag, target_frag, host_frag = HT._resolve_redirect_target("origin.com:80", false, "#frag", "/a/b/c?x=1", "origin.com") @test address_frag == "origin.com:80" @test !secure_frag @test target_frag == "/a/b/c?x=1" + @test host_frag == "origin.com" + + @test_throws HT.ProtocolError HT._resolve_redirect_target("origin.com:80", false, "ftp://example.com/file", "/", "origin.com") + + # Low-level `do!` callers may pin `Host` only in headers, leaving + # `request.host === nothing`. `_resolve_redirect_target` must accept a + # `nothing` current host (not throw a MethodError on the `::String` arg) and + # never propagate `nothing` into `request.host` — otherwise the redirected + # request would carry no `Host` header at all. + address_nh, secure_nh, target_nh, host_nh = HT._resolve_redirect_target("origin.com:443", true, "https://www.google.com/search", "/", nothing) + @test address_nh == "www.google.com:443" + @test host_nh == "www.google.com" # absolute redirect: from parsed Location + address_nr, secure_nr, target_nr, host_nr = HT._resolve_redirect_target("origin.com:443", true, "../next", "/a/b/c", nothing) + @test address_nr == "origin.com:443" + @test host_nr == "origin.com:443" # relative redirect, no parsed host: falls back to dial address +end - @test_throws HT.ProtocolError HT._resolve_redirect_target("origin.com:80", false, "ftp://example.com/file", "/") +@testset "do! redirect with header-only Host (request.host === nothing)" begin + # Low-level `do!` callers may set `Host` only in the request headers, leaving + # `request.host === nothing`. A relative redirect must still be followed: the + # threaded `current_host_header` is `nothing` here, and `_resolve_redirect_target` + # must accept it (not throw a MethodError) and re-establish a `Host` for the + # next hop rather than dropping it. Regression for PR #1318 review follow-up. + listener = ND.listen("tcp", "127.0.0.1:0"; backlog = 8) + laddr = NC.addr(listener)::NC.SocketAddrV4 + address = ND.join_host_port("127.0.0.1", Int(laddr.port)) + seen_host_hop1 = Ref{Union{Nothing, String}}(nothing) + seen_host_hop2 = Ref{Union{Nothing, String}}(nothing) + server_task = errormonitor(Threads.@spawn begin + # Hop 1: /start -> 302 to a *relative* Location on the same authority. + conn1 = NC.accept(listener) + try + req1 = HT.read_request(HT._ConnReader(conn1)) + seen_host_hop1[] = HT.header(req1.headers, "Host", nothing) + headers = HT.Headers() + HT.setheader(headers, "Location", "/final") + HT.setheader(headers, "Connection", "close") + _send_response_client!(conn1, req1; status = 302, reason = "Found", headers = headers, close_conn = true) + finally + HTTP.@try_ignore NC.close(conn1) + end + # Hop 2: /final -> 200; capture the Host the client sent after redirect. + conn2 = NC.accept(listener) + try + req2 = HT.read_request(HT._ConnReader(conn2)) + seen_host_hop2[] = HT.header(req2.headers, "Host", nothing) + _send_response_client!(conn2, req2; body_text = "ok", close_conn = true) + finally + HTTP.@try_ignore NC.close(conn2) + end + return nothing + end) + client = HT.Client(transport = HT.Transport(max_idle_per_host = 4, max_idle_total = 4), cookiejar = nothing) + try + headers = HT.Headers() + HT.setheader(headers, "Host", address) + req = HT.Request("GET", "/start"; headers = headers, body = HT.EmptyBody(), content_length = 0) + @test req.host === nothing + response = HT.do!(client, address, req; redirect_limit = 1, protocol = :h1) + @test response.status == 200 + @test String(_read_all_body_bytes_client(response.body)) == "ok" + _wait_task_client!(server_task) + @test seen_host_hop1[] == address + # The redirected request still carries a Host header (re-established from + # the dial address since the caller provided no parsed host). + @test seen_host_hop2[] == address + finally + close(client.transport) + HTTP.@try_ignore NC.close(listener) + end +end + +@testset "streaming request Host mirrors URL authority as written" begin + # `HTTP.open`/streaming builds its `Request` from `_client_stream_request`, + # which must use `host_header` (authority as written), not the dial + # `address` (which always carries a port). Otherwise a default-port URL + # leaks `Host: example.com:443` and breaks AWS SigV4, exactly as the + # high-level path did. + bare = HT._client_stream_request("PUT", HT._parse_http_url("https://example.com/upload"), HT.Headers(), Int64(0), nothing) + @test bare.host == "example.com" + + explicit = HT._client_stream_request("PUT", HT._parse_http_url("https://example.com:443/upload"), HT.Headers(), Int64(0), nothing) + @test explicit.host == "example.com:443" + + custom = HT._client_stream_request("PUT", HT._parse_http_url("http://minio:9000/bucket/key"), HT.Headers(), Int64(0), nothing) + @test custom.host == "minio:9000" + + ipv6 = HT._client_stream_request("GET", HT._parse_http_url("https://[2001:db8::1]/x"), HT.Headers(), Int64(0), nothing) + @test ipv6.host == "[2001:db8::1]" end @testset "HTTP high-level redirect derives TLS server name per hop" begin @@ -1281,6 +1384,24 @@ end @test parsed_query.server_name == "example.com" @test parsed_query.url == "https://example.com:443/?x=1&y=2" @test parsed_query.authorization === nothing + # `host_header` mirrors the URL authority: the synthesized default port + # is never added (so the `Host` header stays `example.com`, matching the + # bare host servers like AWS SigV4 sign over), while `address` keeps the + # port for dialing. + @test parsed_query.host_header == "example.com" + + parsed_default_port = HT._parse_http_url("https://example.com:443/explicit") + @test parsed_default_port.address == "example.com:443" + # An explicitly-written default port is preserved verbatim (as in Go). + @test parsed_default_port.host_header == "example.com:443" + + parsed_http_default = HT._parse_http_url("http://example.com/plain") + @test parsed_http_default.address == "example.com:80" + @test parsed_http_default.host_header == "example.com" + + parsed_custom_port = HT._parse_http_url("http://minio:9000/bucket/key") + @test parsed_custom_port.address == "minio:9000" + @test parsed_custom_port.host_header == "minio:9000" parsed_query_uri = HT._parse_http_url(HT.URI("https://example.com?x=1"), Dict("y" => 2)) @test parsed_query_uri.secure @@ -1302,11 +1423,14 @@ end @test parsed_ipv6.server_name == "2001:db8::1" @test parsed_ipv6.url == "https://[2001:db8::1]:443/ipv6" @test parsed_ipv6.authorization === nothing + # IPv6 Host headers keep their brackets (unlike `server_name`). + @test parsed_ipv6.host_header == "[2001:db8::1]" parsed_ipv6_port = HT._parse_http_url("https://[2001:db8::1]:8443/ipv6") @test parsed_ipv6_port.address == "[2001:db8::1]:8443" @test parsed_ipv6_port.server_name == "2001:db8::1" @test parsed_ipv6_port.url == "https://[2001:db8::1]:8443/ipv6" + @test parsed_ipv6_port.host_header == "[2001:db8::1]:8443" parsed_ipv6_uri = HT._parse_http_url(HT.URI("https://[2001:db8::1]/ipv6")) @test parsed_ipv6_uri.address == "[2001:db8::1]:443" diff --git a/test/http_websocket_client_tests.jl b/test/http_websocket_client_tests.jl index cbbeeace2..fef7575ef 100644 --- a/test/http_websocket_client_tests.jl +++ b/test/http_websocket_client_tests.jl @@ -112,6 +112,20 @@ function _read_ws_frames(conn, ws::W.Conn) return HT.WebSockets.ws_on_incoming_data!(ws, buf) end +@testset "WebSocket handshake Host mirrors URL authority as written" begin + # The WS handshake `Host` is built from `parsed.host_header`, where `parsed` + # comes from `_parse_websocket_url` (which rewrites ws->http / wss->https + # before parsing). Exercise that mapping directly: a default-port wss/ws URL + # must yield a bare Host, while an explicit or custom port is preserved. + @test W._parse_websocket_url("wss://example.com/chat").host_header == "example.com" + @test W._parse_websocket_url("ws://example.com/chat").host_header == "example.com" + @test W._parse_websocket_url("wss://example.com:443/chat").host_header == "example.com:443" + @test W._parse_websocket_url("ws://minio:9000/chat").host_header == "minio:9000" + @test W._parse_websocket_url("wss://[2001:db8::1]/chat").host_header == "[2001:db8::1]" + # The dial address still carries the port for connecting. + @test W._parse_websocket_url("wss://example.com/chat").address == "example.com:443" +end + @testset "HTTP.WebSockets client open over ws" begin listener = nothing task = nothing @@ -137,6 +151,12 @@ end read_idle_timeout = 2, write_idle_timeout = 2, ) + # The handshake `Host` mirrors the URL authority as written (built from + # `parsed.host_header`, not the dial `address`). Here the URL carries an + # explicit port so it is preserved verbatim; the default-port stripping + # case is covered by the shared `host_header` / `_resolve_redirect_target` + # unit tests in http_client_tests.jl. + @test ws.handshake_request.host == address @test HT.get_request_context(ws.handshake_request).deadline_ns != 0 timeout_config = HT.get_request_context(ws.handshake_request).timeout_config @test timeout_config !== nothing