Skip to content

Commit d562027

Browse files
committed
make some improvements
1 parent cded9e7 commit d562027

6 files changed

Lines changed: 657 additions & 60 deletions

File tree

examples/wpt_cost.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
use std::time::Instant;
2+
3+
fn main() {
4+
let raw = std::fs::read_to_string("tests/wpt/urltestdata.json").unwrap();
5+
let data: serde_json::Value = serde_json::from_str(&raw).unwrap();
6+
let entries: Vec<_> = data.as_array().unwrap().iter()
7+
.filter_map(|e| e.as_object())
8+
.filter_map(|o| {
9+
let input = o.get("input")?.as_str()?;
10+
let base = o.get("base").and_then(|b| b.as_str()).unwrap_or("");
11+
Some((input.to_string(), base.to_string()))
12+
})
13+
.collect();
14+
15+
let n = 2000u32;
16+
// warm up
17+
for _ in 0..200 {
18+
for (inp, base) in &entries {
19+
let b = if base.is_empty() { None } else { Some(base.as_str()) };
20+
let _ = ada_url::Url::parse(inp.as_str(), b);
21+
}
22+
}
23+
24+
// Timed run
25+
let t = Instant::now();
26+
for _ in 0..n {
27+
for (inp, base) in &entries {
28+
let b = if base.is_empty() { None } else { Some(std::hint::black_box(base.as_str())) };
29+
let _ = std::hint::black_box(ada_url::Url::parse(std::hint::black_box(inp.as_str()), b));
30+
}
31+
}
32+
let elapsed = t.elapsed();
33+
let per_iter = elapsed.as_nanos() as f64 / n as f64;
34+
let per_url = per_iter / entries.len() as f64;
35+
println!("Total per iteration: {:.1}µs ({} entries, {:.1}ns/url)", per_iter/1000.0, entries.len(), per_url);
36+
37+
// Now simulate the benchmark (with base re-parse like the bench does)
38+
let t2 = Instant::now();
39+
for _ in 0..n {
40+
let mut href_size = 0usize;
41+
for (input, base) in &entries {
42+
let parsed = if !base.is_empty() {
43+
match ada_url::Url::parse(std::hint::black_box(base.as_str()), None::<&str>) {
44+
Ok(base_url) => {
45+
let base_href = base_url.href().to_owned();
46+
ada_url::Url::parse(std::hint::black_box(input.as_str()), Some(base_href.as_str()))
47+
}
48+
Err(_) => continue,
49+
}
50+
} else {
51+
ada_url::Url::parse(std::hint::black_box(input.as_str()), None::<&str>)
52+
};
53+
if let Ok(url) = parsed { href_size += url.href().len(); }
54+
}
55+
std::hint::black_box(href_size);
56+
}
57+
let e2 = t2.elapsed();
58+
let per_bench_iter = e2.as_nanos() as f64 / n as f64;
59+
println!("Benchmark simulation: {:.1}µs per iteration", per_bench_iter/1000.0);
60+
println!("Overhead from double-parse: {:.1}µs ({:.1}%)", (per_bench_iter - per_iter)/1000.0, (per_bench_iter - per_iter) / per_iter * 100.0);
61+
}
62+
// This won't compile as-is, use a separate test

src/checkers.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,106 @@ pub fn path_signature(input: &str) -> u8 {
176176
}
177177
}
178178

179+
/// Full WHATWG IPv4 address parser — handles decimal, octal (leading 0),
/// hex (0x/0X), and 1–4 dot-separated parts (e.g. "127.1", "0xc0a80101").
///
/// Exactly one trailing dot is tolerated ("1.2.3.4." parses like "1.2.3.4").
/// Any other empty part — a second trailing dot ("1.2.3.4..", "1..") or a
/// doubled separator ("1..2") — makes the input invalid.
///
/// Every part except the last must fit in one octet (≤ 255). The last part
/// fills all remaining octets, so with `k` parts before it the last value
/// must be below `256^(4 - k)`.
///
/// Returns `Some(packed_u32)` on success, `None` for invalid input.
/// This is used by `try_parse_absolute_fast` to normalise IPv4 addresses in-place.
pub fn parse_ipv4_address(input: &str) -> Option<u32> {
    // Strip at most ONE trailing dot. Stripping all of them would wrongly
    // accept inputs such as "1.2.3.4..", whose extra empty part is invalid.
    let input = input.strip_suffix('.').unwrap_or(input);
    if input.is_empty() {
        return None;
    }

    let mut parts = 0u32; // dot-separated parts consumed so far
    let mut ipv4: u64 = 0;
    let mut rem = input;

    // Invariant: `rem` is non-empty at the top of every iteration.
    while parts < 4 {
        // Determine the radix from the prefix and parse one part.
        let (val, consumed) = match rem.as_bytes() {
            // A bare "0x" (end of input or directly before a dot) is zero.
            [b'0', b'x' | b'X'] | [b'0', b'x' | b'X', b'.', ..] => (0u64, 2usize),
            // Hexadecimal digits after the "0x" prefix.
            [b'0', b'x' | b'X', ..] => {
                let (v, c) = parse_uint_raw(&rem[2..], 16)?;
                (v, 2 + c)
            }
            // Leading zero followed by a digit: octal ("08" fails in the helper).
            [b'0', b'0'..=b'9', ..] => {
                let (v, c) = parse_uint_raw(&rem[1..], 8)?;
                (v, 1 + c)
            }
            // Decimal
            _ => parse_uint_raw(rem, 10)?,
        };
        // Only ASCII bytes were consumed, so this is a valid char boundary.
        rem = &rem[consumed..];

        if rem.is_empty() {
            // Final (possibly multi-octet) part: it occupies every octet that
            // the preceding parts left unused.
            let bits = 32 - parts * 8;
            if val >= 1u64 << bits {
                return None; // overflow
            }
            ipv4 = (ipv4 << bits) | val;
            return u32::try_from(ipv4).ok();
        }

        // Intermediate part — must be a single octet ≤ 255 followed by '.'
        if val > 255 || !rem.starts_with('.') {
            return None;
        }
        ipv4 = (ipv4 << 8) | val;
        rem = &rem[1..]; // skip '.'
        if rem.is_empty() {
            // A dot with nothing after it is an empty part (only reachable
            // when the original input ended in two or more dots).
            return None;
        }
        parts += 1;
    }

    // Four parts were consumed and input still remains: too many parts.
    None
}

/// Parse an unsigned integer of the given radix from the start of `s`.
///
/// Radix 16 accepts `0-9a-fA-F`, radix 8 accepts `0-7`, and any other radix
/// value is treated as decimal (`0-9`). Parsing stops at the first byte that
/// is not a digit of the radix.
///
/// Returns `(value, bytes_consumed)`, or `None` if no digits were found or
/// the value does not fit in a `u64`.
#[inline]
fn parse_uint_raw(s: &str, radix: u64) -> Option<(u64, usize)> {
    let mut value = 0u64;
    let mut consumed = 0usize;
    for &byte in s.as_bytes() {
        let digit = match (radix, byte) {
            (16, b'a'..=b'f') => byte - b'a' + 10,
            (16, b'A'..=b'F') => byte - b'A' + 10,
            (8, b'0'..=b'7') => byte - b'0',
            (8, _) => break,
            (_, b'0'..=b'9') => byte - b'0',
            _ => break,
        };
        value = value.checked_mul(radix)?.checked_add(u64::from(digit))?;
        consumed += 1;
    }
    if consumed == 0 {
        None
    } else {
        Some((value, consumed))
    }
}
278+
179279
/// Check that the domain name length and label lengths are within DNS limits.
180280
pub fn verify_dns_length(input: &str) -> bool {
181281
let s = input.strip_suffix('.').unwrap_or(input);

src/lib.rs

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,28 @@ impl Url {
218218
where
219219
Input: AsRef<str>,
220220
{
221+
let input_str = input.as_ref();
222+
223+
// Optimised path: when a base is supplied but the input is itself an
224+
// absolute URL, the parsed base is never consulted for resolution.
225+
// Instead of fully parsing the base string into a `Url` (which
226+
// allocates a String buffer), validate it with the zero-allocation
227+
// validator. The spec still requires failure when the base is invalid,
228+
// so we check that too — but without heap allocation.
229+
if let Some(b) = base
230+
&& let Some(url) = parser::try_parse_absolute_fast(input_str)
231+
{
232+
// Validate base cheaply: try the zero-alloc fast checker first,
233+
// fall back to the full zero-alloc validator for edge cases.
234+
let base_ok =
235+
parser::try_validate_absolute_fast(b).is_some() || validator::can_parse_no_base(b);
236+
return if base_ok {
237+
Ok(url)
238+
} else {
239+
Err(ParseUrlError { input })
240+
};
241+
}
242+
221243
let base_url = if let Some(b) = base {
222244
match parser::parse_url(b, None) {
223245
Some(u) if u.is_valid => Some(u),
@@ -227,12 +249,33 @@ impl Url {
227249
None
228250
};
229251

230-
match parser::parse_url(input.as_ref(), base_url.as_ref()) {
252+
match parser::parse_url(input_str, base_url.as_ref()) {
231253
Some(u) if u.is_valid => Ok(u),
232254
_ => Err(ParseUrlError { input }),
233255
}
234256
}
235257

258+
/// Parse `input` relative to an already-parsed `base` URL.
259+
///
260+
/// This is more efficient than [`Url::parse`] with a base string because the
261+
/// base URL is **not** re-parsed — use this in hot loops where the same base
262+
/// is reused across many inputs (e.g. the WPT URL benchmark pattern).
263+
///
264+
/// Returns `None` when either `base` is invalid or `input` cannot be resolved.
265+
#[must_use]
266+
pub fn parse_with_base<Input>(input: Input, base: &Url) -> Option<Self>
267+
where
268+
Input: AsRef<str>,
269+
{
270+
if !base.is_valid {
271+
return None;
272+
}
273+
match parser::parse_url(input.as_ref(), Some(base)) {
274+
Some(u) if u.is_valid => Some(u),
275+
_ => None,
276+
}
277+
}
278+
236279
/// Returns `true` when `input` can be parsed as a valid URL.
237280
///
238281
/// When `base` is `None` this uses a zero-allocation fast-path validator
@@ -1682,6 +1725,35 @@ impl Url {
16821725
self.buffer.push_str(input);
16831726
return;
16841727
}
1728+
// Fast append: path already set, no dot-segments, no encoding, AND
1729+
// no search/fragment follows the path in the buffer. Only then can
1730+
// we safely push directly to the buffer end without displacing query
1731+
// or fragment bytes that sit after the current path.
1732+
//
1733+
// Extra guard: when the current path is exactly "/" appending "/"
1734+
// + input would produce "//input". `update_base_pathname("//...")` has
1735+
// a side-effect of inserting "/." for authority-less URLs; bypassing it
1736+
// would produce a wrong href (e.g. "non-spec://path" instead of
1737+
// "non-spec:/.//path"). Avoid fast_append for this edge case.
1738+
let fast_append = trivial
1739+
&& !self.is_at_path()
1740+
&& !input.starts_with("..")
1741+
&& !input.starts_with('.')
1742+
&& self.components.search_start == OMITTED
1743+
&& self.components.hash_start == OMITTED
1744+
&& self.pathname() != "/";
1745+
if fast_append {
1746+
let added = (1 + input.len()) as u32;
1747+
self.buffer.push('/');
1748+
self.buffer.push_str(input);
1749+
if self.components.search_start != OMITTED {
1750+
self.components.search_start += added;
1751+
}
1752+
if self.components.hash_start != OMITTED {
1753+
self.components.hash_start += added;
1754+
}
1755+
return;
1756+
}
16851757
let mut new_path = if self.is_at_path() {
16861758
String::new()
16871759
} else {

0 commit comments

Comments
 (0)