Skip to content

Commit d4c0683

Browse files
committed
make improvements
1 parent eae07e5 commit d4c0683

16 files changed

Lines changed: 12839 additions & 4395 deletions

examples/bench_debug.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
use std::hint::black_box;
2+
use std::time::Instant;
3+
const URLS: &[&str] = &[
4+
"https://www.google.com/webhp?hl=en&ictx=2",
5+
"https://en.wikipedia.org/wiki/Dog#Roles_with_humans",
6+
"https://www.tiktok.com/@aguyandagolden/video/7133277734310038830",
7+
"https://business.twitter.com/en/help/troubleshooting/how-twitter-ads-work.html?ref=web",
8+
"https://images-na.ssl-images-amazon.com/images/I/41Gc3C8UysL.css?AUI",
9+
"https://www.reddit.com/?after=t3_zvz1ze",
10+
"https://www.reddit.com/login/?dest=https%3A%2F%2Fwww.reddit.com%2F",
11+
"postgresql://other:pass@localhost:5432/db",
12+
"http://192.168.1.1",
13+
"http://[2606:4700::1111]",
14+
];
15+
fn bench(label: &str, n: u32, f: impl Fn()) {
16+
for _ in 0..10000 { f(); }
17+
let t = Instant::now();
18+
for _ in 0..n { f(); }
19+
let ns = t.elapsed().as_nanos() as f64 / n as f64;
20+
println!("{label}: {ns:.0} ns/iter ({:.0} ns/url)", ns / URLS.len() as f64);
21+
}
22+
fn main() {
23+
let n = 500_000u32;
24+
bench("can_parse all", n, || { for u in URLS { black_box(ada_url::Url::can_parse(black_box(u), None)); } });
25+
bench("parse all", n, || { for u in URLS { black_box(ada_url::Url::parse(black_box(u), None)); } });
26+
bench("can_parse fast7", n, || { for u in &URLS[..7] { black_box(ada_url::Url::can_parse(black_box(u), None)); } });
27+
bench("can_parse slow3", n, || { for u in &URLS[7..] { black_box(ada_url::Url::can_parse(black_box(u), None)); } });
28+
}

examples/check_idna.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,24 @@ fn main() {
66
let arr = v.as_array().unwrap();
77
let mut wrong = 0;
88
for elem in arr.iter() {
9-
if elem.is_string() { continue; }
9+
if elem.is_string() {
10+
continue;
11+
}
1012
let o = elem.as_object().unwrap();
1113
let input = o["input"].as_str().unwrap_or("");
1214
let expected = o.get("output").unwrap();
1315
let result = Idna::ascii(input);
1416
if expected.is_null() {
15-
if !result.is_empty() { eprintln!("SHOULD_FAIL {:?} -> {:?}", input, result); wrong+=1; }
17+
if !result.is_empty() {
18+
eprintln!("SHOULD_FAIL {:?} -> {:?}", input, result);
19+
wrong += 1;
20+
}
1621
} else {
1722
let exp = expected.as_str().unwrap();
18-
if result != exp { eprintln!("WRONG {:?}: got={:?} exp={:?}", input, result, exp); wrong+=1; }
23+
if result != exp {
24+
eprintln!("WRONG {:?}: got={:?} exp={:?}", input, result, exp);
25+
wrong += 1;
26+
}
1927
}
2028
}
2129
eprintln!("Total wrong: {}", wrong);

src/checkers.rs

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,8 @@ pub fn is_ipv4(input: &str) -> bool {
6666
let last_char = b[b.len() - 1];
6767

6868
// Quick filter: the last character must be a decimal digit, a-f, or 'x'
69-
let possible = last_char.is_ascii_digit()
70-
|| matches!(last_char, b'a'..=b'f')
71-
|| last_char == b'x';
69+
let possible =
70+
last_char.is_ascii_digit() || matches!(last_char, b'a'..=b'f') || last_char == b'x';
7271
if !possible {
7372
return false;
7473
}
@@ -119,34 +118,54 @@ pub fn is_ipv4(input: &str) -> bool {
119118
const PATH_SIG_TABLE: [u8; 256] = {
120119
let mut t = [0u8; 256];
121120
// Needs encoding: C0 controls (0x00-0x1F), DEL (0x7F), high bytes (0x80-0xFF)
122-
let mut i = 0usize; while i <= 0x1F { t[i] |= 0x01; i += 1; }
123-
let mut i = 0x7Fusize; while i < 256 { t[i] |= 0x01; i += 1; }
121+
let mut i = 0usize;
122+
while i <= 0x1F {
123+
t[i] |= 0x01;
124+
i += 1;
125+
}
126+
let mut i = 0x7Fusize;
127+
while i < 256 {
128+
t[i] |= 0x01;
129+
i += 1;
130+
}
124131
// Needs encoding: specific printable ASCII chars
125132
let enc: &[u8] = b" \"#<>?^`{|}";
126133

127-
let mut i = 0; while i < enc.len() { t[enc[i] as usize] |= 0x01; i += 1; }
134+
let mut i = 0;
135+
while i < enc.len() {
136+
t[enc[i] as usize] |= 0x01;
137+
i += 1;
138+
}
128139
// Special flags
129140
t[b'\\' as usize] |= 0x02; // backslash
130-
t[b'.' as usize] |= 0x04; // dot
131-
t[b'%' as usize] |= 0x08; // percent
141+
t[b'.' as usize] |= 0x04; // dot
142+
t[b'%' as usize] |= 0x08; // percent
132143
t
133144
};
134145

135-
/// Compute a path-signature byte via an unrolled table lookup — branch-free.
136-
/// Returns a bitmask of the flags above.
146+
/// Compute a path-signature byte via Ada's exact 8-at-a-time unrolled lookup.
147+
///
148+
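/// Ada C++ loops with `for (; i + 7 < size; i += 8)`; the `i + 8 <= b.len()` condition below is equivalent. Returns the bitwise OR of the `PATH_SIG_TABLE` flags of every input byte.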
137149
pub fn path_signature(input: &str) -> u8 {
138150
let b = input.as_bytes();
139151
let mut acc = 0u8;
140152
let mut i = 0;
141-
// Unrolled 4-at-a-time — same as Ada C++ style
142-
while i + 4 <= b.len() {
143-
acc |= PATH_SIG_TABLE[b[i] as usize]
144-
| PATH_SIG_TABLE[b[i+1] as usize]
145-
| PATH_SIG_TABLE[b[i+2] as usize]
146-
| PATH_SIG_TABLE[b[i+3] as usize];
147-
i += 4;
148-
}
149-
while i < b.len() { acc |= PATH_SIG_TABLE[b[i] as usize]; i += 1; }
153+
// 8-at-a-time — Ada C++ uses this exact unroll factor
154+
while i + 8 <= b.len() {
155+
acc |= PATH_SIG_TABLE[b[i] as usize]
156+
| PATH_SIG_TABLE[b[i + 1] as usize]
157+
| PATH_SIG_TABLE[b[i + 2] as usize]
158+
| PATH_SIG_TABLE[b[i + 3] as usize]
159+
| PATH_SIG_TABLE[b[i + 4] as usize]
160+
| PATH_SIG_TABLE[b[i + 5] as usize]
161+
| PATH_SIG_TABLE[b[i + 6] as usize]
162+
| PATH_SIG_TABLE[b[i + 7] as usize];
163+
i += 8;
164+
}
165+
while i < b.len() {
166+
acc |= PATH_SIG_TABLE[b[i] as usize];
167+
i += 1;
168+
}
150169
acc
151170
}
152171

src/helpers.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ use crate::checkers::{
1717
};
1818
use crate::scheme::SchemeType;
1919
use crate::unicode::{
20-
is_c0_control_or_space, is_double_dot_path_segment,
21-
is_single_dot_path_segment, percent_encode_append,
20+
is_c0_control_or_space, is_double_dot_path_segment, is_single_dot_path_segment,
21+
percent_encode_append,
2222
};
2323

2424
// ---------------------------------------------------------------------------
@@ -66,6 +66,7 @@ pub fn remove_ascii_tab_or_newline(s: &mut String) {
6666
/// Trim leading and trailing C0 control characters and ASCII space.
6767
/// Returns a `&str` slice into the original — **zero allocation**.
6868
#[inline]
69+
#[allow(dead_code)]
6970
pub fn trim_c0_whitespace(s: &str) -> &str {
7071
let start = s
7172
.as_bytes()

src/idna.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,13 @@ mod tests {
3939

4040
#[test]
4141
#[cfg(feature = "std")]
42-
fn unicode_works() { assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca"), "meßagefactory.ca"); }
42+
fn unicode_works() {
43+
assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca"), "meßagefactory.ca");
44+
}
4345

4446
#[test]
4547
#[cfg(feature = "std")]
46-
fn ascii_works() { assert_eq!(Idna::ascii("meßagefactory.ca"), "xn--meagefactory-m9a.ca"); }
48+
fn ascii_works() {
49+
assert_eq!(Idna::ascii("meßagefactory.ca"), "xn--meagefactory-m9a.ca");
50+
}
4751
}

0 commit comments

Comments
 (0)