Skip to content

Commit 55fa12a

Browse files
committed
Prune redundant benchmarks in cast_kernels
1 parent 986b084 commit 55fa12a

2 files changed

Lines changed: 129 additions & 51 deletions

File tree

arrow/benches/cast_kernels.rs

Lines changed: 106 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -83,36 +83,6 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef {
8383
Arc::new(builder.finish())
8484
}
8585

86-
fn build_decimal32_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
87-
let mut rng = seedable_rng();
88-
let mut builder = Decimal32Builder::with_capacity(size);
89-
90-
for _ in 0..size {
91-
builder.append_value(rng.random_range::<i32, _>(0..1000000));
92-
}
93-
Arc::new(
94-
builder
95-
.finish()
96-
.with_precision_and_scale(precision, scale)
97-
.unwrap(),
98-
)
99-
}
100-
101-
fn build_decimal64_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
102-
let mut rng = seedable_rng();
103-
let mut builder = Decimal64Builder::with_capacity(size);
104-
105-
for _ in 0..size {
106-
builder.append_value(rng.random_range::<i64, _>(0..1000000000));
107-
}
108-
Arc::new(
109-
builder
110-
.finish()
111-
.with_precision_and_scale(precision, scale)
112-
.unwrap(),
113-
)
114-
}
115-
11686
fn build_decimal128_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
11787
let mut rng = seedable_rng();
11888
let mut builder = Decimal128Builder::with_capacity(size);
@@ -157,6 +127,53 @@ fn build_string_array(size: usize) -> ArrayRef {
157127
Arc::new(builder.finish())
158128
}
159129

130+
fn build_string_float_array(size: usize, null_density: f32) -> ArrayRef {
131+
let mut builder = StringBuilder::new();
132+
133+
let mut rng = seedable_rng();
134+
135+
for _ in 0..size {
136+
if rng.random::<f32>() < null_density {
137+
builder.append_null()
138+
} else {
139+
builder.append_value(
140+
rng.random_range(-999_999_999f32..999_999_999f32)
141+
.to_string(),
142+
)
143+
}
144+
}
145+
Arc::new(builder.finish())
146+
}
147+
148+
// Fills `$builder` with `$size` entries and evaluates to the finished array
// wrapped in an `Arc`.
//
// For every slot one `f32` is drawn from `seedable_rng()`; when it falls below
// `$null_density` a null is appended, otherwise the sample at index
// `slot % $samples.len()` is appended, so the samples are reused cyclically.
macro_rules! build_array_with_samples {
    ($builder: ident, $size: ident, $null_density: expr, $samples: ident) => {{
        let mut rng = seedable_rng();
        for slot in 0..$size {
            let draw: f32 = rng.random();
            match draw < $null_density {
                true => {
                    $builder.append_null();
                }
                false => {
                    $builder.append_value($samples[slot % $samples.len()]);
                }
            }
        }
        Arc::new($builder.finish())
    }};
}
161+
162+
fn build_float64_array_for_cast_to_decimal(size: usize, null_density: f32) -> ArrayRef {
163+
Arc::new(create_primitive_array_range::<Float64Type>(
164+
size,
165+
null_density,
166+
-999_999_999f64..999_999_999f64,
167+
))
168+
}
169+
170+
fn build_float64_array_invalid_items(size: usize, null_density: f32) -> ArrayRef {
171+
let mut builder = Float64Builder::with_capacity(size);
172+
let invalid_values = [f64::NAN, f64::INFINITY, f64::NEG_INFINITY];
173+
174+
build_array_with_samples!(builder, size, null_density, invalid_values)
175+
}
176+
160177
fn build_dict_array(size: usize) -> ArrayRef {
161178
let values = StringArray::from_iter([
162179
Some("small"),
@@ -170,7 +187,7 @@ fn build_dict_array(size: usize) -> ArrayRef {
170187

171188
// cast array from specified primitive array type to desired data type
172189
fn cast_array(array: &ArrayRef, to_type: DataType) {
173-
hint::black_box(cast(array, &to_type).unwrap());
190+
hint::black_box(cast(hint::black_box(array), hint::black_box(&to_type)).unwrap());
174191
}
175192

176193
fn add_benchmark(c: &mut Criterion) {
@@ -189,17 +206,19 @@ fn add_benchmark(c: &mut Criterion) {
189206
let utf8_date_array = build_utf8_date_array(512, true);
190207
let utf8_date_time_array = build_utf8_date_time_array(512, true);
191208

192-
let decimal32_array = build_decimal32_array(512, 9, 3);
193-
let decimal64_array = build_decimal64_array(512, 10, 3);
194-
let decimal128_array = build_decimal128_array(512, 10, 3);
195-
let decimal256_array = build_decimal256_array(512, 50, 3);
209+
let decimal128_array = build_decimal128_array(8_000, 10, 3);
210+
let decimal256_array = build_decimal256_array(8_000, 50, 3);
196211
let string_array = build_string_array(512);
197212
let wide_string_array = cast(&string_array, &DataType::LargeUtf8).unwrap();
198213

199214
let dict_array = build_dict_array(10_000);
200215
let string_view_array = cast(&dict_array, &DataType::Utf8View).unwrap();
201216
let binary_view_array = cast(&string_view_array, &DataType::BinaryView).unwrap();
202217

218+
let string_float_array_normal = build_string_float_array(5_000, 0.1);
219+
let float64_array_cast_to_decimal = build_float64_array_for_cast_to_decimal(8_000, 0.1);
220+
let invalid_float64_array_to_decimal = build_float64_array_invalid_items(8_000, 0.1);
221+
203222
c.bench_function("cast int32 to int32 512", |b| {
204223
b.iter(|| cast_array(&i32_array, DataType::Int32))
205224
});
@@ -280,22 +299,6 @@ fn add_benchmark(c: &mut Criterion) {
280299
b.iter(|| cast_array(&utf8_date_time_array, DataType::Date64))
281300
});
282301

283-
c.bench_function("cast decimal32 to decimal32 512", |b| {
284-
b.iter(|| cast_array(&decimal32_array, DataType::Decimal32(9, 4)))
285-
});
286-
c.bench_function("cast decimal32 to decimal32 512 lower precision", |b| {
287-
b.iter(|| cast_array(&decimal32_array, DataType::Decimal32(6, 5)))
288-
});
289-
c.bench_function("cast decimal32 to decimal64 512", |b| {
290-
b.iter(|| cast_array(&decimal32_array, DataType::Decimal64(11, 5)))
291-
});
292-
c.bench_function("cast decimal64 to decimal32 512", |b| {
293-
b.iter(|| cast_array(&decimal64_array, DataType::Decimal32(9, 2)))
294-
});
295-
c.bench_function("cast decimal64 to decimal64 512", |b| {
296-
b.iter(|| cast_array(&decimal64_array, DataType::Decimal64(12, 4)))
297-
});
298-
299302
c.bench_function("cast decimal128 to decimal128 512", |b| {
300303
b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(30, 5)))
301304
});
@@ -360,6 +363,58 @@ fn add_benchmark(c: &mut Criterion) {
360363
b.iter(|| cast_array(&binary_view_array, DataType::Utf8View))
361364
});
362365

366+
// Registers a Criterion benchmark called `$name` that casts `$input_array`
// to `$target_type` through `cast_array` on every iteration.
//
// `$name` is forwarded to `bench_function` as-is. Wrapping it in `stringify!`
// would turn a string-literal argument into text that still contains its
// surrounding double quotes, so the registered benchmark id would carry
// literal `"` characters.
//
// Relies on a `c` (the Criterion handle) being in scope where the macro is
// defined.
macro_rules! benchmark_cast {
    ($name: expr, $input_array: ident, $target_type: expr) => {
        c.bench_function($name, |b| {
            b.iter(|| cast_array(&$input_array, $target_type))
        });
    };
}
373+
374+
// cast string with normal items to decimals
375+
benchmark_cast!(
376+
"cast string to decimal128(38, 3)",
377+
string_float_array_normal,
378+
DataType::Decimal128(38, 3)
379+
);
380+
381+
// cast float64 to decimals
382+
benchmark_cast!(
383+
"cast float64 to decimal128(32, 3)",
384+
float64_array_cast_to_decimal,
385+
DataType::Decimal128(32, 3)
386+
);
387+
388+
// cast invalid float64 to decimals
389+
// Input holds only NaN / +infinity / -infinity values (plus nulls).
benchmark_cast!(
    "cast invalid float64 to decimal128(32, 3)",
    invalid_float64_array_to_decimal,
    DataType::Decimal128(32, 3)
);
394+
395+
// cast decimals to float/integers
396+
benchmark_cast!(
397+
"cast decimal128 to float64",
398+
decimal128_array,
399+
DataType::Float64
400+
);
401+
benchmark_cast!(
402+
"cast decimal128 to int64",
403+
decimal128_array,
404+
DataType::Int64
405+
);
406+
407+
benchmark_cast!(
408+
"cast decimal256 to float64",
409+
decimal256_array,
410+
DataType::Float64
411+
);
412+
benchmark_cast!(
413+
"cast decimal256 to int64",
414+
decimal256_array,
415+
DataType::Int64
416+
);
417+
363418
c.bench_function("cast string single run to ree<int32>", |b| {
364419
let source_array = StringArray::from(vec!["a"; 8192]);
365420
let array_ref = Arc::new(source_array) as ArrayRef;

arrow/src/util/bench_util.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,29 @@ where
5151
.collect()
5252
}
5353

54+
/// Creates a random (but fixed-seeded) array of a given size and null density with a specific range
55+
pub fn create_primitive_array_range<T>(
56+
size: usize,
57+
null_density: f32,
58+
range: Range<T::Native>,
59+
) -> PrimitiveArray<T>
60+
where
61+
T: ArrowPrimitiveType,
62+
T::Native: SampleUniform,
63+
{
64+
let mut rng = seedable_rng();
65+
66+
(0..size)
67+
.map(|_| {
68+
if rng.random::<f32>() < null_density {
69+
None
70+
} else {
71+
Some(rng.random_range(range.clone()))
72+
}
73+
})
74+
.collect()
75+
}
76+
5477
/// Creates a [`PrimitiveArray`] of a given `size` and `null_density`
5578
/// filling it with random numbers generated using the provided `seed`.
5679
pub fn create_primitive_array_with_seed<T>(

0 commit comments

Comments
 (0)