"""Minimal repro: cudf-polars streaming-mode broadcast bug for
`with_columns(pl.lit(<numpy_array>))` at >~1M rows.
Setup:
    polars==1.38.1, cudf==26.04, on NVIDIA L40 / Ubuntu 22.04, CUDA 12.x

Symptom:
    RuntimeError: Cannot broadcast columns of length nrows=N to target_length=N/2

Trigger:
    A LazyFrame whose `with_columns` adds a literal column built from a
    Series/numpy array of the same length as the source (i.e. a row-aligned
    column literal, not a scalar `pl.lit(value)`). At >~1M rows the streaming
    executor partitions the upstream into halves but does not partition the
    literal alongside; the subsequent broadcast then sees a full-length
    column being placed into a half-length partition and raises.

Workaround:
    pl.GPUEngine(executor="in-memory")  # skips the streaming code path
"""
from __future__ import annotations
import numpy as np
import polars as pl
def make_lf(n: int) -> pl.LazyFrame:
    """Build the n-row LazyFrame used to trigger the broadcast failure.

    The frame has one int32 column ``key`` drawn from a generator seeded
    with 0 (so every call with the same ``n`` yields the same data), plus a
    ``var_id`` column added via ``with_columns`` from a length-``n`` uint64
    numpy array wrapped in ``pl.lit``.

    Args:
        n: Number of rows in both the source column and the literal column.

    Returns:
        The un-collected LazyFrame.
    """
    rng = np.random.default_rng(0)
    keys = rng.integers(0, 100, size=n, dtype=np.int32)
    # Row-aligned array literal (same length as the frame), not a scalar;
    # this is the construct the repro is about.
    row_ids = np.arange(n, dtype=np.uint64)

    base = pl.LazyFrame({"key": keys})
    return base.with_columns(pl.lit(row_ids).alias("var_id"))
def try_collect(n: int, executor: str) -> str:
    """Collect the n-row repro frame on the GPU engine and summarize the result.

    Args:
        n: Row count forwarded to ``make_lf``.
        executor: Executor name passed to ``pl.GPUEngine``; the script uses
            ``"streaming"`` and ``"in-memory"``.

    Returns:
        ``"OK (<height> rows)"`` when the collect succeeds, or
        ``"FAIL: <message>"`` when it raises ``RuntimeError``. Any other
        exception type propagates to the caller.
    """
    gpu_engine = pl.GPUEngine(raise_on_fail=True, executor=executor)
    try:
        collected = make_lf(n).collect(engine=gpu_engine)
    except RuntimeError as err:
        return f"FAIL: {err}"
    else:
        return f"OK ({collected.height} rows)"
if __name__ == "__main__":
    # Print library versions first so the output is self-describing.
    print("Polars:", pl.__version__)
    import cudf

    print("cudf: ", cudf.__version__)
    print()

    # Sweep row counts on either side of the ~1M mark with the streaming
    # executor, printing one result line per size.
    row_counts = (100_000, 500_000, 1_000_000, 1_500_000, 2_000_000, 5_000_000)
    for n in row_counts:
        outcome = try_collect(n, "streaming")
        print(f"n={n:>10} streaming: {outcome}")
    print()

    # Re-run the largest size with the in-memory executor (the workaround).
    largest = row_counts[-1]
    outcome = try_collect(largest, "in-memory")
    print(f"n={largest:>10} in-memory: {outcome}")
# Claude-generated description of the issue and repro: