@@ -360,8 +360,47 @@ def visit_GeoSimplify(self, op, *, arg, tolerance, preserve_collapsed):
360360 )
361361 return self .f .st_simplify (arg , tolerance )
362362
def _visit_approx_quantile_helper(self, op, *, arg, where):
    """Compile an approximate median/quantile reduction to `APPROX_QUANTILES`.

    BigQuery syntax is `APPROX_QUANTILES(col, resolution)`, which returns an
    array of `resolution + 1` quantiles. To handle this, the resolution is
    computed here from the requested quantiles and the output array is then
    indexed or restructured as needed. To avoid excessive resolution it is
    arbitrarily capped at 100,000; since these are approximate quantiles
    anyway this seems fine.

    Parameters
    ----------
    op
        The operation being compiled. An `ops.ApproxMedian` is treated as
        the 0.5 quantile; any other op must expose `op.quantile.value`
        holding one number or a sequence of numbers.
    arg
        Compiled expression to aggregate.
    where
        Optional compiled filter. When given, values that fail the filter
        are replaced by NULL and then dropped through `IGNORE NULLS`.

    Returns
    -------
    A single array element for `ops.ApproxQuantile` / `ops.ApproxMedian`,
    otherwise an array holding one element per requested quantile.
    """
    scalar_result = isinstance(op, (ops.ApproxQuantile, ops.ApproxMedian))

    # The median is handled as the fixed 0.5 quantile, so the helper never
    # needs a `quantile` attribute on an `ApproxMedian` op.
    if isinstance(op, ops.ApproxMedian):
        requested = 0.5
    else:
        requested = op.quantile.value

    quantiles = util.promote_list(requested)
    # Going through `str` keeps the short decimal form of a float (0.1 ->
    # 1/10) rather than its exact binary expansion, so denominators stay small.
    fracs = [decimal.Decimal(str(q)).as_integer_ratio() for q in quantiles]
    resolution = min(math.lcm(*(den for _, den in fracs)), 100_000)
    indices = [(num * resolution) // den for num, den in fracs]

    if where is not None:
        arg = self.if_(where, arg, NULL)

    if not op.arg.dtype.is_floating():
        arg = self.cast(arg, dt.float64)

    array = self.f.approx_quantiles(
        arg, sge.IgnoreNulls(this=sge.convert(resolution))
    )
    if scalar_result:
        return array[indices[0]]

    # When every boundary 0..resolution is requested in order, the raw array
    # already is the answer. Checking the length first avoids materialising
    # a list of up to 100,001 integers just for the comparison.
    wants_whole_array = len(indices) == resolution + 1 and all(
        idx == pos for pos, idx in enumerate(indices)
    )
    if wants_whole_array:
        return array
    return sge.Array(expressions=[array[i] for i in indices])
391+
def visit_ApproxQuantile(self, op, *, arg, quantile, where):
    """Compile an approximate quantile whose quantile argument is a literal.

    The compiled `quantile` argument is not used here; the literal value is
    read from `op.quantile` by `_visit_approx_quantile_helper`.

    Raises
    ------
    com.UnsupportedOperationError
        If `op.quantile` is not an `ops.Literal`.
    """
    if isinstance(op.quantile, ops.Literal):
        return self._visit_approx_quantile_helper(op, arg=arg, where=where)

    raise com.UnsupportedOperationError(
        "quantile must be a literal in BigQuery"
    )
398+
def visit_ApproxMedian(self, op, *, arg, where):
    """Compile an approximate median as the approximate 0.5 quantile.

    A substitute `ops.ApproxQuantile` is built from this op's `arg` and
    `where` so that the shared quantile helper can read `quantile` from it.
    """
    median_as_quantile = ops.ApproxQuantile(
        arg=op.arg,
        quantile=0.5,
        where=op.where,
    )
    return self._visit_approx_quantile_helper(
        median_as_quantile, arg=arg, where=where
    )
402+
# Multi-quantile ops reuse the single-quantile visitor: the literal check is
# the same, and the shared helper returns an array (rather than one element)
# for any op that is not ApproxQuantile/ApproxMedian.
visit_ApproxMultiQuantile = visit_ApproxQuantile
365404
def visit_Pi(self, op):
    """Compile the constant pi as `acos(-1)`."""
    minus_one = -1
    return self.f.acos(minus_one)
@@ -397,41 +436,6 @@ def visit_GroupConcat(self, op, *, arg, sep, where, order_by):
397436
398437 return sge .GroupConcat (this = arg , separator = sep )
399438
400- def visit_ApproxQuantile (self , op , * , arg , quantile , where ):
401- if not isinstance (op .quantile , ops .Literal ):
402- raise com .UnsupportedOperationError (
403- "quantile must be a literal in BigQuery"
404- )
405-
406- # BigQuery syntax is `APPROX_QUANTILES(col, resolution)` to return
407- # `resolution + 1` quantiles array. To handle this, we compute the
408- # resolution ourselves then restructure the output array as needed.
409- # To avoid excessive resolution we arbitrarily cap it at 100,000 -
410- # since these are approximate quantiles anyway this seems fine.
411- quantiles = util .promote_list (op .quantile .value )
412- fracs = [decimal .Decimal (str (q )).as_integer_ratio () for q in quantiles ]
413- resolution = min (math .lcm (* (den for _ , den in fracs )), 100_000 )
414- indices = [(num * resolution ) // den for num , den in fracs ]
415-
416- if where is not None :
417- arg = self .if_ (where , arg , NULL )
418-
419- if not op .arg .dtype .is_floating ():
420- arg = self .cast (arg , dt .float64 )
421-
422- array = self .f .approx_quantiles (
423- arg , sge .IgnoreNulls (this = sge .convert (resolution ))
424- )
425- if isinstance (op , ops .ApproxQuantile ):
426- return array [indices [0 ]]
427-
428- if indices == list (range (resolution + 1 )):
429- return array
430- else :
431- return sge .Array (expressions = [array [i ] for i in indices ])
432-
433- visit_ApproxMultiQuantile = visit_ApproxQuantile
434-
def visit_FloorDivide(self, op, *, left, right):
    """Compile floor division as `floor(ieee_divide(left, right))`.

    The floored result is cast to the operation's declared `dtype`.
    """
    quotient = self.f.ieee_divide(left, right)
    floored = self.f.floor(quotient)
    return self.cast(floored, op.dtype)
437441
0 commit comments