From a10ba49cc8e4741a512b3b9dfbb7d5decbbd2b54 Mon Sep 17 00:00:00 2001 From: Navaneeth Sujith Date: Sun, 14 Jun 2026 11:19:28 -0700 Subject: [PATCH] [CH] Support skewness aggregate function --- .../scala/org/apache/gluten/utils/CHExpressionUtil.scala | 1 - .../execution/GlutenClickhouseCountDistinctSuite.scala | 5 ++--- .../SimpleStatisticsFunctions.cpp | 7 +++++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala index 3bbb4a467f6..e5bee2dc202 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala @@ -192,7 +192,6 @@ object CHExpressionUtil { URL_DECODE -> DefaultValidator(), URL_ENCODE -> DefaultValidator(), FORMAT_STRING -> FormatStringValidator(), - SKEWNESS -> DefaultValidator(), MAKE_YM_INTERVAL -> DefaultValidator(), MAP_ZIP_WITH -> DefaultValidator(), KURTOSIS -> DefaultValidator(), diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala index 22a82a9439d..2aa0979dacb 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala @@ -103,9 +103,8 @@ class GlutenClickhouseCountDistinctSuite extends GlutenClickHouseWholeStageTrans // skewness agg is not supported, will cause fallback val sql = "select count(distinct(a,b)) , skewness(b) from " + "values (0, null,1), (0,null,1), (1, 1,1), (2, 2, 1) ,(2,2,2),(3,3,3) as data(a,b,c)" - assertThrows[UnsupportedOperationException] { - spark.sql(sql).show - } + 
compareResultsAgainstVanillaSpark(sql, true, { _ => }) + } test("check count distinct with expr fallback") { diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp b/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp index bda181e0fad..93ed056cbf9 100644 --- a/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp +++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp @@ -63,7 +63,14 @@ class AggregateFunctionParserStddev final : public AggregateFunctionParser return func_node; } }; +// for skewness: Spark evaluates sqrt(n) * m3 / sqrt(m2^3), i.e. the population skewness, so map to ClickHouse skewPop (not skewSamp) +struct SkewnessNameStruct +{ + static constexpr auto spark_name = "skewness"; + static constexpr auto ch_name = "skewPop"; +}; +static const AggregateFunctionParserRegister<AggregateFunctionParserStddev<SkewnessNameStruct>> registerer_skewness; static const AggregateFunctionParserRegister<AggregateFunctionParserStddev<StddevNameStruct>> registerer_stddev; static const AggregateFunctionParserRegister<AggregateFunctionParserStddev<StddevSampNameStruct>> registerer_stddev_samp; }