Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/cubejs-schema-compiler/src/adapter/BaseQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -4604,6 +4604,8 @@ export class BaseQuery {
join_types: {
inner: 'INNER',
left: 'LEFT',
right: 'RIGHT',
full: 'FULL',
Comment thread
MazterQyou marked this conversation as resolved.
},
window_frame_types: {
rows: 'ROWS',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,11 @@ export class CubeStoreQuery extends BaseQuery {
templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM';
templates.expressions.wrap_segment_select = 'IF({{ expr }}, 1, 0)';
templates.expressions.wrap_segment_filter = '{{ expr }} = 1';
// CubeStore has no native FULL OUTER JOIN (it is emulated via LEFT JOIN chains), and its
// distributed join executor assumes the left-most table is the split root, so RIGHT/FULL
// across partitioned tables is unsafe. Don't push those join types down to CubeStore.
delete templates.join_types.full;
delete templates.join_types.right;
return templates;
}
}
2 changes: 2 additions & 0 deletions packages/cubejs-schema-compiler/src/adapter/MysqlQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ export class MysqlQuery extends BaseQuery {
templates.types.timestamp = 'DATETIME';
delete templates.types.interval;
templates.types.binary = 'BLOB';
// MySQL has no FULL OUTER JOIN
delete templates.join_types.full;

templates.expressions.concat_strings = 'CONCAT({{ strings | join(\',\' ) }})';

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ export class SnowflakeQuery extends BaseQuery {
templates.expressions.like = '{{ expr }} {% if negated %}NOT {% endif %}LIKE {{ pattern }}{% if default_escape %} ESCAPE \'\\\\\'{% endif %}';
templates.expressions.ilike = '{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}{% if default_escape %} ESCAPE \'\\\\\'{% endif %}';
templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM';
templates.join_types.full = 'FULL';
delete templates.types.interval;
return templates;
}
Expand Down
4 changes: 4 additions & 0 deletions rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3459,6 +3459,7 @@ impl WrappedSelectNode {
}

match join_type {
// Right/Full are only generated on the non-push-to-Cube path
JoinType::Inner | JoinType::Left => {
// Do nothing
}
Expand Down Expand Up @@ -3596,6 +3597,7 @@ impl WrappedSelectNode {
};

let join_type = match join_type {
// Right/Full are only generated on the non-push-to-Cube path
JoinType::Left => generator.get_sql_templates().left_join()?,
JoinType::Inner => generator.get_sql_templates().inner_join()?,
_ => {
Expand Down Expand Up @@ -3868,6 +3870,8 @@ impl WrappedSelectNode {
let join_type_sql = match join_type {
JoinType::Left => generator.get_sql_templates().left_join()?,
JoinType::Inner => generator.get_sql_templates().inner_join()?,
JoinType::Right => generator.get_sql_templates().right_join()?,
JoinType::Full => generator.get_sql_templates().full_join()?,
_ => {
return Err(CubeError::internal(format!(
"Unsupported join type for join subquery: {join_type:?}"
Expand Down
69 changes: 68 additions & 1 deletion rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::{
WrappedSelectJoinJoinType, WrappedSelectPushToCube, WrapperReplacerContextAliasToCube,
WrapperReplacerContextGroupedSubqueries,
},
transport::MetaContext,
var, var_iter, var_list_iter,
};

Expand All @@ -21,7 +22,7 @@ use datafusion::{
logical_plan::Column,
prelude::JoinType,
};
use egg::{Id, Subst};
use egg::{Id, Subst, Var};
use itertools::Itertools;

impl WrapperRules {
Expand Down Expand Up @@ -263,6 +264,7 @@ impl WrapperRules {
"?left_on",
"?right_on",
"?in_join_type",
"?input_data_source",
"?out_join_expr",
"?out_join_type",
"?out_grouped_subqueries",
Expand Down Expand Up @@ -481,6 +483,7 @@ impl WrapperRules {
"?left_push_to_cube",
"?right_on",
"?in_join_type",
"?input_data_source",
"?out_join_expr",
"?out_join_type",
"?out_grouped_subqueries",
Expand Down Expand Up @@ -982,12 +985,42 @@ impl WrapperRules {
result_expr
}

/// Decides if a join subquery using `join_type` may be pushed down to the data source
/// bound to `data_source_var`.
///
/// * `Inner` and `Left` are eligible regardless of `push_to_cube`.
/// * `Right` and `Full` are eligible only when `push_to_cube` is `false`, i.e. on the
///   non-push-to-Cube path where both sides become standalone subqueries joined together
///   and the outer-join semantics map directly to SQL. On the push-to-Cube path the join
///   is folded inside the Cube query alongside its grouping/measures, where NULL-extended
///   outer rows are not validated, so these join types are refused there.
/// * Every other join type (semi/anti) is never eligible as a join subquery.
///
/// For an eligible join type the answer is whatever `can_rewrite_template` reports for the
/// matching `join_types/*` template of the resolved data source. If the data source cannot
/// be resolved from `data_source_var`, the result is `false`.
fn is_subquery_join_type_supported(
    egraph: &CubeEGraph,
    subst: &mut Subst,
    meta: &MetaContext,
    data_source_var: Var,
    join_type: &JoinType,
    push_to_cube: bool,
) -> bool {
    // Pick the SQL template that has to exist for this join type; bail out early for
    // combinations that are never allowed.
    let template_name = match (join_type, push_to_cube) {
        (JoinType::Inner, _) => "join_types/inner",
        (JoinType::Left, _) => "join_types/left",
        (JoinType::Right, false) => "join_types/right",
        (JoinType::Full, false) => "join_types/full",
        _ => return false,
    };

    // A data source that cannot be resolved is treated as "not supported".
    match Self::get_data_source(egraph, subst, data_source_var) {
        Ok(data_source) => Self::can_rewrite_template(&data_source, meta, template_name),
        Err(_) => false,
    }
}

fn transform_ungrouped_join_grouped(
&self,
left_members_var: &'static str,
left_on_var: &'static str,
right_on_var: &'static str,
in_join_type_var: &'static str,
input_data_source_var: &'static str,
out_join_expr_var: &'static str,
out_join_type_var: &'static str,
out_grouped_subqueries_var: &'static str,
Expand All @@ -998,11 +1031,14 @@ impl WrapperRules {
let right_on_var = var!(right_on_var);

let in_join_type_var = var!(in_join_type_var);
let input_data_source_var = var!(input_data_source_var);

let out_join_expr_var = var!(out_join_expr_var);
let out_join_type_var = var!(out_join_type_var);
let out_grouped_subqueries_var = var!(out_grouped_subqueries_var);

let meta = self.meta_context.clone();

// Only left is allowed to be ungrouped query, so right would be a subquery join for left ungrouped CubeScan
// It means we don't care about just a "single cube" in LHS, and there's essentially no cubes by this moment in RHS

Expand All @@ -1020,6 +1056,19 @@ impl WrapperRules {
for in_join_type in
var_list_iter!(egraph[subst[in_join_type_var]], JoinJoinType).cloned()
{
// Left is an ungrouped CubeScan pushed to Cube, so this is always the
// push-to-Cube path: Right/Full are not supported here.
if !Self::is_subquery_join_type_supported(
egraph,
subst,
&meta,
input_data_source_var,
&in_join_type.0,
true,
) {
Comment thread
MazterQyou marked this conversation as resolved.
return false;
}

if !Self::are_join_members_supported(
egraph,
subst[left_members_var],
Expand Down Expand Up @@ -1217,6 +1266,7 @@ impl WrapperRules {
left_push_to_cube_var: &'static str,
right_on_var: &'static str,
in_join_type_var: &'static str,
input_data_source_var: &'static str,
out_join_expr_var: &'static str,
out_join_type_var: &'static str,
out_grouped_subqueries_var: &'static str,
Expand All @@ -1228,12 +1278,15 @@ impl WrapperRules {
let right_on_var = var!(right_on_var);

let in_join_type_var = var!(in_join_type_var);
let input_data_source_var = var!(input_data_source_var);

let out_join_expr_var = var!(out_join_expr_var);
let out_join_type_var = var!(out_join_type_var);
let out_grouped_subqueries_var = var!(out_grouped_subqueries_var);
let out_push_to_cube_var = var!(out_push_to_cube_var);

let meta = self.meta_context.clone();

move |egraph, subst| {
// We are going to generate join with grouped subquery
// TODO Do we have to check stuff like `transform_check_subquery_allowed` is checking:
Expand All @@ -1254,6 +1307,20 @@ impl WrapperRules {
)
.cloned()
{
// Right/Full are only supported on the non-push-to-Cube variant.
// `continue` rather than `return false` so the non-push variant of
// this eclass still gets a chance to match.
if !Self::is_subquery_join_type_supported(
egraph,
subst,
&meta,
input_data_source_var,
&in_join_type.0,
left_push_to_cube.0,
) {
continue;
}

// TODO what's a proper way to find table expression alias?
let Some(right_join_alias) = right_join_on
.iter()
Expand Down
7 changes: 7 additions & 0 deletions rust/cubesql/cubesql/src/compile/test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,8 @@ OFFSET {{ offset }}{% endif %}"#.to_string(),
("expressions/between".to_string(), "{{ expr }} {% if negated %}NOT {% endif %}BETWEEN {{ low }} AND {{ high }}".to_string()),
("join_types/inner".to_string(), "INNER".to_string()),
("join_types/left".to_string(), "LEFT".to_string()),
("join_types/right".to_string(), "RIGHT".to_string()),
("join_types/full".to_string(), "FULL".to_string()),
("quotes/identifiers".to_string(), "\"".to_string()),
("quotes/escape".to_string(), "\"\"".to_string()),
("params/param".to_string(), "${{ param_index + 1 }}".to_string()),
Expand All @@ -778,6 +780,11 @@ OFFSET {{ offset }}{% endif %}"#.to_string(),
("types/binary".to_string(), "BINARY".to_string()),
]
.into_iter().chain(custom_templates)
.collect::<HashMap<_, _>>()
.into_iter()
// Custom template with an empty value removes the base template,
// allowing tests to check behavior of data sources without it
.filter(|(_, value)| !value.is_empty())
.collect(),
false,
)
Expand Down
Loading
Loading