Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions superset/common/query_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,11 +360,18 @@ def _sanitize_filters(self) -> None:
engine = database.db_engine_spec.engine

if needs_transpilation:
clause = transpile_to_dialect(clause, engine)
# source_engine=engine ensures idempotency: this
# method can run more than once (validate() is called
# from both raise_for_access and get_df_payload), so
# the second pass must be able to re-parse the
# dialect-specific output (e.g. BigQuery backticks)
# produced by the first pass.
clause = transpile_to_dialect(
clause, engine, source_engine=engine, identify=True
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This calls transpile_to_dialect with source and target as the same engine? Maybe a comment on why this is necessary

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added

)

sanitized_clause = sanitize_clause(clause, engine)
if sanitized_clause != clause:
self.extras[param] = sanitized_clause
self.extras[param] = sanitized_clause
Comment on lines -366 to +374
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a test here to prevent a regression?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added

except QueryClauseValidationException as ex:
raise QueryObjectValidationError(ex.message) from ex

Expand Down
3 changes: 3 additions & 0 deletions superset/sql/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1568,6 +1568,7 @@ def transpile_to_dialect(
sql: str,
target_engine: str,
source_engine: str | None = None,
identify: bool = False,
) -> str:
"""
Transpile SQL from one database dialect to another using SQLGlot.
Expand All @@ -1576,6 +1577,7 @@ def transpile_to_dialect(
sql: The SQL query to transpile
target_engine: The target database engine (e.g., "mysql", "postgresql")
source_engine: The source database engine. If None, uses generic SQL dialect.
identify: If True, quote all identifiers per the target dialect.

Returns:
The transpiled SQL string
Expand All @@ -1598,6 +1600,7 @@ def transpile_to_dialect(
copy=True,
comments=False,
pretty=False,
identify=identify,
)
except ParseError as ex:
raise QueryClauseValidationException(f"Cannot parse SQL clause: {sql}") from ex
Expand Down
96 changes: 96 additions & 0 deletions tests/unit_tests/sql/transpile_to_dialect_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,3 +396,99 @@ def test_transpile_unknown_source_engine_uses_generic() -> None:
"SELECT * FROM orders", "postgresql", "unknown_engine"
)
assert result == "SELECT * FROM orders"


# Tests for identify=True (identifier quoting)
@pytest.mark.parametrize(
"sql,dialect,expected",
[
# PostgreSQL - double-quoted identifiers
(
"STATE ILIKE '%AL%'",
"postgresql",
"\"STATE\" ILIKE '%AL%'",
),
# MySQL - backtick-quoted identifiers, ILIKE transpiled
(
"STATE ILIKE '%AL%'",
"mysql",
"LOWER(`STATE`) LIKE LOWER('%AL%')",
),
# BigQuery - backtick-quoted identifiers, ILIKE transpiled
(
"STATE ILIKE '%AL%'",
"bigquery",
"LOWER(`STATE`) LIKE LOWER('%AL%')",
),
# Snowflake - double-quoted identifiers
(
"STATE ILIKE '%AL%'",
"snowflake",
"\"STATE\" ILIKE '%AL%'",
),
# MSSQL - bracket-quoted identifiers
(
"STATE = 'CA'",
"mssql",
"[STATE] = 'CA'",
),
# Compound filter with multiple identifiers
(
"STATE = 'CA' AND AIRLINE = 'Delta'",
"postgresql",
"\"STATE\" = 'CA' AND \"AIRLINE\" = 'Delta'",
),
# Lowercase identifiers also get quoted
(
"name = 'test'",
"postgresql",
"\"name\" = 'test'",
),
],
)
def test_identify_quotes_identifiers(sql: str, dialect: str, expected: str) -> None:
"""Test that identify=True quotes identifiers per target dialect."""
assert transpile_to_dialect(sql, dialect, identify=True) == expected


def test_identify_unknown_engine_returns_unchanged() -> None:
"""Test that identify=True has no effect on unknown engines."""
sql = "STATE = 'CA'"
assert transpile_to_dialect(sql, "unknown_engine", identify=True) == sql


@pytest.mark.parametrize(
"sql,engine,expected",
[
(
"STATE ILIKE '%AL%'",
"postgresql",
"\"STATE\" ILIKE '%AL%'",
),
(
"country ILIKE '%Italy%'",
"bigquery",
"LOWER(`country`) LIKE LOWER('%Italy%')",
),
],
)
def test_identify_with_source_engine(sql: str, engine: str, expected: str) -> None:
"""Test identify=True with source_engine matching target engine."""
result = transpile_to_dialect(sql, engine, source_engine=engine, identify=True)
assert result == expected


@pytest.mark.parametrize(
"engine",
["postgresql", "bigquery", "mysql", "snowflake"],
)
def test_identify_transpilation_is_idempotent(engine: str) -> None:
"""Test that transpiling twice produces the same result (idempotent).

This matters because _sanitize_filters() can be called multiple times
via validate().
"""
clause = "STATE ILIKE '%AL%'"
pass1 = transpile_to_dialect(clause, engine, source_engine=engine, identify=True)
pass2 = transpile_to_dialect(pass1, engine, source_engine=engine, identify=True)
assert pass1 == pass2
Loading