Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions superset/mcp_service/chart/validation/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,25 @@ def _get_generic_error_message(error_str: str) -> str | None:

def _sanitize_validation_error(error: Exception) -> str:
"""SECURITY FIX: Sanitize validation errors to prevent disclosure."""
import re
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be a module level import? I think this function level imports is the pattern in this file though so not sure

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, I'm going to move it on the top as a standard Python convention


error_str = str(error)

# Pydantic tagged-union errors prefix the message with a long
# ``1 validation error for tagged-union[...]`` header before the
# actionable ``Value error, ...`` body — extract the body so the
# 200-char truncation below doesn't swallow the useful part.
if "tagged-union[" in error_str and "Value error," in error_str:
idx = error_str.find("Value error,")
footer = error_str.find("\n For further information")
Comment thread
EnxDev marked this conversation as resolved.
Outdated
end = footer if footer != -1 else len(error_str)
error_str = error_str[idx:end].strip()

# SECURITY FIX: Limit length FIRST to prevent ReDoS attacks
if len(error_str) > 200:
error_str = error_str[:200] + "...[truncated]"

# Remove potentially sensitive schema information
import re

sensitive_patterns = [
(r'\btable\s+[\'"`]?(\w+)[\'"`]?', "table [REDACTED]"),
(r'\bcolumn\s+[\'"`]?(\w+)[\'"`]?', "column [REDACTED]"),
Expand Down
12 changes: 12 additions & 0 deletions superset/mcp_service/utils/error_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,18 @@ class ChartErrorBuilder:
"Check Superset logs for detailed error information",
],
},
"validation_error": {
"message": "Chart configuration is invalid: {reason}",
"details": "{reason}",
"suggestions": [
"Review the field names and types in your config against the "
"chart_type's schema",
"Call get_chart_type_schema or read the chart://configs resource "
"for valid fields and examples",
"For mixed_timeseries charts, use 'primary_kind' and "
"'secondary_kind' (not 'kind' / 'kind_secondary')",
Comment thread
EnxDev marked this conversation as resolved.
Outdated
],
},
}

@classmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Regression tests for validation-pipeline error surfacing.

Previously, when ``parse_chart_config`` raised a Pydantic ValueError inside
``ValidationPipeline.validate_request_with_warnings``, the pipeline's generic
``except Exception`` branch routed the sanitized reason through
``ChartErrorBuilder.build_error(template_key='validation_error', ...)``.
That template key did not exist in ``ChartErrorBuilder.TEMPLATES``, so the
builder fell back to the default ``"An error occurred"`` message with empty
details and no suggestions — producing a silent failure for LLM callers.

The most common trigger was a ``mixed_timeseries`` config that used the XY
field names ``kind`` / ``kind_secondary`` instead of the mixed-timeseries
fields ``primary_kind`` / ``secondary_kind``.
"""

from unittest.mock import patch

from superset.mcp_service.chart.validation.pipeline import ValidationPipeline
from superset.mcp_service.utils.error_builder import ChartErrorBuilder


def _passthrough_normalize(request, *_args, **_kwargs):
return request


def test_validation_error_template_exists() -> None:
"""The ``validation_error`` template must be registered so pipeline
failures surface an actionable message and suggestions."""
assert "validation_error" in ChartErrorBuilder.TEMPLATES

error = ChartErrorBuilder.build_error(
error_type="validation_system_error",
template_key="validation_error",
template_vars={"reason": "Unknown field 'kind'"},
error_code="VALIDATION_PIPELINE_ERROR",
)
dumped = error.model_dump()
assert dumped["message"] != "An error occurred"
assert "Unknown field" in dumped["message"]
assert "Unknown field" in dumped["details"]
assert dumped["suggestions"], "validation_error template must have suggestions"


def test_mixed_timeseries_with_wrong_kind_fields_returns_actionable_error() -> None:
"""Regression: mixed_timeseries + XY field names (``kind`` / ``kind_secondary``)
used to return an error with empty details and no suggestions."""
request_data = {
"dataset_id": 1,
"config": {
"chart_type": "mixed_timeseries",
"x": {"name": "order_date"},
"y": [{"name": "revenue", "aggregate": "SUM"}],
"y_secondary": [{"name": "orders", "aggregate": "COUNT"}],
"kind": "line",
"kind_secondary": "bar",
},
}

with (
patch.object(ValidationPipeline, "_get_dataset_context", return_value=None),
patch.object(
ValidationPipeline, "_validate_dataset", return_value=(True, None)
),
patch.object(
ValidationPipeline, "_validate_runtime", return_value=(True, None)
),
patch.object(
ValidationPipeline,
"_normalize_column_names",
side_effect=_passthrough_normalize,
),
):
result = ValidationPipeline.validate_request_with_warnings(request_data)

assert result.is_valid is False
assert result.error is not None
dumped = result.error.model_dump()

# The bug symptom was a generic empty message — verify both message and
# details carry the actionable reason now.
assert dumped["message"] != "An error occurred"
assert dumped["details"] != ""
assert "kind" in dumped["details"]
# Ugly Pydantic tagged-union prefix must be stripped from the surfaced body.
assert "tagged-union" not in dumped["details"]
assert "tagged-union" not in dumped["message"]
# Suggestions should steer callers back to a valid schema.
assert dumped["suggestions"]
assert dumped["error_code"] == "VALIDATION_PIPELINE_ERROR"


def test_valid_mixed_timeseries_config_passes() -> None:
"""Sanity check: the correct field names still validate successfully."""
request_data = {
"dataset_id": 1,
"config": {
"chart_type": "mixed_timeseries",
"x": {"name": "order_date"},
"y": [{"name": "revenue", "aggregate": "SUM"}],
"y_secondary": [{"name": "orders", "aggregate": "COUNT"}],
"primary_kind": "line",
"secondary_kind": "bar",
},
}

with (
patch.object(ValidationPipeline, "_get_dataset_context", return_value=None),
patch.object(
ValidationPipeline, "_validate_dataset", return_value=(True, None)
),
patch.object(
ValidationPipeline, "_validate_runtime", return_value=(True, None)
),
patch.object(
ValidationPipeline,
"_normalize_column_names",
side_effect=_passthrough_normalize,
),
):
result = ValidationPipeline.validate_request_with_warnings(request_data)

assert result.is_valid is True
assert result.error is None
Loading