diff --git a/superset/mcp_service/chart/validation/pipeline.py b/superset/mcp_service/chart/validation/pipeline.py index 6923de0bd0b..b2ca1fa791b 100644 --- a/superset/mcp_service/chart/validation/pipeline.py +++ b/superset/mcp_service/chart/validation/pipeline.py @@ -21,6 +21,7 @@ """ import logging +import re from typing import Any, Dict, List, Tuple from superset.mcp_service.chart.schemas import ( @@ -77,13 +78,27 @@ def _sanitize_validation_error(error: Exception) -> str: """SECURITY FIX: Sanitize validation errors to prevent disclosure.""" error_str = str(error) + # Pydantic tagged-union errors prefix the message with a long + # ``1 validation error for tagged-union[...]`` header before the + # per-field body (e.g. ``Value error, ...``, ``Field required``, + # ``Input should be ...``). The body always lives on a line indented + # by exactly two spaces — pull it out so the 200-char truncation + # below doesn't swallow the actionable part. The pydantic footer + # ``\n For further information ...`` uses four-space indent and + # is dropped here. + if "tagged-union[" in error_str: + body_match = re.search(r"\n (?! )", error_str) + if body_match: + idx = body_match.end() + footer_idx = error_str.find("\n For further information", idx) + end = footer_idx if footer_idx != -1 else len(error_str) + error_str = error_str[idx:end].strip() + # SECURITY FIX: Limit length FIRST to prevent ReDoS attacks if len(error_str) > 200: error_str = error_str[:200] + "...[truncated]" # Remove potentially sensitive schema information - import re - sensitive_patterns = [ (r'\btable\s+[\'"`]?(\w+)[\'"`]?', "table [REDACTED]"), (r'\bcolumn\s+[\'"`]?(\w+)[\'"`]?', "column [REDACTED]"), diff --git a/superset/mcp_service/utils/error_builder.py b/superset/mcp_service/utils/error_builder.py index 61df3eb9108..f019092dcc6 100644 --- a/superset/mcp_service/utils/error_builder.py +++ b/superset/mcp_service/utils/error_builder.py @@ -186,6 +186,18 @@ class ChartErrorBuilder: "Check Superset logs for detailed error information", ], }, + "validation_error": { + "message": "Chart configuration is invalid: {reason}", + "details": "{reason}", + "suggestions": [ + "Review the field names and types in your config against the " + "chart_type's schema", + "Call get_chart_type_schema or read the chart://configs resource " + "for valid fields and examples", + "Replace any unknown field names with the ones listed in the " + "error details above", + ], + }, } @classmethod diff --git a/tests/unit_tests/mcp_service/chart/validation/test_pipeline_error_surface.py b/tests/unit_tests/mcp_service/chart/validation/test_pipeline_error_surface.py new file mode 100644 index 00000000000..8ba32ca5dc4 --- /dev/null +++ b/tests/unit_tests/mcp_service/chart/validation/test_pipeline_error_surface.py @@ -0,0 +1,195 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Regression tests for validation-pipeline error surfacing. + +Previously, when ``parse_chart_config`` raised a Pydantic ValueError inside +``ValidationPipeline.validate_request_with_warnings``, the pipeline's generic +``except Exception`` branch routed the sanitized reason through +``ChartErrorBuilder.build_error(template_key='validation_error', ...)``. +That template key did not exist in ``ChartErrorBuilder.TEMPLATES``, so the +builder fell back to the default ``"An error occurred"`` message with empty +details and no suggestions — producing a silent failure for LLM callers. + +The most common trigger was a ``mixed_timeseries`` config that used the XY +field names ``kind`` / ``kind_secondary`` instead of the mixed-timeseries +fields ``primary_kind`` / ``secondary_kind``. +""" + +from unittest.mock import patch + +from superset.mcp_service.chart.validation.pipeline import ValidationPipeline +from superset.mcp_service.utils.error_builder import ChartErrorBuilder + + +def _passthrough_normalize(request, *_args, **_kwargs): + return request + + +def test_validation_error_template_exists() -> None: + """The ``validation_error`` template must be registered so pipeline + failures surface an actionable message and suggestions.""" + assert "validation_error" in ChartErrorBuilder.TEMPLATES + + error = ChartErrorBuilder.build_error( + error_type="validation_system_error", + template_key="validation_error", + template_vars={"reason": "Unknown field 'kind'"}, + error_code="VALIDATION_PIPELINE_ERROR", + ) + dumped = error.model_dump() + assert dumped["message"] != "An error occurred" + assert "Unknown field" in dumped["message"] + assert "Unknown field" in dumped["details"] + assert dumped["suggestions"], "validation_error template must have suggestions" + + +def test_mixed_timeseries_with_wrong_kind_fields_returns_actionable_error() -> None: + """Regression: mixed_timeseries + XY field names (``kind`` / ``kind_secondary``) + used to return an error with empty details and no suggestions.""" + request_data = { + "dataset_id": 1, + "config": { + "chart_type": "mixed_timeseries", + "x": {"name": "order_date"}, + "y": [{"name": "revenue", "aggregate": "SUM"}], + "y_secondary": [{"name": "orders", "aggregate": "COUNT"}], + "kind": "line", + "kind_secondary": "bar", + }, + } + + with ( + patch.object(ValidationPipeline, "_get_dataset_context", return_value=None), + patch.object( + ValidationPipeline, "_validate_dataset", return_value=(True, None) + ), + patch.object( + ValidationPipeline, "_validate_runtime", return_value=(True, None) + ), + patch.object( + ValidationPipeline, + "_normalize_column_names", + side_effect=_passthrough_normalize, + ), + ): + result = ValidationPipeline.validate_request_with_warnings(request_data) + + assert result.is_valid is False + assert result.error is not None + dumped = result.error.model_dump() + + # The bug symptom was a generic empty message — verify both message and + # details carry the actionable reason now. + assert dumped["message"] != "An error occurred" + assert dumped["details"] != "" + assert "kind" in dumped["details"] + # Ugly Pydantic tagged-union prefix must be stripped from the surfaced body. + assert "tagged-union" not in dumped["details"] + assert "tagged-union" not in dumped["message"] + # Suggestions should steer callers back to a valid schema. + assert dumped["suggestions"] + assert dumped["error_code"] == "VALIDATION_PIPELINE_ERROR" + + +def test_valid_mixed_timeseries_config_passes() -> None: + """Sanity check: the correct field names still validate successfully.""" + request_data = { + "dataset_id": 1, + "config": { + "chart_type": "mixed_timeseries", + "x": {"name": "order_date"}, + "y": [{"name": "revenue", "aggregate": "SUM"}], + "y_secondary": [{"name": "orders", "aggregate": "COUNT"}], + "primary_kind": "line", + "secondary_kind": "bar", + }, + } + + with ( + patch.object(ValidationPipeline, "_get_dataset_context", return_value=None), + patch.object( + ValidationPipeline, "_validate_dataset", return_value=(True, None) + ), + patch.object( + ValidationPipeline, "_validate_runtime", return_value=(True, None) + ), + patch.object( + ValidationPipeline, + "_normalize_column_names", + side_effect=_passthrough_normalize, + ), + ): + result = ValidationPipeline.validate_request_with_warnings(request_data) + + assert result.is_valid is True + assert result.error is None + + +def test_non_value_error_pydantic_body_is_surfaced() -> None: + """The tagged-union cleanup must also handle non-``Value error`` bodies + like ``Input should be ...`` (literal_error from Pydantic enums). + + Regression for the case where the sanitizer only matched ``Value error,`` + and left the long tagged-union header attached, causing the 200-char + truncation to swallow the actionable message. + """ + # Invalid aggregate value — triggers a Pydantic literal_error whose body + # starts with ``Input should be 'SUM', 'COUNT', ...``, not ``Value error,``. + request_data = { + "dataset_id": 1, + "config": { + "chart_type": "pie", + "dimension": {"name": "product"}, + "metric": {"name": "revenue", "aggregate": "BOGUS"}, + }, + } + + with ( + patch.object(ValidationPipeline, "_get_dataset_context", return_value=None), + patch.object( + ValidationPipeline, "_validate_dataset", return_value=(True, None) + ), + patch.object( + ValidationPipeline, "_validate_runtime", return_value=(True, None) + ), + patch.object( + ValidationPipeline, + "_normalize_column_names", + side_effect=_passthrough_normalize, + ), + ): + result = ValidationPipeline.validate_request_with_warnings(request_data) + + assert result.is_valid is False + assert result.error is not None + dumped = result.error.model_dump() + assert dumped["error_code"] == "VALIDATION_PIPELINE_ERROR" + assert "tagged-union" not in dumped["message"] + assert "tagged-union" not in dumped["details"] + # The actionable body must survive the 200-char truncation. + assert "Input should be" in dumped["details"] + + +def test_validation_error_template_suggestions_are_chart_type_agnostic() -> None: + """The ``validation_error`` template fires for every chart type, so its + suggestions must not mention specific chart-type field names.""" + template = ChartErrorBuilder.TEMPLATES["validation_error"] + joined = " ".join(template["suggestions"]).lower() + assert "mixed_timeseries" not in joined + assert "primary_kind" not in joined + assert "secondary_kind" not in joined