apache · EnxDev · Apr 23, 2026 · Apr 21, 2026 · Apr 21, 2026 · msyavuz
diff --git a/superset/mcp_service/chart/validation/pipeline.py b/superset/mcp_service/chart/validation/pipeline.py
@@ -75,15 +75,25 @@ def _get_generic_error_message(error_str: str) -> str | None:
 
 def _sanitize_validation_error(error: Exception) -> str:
     """SECURITY FIX: Sanitize validation errors to prevent disclosure."""
+    import re
+
     error_str = str(error)
 
+    # Pydantic tagged-union errors prefix the message with a long
+    # ``1 validation error for tagged-union[...]`` header before the
+    # actionable ``Value error, ...`` body — extract the body so the
+    # 200-char truncation below doesn't swallow the useful part.
+    if "tagged-union[" in error_str and "Value error," in error_str:
+        idx = error_str.find("Value error,")
+        footer = error_str.find("\n    For further information")
+        end = footer if footer != -1 else len(error_str)
+        error_str = error_str[idx:end].strip()
+
     # SECURITY FIX: Limit length FIRST to prevent ReDoS attacks
     if len(error_str) > 200:
         error_str = error_str[:200] + "...[truncated]"
 
     # Remove potentially sensitive schema information
-    import re
-
     sensitive_patterns = [
         (r'\btable\s+[\'"`]?(\w+)[\'"`]?', "table [REDACTED]"),
         (r'\bcolumn\s+[\'"`]?(\w+)[\'"`]?', "column [REDACTED]"),

diff --git a/superset/mcp_service/utils/error_builder.py b/superset/mcp_service/utils/error_builder.py
@@ -186,6 +186,18 @@ class ChartErrorBuilder:
                 "Check Superset logs for detailed error information",
             ],
         },
+        "validation_error": {
+            "message": "Chart configuration is invalid: {reason}",
+            "details": "{reason}",
+            "suggestions": [
+                "Review the field names and types in your config against the "
+                "chart_type's schema",
+                "Call get_chart_type_schema or read the chart://configs resource "
+                "for valid fields and examples",
+                "For mixed_timeseries charts, use 'primary_kind' and "
+                "'secondary_kind' (not 'kind' / 'kind_secondary')",
+            ],
+        },
     }
 
     @classmethod

diff --git a/tests/unit_tests/mcp_service/chart/validation/test_pipeline_error_surface.py b/tests/unit_tests/mcp_service/chart/validation/test_pipeline_error_surface.py
@@ -0,0 +1,140 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Regression tests for validation-pipeline error surfacing.
+
+Previously, when ``parse_chart_config`` raised a Pydantic ValueError inside
+``ValidationPipeline.validate_request_with_warnings``, the pipeline's generic
+``except Exception`` branch routed the sanitized reason through
+``ChartErrorBuilder.build_error(template_key='validation_error', ...)``.
+That template key did not exist in ``ChartErrorBuilder.TEMPLATES``, so the
+builder fell back to the default ``"An error occurred"`` message with empty
+details and no suggestions — producing a silent failure for LLM callers.
+
+The most common trigger was a ``mixed_timeseries`` config that used the XY
+field names ``kind`` / ``kind_secondary`` instead of the mixed-timeseries
+fields ``primary_kind`` / ``secondary_kind``.
+"""
+
+from unittest.mock import patch
+
+from superset.mcp_service.chart.validation.pipeline import ValidationPipeline
+from superset.mcp_service.utils.error_builder import ChartErrorBuilder
+
+
+def _passthrough_normalize(request, *_args, **_kwargs):
+    return request
+
+
+def test_validation_error_template_exists() -> None:
+    """The ``validation_error`` template must be registered so pipeline
+    failures surface an actionable message and suggestions."""
+    assert "validation_error" in ChartErrorBuilder.TEMPLATES
+
+    error = ChartErrorBuilder.build_error(
+        error_type="validation_system_error",
+        template_key="validation_error",
+        template_vars={"reason": "Unknown field 'kind'"},
+        error_code="VALIDATION_PIPELINE_ERROR",
+    )
+    dumped = error.model_dump()
+    assert dumped["message"] != "An error occurred"
+    assert "Unknown field" in dumped["message"]
+    assert "Unknown field" in dumped["details"]
+    assert dumped["suggestions"], "validation_error template must have suggestions"
+
+
+def test_mixed_timeseries_with_wrong_kind_fields_returns_actionable_error() -> None:
+    """Regression: mixed_timeseries + XY field names (``kind`` / ``kind_secondary``)
+    used to return an error with empty details and no suggestions."""
+    request_data = {
+        "dataset_id": 1,
+        "config": {
+            "chart_type": "mixed_timeseries",
+            "x": {"name": "order_date"},
+            "y": [{"name": "revenue", "aggregate": "SUM"}],
+            "y_secondary": [{"name": "orders", "aggregate": "COUNT"}],
+            "kind": "line",
+            "kind_secondary": "bar",
+        },
+    }
+
+    with (
+        patch.object(ValidationPipeline, "_get_dataset_context", return_value=None),
+        patch.object(
+            ValidationPipeline, "_validate_dataset", return_value=(True, None)
+        ),
+        patch.object(
+            ValidationPipeline, "_validate_runtime", return_value=(True, None)
+        ),
+        patch.object(
+            ValidationPipeline,
+            "_normalize_column_names",
+            side_effect=_passthrough_normalize,
+        ),
+    ):
+        result = ValidationPipeline.validate_request_with_warnings(request_data)
+
+    assert result.is_valid is False
+    assert result.error is not None
+    dumped = result.error.model_dump()
+
+    # The bug symptom was a generic empty message — verify both message and
+    # details carry the actionable reason now.
+    assert dumped["message"] != "An error occurred"
+    assert dumped["details"] != ""
+    assert "kind" in dumped["details"]
+    # Ugly Pydantic tagged-union prefix must be stripped from the surfaced body.
+    assert "tagged-union" not in dumped["details"]
+    assert "tagged-union" not in dumped["message"]
+    # Suggestions should steer callers back to a valid schema.
+    assert dumped["suggestions"]
+    assert dumped["error_code"] == "VALIDATION_PIPELINE_ERROR"
+
+
+def test_valid_mixed_timeseries_config_passes() -> None:
+    """Sanity check: the correct field names still validate successfully."""
+    request_data = {
+        "dataset_id": 1,
+        "config": {
+            "chart_type": "mixed_timeseries",
+            "x": {"name": "order_date"},
+            "y": [{"name": "revenue", "aggregate": "SUM"}],
+            "y_secondary": [{"name": "orders", "aggregate": "COUNT"}],
+            "primary_kind": "line",
+            "secondary_kind": "bar",
+        },
+    }
+
+    with (
+        patch.object(ValidationPipeline, "_get_dataset_context", return_value=None),
+        patch.object(
+            ValidationPipeline, "_validate_dataset", return_value=(True, None)
+        ),
+        patch.object(
+            ValidationPipeline, "_validate_runtime", return_value=(True, None)
+        ),
+        patch.object(
+            ValidationPipeline,
+            "_normalize_column_names",
+            side_effect=_passthrough_normalize,
+        ),
+    ):
+        result = ValidationPipeline.validate_request_with_warnings(request_data)
+
+    assert result.is_valid is True
+    assert result.error is None