apache · EnxDev · Apr 23, 2026 · Apr 21, 2026 · Apr 21, 2026 · alexandrusoare
diff --git a/superset/mcp_service/chart/validation/pipeline.py b/superset/mcp_service/chart/validation/pipeline.py
@@ -21,6 +21,7 @@
 """
 
 import logging
+import re
 from typing import Any, Dict, List, Tuple
 
 from superset.mcp_service.chart.schemas import (
@@ -77,13 +78,27 @@ def _sanitize_validation_error(error: Exception) -> str:
     """SECURITY FIX: Sanitize validation errors to prevent disclosure."""
     error_str = str(error)
 
+    # Pydantic tagged-union errors prefix the message with a long
+    # ``1 validation error for tagged-union[...]`` header before the
+    # per-field body (e.g. ``Value error, ...``, ``Field required``,
+    # ``Input should be ...``). The body always lives on a line indented
+    # by exactly two spaces — pull it out so the 200-char truncation
+    # below doesn't swallow the actionable part. The pydantic footer
+    # ``\n    For further information ...`` uses four-space indent and
+    # is dropped here.
+    if "tagged-union[" in error_str:
+        body_match = re.search(r"\n  (?! )", error_str)
+        if body_match:
+            idx = body_match.end()
+            footer_idx = error_str.find("\n    For further information", idx)
+            end = footer_idx if footer_idx != -1 else len(error_str)
+            error_str = error_str[idx:end].strip()
+
     # SECURITY FIX: Limit length FIRST to prevent ReDoS attacks
     if len(error_str) > 200:
         error_str = error_str[:200] + "...[truncated]"
 
     # Remove potentially sensitive schema information
-    import re
-
     sensitive_patterns = [
         (r'\btable\s+[\'"`]?(\w+)[\'"`]?', "table [REDACTED]"),
         (r'\bcolumn\s+[\'"`]?(\w+)[\'"`]?', "column [REDACTED]"),

diff --git a/superset/mcp_service/utils/error_builder.py b/superset/mcp_service/utils/error_builder.py
@@ -186,6 +186,18 @@ class ChartErrorBuilder:
                 "Check Superset logs for detailed error information",
             ],
         },
+        "validation_error": {
+            "message": "Chart configuration is invalid: {reason}",
+            "details": "{reason}",
+            "suggestions": [
+                "Review the field names and types in your config against the "
+                "chart_type's schema",
+                "Call get_chart_type_schema or read the chart://configs resource "
+                "for valid fields and examples",
+                "Replace any unknown field names with the ones listed in the "
+                "error details above",
+            ],
+        },
     }
 
     @classmethod

diff --git a/tests/unit_tests/mcp_service/chart/validation/test_pipeline_error_surface.py b/tests/unit_tests/mcp_service/chart/validation/test_pipeline_error_surface.py
@@ -0,0 +1,195 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Regression tests for validation-pipeline error surfacing.
+
+Previously, when ``parse_chart_config`` raised a Pydantic ValueError inside
+``ValidationPipeline.validate_request_with_warnings``, the pipeline's generic
+``except Exception`` branch routed the sanitized reason through
+``ChartErrorBuilder.build_error(template_key='validation_error', ...)``.
+That template key did not exist in ``ChartErrorBuilder.TEMPLATES``, so the
+builder fell back to the default ``"An error occurred"`` message with empty
+details and no suggestions — producing a silent failure for LLM callers.
+
+The most common trigger was a ``mixed_timeseries`` config that used the XY
+field names ``kind`` / ``kind_secondary`` instead of the mixed-timeseries
+fields ``primary_kind`` / ``secondary_kind``.
+"""
+
+from unittest.mock import patch
+
+from superset.mcp_service.chart.validation.pipeline import ValidationPipeline
+from superset.mcp_service.utils.error_builder import ChartErrorBuilder
+
+
+def _passthrough_normalize(request, *_args, **_kwargs):
+    return request
+
+
+def test_validation_error_template_exists() -> None:
+    """The ``validation_error`` template must be registered so pipeline
+    failures surface an actionable message and suggestions."""
+    assert "validation_error" in ChartErrorBuilder.TEMPLATES
+
+    error = ChartErrorBuilder.build_error(
+        error_type="validation_system_error",
+        template_key="validation_error",
+        template_vars={"reason": "Unknown field 'kind'"},
+        error_code="VALIDATION_PIPELINE_ERROR",
+    )
+    dumped = error.model_dump()
+    assert dumped["message"] != "An error occurred"
+    assert "Unknown field" in dumped["message"]
+    assert "Unknown field" in dumped["details"]
+    assert dumped["suggestions"], "validation_error template must have suggestions"
+
+
+def test_mixed_timeseries_with_wrong_kind_fields_returns_actionable_error() -> None:
+    """Regression: mixed_timeseries + XY field names (``kind`` / ``kind_secondary``)
+    used to return an error with empty details and no suggestions."""
+    request_data = {
+        "dataset_id": 1,
+        "config": {
+            "chart_type": "mixed_timeseries",
+            "x": {"name": "order_date"},
+            "y": [{"name": "revenue", "aggregate": "SUM"}],
+            "y_secondary": [{"name": "orders", "aggregate": "COUNT"}],
+            "kind": "line",
+            "kind_secondary": "bar",
+        },
+    }
+
+    with (
+        patch.object(ValidationPipeline, "_get_dataset_context", return_value=None),
+        patch.object(
+            ValidationPipeline, "_validate_dataset", return_value=(True, None)
+        ),
+        patch.object(
+            ValidationPipeline, "_validate_runtime", return_value=(True, None)
+        ),
+        patch.object(
+            ValidationPipeline,
+            "_normalize_column_names",
+            side_effect=_passthrough_normalize,
+        ),
+    ):
+        result = ValidationPipeline.validate_request_with_warnings(request_data)
+
+    assert result.is_valid is False
+    assert result.error is not None
+    dumped = result.error.model_dump()
+
+    # The bug symptom was a generic empty message — verify both message and
+    # details carry the actionable reason now.
+    assert dumped["message"] != "An error occurred"
+    assert dumped["details"] != ""
+    assert "kind" in dumped["details"]
+    # Ugly Pydantic tagged-union prefix must be stripped from the surfaced body.
+    assert "tagged-union" not in dumped["details"]
+    assert "tagged-union" not in dumped["message"]
+    # Suggestions should steer callers back to a valid schema.
+    assert dumped["suggestions"]
+    assert dumped["error_code"] == "VALIDATION_PIPELINE_ERROR"
+
+
+def test_valid_mixed_timeseries_config_passes() -> None:
+    """Sanity check: the correct field names still validate successfully."""
+    request_data = {
+        "dataset_id": 1,
+        "config": {
+            "chart_type": "mixed_timeseries",
+            "x": {"name": "order_date"},
+            "y": [{"name": "revenue", "aggregate": "SUM"}],
+            "y_secondary": [{"name": "orders", "aggregate": "COUNT"}],
+            "primary_kind": "line",
+            "secondary_kind": "bar",
+        },
+    }
+
+    with (
+        patch.object(ValidationPipeline, "_get_dataset_context", return_value=None),
+        patch.object(
+            ValidationPipeline, "_validate_dataset", return_value=(True, None)
+        ),
+        patch.object(
+            ValidationPipeline, "_validate_runtime", return_value=(True, None)
+        ),
+        patch.object(
+            ValidationPipeline,
+            "_normalize_column_names",
+            side_effect=_passthrough_normalize,
+        ),
+    ):
+        result = ValidationPipeline.validate_request_with_warnings(request_data)
+
+    assert result.is_valid is True
+    assert result.error is None
+
+
+def test_non_value_error_pydantic_body_is_surfaced() -> None:
+    """The tagged-union cleanup must also handle non-``Value error`` bodies
+    like ``Input should be ...`` (literal_error from Pydantic enums).
+
+    Regression for the case where the sanitizer only matched ``Value error,``
+    and left the long tagged-union header attached, causing the 200-char
+    truncation to swallow the actionable message.
+    """
+    # Invalid aggregate value — triggers a Pydantic literal_error whose body
+    # starts with ``Input should be 'SUM', 'COUNT', ...``, not ``Value error,``.
+    request_data = {
+        "dataset_id": 1,
+        "config": {
+            "chart_type": "pie",
+            "dimension": {"name": "product"},
+            "metric": {"name": "revenue", "aggregate": "BOGUS"},
+        },
+    }
+
+    with (
+        patch.object(ValidationPipeline, "_get_dataset_context", return_value=None),
+        patch.object(
+            ValidationPipeline, "_validate_dataset", return_value=(True, None)
+        ),
+        patch.object(
+            ValidationPipeline, "_validate_runtime", return_value=(True, None)
+        ),
+        patch.object(
+            ValidationPipeline,
+            "_normalize_column_names",
+            side_effect=_passthrough_normalize,
+        ),
+    ):
+        result = ValidationPipeline.validate_request_with_warnings(request_data)
+
+    assert result.is_valid is False
+    assert result.error is not None
+    dumped = result.error.model_dump()
+    assert dumped["error_code"] == "VALIDATION_PIPELINE_ERROR"
+    assert "tagged-union" not in dumped["message"]
+    assert "tagged-union" not in dumped["details"]
+    # The actionable body must survive the 200-char truncation.
+    assert "Input should be" in dumped["details"]
+
+
+def test_validation_error_template_suggestions_are_chart_type_agnostic() -> None:
+    """The ``validation_error`` template fires for every chart type, so its
+    suggestions must not mention specific chart-type field names."""
+    template = ChartErrorBuilder.TEMPLATES["validation_error"]
+    joined = " ".join(template["suggestions"]).lower()
+    assert "mixed_timeseries" not in joined
+    assert "primary_kind" not in joined
+    assert "secondary_kind" not in joined