Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions superset/mcp_service/chart/validation/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"""

import logging
import re
from typing import Any, Dict, List, Tuple

from superset.mcp_service.chart.schemas import (
Expand Down Expand Up @@ -77,13 +78,27 @@ def _sanitize_validation_error(error: Exception) -> str:
"""SECURITY FIX: Sanitize validation errors to prevent disclosure."""
error_str = str(error)

# Pydantic tagged-union errors prefix the message with a long
# ``1 validation error for tagged-union[...]`` header before the
# per-field body (e.g. ``Value error, ...``, ``Field required``,
# ``Input should be ...``). The body always lives on a line indented
# by exactly two spaces — pull it out so the 200-char truncation
# below doesn't swallow the actionable part. The pydantic footer
# ``\n For further information ...`` uses four-space indent and
# is dropped here.
if "tagged-union[" in error_str:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this string a constant, and if it is maybe we should give it to a variable?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, replaced!

body_match = re.search(r"\n (?! )", error_str)
if body_match:
idx = body_match.end()
footer_idx = error_str.find("\n For further information", idx)
end = footer_idx if footer_idx != -1 else len(error_str)
error_str = error_str[idx:end].strip()

# SECURITY FIX: Limit length FIRST to prevent ReDoS attacks
if len(error_str) > 200:
error_str = error_str[:200] + "...[truncated]"

# Remove potentially sensitive schema information
import re

sensitive_patterns = [
(r'\btable\s+[\'"`]?(\w+)[\'"`]?', "table [REDACTED]"),
(r'\bcolumn\s+[\'"`]?(\w+)[\'"`]?', "column [REDACTED]"),
Expand Down
12 changes: 12 additions & 0 deletions superset/mcp_service/utils/error_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,18 @@ class ChartErrorBuilder:
"Check Superset logs for detailed error information",
],
},
"validation_error": {
"message": "Chart configuration is invalid: {reason}",
"details": "{reason}",
"suggestions": [
"Review the field names and types in your config against the "
"chart_type's schema",
"Call get_chart_type_schema or read the chart://configs resource "
"for valid fields and examples",
"Replace any unknown field names with the ones listed in the "
"error details above",
],
},
}

@classmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Regression tests for validation-pipeline error surfacing.

Previously, when ``parse_chart_config`` raised a Pydantic ValueError inside
``ValidationPipeline.validate_request_with_warnings``, the pipeline's generic
``except Exception`` branch routed the sanitized reason through
``ChartErrorBuilder.build_error(template_key='validation_error', ...)``.
That template key did not exist in ``ChartErrorBuilder.TEMPLATES``, so the
builder fell back to the default ``"An error occurred"`` message with empty
details and no suggestions — producing a silent failure for LLM callers.

The most common trigger was a ``mixed_timeseries`` config that used the XY
field names ``kind`` / ``kind_secondary`` instead of the mixed-timeseries
fields ``primary_kind`` / ``secondary_kind``.
"""

from unittest.mock import patch

from superset.mcp_service.chart.validation.pipeline import ValidationPipeline
from superset.mcp_service.utils.error_builder import ChartErrorBuilder


def _passthrough_normalize(request, *_args, **_kwargs):
return request


def test_validation_error_template_exists() -> None:
"""The ``validation_error`` template must be registered so pipeline
failures surface an actionable message and suggestions."""
assert "validation_error" in ChartErrorBuilder.TEMPLATES

error = ChartErrorBuilder.build_error(
error_type="validation_system_error",
template_key="validation_error",
template_vars={"reason": "Unknown field 'kind'"},
error_code="VALIDATION_PIPELINE_ERROR",
)
dumped = error.model_dump()
assert dumped["message"] != "An error occurred"
assert "Unknown field" in dumped["message"]
assert "Unknown field" in dumped["details"]
assert dumped["suggestions"], "validation_error template must have suggestions"


def test_mixed_timeseries_with_wrong_kind_fields_returns_actionable_error() -> None:
"""Regression: mixed_timeseries + XY field names (``kind`` / ``kind_secondary``)
used to return an error with empty details and no suggestions."""
request_data = {
"dataset_id": 1,
"config": {
"chart_type": "mixed_timeseries",
"x": {"name": "order_date"},
"y": [{"name": "revenue", "aggregate": "SUM"}],
"y_secondary": [{"name": "orders", "aggregate": "COUNT"}],
"kind": "line",
"kind_secondary": "bar",
},
}

with (
patch.object(ValidationPipeline, "_get_dataset_context", return_value=None),
patch.object(
ValidationPipeline, "_validate_dataset", return_value=(True, None)
),
patch.object(
ValidationPipeline, "_validate_runtime", return_value=(True, None)
),
patch.object(
ValidationPipeline,
"_normalize_column_names",
side_effect=_passthrough_normalize,
),
):
result = ValidationPipeline.validate_request_with_warnings(request_data)

assert result.is_valid is False
assert result.error is not None
dumped = result.error.model_dump()

# The bug symptom was a generic empty message — verify both message and
# details carry the actionable reason now.
assert dumped["message"] != "An error occurred"
assert dumped["details"] != ""
assert "kind" in dumped["details"]
# Ugly Pydantic tagged-union prefix must be stripped from the surfaced body.
assert "tagged-union" not in dumped["details"]
assert "tagged-union" not in dumped["message"]
# Suggestions should steer callers back to a valid schema.
assert dumped["suggestions"]
assert dumped["error_code"] == "VALIDATION_PIPELINE_ERROR"


def test_valid_mixed_timeseries_config_passes() -> None:
"""Sanity check: the correct field names still validate successfully."""
request_data = {
"dataset_id": 1,
"config": {
"chart_type": "mixed_timeseries",
"x": {"name": "order_date"},
"y": [{"name": "revenue", "aggregate": "SUM"}],
"y_secondary": [{"name": "orders", "aggregate": "COUNT"}],
"primary_kind": "line",
"secondary_kind": "bar",
},
}

with (
patch.object(ValidationPipeline, "_get_dataset_context", return_value=None),
patch.object(
ValidationPipeline, "_validate_dataset", return_value=(True, None)
),
patch.object(
ValidationPipeline, "_validate_runtime", return_value=(True, None)
),
patch.object(
ValidationPipeline,
"_normalize_column_names",
side_effect=_passthrough_normalize,
),
):
result = ValidationPipeline.validate_request_with_warnings(request_data)

assert result.is_valid is True
assert result.error is None


def test_non_value_error_pydantic_body_is_surfaced() -> None:
"""The tagged-union cleanup must also handle non-``Value error`` bodies
like ``Input should be ...`` (literal_error from Pydantic enums).

Regression for the case where the sanitizer only matched ``Value error,``
and left the long tagged-union header attached, causing the 200-char
truncation to swallow the actionable message.
"""
# Invalid aggregate value — triggers a Pydantic literal_error whose body
# starts with ``Input should be 'SUM', 'COUNT', ...``, not ``Value error,``.
request_data = {
"dataset_id": 1,
"config": {
"chart_type": "pie",
"dimension": {"name": "product"},
"metric": {"name": "revenue", "aggregate": "BOGUS"},
},
}

with (
patch.object(ValidationPipeline, "_get_dataset_context", return_value=None),
patch.object(
ValidationPipeline, "_validate_dataset", return_value=(True, None)
),
patch.object(
ValidationPipeline, "_validate_runtime", return_value=(True, None)
),
patch.object(
ValidationPipeline,
"_normalize_column_names",
side_effect=_passthrough_normalize,
),
):
result = ValidationPipeline.validate_request_with_warnings(request_data)

assert result.is_valid is False
assert result.error is not None
dumped = result.error.model_dump()
assert dumped["error_code"] == "VALIDATION_PIPELINE_ERROR"
assert "tagged-union" not in dumped["message"]
assert "tagged-union" not in dumped["details"]
# The actionable body must survive the 200-char truncation.
assert "Input should be" in dumped["details"]


def test_validation_error_template_suggestions_are_chart_type_agnostic() -> None:
"""The ``validation_error`` template fires for every chart type, so its
suggestions must not mention specific chart-type field names."""
template = ChartErrorBuilder.TEMPLATES["validation_error"]
joined = " ".join(template["suggestions"]).lower()
assert "mixed_timeseries" not in joined
assert "primary_kind" not in joined
assert "secondary_kind" not in joined
Loading