apache · EnxDev · Apr 22, 2026 · codeant-ai-for-open-source · Apr 22, 2026 · codeant-ai-for-open-source
diff --git a/superset/daos/dashboard.py b/superset/daos/dashboard.py
@@ -59,6 +59,10 @@
 
 class DashboardDAO(BaseDAO[Dashboard]):
     base_filter = DashboardAccessFilter
+    # Column used by MCP tools for title-based identifier fallback, so a
+    # slug-like identifier can resolve to a dashboard even when its slug
+    # field is empty.
+    title_column = "dashboard_title"
 
     @classmethod
     def apply_column_operators(

diff --git a/superset/mcp_service/mcp_core.py b/superset/mcp_service/mcp_core.py
@@ -18,6 +18,7 @@
 from __future__ import annotations
 
 import logging
+import re
 from abc import ABC, abstractmethod
 from datetime import datetime, timezone
 from typing import Any, Callable, Dict, Generic, List, Literal, Type, TypeVar
@@ -28,6 +29,23 @@
 from superset.mcp_service.constants import ModelType
 from superset.mcp_service.utils import _is_uuid
 
+
+def _slugify(value: str) -> str:
+    """Normalize a string to a slug-like form for comparison.
+
+    Lowercases, drops apostrophes so possessives collapse
+    ("World Bank's" → "worldbanks" territory), then collapses any
+    remaining non-alphanumerics to single hyphens and trims
+    leading/trailing hyphens. Mirrors how agents typically guess slugs
+    from a dashboard title (e.g. "World Bank's Data" → "world-banks-data").
+    """
+    lowered = value.lower()
+    # Drop apostrophes entirely so "bank's" collapses to "banks" rather than
+    # splitting into "bank-s". Covers straight and curly variants.
+    stripped = re.sub(r"['’]", "", lowered)
+    return re.sub(r"[^a-z0-9]+", "-", stripped).strip("-")
+
+
 # Type variables for generic model tools
 T = TypeVar("T")  # For model objects
 S = TypeVar("S", bound=BaseModel)  # For Pydantic schemas
@@ -262,6 +280,7 @@ def __init__(
         supports_slug: bool = False,
         logger: logging.Logger | None = None,
         query_options: list[Any] | None = None,
+        title_column_name: str | None = None,
     ) -> None:
         super().__init__(logger)
         self.dao_class = dao_class
@@ -270,6 +289,49 @@ def __init__(
         self.serializer = serializer
         self.supports_slug = supports_slug
         self.query_options = query_options or []
+        # When set, enables a slugified-title fallback after slug lookup
+        # fails, so identifiers like "world-banks-data" still resolve to
+        # "World Bank's Data" when the dashboard's slug field is empty.
+        # Defaults to the DAO's `title_column` attribute when not overridden.
+        self.title_column_name = title_column_name or getattr(
+            dao_class, "title_column", None
+        )
+
+    def _find_by_slugified_title(self, identifier: str) -> Any:
+        """Resolve a slug-like identifier by matching against slugified titles.
+
+        Loads all rows and compares `_slugify(row.title)` to `_slugify(identifier)`.
+        If multiple rows match, logs a warning and returns the first one —
+        collisions in real dashboards are rare and the caller can always
+        disambiguate by id or UUID.
+        """
+        if not self.title_column_name:
+            return None
+        target = _slugify(identifier)
+        if not target:
+            return None
+
+        from superset.extensions import db
+
+        model_class = self.dao_class.model_cls
+        query = db.session.query(model_class)
+        if self.query_options:
+            query = query.options(*self.query_options)
+        matches = [
+            obj
+            for obj in query.all()
+            if _slugify(getattr(obj, self.title_column_name, "") or "") == target
+        ]
+        if not matches:
+            return None
+        if len(matches) > 1:
+            ids = [getattr(m, "id", None) for m in matches]
+            self._log_warning(
+                f"Identifier '{identifier}' matched {len(matches)} rows by "
+                f"slugified title (ids={ids}); returning the first. Pass an "
+                "id or UUID to disambiguate."
+            )
+        return matches[0]
 
     def _find_object(self, identifier: int | str) -> Any:
         """Find object by identifier using appropriate method."""
@@ -309,7 +371,14 @@ def _find_object(self, identifier: int | str) -> Any:
             query = db.session.query(model_class).filter(id_or_slug_filter(identifier))
             if opts:
                 query = query.options(*opts)
-            return query.one_or_none()
+            slug_result = query.one_or_none()
+            if slug_result is not None:
+                return slug_result
+
+            # Many dashboards have empty slugs, so slug lookup alone silently
+            # fails when agents pass a slug-like string derived from the
+            # dashboard title. Fall back to slugified-title matching.
+            return self._find_by_slugified_title(identifier)
 
         # If we get here, it's an invalid identifier
         return None

diff --git a/tests/unit_tests/mcp_service/test_mcp_core.py b/tests/unit_tests/mcp_service/test_mcp_core.py
@@ -0,0 +1,213 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Unit tests for mcp_core reusable core classes.
+
+Focused on the ModelGetInfoCore title-based fallback that resolves
+slug-like identifiers (e.g. "world-banks-data") to dashboards whose
+slug column is empty but whose title matches.
+"""
+
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+
+import pytest
+from pydantic import BaseModel
+
+from superset.mcp_service.mcp_core import _slugify, ModelGetInfoCore
+
+
+class _FakeOutput(BaseModel):
+    id: int
+    title: str
+
+
+class _FakeError(BaseModel):
+    error: str
+    error_type: str
+    timestamp: datetime
+
+
+class _Unset:
+    """Sentinel meaning "DAO has no title_column attribute at all"."""
+
+
+@pytest.fixture(autouse=True)
+def _patch_id_or_slug_filter():
+    """id_or_slug_filter is called inside _find_object's slug branch, but its
+    return value is only used by SQLAlchemy internals we've mocked away —
+    we just need it to not blow up."""
+    with patch("superset.models.dashboard.id_or_slug_filter", return_value=MagicMock()):
+        yield
+
+
+def _make_dashboard(id_: int, title: str, slug: str = "") -> MagicMock:
+    dashboard = MagicMock()
+    dashboard.id = id_
+    dashboard.dashboard_title = title
+    dashboard.slug = slug
+    return dashboard
+
+
+def _build_core(
+    *,
+    supports_slug: bool = True,
+    title_column: str | None = "dashboard_title",
+    dao_title_column: str | None | type[_Unset] = None,
+) -> tuple[ModelGetInfoCore, MagicMock]:
+    """Build a core with configurable title-column wiring.
+
+    - `title_column` is the explicit override passed into the core.
+    - `dao_title_column` simulates the DAO's class attribute; defaults
+      to None so we rely on the explicit override in most tests.
+    """
+    dao_class = MagicMock()
+    # getattr(model_class, "dashboard_title") needs to return a truthy column
+    # so the fallback proceeds past its guard.
+    dao_class.model_cls = MagicMock(dashboard_title=MagicMock())
+    dao_class.find_by_id.return_value = None
+    # MagicMock auto-vivifies attrs, so explicitly control title_column.
+    if dao_title_column is _Unset:
+        del dao_class.title_column
+    else:
+        dao_class.title_column = dao_title_column
+
+    def serializer(obj: MagicMock) -> _FakeOutput:
+        return _FakeOutput(id=obj.id, title=obj.dashboard_title)
+
+    core = ModelGetInfoCore(
+        dao_class=dao_class,
+        output_schema=_FakeOutput,
+        error_schema=_FakeError,
+        serializer=serializer,
+        supports_slug=supports_slug,
+        title_column_name=title_column,
+    )
+    return core, dao_class
+
+
+def test_slugify_matches_agent_guesses() -> None:
+    """Agents slugify titles by lowercasing and hyphenating non-alphanumerics."""
+    assert _slugify("World Bank's Data") == "world-banks-data"
+    assert _slugify("  Multiple   Spaces  ") == "multiple-spaces"
+    assert _slugify("!!!") == ""
+
+
+def _make_db_mocks(*, all_rows: list[MagicMock]) -> tuple[MagicMock, MagicMock]:
+    """Return (slug_query, title_query) for db.session.query.side_effect.
+
+    The core calls db.session.query twice during a string-identifier lookup:
+    once for the id_or_slug_filter query (returns None to force fallback),
+    and once for the slugified-title scan (returns `all_rows`).
+    """
+    slug_query = MagicMock()
+    slug_query.filter.return_value = slug_query
+    slug_query.options.return_value = slug_query
+    slug_query.one_or_none.return_value = None
+
+    title_query = MagicMock()
+    title_query.options.return_value = title_query
+    title_query.all.return_value = all_rows
+    return slug_query, title_query
+
+
+@patch("superset.extensions.db")
+def test_title_fallback_resolves_dashboard_with_empty_slug(
+    mock_db: MagicMock,
+) -> None:
+    """Regression: slug lookup must not silently fail when slug is empty."""
+    core, _ = _build_core()
+    dashboard = _make_dashboard(id_=2, title="World Bank's Data", slug="")
+    slug_query, title_query = _make_db_mocks(all_rows=[dashboard])
+    mock_db.session.query.side_effect = [slug_query, title_query]
+
+    result = core.run_tool("world-banks-data")
+
+    assert isinstance(result, _FakeOutput)
+    assert result.id == 2
+    assert result.title == "World Bank's Data"
+
+
+@patch("superset.extensions.db")
+def test_title_fallback_ambiguous_picks_first_and_warns(
+    mock_db: MagicMock, caplog: pytest.LogCaptureFixture
+) -> None:
+    """Two dashboards slugify to the same identifier — pick the first, warn."""
+    core, _ = _build_core()
+    dash_a = _make_dashboard(id_=2, title="World Bank's Data")
+    dash_b = _make_dashboard(id_=7, title="World Banks Data")
+    slug_query, title_query = _make_db_mocks(all_rows=[dash_a, dash_b])
+    mock_db.session.query.side_effect = [slug_query, title_query]
+
+    with caplog.at_level("WARNING"):
+        result = core.run_tool("world-banks-data")
+
+    assert isinstance(result, _FakeOutput)
+    assert result.id == 2  # first match wins
+    assert any("matched 2 rows" in rec.message for rec in caplog.records)
+
+
+@patch("superset.extensions.db")
+def test_not_found_error_when_no_title_match(
+    mock_db: MagicMock,
+) -> None:
+    """No slug, no title, no slugified-title match — plain not_found."""
+    core, _ = _build_core()
+    slug_query, title_query = _make_db_mocks(all_rows=[])
+    mock_db.session.query.side_effect = [slug_query, title_query]
+
+    result = core.run_tool("does-not-exist")
+
+    assert isinstance(result, _FakeError)
+    assert result.error_type == "not_found"
+
+
+def test_title_fallback_disabled_returns_not_found() -> None:
+    """When neither override nor DAO provides a title column, no fallback."""
+    core, _ = _build_core(
+        supports_slug=False, title_column=None, dao_title_column=_Unset
+    )
+
+    result = core.run_tool("anything")
+
+    assert isinstance(result, _FakeError)
+    assert result.error_type == "not_found"
+
+
+def test_title_column_defaults_from_dao_attribute() -> None:
+    """No explicit override → core picks up DAO.title_column."""
+    core, _ = _build_core(title_column=None, dao_title_column="dashboard_title")
+    assert core.title_column_name == "dashboard_title"
+
+
+def test_explicit_title_column_overrides_dao_attribute() -> None:
+    """Explicit override beats the DAO default."""
+    core, _ = _build_core(title_column="custom_col", dao_title_column="dashboard_title")
+    assert core.title_column_name == "custom_col"
+
+
+@pytest.mark.parametrize(
+    "identifier,expected_slug",
+    [
+        ("World Bank's Data", "world-banks-data"),
+        ("multi  space", "multi-space"),
+        ("leading--and--trailing--", "leading-and-trailing"),
+    ],
+)
+def test_slugify_handles_edge_cases(identifier: str, expected_slug: str) -> None:
+    assert _slugify(identifier) == expected_slug