tslearn-team · jbbqqf · May 9, 2026 · charavelg · May 11, 2026 · charavelg
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -14,6 +14,7 @@ Changelogs for this project are recorded in this file since v0.2.0.
 ### Added
 
 * Allow parallel computation of DTW barycenters and plug it in `TimeSeriesKMeans`.
+* `tslearn.metrics.sbd` and `tslearn.metrics.cdist_sbd` expose the Shape-Based Distance used inside `KShape` as stand-alone functions ([#276](https://github.com/tslearn-team/tslearn/issues/276)).
 
 ### Changed
 

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -1111,3 +1111,47 @@ def test_sax():
         dataset2=[[-1, 0, 1], [1, 0, 1]],
     )
     np.testing.assert_equal(dists, expected)
+
+
+def test_sbd_public():
+    # Regression test for #276: SBD must be exposed as a stand-alone scalar
+    # function, the way dtw / soft_dtw / lcss already are. The result must
+    # also match what KShape uses internally (1 - cdist_normalized_cc), so
+    # callers can reproduce KShape's distance without monkey-patching.
+    from tslearn.metrics import sbd, cdist_sbd
+    from tslearn.metrics.cycc import cdist_normalized_cc
+
+    # 1) Identical series → SBD = 0, up to floating-point round-off.
+    assert sbd([1., 2., 3.], [1., 2., 3.]) == pytest.approx(0.0, abs=1e-12)
+
+    # 2) Mismatched shapes are rejected with a clear error.
+    with pytest.raises(ValueError):
+        sbd([1.0, 2.0, 3.0], [[1.0, 2.0], [3.0, 4.0]])
+
+    # 3) cdist_sbd returns a true distance matrix (zero on the diagonal,
+    #    symmetric, non-negative). The legacy self_similarity=True branch of
+    #    cdist_normalized_cc is *not* a valid SBD matrix on its own — that's
+    #    the bug this PR works around.
+    rng = np.random.RandomState(0)
+    X = rng.rand(4, 6, 1).astype(np.float64)
+    D = cdist_sbd(X)
+    assert D.shape == (4, 4)
+    np.testing.assert_allclose(np.diag(D), np.zeros(4), atol=1e-12)
+    np.testing.assert_allclose(D, D.T, atol=1e-12)
+    assert (D >= -1e-12).all()
+
+    # 4) Scalar sbd matches the pairwise matrix entries — same path KShape
+    #    takes.
+    np.testing.assert_allclose(sbd(X[0], X[1]), D[0, 1], atol=1e-12)
+
+    # 5) Asymmetric two-dataset call has the right shape and matches scalar.
+    Y = rng.rand(2, 6, 1).astype(np.float64)
+    D2 = cdist_sbd(X, Y)
+    assert D2.shape == (4, 2)
+    np.testing.assert_allclose(sbd(X[2], Y[1]), D2[2, 1], atol=1e-12)
+
+    # 6) Cross-check with raw normalized_cc — this is the contract callers
+    #    rely on when porting KShape-style code.
+    norms = np.full(4, -1.0, dtype=np.float64)
+    cc = cdist_normalized_cc(X, X, norms.copy(), norms.copy(), False)
+    np.testing.assert_allclose(D, 1.0 - cc, atol=1e-12)
diff --git a/tslearn/metrics/__init__.py b/tslearn/metrics/__init__.py
@@ -59,6 +59,7 @@
 )
 from .soft_dtw_loss_pytorch import SoftDTWLossPyTorch
 from .cycc import cdist_normalized_cc, y_shifted_sbd_vec
+from ._sbd import sbd, cdist_sbd
 from ._frechet import (
     frechet,
     frechet_path,
@@ -129,6 +130,8 @@
     "SoftDTWLossPyTorch",
     "cdist_normalized_cc",
     "y_shifted_sbd_vec",
+    "sbd",
+    "cdist_sbd",
     "frechet",
     "frechet_path",
     "frechet_accumulated_matrix",

diff --git a/tslearn/metrics/_sbd.py b/tslearn/metrics/_sbd.py
@@ -0,0 +1,130 @@
+"""Public Shape-Based Distance (SBD) helpers.
+
+SBD is the distance used inside :class:`tslearn.clustering.KShape`. Until now
+it was only available indirectly via :func:`cdist_normalized_cc`. Issue #276
+asks for a function-level handle, the way :func:`tslearn.metrics.dtw` exposes
+DTW as a stand-alone distance.
+
+The implementation defers to the existing numba-jitted
+:func:`tslearn.metrics.cycc.normalized_cc` so behaviour matches what KShape
+already does internally — see ``KShape._cross_dists`` in
+``tslearn/clustering/kshape.py``.
+"""
+import numpy
+
+from .cycc import cdist_normalized_cc, normalized_cc
+from ..utils import to_time_series, to_time_series_dataset
+
+
+def sbd(s1, s2):
+    r"""Compute the Shape-Based Distance (SBD) between two time series.
+
+    With :math:`\mathrm{CC}_w` the (unnormalized) cross-correlation of the
+    two series at lag :math:`w`, SBD is defined in [1]_ as
+
+    .. math::
+
+        \mathrm{SBD}(\mathbf{x}, \mathbf{y}) =
+            1 - \max_{w}\;\frac{\mathrm{CC}_w(\mathbf{x}, \mathbf{y})}
+                                {\|\mathbf{x}\|_2 \cdot \|\mathbf{y}\|_2}
+
+    It is the distance used by :class:`tslearn.clustering.KShape`.
+
+    Parameters
+    ----------
+    s1 : array-like, shape=(sz, d) or (sz,)
+        A time series.
+    s2 : array-like, shape=(sz, d) or (sz,)
+        Another time series of the same length and dimensionality as ``s1``.
+
+    Returns
+    -------
+    float
+        SBD value in :math:`[0, 2]`, lower meaning more similar. ``0`` is
+        reached when, at the best lag, the series agree up to positive scale.
+
+    Raises
+    ------
+    ValueError
+        If ``s1`` and ``s2`` do not have the same shape once converted.
+
+    Examples
+    --------
+    >>> float(sbd([1., 2., 3.], [1., 2., 3.]))
+    0.0
+    >>> # Equal-length series with a partial shape match
+    >>> float(round(sbd([1., 2., 3.], [3., 2., 1.]), 4))
+    0.1429
+
+    See Also
+    --------
+    cdist_sbd : Pairwise SBD on two datasets.
+    cdist_normalized_cc : Underlying cross-correlation matrix.
+    tslearn.clustering.KShape : Clustering algorithm built on SBD.
+
+    References
+    ----------
+    .. [1] J. Paparrizos and L. Gravano. k-Shape: Efficient and Accurate
+       Clustering of Time Series. SIGMOD 2015.
+    """
+    # to_time_series accepts plain lists and 1-D arrays, just like dtw().
+    ts1, ts2 = to_time_series(s1), to_time_series(s2)
+    if ts1.shape != ts2.shape:
+        raise ValueError(
+            "sbd() requires both time series to have the same shape, "
+            f"got {ts1.shape} and {ts2.shape}."
+        )
+    # One correlation per lag; SBD keeps the best, as KShape._cross_dists.
+    ts1, ts2 = ts1.astype(numpy.float64), ts2.astype(numpy.float64)
+    return 1.0 - normalized_cc(ts1, ts2).max()
+
+
+def cdist_sbd(dataset1, dataset2=None):
+    """Pairwise Shape-Based Distance between two time-series datasets.
+
+    Parameters
+    ----------
+    dataset1 : array-like, shape=(n_ts1, sz, d) or (n_ts1, sz)
+        First dataset of time series.
+    dataset2 : array-like, shape=(n_ts2, sz, d) or (n_ts2, sz), optional
+        Second dataset, with the same ``sz`` and ``d`` as ``dataset1``. If
+        ``None`` (default), pairwise SBD is computed within ``dataset1``.
+
+    Returns
+    -------
+    numpy.ndarray of shape (n_ts1, n_ts2)
+        Pairwise SBD values, following the same convention as :func:`sbd`.
+
+    Raises
+    ------
+    ValueError
+        If the series in the two datasets differ in length or dimensionality.
+
+    Examples
+    --------
+    >>> import numpy
+    >>> X = numpy.array([[[1.], [2.], [3.]], [[1.], [2.], [3.]]])
+    >>> dists = cdist_sbd(X)
+    >>> dists.shape, bool(numpy.allclose(dists, 0.0))
+    ((2, 2), True)
+
+    See Also
+    --------
+    sbd : Scalar SBD between two time series.
+    cdist_normalized_cc : Underlying cross-correlation matrix.
+    """
+    dataset1 = to_time_series_dataset(dataset1).astype(numpy.float64)
+    if dataset2 is None:
+        dataset2 = dataset1
+    else:
+        dataset2 = to_time_series_dataset(dataset2).astype(numpy.float64)
+    if dataset1.shape[1:] != dataset2.shape[1:]:
+        raise ValueError(
+            "cdist_sbd() requires series of the same shape in both datasets, "
+            f"got {dataset1.shape[1:]} and {dataset2.shape[1:]}."
+        )
+    # -1.0 norms make the kernel compute them. self_similarity stays False:
+    # its True branch is KShape-specific and leaves SBD=1 on the diagonal.
+    norms1 = numpy.full(dataset1.shape[0], -1.0, dtype=numpy.float64)
+    norms2 = numpy.full(dataset2.shape[0], -1.0, dtype=numpy.float64)
+    return 1.0 - cdist_normalized_cc(dataset1, dataset2, norms1, norms2, False)