diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py
index 3f9f1ec940..ad4c1187de 100644
--- a/src/scanpy/_settings/presets.py
+++ b/src/scanpy/_settings/presets.py
@@ -35,7 +35,7 @@
 
 type DETest = Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"]
 type HVGFlavor = Literal["seurat", "cell_ranger", "seurat_v3", "seurat_v3_paper"]
-type LeidenFlavor = Literal["leidenalg", "igraph"]
+type LeidenFlavor = Literal["leidenalg", "igraph", "networkit"]
 
 
 @dataclass
diff --git a/src/scanpy/_utils/__init__.py b/src/scanpy/_utils/__init__.py
index f0d67b550d..ab264101be 100644
--- a/src/scanpy/_utils/__init__.py
+++ b/src/scanpy/_utils/__init__.py
@@ -72,6 +72,7 @@
     "ensure_igraph",
     "get_igraph_from_adjacency",
     "get_literal_vals",
+    "get_networkit_from_adjacency",
     "indent",
     "is_backed_type",
     "is_backed_type",
@@ -98,6 +99,18 @@ def ensure_igraph() -> None:
     raise ImportError(msg)
 
 
+def ensure_network() -> None:
+    """Raise an ImportError with install hints if networkit is missing."""
+    if importlib.util.find_spec("networkit"):
+        return
+    msg = (
+        "Please install the networkit package: "
+        "`conda install conda-forge::networkit` or "
+        "`pip install networkit`"
+    )
+    raise ImportError(msg)
+
+
 def _getdoc(c_or_f: Callable | type) -> str | None:
     if getattr(c_or_f, "__doc__", None) is None:
         return None
@@ -303,6 +316,26 @@ def get_igraph_from_adjacency(adjacency: CSBase, *, directed: bool) -> Graph:
     return g
 
 
+def get_networkit_from_adjacency(adjacency: CSBase, *, weighted: bool = True):
+    """Get a NetworKit graph from an adjacency matrix."""
+    import networkit as nk
+    import scipy.sparse as sps
+
+    shape = adjacency.shape
+    assert shape is not None
+    # strict upper triangle: each undirected edge once, diagonal excluded
+    upper = sps.triu(adjacency, k=1).tocoo()
+    return nk.graph.GraphFromCoo(
+        (
+            upper.data.astype(np.float64),
+            (upper.row.astype(np.uint64), upper.col.astype(np.uint64)),
+        ),
+        n=shape[0],
+        weighted=weighted,
+        directed=False,
+    )
+
+
 # --------------------------------------------------------------------------------
 # Group stuff
 # --------------------------------------------------------------------------------
diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py
index 2f789acbaf..bc3eb0dddc 100644
--- a/src/scanpy/tools/_leiden.py
+++ b/src/scanpy/tools/_leiden.py
@@ -185,6 +185,34 @@ def leiden(  # noqa: PLR0913
             "MutableVertexPartition",
             leidenalg.find_partition(g, partition_type, seed=seed, **clustering_args),
         )
+    elif flavor == "networkit":
+        _utils.ensure_network()
+        from types import SimpleNamespace
+
+        import networkit
+
+        seed = int(rng.integers(np.iinfo(np.int64).max))
+        networkit.setSeed(seed, useThreadId=True)
+        # only undirected for Parallel Leiden
+        g = _utils.get_networkit_from_adjacency(adjacency, weighted=use_weights)
+        iterations = n_iterations if n_iterations > 0 else 3
+        gamma = 1.0 if resolution is None else resolution
+        # randomization was removed as an option, so it is randomize = True
+        algorithm = networkit.community.ParallelLeiden(
+            g, iterations=iterations, gamma=gamma
+        )
+        # applying algorithm to the graph
+        algorithm.run()
+        nk_part = algorithm.getPartition()
+        # NetworKit's Partition exposes getVector() for the labels and a
+        # separate Modularity measure, rather than .membership / .modularity.
+
+        part = SimpleNamespace(
+            # get the actual vector representing the partition data structure
+            membership=np.asarray(nk_part.getVector()),
+            modularity=networkit.community.Modularity().getQuality(nk_part, g),
+        )
+
     else:
         g = _utils.get_igraph_from_adjacency(adjacency, directed=False)
         if use_weights:
@@ -243,6 +271,13 @@ def _validate_flavor(
             if partition_type is not None:
                 msg = "Do not pass in partition_type argument when using igraph."
                 raise ValueError(msg)
+        case "networkit":
+            if directed:
+                msg = "Cannot use NetworKit's leiden implementation with a directed graph."
+                raise ValueError(msg)
+            if partition_type is not None:
+                msg = "Do not pass in partition_type argument when using networkit."
+                raise ValueError(msg)
         case "leidenalg":
             msg = (
                 "The `igraph` implementation of leiden clustering is *orders of magnitude faster*. "