-
Notifications
You must be signed in to change notification settings - Fork 750
Adding "networkit" flavour to scanpy.tl.leiden for parallel community detection #4170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -72,6 +72,7 @@ | |||||
| "ensure_igraph", | ||||||
| "get_igraph_from_adjacency", | ||||||
| "get_literal_vals", | ||||||
| "get_networkit_from_adjacency", | ||||||
| "indent", | ||||||
| "is_backed_type", | ||||||
| "is_backed_type", | ||||||
|
|
@@ -98,6 +99,17 @@ def ensure_igraph() -> None: | |||||
| raise ImportError(msg) | ||||||
|
|
||||||
|
|
||||||
def ensure_network() -> None:
    """Raise an informative error unless the ``networkit`` package can be found.

    Returns
    -------
    None
        When ``importlib.util.find_spec`` locates the ``networkit`` package.

    Raises
    ------
    ImportError
        If ``networkit`` cannot be located; the message lists the conda and
        pip install commands.
    """
    # The module name has to be spelled exactly: a misspelled name never
    # resolves, so the check would fail even with networkit installed.
    if importlib.util.find_spec("networkit"):
        return
    # Adjacent literals are concatenated verbatim, so the separating space
    # after "or" must be part of the literal itself.
    msg = (
        "Please install the networkit package: "
        "`conda install conda-forge::networkit` or "
        "`pip install networkit`"
    )
    raise ImportError(msg)
|
|
||||||
|
|
||||||
| def _getdoc(c_or_f: Callable | type) -> str | None: | ||||||
| if getattr(c_or_f, "__doc__", None) is None: | ||||||
| return None | ||||||
|
|
@@ -303,6 +315,25 @@ def get_igraph_from_adjacency(adjacency: CSBase, *, directed: bool) -> Graph: | |||||
| return g | ||||||
|
|
||||||
|
|
||||||
def get_networkit_from_adjacency(adjacency: CSBase, *, weighted: bool = True):
    """Build an undirected NetworKit graph from a sparse adjacency matrix.

    Only the entries strictly above the main diagonal are passed on as
    edges, so diagonal entries and the lower triangle are not used.

    Parameters
    ----------
    adjacency
        Sparse adjacency matrix; its first dimension is used as the
        number of nodes.
    weighted
        Forwarded to ``GraphFromCoo`` as its ``weighted`` argument.

    Returns
    -------
    The object returned by ``networkit.graph.GraphFromCoo`` (constructed
    with ``directed=False``).
    """
    import networkit as nk
    import scipy.sparse as sps

    shape = adjacency.shape
    assert shape is not None

    # k=1 selects the strict upper triangle (main diagonal excluded).
    strict_upper = sps.triu(adjacency, k=1).tocoo()
    edge_weights = strict_upper.data.astype(np.float64)
    sources = strict_upper.row.astype(np.uint64)
    targets = strict_upper.col.astype(np.uint64)

    return nk.graph.GraphFromCoo(
        (edge_weights, (sources, targets)),
        n=shape[0],
        weighted=weighted,
        directed=False,
    )
|
|
||||||
|
|
||||||
| # -------------------------------------------------------------------------------- | ||||||
| # Group stuff | ||||||
| # -------------------------------------------------------------------------------- | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -185,6 +185,33 @@ def leiden( # noqa: PLR0913 | |
| "MutableVertexPartition", | ||
| leidenalg.find_partition(g, partition_type, seed=seed, **clustering_args), | ||
| ) | ||
| elif flavor == "networkit": | ||
| from types import SimpleNamespace | ||
|
|
||
| import networkit | ||
|
|
||
| seed = int(rng.integers(np.iinfo(np.int64).max)) | ||
| networkit.setSeed(seed, useThreadId=True) | ||
|
Comment on lines
+193
to
+194
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Despite this, things aren't deterministic, right? Just making sure, but if that's the case, please comment why we do this (helps with determinism even if it's not perfect maybe?)
||
| # only undirected for Parallel Leiden | ||
| g = _utils.get_networkit_from_adjacency(adjacency, weighted=use_weights) | ||
| iterations = n_iterations if n_iterations > 0 else 3 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where did the number 3 come from?
||
| gamma = 1.0 if resolution is None else resolution | ||
| # randomization was removed as an option, so it is randomize = True | ||
| algorithm = networkit.community.ParallelLeiden( | ||
| g, iterations=iterations, gamma=gamma | ||
| ) | ||
| # applying algorithm to the graph | ||
| algorithm.run() | ||
| nk_part = algorithm.getPartition() | ||
| # NetworKit's Partition exposes getVector() for the labels and a | ||
| # separate Modularity measure, rather than .membership / .modularity. | ||
|
|
||
| part = SimpleNamespace( | ||
| # get the actual vector representing the partition data structure | ||
| membership=np.asarray(nk_part.getVector()), | ||
| modularity=networkit.community.Modularity().getQuality(nk_part, g), | ||
| ) | ||
|
|
||
| else: | ||
| g = _utils.get_igraph_from_adjacency(adjacency, directed=False) | ||
| if use_weights: | ||
|
|
@@ -243,6 +270,13 @@ def _validate_flavor( | |
| if partition_type is not None: | ||
| msg = "Do not pass in partition_type argument when using igraph." | ||
| raise ValueError(msg) | ||
| case "networkit": | ||
| if directed: | ||
| msg = "Cannot use NetworKit's leiden implementation with a directed graph." | ||
| raise ValueError(msg) | ||
| if partition_type is not None: | ||
| msg = "Do not pass in partition_type argument when using networkit." | ||
| raise ValueError(msg) | ||
| case "leidenalg": | ||
| msg = ( | ||
| "The `igraph` implementation of leiden clustering is *orders of magnitude faster*. " | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where is this used?