Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions code_review_graph/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ def _supports_color() -> bool:
return sys.stdout.isatty()


def _print_disambiguated(result: dict, limit: int = 10) -> None:
"""Report duplicate symbol names that were disambiguated during a build."""
dups = result.get("disambiguated_nodes") or []
if not dups:
return
shown = ", ".join(dups[:limit])
more = f" (+{len(dups) - limit} more)" if len(dups) > limit else ""
print(f"Disambiguated {len(dups)} duplicate symbol name(s): {shown}{more}")


def _print_banner() -> None:
"""Print the startup banner with graph art and available commands."""
color = _supports_color()
Expand Down Expand Up @@ -994,6 +1004,7 @@ def main() -> None:
nodes = result.get("total_nodes", 0)
edges = result.get("total_edges", 0)
print(f"Full build: {parsed} files, {nodes} nodes, {edges} edges (postprocess={pp})")
_print_disambiguated(result)
if result.get("errors"):
print(f"Errors: {len(result['errors'])}")

Expand All @@ -1019,6 +1030,7 @@ def main() -> None:
f"{nodes} nodes, {edges} edges"
f" (postprocess={pp})"
)
_print_disambiguated(result)

# --brief: append a one-line change-impact summary with the same
# estimated context-savings approximation that detect-changes uses.
Expand Down
68 changes: 61 additions & 7 deletions code_review_graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import json
import logging
import os
import re
import sqlite3
import threading
import time
Expand All @@ -25,6 +26,10 @@

logger = logging.getLogger(__name__)

# Suffix appended by _qualified_names_for_file to a 2nd+ same-file collision,
# e.g. "path::getById:L394". This regex recognizes those keys after the fact.
_DISAMBIGUATED_RE = re.compile(r":L\d+(?:#\d+)?$")

# ---------------------------------------------------------------------------
# Schema
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -186,10 +191,19 @@ def close(self) -> None:

# --- Write operations ---

def upsert_node(self, node: NodeInfo, file_hash: str = "") -> int:
"""Insert or update a node. Returns the node ID."""
def upsert_node(
self, node: NodeInfo, file_hash: str = "", qualified: str | None = None
) -> int:
"""Insert or update a node. Returns the node ID.

Pass ``qualified`` to override the computed key — used by the batch store
functions to disambiguate same-named symbols in one file (see
``_qualified_names_for_file``). Without it, collisions would collapse
under ``ON CONFLICT(qualified_name) DO UPDATE`` and silently drop nodes.
"""
now = time.time()
qualified = self._make_qualified(node)
if qualified is None:
qualified = self._make_qualified(node)
extra = json.dumps(node.extra) if node.extra else "{}"

self._conn.execute(
Expand Down Expand Up @@ -276,8 +290,9 @@ def store_file_nodes_edges(
self._begin_immediate()
try:
self.remove_file_data(file_path)
for node in nodes:
self.upsert_node(node, file_hash=fhash)
qualified_names = self._qualified_names_for_file(nodes)
for node, qualified in zip(nodes, qualified_names):
self.upsert_node(node, file_hash=fhash, qualified=qualified)
for edge in edges:
self.upsert_edge(edge)
self._conn.commit()
Expand All @@ -294,8 +309,9 @@ def store_file_batch(
try:
for file_path, nodes, edges, fhash in batch:
self.remove_file_data(file_path)
for node in nodes:
self.upsert_node(node, file_hash=fhash)
qualified_names = self._qualified_names_for_file(nodes)
for node, qualified in zip(nodes, qualified_names):
self.upsert_node(node, file_hash=fhash, qualified=qualified)
for edge in edges:
self.upsert_edge(edge)
self._conn.commit()
Expand Down Expand Up @@ -866,6 +882,21 @@ def get_stats(self) -> GraphStats:
last_updated=last_updated,
)

def find_disambiguated_nodes(self) -> list[str]:
"""Qualified names suffixed to resolve a same-file name collision.

When two same-named symbols share a file, the first keeps its bare key
and later ones get a ``:L<line>`` suffix (see _qualified_names_for_file).
Surfacing these makes the otherwise-invisible collisions reportable.
"""
rows = self._conn.execute(
"SELECT qualified_name FROM nodes WHERE qualified_name GLOB '*:L[0-9]*'"
).fetchall()
return sorted(
r["qualified_name"] for r in rows
if _DISAMBIGUATED_RE.search(r["qualified_name"])
)

def get_nodes_by_size(
self,
min_lines: int = 50,
Expand Down Expand Up @@ -1300,6 +1331,29 @@ def _make_qualified(self, node: NodeInfo) -> str:
return f"{node.file_path}::{node.parent_name}.{node.name}"
return f"{node.file_path}::{node.name}"

def _qualified_names_for_file(self, nodes: list[NodeInfo]) -> list[str]:
"""Compute collision-free qualified names for one file's nodes.

The first occurrence of a key keeps its bare form so existing edges
(which reference the same parser-computed key) still resolve to it.
Later same-key symbols are suffixed with their start line — two defs
cannot share a ``line_start``, so this is always unique.
"""
names: list[str] = []
seen: set[str] = set()
for index, node in enumerate(nodes):
base = self._make_qualified(node)
if base not in seen:
seen.add(base)
names.append(base)
continue
candidate = f"{base}:L{node.line_start}"
if candidate in seen:
candidate = f"{base}:L{node.line_start}#{index}"
seen.add(candidate)
names.append(candidate)
return names

def _row_to_node(self, row: sqlite3.Row) -> GraphNode:
return GraphNode(
id=row["id"],
Expand Down
11 changes: 11 additions & 0 deletions code_review_graph/incremental.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,10 +905,18 @@ def full_build(
spring_stats = _run_spring_resolver(store)
temporal_stats = _run_temporal_resolver(store)

disambiguated = store.find_disambiguated_nodes()
if disambiguated:
logger.info(
"Disambiguated %d duplicate qualified_name(s): %s",
len(disambiguated), ", ".join(disambiguated),
)

return {
"files_parsed": len(files),
"total_nodes": total_nodes,
"total_edges": total_edges,
"disambiguated_nodes": disambiguated,
"errors": errors,
"rescript_resolution": rescript_stats,
"spring_resolution": spring_stats,
Expand Down Expand Up @@ -1043,10 +1051,13 @@ def incremental_update(
spring_stats = _run_spring_resolver(store) if spring_changed else None
temporal_stats = _run_temporal_resolver(store) if spring_changed else None

disambiguated = store.find_disambiguated_nodes()

return {
"files_updated": len(all_files),
"total_nodes": total_nodes,
"total_edges": total_edges,
"disambiguated_nodes": disambiguated,
"changed_files": list(changed_files),
"dependent_files": list(dependent_files),
"errors": errors,
Expand Down
78 changes: 78 additions & 0 deletions tests/test_collisions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""Same-file qualified_name collisions must not silently drop nodes.

Same-named symbols in one file (methods of different object literals, repeated
generated helpers) produce the same qualified_name. Before the fix, the insert's
ON CONFLICT(qualified_name) DO UPDATE collapsed them last-writer-wins and the
earlier definitions vanished. The batch store now disambiguates them.
"""

import tempfile
from pathlib import Path

from code_review_graph.graph import GraphStore
from code_review_graph.parser import EdgeInfo, NodeInfo


def _func(name, line, path="/repo/db.ts", parent=None):
return NodeInfo(
kind="Function", name=name, file_path=path,
line_start=line, line_end=line + 2, language="typescript",
parent_name=parent,
)


class TestQualifiedNameCollisions:
def setup_method(self):
self.tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
self.store = GraphStore(self.tmp.name)

def teardown_method(self):
self.store.close()
Path(self.tmp.name).unlink(missing_ok=True)

def test_same_name_methods_both_persist(self):
# Two object literals each defining getById -> empty parent_name -> collide.
self.store.store_file_nodes_edges(
"/repo/db.ts", [_func("getById", 10), _func("getById", 20)], []
)
assert self.store.get_stats().total_nodes == 2
# First occurrence keeps the bare key; the second carries a line suffix.
first = self.store.get_node("/repo/db.ts::getById")
assert first is not None and first.line_start == 10
second = self.store.get_node("/repo/db.ts::getById:L20")
assert second is not None and second.line_start == 20

def test_three_same_name_module_functions_persist(self):
nodes = [_func("rootRouteImport", n) for n in (5, 10, 15)]
self.store.store_file_nodes_edges("/repo/routeTree.gen.ts", nodes, [])
assert self.store.get_stats().total_nodes == 3

def test_edge_to_bare_key_resolves_to_first_def(self):
# An edge targeting the bare key still names a real node (the first def),
# so collisions never dangle existing edges.
self.store.store_file_nodes_edges(
"/repo/db.ts", [_func("getById", 10), _func("getById", 20)], []
)
edge = EdgeInfo(
kind="CALLS", source="/repo/api.ts::handler",
target="/repo/db.ts::getById", file_path="/repo/api.ts", line=3,
)
self.store.store_file_nodes_edges(
"/repo/api.ts", [_func("handler", 1, path="/repo/api.ts")], [edge]
)
target = self.store.get_node("/repo/db.ts::getById")
assert target is not None and target.line_start == 10

def test_find_disambiguated_nodes_lists_suffixed_keys(self):
self.store.store_file_nodes_edges(
"/repo/db.ts", [_func("getById", 10), _func("getById", 20)], []
)
assert self.store.find_disambiguated_nodes() == ["/repo/db.ts::getById:L20"]

def test_unique_names_keep_bare_keys_unchanged(self):
self.store.store_file_nodes_edges(
"/repo/db.ts", [_func("a", 10), _func("b", 20)], []
)
assert self.store.find_disambiguated_nodes() == []
assert self.store.get_node("/repo/db.ts::a") is not None
assert self.store.get_node("/repo/db.ts::b") is not None