Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 80 additions & 5 deletions code_review_graph/skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import logging
import os
import platform
import re
import shutil
import stat
import sys
Expand Down Expand Up @@ -289,6 +288,83 @@ def _merge_toml_mcp_server(
return True


def _strip_jsonc(text: str) -> str:
"""Strip JSONC comments and trailing commas without corrupting string values.

Editors like Zed accept non-standard JSON (``//`` and ``/* */`` comments,
trailing commas). To merge such a config we must reduce it to strict JSON
first. A naive regex pass cannot tell structure from data: it would delete a
comma inside ``"foo, bar"`` or truncate a ``"https://..."`` URL at the
``//``. This walks the text character by character, tracking whether we are
inside a double-quoted string (respecting ``\\`` escapes), and only removes
comments and trailing commas that appear in structural position. Content
inside string values is preserved verbatim. (GH #553)
"""

def _skip_comment(s: str, idx: int) -> int | None:
"""If a comment starts at ``idx``, return the index just past it."""
if s[idx] != "/" or idx + 1 >= len(s):
return None
nxt = s[idx + 1]
if nxt == "/":
idx += 2
while idx < len(s) and s[idx] != "\n":
idx += 1
return idx
if nxt == "*":
idx += 2
while idx + 1 < len(s) and not (s[idx] == "*" and s[idx + 1] == "/"):
idx += 1
return idx + 2 # consume the closing */ (or run off the end if unterminated)
return None

out: list[str] = []
i = 0
n = len(text)
in_string = False
while i < n:
ch = text[i]
if in_string:
out.append(ch)
if ch == "\\" and i + 1 < n:
out.append(text[i + 1]) # escaped char is data, never a delimiter
i += 2
continue
if ch == '"':
in_string = False
i += 1
continue
# Outside a string.
if ch == '"':
in_string = True
out.append(ch)
i += 1
continue
past = _skip_comment(text, i)
if past is not None:
i = past
continue
if ch == ",":
# Trailing comma if the next significant char (skipping whitespace
# and comments) closes an object or array.
j = i + 1
while j < n:
if text[j] in " \t\r\n":
j += 1
continue
past = _skip_comment(text, j)
if past is not None:
j = past
continue
break
if j < n and text[j] in "}]":
i += 1 # drop the trailing comma
continue
out.append(ch)
i += 1
return "".join(out)


def install_platform_configs(
repo_root: Path,
target: str = "all",
Expand Down Expand Up @@ -344,10 +420,9 @@ def install_platform_configs(
existing: dict[str, Any] = {}
if config_path.exists():
raw = config_path.read_text(encoding="utf-8", errors="replace")
# Strip single-line comments and trailing commas (JSONC compat
# for editors like Zed that allow non-standard JSON).
stripped = re.sub(r'//.*?$', '', raw, flags=re.MULTILINE)
stripped = re.sub(r',(\s*[}\]])', r'\1', stripped)
# Strip comments and trailing commas (JSONC compat for editors like
# Zed that allow non-standard JSON) without corrupting string values.
stripped = _strip_jsonc(raw)
try:
existing = json.loads(stripped)
except (json.JSONDecodeError, OSError):
Expand Down
66 changes: 66 additions & 0 deletions tests/test_skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
_CLAUDE_MD_SECTION_MARKER,
PLATFORMS,
_cursor_hook_scripts,
_strip_jsonc,
_detect_serve_command,
_in_poetry_project,
_in_uv_project,
Expand All @@ -43,6 +44,71 @@
)


class TestStripJsonc:
"""JSONC sanitizer must not corrupt string values (GH #553)."""

def test_comma_inside_string_preserved(self):
# The original #553 repro: a comma inside a string, immediately before a
# line whose first non-space char is `}`. A naive regex deleted it.
src = (
'{\n'
' "mcp": {\n'
' "my-server": {\n'
' "command": ["x"],\n'
' "description": "foo, bar"\n'
' }\n'
' }\n'
'}\n'
)
parsed = json.loads(_strip_jsonc(src))
assert parsed["mcp"]["my-server"]["description"] == "foo, bar"

def test_url_with_double_slash_preserved(self):
# The `//` comment stripper must not truncate `https://...` inside a string.
src = '{"url": "https://mcp.example.com/path", "n": 1}'
parsed = json.loads(_strip_jsonc(src))
assert parsed["url"] == "https://mcp.example.com/path"
assert parsed["n"] == 1

def test_real_trailing_comma_before_brace_removed(self):
src = '{"a": 1, "b": 2,}'
assert json.loads(_strip_jsonc(src)) == {"a": 1, "b": 2}

def test_real_trailing_comma_before_bracket_removed(self):
src = '{"list": [1, 2, 3,]}'
assert json.loads(_strip_jsonc(src)) == {"list": [1, 2, 3]}

def test_line_comment_removed(self):
src = '{\n "a": 1 // inline comment\n}'
assert json.loads(_strip_jsonc(src)) == {"a": 1}

def test_block_comment_removed(self):
src = '{\n /* leading */ "a": 1\n}'
assert json.loads(_strip_jsonc(src)) == {"a": 1}

def test_comment_markers_inside_string_preserved(self):
src = '{"a": "x // y", "b": "p /* q */ r"}'
parsed = json.loads(_strip_jsonc(src))
assert parsed["a"] == "x // y"
assert parsed["b"] == "p /* q */ r"

def test_escaped_quote_does_not_break_string_tracking(self):
# The escaped quote must not end the string early; the comma after it is
# data, and the `}` that follows is structural.
src = '{"a": "he said \\"hi, there\\"", "b": 2,}'
parsed = json.loads(_strip_jsonc(src))
assert parsed["a"] == 'he said "hi, there"'
assert parsed["b"] == 2

def test_trailing_comma_then_comment_then_close(self):
src = '{\n "a": 1, // trailing then comment\n}'
assert json.loads(_strip_jsonc(src)) == {"a": 1}

def test_strict_json_unchanged(self):
src = '{"a": [1, 2], "b": {"c": "d, e"}}'
assert json.loads(_strip_jsonc(src)) == json.loads(src)


class TestGenerateSkills:
def test_creates_skills_directory(self, tmp_path):
result = generate_skills(tmp_path)
Expand Down