diff --git a/code_review_graph/skills.py b/code_review_graph/skills.py index aba22dc8..f88a70c2 100644 --- a/code_review_graph/skills.py +++ b/code_review_graph/skills.py @@ -12,7 +12,6 @@ import logging import os import platform -import re import shutil import stat import sys @@ -289,6 +288,83 @@ def _merge_toml_mcp_server( return True +def _strip_jsonc(text: str) -> str: + """Strip JSONC comments and trailing commas without corrupting string values. + + Editors like Zed accept non-standard JSON (``//`` and ``/* */`` comments, + trailing commas). To merge such a config we must reduce it to strict JSON + first. A naive regex pass cannot tell structure from data: it would delete a + comma inside ``"foo, bar"`` or truncate a ``"https://..."`` URL at the + ``//``. This walks the text character by character, tracking whether we are + inside a double-quoted string (respecting ``\\`` escapes), and only removes + comments and trailing commas that appear in structural position. Content + inside string values is preserved verbatim. (GH #553) + """ + + def _skip_comment(s: str, idx: int) -> int | None: + """If a comment starts at ``idx``, return the index just past it.""" + if s[idx] != "/" or idx + 1 >= len(s): + return None + nxt = s[idx + 1] + if nxt == "/": + idx += 2 + while idx < len(s) and s[idx] != "\n": + idx += 1 + return idx + if nxt == "*": + idx += 2 + while idx + 1 < len(s) and not (s[idx] == "*" and s[idx + 1] == "/"): + idx += 1 + return idx + 2 # consume the closing */ (or run off the end if unterminated) + return None + + out: list[str] = [] + i = 0 + n = len(text) + in_string = False + while i < n: + ch = text[i] + if in_string: + out.append(ch) + if ch == "\\" and i + 1 < n: + out.append(text[i + 1]) # escaped char is data, never a delimiter + i += 2 + continue + if ch == '"': + in_string = False + i += 1 + continue + # Outside a string. + if ch == '"': + in_string = True + out.append(ch) + i += 1 + continue + past = _skip_comment(text, i) + if past is not None: + i = past + continue + if ch == ",": + # Trailing comma if the next significant char (skipping whitespace + # and comments) closes an object or array. + j = i + 1 + while j < n: + if text[j] in " \t\r\n": + j += 1 + continue + past = _skip_comment(text, j) + if past is not None: + j = past + continue + break + if j < n and text[j] in "}]": + i += 1 # drop the trailing comma + continue + out.append(ch) + i += 1 + return "".join(out) + + def install_platform_configs( repo_root: Path, target: str = "all", @@ -344,10 +420,9 @@ def install_platform_configs( existing: dict[str, Any] = {} if config_path.exists(): raw = config_path.read_text(encoding="utf-8", errors="replace") - # Strip single-line comments and trailing commas (JSONC compat - # for editors like Zed that allow non-standard JSON). - stripped = re.sub(r'//.*?$', '', raw, flags=re.MULTILINE) - stripped = re.sub(r',(\s*[}\]])', r'\1', stripped) + # Strip comments and trailing commas (JSONC compat for editors like + # Zed that allow non-standard JSON) without corrupting string values. + stripped = _strip_jsonc(raw) try: existing = json.loads(stripped) except (json.JSONDecodeError, OSError): diff --git a/tests/test_skills.py b/tests/test_skills.py index 73fe8fe1..5e729a15 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -18,6 +18,7 @@ _CLAUDE_MD_SECTION_MARKER, PLATFORMS, _cursor_hook_scripts, + _strip_jsonc, _detect_serve_command, _in_poetry_project, _in_uv_project, @@ -43,6 +44,71 @@ ) +class TestStripJsonc: + """JSONC sanitizer must not corrupt string values (GH #553).""" + + def test_comma_inside_string_preserved(self): + # The original #553 repro: a comma inside a string, immediately before a + # line whose first non-space char is `}`. A naive regex deleted it. + src = ( + '{\n' + ' "mcp": {\n' + ' "my-server": {\n' + ' "command": ["x"],\n' + ' "description": "foo, bar"\n' + ' }\n' + ' }\n' + '}\n' + ) + parsed = json.loads(_strip_jsonc(src)) + assert parsed["mcp"]["my-server"]["description"] == "foo, bar" + + def test_url_with_double_slash_preserved(self): + # The `//` comment stripper must not truncate `https://...` inside a string. + src = '{"url": "https://mcp.example.com/path", "n": 1}' + parsed = json.loads(_strip_jsonc(src)) + assert parsed["url"] == "https://mcp.example.com/path" + assert parsed["n"] == 1 + + def test_real_trailing_comma_before_brace_removed(self): + src = '{"a": 1, "b": 2,}' + assert json.loads(_strip_jsonc(src)) == {"a": 1, "b": 2} + + def test_real_trailing_comma_before_bracket_removed(self): + src = '{"list": [1, 2, 3,]}' + assert json.loads(_strip_jsonc(src)) == {"list": [1, 2, 3]} + + def test_line_comment_removed(self): + src = '{\n "a": 1 // inline comment\n}' + assert json.loads(_strip_jsonc(src)) == {"a": 1} + + def test_block_comment_removed(self): + src = '{\n /* leading */ "a": 1\n}' + assert json.loads(_strip_jsonc(src)) == {"a": 1} + + def test_comment_markers_inside_string_preserved(self): + src = '{"a": "x // y", "b": "p /* q */ r"}' + parsed = json.loads(_strip_jsonc(src)) + assert parsed["a"] == "x // y" + assert parsed["b"] == "p /* q */ r" + + def test_escaped_quote_does_not_break_string_tracking(self): + # The escaped quote must not end the string early; the comma after it is + # data, and the `}` that follows is structural. + src = '{"a": "he said \\"hi, there\\"", "b": 2,}' + parsed = json.loads(_strip_jsonc(src)) + assert parsed["a"] == 'he said "hi, there"' + assert parsed["b"] == 2 + + def test_trailing_comma_then_comment_then_close(self): + src = '{\n "a": 1, // trailing then comment\n}' + assert json.loads(_strip_jsonc(src)) == {"a": 1} + + def test_strict_json_unchanged(self): + src = '{"a": [1, 2], "b": {"c": "d, e"}}' + assert json.loads(_strip_jsonc(src)) == json.loads(src) + + class TestGenerateSkills: def test_creates_skills_directory(self, tmp_path): result = generate_skills(tmp_path)