diff --git a/crates/pampa/src/readers/qmd.rs b/crates/pampa/src/readers/qmd.rs index 4998fcb53..93ecfe766 100644 --- a/crates/pampa/src/readers/qmd.rs +++ b/crates/pampa/src/readers/qmd.rs @@ -120,28 +120,33 @@ pub fn read( writeln!(output_stream, "---").unwrap(); }); if log_observer.had_errors() { - // Produce structured DiagnosticMessage objects with proper source locations - let mut diagnostics = produce_diagnostic_messages( - input_bytes, - &log_observer, - filename, - &context.source_context, - ); - - // Prune diagnostics based on ERROR nodes if enabled - if prune_errors { - use crate::readers::qmd_error_messages::{ - collect_error_node_ranges, get_outer_error_nodes, - prune_diagnostics_by_error_nodes, - }; - - let error_nodes = collect_error_node_ranges(&tree); - let outer_nodes = get_outer_error_nodes(&error_nodes); - diagnostics = - prune_diagnostics_by_error_nodes(diagnostics, &error_nodes, &outer_nodes); - } + use crate::readers::qmd_error_messages::{ + collect_error_node_ranges, get_outer_error_nodes, prune_diagnostics_by_error_nodes, + }; - return Err(diagnostics); + // GLR speculative parsing can hit detect_error in dead branches + // while another branch reaches accept cleanly. When that happens, + // the final tree has no ERROR nodes and the parse is genuinely + // successful — we should not report speculative errors from dead + // branches. Use the presence of ERROR nodes in the tree as the + // ground truth for whether the parse actually failed. + let error_nodes = collect_error_node_ranges(&tree); + if !error_nodes.is_empty() { + let mut diagnostics = produce_diagnostic_messages( + input_bytes, + &log_observer, + filename, + &context.source_context, + ); + + if prune_errors { + let outer_nodes = get_outer_error_nodes(&error_nodes); + diagnostics = + prune_diagnostics_by_error_nodes(diagnostics, &error_nodes, &outer_nodes); + } + + return Err(diagnostics); + } } } diff --git a/crates/pampa/tests/test_glr_dead_branch_speculation.rs b/crates/pampa/tests/test_glr_dead_branch_speculation.rs new file mode 100644 index 000000000..68a3f53c6 --- /dev/null +++ b/crates/pampa/tests/test_glr_dead_branch_speculation.rs @@ -0,0 +1,41 @@ +use pampa::readers; + +fn assert_parses_cleanly(input: &str, filename: &str) { + let mut content = input.to_string(); + if !content.ends_with('\n') { + content.push('\n'); + } + + let result = readers::qmd::read( + content.as_bytes(), + false, + filename, + &mut std::io::sink(), + true, + None, + ); + + if let Err(diagnostics) = result { + let mut source_context = quarto_source_map::SourceContext::new(); + source_context.add_file(filename.to_string(), Some(content.clone())); + let render_options = quarto_error_reporting::TextRenderOptions { + enable_hyperlinks: false, + }; + let mut output = String::new(); + for diagnostic in &diagnostics { + output + .push_str(&diagnostic.to_text_with_options(Some(&source_context), &render_options)); + output.push('\n'); + } + panic!("Expected clean parse for input:\n{content}\nGot diagnostics:\n{output}"); + } +} + +#[test] +fn nested_double_quote_inside_emphasis_parses_cleanly() { + // `*a" b."*` is `a"b."` where the two `"` form a paired + // double-quote span. GLR speculation hits detect_error in dead + // branches; the parse as a whole accepts cleanly and the resulting + // tree has no ERROR nodes, so no diagnostics should be reported. + assert_parses_cleanly("*a\" b.\"*\n", "nested-double-quote-in-emphasis.qmd"); +}