From 4b26a27ae40d3b308e6a93db80c504876fee4c14 Mon Sep 17 00:00:00 2001 From: Kabiru Mwenja Date: Thu, 7 May 2026 20:18:22 +0300 Subject: [PATCH 1/7] Resolve semantic identifiers in wiki reverse-link parser The wiki module maintains a separate work-package macro parser that produces formal database links from wiki pages back to referenced work packages. It still spoke numeric-only and silently dropped semantic identifiers, leaving reverse-link coverage uneven once a project is in semantic mode. The matcher now accepts the same shape as the inline-text macro (`#NNN`, `#PROJ-1`, plus the `##`/`###` widget variants) and resolves each capture through `WorkPackage.where_display_id_in`, which already mixes primary keys, current identifiers and historical aliases in one query. A rename history continues to produce a reverse link via the alias table. --- .../update_reverse_inline_wiki_page_links.rb | 18 ++-- .../wiki_pages/create_service_spec.rb | 88 +++++++++++++++++++ 2 files changed, 100 insertions(+), 6 deletions(-) diff --git a/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb b/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb index be443b9b67c1..684f4a7d6c14 100644 --- a/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb +++ b/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb @@ -38,22 +38,28 @@ def update_reverse_inline_wiki_page_links(wiki_page) Wikis::ReverseInlinePageLink.where(provider:, identifier: wiki_page.id).delete_all - find_wp_links(wiki_page.text).uniq.each do |wp_id| - wp = WorkPackage.find_by(id: wp_id) - next if wp.nil? + identifiers = find_wp_links(wiki_page.text).uniq + return if identifiers.empty? 
+ WorkPackage.where_display_id_in(identifiers).find_each do |wp| Wikis::ReverseInlinePageLink.create!(linkable: wp, provider:, identifier: wiki_page.id) end end + # Mirrors the prefix character class of the inline-text macro matcher + # (lib/open_project/text_formatting/matchers/resource_links_matcher.rb). + # The trailing `(?!\w)` boundary on the semantic branch prevents + # `#PROJ-1abc` from incorrectly matching `#PROJ-1`; the numeric branch + # intentionally has no trailing boundary to preserve historic behaviour + # for inputs like `#13-blubb`. + WP_REF_RE = /(?:[[:space:],~>#\(\[\-]|^)#(?:(\d+)|([A-Z][A-Z0-9_]*-\d+)(?!\w))/ # rubocop:disable Style/RedundantRegexpEscape + private def find_wp_links(text) return [] if text.blank? - # extracted prefix from lib/open_project/text_formatting/matchers/resource_links_matcher.rb - # adding # as additional prefix - text.scan(/(?:[[:space:],~>#\(\[\-]|^)#([0-9]+)/) # rubocop:disable Style/RedundantRegexpEscape + text.scan(WP_REF_RE).map { |numeric, semantic| numeric || semantic } end end end diff --git a/modules/wikis/spec/services/wiki_pages/create_service_spec.rb b/modules/wikis/spec/services/wiki_pages/create_service_spec.rb index 85b051128ec0..475a15dec660 100644 --- a/modules/wikis/spec/services/wiki_pages/create_service_spec.rb +++ b/modules/wikis/spec/services/wiki_pages/create_service_spec.rb @@ -202,4 +202,92 @@ expect(Wikis::ReverseInlinePageLink.count).to eq(0) end end + + context "with a semantic-identifier reference", + with_flag: { semantic_work_package_ids: true }, + with_settings: { work_packages_identifier: "semantic" } do + let(:project) { create(:project, :semantic) } + let(:work_package) do + create(:work_package, project:).tap do |wp| + wp.allocate_and_register_semantic_id + wp.reload + end + end + + context "when the reference uses the semantic identifier" do + let(:text) { "See ##{work_package.identifier} for context." 
} + + it "creates a reverse page link" do + subject + + expect(reverse_link_finder.count).to eq(1) + end + end + + context "when the semantic reference uses the ## widget syntax" do + let(:text) { "Block: ###{work_package.identifier}." } + + it "creates a reverse page link" do + subject + + expect(reverse_link_finder.count).to eq(1) + end + end + + context "when the semantic reference uses the ### widget syntax" do + let(:text) { "Detailed: ####{work_package.identifier}." } + + it "creates a reverse page link" do + subject + + expect(reverse_link_finder.count).to eq(1) + end + end + + context "when the project has been renamed and a historical alias is referenced" do + let(:text) { "Historical: #OLD-#{work_package.sequence_number}." } + + before do + WorkPackageSemanticAlias.create!(work_package:, identifier: "OLD-#{work_package.sequence_number}") + end + + it "still creates a reverse page link" do + subject + + expect(reverse_link_finder.count).to eq(1) + end + end + + context "when no work package matches the semantic reference" do + let(:text) { "Missing: #GHOST-99." } + + it "does not create a link" do + subject + + expect(Wikis::ReverseInlinePageLink.count).to eq(0) + end + end + + context "when the semantic identifier is followed by an alphanumeric word character" do + let(:text) { "Boundary: ##{work_package.identifier}abc." } + + it "does not create a link" do + subject + + expect(Wikis::ReverseInlinePageLink.count).to eq(0) + end + end + end + + context "with a semantic-shape reference in classic mode", + with_flag: { semantic_work_package_ids: false }, + with_settings: { work_packages_identifier: "classic" } do + let(:text) { "See #PROJ-1 for context." 
} + + it "does not create a link" do + subject + + expect(Wikis::ReverseInlinePageLink.count).to eq(0) + end + end end From 559154822b16665193404403f093f09b01df1af2 Mon Sep 17 00:00:00 2001 From: Kabiru Mwenja Date: Wed, 13 May 2026 17:43:46 +0300 Subject: [PATCH 2/7] Cap WP identifier preload IN-list at 500 The wiki reverse-link parser reads identifiers straight out of saved wiki page text. Without a bound, a multi-megabyte pasted body could push thousands of values into the alias-aware WP lookup in one query. `MAX_PRELOAD_IDENTIFIERS = 500` caps the per-save lookup; references past the cap simply don't get a reverse link recorded. --- .../concerns/update_reverse_inline_wiki_page_links.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb b/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb index 684f4a7d6c14..a7765dc6caa6 100644 --- a/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb +++ b/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb @@ -38,7 +38,10 @@ def update_reverse_inline_wiki_page_links(wiki_page) Wikis::ReverseInlinePageLink.where(provider:, identifier: wiki_page.id).delete_all - identifiers = find_wp_links(wiki_page.text).uniq + # Bound the per-save lookup so a multi-megabyte pasted body can't push + # an unbounded IN-list into the alias-aware WP lookup. References past + # the cap simply don't get a reverse link recorded. + identifiers = find_wp_links(wiki_page.text).uniq.first(MAX_PRELOAD_IDENTIFIERS) return if identifiers.empty? WorkPackage.where_display_id_in(identifiers).find_each do |wp| @@ -46,6 +49,8 @@ def update_reverse_inline_wiki_page_links(wiki_page) end end + MAX_PRELOAD_IDENTIFIERS = 500 + # Mirrors the prefix character class of the inline-text macro matcher # (lib/open_project/text_formatting/matchers/resource_links_matcher.rb). 
# The trailing `(?!\w)` boundary on the semantic branch prevents From 59382656542dd85b05a053bbac47abdfe549c42f Mon Sep 17 00:00:00 2001 From: Kabiru Mwenja Date: Fri, 8 May 2026 12:20:51 +0300 Subject: [PATCH 3/7] Lock numeric trailing-text behaviour in wiki reverse-link parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WP_REF_RE applies `(?!\w)` only to the semantic alternation branch — the numeric branch is intentionally unbounded so historic shapes like `#13-blubb` keep matching. There was no spec pinning the historic behaviour, so a future tightening of the boundary could silently strip reverse-links from existing wiki content. --- .../services/wiki_pages/create_service_spec.rb | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modules/wikis/spec/services/wiki_pages/create_service_spec.rb b/modules/wikis/spec/services/wiki_pages/create_service_spec.rb index 475a15dec660..29012d63a8e8 100644 --- a/modules/wikis/spec/services/wiki_pages/create_service_spec.rb +++ b/modules/wikis/spec/services/wiki_pages/create_service_spec.rb @@ -179,6 +179,20 @@ end end + context "when a numeric reference is immediately followed by alphanumeric text" do + # The numeric branch of WP_REF_RE has no trailing `(?!\w)` boundary — + # historic behaviour matches `#13` inside `#13-blubb` and similar + # shapes. Locked here so a future tightening of the boundary can't + # silently strip reverse-links from existing wiki content. 
+ let(:text) { "Trailing: ##{work_package.id}abc" } + + it "still creates a reverse page link from the numeric prefix" do + subject + + expect(reverse_link_finder.count).to eq(1) + end + end + context "when the internal provider is disabled" do let(:internal_provider) { create(:internal_wiki_provider, enabled: false) } From 2650b6537371b8cc6d430ba668a434076ed37db9 Mon Sep 17 00:00:00 2001 From: Kabiru Mwenja Date: Wed, 13 May 2026 18:05:27 +0300 Subject: [PATCH 4/7] Compose wiki regex from SEMANTIC_ID_PATTERN The semantic-shape branch of WP_REF_RE inlined the literal pattern `[A-Z][A-Z0-9_]*-\d+` instead of composing from `WorkPackage::SemanticIdentifier::SEMANTIC_ID_PATTERN.source`. A future tightening of the upstream pattern would silently drift away from the wiki parser's shape unless someone hand-edited this regex too. The multi-line /x form also restores the readable structure: prefix class on its own line, branches separated on the alternation, and the `(?!\w)` boundary visible next to the branch it constrains. --- .../update_reverse_inline_wiki_page_links.rb | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb b/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb index a7765dc6caa6..63afd23a5dae 100644 --- a/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb +++ b/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb @@ -51,13 +51,20 @@ def update_reverse_inline_wiki_page_links(wiki_page) MAX_PRELOAD_IDENTIFIERS = 500 - # Mirrors the prefix character class of the inline-text macro matcher - # (lib/open_project/text_formatting/matchers/resource_links_matcher.rb). 
- # The trailing `(?!\w)` boundary on the semantic branch prevents - # `#PROJ-1abc` from incorrectly matching `#PROJ-1`; the numeric branch - # intentionally has no trailing boundary to preserve historic behaviour - # for inputs like `#13-blubb`. - WP_REF_RE = /(?:[[:space:],~>#\(\[\-]|^)#(?:(\d+)|([A-Z][A-Z0-9_]*-\d+)(?!\w))/ # rubocop:disable Style/RedundantRegexpEscape + # Mirrors the prefix character class of the inline-text macro matcher. + # The trailing `(?!\w)` on the semantic branch keeps `#PROJ-1abc` from + # matching `#PROJ-1`; the numeric branch deliberately has no trailing + # boundary to preserve historic behaviour for inputs like `#13-blubb`. + # rubocop:disable Style/RedundantRegexpEscape + WP_REF_RE = / + (?:[[:space:],~>\#\(\[\-]|^)\# + (?: + (\d+) + | + (#{WorkPackage::SemanticIdentifier::SEMANTIC_ID_PATTERN.source})(?!\w) + ) + /x + # rubocop:enable Style/RedundantRegexpEscape private From 9448d5ceb9ace0e1bef754fbadd627988d551958 Mon Sep 17 00:00:00 2001 From: Kabiru Mwenja Date: Wed, 13 May 2026 21:51:45 +0300 Subject: [PATCH 5/7] Drop speculative IN-list cap from wiki reverse-link parser The 500-identifier cap was added defensively but silently truncated references past the boundary with no signal to the author. The prior numeric-only baseline had no cap at all and never showed a problem in practice; PostgreSQL handles large IN-lists comfortably at the scales realistically seen in a wiki body. If extreme volumes ever do surface, the per-row INSERT loop is the true bottleneck, not the SELECT -- a problem worth solving on its own terms rather than masking here. Constants now sit at the top of the module, above the method body, keeping the Ruby style convention. 
--- .../update_reverse_inline_wiki_page_links.rb | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb b/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb index 63afd23a5dae..44828c1be663 100644 --- a/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb +++ b/modules/wikis/app/services/wikis/concerns/update_reverse_inline_wiki_page_links.rb @@ -32,25 +32,6 @@ module Wikis::Concerns module UpdateReverseInlineWikiPageLinks extend ActiveSupport::Concern - def update_reverse_inline_wiki_page_links(wiki_page) - provider = Wikis::InternalProvider.enabled.first - return if provider.nil? - - Wikis::ReverseInlinePageLink.where(provider:, identifier: wiki_page.id).delete_all - - # Bound the per-save lookup so a multi-megabyte pasted body can't push - # an unbounded IN-list into the alias-aware WP lookup. References past - # the cap simply don't get a reverse link recorded. - identifiers = find_wp_links(wiki_page.text).uniq.first(MAX_PRELOAD_IDENTIFIERS) - return if identifiers.empty? - - WorkPackage.where_display_id_in(identifiers).find_each do |wp| - Wikis::ReverseInlinePageLink.create!(linkable: wp, provider:, identifier: wiki_page.id) - end - end - - MAX_PRELOAD_IDENTIFIERS = 500 - # Mirrors the prefix character class of the inline-text macro matcher. # The trailing `(?!\w)` on the semantic branch keeps `#PROJ-1abc` from # matching `#PROJ-1`; the numeric branch deliberately has no trailing @@ -66,6 +47,20 @@ def update_reverse_inline_wiki_page_links(wiki_page) /x # rubocop:enable Style/RedundantRegexpEscape + def update_reverse_inline_wiki_page_links(wiki_page) + provider = Wikis::InternalProvider.enabled.first + return if provider.nil? 
+ + Wikis::ReverseInlinePageLink.where(provider:, identifier: wiki_page.id).delete_all + + identifiers = find_wp_links(wiki_page.text).uniq + return if identifiers.empty? + + WorkPackage.where_display_id_in(identifiers).find_each do |wp| + Wikis::ReverseInlinePageLink.create!(linkable: wp, provider:, identifier: wiki_page.id) + end + end + private def find_wp_links(text) From 8e01a24a40266bb2612c464e913175a671a4d06b Mon Sep 17 00:00:00 2001 From: Kabiru Mwenja Date: Thu, 14 May 2026 23:27:31 +0300 Subject: [PATCH 6/7] Cover mixed numeric and semantic refs on one wiki page Adds a single example exercising both reference shapes in one wiki body, guarding against any future regex divergence that drops one branch when the other matches first on the same line. --- .../services/wiki_pages/create_service_spec.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/modules/wikis/spec/services/wiki_pages/create_service_spec.rb b/modules/wikis/spec/services/wiki_pages/create_service_spec.rb index 29012d63a8e8..9dd4a61be4ad 100644 --- a/modules/wikis/spec/services/wiki_pages/create_service_spec.rb +++ b/modules/wikis/spec/services/wiki_pages/create_service_spec.rb @@ -291,6 +291,21 @@ expect(Wikis::ReverseInlinePageLink.count).to eq(0) end end + + context "when the body mixes a numeric and a semantic reference" do + let(:numeric_work_package) { create(:work_package) } + let(:text) do + "Mixed: ##{numeric_work_package.id} and ##{work_package.identifier}." 
+ end + + it "creates a reverse page link per referenced work package" do + subject + + wiki_page = WikiPage.first + links = Wikis::ReverseInlinePageLink.where(provider: internal_provider, identifier: wiki_page.id) + expect(links.pluck(:linkable_id)).to contain_exactly(numeric_work_package.id, work_package.id) + end + end end context "with a semantic-shape reference in classic mode", From 22c70f98df5d48e15393ae72a28cd0d483615856 Mon Sep 17 00:00:00 2001 From: Kabiru Mwenja Date: Thu, 14 May 2026 23:41:22 +0300 Subject: [PATCH 7/7] Lock single-link invariant when one WP is referenced multiple ways A wiki body referencing the same work package via its primary key, current semantic identifier, and a historical alias must still produce one reverse-link row. The OR-composed relation returned by `WorkPackage.where_display_id_in` already guarantees this; the spec keeps it that way. --- .../services/wiki_pages/create_service_spec.rb | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/modules/wikis/spec/services/wiki_pages/create_service_spec.rb b/modules/wikis/spec/services/wiki_pages/create_service_spec.rb index 9dd4a61be4ad..b75c34650bed 100644 --- a/modules/wikis/spec/services/wiki_pages/create_service_spec.rb +++ b/modules/wikis/spec/services/wiki_pages/create_service_spec.rb @@ -306,6 +306,23 @@ expect(links.pluck(:linkable_id)).to contain_exactly(numeric_work_package.id, work_package.id) end end + + context "when several reference shapes resolve to the same work package" do + let(:text) do + "Triple: ##{work_package.id}, ##{work_package.identifier}, #OLD-#{work_package.sequence_number}." + end + + before do + WorkPackageSemanticAlias.create!(work_package:, identifier: "OLD-#{work_package.sequence_number}") + end + + it "creates a single reverse page link" do + subject + + expect(reverse_link_finder.count).to eq(1) + expect(reverse_link_finder.first.linkable).to eq(work_package) + end + end end context "with a semantic-shape reference in classic mode",