diff --git a/lib/jekyll/polyglot/liquid/tags/i18n_headers.rb b/lib/jekyll/polyglot/liquid/tags/i18n_headers.rb index d06c949f0..4dcea2a0c 100644 --- a/lib/jekyll/polyglot/liquid/tags/i18n_headers.rb +++ b/lib/jekyll/polyglot/liquid/tags/i18n_headers.rb @@ -21,13 +21,17 @@ def render(context) i18n = "" # Find all documents with the same page_id + valid_languages = ([site.default_lang] + site.languages).uniq docs_with_same_id = site.collections.values .flat_map(&:docs) .filter { |doc| !doc.data['page_id'].nil? } .select { |doc| doc.data['page_id'] == page_id } # Build a hash of lang => permalink for all matching docs - lang_to_permalink = docs_with_same_id.to_h { |doc| [doc.data['lang'], doc.data['permalink']] } + # Filter by explicit lang to exclude unconfigured languages even after normalization + lang_to_permalink = docs_with_same_id + .reject { |doc| doc.data['lang'] && !valid_languages.include?(doc.data['lang']) } + .to_h { |doc| [doc.data['lang'] || site.default_lang, doc.data['permalink']] } # Canonical should always point to the current page's permalink (active_lang) current_lang = site.active_lang diff --git a/lib/jekyll/polyglot/patches/jekyll/site.rb b/lib/jekyll/polyglot/patches/jekyll/site.rb index 9a094b520..3dadf0925 100644 --- a/lib/jekyll/polyglot/patches/jekyll/site.rb +++ b/lib/jekyll/polyglot/patches/jekyll/site.rb @@ -148,8 +148,23 @@ def derive_lang_from_path(doc) def coordinate_documents(docs) regex = document_url_regex approved = {} + # Build set of valid languages (default + configured) + valid_languages = ([@default_lang] + @languages).uniq + docs.each do |doc| - lang = doc.data['lang'] || derive_lang_from_path(doc) || @default_lang + # Get the explicitly declared language (frontmatter or path-derived) + explicit_lang = doc.data['lang'] || derive_lang_from_path(doc) + lang = explicit_lang || @default_lang + + # FILTER: Skip documents whose explicit lang is not in configured languages. + # Check the explicit value (not the fallback) so that documents with an + # unconfigured lang like 'de' are excluded even if normalization would + # map them to default_lang. + if explicit_lang && !valid_languages.include?(explicit_lang) + Jekyll.logger.warn "Polyglot:", "Skipping #{doc.relative_path} - lang '#{explicit_lang}' not in configured languages #{valid_languages.inspect}" + next + end + lang_exclusive = doc.data['lang-exclusive'] || [] url = doc.url.gsub(regex, '/') page_id = doc.data['page_id'] || url @@ -220,11 +235,22 @@ def assignPageLanguagePermalinks(doc, docs) pageId = doc.data['page_id'] if !pageId.nil? && !pageId.empty? unless doc.data['permalink_lang'] then doc.data['permalink_lang'] = {} end + + # Build set of valid languages + valid_languages = ([@default_lang] + @languages).uniq + permalinkDocs = docs.select do |dd| dd.data['page_id'] == pageId end permalinkDocs.each do |dd| - doclang = dd.data['lang'] || derive_lang_from_path(dd) || @default_lang + explicit_lang = dd.data['lang'] || derive_lang_from_path(dd) + doclang = explicit_lang || @default_lang + + # FILTER: Only include permalinks for configured languages. + # Check explicit lang so unconfigured languages are excluded + # even if normalization would map them to default_lang. + next if explicit_lang && !valid_languages.include?(explicit_lang) + doc.data['permalink_lang'][doclang] = dd.data['permalink'] end end diff --git a/spec/jekyll/polyglot/patches/jekyll/site_spec.rb b/spec/jekyll/polyglot/patches/jekyll/site_spec.rb index bb7dd16ae..e7e294eb7 100644 --- a/spec/jekyll/polyglot/patches/jekyll/site_spec.rb +++ b/spec/jekyll/polyglot/patches/jekyll/site_spec.rb @@ -1021,5 +1021,213 @@ expect(doc.data['rendered_lang']).to eq('es') end end + + describe 'coordinate_documents with unconfigured languages' do + before do + @collection = Jekyll::Collection.new(@site, 'test') + end + + it 'should exclude documents with unconfigured lang in frontmatter' do + # Configure site with only en and es + @site.config['languages'] = ['en', 'es'] + @site.config['default_lang'] = 'en' + @site.prepare + + docs = [ + # lang: en - should be included + Jekyll::Document.new('test-en.md', site: @site, collection: @collection).tap do |doc| + doc.data['lang'] = 'en' + doc.data['title'] = 'English Page' + end, + # lang: de - should be excluded (not in config) + Jekyll::Document.new('test-de.md', site: @site, collection: @collection).tap do |doc| + doc.data['lang'] = 'de' + doc.data['title'] = 'German Page' + end, + # lang: es - should be included + Jekyll::Document.new('test-es.md', site: @site, collection: @collection).tap do |doc| + doc.data['lang'] = 'es' + doc.data['title'] = 'Spanish Page' + end + ] + + coordinated = @site.coordinate_documents(docs) + coordinated_langs = coordinated.map { |d| d.data['lang'] } + + expect(coordinated_langs).to include('en') + expect(coordinated_langs).to include('es') + expect(coordinated_langs).not_to include('de') + end + + it 'should include documents with default_lang even if not in languages array' do + # Configure with es and fr, but default_lang is en + @site.config['languages'] = ['es', 'fr'] + @site.config['default_lang'] = 'en' + @site.prepare + + docs = [ + # lang: en (default_lang) - should be included + Jekyll::Document.new('test-en.md', site: @site, collection: @collection).tap do |doc| + doc.data['lang'] = 'en' + doc.data['title'] = 'English Page' + end, + # lang: es - should be included + Jekyll::Document.new('test-es.md', site: @site, collection: @collection).tap do |doc| + doc.data['lang'] = 'es' + doc.data['title'] = 'Spanish Page' + end, + # lang: de - should be excluded + Jekyll::Document.new('test-de.md', site: @site, collection: @collection).tap do |doc| + doc.data['lang'] = 'de' + doc.data['title'] = 'German Page' + end + ] + + coordinated = @site.coordinate_documents(docs) + coordinated_langs = coordinated.map { |d| d.data['lang'] } + + expect(coordinated_langs).to include('en') # default_lang always included + expect(coordinated_langs).to include('es') + expect(coordinated_langs).not_to include('de') + end + + it 'should log warning when excluding documents with unconfigured language' do + @site.config['languages'] = ['en', 'es'] + @site.config['default_lang'] = 'en' + @site.prepare + + doc = Jekyll::Document.new('test-de.md', site: @site, collection: @collection) + doc.data['lang'] = 'de' + doc.data['title'] = 'German Page' + + expect(Jekyll.logger).to receive(:warn).with("Polyglot:", /lang 'de' not in configured/) + + @site.coordinate_documents([doc]) + end + end + + it 'should not serve unconfigured language pages as default language content' do + # Real-world scenario: site configured with limited languages for dev builds, + # but pages exist for many languages from production config. + # German pages should not appear when only en and pt-BR are configured. + @site.config['languages'] = ['en', 'pt-BR'] + @site.config['default_lang'] = 'en' + @site.prepare + + # Simulate Jekyll::Page objects using OpenStruct (like site.pages) + pages = [ + OpenStruct.new( + data: { 'lang' => 'en', 'page_id' => 'home', 'permalink' => '/', + 'title' => 'The SQL Editor You Love' }, + url: '/' + ), + OpenStruct.new( + data: { 'lang' => 'de', 'page_id' => 'home', 'permalink' => '/', + 'title' => 'Der SQL-Editor Ihrer Träume' }, + url: '/' + ), + OpenStruct.new( + data: { 'lang' => 'pt-BR', 'page_id' => 'home', 'permalink' => '/', + 'title' => 'O Editor SQL dos Seus Sonhos' }, + url: '/' + ), + OpenStruct.new( + data: { 'lang' => 'fr', 'page_id' => 'home', 'permalink' => '/', + 'title' => "L'éditeur SQL de vos rêves" }, + url: '/' + ) + ] + + # Building for default language (en) + @site.active_lang = 'en' + coordinated = @site.coordinate_documents(pages) + + # Only configured languages should appear + coordinated_langs = coordinated.map { |p| p.data['lang'] } + expect(coordinated_langs).not_to include('de') + expect(coordinated_langs).not_to include('fr') + + # The winning page for the default language build should be English + expect(coordinated.length).to eq(1) + expect(coordinated[0].data['title']).to eq('The SQL Editor You Love') + end + + it 'should filter by explicit frontmatter lang even if lang resolves to default' do + # This test catches a bug where normalize_lang (on other branches) converts + # unknown lang codes to nil, which then falls back to default_lang, + # bypassing the valid_languages filter. + # The filter must check the ORIGINAL frontmatter lang value, not the resolved one. + @site.config['languages'] = ['en', 'pt-BR'] + @site.config['default_lang'] = 'en' + @site.prepare + + collection = Jekyll::Collection.new(@site, 'test') + docs = [ + Jekyll::Document.new('index.html', site: @site, collection: collection).tap do |doc| + doc.data['lang'] = 'en' + doc.data['page_id'] = 'home' + doc.data['title'] = 'English Home' + end, + # This doc has explicit lang: de in frontmatter. + # Even if a normalize step maps 'de' -> nil -> default_lang, + # it should still be excluded because 'de' is not configured. + Jekyll::Document.new('index-de.html', site: @site, collection: collection).tap do |doc| + doc.data['lang'] = 'de' + doc.data['page_id'] = 'home' + doc.data['title'] = 'German Home' + end + ] + + @site.active_lang = 'en' + coordinated = @site.coordinate_documents(docs) + + # The German doc must NOT be in the output + titles = coordinated.map { |d| d.data['title'] } + expect(titles).to include('English Home') + expect(titles).not_to include('German Home') + + # Verify the German doc's explicit lang was the reason for exclusion + coordinated_langs = coordinated.map { |d| d.data['lang'] } + expect(coordinated_langs).not_to include('de') + end + + describe 'assignPageLanguagePermalinks with unconfigured languages' do + before do + @collection = Jekyll::Collection.new(@site, 'test') + end + + it 'should only include configured languages in permalink_lang' do + @site.config['languages'] = ['en', 'es'] + @site.config['default_lang'] = 'en' + @site.prepare + + # Create docs with en, es, de (de not configured) + docs = [ + Jekyll::Document.new('test-en.md', site: @site, collection: @collection).tap do |doc| + doc.data['lang'] = 'en' + doc.data['page_id'] = 'test-page' + doc.data['permalink'] = '/test/' + end, + Jekyll::Document.new('test-es.md', site: @site, collection: @collection).tap do |doc| + doc.data['lang'] = 'es' + doc.data['page_id'] = 'test-page' + doc.data['permalink'] = '/es/test/' + end, + Jekyll::Document.new('test-de.md', site: @site, collection: @collection).tap do |doc| + doc.data['lang'] = 'de' + doc.data['page_id'] = 'test-page' + doc.data['permalink'] = '/de/test/' + end + ] + + # Call assignPageLanguagePermalinks on the English doc + @site.assignPageLanguagePermalinks(docs[0], docs) + + # Verify permalink_lang only has keys for en and es + expect(docs[0].data['permalink_lang'].keys).to include('en') + expect(docs[0].data['permalink_lang'].keys).to include('es') + expect(docs[0].data['permalink_lang'].keys).not_to include('de') + end + end end end