Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion lib/jekyll/polyglot/liquid/tags/i18n_headers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@ def render(context)
i18n = ""

# Find all documents with the same page_id
valid_languages = ([site.default_lang] + site.languages).uniq
docs_with_same_id = site.collections.values
.flat_map(&:docs)
.filter { |doc| !doc.data['page_id'].nil? }
.select { |doc| doc.data['page_id'] == page_id }

# Build a hash of lang => permalink for all matching docs
lang_to_permalink = docs_with_same_id.to_h { |doc| [doc.data['lang'], doc.data['permalink']] }
# Filter by explicit lang to exclude unconfigured languages even after normalization
lang_to_permalink = docs_with_same_id
.reject { |doc| doc.data['lang'] && !valid_languages.include?(doc.data['lang']) }
.to_h { |doc| [doc.data['lang'] || site.default_lang, doc.data['permalink']] }

# Canonical should always point to the current page's permalink (active_lang)
current_lang = site.active_lang
Expand Down
30 changes: 28 additions & 2 deletions lib/jekyll/polyglot/patches/jekyll/site.rb
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,23 @@ def derive_lang_from_path(doc)
def coordinate_documents(docs)
regex = document_url_regex
approved = {}
# Build set of valid languages (default + configured)
valid_languages = ([@default_lang] + @languages).uniq

docs.each do |doc|
lang = doc.data['lang'] || derive_lang_from_path(doc) || @default_lang
# Get the explicitly declared language (frontmatter or path-derived)
explicit_lang = doc.data['lang'] || derive_lang_from_path(doc)
lang = explicit_lang || @default_lang

# FILTER: Skip documents whose explicit lang is not in configured languages.
# Check the explicit value (not the fallback) so that documents with an
# unconfigured lang like 'de' are excluded even if normalization would
# map them to default_lang.
if explicit_lang && !valid_languages.include?(explicit_lang)
Jekyll.logger.warn "Polyglot:", "Skipping #{doc.relative_path} - lang '#{explicit_lang}' not in configured languages #{valid_languages.inspect}"
next
end

lang_exclusive = doc.data['lang-exclusive'] || []
url = doc.url.gsub(regex, '/')
page_id = doc.data['page_id'] || url
Expand Down Expand Up @@ -220,11 +235,22 @@ def assignPageLanguagePermalinks(doc, docs)
pageId = doc.data['page_id']
if !pageId.nil? && !pageId.empty?
unless doc.data['permalink_lang'] then doc.data['permalink_lang'] = {} end

# Build set of valid languages
valid_languages = ([@default_lang] + @languages).uniq

permalinkDocs = docs.select do |dd|
dd.data['page_id'] == pageId
end
permalinkDocs.each do |dd|
doclang = dd.data['lang'] || derive_lang_from_path(dd) || @default_lang
explicit_lang = dd.data['lang'] || derive_lang_from_path(dd)
doclang = explicit_lang || @default_lang

# FILTER: Only include permalinks for configured languages.
# Check explicit lang so unconfigured languages are excluded
# even if normalization would map them to default_lang.
next if explicit_lang && !valid_languages.include?(explicit_lang)

doc.data['permalink_lang'][doclang] = dd.data['permalink']
end
end
Expand Down
208 changes: 208 additions & 0 deletions spec/jekyll/polyglot/patches/jekyll/site_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1021,5 +1021,213 @@
expect(doc.data['rendered_lang']).to eq('es')
end
end

describe 'coordinate_documents with unconfigured languages' do
  before do
    @collection = Jekyll::Collection.new(@site, 'test')
  end

  # Builds a document in @collection whose data declares the given
  # 'lang' and 'title' values.
  def unconfigured_lang_doc(path, lang, title)
    doc = Jekyll::Document.new(path, site: @site, collection: @collection)
    doc.data['lang'] = lang
    doc.data['title'] = title
    doc
  end

  it 'should exclude documents with unconfigured lang in frontmatter' do
    # Only en and es are configured; de is deliberately left out.
    @site.config['languages'] = %w[en es]
    @site.config['default_lang'] = 'en'
    @site.prepare

    candidates = [
      unconfigured_lang_doc('test-en.md', 'en', 'English Page'), # configured, expected to be kept
      unconfigured_lang_doc('test-de.md', 'de', 'German Page'),  # not configured, expected to be dropped
      unconfigured_lang_doc('test-es.md', 'es', 'Spanish Page')  # configured, expected to be kept
    ]

    result_langs = @site.coordinate_documents(candidates).map { |d| d.data['lang'] }

    expect(result_langs).to include('en')
    expect(result_langs).to include('es')
    expect(result_langs).not_to include('de')
  end

  it 'should include documents with default_lang even if not in languages array' do
    # default_lang (en) is intentionally absent from the languages list.
    @site.config['languages'] = %w[es fr]
    @site.config['default_lang'] = 'en'
    @site.prepare

    candidates = [
      unconfigured_lang_doc('test-en.md', 'en', 'English Page'), # default_lang, expected to be kept
      unconfigured_lang_doc('test-es.md', 'es', 'Spanish Page'), # configured, expected to be kept
      unconfigured_lang_doc('test-de.md', 'de', 'German Page')   # not configured, expected to be dropped
    ]

    result_langs = @site.coordinate_documents(candidates).map { |d| d.data['lang'] }

    expect(result_langs).to include('en') # default_lang always included
    expect(result_langs).to include('es')
    expect(result_langs).not_to include('de')
  end

  it 'should log warning when excluding documents with unconfigured language' do
    @site.config['languages'] = %w[en es]
    @site.config['default_lang'] = 'en'
    @site.prepare

    german = unconfigured_lang_doc('test-de.md', 'de', 'German Page')

    # The expectation must be registered before coordinate_documents runs.
    expect(Jekyll.logger).to receive(:warn).with("Polyglot:", /lang 'de' not in configured/)

    @site.coordinate_documents([german])
  end
end

it 'should not serve unconfigured language pages as default language content' do
  # Scenario: a dev build configures only a few languages while pages for
  # many more languages exist. With just en and pt-BR configured, the German
  # and French pages must not show up.
  @site.config['languages'] = %w[en pt-BR]
  @site.config['default_lang'] = 'en'
  @site.prepare

  # One home page per language; all share page_id, permalink and url.
  home_titles = {
    'en' => 'The SQL Editor You Love',
    'de' => 'Der SQL-Editor Ihrer Träume',
    'pt-BR' => 'O Editor SQL dos Seus Sonhos',
    'fr' => "L'éditeur SQL de vos rêves"
  }

  # OpenStruct stand-ins for page objects (they expose only data and url).
  pages = home_titles.map do |lang, title|
    OpenStruct.new(
      data: { 'lang' => lang, 'page_id' => 'home', 'permalink' => '/', 'title' => title },
      url: '/'
    )
  end

  # Build for the default language (en).
  @site.active_lang = 'en'
  coordinated = @site.coordinate_documents(pages)

  # No unconfigured language may survive coordination.
  surviving_langs = coordinated.map { |p| p.data['lang'] }
  expect(surviving_langs).not_to include('de')
  expect(surviving_langs).not_to include('fr')

  # Exactly one page wins for the default-language build: the English one.
  expect(coordinated.size).to eq(1)
  expect(coordinated.first.data['title']).to eq('The SQL Editor You Love')
end

it 'should filter by explicit frontmatter lang even if lang resolves to default' do
  # Regression guard: a normalize_lang step (on other branches) turns unknown
  # lang codes into nil, which then falls back to default_lang and slips past
  # the valid_languages filter. The filter therefore has to look at the
  # ORIGINAL frontmatter lang, not the resolved one.
  @site.config['languages'] = %w[en pt-BR]
  @site.config['default_lang'] = 'en'
  @site.prepare

  collection = Jekyll::Collection.new(@site, 'test')

  # Creates a 'home' document with the given path, lang and title.
  build_home = lambda do |path, lang, title|
    home = Jekyll::Document.new(path, site: @site, collection: collection)
    home.data['lang'] = lang
    home.data['page_id'] = 'home'
    home.data['title'] = title
    home
  end

  english_home = build_home.call('index.html', 'en', 'English Home')
  # Explicit lang: de in frontmatter. Even if normalization mapped
  # 'de' -> nil -> default_lang, this doc must be excluded because 'de'
  # is not a configured language.
  german_home = build_home.call('index-de.html', 'de', 'German Home')

  @site.active_lang = 'en'
  coordinated = @site.coordinate_documents([english_home, german_home])

  # The German document must be absent from the result.
  surviving_titles = coordinated.map { |d| d.data['title'] }
  expect(surviving_titles).to include('English Home')
  expect(surviving_titles).not_to include('German Home')

  # No surviving document may carry the unconfigured lang either.
  surviving_langs = coordinated.map { |d| d.data['lang'] }
  expect(surviving_langs).not_to include('de')
end

describe 'assignPageLanguagePermalinks with unconfigured languages' do
  before do
    @collection = Jekyll::Collection.new(@site, 'test')
  end

  # Builds a document in @collection that belongs to page_id 'test-page'
  # and declares the given lang and permalink.
  def test_page_translation(path, lang, permalink)
    doc = Jekyll::Document.new(path, site: @site, collection: @collection)
    doc.data['lang'] = lang
    doc.data['page_id'] = 'test-page'
    doc.data['permalink'] = permalink
    doc
  end

  it 'should only include configured languages in permalink_lang' do
    @site.config['languages'] = %w[en es]
    @site.config['default_lang'] = 'en'
    @site.prepare

    # Three translations of the same page; de is not a configured language.
    english = test_page_translation('test-en.md', 'en', '/test/')
    spanish = test_page_translation('test-es.md', 'es', '/es/test/')
    german = test_page_translation('test-de.md', 'de', '/de/test/')
    translations = [english, spanish, german]

    # Populate permalink_lang on the English document.
    @site.assignPageLanguagePermalinks(english, translations)

    # Only en and es may appear as keys.
    permalink_langs = english.data['permalink_lang'].keys
    expect(permalink_langs).to include('en')
    expect(permalink_langs).to include('es')
    expect(permalink_langs).not_to include('de')
  end
end
end
end