Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 91 additions & 11 deletions beetsplug/lastgenre/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from beets import config, library, plugins, ui
from beets.library import Album, Item
from beets.util import plurality, unique_list
from beetsplug.lastgenre.utils import drop_ignored_genres, is_ignored
from beetsplug.lastgenre.utils import is_ignored, normalize_genre

from .client import LastFmClient

Expand All @@ -48,7 +48,7 @@
from beets.importer import ImportSession, ImportTask
from beets.library import LibModel

from .utils import GenreIgnorePatterns
from .utils import Aliases, GenreIgnorePatterns

Whitelist = set[str]
"""Set of valid genre names (lowercase). Empty set means all genres allowed."""
Expand Down Expand Up @@ -115,6 +115,7 @@ def sort_by_depth(tags: list[str], branches: CanonTree) -> list[str]:

WHITELIST = os.path.join(os.path.dirname(__file__), "genres.txt")
C14N_TREE = os.path.join(os.path.dirname(__file__), "genres-tree.yaml")
ALIASES = os.path.join(os.path.dirname(__file__), "aliases.yaml")


class LastGenrePlugin(plugins.BeetsPlugin):
Expand All @@ -137,6 +138,7 @@ def __init__(self) -> None:
"title_case": True,
"pretend": False,
"ignorelist": {},
"aliases": True,
}
)
self.setup()
Expand All @@ -150,8 +152,12 @@ def setup(self) -> None:
self.c14n_branches: CanonTree
self.c14n_branches, self.canonicalize = self._load_c14n_tree()
self.ignore_patterns: GenreIgnorePatterns = self._load_ignorelist()
self.aliases: Aliases = self._load_aliases()
self.client = LastFmClient(
self._log, self.config["min_weight"].get(int), self.ignore_patterns
self._log,
self.config["min_weight"].get(int),
self.ignore_patterns,
self.aliases,
)

def _load_whitelist(self) -> Whitelist:
Expand Down Expand Up @@ -246,6 +252,67 @@ def _load_ignorelist(self) -> GenreIgnorePatterns:

return compiled_ignorelist

def _load_aliases(self) -> Aliases:
    """Load the genre alias table from the beets config.

    Reads ``lastgenre.aliases`` as a mapping of genre names to lists of
    regex patterns::

        lastgenre:
            aliases:
                drum and bass:
                    - d(rum)?[ &n/]*b(ass)?
                \\g<1> hop:
                    - (glitch|hip|jazz|trip)y?[ /-]*hop

    The key (genre name) is lowercased and later used as a
    ``re.Match.expand()`` template, so ``\\g<N>`` back-references to
    capture groups are supported. Every pattern is compiled
    case-insensitively; a pattern that is not a valid regular expression
    is matched literally instead (and the fallback is logged).

    Setting ``aliases: true`` (the default), an empty string or ``null``
    loads the bundled ``aliases.yaml`` file. Setting ``aliases: false``
    disables normalization entirely.

    Returns:
        Ordered list of ``(compiled_pattern, template)`` entries; empty
        when aliases are disabled or the bundled file is empty.

    Raises:
        confuse.ConfigTypeError: when the config value is not a mapping
            or a list entry is not a string.
    """
    aliases_raw = self.config["aliases"].get()
    if aliases_raw is False:
        return []

    if aliases_raw in (True, "", None):
        self._log.debug("Loading default aliases from {}", ALIASES)
        with Path(ALIASES).open(encoding="utf-8") as f:
            aliases_dict = yaml.safe_load(f)
        if not aliases_dict:
            return []
    else:
        # Validate only the effective aliases value to avoid stale lower-
        # priority config layers affecting type checking.
        aliases_cfg = confuse.Configuration("lastgenre_aliases", read=False)
        aliases_cfg.set({"aliases": aliases_raw})
        aliases_dict = aliases_cfg["aliases"].get(
            confuse.MappingValues(confuse.Sequence(str))
        )

    entries: Aliases = []
    for canonical, patterns in aliases_dict.items():
        template = str(canonical).lower()
        for raw_pat in patterns:
            pattern_text = str(raw_pat)
            try:
                compiled = re.compile(pattern_text, re.IGNORECASE)
            except re.error as exc:
                # Best-effort fallback: treat the text as a literal genre
                # name, but leave a trace so a mistyped regex is not
                # silently turned into a pattern that never matches.
                self._log.debug(
                    "alias pattern {} for {} is not a valid regex ({}); "
                    "matching it literally",
                    pattern_text,
                    template,
                    exc,
                )
                compiled = re.compile(
                    re.escape(pattern_text), re.IGNORECASE
                )
            entries.append((compiled, template))

    self._log.extra_debug("Loaded {} alias entries", len(entries))
    return entries

@property
def sources(self) -> tuple[str, ...]:
"""A tuple of allowed genre sources. May contain 'track',
Expand All @@ -267,6 +334,8 @@ def _resolve_genres(
"""Canonicalize, sort and filter a list of genres.

- Returns an empty list if the input tags list is empty.
- If aliases are configured, variant spellings are normalised first
(e.g. 'hip-hop' → 'hip hop', 'dnb' → 'drum and bass').
- If canonicalization is enabled, it extends the list by incorporating
parent genres from the canonicalization tree. When a whitelist is set,
only parent tags that pass the whitelist filter are included;
Expand All @@ -286,6 +355,12 @@ def _resolve_genres(
if not tags:
return []

# Normalize variant spellings before any other processing.
if self.aliases:
tags = [
normalize_genre(self._log, self.aliases, tag) for tag in tags
]

count = self.config["count"].get(int)

# Canonicalization (if enabled)
Expand Down Expand Up @@ -353,14 +428,19 @@ def _filter_valid(
if not self.whitelist and not self.ignore_patterns:
return cleaned

whitelisted = [
g
for g in cleaned
if not self.whitelist or g.lower() in self.whitelist
]
return drop_ignored_genres(
self._log, self.ignore_patterns, whitelisted, artist
)
result = []
for genre in cleaned:
if self.whitelist and genre.lower() not in self.whitelist:
continue

if self.ignore_patterns and is_ignored(
self._log, self.ignore_patterns, genre, artist
):
continue

result.append(genre)

return result

# Genre resolution pipeline.

Expand Down
112 changes: 112 additions & 0 deletions beetsplug/lastgenre/aliases.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Default genre aliases for the lastgenre plugin.
#
# Keys are canonical names and support \g<N> back-references to pattern groups.
# Patterns are case-insensitive full-matches. Order matters: first match wins.

# ---------------------------------------------------------------------------
# Ampersands / Delimiters
# ---------------------------------------------------------------------------

# drum and bass (d&b, dnb, drum n bass, ...)
drum and bass:
- d(rum)?[ &n/]*b(ass)?

# rhythm and blues (r&b, rnb, ...)
rhythm and blues:
- r(hythm)?[ &n/]*b(lues)?

# rock and roll (rock & roll, rock'n'roll, rock-n-roll, ...)
rock and roll:
- rock[ '‐&n/ \-]*roll

# ---------------------------------------------------------------------------
# Consistent Delimiters (Hyphenation)
# ---------------------------------------------------------------------------


# Hyphenate j-pop, k-pop, c-pop, post-rock, etc.
# Matches: kpop, k pop -> k-pop; j rock -> j-rock; post rock -> post-rock
#
# Hyphenate neo-soul, euro-house, tech-house, etc.
# (Negative lookaheads exclude 'european' and 'techno'/'technic*'.)
\g<1>-\g<2>:
- (c|k|j)[ /-]*(folk|goth|hip hop|pop|rock|ska|trance)
- (euro(?!p[ae]+n?)|neo|post|tech(?!n[io]))[ /-]*(\w+)

# lo-fi, glo-fi (lofi, lo fi -> lo-fi; glofi, glo fi -> glo-fi)
# The prefix is carried over via \g<1> so glo-fi is not collapsed into lo-fi.
\g<1>-fi:
- (g?lo)[ /-]*fi

# p-funk, g-funk, etc. (p funk -> p-funk)
\g<1>-funk:
- (p|g)[ /-]*funk

# synthpop, synthwave, etc. (synth pop -> synthpop)
synth\g<1>:
- synth[ /-]+(\w+)

# avant-garde (avantgarde, avant gard, avant-gard)
avant-garde:
- avant[ /-]*(gard(e)?)?
- avant-gard
- avant

# ---------------------------------------------------------------------------
# Nu- Genre Spelling (nu jazz, nu-disco, etc.)
# ---------------------------------------------------------------------------

# Matches: nu-jazz -> nu jazz; nu-disco -> nu disco
# Note: 'nu-disco' is hyphenated in the tree but 'nu jazz' isn't in genres.txt
nu \g<1>:
- nu[ /-]*(disco|jazz|metal|soul)

# ---------------------------------------------------------------------------
# Terminology / Synonym / Translation fixes
# ---------------------------------------------------------------------------

# electronic (electronic music, elektronika)
electronic:
- electronic[ /]music
- elektronika

# downtempo (downbeat)
downtempo:
- down[ /-]*beat

# shoegaze (shoegazer, shoegazing)
shoegaze:
- shoegaze?r?
- shoegazing

# ---------------------------------------------------------------------------
# Form Fixes (Hip Hop, Trip Hop, etc.)
# ---------------------------------------------------------------------------

# Normalized spacing: hip-hop, hiphop -> hip hop
\g<1> hop:
- (glitch|hip|jazz|trip)y?([ /-]*hip)?[ /-]*hop

# ---------------------------------------------------------------------------
# Abbreviations & International Spellings
# ---------------------------------------------------------------------------

# alternative rock (alt, alternative, alt rock, alt-rock, ...)
# The abbreviation is expanded explicitly; a shared \g<1> template would
# turn 'alt' into 'alt rock' rather than 'alternative rock'.
alternative rock:
- alt(ernative)?([ /-]*rock)?

# indie rock (indie, indie rock, indie-rock)
indie rock:
- indie([ /-]*rock)?

# gothic rock (goth, goth rock) - doesn't catch gothic metal
gothic rock:
- goth(?!ic)([ /-]*rock)?
- gothic[ /-]*rock

# progressive rock (prog, prog rock, progressive rock)
# Note: mapping standalone 'progressive' is avoided to prevent catching 'progressive metal', etc.
progressive rock:
- prog([ /-]*rock)?
- progressive[ /-]*rock

# traditional folk (trad, traditional)
# Note: avoids matching 'trad jazz' or 'traditional country'.
traditional folk:
- trad(/|ition(/|al)?)?-?
19 changes: 14 additions & 5 deletions beetsplug/lastgenre/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@

from beets import plugins

from .utils import drop_ignored_genres
from .utils import is_ignored, normalize_genre

if TYPE_CHECKING:
from collections.abc import Callable

from beets.library import LibModel
from beets.logging import BeetsLogger

from .utils import GenreIgnorePatterns
from .utils import Aliases, GenreIgnorePatterns

GenreCache = dict[str, list[str]]
"""Cache mapping entity keys to their genre lists.
Expand Down Expand Up @@ -69,6 +69,7 @@ def __init__(
log: BeetsLogger,
min_weight: int,
ignore_patterns: GenreIgnorePatterns,
aliases: Aliases,
):
"""Initialize the client.

Expand All @@ -78,6 +79,7 @@ def __init__(
self._log = log
self._min_weight = min_weight
self._ignore_patterns: GenreIgnorePatterns = ignore_patterns
self._aliases: Aliases = aliases
self._genre_cache: GenreCache = {}

def fetch_genres(
Expand Down Expand Up @@ -127,11 +129,18 @@ def _last_lookup(
"last.fm (unfiltered) {} tags: {}", entity, genres
)

# Apply aliases and log each change.
# Filter forbidden genres on every call so ignorelist hits are logged.
# Artist is always the first element in args (album, artist, track lookups).
return drop_ignored_genres(
self._log, self._ignore_patterns, genres, args[0]
)
result = []
for genre in genres:
if self._aliases:
genre = normalize_genre(self._log, self._aliases, genre)

if not is_ignored(self._log, self._ignore_patterns, genre, args[0]):
result.append(genre)

return result

def fetch(self, kind: str, obj: LibModel, *args: str) -> list[str]:
"""Fetch Last.fm genres for the specified kind and entity.
Expand Down
44 changes: 32 additions & 12 deletions beetsplug/lastgenre/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,20 @@

from __future__ import annotations

import re
from typing import TYPE_CHECKING

if TYPE_CHECKING:
import re

from beets.logging import BeetsLogger

GenreIgnorePatterns = dict[str, list[re.Pattern[str]]]
"""Mapping of artist name to list of compiled case-insensitive patterns."""

AliasEntry = tuple[re.Pattern[str], str]
"""A compiled full-match pattern paired with its replacement template."""

def drop_ignored_genres(
    logger: BeetsLogger,
    ignore_patterns: GenreIgnorePatterns,
    genres: list[str],
    artist: str | None = None,
) -> list[str]:
    """Return *genres* without the entries that ``is_ignored`` rejects.

    Each genre is checked via ``is_ignored`` with the given logger,
    ignore patterns and artist; the relative order of the surviving
    genres is preserved.
    """
    kept: list[str] = []
    for candidate in genres:
        if is_ignored(logger, ignore_patterns, candidate, artist):
            continue
        kept.append(candidate)
    return kept
Aliases = list[AliasEntry]
"""Ordered list of (pattern, replacement_template) alias entries."""


def is_ignored(
Expand All @@ -57,3 +50,30 @@ def is_ignored(
logger.extra_debug("ignored (artist: {}): {}", artist, genre)
return True
return False


def normalize_genre(logger: BeetsLogger, aliases: Aliases, genre: str) -> str:
    """Return the canonical form of *genre* using *aliases*.

    Tries each alias entry in order against the lowercased genre. The first
    full-match whose template expands successfully wins; the replacement
    template is expanded via ``re.Match.expand()`` so ``\\g<N>``
    back-references work. An entry whose template cannot be expanded is
    reported with a warning and skipped.

    Returns the original (lowercased) *genre* when no alias matches.
    """
    genre_lower = genre.lower()
    for pattern, template in aliases:
        match = pattern.fullmatch(genre_lower)
        if match is None:
            continue

        try:
            expanded = match.expand(template)
        except (re.error, IndexError) as exc:
            logger.warning(
                "invalid alias template {}; skipping for genre {}: {}",
                template,
                genre,
                exc,
            )
            continue

        # Compare against the lowercased input: lowercasing happens for
        # every genre regardless of aliases, so a case-only difference is
        # not an alias rewrite and should not be logged as one.
        if expanded != genre_lower:
            logger.extra_debug("aliased: {} -> {}", genre, expanded)
        return expanded

    return genre_lower
Loading
Loading