Skip to content

Commit ff36dd2

Browse files
committed
Implement aliases
1 parent 66a7d98 commit ff36dd2

3 files changed

Lines changed: 129 additions & 22 deletions

File tree

beetsplug/lastgenre/__init__.py

Lines changed: 88 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
from beets.library import Album, Item
3838
from beets.ui import UserError
3939
from beets.util import plurality, unique_list
40-
from beetsplug.lastgenre.utils import is_ignored
40+
from beetsplug.lastgenre.utils import is_ignored, normalize_genre
4141

4242
from .client import LastFmClient
4343

@@ -48,7 +48,7 @@
4848
from beets.importer import ImportSession, ImportTask
4949
from beets.library import LibModel
5050

51-
from .utils import Ignorelist
51+
from .utils import Aliases, Ignorelist
5252

5353
Whitelist = set[str]
5454
"""Set of valid genre names (lowercase). Empty set means all genres allowed."""
@@ -118,6 +118,7 @@ def sort_by_depth(tags: list[str], branches: CanonTree) -> list[str]:
118118

119119
WHITELIST = os.path.join(os.path.dirname(__file__), "genres.txt")
120120
C14N_TREE = os.path.join(os.path.dirname(__file__), "genres-tree.yaml")
121+
ALIASES = os.path.join(os.path.dirname(__file__), "aliases.yaml")
121122

122123

123124
class LastGenrePlugin(plugins.BeetsPlugin):
@@ -140,6 +141,7 @@ def __init__(self) -> None:
140141
"title_case": True,
141142
"pretend": False,
142143
"ignorelist": False,
144+
"aliases": True,
143145
}
144146
)
145147
self.setup()
@@ -153,8 +155,12 @@ def setup(self) -> None:
153155
self.c14n_branches: CanonTree
154156
self.c14n_branches, self.canonicalize = self._load_c14n_tree()
155157
self.ignorelist: Ignorelist = self._load_ignorelist()
158+
self.aliases: Aliases = self._load_aliases()
156159
self.client = LastFmClient(
157-
self._log, self.config["min_weight"].get(int), self.ignorelist
160+
self._log,
161+
self.config["min_weight"].get(int),
162+
self.ignorelist,
163+
self.aliases,
158164
)
159165

160166
def _load_whitelist(self) -> Whitelist:
@@ -250,6 +256,18 @@ def _load_ignorelist(self) -> Ignorelist:
250256
self._log.extra_debug("Ignorelist: {}", ignorelist_raw)
251257
return self._compile_ignorelist_patterns(ignorelist_raw)
252258

259+
@staticmethod
260+
def _compile_pattern(raw: str) -> re.Pattern[str]:
261+
"""Compile *raw* as a case-insensitive full-match regex.
262+
263+
Tries regex compilation first; falls back to treating the string as a
264+
literal (via ``re.escape``) if it is not valid regex.
265+
"""
266+
try:
267+
return re.compile(raw, re.IGNORECASE)
268+
except re.error:
269+
return re.compile(re.escape(raw), re.IGNORECASE)
270+
253271
@staticmethod
254272
def _compile_ignorelist_patterns(
255273
ignorelist: RawIgnorelist,
@@ -265,17 +283,67 @@ def _compile_ignorelist_patterns(
265283
defaultdict(list)
266284
)
267285
for artist, patterns in ignorelist.items():
268-
compiled_patterns = []
269-
for pattern in patterns:
270-
try:
271-
compiled_patterns.append(re.compile(pattern, re.IGNORECASE))
272-
except re.error:
273-
compiled_patterns.append(
274-
re.compile(re.escape(pattern), re.IGNORECASE)
275-
)
276-
compiled_ignorelist[artist] = compiled_patterns
286+
compiled_ignorelist[artist] = [
287+
LastGenrePlugin._compile_pattern(p) for p in patterns
288+
]
277289
return compiled_ignorelist
278290

291+
def _load_aliases(self) -> Aliases:
    """Build the ordered alias table from the ``lastgenre.aliases`` option.

    The option maps a canonical genre name (used as the replacement
    template) to a list of regex patterns::

        lastgenre:
            aliases:
                drum and bass:
                    - d(rum)?[ &n/]*b(ass)?
                \\g<1> hop:
                    - (glitch|hip|jazz|trip)y?[ /-]*hop

    Each key is lowercased and later expanded with ``re.Match.expand()``,
    so ``\\g<N>`` back-references to capture groups of the matching
    pattern are supported; plain string keys need no special syntax.

    Accepted option values:

    - ``false``: aliasing is disabled and an empty table is returned.
    - ``true``, an empty string or no value: the bundled ``aliases.yaml``
      file is loaded.
    - a mapping: used as-is.

    Every pattern item is converted with ``str()`` and compiled via
    ``_compile_pattern``. Entries keep the order of the mapping and of
    each pattern list.

    Returns:
        A list of ``(compiled_pattern, template)`` tuples; empty when
        aliasing is disabled or the configuration is empty.

    Raises:
        UserError: if the resolved configuration is not a mapping, or if
            the value of an entry is not a list.
    """
    configured = self.config["aliases"].get()
    if configured is False:
        return []

    if configured in (True, "", None):
        # No user-supplied table: fall back to the file shipped with
        # the plugin.
        self._log.debug("Loading default aliases from {}", ALIASES)
        with Path(ALIASES).open(encoding="utf-8") as handle:
            configured = yaml.safe_load(handle)

    if not configured:
        return []

    if not isinstance(configured, dict):
        raise UserError(
            "Invalid value for lastgenre.aliases: expected a mapping "
            f"(canonical \u2192 list of patterns), got {configured!r}"
        )

    compiled: Aliases = []
    for canonical, patterns in configured.items():
        if not isinstance(patterns, list):
            raise UserError(
                f"Invalid lastgenre.aliases entry for {canonical!r}: "
                f"expected a list of patterns, got {patterns!r}"
            )
        template = str(canonical).lower()
        compiled.extend(
            (self._compile_pattern(str(raw)), template) for raw in patterns
        )

    self._log.extra_debug("Loaded {} alias entries", len(compiled))
    return compiled
346+
279347
@property
280348
def sources(self) -> tuple[str, ...]:
281349
"""A tuple of allowed genre sources. May contain 'track',
@@ -297,6 +365,8 @@ def _resolve_genres(
297365
"""Canonicalize, sort and filter a list of genres.
298366
299367
- Returns an empty list if the input tags list is empty.
368+
- If aliases are configured, variant spellings are normalised first
369+
(e.g. 'hip-hop' → 'hip hop', 'dnb' → 'drum and bass').
300370
- If canonicalization is enabled, it extends the list by incorporating
301371
parent genres from the canonicalization tree. When a whitelist is set,
302372
only parent tags that pass the whitelist filter are included;
@@ -316,6 +386,12 @@ def _resolve_genres(
316386
if not tags:
317387
return []
318388

389+
# Normalize variant spellings before any other processing.
390+
if self.aliases:
391+
tags = [
392+
normalize_genre(self._log, self.aliases, tag) for tag in tags
393+
]
394+
319395
count = self.config["count"].get(int)
320396

321397
# Canonicalization (if enabled)

beetsplug/lastgenre/client.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,14 @@
2525

2626
from beets import plugins
2727

28-
from .utils import is_ignored
28+
from .utils import is_ignored, normalize_genre
2929

3030
if TYPE_CHECKING:
3131
from collections.abc import Callable
3232

3333
from beets.logging import BeetsLogger
3434

35-
from .utils import Ignorelist
35+
from .utils import Aliases, Ignorelist
3636

3737
GenreCache = dict[str, list[str]]
3838
"""Cache mapping entity keys to their genre lists.
@@ -52,7 +52,11 @@ class LastFmClient:
5252
"""Client for fetching genres from Last.fm."""
5353

5454
def __init__(
55-
self, log: BeetsLogger, min_weight: int, ignorelist: Ignorelist
55+
self,
56+
log: BeetsLogger,
57+
min_weight: int,
58+
ignorelist: Ignorelist,
59+
aliases: Aliases,
5660
):
5761
"""Initialize the client.
5862
@@ -62,6 +66,7 @@ def __init__(
6266
self._log = log
6367
self._min_weight = min_weight
6468
self._ignorelist: Ignorelist = ignorelist
69+
self._aliases: Aliases = aliases
6570
self._genre_cache: GenreCache = {}
6671

6772
def fetch_genre(
@@ -138,13 +143,15 @@ def _last_lookup(
138143
"last.fm (unfiltered) {} tags: {}", entity, genres
139144
)
140145

141-
# Filter forbidden genres on every call so ignorelist hits are logged.
142-
# Artist is always the first element in args (album, artist, track lookups).
143-
return [
144-
g
145-
for g in genres
146-
if not is_ignored(self._log, self._ignorelist, g, args[0])
147-
]
146+
result = []
147+
for genre in genres:
148+
if self._aliases:
149+
genre = normalize_genre(self._log, self._aliases, genre)
150+
151+
if not is_ignored(self._log, self._ignorelist, genre, args[0]):
152+
result.append(genre)
153+
154+
return result
148155

149156
def fetch_album_genre(self, albumartist: str, albumtitle: str) -> list[str]:
150157
"""Return genres from Last.fm for the album by albumartist."""

beetsplug/lastgenre/utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@
2727
Ignorelist = dict[str, list[re.Pattern[str]]]
2828
"""Mapping of artist name to list of compiled case-insensitive patterns."""
2929

30+
AliasEntry = tuple[re.Pattern[str], str]
31+
"""A compiled full-match pattern paired with its replacement template."""
32+
33+
Aliases = list[AliasEntry]
34+
"""Ordered list of (pattern, replacement_template) alias entries."""
35+
3036

3137
def is_ignored(
3238
logger: BeetsLogger,
@@ -47,3 +53,21 @@ def is_ignored(
4753
logger.extra_debug("ignored (artist: {}): {}", artist, genre)
4854
return True
4955
return False
56+
57+
58+
def normalize_genre(logger: BeetsLogger, aliases: Aliases, genre: str) -> str:
    """Return the canonical, lowercased form of *genre* using *aliases*.

    The genre is lowercased and then tried against each alias entry in
    order. The first pattern that full-matches wins; its replacement
    template is expanded via ``re.Match.expand()`` so ``\\g<N>``
    back-references work.

    Args:
        logger: Logger that receives an ``extra_debug`` message when an
            alias actually rewrites the genre.
        aliases: Ordered ``(compiled_pattern, template)`` entries.
        genre: Genre name in any letter case.

    Returns:
        The expanded template of the first matching alias, or the
        lowercased *genre* when no alias matches.
    """
    genre_lower = genre.lower()
    for pattern, template in aliases:
        match = pattern.fullmatch(genre_lower)
        if match is None:
            continue
        expanded = match.expand(template)
        # Compare against the lowercased input: lowercasing happens on
        # every path, so a difference in letter case alone is not an alias
        # rewrite and should not be reported as one.
        if expanded != genre_lower:
            logger.extra_debug("aliased: {} -> {}", genre, expanded)
        return expanded
    return genre_lower

0 commit comments

Comments
 (0)