3737from beets .library import Album , Item
3838from beets .ui import UserError
3939from beets .util import plurality , unique_list
40- from beetsplug .lastgenre .utils import is_ignored
40+ from beetsplug .lastgenre .utils import is_ignored , normalize_genre
4141
4242from .client import LastFmClient
4343
4848 from beets .importer import ImportSession , ImportTask
4949 from beets .library import LibModel
5050
51- from .utils import Ignorelist
51+ from .utils import Aliases , Ignorelist
5252
5353 Whitelist = set [str ]
5454 """Set of valid genre names (lowercase). Empty set means all genres allowed."""
@@ -118,6 +118,7 @@ def sort_by_depth(tags: list[str], branches: CanonTree) -> list[str]:
118118
# Data files bundled next to this module.
_PLUGIN_DIR = os.path.dirname(__file__)
# Default genre whitelist.
WHITELIST = os.path.join(_PLUGIN_DIR, "genres.txt")
# Default canonicalization tree.
C14N_TREE = os.path.join(_PLUGIN_DIR, "genres-tree.yaml")
# Default genre alias table, loaded when ``lastgenre.aliases`` is ``true``.
ALIASES = os.path.join(_PLUGIN_DIR, "aliases.yaml")
121122
122123
123124class LastGenrePlugin (plugins .BeetsPlugin ):
@@ -140,6 +141,7 @@ def __init__(self) -> None:
140141 "title_case" : True ,
141142 "pretend" : False ,
142143 "ignorelist" : False ,
144+ "aliases" : True ,
143145 }
144146 )
145147 self .setup ()
@@ -153,8 +155,12 @@ def setup(self) -> None:
153155 self .c14n_branches : CanonTree
154156 self .c14n_branches , self .canonicalize = self ._load_c14n_tree ()
155157 self .ignorelist : Ignorelist = self ._load_ignorelist ()
158+ self .aliases : Aliases = self ._load_aliases ()
156159 self .client = LastFmClient (
157- self ._log , self .config ["min_weight" ].get (int ), self .ignorelist
160+ self ._log ,
161+ self .config ["min_weight" ].get (int ),
162+ self .ignorelist ,
163+ self .aliases ,
158164 )
159165
160166 def _load_whitelist (self ) -> Whitelist :
@@ -250,6 +256,18 @@ def _load_ignorelist(self) -> Ignorelist:
250256 self ._log .extra_debug ("Ignorelist: {}" , ignorelist_raw )
251257 return self ._compile_ignorelist_patterns (ignorelist_raw )
252258
259+ @staticmethod
260+ def _compile_pattern (raw : str ) -> re .Pattern [str ]:
261+ """Compile *raw* as a case-insensitive full-match regex.
262+
263+ Tries regex compilation first; falls back to treating the string as a
264+ literal (via ``re.escape``) if it is not valid regex.
265+ """
266+ try :
267+ return re .compile (raw , re .IGNORECASE )
268+ except re .error :
269+ return re .compile (re .escape (raw ), re .IGNORECASE )
270+
253271 @staticmethod
254272 def _compile_ignorelist_patterns (
255273 ignorelist : RawIgnorelist ,
@@ -265,17 +283,67 @@ def _compile_ignorelist_patterns(
265283 defaultdict (list )
266284 )
267285 for artist , patterns in ignorelist .items ():
268- compiled_patterns = []
269- for pattern in patterns :
270- try :
271- compiled_patterns .append (re .compile (pattern , re .IGNORECASE ))
272- except re .error :
273- compiled_patterns .append (
274- re .compile (re .escape (pattern ), re .IGNORECASE )
275- )
276- compiled_ignorelist [artist ] = compiled_patterns
286+ compiled_ignorelist [artist ] = [
287+ LastGenrePlugin ._compile_pattern (p ) for p in patterns
288+ ]
277289 return compiled_ignorelist
278290
def _load_aliases(self) -> Aliases:
    """Build the genre alias table from the ``lastgenre.aliases`` option.

    The option is a mapping from a canonical genre name to a list of
    patterns that should be rewritten to it::

        lastgenre:
            aliases:
                drum and bass:
                    - d(rum)?[ &n/]*b(ass)?
                \\g<1> hop:
                    - (glitch|hip|jazz|trip)y?[ /-]*hop

    Accepted values:

    - ``false``: aliases are disabled; an empty list is returned.
    - ``true``, an empty string or ``None``: the bundled ``aliases.yaml``
      file (``ALIASES``) is loaded and used as the mapping.
    - any other falsy value, or an empty bundled file: an empty list.
    - a mapping: used as given.

    Each key is lower-cased and stored as the replacement template. It is
    intended to be expanded against the match (the original design uses
    ``re.Match.expand()``, so ``\\g<N>`` group references are meant to
    work; confirm against the consumer of this table). Each pattern is
    converted with ``str()`` and compiled by ``_compile_pattern``, so
    non-string list items are coerced rather than rejected.

    Returns:
        A list of ``(compiled pattern, template)`` pairs in configuration
        order. Matching itself is done by whoever consumes the list.

    Raises:
        UserError: if the option is neither a mapping nor one of the
            values above, or if a mapping value is not a list.
    """
    configured = self.config["aliases"].get()
    if configured is False:
        return []

    if configured in (True, "", None):
        # No user-supplied table: fall back to the file shipped with the
        # plugin.
        self._log.debug("Loading default aliases from {}", ALIASES)
        with Path(ALIASES).open(encoding="utf-8") as handle:
            configured = yaml.safe_load(handle)

    if not configured:
        return []
    if not isinstance(configured, dict):
        raise UserError(
            "Invalid value for lastgenre.aliases: expected a mapping "
            f"(canonical \u2192 list of patterns), got {configured!r}"
        )

    compile_pattern = self._compile_pattern
    table: Aliases = []
    for name, raw_patterns in configured.items():
        if not isinstance(raw_patterns, list):
            raise UserError(
                f"Invalid lastgenre.aliases entry for {name!r}: "
                f"expected a list of patterns, got {raw_patterns!r}"
            )
        replacement = str(name).lower()
        table.extend(
            (compile_pattern(str(item)), replacement)
            for item in raw_patterns
        )

    self._log.extra_debug("Loaded {} alias entries", len(table))
    return table
346+
279347 @property
280348 def sources (self ) -> tuple [str , ...]:
281349 """A tuple of allowed genre sources. May contain 'track',
@@ -297,6 +365,8 @@ def _resolve_genres(
297365 """Canonicalize, sort and filter a list of genres.
298366
299367 - Returns an empty list if the input tags list is empty.
368+ - If aliases are configured, variant spellings are normalised first
369+ (e.g. 'hip-hop' → 'hip hop', 'dnb' → 'drum and bass').
300370 - If canonicalization is enabled, it extends the list by incorporating
301371 parent genres from the canonicalization tree. When a whitelist is set,
302372 only parent tags that pass the whitelist filter are included;
@@ -316,6 +386,12 @@ def _resolve_genres(
316386 if not tags :
317387 return []
318388
389+ # Normalize variant spellings before any other processing.
390+ if self .aliases :
391+ tags = [
392+ normalize_genre (self ._log , self .aliases , tag ) for tag in tags
393+ ]
394+
319395 count = self .config ["count" ].get (int )
320396
321397 # Canonicalization (if enabled)
0 commit comments