Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d4d265e
batch spotify requests
arsaboo Mar 31, 2026
2d6711e
lint
arsaboo Mar 31, 2026
913149e
Fix Spotify batch helper typing for mypy
arsaboo Mar 31, 2026
75cfa0d
Address reviewer comments
arsaboo Mar 31, 2026
a65ba38
Merge remote-tracking branch 'upstream/master' into spotify_batch
arsaboo Apr 4, 2026
1540105
Merge remote-tracking branch 'upstream/master' into spotify_batch
arsaboo Apr 8, 2026
72d76f6
Merge upstream/master into spotify_batch branch
arsaboo Apr 14, 2026
cd06e62
Resolve merge conflict in changelog.rst
arsaboo Apr 14, 2026
bd6c737
Merge branch 'master' into spotify_batch
arsaboo Apr 18, 2026
826aaeb
Address reviewer comments
arsaboo Apr 19, 2026
92481b6
Merge branch 'spotify_batch' of https://github.com/arsaboo/beets into…
arsaboo Apr 19, 2026
99b81c5
lint
arsaboo Apr 19, 2026
b8e0296
simplify
arsaboo Apr 19, 2026
ea72d3f
more lint
arsaboo Apr 19, 2026
b42d6ef
Merge branch 'master' into spotify_batch
arsaboo Apr 19, 2026
2c7274f
Address reviewer comments and update related tests
arsaboo Apr 19, 2026
8c7868d
Merge branch 'spotify_batch' of https://github.com/arsaboo/beets into…
arsaboo Apr 19, 2026
7d756d7
Merge branch 'master' into spotify_batch
arsaboo Apr 19, 2026
e0aec44
Merge branch 'master' into spotify_batch
arsaboo Apr 20, 2026
2c0dbb1
fix CI failure
arsaboo Apr 20, 2026
786e79f
Merge branch 'spotify_batch' of https://github.com/arsaboo/beets into…
arsaboo Apr 20, 2026
48d6661
fix CI issues
arsaboo Apr 20, 2026
98d4431
Merge branch 'master' into spotify_batch
arsaboo Apr 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions beets/library/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
from contextlib import suppress
from functools import cached_property
from typing import TYPE_CHECKING, ClassVar, NamedTuple, TypeVar
from typing import TYPE_CHECKING, ClassVar, NamedTuple

from confuse.exceptions import ConfigError

Expand All @@ -12,23 +12,13 @@
from beets.dbcore.db import Migration
from beets.dbcore.pathutils import normalize_path_for_db
from beets.dbcore.types import MULTI_VALUE_DELIMITER
from beets.util import unique_list
from beets.util import chunks, unique_list
from beets.util.lyrics import Lyrics

if TYPE_CHECKING:
from collections.abc import Iterator

from beets.dbcore.db import Model
from beets.library import Library

T = TypeVar("T")


def chunks(lst: list[T], n: int) -> Iterator[list[T]]:
    """Split ``lst`` into consecutive slices holding at most ``n`` items.

    Slices are produced lazily and in order; the final slice is shorter when
    ``len(lst)`` is not a multiple of ``n``.
    """
    offsets = range(0, len(lst), n)
    for offset in offsets:
        stop = offset + n
        yield lst[offset:stop]


class MultiValueFieldMigration(Migration):
"""Backfill multi-valued field from legacy single-string values."""
Expand Down
6 changes: 6 additions & 0 deletions beets/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1204,3 +1204,9 @@
def unique_list(elements: Iterable[T]) -> list[T]:
"""Return a list with unique elements in the original order."""
return list(dict.fromkeys(elements))


def chunks(lst: Sequence[T], n: int) -> Iterator[list[T]]:
    """Yield successive n-sized chunks from lst.

    Every chunk is a new ``list``, whatever concrete sequence type is passed
    in. The last chunk is shorter when ``len(lst)`` is not a multiple of
    ``n``.

    :param lst: Sequence to split.
    :param n: Maximum number of elements per chunk.
    :returns: Iterator over lists of at most ``n`` consecutive elements.
    """
    for i in range(0, len(lst), n):
        # Slicing an arbitrary ``Sequence`` returns a ``Sequence`` (a tuple
        # slice is a tuple, for example), so copy into a list to honour the
        # declared ``list[T]`` element type.
        yield list(lst[i : i + n])

Check failure on line 1212 in beets/util/__init__.py

View workflow job for this annotation

GitHub Actions / Check types with mypy

Incompatible types in "yield" (actual type "Sequence[T]", expected type "list[T]")
186 changes: 150 additions & 36 deletions beetsplug/spotify.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"""Adds Spotify release and track search support to the autotagger.

Also includes Spotify playlist construction.

"""

from __future__ import annotations
Expand All @@ -28,7 +29,7 @@
import time
import webbrowser
from http import HTTPStatus
from typing import TYPE_CHECKING, Any, ClassVar, Literal
from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, TypedDict

import confuse
import requests
Expand All @@ -38,6 +39,7 @@
from beets.dbcore import types
from beets.library import Library
from beets.metadata_plugins import IDResponse, SearchApiMetadataSourcePlugin
from beets.util import chunks, unique_list

if TYPE_CHECKING:
from collections.abc import Sequence
Expand All @@ -49,6 +51,33 @@
DEFAULT_WAITING_TIME = 5


class SpotifyTrackInfo(NamedTuple):
    """Popularity and external IDs returned by the /v1/tracks batch endpoint.

    Every field defaults to ``None``, so ``SpotifyTrackInfo()`` can be used
    as a placeholder for a track that has no data.
    """

    # Value of the track object's ``popularity`` key.
    popularity: int | None = None
    # Values of the ``isrc``, ``ean`` and ``upc`` keys of the track object's
    # ``external_ids`` mapping.
    isrc: str | None = None
    ean: str | None = None
    upc: str | None = None
Comment thread
arsaboo marked this conversation as resolved.
Outdated


class SpotifyAudioFeatureData(TypedDict, total=False):
    """One audio-features object from the /v1/audio-features endpoint.

    The type is declared with ``total=False``, so every key is optional and
    a partially populated mapping still type-checks.
    """

    id: str
    acousticness: float
    danceability: float
    energy: float
    instrumentalness: float
    key: int
    liveness: float
    loudness: float
    mode: int
    speechiness: float
    tempo: float
    time_signature: int
    valence: float


class SearchResponseAlbums(IDResponse):
"""A response returned by the Spotify API.

Expand Down Expand Up @@ -112,8 +141,8 @@ class SpotifyPlugin(
open_track_url = "https://open.spotify.com/track/"
search_url = "https://api.spotify.com/v1/search"
album_url = "https://api.spotify.com/v1/albums/"
track_url = "https://api.spotify.com/v1/tracks/"
audio_features_url = "https://api.spotify.com/v1/audio-features/"
track_url = "https://api.spotify.com/v1/tracks"
audio_features_url = "https://api.spotify.com/v1/audio-features"

spotify_audio_features: ClassVar[dict[str, str]] = {
"acousticness": "spotify_acousticness",
Expand Down Expand Up @@ -212,8 +241,8 @@ def _handle_response(

:param method: HTTP method to use for the request.
:param url: URL for the new :class:`Request` object.
:param dict params: (optional) list of tuples or bytes to send
in the query string for the :class:`Request`.
:param dict params: (optional) list of tuples or bytes to send in the
query string for the :class:`Request`.

"""

Expand Down Expand Up @@ -301,10 +330,7 @@ def _handle_response(
def _multi_artist_credit(
self, artists: list[dict[str | int, str]]
) -> tuple[list[str], list[str]]:
"""Given a list of artist dictionaries, accumulate data into a pair
of lists: the first being the artist names, and the second being the
artist IDs.
"""
"""Accumulate data from artist dicts into name and ID lists."""
artist_names = []
artist_ids = []
for artist in artists:
Expand All @@ -313,8 +339,9 @@ def _multi_artist_credit(
return artist_names, artist_ids

def album_for_id(self, album_id: str) -> AlbumInfo | None:
"""Fetch an album by its Spotify ID or URL and return an
AlbumInfo object or None if the album is not found.
"""Fetch an album by its Spotify ID or URL.

Returns an AlbumInfo object, or None if the album is not found.

:param str album_id: Spotify ID or URL for the album

Expand Down Expand Up @@ -444,7 +471,7 @@ def track_for_id(self, track_id: str) -> None | TrackInfo:

if not (
track_data := self._handle_response(
"get", f"{self.track_url}{spotify_id}"
"get", f"{self.track_url}/{spotify_id}"
)
):
self._log.debug("Track not found: {}", track_id)
Expand Down Expand Up @@ -488,6 +515,7 @@ def get_search_response(

Unauthorized responses trigger one token refresh attempt before the
method gives up and falls back to an empty result set.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change

Copy link
Copy Markdown
Member

@snejus snejus Apr 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And undo all unrelated changes in the docstrings please.

"""
for _ in range(2):
response = requests.get(
Expand Down Expand Up @@ -590,8 +618,8 @@ def _match_library_tracks(self, library: Library, keywords: str):
:param library: beets library object to query.
:param keywords: Query to match library items against.

:returns: List of simplified track object dicts for library
items matching the specified query.
:returns: List of simplified track object dicts for library items
matching the specified query.

"""
results = []
Expand Down Expand Up @@ -722,17 +750,99 @@ def _output_match_results(self, results):
"No {.data_source} tracks found from beets query", self
)

def _disable_audio_features(self) -> None:
    """Switch audio features off and emit the warning on the first call only.

    The availability flag is read and cleared while holding
    ``_audio_features_lock``; the warning itself is logged after the lock
    has been released.
    """
    with self._audio_features_lock:
        first_time = bool(self.audio_features_available)
        if first_time:
            self.audio_features_available = False

    # The flag was already cleared by an earlier call: nothing left to report.
    if not first_time:
        return

    self._log.warning(
        "Audio features API is unavailable (403 error). "
        "Skipping audio features for remaining tracks."
    )

def track_info_batch(
    self, track_ids: Sequence[str]
) -> dict[str, SpotifyTrackInfo]:
    """Look up popularity and external IDs, 50 track IDs per request.

    :param track_ids: Spotify track IDs to look up.
    :returns: Mapping from track ID to :class:`SpotifyTrackInfo`. Every
        requested ID gets an entry; IDs with no track object in the
        response map to an empty ``SpotifyTrackInfo()``.

    Errors raised by ``_handle_response`` are not caught here.
    """
    results: dict[str, SpotifyTrackInfo] = {}
    if not track_ids:
        return results

    for batch in chunks(track_ids, 50):
        response = self._handle_response(
            "get",
            self.track_url,
            params={"ids": ",".join(batch)},
        )

        for position, payload in enumerate(response.get("tracks", [])):
            # ``None`` entries carry no data and are skipped.
            if payload is not None:
                ids = payload.get("external_ids", {})
                # Prefer the ID reported in the response; fall back to the
                # ID requested at the same position.
                key = payload.get("id") or batch[position]
                results[key] = SpotifyTrackInfo(
                    popularity=payload.get("popularity"),
                    isrc=ids.get("isrc"),
                    ean=ids.get("ean"),
                    upc=ids.get("upc"),
                )

        # Fill in an empty record for every requested ID still missing.
        for requested in batch:
            if requested not in results:
                results[requested] = SpotifyTrackInfo()

    return results

def track_audio_features_batch(
    self, track_ids: Sequence[str]
) -> dict[str, SpotifyAudioFeatureData]:
    """Look up audio features, 100 track IDs per request.

    :param track_ids: Spotify track IDs to look up.
    :returns: Mapping from track ID to its audio-features object. IDs with
        no data are simply absent. The mapping is empty when no IDs are
        given or when audio features have already been disabled.

    An ``AudioFeaturesUnavailableError`` disables audio features and stops
    the lookup, keeping what was collected so far. Any other ``APIError``
    is logged at debug level and only skips the affected batch.
    """
    collected: dict[str, SpotifyAudioFeatureData] = {}
    if not track_ids:
        return collected

    # Read the availability flag under the lock; act on it afterwards.
    with self._audio_features_lock:
        enabled = self.audio_features_available
    if not enabled:
        return collected

    for batch in chunks(track_ids, 100):
        try:
            response = self._handle_response(
                "get",
                self.audio_features_url,
                params={"ids": ",".join(batch)},
            )
        except AudioFeaturesUnavailableError:
            self._disable_audio_features()
            break
        except APIError as err:
            self._log.debug("Spotify API error: {}", err)
        else:
            entries = response.get("audio_features", [])
            for position, entry in enumerate(entries):
                # ``None`` entries carry no data and are skipped.
                if entry is not None:
                    # Prefer the ID reported in the response; fall back to
                    # the ID requested at the same position.
                    key = entry.get("id") or batch[position]
                    collected[key] = entry

    return collected

def _fetch_info(self, items, write, force):
"""Obtain track information from Spotify."""

self._log.debug("Total {} tracks", len(items))

items_to_update: list[tuple[Item, str]] = []

for index, item in enumerate(items, start=1):
self._log.info(
"Processing {}/{} tracks - {} ", index, len(items), item
)
# If we're not forcing re-downloading for all tracks, check
# whether the popularity data is already present
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Configure your AI agent to not remove stuff like this

if not force:
if "spotify_track_popularity" in item:
self._log.debug("Popularity already present for: {}", item)
Expand All @@ -743,14 +853,27 @@ def _fetch_info(self, items, write, force):
self._log.debug("No track_id present for: {}", item)
continue

popularity, isrc, ean, upc = self.track_info(spotify_track_id)
item["spotify_track_popularity"] = popularity
item["isrc"] = isrc
item["ean"] = ean
item["upc"] = upc
items_to_update.append((item, spotify_track_id))

if not items_to_update:
return

unique_track_ids = unique_list(
Comment thread
arsaboo marked this conversation as resolved.
Outdated
track_id for _, track_id in items_to_update
)
track_info_by_id = self.track_info_batch(unique_track_ids)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
track_info_by_id = self.track_info_batch(unique_track_ids)
audio_features_by_id = self.track_info_batch(unique_track_ids)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renaming here would clash with audio_features_by_id already used for track_audio_features_batch. Kept as track_info_by_id for now.

Copy link
Copy Markdown
Member

@snejus snejus Apr 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I renamed the typed dict to TrackDetails so you can have track_details_by_id here, to remove ambiguity regarding beets.hooks.info::TrackInfo.

audio_features_by_id = self.track_audio_features_batch(unique_track_ids)

for item, spotify_track_id in items_to_update:
track_info = track_info_by_id.get(spotify_track_id, SpotifyTrackInfo())

item["spotify_track_popularity"] = track_info.popularity
item["isrc"] = track_info.isrc
item["ean"] = track_info.ean
item["upc"] = track_info.upc

if self.audio_features_available:
audio_features = self.track_audio_features(spotify_track_id)
audio_features = audio_features_by_id.get(spotify_track_id)
Comment thread
arsaboo marked this conversation as resolved.
if audio_features is None:
self._log.info("No audio features found for: {}", item)
else:
Expand All @@ -767,7 +890,9 @@ def _fetch_info(self, items, write, force):

def track_info(self, track_id: str):
"""Fetch a track's popularity and external IDs using its Spotify ID."""
track_data = self._handle_response("get", f"{self.track_url}{track_id}")
track_data = self._handle_response(
"get", f"{self.track_url}/{track_id}"
)
external_ids = track_data.get("external_ids", {})
popularity = track_data.get("popularity")
self._log.debug(
Expand All @@ -789,27 +914,16 @@ def track_audio_features(self, track_id: str):
once.

"""
# Fast path: if we've already detected unavailability, skip the call.
with self._audio_features_lock:
if not self.audio_features_available:
return None

try:
return self._handle_response(
"get", f"{self.audio_features_url}{track_id}"
"get", f"{self.audio_features_url}/{track_id}"
)
except AudioFeaturesUnavailableError:
# Disable globally in a thread-safe manner and warn once.
should_log = False
with self._audio_features_lock:
if self.audio_features_available:
self.audio_features_available = False
should_log = True
if should_log:
self._log.warning(
"Audio features API is unavailable (403 error). "
"Skipping audio features for remaining tracks."
)
self._disable_audio_features()
return None
except APIError as e:
self._log.debug("Spotify API error: {}", e)
Expand Down
8 changes: 5 additions & 3 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,11 @@ For plugin developers
``TypedDict`` models for releases, recordings, works, and relations. Update
direct access to raw MusicBrainz response keys if needed.

..
Other changes
~~~~~~~~~~~~~
Other changes
~~~~~~~~~~~~~

- :doc:`plugins/spotify`: Batch ``spotifysync`` track and audio-features API
Comment thread
arsaboo marked this conversation as resolved.
Outdated
requests and deduplicate repeated Spotify track IDs within a run.

2.9.0 (April 11, 2026)
----------------------
Expand Down
Loading
Loading