From 2efea37dda358ce469b982acd6ede78c0bc67c2a Mon Sep 17 00:00:00 2001 From: Nik Shevchenko Date: Thu, 11 Jun 2026 07:57:52 -0400 Subject: [PATCH] fix(backend): stop expiring speech profiles in has_profile check (#5128) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /v3/speech-profile endpoint applied a 90-day expiry (get_user_has_speech_profile(uid, max_age_days=90), added in 34be1700c), but nothing else honors that expiry: the /v4/listen pipeline (routers/transcribe.py) enables speaker identification and downloads the profile for embedding extraction regardless of age. So users whose profile is older than 90 days — and actively used for diarization — are told has_profile=false and nagged to "Teach Omi your voice" again. Remove the age cutoff so the banner endpoint agrees with the pipeline: an existing profile is a profile. Also drop the per-request blob metadata round-trip (blob.reload()) that the age check needed. Add tests/unit/test_speech_profile_existence.py (registered in test.sh) covering existence semantics, the signature, and a structural guard that the router passes no age cutoff. 
Co-Authored-By: Claude Fable 5 --- backend/routers/speech_profile.py | 2 +- backend/test.sh | 1 + .../unit/test_speech_profile_existence.py | 65 +++++++++++++++++++ backend/utils/other/storage.py | 19 ++---- 4 files changed, 72 insertions(+), 15 deletions(-) create mode 100644 backend/tests/unit/test_speech_profile_existence.py diff --git a/backend/routers/speech_profile.py b/backend/routers/speech_profile.py index 4cacdebb220..44418b47d80 100644 --- a/backend/routers/speech_profile.py +++ b/backend/routers/speech_profile.py @@ -29,7 +29,7 @@ @router.get('/v3/speech-profile', tags=['v3']) def has_speech_profile(uid: str = Depends(auth.get_current_user_uid)): - return {'has_profile': get_user_has_speech_profile(uid, max_age_days=90)} + return {'has_profile': get_user_has_speech_profile(uid)} @router.get('/v4/speech-profile', tags=['v3']) diff --git a/backend/test.sh b/backend/test.sh index efadf7c5089..b971fca18e7 100755 --- a/backend/test.sh +++ b/backend/test.sh @@ -63,6 +63,7 @@ pytest tests/unit/test_pusher_private_cloud_data_protection.py -v pytest tests/unit/test_pusher_batch_upload.py -v pytest tests/unit/test_storage_upload_audio_chunk_data_protection.py -v pytest tests/unit/test_storage_opus_encoding.py -v +pytest tests/unit/test_speech_profile_existence.py -v pytest tests/unit/test_storage_fanout_limits.py -v pytest tests/unit/test_people_conversations_500s.py -v pytest tests/unit/test_firestore_read_ops_cache.py -v diff --git a/backend/tests/unit/test_speech_profile_existence.py b/backend/tests/unit/test_speech_profile_existence.py new file mode 100644 index 00000000000..2a4e1597692 --- /dev/null +++ b/backend/tests/unit/test_speech_profile_existence.py @@ -0,0 +1,65 @@ +"""Unit tests for the speech profile existence check (#5128). + +/v3/speech-profile must report has_profile=true for ANY existing profile, +because the listen pipeline (routers/transcribe.py) uses the profile +regardless of age. 
A 90-day expiry applied only to this endpoint caused +users with older, actively-used profiles to be re-prompted to +"Teach Omi your voice" on every launch. +""" + +import inspect +import os +import sys +from pathlib import Path +from unittest.mock import MagicMock, patch + +os.environ.setdefault("ENCRYPTION_SECRET", "omi_ZwB2ZNqB2HHpMK6wStk7sTpavJiPTFg7gXUHnc4tFABPU6pZ2c2DKgehtfgi4RZv") + +# Mock heavy dependencies at sys.modules level before importing storage +sys.modules.setdefault("database._client", MagicMock()) + +_mock_gcs_storage = MagicMock() +_mock_gcs_storage.Client.return_value = MagicMock() +sys.modules.setdefault("google.cloud.storage", _mock_gcs_storage) +sys.modules.setdefault("google.cloud.storage.transfer_manager", MagicMock()) +sys.modules.setdefault("google.cloud.exceptions", MagicMock()) +sys.modules.setdefault("google.oauth2", MagicMock()) +sys.modules.setdefault("google.oauth2.service_account", MagicMock()) + +from utils.other import storage as storage_mod + + +class TestGetUserHasSpeechProfile: + def _bucket_with_blob(self, exists: bool): + blob = MagicMock() + blob.exists.return_value = exists + bucket = MagicMock() + bucket.blob.return_value = blob + return bucket, blob + + def test_existing_profile_counts_regardless_of_age(self): + """An existing profile is reported as present — no age cutoff (#5128).""" + bucket, blob = self._bucket_with_blob(exists=True) + with patch.object(storage_mod, "_get_speech_profiles_bucket", return_value=bucket): + assert storage_mod.get_user_has_speech_profile("uid1") is True + # No metadata fetch for age checks — the old expiry code called blob.reload() + blob.reload.assert_not_called() + + def test_missing_profile(self): + bucket, _ = self._bucket_with_blob(exists=False) + with patch.object(storage_mod, "_get_speech_profiles_bucket", return_value=bucket): + assert storage_mod.get_user_has_speech_profile("uid1") is False + + def test_missing_bucket(self): + with patch.object(storage_mod, 
"_get_speech_profiles_bucket", return_value=None): + assert storage_mod.get_user_has_speech_profile("uid1") is False + + def test_no_age_parameter_in_signature(self): + """Guard against reintroducing an expiry knob on the existence check.""" + params = inspect.signature(storage_mod.get_user_has_speech_profile).parameters + assert list(params) == ["uid"] + + def test_endpoint_does_not_pass_age_cutoff(self): + """The /v3/speech-profile router must not filter profiles by age (#5128).""" + router_src = Path(storage_mod.__file__).parents[2] / "routers" / "speech_profile.py" + assert "max_age_days" not in router_src.read_text() diff --git a/backend/utils/other/storage.py b/backend/utils/other/storage.py index d74a7d8a431..a887ef9341e 100644 --- a/backend/utils/other/storage.py +++ b/backend/utils/other/storage.py @@ -91,24 +91,15 @@ def upload_profile_audio(file_path: str, uid: str): return f'https://storage.googleapis.com/{speech_profiles_bucket}/{path}' -def get_user_has_speech_profile(uid: str, max_age_days: int = None) -> bool: +def get_user_has_speech_profile(uid: str) -> bool: + # No age cutoff: the listen pipeline (routers/transcribe.py) uses the profile + # regardless of age, so reporting an old profile as absent only causes the app + # to re-prompt users whose profile is still in active use (#5128). bucket = _get_speech_profiles_bucket() if bucket is None: return False - blob = bucket.blob(f'{uid}/speech_profile.wav') - if not blob.exists(): - return False - - # Check age if max_age_days is specified - if max_age_days is not None: - blob.reload() - if blob.time_created: - age = datetime.datetime.now(datetime.timezone.utc) - blob.time_created - if age.days > max_age_days: - return False - - return True + return bucket.blob(f'{uid}/speech_profile.wav').exists() def get_profile_audio_if_exists(uid: str, download: bool = True) -> str: