diff --git a/backend/routers/developer.py b/backend/routers/developer.py index 730f89e1ad..f5c8d722af 100644 --- a/backend/routers/developer.py +++ b/backend/routers/developer.py @@ -6,7 +6,7 @@ from typing import List, Optional from fastapi import APIRouter, HTTPException, Depends, Query -from pydantic import BaseModel, Field, ValidationError +from pydantic import BaseModel, Field, ValidationError, field_validator import database.folders as folders_db import database.memories as memories_db @@ -104,20 +104,103 @@ def delete_key(key_id: str, uid: str = Depends(get_current_user_id)): # ****************************************************** +def _coerce_required_memory_id(value) -> str: + if not value and value != 0: + raise ValueError('id is required') + return str(value) + + +def _coerce_optional_memory_datetime(value) -> Optional[datetime]: + if value in [None, '']: + return None + if isinstance(value, datetime): + return value + if isinstance(value, str): + try: + return datetime.fromisoformat(value.replace('Z', '+00:00')) + except ValueError: + return None + if isinstance(value, (int, float)) and not isinstance(value, bool): + try: + return datetime.fromtimestamp(value, tz=timezone.utc) + except (OSError, OverflowError, ValueError): + return None + return None + + class CleanerMemory(BaseModel): # Core fields (aligned with MemoryResponse) id: str - content: str - category: MemoryCategory + content: str = '' + category: MemoryCategory = MemoryCategory.interesting visibility: Optional[str] = 'private' - tags: List[str] = [] - created_at: datetime - updated_at: datetime - manually_added: bool + tags: List[str] = Field(default_factory=list) + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None + manually_added: bool = False scoring: Optional[str] = None - reviewed: bool + reviewed: bool = False user_review: Optional[bool] = None - edited: bool + edited: bool = False + + @field_validator('id', mode='before') + def coerce_id(cls, value): + return _coerce_required_memory_id(value) + + @field_validator('content', mode='before') + def coerce_content(cls, value): + if value is None: + return '' + return str(value) + + @field_validator('category', mode='before') + def coerce_category(cls, value): + if isinstance(value, MemoryCategory): + return value + try: + return MemoryCategory(value) + except (TypeError, ValueError): + return MemoryCategory.interesting + + @field_validator('visibility', mode='before') + def coerce_visibility(cls, value): + return value if value in ['public', 'private'] else 'private' + + @field_validator('tags', mode='before') + def coerce_tags(cls, value): + if not isinstance(value, list): + return [] + return [str(tag) for tag in value if tag is not None] + + @field_validator('scoring', mode='before') + def coerce_scoring(cls, value): + if value is None: + return None + return str(value) + + @field_validator('created_at', 'updated_at', mode='before') + def coerce_datetime(cls, value): + return _coerce_optional_memory_datetime(value) + + @field_validator('user_review', mode='before') + def coerce_user_review(cls, value): + if isinstance(value, bool): + return value + if value in [None, '']: + return None + if isinstance(value, str): + return value.lower() in ['true', '1', 'yes'] + return bool(value) + + @field_validator('manually_added', 'reviewed', 'edited', mode='before') + def coerce_bool(cls, value): + if isinstance(value, bool): + return value + if value in [None, '']: + return False + if isinstance(value, str): + return value.lower() in ['true', '1', 'yes'] + return bool(value) class CreateMemoryRequest(BaseModel): @@ -176,8 +259,11 @@ def get_memories( # hardening already applied to GET /v3/memories. valid_memories = [] for memory in memories: + if not isinstance(memory, dict) or not memory.get('id'): + logger.warning('Skipping malformed memory in Developer API memory list') + continue if memory.get('is_locked', False): - content = memory.get('content', '') + content = str(memory.get('content') or '') memory['content'] = (content[:70] + '...') if len(content) > 70 else content try: valid_memories.append(CleanerMemory.model_validate(memory)) diff --git a/backend/tests/unit/test_dev_api_folder_filters.py b/backend/tests/unit/test_dev_api_folder_filters.py index d4189c183c..0a9ea2d974 100644 --- a/backend/tests/unit/test_dev_api_folder_filters.py +++ b/backend/tests/unit/test_dev_api_folder_filters.py @@ -11,6 +11,8 @@ from types import ModuleType from unittest.mock import MagicMock +import pytest + os.environ.setdefault('OPENAI_API_KEY', 'sk-test-not-real') os.environ.setdefault('ENCRYPTION_SECRET', 'omi_ZwB2ZNqB2HHpMK6wStk7sTpavJiPTFg7gXUHnc4tFABPU6pZ2c2DKgehtfgi4RZv') @@ -361,6 +363,19 @@ def _build_test_app(): return app, TestClient(app) +def _build_memories_test_app(): + """Build a minimal FastAPI app for Developer API memories routes.""" + from fastapi import FastAPI + from fastapi.testclient import TestClient + from routers.developer import router as developer_router + from dependencies import get_uid_with_memories_read + + app = FastAPI() + app.include_router(developer_router) + app.dependency_overrides[get_uid_with_memories_read] = lambda: 'uid1' + return app, TestClient(app, raise_server_exceptions=False) + + class TestDevApiHttpLayer: """HTTP layer tests using FastAPI TestClient.""" @@ -461,3 +476,118 @@ def test_dev_get_folders_returns_200_with_list_body(self): assert len(body) == 1 assert body[0]['id'] == 'f1' assert body[0]['name'] == 'Work' + + +class TestDevApiMemoriesHttpLayer: + """HTTP layer tests for Developer API memory list resilience.""" + + def setup_method(self): + import database.memories as memories_db + + memories_db.get_memories = MagicMock() + + def test_memories_page_tolerates_legacy_malformed_record(self): + """A single legacy record must not turn an offset page into HTTP 500.""" + import database.memories as memories_db + + now = datetime(2025, 1, 15, 10, 0, 0, tzinfo=timezone.utc) + memories_db.get_memories.return_value = [ + { + 'id': 'mem-ok', + 'content': 'normal memory', + 'category': 'manual', + 'visibility': 'private', + 'tags': ['source'], + 'created_at': now, + 'updated_at': now, + 'manually_added': True, + 'scoring': '01_998_1736935200', + 'reviewed': True, + 'user_review': True, + 'edited': False, + }, + { + 'id': 'mem-legacy', + 'content': 'legacy memory', + 'category': 'unexpected-legacy-category', + 'visibility': None, + 'tags': None, + 'created_at': 'not-a-date', + 'updated_at': {'bad': 'date'}, + 'user_review': 'yes', + }, + { + 'id': 'mem-locked-legacy', + 'content': None, + 'category': None, + 'is_locked': True, + 'scoring': 123, + }, + { + 'content': 'no id should be skipped', + 'category': 'manual', + }, + ] + + _, client = _build_memories_test_app() + resp = client.get('/v1/dev/user/memories?limit=3&offset=7') + + assert resp.status_code == 200 + body = resp.json() + assert len(body) == 3 + legacy = body[1] + assert legacy['id'] == 'mem-legacy' + assert legacy['content'] == 'legacy memory' + assert legacy['category'] == 'interesting' + assert legacy['visibility'] == 'private' + assert legacy['tags'] == [] + assert legacy['created_at'] is None + assert legacy['updated_at'] is None + assert legacy['manually_added'] is False + assert legacy['reviewed'] is False + assert legacy['user_review'] is True + assert legacy['edited'] is False + + locked_legacy = body[2] + assert locked_legacy['id'] == 'mem-locked-legacy' + assert locked_legacy['content'] == '' + assert locked_legacy['category'] == 'interesting' + assert locked_legacy['scoring'] == '123' + + def test_cleaner_memory_coerces_edge_values(self): + """CleanerMemory validators should be resilient outside the endpoint pre-filter too.""" + from routers.developer import CleanerMemory + + memory = CleanerMemory( + id='mem-edge', + content=None, + category=None, + created_at=1736935200, + updated_at=[], + manually_added='1', + reviewed='true', + user_review='no', + edited='', + ) + + assert memory.id == 'mem-edge' + assert memory.content == '' + assert memory.category == 'interesting' + assert memory.created_at == datetime(2025, 1, 15, 10, 0, 0, tzinfo=timezone.utc) + assert memory.updated_at is None + assert memory.manually_added is True + assert memory.reviewed is True + assert memory.user_review is False + assert memory.edited is False + + def test_cleaner_memory_rejects_empty_id(self): + """CleanerMemory should not serialize malformed responses with an empty id.""" + from pydantic import ValidationError + from routers.developer import CleanerMemory + + with pytest.raises(ValidationError): + CleanerMemory( + id=None, + content='legacy memory', + category='manual', + ) diff --git a/backend/tests/unit/test_dev_api_memories_pagination.py b/backend/tests/unit/test_dev_api_memories_pagination.py index 1f6621a893..2c20db0713 100644 --- a/backend/tests/unit/test_dev_api_memories_pagination.py +++ b/backend/tests/unit/test_dev_api_memories_pagination.py @@ -1,9 +1,9 @@ -"""Regression test for GET /v1/dev/user/memories resilience (issue #7492). +"""Regression tests for GET /v1/dev/user/memories resilience (issue #7492). The endpoint declares response_model=List[CleanerMemory] and returned raw Firestore dicts, so a -single malformed/legacy record (missing a required field or an out-of-enum category) made FastAPI -raise ResponseValidationError -> HTTP 500 for the whole page (only the offsets containing that record -failed). The handler now validates each record and skips+logs invalid ones, mirroring GET /v3/memories. +single malformed/legacy record could make FastAPI raise ResponseValidationError -> HTTP 500 for the +whole page. The handler now validates each record individually, skips records without required +identity fields, and coerces legacy optional fields to safe defaults. """ import os @@ -80,6 +80,7 @@ def __getattr__(self, name): sys.modules[_mod_name] = _AutoMockModule(_mod_name) sys.modules['firebase_admin.auth'].InvalidIdTokenError = type('InvalidIdTokenError', (Exception,), {}) +sys.modules['utils.apps'].update_personas_async = MagicMock() # utils.other.endpoints must expose real callables (used in route signatures); provide stand-ins. _endpoints = ModuleType('utils.other.endpoints') @@ -129,9 +130,24 @@ def _valid_memory(mid): } -def _invalid_memory(mid): - # Legacy/malformed record missing a required CleanerMemory field ('edited'). +def _missing_id_memory(mid): + # Malformed record missing the required identity field. m = _valid_memory(mid) + del m['id'] + return m + + +def _legacy_memory(mid): + # Legacy record with missing/invalid optional fields that should be coerced. + m = _valid_memory(mid) + m['category'] = 'old-category' + m['visibility'] = None + m['tags'] = None + m['created_at'] = 'not-a-date' + m['updated_at'] = {'bad': 'date'} + m['manually_added'] = '' + m['reviewed'] = None + m['user_review'] = 'yes' del m['edited'] return m @@ -144,7 +160,7 @@ def _build(): def test_invalid_record_is_skipped_not_500(): - page = [_valid_memory('good1'), _invalid_memory('bad1'), _valid_memory('good2')] + page = [_valid_memory('good1'), _missing_id_memory('bad1'), _valid_memory('good2')] with patch.object(memories_db, 'get_memories', return_value=page): client = _build() resp = client.get('/v1/dev/user/memories') @@ -152,6 +168,25 @@ def test_invalid_record_is_skipped_not_500(): assert [m['id'] for m in resp.json()] == ['good1', 'good2'] +def test_legacy_optional_fields_are_defaulted_not_500(): + page = [_legacy_memory('legacy1')] + with patch.object(memories_db, 'get_memories', return_value=page): + client = _build() + resp = client.get('/v1/dev/user/memories') + assert resp.status_code == 200 + [memory] = resp.json() + assert memory['id'] == 'legacy1' + assert memory['category'] == 'interesting' + assert memory['visibility'] == 'private' + assert memory['tags'] == [] + assert memory['created_at'] is None + assert memory['updated_at'] is None + assert memory['manually_added'] is False + assert memory['reviewed'] is False + assert memory['user_review'] is True + assert memory['edited'] is False + + def test_all_valid_records_returned(): page = [_valid_memory('a'), _valid_memory('b')] with patch.object(memories_db, 'get_memories', return_value=page):