Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 88 additions & 10 deletions backend/routers/developer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import List, Optional

from fastapi import APIRouter, HTTPException, Depends, Query
from pydantic import BaseModel, Field, ValidationError
from pydantic import BaseModel, Field, ValidationError, field_validator

import database.folders as folders_db
import database.memories as memories_db
Expand Down Expand Up @@ -107,17 +107,92 @@ def delete_key(key_id: str, uid: str = Depends(get_current_user_id)):
class CleanerMemory(BaseModel):
# Core fields (aligned with MemoryResponse)
id: str
content: str
category: MemoryCategory
content: str = ''
category: MemoryCategory = MemoryCategory.interesting
visibility: Optional[str] = 'private'
tags: List[str] = []
created_at: datetime
updated_at: datetime
manually_added: bool
tags: List[str] = Field(default_factory=list)
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None
manually_added: bool = False
scoring: Optional[str] = None
reviewed: bool
reviewed: bool = False
user_review: Optional[bool] = None
edited: bool
edited: bool = False

@field_validator('id', mode='before')
def coerce_id(cls, value):
if not value and value != 0:
raise ValueError('id is required')
return str(value)
Comment on lines +122 to +126

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 coerce_id allows an empty-string id through CleanerMemory

The coerce_id validator converts None'', and there is no further check to reject an empty string. The endpoint's pre-filter (not memory.get('id')) blocks empty-ID dicts before model_validate, but CleanerMemory is also used as the response_model for PATCH /v1/dev/user/memories/{memory_id}, which returns memories_db.get_memory(uid, memory_id) raw — with no equivalent pre-filter. A Firestore doc that somehow lacks an id key would produce a serialized response containing "id": "".

Suggested change
@field_validator('id', mode='before')
def coerce_id(cls, value):
if value is None:
return ''
return str(value)
@field_validator('id', mode='before')
def coerce_id(cls, value):
if not value and value != 0:
return ''
return str(value)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 746e0c00e: CleanerMemory now rejects missing/empty IDs at model validation time, while the list endpoint still pre-filters malformed records before serialization. Added a regression test for the model-level ValidationError; local validation: 19 passed.


@field_validator('content', mode='before')
def coerce_content(cls, value):
if value is None:
return ''
return str(value)

@field_validator('category', mode='before')
def coerce_category(cls, value):
if isinstance(value, MemoryCategory):
return value
try:
return MemoryCategory(value)
except (TypeError, ValueError):
return MemoryCategory.interesting

@field_validator('visibility', mode='before')
def coerce_visibility(cls, value):
return value if value in ['public', 'private'] else 'private'

@field_validator('tags', mode='before')
def coerce_tags(cls, value):
if not isinstance(value, list):
return []
return [str(tag) for tag in value if tag is not None]

@field_validator('scoring', mode='before')
def coerce_scoring(cls, value):
if value is None:
return None
return str(value)

@field_validator('created_at', 'updated_at', mode='before')
def coerce_datetime(cls, value):
if value in [None, '']:
return None
if isinstance(value, datetime):
return value
if isinstance(value, str):
try:
return datetime.fromisoformat(value.replace('Z', '+00:00'))
except ValueError:
return None
if isinstance(value, (int, float)) and not isinstance(value, bool):
try:
return datetime.fromtimestamp(value, tz=timezone.utc)
except (OSError, OverflowError, ValueError):
return None
return None
Comment on lines +159 to +175

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 coerce_datetime returns raw numeric without conversion

When value is an int or float, the validator returns it as-is and relies on Pydantic V2's implicit lax-mode coercion (treating the number as a Unix-seconds timestamp). This works today, but the silent dependency on Pydantic's internal coercion semantics makes the intent opaque and couples correctness to Pydantic's undocumented numeric-datetime interpretation. An explicit datetime.fromtimestamp(value, tz=timezone.utc) inside the validator would both document intent and fail loudly if the stored value is out of range, rather than surfacing as a confusing None in the response after the except ValidationError swallows it.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 746e0c00e: numeric created_at/updated_at values are now explicitly converted with datetime.fromtimestamp(..., tz=timezone.utc) and invalid numeric ranges fall back to None. Added coverage for the Unix timestamp path in test_cleaner_memory_coerces_edge_values; local validation: 19 passed.


@field_validator('user_review', mode='before')
def coerce_user_review(cls, value):
if isinstance(value, bool):
return value
if value in [None, '']:
return None
if isinstance(value, str):
return value.lower() in ['true', '1', 'yes']
return bool(value)

@field_validator('manually_added', 'reviewed', 'edited', mode='before')
def coerce_bool(cls, value):
if isinstance(value, bool):
return value
if value in [None, '']:
return False
if isinstance(value, str):
return value.lower() in ['true', '1', 'yes']
return bool(value)


class CreateMemoryRequest(BaseModel):
Expand Down Expand Up @@ -176,8 +251,11 @@ def get_memories(
# hardening already applied to GET /v3/memories.
valid_memories = []
for memory in memories:
if not isinstance(memory, dict) or not memory.get('id'):
logger.warning('Skipping malformed memory in Developer API memory list')
continue
if memory.get('is_locked', False):
content = memory.get('content', '')
content = str(memory.get('content') or '')
memory['content'] = (content[:70] + '...') if len(content) > 70 else content
try:
valid_memories.append(CleanerMemory.model_validate(memory))
Expand Down
130 changes: 130 additions & 0 deletions backend/tests/unit/test_dev_api_folder_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from types import ModuleType
from unittest.mock import MagicMock

import pytest

os.environ.setdefault('OPENAI_API_KEY', 'sk-test-not-real')
os.environ.setdefault('ENCRYPTION_SECRET', 'omi_ZwB2ZNqB2HHpMK6wStk7sTpavJiPTFg7gXUHnc4tFABPU6pZ2c2DKgehtfgi4RZv')

Expand Down Expand Up @@ -361,6 +363,19 @@ def _build_test_app():
return app, TestClient(app)


def _build_memories_test_app():
"""Build a minimal FastAPI app for Developer API memories routes."""
from fastapi import FastAPI
from fastapi.testclient import TestClient
from routers.developer import router as developer_router
from dependencies import get_uid_with_memories_read

app = FastAPI()
app.include_router(developer_router)
app.dependency_overrides[get_uid_with_memories_read] = lambda: 'uid1'
return app, TestClient(app, raise_server_exceptions=False)


class TestDevApiHttpLayer:
"""HTTP layer tests using FastAPI TestClient."""

Expand Down Expand Up @@ -461,3 +476,118 @@ def test_dev_get_folders_returns_200_with_list_body(self):
assert len(body) == 1
assert body[0]['id'] == 'f1'
assert body[0]['name'] == 'Work'


class TestDevApiMemoriesHttpLayer:
"""HTTP layer tests for Developer API memory list resilience."""

def setup_method(self):
import database.memories as memories_db

memories_db.get_memories = MagicMock()

def test_memories_page_tolerates_legacy_malformed_record(self):
"""A single legacy record must not turn an offset page into HTTP 500."""
import database.memories as memories_db

now = datetime(2025, 1, 15, 10, 0, 0, tzinfo=timezone.utc)
memories_db.get_memories.return_value = [
{
'id': 'mem-ok',
'content': 'normal memory',
'category': 'manual',
'visibility': 'private',
'tags': ['source'],
'created_at': now,
'updated_at': now,
'manually_added': True,
'scoring': '01_998_1736935200',
'reviewed': True,
'user_review': True,
'edited': False,
},
{
'id': 'mem-legacy',
'content': 'legacy memory',
'category': 'unexpected-legacy-category',
'visibility': None,
'tags': None,
'created_at': 'not-a-date',
'updated_at': {'bad': 'date'},
'user_review': 'yes',
},
{
'id': 'mem-locked-legacy',
'content': None,
'category': None,
'is_locked': True,
'scoring': 123,
},
{
'content': 'no id should be skipped',
'category': 'manual',
},
]

_, client = _build_memories_test_app()
resp = client.get('/v1/dev/user/memories?limit=3&offset=7')

assert resp.status_code == 200
body = resp.json()
assert len(body) == 3
legacy = body[1]
assert legacy['id'] == 'mem-legacy'
assert legacy['content'] == 'legacy memory'
assert legacy['category'] == 'interesting'
assert legacy['visibility'] == 'private'
assert legacy['tags'] == []
assert legacy['created_at'] is None
assert legacy['updated_at'] is None
assert legacy['manually_added'] is False
assert legacy['reviewed'] is False
assert legacy['user_review'] is True
assert legacy['edited'] is False

locked_legacy = body[2]
assert locked_legacy['id'] == 'mem-locked-legacy'
assert locked_legacy['content'] == ''
assert locked_legacy['category'] == 'interesting'
assert locked_legacy['scoring'] == '123'

def test_cleaner_memory_coerces_edge_values(self):
"""CleanerMemory validators should be resilient outside the endpoint pre-filter too."""
from routers.developer import CleanerMemory

memory = CleanerMemory(
id='mem-edge',
content=None,
category=None,
created_at=1736935200,
updated_at=[],
manually_added='1',
reviewed='true',
user_review='no',
edited='',
)

assert memory.id == 'mem-edge'
assert memory.content == ''
assert memory.category == 'interesting'
assert memory.created_at == datetime(2025, 1, 15, 10, 0, 0, tzinfo=timezone.utc)
assert memory.updated_at is None
assert memory.manually_added is True
assert memory.reviewed is True
assert memory.user_review is False
assert memory.edited is False

def test_cleaner_memory_rejects_empty_id(self):
"""CleanerMemory should not serialize malformed responses with an empty id."""
from pydantic import ValidationError
from routers.developer import CleanerMemory

with pytest.raises(ValidationError):
CleanerMemory(
id=None,
content='legacy memory',
category='manual',
)
49 changes: 42 additions & 7 deletions backend/tests/unit/test_dev_api_memories_pagination.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Regression test for GET /v1/dev/user/memories resilience (issue #7492).
"""Regression tests for GET /v1/dev/user/memories resilience (issue #7492).

The endpoint declares response_model=List[CleanerMemory] and returned raw Firestore dicts, so a
single malformed/legacy record (missing a required field or an out-of-enum category) made FastAPI
raise ResponseValidationError -> HTTP 500 for the whole page (only the offsets containing that record
failed). The handler now validates each record and skips+logs invalid ones, mirroring GET /v3/memories.
single malformed/legacy record could make FastAPI raise ResponseValidationError -> HTTP 500 for the
whole page. The handler now validates each record individually, skips records without required
identity fields, and coerces legacy optional fields to safe defaults.
"""

import os
Expand Down Expand Up @@ -80,6 +80,7 @@ def __getattr__(self, name):
sys.modules[_mod_name] = _AutoMockModule(_mod_name)

sys.modules['firebase_admin.auth'].InvalidIdTokenError = type('InvalidIdTokenError', (Exception,), {})
sys.modules['utils.apps'].update_personas_async = MagicMock()

# utils.other.endpoints must expose real callables (used in route signatures); provide stand-ins.
_endpoints = ModuleType('utils.other.endpoints')
Expand Down Expand Up @@ -129,9 +130,24 @@ def _valid_memory(mid):
}


def _invalid_memory(mid):
# Legacy/malformed record missing a required CleanerMemory field ('edited').
def _missing_id_memory(mid):
# Malformed record missing the required identity field.
m = _valid_memory(mid)
del m['id']
return m


def _legacy_memory(mid):
# Legacy record with missing/invalid optional fields that should be coerced.
m = _valid_memory(mid)
m['category'] = 'old-category'
m['visibility'] = None
m['tags'] = None
m['created_at'] = 'not-a-date'
m['updated_at'] = {'bad': 'date'}
m['manually_added'] = ''
m['reviewed'] = None
m['user_review'] = 'yes'
del m['edited']
return m

Expand All @@ -144,14 +160,33 @@ def _build():


def test_invalid_record_is_skipped_not_500():
page = [_valid_memory('good1'), _invalid_memory('bad1'), _valid_memory('good2')]
page = [_valid_memory('good1'), _missing_id_memory('bad1'), _valid_memory('good2')]
with patch.object(memories_db, 'get_memories', return_value=page):
client = _build()
resp = client.get('/v1/dev/user/memories')
assert resp.status_code == 200
assert [m['id'] for m in resp.json()] == ['good1', 'good2']


def test_legacy_optional_fields_are_defaulted_not_500():
page = [_legacy_memory('legacy1')]
with patch.object(memories_db, 'get_memories', return_value=page):
client = _build()
resp = client.get('/v1/dev/user/memories')
assert resp.status_code == 200
[memory] = resp.json()
assert memory['id'] == 'legacy1'
assert memory['category'] == 'interesting'
assert memory['visibility'] == 'private'
assert memory['tags'] == []
assert memory['created_at'] is None
assert memory['updated_at'] is None
assert memory['manually_added'] is False
assert memory['reviewed'] is False
assert memory['user_review'] is True
assert memory['edited'] is False


def test_all_valid_records_returned():
page = [_valid_memory('a'), _valid_memory('b')]
with patch.object(memories_db, 'get_memories', return_value=page):
Expand Down
Loading