-
Notifications
You must be signed in to change notification settings - Fork 2k
Harden developer memories list against malformed records #7784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,7 +6,7 @@ | |
| from typing import List, Optional | ||
|
|
||
| from fastapi import APIRouter, HTTPException, Depends, Query | ||
| from pydantic import BaseModel, Field, ValidationError | ||
| from pydantic import BaseModel, Field, ValidationError, field_validator | ||
|
|
||
| import database.folders as folders_db | ||
| import database.memories as memories_db | ||
|
|
@@ -107,17 +107,92 @@ def delete_key(key_id: str, uid: str = Depends(get_current_user_id)): | |
| class CleanerMemory(BaseModel): | ||
| # Core fields (aligned with MemoryResponse) | ||
| id: str | ||
| content: str | ||
| category: MemoryCategory | ||
| content: str = '' | ||
| category: MemoryCategory = MemoryCategory.interesting | ||
| visibility: Optional[str] = 'private' | ||
| tags: List[str] = [] | ||
| created_at: datetime | ||
| updated_at: datetime | ||
| manually_added: bool | ||
| tags: List[str] = Field(default_factory=list) | ||
| created_at: Optional[datetime] = None | ||
| updated_at: Optional[datetime] = None | ||
| manually_added: bool = False | ||
| scoring: Optional[str] = None | ||
| reviewed: bool | ||
| reviewed: bool = False | ||
| user_review: Optional[bool] = None | ||
| edited: bool | ||
| edited: bool = False | ||
|
|
||
| @field_validator('id', mode='before') | ||
| def coerce_id(cls, value): | ||
| if not value and value != 0: | ||
| raise ValueError('id is required') | ||
| return str(value) | ||
|
|
||
| @field_validator('content', mode='before') | ||
| def coerce_content(cls, value): | ||
| if value is None: | ||
| return '' | ||
| return str(value) | ||
|
|
||
| @field_validator('category', mode='before') | ||
| def coerce_category(cls, value): | ||
| if isinstance(value, MemoryCategory): | ||
| return value | ||
| try: | ||
| return MemoryCategory(value) | ||
| except (TypeError, ValueError): | ||
| return MemoryCategory.interesting | ||
|
|
||
| @field_validator('visibility', mode='before') | ||
| def coerce_visibility(cls, value): | ||
| return value if value in ['public', 'private'] else 'private' | ||
|
|
||
| @field_validator('tags', mode='before') | ||
| def coerce_tags(cls, value): | ||
| if not isinstance(value, list): | ||
| return [] | ||
| return [str(tag) for tag in value if tag is not None] | ||
|
|
||
| @field_validator('scoring', mode='before') | ||
| def coerce_scoring(cls, value): | ||
| if value is None: | ||
| return None | ||
| return str(value) | ||
|
|
||
| @field_validator('created_at', 'updated_at', mode='before') | ||
| def coerce_datetime(cls, value): | ||
| if value in [None, '']: | ||
| return None | ||
| if isinstance(value, datetime): | ||
| return value | ||
| if isinstance(value, str): | ||
| try: | ||
| return datetime.fromisoformat(value.replace('Z', '+00:00')) | ||
| except ValueError: | ||
| return None | ||
| if isinstance(value, (int, float)) and not isinstance(value, bool): | ||
| try: | ||
| return datetime.fromtimestamp(value, tz=timezone.utc) | ||
| except (OSError, OverflowError, ValueError): | ||
| return None | ||
| return None | ||
|
Comment on lines
+159
to
+175
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Addressed in |
||
|
|
||
| @field_validator('user_review', mode='before') | ||
| def coerce_user_review(cls, value): | ||
| if isinstance(value, bool): | ||
| return value | ||
| if value in [None, '']: | ||
| return None | ||
| if isinstance(value, str): | ||
| return value.lower() in ['true', '1', 'yes'] | ||
| return bool(value) | ||
|
|
||
| @field_validator('manually_added', 'reviewed', 'edited', mode='before') | ||
| def coerce_bool(cls, value): | ||
| if isinstance(value, bool): | ||
| return value | ||
| if value in [None, '']: | ||
| return False | ||
| if isinstance(value, str): | ||
| return value.lower() in ['true', '1', 'yes'] | ||
| return bool(value) | ||
|
|
||
|
|
||
| class CreateMemoryRequest(BaseModel): | ||
|
|
@@ -176,8 +251,11 @@ def get_memories( | |
| # hardening already applied to GET /v3/memories. | ||
| valid_memories = [] | ||
| for memory in memories: | ||
| if not isinstance(memory, dict) or not memory.get('id'): | ||
| logger.warning('Skipping malformed memory in Developer API memory list') | ||
| continue | ||
| if memory.get('is_locked', False): | ||
| content = memory.get('content', '') | ||
| content = str(memory.get('content') or '') | ||
| memory['content'] = (content[:70] + '...') if len(content) > 70 else content | ||
| try: | ||
| valid_memories.append(CleanerMemory.model_validate(memory)) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`coerce_id` allows an empty-string `id` through `CleanerMemory`
The `coerce_id` validator converts `None` → `''`, and there is no further check to reject an empty string. The endpoint's pre-filter (`not memory.get('id')`) blocks empty-ID dicts before `model_validate`, but `CleanerMemory` is also used as the `response_model` for `PATCH /v1/dev/user/memories/{memory_id}`, which returns `memories_db.get_memory(uid, memory_id)` raw — with no equivalent pre-filter. A Firestore doc that somehow lacks an `id` key would produce a serialized response containing `"id": ""`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Addressed in `746e0c00e`: `CleanerMemory` now rejects missing/empty IDs at model validation time, while the list endpoint still pre-filters malformed records before serialization. Added a regression test for the model-level `ValidationError`; local validation: 19 passed.