def delete_memory_vectors_batch(uid: str, memory_ids: List[str]) -> int:
    """Delete a user's memory vectors (ns2) in batched, chunked calls.

    Vector IDs are built as ``{uid}-{memory_id}`` and passed to
    ``index.delete`` in chunks of at most 1000 IDs. Each chunk is wrapped in
    its own try/except so a failure on one chunk does not abandon the rest;
    the failure is logged together with the error that caused it.

    Args:
        uid: Owner of the memories; used as the vector ID prefix.
        memory_ids: Memory IDs whose vectors should be removed.

    Returns:
        The number of vector IDs contained in chunks whose delete call
        completed without raising. 0 when the index is not initialized or
        ``memory_ids`` is empty.
    """
    if index is None:
        logger.warning('Pinecone index not initialized, skipping memory vector batch delete')
        return 0
    if not memory_ids:
        return 0

    chunk_size = 1000
    vector_ids = [f'{uid}-{mid}' for mid in memory_ids]
    total_deleted = 0
    for start in range(0, len(vector_ids), chunk_size):
        chunk = vector_ids[start : start + chunk_size]
        try:
            index.delete(ids=chunk, namespace=MEMORIES_NAMESPACE)
        except Exception as e:
            # Best-effort cleanup: keep going with the remaining chunks, but
            # record why this one failed so it can be diagnosed later.
            logger.warning(
                f'delete_memory_vectors_batch chunk failed uid={uid} chunk={start // chunk_size} error={e}'
            )
        else:
            total_deleted += len(chunk)
    logger.info(f'delete_memory_vectors_batch uid={uid} total_deleted={total_deleted}')
    return total_deleted
def delete_memories(
    uid: str = Depends(auth.with_rate_limit(auth.get_current_user_uid, "memories:delete_all")),
):
    """Delete all memories for the user and then purge their vectors.

    The memory IDs are gathered page by page *before* the database delete,
    because once the records are gone there is no way to know which vector
    IDs to remove, and the leftover vectors would linger as search noise.
    """
    page_size = 1000

    def _fetch_page(skip):
        # Invalidated memories are included so their IDs are collected too.
        return memories_db.get_memories(uid, limit=page_size, offset=skip, include_invalidated=True)

    collected_ids = []
    skip = 0
    page = _fetch_page(skip)
    while page:
        for memory in page:
            memory_id = memory.get('id')
            if memory_id:
                collected_ids.append(memory_id)
        skip += page_size
        page = _fetch_page(skip)

    memories_db.delete_all_memories(uid)

    # Vector cleanup runs only after the records themselves have been removed.
    if collected_ids:
        delete_memory_vectors_batch(uid, collected_ids)

    return {'status': 'ok'}