Skip to content
Open
16 changes: 16 additions & 0 deletions backend/apps/ai/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from apps.ai.models.chunk import Chunk
from apps.ai.models.context import Context
from apps.ai.models.semantic_cache import SemanticCache


class ChunkAdmin(admin.ModelAdmin):
Expand Down Expand Up @@ -32,5 +33,20 @@ class ContextAdmin(admin.ModelAdmin):
search_fields = ("content", "source")


class SemanticCacheAdmin(admin.ModelAdmin):
    """Django admin configuration for the SemanticCache model."""

    # Columns shown on the changelist page, listed alphabetically to match
    # the style of the other admin classes in this module.
    list_display = (
        "confidence",
        "id",
        "intent",
        "nest_created_at",
        "query_text",
    )
    # Sidebar filter on the classified query intent.
    list_filter = ("intent",)
    # Free-text search over the cached query and response bodies.
    search_fields = ("query_text", "response_text")


admin.site.register(Chunk, ChunkAdmin)
admin.site.register(Context, ContextAdmin)
admin.site.register(SemanticCache, SemanticCacheAdmin)
12 changes: 12 additions & 0 deletions backend/apps/ai/common/crewai_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""CrewAI assistant configuration."""

from dataclasses import dataclass


@dataclass
class CrewAIConfig:
    """Tunable settings for the CrewAI assistant.

    The fields control semantic-cache behaviour: whether cache lookups are
    performed at all, how similar a cached query must be to count as a hit,
    and how long stored entries remain valid.
    """

    # Master switch; when False all cache reads and writes are skipped.
    semantic_cache_enabled: bool = True
    # Minimum cosine similarity (0.0-1.0) for a cached entry to be reused.
    semantic_cache_similarity_threshold: float = 0.95
    # Lifetime of a cache entry in seconds (24 hours by default).
    semantic_cache_ttl_seconds: int = 86400
52 changes: 49 additions & 3 deletions backend/apps/ai/flows/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from apps.ai.flows.collaborative import handle_collaborative_query
from apps.ai.query_analyzer import analyze_query
from apps.ai.router import route
from apps.ai.semantic_cache import get_cached_response, store_cached_response
from apps.common.open_ai import OpenAi
from apps.slack.constants import (
OWASP_COMMUNITY_CHANNEL_ID,
Expand Down Expand Up @@ -77,6 +78,13 @@
if image_context:
query = f"{query}{DELIMITER}Image context: {image_context}"

# Step 0: Check semantic cache
try:
if (cached := get_cached_response(query)) is not None:
return cached
except Exception:
logger.exception("Semantic cache lookup failed, proceeding without cache")
Comment thread
rudransh-shrivastava marked this conversation as resolved.
Outdated

# Step 1: Handle queries in owasp-community channel - suggest channels
# If query is in owasp-community channel, ALWAYS route to community agent
# for channel suggestions regardless of intent
Expand Down Expand Up @@ -258,12 +266,23 @@
extra={"query": query[:200]},
)
channel_agent = create_channel_agent()
return execute_task(
response = execute_task(
channel_agent,
query,
channel_id=channel_id,
is_channel_suggestion=True,
)
if response:
try:
store_cached_response(
query=query,
response=response,
)
except Exception:
logger.exception(
"Failed to store semantic cache entry"

Check failure on line 283 in backend/apps/ai/flows/assistant.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Define a constant instead of duplicating this literal "Failed to store semantic cache entry" 3 times.

See more on https://sonarcloud.io/project/issues?id=OWASP_Nest&issues=AZ2W0ItiivfTN5zj02ym&open=AZ2W0ItiivfTN5zj02ym&pullRequest=4548
) # NOSONAR: duplicate string literal

Check warning on line 284 in backend/apps/ai/flows/assistant.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Fix the syntax of this issue suppression comment.

See more on https://sonarcloud.io/project/issues?id=OWASP_Nest&issues=AZ2W0ItiivfTN5zj02yn&open=AZ2W0ItiivfTN5zj02yn&pullRequest=4548
return response

# Step 2: Analyze query complexity before routing
try:
Expand All @@ -283,7 +302,18 @@
# Step 3: Use collaborative flow for complex query
if not query_analysis["is_simple"] and len(query_analysis["sub_queries"]) > 1:
try:
return handle_collaborative_query(query, query_analysis["sub_queries"])
response = handle_collaborative_query(query, query_analysis["sub_queries"])
if response:
try:
store_cached_response(
query=query,
response=response,
)
except Exception:
logger.exception(
"Failed to store semantic cache entry"
) # NOSONAR: duplicate string literal

Check warning on line 315 in backend/apps/ai/flows/assistant.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Fix the syntax of this issue suppression comment.

See more on https://sonarcloud.io/project/issues?id=OWASP_Nest&issues=AZ2W0ItiivfTN5zj02yo&open=AZ2W0ItiivfTN5zj02yo&pullRequest=4548
return response
except Exception:
logger.exception(
"Collaborative flow failed, falling back to single agent: %s", query
Expand Down Expand Up @@ -383,11 +413,27 @@
agent = agent_factory()

# Step 8: Execute task with agent
return execute_task(agent, query)
response = execute_task(agent, query)

# Step 9: Store in semantic cache
if response:
try:
store_cached_response(
query=query,
response=response,
intent=intent,
confidence=confidence,
)
except Exception:
logger.exception(
"Failed to store semantic cache entry"
) # NOSONAR: duplicate string literal

Check warning on line 430 in backend/apps/ai/flows/assistant.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Fix the syntax of this issue suppression comment.

See more on https://sonarcloud.io/project/issues?id=OWASP_Nest&issues=AZ2W0ItiivfTN5zj02yp&open=AZ2W0ItiivfTN5zj02yp&pullRequest=4548

except Exception:
logger.exception("Failed to process query: %s", query)
return get_fallback_response()
else:
return response


def execute_task(
Expand Down
43 changes: 43 additions & 0 deletions backend/apps/ai/migrations/0011_semanticcache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Generated by Django 6.0.3 on 2026-04-13 14:06

import pgvector.django.vector
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``ai_semantic_cache`` table backing the SemanticCache model.

    Auto-generated by Django 6.0.3; keep edits to comments only.
    """

    # Must apply after the Context unique-together change in migration 0010.
    dependencies = [
        ("ai", "0010_alter_context_unique_together"),
    ]

    operations = [
        migrations.CreateModel(
            name="SemanticCache",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                # Timestamp columns managed automatically on create/update.
                ("nest_created_at", models.DateTimeField(auto_now_add=True)),
                ("nest_updated_at", models.DateTimeField(auto_now=True)),
                # Optional metadata captured when the entry was stored.
                ("confidence", models.FloatField(default=0.0, verbose_name="Confidence")),
                (
                    "intent",
                    models.CharField(blank=True, default="", max_length=50, verbose_name="Intent"),
                ),
                (
                    # 1536 dimensions — matches EMBEDDING_DIMENSIONS in the chunk model.
                    "query_embedding",
                    pgvector.django.vector.VectorField(
                        dimensions=1536, verbose_name="Query Embedding"
                    ),
                ),
                ("query_text", models.TextField(verbose_name="Query Text")),
                ("response_text", models.TextField(verbose_name="Response Text")),
            ],
            options={
                "verbose_name": "Semantic Cache",
                "db_table": "ai_semantic_cache",
            },
        ),
    ]
1 change: 1 addition & 0 deletions backend/apps/ai/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .chunk import Chunk
from .semantic_cache import SemanticCache
4 changes: 3 additions & 1 deletion backend/apps/ai/models/chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from apps.common.models import BulkSaveModel, TimestampedModel
from apps.common.utils import truncate

EMBEDDING_DIMENSIONS = 1536


class Chunk(TimestampedModel):
"""AI Chunk model for storing text chunks with embeddings."""
Expand All @@ -19,7 +21,7 @@ class Meta:
unique_together = ("context", "text")

context = models.ForeignKey(Context, on_delete=models.CASCADE, related_name="chunks")
embedding = VectorField(verbose_name="Embedding", dimensions=1536)
embedding = VectorField(verbose_name="Embedding", dimensions=EMBEDDING_DIMENSIONS)
text = models.TextField(verbose_name="Text")

def __str__(self):
Expand Down
120 changes: 120 additions & 0 deletions backend/apps/ai/models/semantic_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""AI app semantic cache model."""

import logging
from datetime import UTC, datetime, timedelta

from django.db import models
from pgvector.django import VectorField
from pgvector.django.functions import CosineDistance

from apps.ai.models.chunk import EMBEDDING_DIMENSIONS
from apps.common.models import TimestampedModel
from apps.common.utils import truncate

logger = logging.getLogger(__name__)


class SemanticCache(TimestampedModel):
    """Semantic cache model for storing query-response pairs with embeddings.

    Each row pairs a user query (plus its embedding vector) with the response
    that was generated for it, so that semantically similar future queries
    can reuse the stored response instead of re-running the agent pipeline.
    """

    class Meta:
        """Model options."""

        db_table = "ai_semantic_cache"
        verbose_name = "Semantic Cache"

    # Optional metadata recorded at store time; defaults make both fields
    # safe to omit when the caller has no routing information.
    confidence = models.FloatField(verbose_name="Confidence", default=0.0)
    intent = models.CharField(verbose_name="Intent", blank=True, default="", max_length=50)
    query_embedding = VectorField(verbose_name="Query Embedding", dimensions=EMBEDDING_DIMENSIONS)
    query_text = models.TextField(verbose_name="Query Text")
    response_text = models.TextField(verbose_name="Response Text")

    def __str__(self):
        """Human readable representation."""
        return f"SemanticCache {self.id}: {truncate(self.query_text, 50)}"

    @staticmethod
    def get_cached_response(
        query: str,
        *,
        similarity_threshold: float = 0.95,
        ttl_seconds: int = 86400,
    ) -> str | None:
        """Look up semantically similar cached response.

        Args:
            query: User query text.
            similarity_threshold: Minimum cosine similarity (0.0-1.0).
            ttl_seconds: Maximum age of cached entries in seconds.

        Returns:
            Cached response string if found, None otherwise.

        """
        # Imported lazily to avoid a circular import at module load time.
        from apps.ai.embeddings.factory import get_embedder  # noqa: PLC0415

        ttl_cutoff = datetime.now(UTC) - timedelta(seconds=ttl_seconds)
        # Cosine distance = 1 - cosine similarity, so the similarity floor
        # translates into a maximum allowed distance.
        max_distance = 1.0 - similarity_threshold

        result = (
            SemanticCache.objects.filter(nest_created_at__gte=ttl_cutoff)
            .annotate(
                distance=CosineDistance("query_embedding", get_embedder().embed_query(query))
            )
            .filter(distance__lte=max_distance)
            .order_by("distance")  # closest (most similar) entry first
            .first()
        )

        if result is not None:
            logger.info(
                "Semantic cache hit",
                extra={
                    "cache_id": result.id,
                    "distance": float(result.distance),
                    "query_preview": query[:100],
                },
            )
            return result.response_text

        return None

    @staticmethod
    def store_response(
        query: str,
        response: str,
        intent: str = "",
        confidence: float = 0.0,
    ) -> "SemanticCache":
        """Store query-response pair in semantic cache.

        Args:
            query: Original query text.
            response: Generated response text.
            intent: Classified intent for the query.
            confidence: Router confidence score.

        Returns:
            Created SemanticCache instance.

        """
        # Imported lazily to avoid a circular import at module load time.
        from apps.ai.embeddings.factory import get_embedder  # noqa: PLC0415

        entry = SemanticCache(
            query_text=query,
            query_embedding=get_embedder().embed_query(query),
            response_text=response,
            intent=intent,
            confidence=confidence,
        )
        entry.save()

        logger.info(
            "Semantic cache stored",
            extra={
                "cache_id": entry.id,
                "intent": intent,
                "query_preview": query[:100],
            },
        )
        return entry
56 changes: 56 additions & 0 deletions backend/apps/ai/semantic_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Semantic cache service for AI query responses."""

from apps.ai.common.crewai_config import CrewAIConfig
from apps.ai.models.semantic_cache import SemanticCache

_config = CrewAIConfig()


def get_cached_response(query: str) -> str | None:
    """Return the cached response for a semantically similar query, if any.

    Args:
        query: User query text.

    Returns:
        Cached response string if found within similarity threshold and TTL,
        None otherwise.

    """
    if _config.semantic_cache_enabled:
        return SemanticCache.get_cached_response(
            query,
            similarity_threshold=_config.semantic_cache_similarity_threshold,
            ttl_seconds=_config.semantic_cache_ttl_seconds,
        )

    # Caching disabled: behave as a guaranteed cache miss.
    return None


def store_cached_response(
    query: str,
    response: str,
    intent: str = "",
    confidence: float = 0.0,
) -> SemanticCache | None:
    """Store query-response pair in semantic cache.

    Args:
        query: Original query text.
        response: Generated response text.
        intent: Classified intent for the query.
        confidence: Router confidence score.

    Returns:
        Created SemanticCache instance, or None when caching is disabled.

    """
    if not _config.semantic_cache_enabled:
        return None

    return SemanticCache.store_response(
        query=query,
        response=response,
        intent=intent,
        confidence=confidence,
    )
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Loading
Loading