amd · kovtcharov · Jun 19, 2026 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
@@ -287,43 +287,75 @@ runs:
             Reconcile-LemonadeVersion -Expected $expected
 
             # An MSI version change swaps the server exe but leaves the
-            # versioned llama.cpp backend cached under ``%USERPROFILE%\.cache\
-            # lemonade\bin`` (lemonade ``path_utils.cpp`` get_cache_dir /
-            # get_downloaded_bin_dir). A *downgraded* server then can't spawn
-            # the stale newer backend -- the load fails with ``llama-server
-            # failed to start``. Wipe the backend cache so the pinned version
-            # reinstalls a matching one. GGUF weights live in the separate
-            # HuggingFace hub cache (``%USERPROFILE%\.cache\huggingface\hub``)
-            # and are untouched, so only the small backend is re-fetched, never
-            # the models. Runs whenever the action reconciles (a drifted *or*
-            # missing install); a version-matched runner skips this block
-            # entirely and pays nothing.
-            $cacheRoots = @(
+            # separately-downloaded llama.cpp backend (the actual ``llama-server``
+            # binary) in place. A *downgraded* server then spawns the stale newer
+            # backend and the load fails with ``llama-server failed to start``.
+            # Wipe the backend so the pinned version re-fetches a matching one.
+            #
+            # The backend dir is not at a single fixed path across runners: depending
+            # on Lemonade version and whether the server runs as SYSTEM it lands under
+            # the ``.cache\lemonade`` cache OR alongside the MSI under
+            # ``...\AppData\Local\lemonade_server``, as ``.../bin/llamacpp/<backend>/``
+            # (lemonade ``path_utils.cpp`` get_downloaded_bin_dir). So search every
+            # known root and remove the ``llamacpp`` backend dirs specifically --
+            # never the MSI's own LemonadeServer.exe/lemonade.exe, which live directly
+            # in ``bin`` (no ``llamacpp`` parent). GGUF weights live in the separate
+            # HuggingFace hub cache and are untouched, so only the small backend is
+            # re-fetched, never the models. Reconcile-gated: a version-matched runner
+            # skips this entirely and pays nothing.
+            $searchRoots = @(
                 "$env:USERPROFILE\.cache\lemonade",
-                "C:\windows\system32\config\systemprofile\.cache\lemonade"
+                "C:\windows\system32\config\systemprofile\.cache\lemonade",
+                "$env:LOCALAPPDATA\lemonade_server",
+                "C:\windows\system32\config\systemprofile\AppData\Local\lemonade_server"
             )
-            if ($env:LEMONADE_CACHE_DIR) { $cacheRoots += $env:LEMONADE_CACHE_DIR }
-            # Case-insensitive dedup (Windows paths) while preserving original
-            # casing for the log line -- Select-Object -Unique alone is case-sensitive.
+            if ($env:LEMONADE_CACHE_DIR) { $searchRoots += $env:LEMONADE_CACHE_DIR }
+            # Case-insensitive dedup (Windows paths) preserving original casing.
             $seenRoots = @{}
-            foreach ($root in ($cacheRoots | Where-Object { $_ })) {
+            $roots = foreach ($root in ($searchRoots | Where-Object { $_ })) {
                 $rootKey = $root.ToLowerInvariant()
                 if ($seenRoots.ContainsKey($rootKey)) { continue }
                 $seenRoots[$rootKey] = $true
-                $backendDir = Join-Path $root "bin"
-                if (Test-Path $backendDir) {
-                    Write-Host "Reconcile: clearing stale llama.cpp backend cache at $backendDir"
-                    # Fail loud: a swallowed wipe (e.g. a DLL locked by a stray
-                    # llama-server) would leave the stale backend in place and
-                    # the next load would fail with the same "llama-server
-                    # failed to start" -- with no signal pointing back here.
-                    try {
-                        Remove-Item $backendDir -Recurse -Force -ErrorAction Stop
-                    } catch {
-                        Write-Host "ERROR: failed to clear stale backend cache at $backendDir : $($_.Exception.Message)"
-                        Write-Host "       A stray llama-server may be holding a file lock; the runner needs manual cleanup."
-                        exit 1
+                if (Test-Path $root) { $root }
+            }
+
+            # Log every llama-server binary found (path + size + version) so the real
+            # backend layout is visible even when the wipe succeeds -- this is how we
+            # confirm a version change actually swapped the backend, not just the exe.
+            Write-Host "Reconcile: scanning for llama.cpp backend binaries..."
+            foreach ($root in $roots) {
+                Get-ChildItem $root -Recurse -File -Filter "llama-server*" -ErrorAction SilentlyContinue |
+                    ForEach-Object {
+                        $ver = ""
+                        try { $ver = (Get-Item -LiteralPath $_.FullName).VersionInfo.ProductVersion } catch {}
+                        Write-Host ("   found  {0,12:N0} bytes  ver={1,-12}  {2}" -f $_.Length, $ver, $_.FullName)
                     }
+            }
+
+            # Remove only the ``llamacpp`` backend dirs (unambiguously the downloaded
+            # backend; never the MSI core binaries).
+            $backendDirs = @(
+                foreach ($root in $roots) {
+                    Get-ChildItem $root -Recurse -Directory -Filter "llamacpp" -ErrorAction SilentlyContinue |
+                        Select-Object -ExpandProperty FullName
+                }
+            ) | Sort-Object -Unique  # one sorted, de-duplicated list of every llamacpp dir under the roots
+            if (-not $backendDirs) {
+                Write-Host "Reconcile: no llamacpp backend dir found under known roots -- the pinned version will fetch a matching backend on first load."
+            }
+            foreach ($bd in $backendDirs) {
+                if (-not (Test-Path $bd)) { continue }
+                Write-Host "Reconcile: clearing stale llama.cpp backend at $bd"
+                # Fail loud: a swallowed wipe (e.g. a DLL locked by a stray
+                # llama-server) would leave the stale backend in place and the next
+                # load would fail with the same "llama-server failed to start" --
+                # with no signal pointing back here.
+                try {
+                    Remove-Item $bd -Recurse -Force -ErrorAction Stop
+                } catch {
+                    Write-Host "ERROR: failed to clear stale backend at $bd : $($_.Exception.Message)"
+                    Write-Host "       A stray llama-server may be holding a file lock; the runner needs manual cleanup."
+                    exit 1
                 }
             }
 

@@ -12,6 +12,7 @@ on:
       - 'src/gaia/llm/**'
       - 'tests/test_lemonade_embeddings.py'
       - 'setup.py'
+      - 'src/gaia/version.py'
       - '.github/workflows/test_embeddings.yml'
   pull_request:
     branches: [ main ]
@@ -21,6 +22,7 @@ on:
       - 'src/gaia/llm/**'
       - 'tests/test_lemonade_embeddings.py'
       - 'setup.py'
+      - 'src/gaia/version.py'
       - '.github/workflows/test_embeddings.yml'
   merge_group:
   workflow_dispatch:
@@ -135,6 +137,49 @@ jobs:
                 if ($_.ErrorDetails) {
                     Write-Host "Error details: $($_.ErrorDetails.Message)"
                 }
+                # Lemonade returns a generic 500 "llama-server failed to start"
+                # and hides the backend's own stderr. Surface ground truth so the
+                # real cause is debuggable (gaia#941): (1) the actual llama-server /
+                # server logs, and (2) where the llama.cpp backend binary actually
+                # lives + its build version, to confirm whether a stale newer
+                # backend survived an MSI downgrade or a fresh one still crashes.
+                Write-Host "`n=== Lemonade log files (most recent, tail 120) ==="
+                $logRoots = @(
+                    "$env:USERPROFILE\.cache\lemonade", "$env:LOCALAPPDATA\lemonade_server",
+                    "C:\windows\system32\config\systemprofile\AppData\Local\lemonade_server",
+                    "C:\windows\system32\config\systemprofile\.cache\lemonade",
+                    $env:LEMONADE_CACHE_DIR  # custom cache root when set; a null entry is dropped by the filter below
+                ) | Where-Object { $_ } | Sort-Object -Unique  # case-insensitive dedup (Select-Object -Unique is case-sensitive)
+                foreach ($lr in $logRoots) {
+                    if (-not (Test-Path $lr)) { continue }
+                    Get-ChildItem $lr -Recurse -File -Include "*.log","server.log","*llama*server*.txt" -ErrorAction SilentlyContinue |
+                        Sort-Object LastWriteTime -Descending | Select-Object -First 4 | ForEach-Object {
+                            Write-Host "--- $($_.FullName) (tail 120) ---"
+                            Get-Content $_.FullName -Tail 120 -ErrorAction SilentlyContinue | ForEach-Object { Write-Host $_ }
+                        }
+                }
+                Write-Host "`n=== llama.cpp backend binaries found (path + size + version) ==="
+                foreach ($lr in $logRoots) {
+                    if (-not (Test-Path $lr)) { continue }
+                    Get-ChildItem $lr -Recurse -File -Filter "llama-server*" -ErrorAction SilentlyContinue |
+                        ForEach-Object {
+                            $ver = ""
+                            try { $ver = (Get-Item -LiteralPath $_.FullName).VersionInfo.ProductVersion } catch {}
+                            Write-Host ("   {0,14:N0} bytes  ver={1,-12}  {2}" -f $_.Length, $ver, $_.FullName)
+                        }
+                }
+                Write-Host "`n=== lemonade cache/install tree (dirs only, for layout) ==="
+                foreach ($lr in $logRoots) {
+                    if (-not (Test-Path $lr)) { continue }
+                    Write-Host "--- under $lr ---"
+                    Get-ChildItem $lr -Recurse -Directory -ErrorAction SilentlyContinue |
+                        Select-Object -First 60 | ForEach-Object { Write-Host "   $($_.FullName)" }
+                }
+                if ($env:LEMONADE_JOB_ID) {
+                    Write-Host "`n=== Lemonade serve job stream ==="
+                    $jobOutput = Receive-Job -Id $env:LEMONADE_JOB_ID -ErrorAction SilentlyContinue
+                    if ($jobOutput) { $jobOutput | ForEach-Object { Write-Host $_ } } else { Write-Host "(empty)" }
+                }
                 throw "Failed to load embedding model"
             }
 

@@ -11,6 +11,7 @@ on:
       - 'src/gaia/llm/**'
       - 'src/gaia/installer/**'
       - 'setup.py'
+      - 'src/gaia/version.py'
       - '.github/workflows/test_lemonade_server.yml'
       - '.github/actions/install-lemonade/**'
       - '.github/actions/setup-venv/**'
@@ -23,6 +24,7 @@ on:
       - 'src/gaia/llm/**'
       - 'src/gaia/installer/**'
       - 'setup.py'
+      - 'src/gaia/version.py'
       - '.github/workflows/test_lemonade_server.yml'
       - '.github/actions/install-lemonade/**'
       - '.github/actions/setup-venv/**'

@@ -13,6 +13,7 @@ on:
       - 'src/gaia/vlm/**'
       - 'tests/test_rag*.py'
       - 'setup.py'
+      - 'src/gaia/version.py'
       - '.github/workflows/test_rag.yml'
   pull_request:
     branches: [ main ]
@@ -23,6 +24,7 @@ on:
       - 'src/gaia/vlm/**'
       - 'tests/test_rag*.py'
       - 'setup.py'
+      - 'src/gaia/version.py'
       - '.github/workflows/test_rag.yml'
   merge_group:
   workflow_dispatch:

@@ -28,12 +28,12 @@ Included demos:
 
 The agent connects to an OpenAI-compatible LLM server at `http://localhost:8000/api/v1` by default. The reference backend is [Lemonade Server](https://github.com/lemonade-sdk/lemonade), which runs models locally on AMD hardware.
 
-Download and install Lemonade Server v10.7.0, then start it:
+Download and install Lemonade Server v10.2.0, then start it:
 
 **Windows:**
 ```powershell
 # Download and run the MSI installer
-curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.7.0/lemonade-server-minimal.msi
+curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.2.0/lemonade-server-minimal.msi
 msiexec /i lemonade-server-minimal.msi
 ```
 
@@ -44,7 +44,7 @@ sudo add-apt-repository ppa:lemonade-team/stable
 sudo apt install lemonade-server
 ```
 
-Or browse all platform options on the [Lemonade v10.7.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.7.0).
+Or browse all platform options on the [Lemonade v10.2.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.2.0).
 
 After installation, start the server:
 ```bash

@@ -76,17 +76,17 @@ icon: "wrench"
 
     The agent needs an OpenAI-compatible LLM server. [Lemonade Server](https://lemonade-server.ai) is recommended (optimized for AMD hardware).
 
-    Download and run the installer (v10.7.0):
+    Download and run the installer (v10.2.0):
 
     ```powershell
     # Download the MSI installer
-    curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.7.0/lemonade-server-minimal.msi
+    curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.2.0/lemonade-server-minimal.msi
 
     # Run the installer
     msiexec /i lemonade-server-minimal.msi
     ```
 
-    Or download directly from the [Lemonade v10.7.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.7.0).
+    Or download directly from the [Lemonade v10.2.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.2.0).
 
     After installation, restart your terminal and start the server:
     ```powershell
@@ -176,7 +176,7 @@ icon: "wrench"
     sudo apt-get install -y lemonade-server
     ```
 
-    Or browse all platform options on the [Lemonade v10.7.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.7.0).
+    Or browse all platform options on the [Lemonade v10.2.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.2.0).
 
     After installation, start the server:
     ```bash
@@ -219,7 +219,7 @@ icon: "wrench"
 | C++ Compiler | C++17 support (MSVC 2019+ or GCC 9+) | `cl` (Windows) or `g++ --version` (Linux) |
 | CMake | 3.14+ | `cmake --version` |
 | Git | any | `git --version` |
-| [Lemonade Server](https://lemonade-server.ai) | 10.7.0 | `lemonade-server --version` |
+| [Lemonade Server](https://lemonade-server.ai) | 10.2.0 | `lemonade-server --version` |
 | uvx | any (optional) | `uvx --version` |
 
 <Note>

@@ -12,7 +12,7 @@ GAIA supports running agents on your AMD Ryzen AI NPU via the [FastFlowLM (FLM)]
 - **Hardware:** AMD Ryzen AI 300/400/Max series processor with XDNA2 NPU
   - Strix Point, Strix Halo, Kraken Point, or Gorgon Point
   - Ryzen AI 7000/8000/200-series (XDNA1) is **not supported**
-- **Software:** [Lemonade Server](https://lemonade-server.ai) v10.7.0+
+- **Software:** [Lemonade Server](https://lemonade-server.ai) v10.2.0+
 - **Driver:** Latest AMD NPU driver (firmware v1.1.0.0+)
 
 <Warning>

@@ -8,8 +8,18 @@
 
 __version__ = "0.21.2"
 
-# Lemonade version used across CI and installer
-LEMONADE_VERSION = "10.7.0"
+# Lemonade version used across CI and installer.
+#
+# DO NOT bump past this pin until the embedding regression below is verified fixed.
+# Lemonade's newer bundled llama.cpp crashes loading the nomic-embed-text-v2-moe-GGUF
+# embedding model -- llama-server "failed to start" -- breaking RAG / code-index /
+# agent-memory embeddings. Regular LLM GGUFs load fine, so CI fails ONLY on the
+# embedding jobs. CI proved 10.2.0 (pre-b8766) loads the model and BOTH 10.6.0 (b9253)
+# and 10.7.0 (b9585) crash, so the regression entered the build range b8766..b9253.
+# 10.2.0 is the proven last-known-good and is pinned here until a newer version is
+# verified to load this model -- the embeddings/RAG CI jobs print the llama-server
+# log on load failure so a future bump can confirm before landing. Tracked by gaia#941.
+LEMONADE_VERSION = "10.2.0"
 
 
 def get_package_version() -> str: