diff --git a/.github/actions/install-lemonade/action.yml b/.github/actions/install-lemonade/action.yml index c2e5ca74d..0745081b6 100644 --- a/.github/actions/install-lemonade/action.yml +++ b/.github/actions/install-lemonade/action.yml @@ -287,43 +287,75 @@ runs: Reconcile-LemonadeVersion -Expected $expected # An MSI version change swaps the server exe but leaves the - # versioned llama.cpp backend cached under ``%USERPROFILE%\.cache\ - # lemonade\bin`` (lemonade ``path_utils.cpp`` get_cache_dir / - # get_downloaded_bin_dir). A *downgraded* server then can't spawn - # the stale newer backend -- the load fails with ``llama-server - # failed to start``. Wipe the backend cache so the pinned version - # reinstalls a matching one. GGUF weights live in the separate - # HuggingFace hub cache (``%USERPROFILE%\.cache\huggingface\hub``) - # and are untouched, so only the small backend is re-fetched, never - # the models. Runs whenever the action reconciles (a drifted *or* - # missing install); a version-matched runner skips this block - # entirely and pays nothing. - $cacheRoots = @( + # separately-downloaded llama.cpp backend (the actual ``llama-server`` + # binary) in place. A *downgraded* server then spawns the stale newer + # backend and the load fails with ``llama-server failed to start``. + # Wipe the backend so the pinned version re-fetches a matching one. + # + # The backend dir is not at a single fixed path across runners: depending + # on Lemonade version and whether the server runs as SYSTEM it lands under + # the ``.cache\lemonade`` cache OR alongside the MSI under + # ``...\AppData\Local\lemonade_server``, as ``.../bin/llamacpp/<backend>/`` + # (lemonade ``path_utils.cpp`` get_downloaded_bin_dir). So search every + # known root and remove the ``llamacpp`` backend dirs specifically -- + # never the MSI's own LemonadeServer.exe/lemonade.exe, which live directly + # in ``bin`` (no ``llamacpp`` parent). 
GGUF weights live in the separate + # HuggingFace hub cache and are untouched, so only the small backend is + # re-fetched, never the models. Reconcile-gated: a version-matched runner + # skips this entirely and pays nothing. + $searchRoots = @( "$env:USERPROFILE\.cache\lemonade", - "C:\windows\system32\config\systemprofile\.cache\lemonade" + "C:\windows\system32\config\systemprofile\.cache\lemonade", + "$env:LOCALAPPDATA\lemonade_server", + "C:\windows\system32\config\systemprofile\AppData\Local\lemonade_server" ) - if ($env:LEMONADE_CACHE_DIR) { $cacheRoots += $env:LEMONADE_CACHE_DIR } - # Case-insensitive dedup (Windows paths) while preserving original - # casing for the log line -- Select-Object -Unique alone is case-sensitive. + if ($env:LEMONADE_CACHE_DIR) { $searchRoots += $env:LEMONADE_CACHE_DIR } + # Case-insensitive dedup (Windows paths) preserving original casing. $seenRoots = @{} - foreach ($root in ($cacheRoots | Where-Object { $_ })) { + $roots = foreach ($root in ($searchRoots | Where-Object { $_ })) { $rootKey = $root.ToLowerInvariant() if ($seenRoots.ContainsKey($rootKey)) { continue } $seenRoots[$rootKey] = $true - $backendDir = Join-Path $root "bin" - if (Test-Path $backendDir) { - Write-Host "Reconcile: clearing stale llama.cpp backend cache at $backendDir" - # Fail loud: a swallowed wipe (e.g. a DLL locked by a stray - # llama-server) would leave the stale backend in place and - # the next load would fail with the same "llama-server - # failed to start" -- with no signal pointing back here. - try { - Remove-Item $backendDir -Recurse -Force -ErrorAction Stop - } catch { - Write-Host "ERROR: failed to clear stale backend cache at $backendDir : $($_.Exception.Message)" - Write-Host " A stray llama-server may be holding a file lock; the runner needs manual cleanup." 
- exit 1 + if (Test-Path $root) { $root } + } + + # Log every llama-server binary found (path + size + version) so the real + # backend layout is visible even when the wipe succeeds -- this is how we + # confirm a version change actually swapped the backend, not just the exe. + Write-Host "Reconcile: scanning for llama.cpp backend binaries..." + foreach ($root in $roots) { + Get-ChildItem $root -Recurse -File -Filter "llama-server*" -ErrorAction SilentlyContinue | + ForEach-Object { + $ver = "" + try { $ver = (Get-Item -LiteralPath $_.FullName).VersionInfo.ProductVersion } catch {} + Write-Host (" found {0,12:N0} bytes ver={1,-12} {2}" -f $_.Length, $ver, $_.FullName) } + } + + # Remove only the ``llamacpp`` backend dirs (unambiguously the downloaded + # backend; never the MSI core binaries). + $backendDirs = @() + foreach ($root in $roots) { + $backendDirs += Get-ChildItem $root -Recurse -Directory -Filter "llamacpp" -ErrorAction SilentlyContinue | + Select-Object -ExpandProperty FullName + } + $backendDirs = $backendDirs | Sort-Object -Unique + if (-not $backendDirs) { + Write-Host "Reconcile: no llamacpp backend dir found under known roots -- the pinned version will fetch a matching backend on first load." + } + foreach ($bd in $backendDirs) { + if (-not (Test-Path $bd)) { continue } + Write-Host "Reconcile: clearing stale llama.cpp backend at $bd" + # Fail loud: a swallowed wipe (e.g. a DLL locked by a stray + # llama-server) would leave the stale backend in place and the next + # load would fail with the same "llama-server failed to start" -- + # with no signal pointing back here. + try { + Remove-Item $bd -Recurse -Force -ErrorAction Stop + } catch { + Write-Host "ERROR: failed to clear stale backend at $bd : $($_.Exception.Message)" + Write-Host " A stray llama-server may be holding a file lock; the runner needs manual cleanup." 
+ exit 1 } } diff --git a/.github/workflows/test_embeddings.yml b/.github/workflows/test_embeddings.yml index d1bcc0be8..4b3ba50c2 100644 --- a/.github/workflows/test_embeddings.yml +++ b/.github/workflows/test_embeddings.yml @@ -12,6 +12,7 @@ on: - 'src/gaia/llm/**' - 'tests/test_lemonade_embeddings.py' - 'setup.py' + - 'src/gaia/version.py' - '.github/workflows/test_embeddings.yml' pull_request: branches: [ main ] @@ -21,6 +22,7 @@ on: - 'src/gaia/llm/**' - 'tests/test_lemonade_embeddings.py' - 'setup.py' + - 'src/gaia/version.py' - '.github/workflows/test_embeddings.yml' merge_group: workflow_dispatch: @@ -135,6 +137,49 @@ jobs: if ($_.ErrorDetails) { Write-Host "Error details: $($_.ErrorDetails.Message)" } + # Lemonade returns a generic 500 "llama-server failed to start" + # and hides the backend's own stderr. Surface ground truth so the + # real cause is debuggable (gaia#941): (1) the actual llama-server / + # server logs, and (2) where the llama.cpp backend binary actually + # lives + its build version, to confirm whether a stale newer + # backend survived an MSI downgrade or a fresh one still crashes. 
+ Write-Host "`n=== Lemonade log files (most recent, tail 120) ===" + $logRoots = @( + "$env:USERPROFILE\.cache\lemonade", + "$env:LOCALAPPDATA\lemonade_server", + "C:\windows\system32\config\systemprofile\AppData\Local\lemonade_server", + "C:\windows\system32\config\systemprofile\.cache\lemonade" + ) | Where-Object { $_ } | Select-Object -Unique + foreach ($lr in $logRoots) { + if (-not (Test-Path $lr)) { continue } + Get-ChildItem $lr -Recurse -File -Include "*.log","server.log","*llama*server*.txt" -ErrorAction SilentlyContinue | + Sort-Object LastWriteTime -Descending | Select-Object -First 4 | ForEach-Object { + Write-Host "--- $($_.FullName) (tail 120) ---" + Get-Content $_.FullName -Tail 120 -ErrorAction SilentlyContinue | ForEach-Object { Write-Host $_ } + } + } + Write-Host "`n=== llama.cpp backend binaries found (path + size + version) ===" + foreach ($lr in $logRoots) { + if (-not (Test-Path $lr)) { continue } + Get-ChildItem $lr -Recurse -File -Filter "llama-server*" -ErrorAction SilentlyContinue | + ForEach-Object { + $ver = "" + try { $ver = (Get-Item -LiteralPath $_.FullName).VersionInfo.ProductVersion } catch {} + Write-Host (" {0,14:N0} bytes ver={1,-12} {2}" -f $_.Length, $ver, $_.FullName) + } + } + Write-Host "`n=== lemonade cache/install tree (dirs only, for layout) ===" + foreach ($lr in $logRoots) { + if (-not (Test-Path $lr)) { continue } + Write-Host "--- under $lr ---" + Get-ChildItem $lr -Recurse -Directory -ErrorAction SilentlyContinue | + Select-Object -First 60 | ForEach-Object { Write-Host " $($_.FullName)" } + } + if ($env:LEMONADE_JOB_ID) { + Write-Host "`n=== Lemonade serve job stream ===" + $jobOutput = Receive-Job -Id $env:LEMONADE_JOB_ID -ErrorAction SilentlyContinue + if ($jobOutput) { $jobOutput | ForEach-Object { Write-Host $_ } } else { Write-Host "(empty)" } + } throw "Failed to load embedding model" } diff --git a/.github/workflows/test_lemonade_server.yml b/.github/workflows/test_lemonade_server.yml index 
a3ed71404..341b60a65 100644 --- a/.github/workflows/test_lemonade_server.yml +++ b/.github/workflows/test_lemonade_server.yml @@ -11,6 +11,7 @@ on: - 'src/gaia/llm/**' - 'src/gaia/installer/**' - 'setup.py' + - 'src/gaia/version.py' - '.github/workflows/test_lemonade_server.yml' - '.github/actions/install-lemonade/**' - '.github/actions/setup-venv/**' @@ -23,6 +24,7 @@ on: - 'src/gaia/llm/**' - 'src/gaia/installer/**' - 'setup.py' + - 'src/gaia/version.py' - '.github/workflows/test_lemonade_server.yml' - '.github/actions/install-lemonade/**' - '.github/actions/setup-venv/**' diff --git a/.github/workflows/test_rag.yml b/.github/workflows/test_rag.yml index edbc99d81..97cbca282 100644 --- a/.github/workflows/test_rag.yml +++ b/.github/workflows/test_rag.yml @@ -13,6 +13,7 @@ on: - 'src/gaia/vlm/**' - 'tests/test_rag*.py' - 'setup.py' + - 'src/gaia/version.py' - '.github/workflows/test_rag.yml' pull_request: branches: [ main ] @@ -23,6 +24,7 @@ on: - 'src/gaia/vlm/**' - 'tests/test_rag*.py' - 'setup.py' + - 'src/gaia/version.py' - '.github/workflows/test_rag.yml' merge_group: workflow_dispatch: diff --git a/cpp/README.md b/cpp/README.md index 01fd86bef..ea0af6af4 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -28,12 +28,12 @@ Included demos: The agent connects to an OpenAI-compatible LLM server at `http://localhost:8000/api/v1` by default. The reference backend is [Lemonade Server](https://github.com/lemonade-sdk/lemonade), which runs models locally on AMD hardware. 
-Download and install Lemonade Server v10.7.0, then start it: +Download and install Lemonade Server v10.2.0, then start it: **Windows:** ```powershell # Download and run the MSI installer -curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.7.0/lemonade-server-minimal.msi +curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.2.0/lemonade-server-minimal.msi msiexec /i lemonade-server-minimal.msi ``` @@ -44,7 +44,7 @@ sudo add-apt-repository ppa:lemonade-team/stable sudo apt install lemonade-server ``` -Or browse all platform options on the [Lemonade v10.7.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.7.0). +Or browse all platform options on the [Lemonade v10.2.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.2.0). After installation, start the server: ```bash diff --git a/docs/cpp/setup.mdx b/docs/cpp/setup.mdx index 08811215b..b62197855 100644 --- a/docs/cpp/setup.mdx +++ b/docs/cpp/setup.mdx @@ -76,17 +76,17 @@ icon: "wrench" The agent needs an OpenAI-compatible LLM server. [Lemonade Server](https://lemonade-server.ai) is recommended (optimized for AMD hardware). - Download and run the installer (v10.7.0): + Download and run the installer (v10.2.0): ```powershell # Download the MSI installer - curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.7.0/lemonade-server-minimal.msi + curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.2.0/lemonade-server-minimal.msi # Run the installer msiexec /i lemonade-server-minimal.msi ``` - Or download directly from the [Lemonade v10.7.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.7.0). + Or download directly from the [Lemonade v10.2.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.2.0). 
After installation, restart your terminal and start the server: ```powershell @@ -176,7 +176,7 @@ icon: "wrench" sudo apt-get install -y lemonade-server ``` - Or browse all platform options on the [Lemonade v10.7.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.7.0). + Or browse all platform options on the [Lemonade v10.2.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.2.0). After installation, start the server: ```bash @@ -219,7 +219,7 @@ icon: "wrench" | C++ Compiler | C++17 support (MSVC 2019+ or GCC 9+) | `cl` (Windows) or `g++ --version` (Linux) | | CMake | 3.14+ | `cmake --version` | | Git | any | `git --version` | -| [Lemonade Server](https://lemonade-server.ai) | 10.7.0 | `lemonade-server --version` | +| [Lemonade Server](https://lemonade-server.ai) | 10.2.0 | `lemonade-server --version` | | uvx | any (optional) | `uvx --version` | diff --git a/docs/guides/npu.mdx b/docs/guides/npu.mdx index ec9966f82..c476798c8 100644 --- a/docs/guides/npu.mdx +++ b/docs/guides/npu.mdx @@ -12,7 +12,7 @@ GAIA supports running agents on your AMD Ryzen AI NPU via the [FastFlowLM (FLM)] - **Hardware:** AMD Ryzen AI 300/400/Max series processor with XDNA2 NPU - Strix Point, Strix Halo, Kraken Point, or Gorgon Point - Ryzen AI 7000/8000/200-series (XDNA1) is **not supported** -- **Software:** [Lemonade Server](https://lemonade-server.ai) v10.7.0+ +- **Software:** [Lemonade Server](https://lemonade-server.ai) v10.2.0+ - **Driver:** Latest AMD NPU driver (firmware v1.1.0.0+) diff --git a/src/gaia/version.py b/src/gaia/version.py index 5b752e3bf..308cf8e96 100644 --- a/src/gaia/version.py +++ b/src/gaia/version.py @@ -8,8 +8,18 @@ __version__ = "0.21.2" -# Lemonade version used across CI and installer -LEMONADE_VERSION = "10.7.0" +# Lemonade version used across CI and installer. +# +# DO NOT bump past this pin until the embedding regression below is verified fixed. 
+# Lemonade's newer bundled llama.cpp crashes loading the nomic-embed-text-v2-moe-GGUF +# embedding model -- llama-server "failed to start" -- breaking RAG / code-index / +# agent-memory embeddings. Regular LLM GGUFs load fine, so CI fails ONLY on the +# embedding jobs. CI proved 10.2.0 (pre-b8766) loads the model and BOTH 10.6.0 (b9253) +# and 10.7.0 (b9585) crash, so the regression entered the build range b8766..b9253. +# 10.2.0 is the proven last-known-good and is pinned here until a newer version is +# verified to load this model -- the embeddings/RAG CI jobs print the llama-server +# log on load failure so a future bump can confirm before landing. Tracked by gaia#941. +LEMONADE_VERSION = "10.2.0" def get_package_version() -> str: