From 07618b4c25698fbe3ed9ce6739a87bb266fa398b Mon Sep 17 00:00:00 2001 From: Wouter Deconinck Date: Sun, 10 May 2026 15:14:02 -0500 Subject: [PATCH 1/5] ci: convert to pure DAG pipeline Remove the top-level stages: list so that execution ordering is enforced exclusively through the needs: dependency graph. Keep individual stage: labels on jobs for UI grouping only. Add needs: [] to root-node jobs that have no upstream dependencies: - version - nvidia-smi - status:pending - .prune (and derived prune:gpu, prune:docker-new) - clean_internal_tag - .clean_unstable_mr (and derived clean_unstable_mr:gpu, :docker-new) - status:success - status:failure Add needs: [version] to spack-cache-cleanup so it can access the INTERNAL_TAG artifact produced by version. Remove the redundant dependencies: [] from status:success and status:failure, since needs: [] already prevents artifact download. Without stages:, GitLab cannot fall back to stage-based ordering when a needs: entry is accidentally omitted, making DAG violations immediately visible as pipeline validation errors. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitlab-ci.yml | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e86ec08c2..0c5151907 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -50,17 +50,6 @@ variables: GITHUB_SHA: "" GITHUB_PR: "" -stages: - - status-pending - - config - - base ## base OS image - - eic ## EIC container images - - deploy ## build/deploy singularity images - - benchmarks - - test - - finalize - - status-report - ## only run CI for in the following cases: ## master, stable branch, release tag, MR event and nightly builds ## nightly builds are now part of the regular master build in order to keep @@ -100,6 +89,7 @@ workflow: nvidia-smi: stage: config + needs: [] image: nvidia/cuda:${CUDA_VERSION}-base-${CUDA_OS} tags: - gpu @@ -116,6 +106,7 @@ nvidia-smi: ## - export tag to public registries, optional secondary export tag version: stage: config + needs: [] script: - | if [ -n "${VERSION}" ]; then @@ -188,6 +179,7 @@ version: status:pending: stage: status-pending + needs: [] extends: .status variables: STATE: "pending" @@ -747,6 +739,7 @@ df: .prune: extends: .build stage: config + needs: [] rules: - when: manual script: @@ -771,6 +764,7 @@ prune:docker-new: clean_internal_tag: image: alpine/curl stage: finalize + needs: [] rules: - when: manual script: @@ -785,6 +779,7 @@ clean_internal_tag: .clean_unstable_mr: extends: .docker stage: finalize + needs: [] when: always script: - apk add curl jq @@ -881,6 +876,7 @@ clean_pipeline:docker-new: spack-cache-cleanup: extends: .build stage: finalize + needs: [version] when: always allow_failure: true script: @@ -892,7 +888,7 @@ spack-cache-cleanup: status:success: stage: status-report - dependencies: [] + needs: [] extends: .status variables: STATE: "success" @@ -901,7 +897,7 @@ status:success: status:failure: stage: status-report - dependencies: [] + needs: [] extends: .status variables: STATE: "failure" From 98d4c1e40a2cb4686bbcb334aa1e4107c74a0e9d Mon Sep 17 00:00:00 2001 From: Wouter Deconinck Date: Sun, 10 May 2026 15:26:16 -0500 Subject: [PATCH 2/5] ci: restore stages: list (required for custom stage names) GitLab requires stages: to be defined when jobs use custom stage names. Without it, GitLab falls back to the built-in stages only (.pre, build, test, deploy, .post), causing pipeline validation to fail for any job with stage: config, base, eic, etc. Keep stages: for stage name registration; all job ordering is still enforced purely through needs:. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitlab-ci.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0c5151907..ce7792953 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -50,6 +50,17 @@ variables: GITHUB_SHA: "" GITHUB_PR: "" +stages: + - status-pending + - config + - base ## base OS image + - eic ## EIC container images + - deploy ## build/deploy singularity images + - benchmarks + - test + - finalize + - status-report + ## only run CI for in the following cases: ## master, stable branch, release tag, MR event and nightly builds ## nightly builds are now part of the regular master build in order to keep From 9742e23897d9d0994b1ba1f11a398d3d6dbe71e8 Mon Sep 17 00:00:00 2001 From: Wouter Deconinck Date: Sun, 10 May 2026 15:30:37 -0500 Subject: [PATCH 3/5] ci: make status:success depend on all terminal jobs With needs: [], status:success ran immediately at pipeline start. Enumerate all non-manual terminal jobs (those nothing else transitively depends on) as explicit needs: so status:success only runs after the entire DAG has completed. Terminal jobs listed in needs:: - nvidia-smi (config; allow_failure: true) - user_spack_environment (benchmarks) - cuda:torch (benchmarks; allow_failure: true) - eic_xl:singularity:default/nightly (deploy) - benchmarks:geoviewer/detector/phyiscs:default (benchmarks) - benchmarks:detector/physics:nightly (benchmarks) - clean_pipeline:gpu/docker-new (finalize; when: always) - clean_unstable_mr:gpu/docker-new (finalize; when: always) - spack-cache-cleanup (finalize; when: always) status:failure keeps needs: [] + when: on_failure which in GitLab fires when any job in the pipeline fails (special behaviour of needs: [] + on_failure). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitlab-ci.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ce7792953..2b9181298 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -899,7 +899,22 @@ spack-cache-cleanup: status:success: stage: status-report - needs: [] + needs: + - nvidia-smi + - user_spack_environment + - cuda:torch + - eic_xl:singularity:default + - eic_xl:singularity:nightly + - benchmarks:geoviewer:default + - benchmarks:detector:default + - benchmarks:detector:nightly + - benchmarks:phyiscs:default + - benchmarks:physics:nightly + - clean_pipeline:gpu + - clean_pipeline:docker-new + - clean_unstable_mr:gpu + - clean_unstable_mr:docker-new + - spack-cache-cleanup extends: .status variables: STATE: "success" From 03a8c3e18e7ef1a2aea34d34ec332f79ef0dd904 Mon Sep 17 00:00:00 2001 From: Wouter Deconinck Date: Sun, 10 May 2026 15:38:58 -0500 Subject: [PATCH 4/5] ci: mark all status:success needs as optional: true Many terminal jobs are conditional (rules:) and may not exist in every pipeline variant. GitLab requires optional: true for needs entries that may be absent. Mark all fifteen terminal-job needs as optional so the validator does not reject the config when any of them is skipped. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitlab-ci.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2b9181298..d4326dc4e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -900,21 +900,21 @@ spack-cache-cleanup: status:success: stage: status-report needs: - - nvidia-smi - - user_spack_environment - - cuda:torch - - eic_xl:singularity:default - - eic_xl:singularity:nightly - - benchmarks:geoviewer:default - - benchmarks:detector:default - - benchmarks:detector:nightly - - benchmarks:phyiscs:default - - benchmarks:physics:nightly - - clean_pipeline:gpu - - clean_pipeline:docker-new - - clean_unstable_mr:gpu - - clean_unstable_mr:docker-new - - spack-cache-cleanup + - { job: nvidia-smi, optional: true } + - { job: user_spack_environment, optional: true } + - { job: "cuda:torch", optional: true } + - { job: "eic_xl:singularity:default", optional: true } + - { job: "eic_xl:singularity:nightly", optional: true } + - { job: "benchmarks:geoviewer:default", optional: true } + - { job: "benchmarks:detector:default", optional: true } + - { job: "benchmarks:detector:nightly", optional: true } + - { job: "benchmarks:phyiscs:default", optional: true } + - { job: "benchmarks:physics:nightly", optional: true } + - { job: "clean_pipeline:gpu", optional: true } + - { job: "clean_pipeline:docker-new", optional: true } + - { job: "clean_unstable_mr:gpu", optional: true } + - { job: "clean_unstable_mr:docker-new", optional: true } + - { job: spack-cache-cleanup, optional: true } extends: .status variables: STATE: "success" From c4e4256e551ff1e0d1d042d5e29571f3f16f03ed Mon Sep 17 00:00:00 2001 From: Wouter Deconinck Date: Sun, 10 May 2026 15:43:34 -0500 Subject: [PATCH 5/5] ci: run cleanup jobs only after benchmarks and singularity complete The clean_pipeline and clean_unstable_mr jobs remove Docker images from runners. If they run before benchmarks/singularity jobs finish, those jobs can fail when their images are gone. Add all benchmark and singularity terminal jobs as optional needs (optional: true because they are conditional on rules) to both .clean_pipeline and .clean_unstable_mr base templates. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitlab-ci.yml | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d4326dc4e..3a6830f20 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -790,7 +790,16 @@ clean_internal_tag: .clean_unstable_mr: extends: .docker stage: finalize - needs: [] + needs: + - { job: user_spack_environment, optional: true } + - { job: "cuda:torch", optional: true } + - { job: "eic_xl:singularity:default", optional: true } + - { job: "eic_xl:singularity:nightly", optional: true } + - { job: "benchmarks:geoviewer:default", optional: true } + - { job: "benchmarks:detector:default", optional: true } + - { job: "benchmarks:detector:nightly", optional: true } + - { job: "benchmarks:phyiscs:default", optional: true } + - { job: "benchmarks:physics:nightly", optional: true } when: always script: - apk add curl jq @@ -834,6 +843,15 @@ clean_unstable_mr:docker-new: needs: - version - clean_internal_tag + - { job: user_spack_environment, optional: true } + - { job: "cuda:torch", optional: true } + - { job: "eic_xl:singularity:default", optional: true } + - { job: "eic_xl:singularity:nightly", optional: true } + - { job: "benchmarks:geoviewer:default", optional: true } + - { job: "benchmarks:detector:default", optional: true } + - { job: "benchmarks:detector:nightly", optional: true } + - { job: "benchmarks:phyiscs:default", optional: true } + - { job: "benchmarks:physics:nightly", optional: true } when: always script: - apk add curl jq