Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 32 additions & 6 deletions .github/workflows/multi_arch_ci_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -287,22 +287,48 @@ jobs:
# so the split path is unconditional here. The split itself runs inline
# in multi_arch_build_portable_linux_pytorch_wheels_ci.yml based on
# inputs.kpack_split.
#
# Per-ref amdgpu family exclusions (e.g. gfx125x on release/2.10) are
# applied via configure_pytorch_release_matrix.py, matching the release
# workflow in multi_arch_release_linux_pytorch_wheels.yml.
# Computes the PyTorch build matrix (one entry per Python version / PyTorch
# ref) by running configure_pytorch_release_matrix.py, and publishes it as the
# `pytorch_matrix` job output for build_pytorch_wheel_fat to consume.
setup_pytorch_matrix:
  name: Setup PyTorch CI Matrix
  # Only needed when the build configuration asks for PyTorch wheels.
  if: ${{ fromJSON(inputs.build_config).build_pytorch == true }}
  runs-on: ubuntu-24.04
  outputs:
    pytorch_matrix: ${{ steps.matrix.outputs.pytorch_matrix }}
  steps:
    - name: Checkout
      uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
      with:
        repository: ${{ inputs.repository || github.repository }}
        ref: ${{ inputs.ref }}

    - name: Generate PyTorch matrix
      id: matrix
      env:
        # Pass the workflow input through the environment instead of
        # interpolating it into the script text, so that quotes or shell
        # metacharacters in the value cannot alter the command being run.
        AMDGPU_FAMILIES: ${{ fromJSON(inputs.build_config).dist_amdgpu_families }}
      run: |
        python ./build_tools/github_actions/configure_pytorch_release_matrix.py \
          --python-versions="3.12" \
          --platform=linux \
          --pytorch-refs="release/2.10;release/2.11;release/2.12" \
          --amdgpu-families="${AMDGPU_FAMILIES}"
Comment on lines +291 to +314

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I already have a draft PR for this which follows CI conventions: #6082 (this belongs up a level in the entry point CI configuration code)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will close this one then; need to land this one though before 5 pm - https://github.com/ROCm/TheRock/actions/runs/28133800797/job/83339717736?pr=6122


build_pytorch_wheel_fat:
needs: [build_python_packages]
needs: [build_python_packages, setup_pytorch_matrix]
name: Build PyTorch (fat + split) | ${{ matrix.pytorch_git_ref }}
if: ${{ !failure() && !cancelled() && fromJSON(inputs.build_config).build_pytorch == true }}
strategy:
fail-fast: false
matrix:
pytorch_git_ref: ["release/2.10", "release/2.11", "release/2.12"]
include: ${{ fromJSON(needs.setup_pytorch_matrix.outputs.pytorch_matrix) }}
uses: ./.github/workflows/multi_arch_build_portable_linux_pytorch_wheels_ci.yml
with:
artifact_group: ${{ fromJSON(inputs.build_config).artifact_group }}
python_version: "3.12"
python_version: ${{ matrix.python_version }}
pytorch_git_ref: ${{ matrix.pytorch_git_ref }}
# The reusable workflow expands this to gfx targets via
# cmake/therock_amdgpu_targets.cmake.
amdgpu_families: ${{ fromJSON(inputs.build_config).dist_amdgpu_families }}
# Per-ref families from configure_pytorch_release_matrix.py; expanded
# to gfx targets via cmake/therock_amdgpu_targets.cmake.
amdgpu_families: ${{ matrix.amdgpu_families }}
kpack_split: ${{ needs.build_python_packages.outputs.kpack_split }}
rocm_package_find_links_url: ${{ needs.build_python_packages.outputs.package_find_links_url }}
rocm_version: ${{ inputs.rocm_package_version }}
Expand Down
31 changes: 25 additions & 6 deletions build_tools/github_actions/configure_pytorch_release_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,7 @@
},
{
"pytorch_git_ref": "release/2.11",
# gfx125x not yet upstreamed to pytorch/pytorch.
# See https://github.com/ROCm/TheRock/issues/5833.
"exclude_amdgpu_families": {"gfx125x"},
# gfx125x upstreamed via https://github.com/ROCm/pytorch/pull/3346.
},
{
"pytorch_git_ref": "release/2.12",
Expand Down Expand Up @@ -94,12 +92,16 @@ def generate_pytorch_matrix(
python_versions: list[str] | None,
amdgpu_families: str,
platform: str = "linux",
pytorch_refs: list[str] | None = None,
) -> list[dict]:
versions = python_versions if python_versions else PYTHON_VERSIONS
pytorch_refs = PYTORCH_REFS_WINDOWS if platform == "windows" else PYTORCH_REFS_LINUX
refs_cfg = PYTORCH_REFS_WINDOWS if platform == "windows" else PYTORCH_REFS_LINUX
if pytorch_refs is not None:
ref_set = {r.strip() for r in pytorch_refs if r.strip()}
refs_cfg = [cfg for cfg in refs_cfg if cfg["pytorch_git_ref"] in ref_set]
matrix = []
for py in versions:
for ref_cfg in pytorch_refs:
for ref_cfg in refs_cfg:
ref = ref_cfg["pytorch_git_ref"]
exclude = ref_cfg.get("exclude_amdgpu_families", set())
families = _filter_families(amdgpu_families, exclude)
Expand Down Expand Up @@ -139,6 +141,15 @@ def main(argv: list[str] | None = None) -> int:
"filtered out of this list for that ref's matrix entry."
),
)
parser.add_argument(
"--pytorch-refs",
type=str,
default="",
help=(
"Comma or semicolon separated list of PyTorch git refs to include "
"(default: all configured refs for the platform)"
),
)
args = parser.parse_args(argv)

python_versions = None
Expand All @@ -148,8 +159,16 @@ def main(argv: list[str] | None = None) -> int:
v.strip() for v in args.python_versions.split(sep) if v.strip()
]

pytorch_refs = None
if args.pytorch_refs:
sep = ";" if ";" in args.pytorch_refs else ","
pytorch_refs = [r.strip() for r in args.pytorch_refs.split(sep) if r.strip()]

matrix = generate_pytorch_matrix(
python_versions, args.amdgpu_families, args.platform
python_versions,
args.amdgpu_families,
args.platform,
pytorch_refs,
)
gha_set_output({"pytorch_matrix": json.dumps(matrix)})
return 0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Copyright Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

import os
import sys
import unittest
from pathlib import Path

THIS_DIR = Path(__file__).resolve().parent
sys.path.insert(0, os.fspath(THIS_DIR.parent))

import configure_pytorch_release_matrix as m


class ConfigurePyTorchReleaseMatrixTest(unittest.TestCase):
    """Exercises generate_pytorch_matrix with an explicit pytorch_refs filter."""

    @staticmethod
    def _linux_py312_rows(families: str, refs: list[str]) -> list[dict]:
        """Return the matrix rows for Linux / Python 3.12 with the given inputs."""
        return m.generate_pytorch_matrix(
            python_versions=["3.12"],
            amdgpu_families=families,
            platform="linux",
            pytorch_refs=refs,
        )

    def test_release_2_10_excludes_gfx125x(self) -> None:
        # release/2.10 is expected to drop the gfx125X family from the request.
        rows = self._linux_py312_rows(
            "gfx94X-dcgpu;gfx125X-dcgpu", ["release/2.10"]
        )
        self.assertEqual(len(rows), 1)
        only_row = rows[0]
        self.assertEqual(only_row["pytorch_git_ref"], "release/2.10")
        self.assertEqual(only_row["amdgpu_families"], "gfx94X-dcgpu")

    def test_release_2_11_includes_gfx125x(self) -> None:
        # release/2.11 is expected to keep every requested family untouched.
        rows = self._linux_py312_rows(
            "gfx94X-dcgpu;gfx125X-dcgpu", ["release/2.11"]
        )
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0]["amdgpu_families"], "gfx94X-dcgpu;gfx125X-dcgpu")

    def test_pytorch_refs_filter_limits_matrix_rows(self) -> None:
        # Only the refs named in the filter may appear in the resulting rows.
        rows = self._linux_py312_rows(
            "gfx94X-dcgpu", ["release/2.10", "release/2.11"]
        )
        seen_refs = set()
        for row in rows:
            seen_refs.add(row["pytorch_git_ref"])
        self.assertEqual(seen_refs, {"release/2.10", "release/2.11"})


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading