diff --git a/.github/workflows/multi_arch_ci_linux.yml b/.github/workflows/multi_arch_ci_linux.yml index 5415cf36c7c..5f0d62b16f9 100644 --- a/.github/workflows/multi_arch_ci_linux.yml +++ b/.github/workflows/multi_arch_ci_linux.yml @@ -287,22 +287,48 @@ jobs: # so the split path is unconditional here. The split itself runs inline # in multi_arch_build_portable_linux_pytorch_wheels_ci.yml based on # inputs.kpack_split. + # + # Per-ref amdgpu family exclusions (e.g. gfx125x on release/2.10) are + # applied via configure_pytorch_release_matrix.py, matching the release + # workflow in multi_arch_release_linux_pytorch_wheels.yml. + setup_pytorch_matrix: + name: Setup PyTorch CI Matrix + if: ${{ fromJSON(inputs.build_config).build_pytorch == true }} + runs-on: ubuntu-24.04 + outputs: + pytorch_matrix: ${{ steps.matrix.outputs.pytorch_matrix }} + steps: + - name: Checkout + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + repository: ${{ inputs.repository || github.repository }} + ref: ${{ inputs.ref }} + + - name: Generate PyTorch matrix + id: matrix + run: | + python ./build_tools/github_actions/configure_pytorch_release_matrix.py \ + --python-versions="3.12" \ + --platform=linux \ + --pytorch-refs="release/2.10;release/2.11;release/2.12" \ + --amdgpu-families="${{ fromJSON(inputs.build_config).dist_amdgpu_families }}" + build_pytorch_wheel_fat: - needs: [build_python_packages] + needs: [build_python_packages, setup_pytorch_matrix] name: Build PyTorch (fat + split) | ${{ matrix.pytorch_git_ref }} if: ${{ !failure() && !cancelled() && fromJSON(inputs.build_config).build_pytorch == true }} strategy: fail-fast: false matrix: - pytorch_git_ref: ["release/2.10", "release/2.11", "release/2.12"] + include: ${{ fromJSON(needs.setup_pytorch_matrix.outputs.pytorch_matrix) }} uses: ./.github/workflows/multi_arch_build_portable_linux_pytorch_wheels_ci.yml with: artifact_group: ${{ fromJSON(inputs.build_config).artifact_group }} - python_version: 
"3.12" + python_version: ${{ matrix.python_version }} pytorch_git_ref: ${{ matrix.pytorch_git_ref }} - # The reusable workflow expands this to gfx targets via - # cmake/therock_amdgpu_targets.cmake. - amdgpu_families: ${{ fromJSON(inputs.build_config).dist_amdgpu_families }} + # Per-ref families from configure_pytorch_release_matrix.py; expanded + # to gfx targets via cmake/therock_amdgpu_targets.cmake. + amdgpu_families: ${{ matrix.amdgpu_families }} kpack_split: ${{ needs.build_python_packages.outputs.kpack_split }} rocm_package_find_links_url: ${{ needs.build_python_packages.outputs.package_find_links_url }} rocm_version: ${{ inputs.rocm_package_version }} diff --git a/build_tools/github_actions/configure_pytorch_release_matrix.py b/build_tools/github_actions/configure_pytorch_release_matrix.py index 69c68e0e506..716706eae37 100644 --- a/build_tools/github_actions/configure_pytorch_release_matrix.py +++ b/build_tools/github_actions/configure_pytorch_release_matrix.py @@ -40,9 +40,7 @@ }, { "pytorch_git_ref": "release/2.11", - # gfx125x not yet upstreamed to pytorch/pytorch. - # See https://github.com/ROCm/TheRock/issues/5833. - "exclude_amdgpu_families": {"gfx125x"}, + # gfx125x upstreamed via https://github.com/ROCm/pytorch/pull/3346. 
}, { "pytorch_git_ref": "release/2.12", @@ -94,12 +92,16 @@ def generate_pytorch_matrix( python_versions: list[str] | None, amdgpu_families: str, platform: str = "linux", + pytorch_refs: list[str] | None = None, ) -> list[dict]: versions = python_versions if python_versions else PYTHON_VERSIONS - pytorch_refs = PYTORCH_REFS_WINDOWS if platform == "windows" else PYTORCH_REFS_LINUX + refs_cfg = PYTORCH_REFS_WINDOWS if platform == "windows" else PYTORCH_REFS_LINUX + if pytorch_refs is not None: + ref_set = {r.strip() for r in pytorch_refs if r.strip()} + refs_cfg = [cfg for cfg in refs_cfg if cfg["pytorch_git_ref"] in ref_set] matrix = [] for py in versions: - for ref_cfg in pytorch_refs: + for ref_cfg in refs_cfg: ref = ref_cfg["pytorch_git_ref"] exclude = ref_cfg.get("exclude_amdgpu_families", set()) families = _filter_families(amdgpu_families, exclude) @@ -139,6 +141,15 @@ def main(argv: list[str] | None = None) -> int: "filtered out of this list for that ref's matrix entry." ), ) + parser.add_argument( + "--pytorch-refs", + type=str, + default="", + help=( + "Comma or semicolon separated list of PyTorch git refs to include " + "(default: all configured refs for the platform)" + ), + ) args = parser.parse_args(argv) python_versions = None @@ -148,8 +159,16 @@ def main(argv: list[str] | None = None) -> int: v.strip() for v in args.python_versions.split(sep) if v.strip() ] + pytorch_refs = None + if args.pytorch_refs: + sep = ";" if ";" in args.pytorch_refs else "," + pytorch_refs = [r.strip() for r in args.pytorch_refs.split(sep) if r.strip()] + matrix = generate_pytorch_matrix( - python_versions, args.amdgpu_families, args.platform + python_versions, + args.amdgpu_families, + args.platform, + pytorch_refs, ) gha_set_output({"pytorch_matrix": json.dumps(matrix)}) return 0 diff --git a/build_tools/github_actions/tests/configure_pytorch_release_matrix_test.py b/build_tools/github_actions/tests/configure_pytorch_release_matrix_test.py new file mode 100644 index 
00000000000..4be2b9dfea5
--- /dev/null
+++ b/build_tools/github_actions/tests/configure_pytorch_release_matrix_test.py
@@ -0,0 +1,55 @@
+# Copyright Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: MIT
+
+"""Tests for configure_pytorch_release_matrix.generate_pytorch_matrix."""
+
+import os
+import sys
+import unittest
+from pathlib import Path
+
+# The script under test sits in the parent directory and is not an installed
+# package, so put that directory on sys.path before importing it.
+THIS_DIR = Path(__file__).resolve().parent
+sys.path.insert(0, os.fspath(THIS_DIR.parent))
+
+import configure_pytorch_release_matrix as m
+
+
+class ConfigurePyTorchReleaseMatrixTest(unittest.TestCase):
+    """Covers generate_pytorch_matrix() on Linux for a single Python version."""
+
+    def _linux_matrix(self, families: str, refs: list[str]) -> list[dict]:
+        """Return the Linux / Python 3.12 matrix for `families`, limited to `refs`."""
+        return m.generate_pytorch_matrix(
+            python_versions=["3.12"],
+            amdgpu_families=families,
+            platform="linux",
+            pytorch_refs=refs,
+        )
+
+    def test_release_2_10_excludes_gfx125x(self) -> None:
+        """release/2.10 drops the gfx125X family from its row."""
+        matrix = self._linux_matrix("gfx94X-dcgpu;gfx125X-dcgpu", ["release/2.10"])
+        self.assertEqual(len(matrix), 1)
+        self.assertEqual(matrix[0]["pytorch_git_ref"], "release/2.10")
+        self.assertEqual(matrix[0]["amdgpu_families"], "gfx94X-dcgpu")
+
+    def test_release_2_11_includes_gfx125x(self) -> None:
+        """release/2.11 keeps every requested family, gfx125X included."""
+        matrix = self._linux_matrix("gfx94X-dcgpu;gfx125X-dcgpu", ["release/2.11"])
+        self.assertEqual(len(matrix), 1)
+        self.assertEqual(matrix[0]["pytorch_git_ref"], "release/2.11")
+        self.assertEqual(matrix[0]["amdgpu_families"], "gfx94X-dcgpu;gfx125X-dcgpu")
+
+    def test_pytorch_refs_filter_limits_matrix_rows(self) -> None:
+        """Only the requested refs produce rows, one row per ref."""
+        matrix = self._linux_matrix("gfx94X-dcgpu", ["release/2.10", "release/2.11"])
+        refs = [row["pytorch_git_ref"] for row in matrix]
+        # Compare as a sorted list rather than a set so that a duplicated row
+        # for the same ref is reported instead of being collapsed away.
+        self.assertEqual(sorted(refs), ["release/2.10", "release/2.11"])
+
+
+if __name__ == "__main__":
+    unittest.main()