diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index c55987f4bae35..bf9526bad3530 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -2259,13 +2259,18 @@ memory resources handles can take different forms of structure and type depending on the API and operating system, so do external semaphore resource handles. -It is important to note, that the use of imported external semaphore objects -within SYCL has the restriction in that imported external semaphores can only -be used in conjuction with SYCL queues that have been constructed with the -`property::queue::in_order` property. The semaphore synchronization mechanism -is not supported for the default SYCL out-of-order queues. Use of the semaphore -synchronization mechanism with SYCL queues which were not constructed with the -`queue::in_order` property will result in undefined behaviour. +It is important to note that the use of imported external semaphore objects +within SYCL requires the SYCL queue to have been constructed with both of the +following properties: + +* `sycl::property::queue::in_order` -- the semaphore synchronization mechanism + is not supported on the default out-of-order queues. +* `sycl::ext::intel::property::queue::immediate_command_list` -- external + semaphore operations are only supported on queues backed by immediate + command lists. This restriction might be lifted in the future. + +Use of the semaphore synchronization mechanism with a SYCL queue that was not +constructed with both of these properties will result in undefined behaviour. External semaphore import is facilitated through the following proposed descriptor struct. diff --git a/sycl/test-e2e/bindless_images/dx11_interop/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/dx11_interop/read_write_unsampled.cpp index a08cde973cd9e..990d2bed635be 100644 --- a/sycl/test-e2e/bindless_images/dx11_interop/read_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/dx11_interop/read_write_unsampled.cpp @@ -17,6 +17,7 @@ #include "dx11_interop.h" #include +#include #ifdef TEST_SEMAPHORE_IMPORT #include // Used for ID3D11Device5 / ID3D11DeviceContext4 / ID3D11Fence @@ -438,8 +439,16 @@ int runTest(D3D11ProgramState &d3d11ProgramState, sycl::queue syclQueue, } int main() { - // Create SYCL queue, relying on SYCL device selection + // Create SYCL queue, relying on SYCL device selection. +#ifdef TEST_SEMAPHORE_IMPORT + // External semaphore ops require an in-order queue backed by immediate + // command lists (see sycl_ext_oneapi_bindless_images.asciidoc). + sycl::queue syclQueue{ + {sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue::immediate_command_list{}}}; +#else sycl::queue syclQueue; +#endif sycl::device syclDevice = syclQueue.get_device(); // Initialize D3D11 and create DX11 programs state from the SYCL device diff --git a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_buffer_timeline_semaphore.cpp b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_buffer_timeline_semaphore.cpp index e3115a72deb92..e697842d9a760 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_buffer_timeline_semaphore.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_buffer_timeline_semaphore.cpp @@ -10,11 +10,6 @@ // UNSUPPORTED-TRACKER: GSD-12427 // Gen12-semaphores-work-but-this-test-hangs. -// UNSUPPORTED: arch-intel_gpu_bmg_g21 -// UNSUPPORTED-TRACKER: GSD-12436 -// this test works on BMG, but if run in parallel with itself, or with other -// semaphore tests, it can hang. - // RUN: %{build} %link-directx -o %t.exe %if target-spir %{ -Wno-ignored-attributes %} // RUN: %{run} %t.exe --no-sem // RUN: %{run} %t.exe @@ -50,6 +45,7 @@ #include #include #include +#include #include #define WIN32_LEAN_AND_MEAN @@ -120,7 +116,15 @@ int main(int argc, char **argv) { // SYCL INTEROP try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; auto device = q.get_device(); auto context = q.get_context(); diff --git a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_buffer_win32_name_native.cpp b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_buffer_win32_name_native.cpp index 9a8b020e6ea6a..3d0eba09f493a 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_buffer_win32_name_native.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_buffer_win32_name_native.cpp @@ -4,13 +4,11 @@ // UNSUPPORTED: gpu-intel-dg2 // UNSUPPORTED-TRACKER: GSD-12428 +// semaphores-do-not-work-dg2 // UNSUPPORTED: gpu-intel-gen12 // UNSUPPORTED-TRACKER: GSD-12427 -// UNSUPPORTED: arch-intel_gpu_bmg_g21 -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22028 - // RUN: %{build} %link-directx -o %t.exe %if target-spir %{ -Wno-ignored-attributes %} // RUN: %{run} %t.exe --no-sem // RUN: %{run} %t.exe @@ -35,6 +33,7 @@ #include #include #include +#include #include #define WIN32_LEAN_AND_MEAN @@ -183,7 +182,15 @@ int main(int argc, char **argv) { // SYCL INTEROP - using resource_win32_name NATIVELY try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; auto device = q.get_device(); auto context = q.get_context(); diff --git a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_1D_read.cpp b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_1D_read.cpp index 8e8096461735f..bbb978f426fb4 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_1D_read.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_1D_read.cpp @@ -74,19 +74,18 @@ // Semaphore coverage tests -// At this time, semaphores aren't working on DG2 (GSD-12428), and can hang on BMG if run in parallel (GSD-12436). -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 1 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 4 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int16 --channels 2 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint16 --channels 1 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint8 --channels 4 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int8 --channels 2 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --sampled --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --sampled --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 1 --sampled --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 4 --sampled --semaphores 33x +// RUN: %{run} %t.exe --type float --channels 4 --semaphores 33x +// RUN: %{run} %t.exe --type half --channels 2 --semaphores 33x +// RUN: %{run} %t.exe --type int32 --channels 1 --semaphores 33x +// RUN: %{run} %t.exe --type uint32 --channels 4 --semaphores 33x +// RUN: %{run} %t.exe --type int16 --channels 2 --semaphores 33x +// RUN: %{run} %t.exe --type uint16 --channels 1 --semaphores 33x +// RUN: %{run} %t.exe --type uint8 --channels 4 --semaphores 33x +// RUN: %{run} %t.exe --type int8 --channels 2 --semaphores 33x +// RUN: %{run} %t.exe --type float --channels 4 --sampled --semaphores 33x +// RUN: %{run} %t.exe --type half --channels 2 --sampled --semaphores 33x +// RUN: %{run} %t.exe --type int32 --channels 1 --sampled --semaphores 33x +// RUN: %{run} %t.exe --type uint32 --channels 4 --sampled --semaphores 33x // clang-format on @@ -98,6 +97,7 @@ #include #include #include +#include namespace syclexp = sycl::ext::oneapi::experimental; @@ -187,7 +187,15 @@ int runTest( // SYCL Import and Verification try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; syclexp::external_mem_descriptor extMemDesc{ imgRes.sharedHandle, syclexp::external_mem_handle_type::win32_nt_handle, diff --git a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_1D_write_unsampled.cpp b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_1D_write_unsampled.cpp index 6a65ebd906869..569581ae071f4 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_1D_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_1D_write_unsampled.cpp @@ -7,7 +7,7 @@ // clang-format off /* - clang++.exe -fsycl -o ds1w.exe D3D12_sycl_interop_1D_write.cpp -ld3d12 -ldxgi -ld3dcompiler + clang++.exe -fsycl -o ds1w.exe D3D12_sycl_interop_1D_write_unsampled.cpp -ld3d12 -ldxgi -ld3dcompiler FLAGS: --sampled ERROR: Sampled image writes are not supported @@ -47,19 +47,18 @@ // RUN: %{run} %t.exe --type unorm8 --channels 4 33x // Semaphore coverage tests -// At this time, semaphores aren't working on DG2 (GSD-12428), and can hang on BMG if run in parallel (GSD-12436). -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 1 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 4 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 1 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int16 --channels 2 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint16 --channels 4 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint8 --channels 1 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int8 --channels 2 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type unorm8 --channels 4 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 4 --semaphores 33x -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 2 --semaphores 33x +// RUN: %{run} %t.exe --type float --channels 4 --semaphores 33x +// RUN: %{run} %t.exe --type float --channels 1 --semaphores 33x +// RUN: %{run} %t.exe --type half --channels 2 --semaphores 33x +// RUN: %{run} %t.exe --type int32 --channels 4 --semaphores 33x +// RUN: %{run} %t.exe --type uint32 --channels 1 --semaphores 33x +// RUN: %{run} %t.exe --type int16 --channels 2 --semaphores 33x +// RUN: %{run} %t.exe --type uint16 --channels 4 --semaphores 33x +// RUN: %{run} %t.exe --type uint8 --channels 1 --semaphores 33x +// RUN: %{run} %t.exe --type int8 --channels 2 --semaphores 33x +// RUN: %{run} %t.exe --type unorm8 --channels 4 --semaphores 33x +// RUN: %{run} %t.exe --type half --channels 4 --semaphores 33x +// RUN: %{run} %t.exe --type uint32 --channels 2 --semaphores 33x // clang-format on @@ -71,6 +70,7 @@ #include #include #include +#include namespace syclexp = sycl::ext::oneapi::experimental; @@ -148,7 +148,15 @@ int runTest( } try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; syclexp::external_mem_descriptor extMemDesc{ imgRes.sharedHandle, syclexp::external_mem_handle_type::win32_nt_handle, diff --git a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_arithmetic.cpp b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_arithmetic.cpp index 4c4cdc594be42..55e6e5064b8d7 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_arithmetic.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_arithmetic.cpp @@ -2,10 +2,6 @@ // REQUIRES: aspect-ext_oneapi_external_memory_import // REQUIRES: windows -// UNSUPPORTED: arch-intel_gpu_bmg_g21 -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20384 -// also GSD-12429 - // RUN: %{build} -o %t.exe %link-directx // RUN: %{run} %t.exe --type float --channels 4 8x8 @@ -26,7 +22,6 @@ DG2: - WORKS, including --sampled - - semaphores segfault DG2 $ sycl-ls [level_zero:gpu][level_zero:0] Intel(R) oneAPI Unified Runtime over @@ -61,9 +56,9 @@ // RUN: %{run} %t.exe --type int8 --channels 1 32x33 // RUN: %{run} %t.exe --type int8 --channels 2 32x33 // RUN: %{run} %t.exe --type int8 --channels 4 32x33 -// RUN-IF: !gpu-intel-bmg, %{run} %t.exe --type unorm8 --channels 1 32x33 -// RUN-IF: !gpu-intel-bmg, %{run} %t.exe --type unorm8 --channels 2 32x33 -// RUN-IF: !gpu-intel-bmg, %{run} %t.exe --type unorm8 --channels 4 32x33 +// RUN: %{run} %t.exe --type unorm8 --channels 1 32x33 +// RUN: %{run} %t.exe --type unorm8 --channels 2 32x33 +// RUN: %{run} %t.exe --type unorm8 --channels 4 32x33 // RUN: %{run} %t.exe --type float --channels 1 --sampled 32x33 // RUN: %{run} %t.exe --type float --channels 2 --sampled 32x33 // RUN: %{run} %t.exe --type float --channels 4 --sampled 32x33 @@ -88,24 +83,23 @@ // RUN: %{run} %t.exe --type int8 --channels 1 --sampled 32x33 // RUN: %{run} %t.exe --type int8 --channels 2 --sampled 32x33 // RUN: %{run} %t.exe --type int8 --channels 4 --sampled 32x33 -// RUN-IF: !gpu-intel-bmg, %{run} %t.exe --type unorm8 --channels 1 --sampled 32x33 -// RUN-IF: !gpu-intel-bmg, %{run} %t.exe --type unorm8 --channels 2 --sampled 32x33 -// RUN-IF: !gpu-intel-bmg, %{run} %t.exe --type unorm8 --channels 4 --sampled 32x33 +// RUN: %{run} %t.exe --type unorm8 --channels 1 --sampled 32x33 +// RUN: %{run} %t.exe --type unorm8 --channels 2 --sampled 32x33 +// RUN: %{run} %t.exe --type unorm8 --channels 4 --sampled 32x33 // Semaphore coverage tests -// At this time, semaphores aren't working on DG2 (GSD-12428), and can hang on BMG if run in parallel (GSD-12436). -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 1 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int16 --channels 2 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint16 --channels 1 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint8 --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int8 --channels 2 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --sampled --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --sampled --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 1 --sampled --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type unorm8 --channels 4 --sampled --semaphores 32x33 +// RUN: %{run} %t.exe --type float --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type half --channels 2 --semaphores 32x33 +// RUN: %{run} %t.exe --type int32 --channels 1 --semaphores 32x33 +// RUN: %{run} %t.exe --type uint32 --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type int16 --channels 2 --semaphores 32x33 +// RUN: %{run} %t.exe --type uint16 --channels 1 --semaphores 32x33 +// RUN: %{run} %t.exe --type uint8 --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type int8 --channels 2 --semaphores 32x33 +// RUN: %{run} %t.exe --type float --channels 4 --sampled --semaphores 32x33 +// RUN: %{run} %t.exe --type half --channels 2 --sampled --semaphores 32x33 +// RUN: %{run} %t.exe --type int32 --channels 1 --sampled --semaphores 32x33 +// RUN: %{run} %t.exe --type unorm8 --channels 4 --sampled --semaphores 32x33 // clang-format on @@ -118,6 +112,7 @@ #include #include #include +#include #include namespace syclexp = sycl::ext::oneapi::experimental; @@ -415,7 +410,15 @@ int runTest( signalExportableFence(ctx, extFenceB); try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; auto extMemA = syclexp::import_external_memory( syclexp::external_mem_descriptor{ diff --git a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_read.cpp b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_read.cpp index 63b9e979f2410..c1d69f8c9e59d 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_read.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_read.cpp @@ -2,10 +2,6 @@ // REQUIRES: aspect-ext_oneapi_external_memory_import // REQUIRES: windows -// UNSUPPORTED: arch-intel_gpu_bmg_g21 -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20384 -// also: GSD-12429 - // RUN: %{build} -o %t.exe %link-directx // RUN: %{run} %t.exe --type float --channels 4 32x33 @@ -23,9 +19,7 @@ WxH Set custom Width x Height (e.g. 8x4) - BMG: - - 1x1 works, nothing else. Suggesting offset/pitch issue. - - semaphores segfault. suggesting segfaulting semaphores. + BMG $ sycl-ls [level_zero:gpu][level_zero:0] Intel(R) oneAPI Unified Runtime over @@ -35,7 +29,6 @@ DG2: - WORKS, including --sampled - - semaphores segfault DG2 $ sycl-ls [level_zero:gpu][level_zero:0] Intel(R) oneAPI Unified Runtime over @@ -98,19 +91,18 @@ // RUN: %{run} %t.exe --type int8 --channels 4 --sampled 32x33 // Semaphore coverage tests -// At this time, semaphores aren't working on DG2 (GSD-12428), and can hang on BMG if run in parallel (GSD-12436). -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 1 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int16 --channels 2 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint16 --channels 1 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint8 --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int8 --channels 2 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --sampled --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --sampled --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 1 --sampled --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 4 --sampled --semaphores 32x33 +// RUN: %{run} %t.exe --type float --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type half --channels 2 --semaphores 32x33 +// RUN: %{run} %t.exe --type int32 --channels 1 --semaphores 32x33 +// RUN: %{run} %t.exe --type uint32 --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type int16 --channels 2 --semaphores 32x33 +// RUN: %{run} %t.exe --type uint16 --channels 1 --semaphores 32x33 +// RUN: %{run} %t.exe --type uint8 --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type int8 --channels 2 --semaphores 32x33 +// RUN: %{run} %t.exe --type float --channels 4 --sampled --semaphores 32x33 +// RUN: %{run} %t.exe --type half --channels 2 --sampled --semaphores 32x33 +// RUN: %{run} %t.exe --type int32 --channels 1 --sampled --semaphores 32x33 +// RUN: %{run} %t.exe --type uint32 --channels 4 --sampled --semaphores 32x33 // clang-format on @@ -122,6 +114,7 @@ #include #include #include +#include namespace syclexp = sycl::ext::oneapi::experimental; @@ -208,7 +201,15 @@ int runTest( // SYCL Import and Verification try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; syclexp::external_mem_descriptor extMemDesc{ imgRes.sharedHandle, syclexp::external_mem_handle_type::win32_nt_handle, diff --git a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_write_unsampled.cpp b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_write_unsampled.cpp index 8a9b8fd3bcfc3..75f30256565ea 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_2D_write_unsampled.cpp @@ -7,7 +7,7 @@ // clang-format off /* - clang++.exe -fsycl -o ds2w.exe D3D12_sycl_interop_2D_write.cpp -ld3d12 -ldxgi -ld3dcompiler + clang++.exe -fsycl -o ds2w.exe D3D12_sycl_interop_2D_write_unsampled.cpp -ld3d12 -ldxgi -ld3dcompiler FLAGS: --sampled ERROR: Sampled image writes are not supported @@ -47,19 +47,18 @@ // RUN: %{run} %t.exe --type unorm8 --channels 4 32x33 // Semaphore coverage tests -// At this time, semaphores aren't working on DG2 (GSD-12428), and can hang on BMG if run in parallel (GSD-12436). -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 1 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 1 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int16 --channels 2 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint16 --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint8 --channels 1 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int8 --channels 2 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type unorm8 --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 4 --semaphores 32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 2 --semaphores 32x33 +// RUN: %{run} %t.exe --type float --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type float --channels 1 --semaphores 32x33 +// RUN: %{run} %t.exe --type half --channels 2 --semaphores 32x33 +// RUN: %{run} %t.exe --type int32 --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type uint32 --channels 1 --semaphores 32x33 +// RUN: %{run} %t.exe --type int16 --channels 2 --semaphores 32x33 +// RUN: %{run} %t.exe --type uint16 --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type uint8 --channels 1 --semaphores 32x33 +// RUN: %{run} %t.exe --type int8 --channels 2 --semaphores 32x33 +// RUN: %{run} %t.exe --type unorm8 --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type half --channels 4 --semaphores 32x33 +// RUN: %{run} %t.exe --type uint32 --channels 2 --semaphores 32x33 // clang-format on @@ -71,6 +70,7 @@ #include #include #include +#include namespace syclexp = sycl::ext::oneapi::experimental; @@ -189,7 +189,15 @@ int runTest( } try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; syclexp::external_mem_descriptor extMemDesc{ imgRes.sharedHandle, syclexp::external_mem_handle_type::win32_nt_handle, diff --git a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_3D_read.cpp b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_3D_read.cpp index 3e3ec7ee0172c..d6ea87d3c4ae5 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_3D_read.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_3D_read.cpp @@ -2,18 +2,13 @@ // REQUIRES: aspect-ext_oneapi_external_memory_import // REQUIRES: windows -// UNSUPPORTED: arch-intel_gpu_bmg_g21 -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20384 -// also GSD-12430 - -// UNSUPPORTED: gpu-intel-dg2 -// UNSUPPORTED-TRACKER: GSD-12430 - // RUN: %{build} -o %t.exe %link-directx // RUN: %{run} %t.exe --type float --channels 4 8x8x8 // clang-format off +// clang++.exe -fsycl -o ds3r.exe D3D12_sycl_interop_3D_read.cpp -ld3d12 -ldxgi -ld3dcompiler + // RUN: %{run} %t.exe --type float --channels 1 33x32x31 // RUN: %{run} %t.exe --type float --channels 2 32x33x31 // RUN: %{run} %t.exe --type float --channels 4 31x32x33 @@ -62,24 +57,23 @@ // RUN: %{run} %t.exe --type int8 --channels 1 --sampled 17x16x15 // RUN: %{run} %t.exe --type int8 --channels 2 --sampled 16x15x17 // RUN: %{run} %t.exe --type int8 --channels 4 --sampled 15x17x16 -// RUN-IF: !gpu-intel-gen12, %{run} %t.exe --type unorm8 --channels 1 --sampled 9x8x7 -// RUN-IF: !gpu-intel-gen12, %{run} %t.exe --type unorm8 --channels 2 --sampled 8x7x9 -// RUN-IF: !gpu-intel-gen12, %{run} %t.exe --type unorm8 --channels 4 --sampled 7x9x8 +// RUN-IF: (!gpu-intel-gen12 && !gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type unorm8 --channels 1 --sampled 9x8x7 +// RUN-IF: (!gpu-intel-gen12 && !gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type unorm8 --channels 2 --sampled 8x7x9 +// RUN-IF: (!gpu-intel-gen12 && !gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type unorm8 --channels 4 --sampled 7x9x8 // Semaphore coverage tests -// At this time, semaphores aren't working on DG2 (GSD-12428), and can hang on BMG if run in parallel (GSD-12436). -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --semaphores 16x17x15 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --semaphores 17x16x15 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 1 --semaphores 9x8x7 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 4 --semaphores 33x31x32 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int16 --channels 2 --semaphores 15x17x16 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint16 --channels 1 --semaphores 9x7x8 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint8 --channels 4 --semaphores 32x31x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int8 --channels 2 --semaphores 16x15x17 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --sampled --semaphores 31x32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --sampled --semaphores 15x16x17 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 1 --sampled --semaphores 7x8x9 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 4 --sampled --semaphores 32x31x33 +// RUN: %{run} %t.exe --type float --channels 4 --semaphores 16x17x15 +// RUN: %{run} %t.exe --type half --channels 2 --semaphores 17x16x15 +// RUN: %{run} %t.exe --type int32 --channels 1 --semaphores 9x8x7 +// RUN: %{run} %t.exe --type uint32 --channels 4 --semaphores 33x31x32 +// RUN: %{run} %t.exe --type int16 --channels 2 --semaphores 15x17x16 +// RUN: %{run} %t.exe --type uint16 --channels 1 --semaphores 9x7x8 +// RUN: %{run} %t.exe --type uint8 --channels 4 --semaphores 32x31x33 +// RUN: %{run} %t.exe --type int8 --channels 2 --semaphores 16x15x17 +// RUN: %{run} %t.exe --type float --channels 4 --sampled --semaphores 31x32x33 +// RUN: %{run} %t.exe --type half --channels 2 --sampled --semaphores 15x16x17 +// RUN: %{run} %t.exe --type int32 --channels 1 --sampled --semaphores 7x8x9 +// RUN: %{run} %t.exe --type uint32 --channels 4 --sampled --semaphores 32x31x33 /* clang++.exe -fsycl -o ds3r.exe D3D12_sycl_interop_3D_read.cpp -ld3d12 -ldxgi -ld3dcompiler @@ -94,12 +88,13 @@ WxHxD Set custom Width x Height x Depth (e.g. 16x15x14) - BMG: - + BMG: + - WORKS, including --sampled and semaphores + - unorm8 does NOT work DG2: - - WORKS, including --sampled - - semaphores segfault + - WORKS, including --sampled and semaphores + - unorm8 does NOT work. GEN12: - WORKS, including --sampled and semaphores @@ -116,6 +111,7 @@ #include #include #include +#include namespace syclexp = sycl::ext::oneapi::experimental; @@ -201,7 +197,15 @@ int runTest( // SYCL Import and Verification try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; syclexp::external_mem_descriptor extMemDesc{ imgRes.sharedHandle, syclexp::external_mem_handle_type::win32_nt_handle, diff --git a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_3D_write_unsampled.cpp b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_3D_write_unsampled.cpp index 498f0cde6ecf9..f1fb08791ea45 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_3D_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/D3D12_sycl_interop_3D_write_unsampled.cpp @@ -7,6 +7,20 @@ // clang-format off +/* + clang++.exe -fsycl -o ds3w.exe D3D12_sycl_interop_3D_write_unsampled.cpp -ld3d12 -ldxgi -ld3dcompiler + + FLAGS: + --sampled ERROR: Sampled image writes are not supported + --semaphores Use DX12 Fences for SYCL Interop Sync + --channels X Set number of channels (1, 2, or 4). Default is 4 (RGBA) + --type XXX Set data type (float, half, uint32, int32, uint16, int16, uint8, int8, unorm8). + Default is float + WxHxD Set custom Width x Height x Depth (e.g. 8x4x2) +*/ + + + // RUN: %{run} %t.exe --type float --channels 1 33x32x31 // RUN: %{run} %t.exe --type float --channels 2 32x33x31 // RUN: %{run} %t.exe --type float --channels 4 31x32x33 @@ -36,31 +50,19 @@ // RUN: %{run} %t.exe --type unorm8 --channels 4 7x9x8 // Semaphore coverage tests -// At this time, semaphores aren't working on DG2 (GSD-12428), and can hang on BMG if run in parallel (GSD-12436). -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 4 --semaphores 16x17x15 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type float --channels 1 --semaphores 31x32x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 2 --semaphores 17x16x15 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int32 --channels 4 --semaphores 9x8x7 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 1 --semaphores 33x31x32 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int16 --channels 2 --semaphores 15x17x16 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint16 --channels 4 --semaphores 9x7x8 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint8 --channels 1 --semaphores 32x31x33 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type int8 --channels 2 --semaphores 16x15x17 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type unorm8 --channels 4 --semaphores 7x9x8 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type half --channels 4 --semaphores 15x16x17 -// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.exe --type uint32 --channels 2 --semaphores 32x31x33 - -/* - clang++.exe -fsycl -o ds3w.exe D3D12_sycl_interop_3D_write.cpp -ld3d12 -ldxgi -ld3dcompiler +// RUN: %{run} %t.exe --type float --channels 4 --semaphores 16x17x15 +// RUN: %{run} %t.exe --type float --channels 1 --semaphores 31x32x33 +// RUN: %{run} %t.exe --type half --channels 2 --semaphores 17x16x15 +// RUN: %{run} %t.exe --type int32 --channels 4 --semaphores 9x8x7 +// RUN: %{run} %t.exe --type uint32 --channels 1 --semaphores 33x31x32 +// RUN: %{run} %t.exe --type int16 --channels 2 --semaphores 15x17x16 +// RUN: %{run} %t.exe --type uint16 --channels 4 --semaphores 9x7x8 +// RUN: %{run} %t.exe --type uint8 --channels 1 --semaphores 32x31x33 +// RUN: %{run} %t.exe --type int8 --channels 2 --semaphores 16x15x17 +// RUN: %{run} %t.exe --type unorm8 --channels 4 --semaphores 7x9x8 +// RUN: %{run} %t.exe --type half --channels 4 --semaphores 15x16x17 +// RUN: %{run} %t.exe --type uint32 --channels 2 --semaphores 32x31x33 - FLAGS: - --sampled ERROR: Sampled image writes are not supported - --semaphores Use DX12 Fences for SYCL Interop Sync - --channels X Set number of channels (1, 2, or 4). Default is 4 (RGBA) - --type XXX Set data type (float, half, uint32, int32, uint16, int16, uint8, int8, unorm8). - Default is float - WxHxD Set custom Width x Height x Depth (e.g. 8x4x41) -*/ // clang-format on #include "d3d12_setup.hpp" @@ -71,6 +73,7 @@ #include #include #include +#include namespace syclexp = sycl::ext::oneapi::experimental; @@ -148,7 +151,15 @@ int runTest( } try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; syclexp::external_mem_descriptor extMemDesc{ imgRes.sharedHandle, syclexp::external_mem_handle_type::win32_nt_handle, diff --git a/sycl/test-e2e/bindless_images/dx12_interop/external_semaphore_regular_cl_fails.cpp b/sycl/test-e2e/bindless_images/dx12_interop/external_semaphore_regular_cl_fails.cpp new file mode 100644 index 0000000000000..aa416c0b82f71 --- /dev/null +++ b/sycl/test-e2e/bindless_images/dx12_interop/external_semaphore_regular_cl_fails.cpp @@ -0,0 +1,79 @@ + +// +// REQUIRES: aspect-ext_oneapi_external_semaphore_import, windows, level_zero +// +// RUN: %{build} %link-directx -o %t.exe %if target-spir %{ -Wno-ignored-attributes %} +// RUN: %{run} %t.exe + +// Waiting on a DX12-fence external semaphore from a queue backed by a +// regular (non-immediate) command list must throw sycl::exception. +// +// Mirrors vulkan_interop/external_semaphore_regular_cl_fails.cpp but uses +// the win32_nt_dx12_fence handle-type path. The Level Zero adapter +// rejects external_semaphore wait/signal at submission time when the +// queue is not using immediate command lists; this test verifies the +// rejection still fires for the DX12 fence handle type. +// +// Flow: +// 1. Create a real, exportable D3D12 timeline fence and signal it. +// 2. Import it into SYCL via a (lawful) immediate-CL queue. +// 3. Call ext_oneapi_wait_external_semaphore on a separate queue that +// explicitly opts into no_immediate_command_list, and expect a +// sycl::exception. + +#include "d3d12_setup.hpp" +#include +#include +#include +#include + +#define WIN32_LEAN_AND_MEAN +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +int main() { + D3D12Context d3dCtx = createD3D12Context(); + D3D12ExportableFence extFence = createExportableFence(d3dCtx); + + // Make the fence reach a known value so wait(value=1) is satisfiable + // if it ever gets that far. + signalExportableFence(d3dCtx, extFence); + + // Lawful queue: import the semaphore here. + sycl::queue immQ{ + {sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue::immediate_command_list{}}}; + auto device = immQ.get_device(); + auto context = immQ.get_context(); + + auto semDesc = + syclexp::external_semaphore_descriptor{ + extFence.sharedHandle, + syclexp::external_semaphore_handle_type::win32_nt_dx12_fence}; + syclexp::external_semaphore syclSem = + syclexp::import_external_semaphore(semDesc, device, context); + + // The non-immediate-CL queue is what should trigger rejection on use. + sycl::queue regQ{ + context, device, + sycl::property_list{ + sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue::no_immediate_command_list{}}}; + + int ret = 1; + try { + regQ.ext_oneapi_wait_external_semaphore(syclSem, extFence.fenceValue); + regQ.wait_and_throw(); + std::cerr << "FAIL: ext_oneapi_wait_external_semaphore (dx12_fence) on a " + "non-immediate-CL queue did not throw." + << std::endl; + } catch (const sycl::exception &e) { + std::cout << "Got expected sycl::exception: " << e.what() << std::endl; + ret = 0; + } + + syclexp::release_external_semaphore(syclSem, device, context); + cleanupExportableFence(extFence); + return ret; +} diff --git a/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp b/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp index 4451e0f59116f..0b91824cf9bc8 100644 --- a/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp @@ -11,11 +11,16 @@ #include #include +#include int main() { - // Set up queue + // Set up queue. + // External semaphore ops require an in-order queue backed by immediate + // command lists (see sycl_ext_oneapi_bindless_images.asciidoc). sycl::device dev; - sycl::queue q(dev); + sycl::queue q(dev, + {sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue::immediate_command_list{}}); size_t width = 123 /* passed from external API */; size_t height = 123 /* passed from external API */; diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/external_semaphore_regular_cl_fails.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/external_semaphore_regular_cl_fails.cpp new file mode 100644 index 0000000000000..c7616be3060db --- /dev/null +++ b/sycl/test-e2e/bindless_images/vulkan_interop/external_semaphore_regular_cl_fails.cpp @@ -0,0 +1,80 @@ + +// +// REQUIRES: aspect-ext_oneapi_external_semaphore_import +// REQUIRES: vulkan && level_zero +// +// RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes %} +// RUN: %{run} %t.out + +// Waiting on an external semaphore from a queue backed by a regular +// (non-immediate) command list must throw sycl::exception. +// +// sycl_ext_oneapi_bindless_images requires queues used with external +// semaphore wait/signal to be constructed with BOTH +// - sycl::property::queue::in_order +// - sycl::ext::intel::property::queue::immediate_command_list +// The Level Zero adapter rejects external_semaphore wait/signal at the +// point of submission when the queue is not using immediate command +// lists. This test verifies that contract by: +// 1. Creating a real, exportable Vulkan binary semaphore. +// 2. Importing it into SYCL via a (lawful) immediate-CL queue. +// 3. Calling ext_oneapi_wait_external_semaphore on a separate queue +// that explicitly opts into no_immediate_command_list, and +// expecting a sycl::exception. + +#include "vulkan_setup.hpp" +#include +#include +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +int main() { + VulkanContext vkCtx = createVulkanContext(); + VkSemaphore vkSem = createExportableSemaphore(vkCtx); + + // Lawful queue: import the semaphore here. + sycl::queue immQ{ + {sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue::immediate_command_list{}}}; + auto device = immQ.get_device(); + auto context = immQ.get_context(); + +#ifdef _WIN32 + HANDLE semHandle = getSemaphoreHandle(vkCtx, vkSem); + syclexp::external_semaphore_descriptor desc{ + semHandle, syclexp::external_semaphore_handle_type::win32_nt_handle}; +#else + int semFd = getSemaphoreFd(vkCtx, vkSem); + syclexp::external_semaphore_descriptor desc{ + semFd, syclexp::external_semaphore_handle_type::opaque_fd}; +#endif + + syclexp::external_semaphore syclSem = + syclexp::import_external_semaphore(desc, device, context); + + // The non-immediate-CL queue is what should trigger rejection on use. + sycl::queue regQ{ + context, device, + sycl::property_list{ + sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue::no_immediate_command_list{}}}; + + int ret = 1; + try { + regQ.ext_oneapi_wait_external_semaphore(syclSem); + regQ.wait_and_throw(); + std::cerr << "FAIL: ext_oneapi_wait_external_semaphore on a " + "non-immediate-CL queue did not throw." + << std::endl; + } catch (const sycl::exception &e) { + std::cout << "Got expected sycl::exception: " << e.what() << std::endl; + ret = 0; + } + + syclexp::release_external_semaphore(syclSem, device, context); + vkDestroySemaphore(vkCtx.device, vkSem, nullptr); + cleanupVulkanContext(vkCtx); + return ret; +} diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_setup.hpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_setup.hpp index 3bd402194cc94..7fa71442bb168 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_setup.hpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_setup.hpp @@ -334,6 +334,11 @@ inline VulkanContext createVulkanContext() { appInfo.apiVersion = VK_API_VERSION_1_2; std::vector instanceExtensions; + VkInstanceCreateInfo createInfo{}; + createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + createInfo.pApplicationInfo = &appInfo; + +#ifndef ONT_VALIDATE uint32_t instanceExtensionCount = 0; VK_CHECK(vkEnumerateInstanceExtensionProperties( nullptr, &instanceExtensionCount, nullptr)); @@ -373,13 +378,8 @@ inline VulkanContext createVulkanContext() { throw std::runtime_error("failed to find Vulkan validation layer!"); } - VkInstanceCreateInfo createInfo{}; - createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - createInfo.pApplicationInfo = &appInfo; createInfo.ppEnabledLayerNames = &validationLayerName; createInfo.enabledLayerCount = 1; - createInfo.enabledExtensionCount = (uint32_t)instanceExtensions.size(); - createInfo.ppEnabledExtensionNames = instanceExtensions.data(); VkDebugUtilsMessengerCreateInfoEXT debugUtilsMessengerInfo{}; debugUtilsMessengerInfo.sType = @@ -397,8 +397,16 @@ inline VulkanContext createVulkanContext() { // that occur while creating the instance itself. This debug util messangegr // will be alive only during instance creation. createInfo.pNext = &debugUtilsMessengerInfo; +#else + ctx.debugMessenger = VK_NULL_HANDLE; +#endif + + createInfo.enabledExtensionCount = (uint32_t)instanceExtensions.size(); + createInfo.ppEnabledExtensionNames = instanceExtensions.data(); + VK_CHECK(vkCreateInstance(&createInfo, nullptr, &ctx.instance)); +#ifndef ONT_VALIDATE // Create a persistent debug messenger that stays alive for the application's // lifetime to capture all subsequent events. auto vkCreateDebugUtilsMessengerFuncPtr = @@ -411,6 +419,7 @@ inline VulkanContext createVulkanContext() { throw std::runtime_error( "Failed to fetch vkCreateDebugUtilsMessengerEXT function pointer!"); } +#endif uint32_t deviceCount = 0; vkEnumeratePhysicalDevices(ctx.instance, &deviceCount, nullptr); @@ -466,6 +475,7 @@ inline VulkanContext createVulkanContext() { inline void cleanupVulkanContext(VulkanContext &ctx) { vkDestroyDevice(ctx.device, nullptr); +#ifndef ONT_VALIDATE auto vkDestroyDebugUtilsMessengerFuncPtr = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( ctx.instance, "vkDestroyDebugUtilsMessengerEXT"); @@ -473,6 +483,7 @@ inline void cleanupVulkanContext(VulkanContext &ctx) { vkDestroyDebugUtilsMessengerFuncPtr(ctx.instance, ctx.debugMessenger, nullptr); } +#endif vkDestroyInstance(ctx.instance, nullptr); } @@ -1230,6 +1241,7 @@ inline void cleanupVulkan(VulkanContext &ctx, ImageResources &res) { vkDestroyImage(ctx.device, res.image, nullptr); vkFreeMemory(ctx.device, res.memory, nullptr); vkDestroyDevice(ctx.device, nullptr); +#ifndef ONT_VALIDATE auto vkDestroyDebugUtilsMessengerFuncPtr = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( ctx.instance, "vkDestroyDebugUtilsMessengerEXT"); @@ -1237,5 +1249,6 @@ inline void cleanupVulkan(VulkanContext &ctx, ImageResources &res) { vkDestroyDebugUtilsMessengerFuncPtr(ctx.instance, ctx.debugMessenger, nullptr); } +#endif vkDestroyInstance(ctx.instance, nullptr); } diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_2d_arithmetic.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_2d_arithmetic.cpp index 93a64bc9f84e7..82c9c1b21f8bf 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_2d_arithmetic.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_2d_arithmetic.cpp @@ -7,8 +7,7 @@ // UNSUPPORTED: linux // UNSUPPORTED-TRACKER: GSD-12357 -// XFAIL: windows && gpu-intel-dg2 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21985 +// clang-format off // UNSUPPORTED: windows && arch-intel_gpu_bmg_g21 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21986 @@ -105,24 +104,24 @@ /* Vulkan/SYCL 2D Arithmetic (A + B = C) - clang++ -fsycl -o vs_2d_arith.bin vulkan_sycl_2d_arithmetic.cpp -lvulkan - -I$VULKAN_SDK/include -L$VULKAN_SDK/lib + clang++ -fsycl -o vs_2d_arith.bin vulkan_sycl_2d_arithmetic.cpp -lvulkan -I$VULKAN_SDK/include -L$VULKAN_SDK/lib - clang++ -fsycl -o vs_2d_arith.exe vulkan_sycl_2d_arithmetic.cpp - -Wno-ignored-attributes -lvulkan-1 -I$VULKAN_SDK/Include -L$VULKAN_SDK/Lib + clang++ -fsycl -o vs_2d_arith.exe vulkan_sycl_2d_arithmetic.cpp -Wno-ignored-attributes -lvulkan-1 -I$VULKAN_SDK/Include -L$VULKAN_SDK/Lib FLAGS --semaphores Use Vulkan Semaphores for SYCL Interop Sync --linear Use LINEAR tiling for the Vulkan Image (default is OPTIMAL) --channels X Set number of channels (1, 2, or 4). Default is 4 (RGBA) - --type XXX Set data type (float, half, uint32, int32, uint16, int16, - uint8, int8, unorm8). Default is float WxH Set custom Width x - Height (e.g. 8x4) + --type XXX Set data type (float, half, uint32, int32, uint16, int16, uint8, int8, unorm8). Default is float + WxH Set custom Width x Height (e.g. 8x4) --sampled // RUN: %{run} %t.out --type float --semaphores // RUN: %{run} %t.out --type unorm8 --sampled --semaphores */ + +// clang-format on + #include "vulkan_setup.hpp" #include @@ -133,6 +132,7 @@ #include #include #include +#include #include namespace syclexp = sycl::ext::oneapi::experimental; @@ -246,7 +246,15 @@ int runTest( }); try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; #ifdef _WIN32 auto extMemA = syclexp::import_external_memory( syclexp::external_mem_descriptor{ diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer.cpp index 3d9642bf8b559..68b1e14b55fb0 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer.cpp @@ -1,9 +1,6 @@ // REQUIRES: aspect-ext_oneapi_external_memory_import // REQUIRES: vulkan -// UNSUPPORTED: windows && gpu-intel-gen12 -// UNSUPPORTED-TRACKER: URLZA-723 - // clang-format off // On Linux L0, there are problem with semaphores and latest drivers. @@ -59,6 +56,7 @@ #include #include #include +#include #include namespace syclexp = sycl::ext::oneapi::experimental; @@ -277,7 +275,15 @@ int main(int argc, char **argv) { // SYCL INTEROP try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; auto device = q.get_device(); auto context = q.get_context(); diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_binary_semaphore.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_binary_semaphore.cpp index f192a25d3e66d..6514ff324ca2d 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_binary_semaphore.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_binary_semaphore.cpp @@ -8,12 +8,6 @@ // UNSUPPORTED: windows && gpu-intel-gen12 // UNSUPPORTED-TRACKER: URLZA-723 -// XFAIL: windows && gpu-intel-dg2 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21985 - -// UNSUPPORTED: windows && arch-intel_gpu_bmg_g21 -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21986 - // RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes %} // RUN: %{run} %t.out --no-sem // RUN: %{run} %t.out --dual-sem @@ -55,6 +49,7 @@ #include #include #include +#include #include namespace syclexp = sycl::ext::oneapi::experimental; @@ -149,7 +144,15 @@ int main(int argc, char **argv) { // SYCL INTEROP try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; auto device = q.get_device(); auto context = q.get_context(); diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_timeline_semaphore.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_timeline_semaphore.cpp index a364380230efe..4e55b18a2160f 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_timeline_semaphore.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_timeline_semaphore.cpp @@ -10,12 +10,6 @@ // UNSUPPORTED: windows && gpu-intel-gen12 // UNSUPPORTED-TRACKER: URLZA-723 -// XFAIL: windows && gpu-intel-dg2 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21985 - -// UNSUPPORTED: windows && arch-intel_gpu_bmg_g21 -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21986 - // RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes %} // RUN: %{run} %t.out --no-sem // RUN: %{run} %t.out @@ -59,6 +53,7 @@ #include #include #include +#include #include namespace syclexp = sycl::ext::oneapi::experimental; @@ -140,7 +135,15 @@ int main(int argc, char **argv) { // SYCL INTEROP try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; auto device = q.get_device(); auto context = q.get_context(); diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_1d.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_1d.cpp index 43c0dfc76b4cb..f3b497c881563 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_1d.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_1d.cpp @@ -2,9 +2,6 @@ // REQUIRES: aspect-ext_oneapi_external_memory_import || (windows && level_zero && aspect-ext_oneapi_bindless_images) // REQUIRES: vulkan -// XFAIL: windows && gpu-intel-dg2 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21985 - // RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes %} /* @@ -164,6 +161,7 @@ VK_FORMAT_R8G8B8A8_UNORM #include #include #include +#include // --------------------------------------------------------- // SYCL TYPE MAPPING HELPERS @@ -257,7 +255,15 @@ int runTest( // SYCL Import and Verification namespace syclexp = sycl::ext::oneapi::experimental; try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; // Import Memory (Platform Specific) #ifdef _WIN32 diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_2d.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_2d.cpp index ac59681b4071b..1c02ce0d683b5 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_2d.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_2d.cpp @@ -7,12 +7,6 @@ // UNSUPPORTED: linux // UNSUPPORTED-TRACKER: GSD-12357 -// XFAIL: windows && gpu-intel-dg2 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21985 - -// XFAIL: windows && arch-intel_gpu_bmg_g21 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21986 - /* Run ALL the vulkan formats through the gauntlet. sampled and unsampled. This entire test takes less than 30 seconds on a slow machine. MUCH faster @@ -27,6 +21,7 @@ will fail. This is being tracked as a separate issue. */ +// clang-format off // RUN: %{run} %t.out --type float --channels 1 32x33 // RUN: %{run} %t.out --type float --channels 2 32x33 @@ -52,9 +47,9 @@ // RUN: %{run} %t.out --type int8 --channels 1 32x33 // RUN: %{run} %t.out --type int8 --channels 2 32x33 // RUN: %{run} %t.out --type int8 --channels 4 32x33 -// RUN: %{run} %t.out --type unorm8 --channels 1 32x33 -// RUN: %{run} %t.out --type unorm8 --channels 2 32x33 -// RUN: %{run} %t.out --type unorm8 --channels 4 32x33 +// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.out --type unorm8 --channels 1 32x33 +// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.out --type unorm8 --channels 2 32x33 +// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.out --type unorm8 --channels 4 32x33 // RUN: %{run} %t.out --type float --channels 1 --sampled 32x33 // RUN: %{run} %t.out --type float --channels 2 --sampled 32x33 // RUN: %{run} %t.out --type float --channels 4 --sampled 32x33 @@ -79,9 +74,9 @@ // RUN: %{run} %t.out --type int8 --channels 1 --sampled 32x33 // RUN: %{run} %t.out --type int8 --channels 2 --sampled 32x33 // RUN: %{run} %t.out --type int8 --channels 4 --sampled 32x33 -// RUN: %{run} %t.out --type unorm8 --channels 1 --sampled 32x33 -// RUN: %{run} %t.out --type unorm8 --channels 2 --sampled 32x33 -// RUN: %{run} %t.out --type unorm8 --channels 4 --sampled 32x33 +// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.out --type unorm8 --channels 1 --sampled 32x33 +// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.out --type unorm8 --channels 2 --sampled 32x33 +// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.out --type unorm8 --channels 4 --sampled 32x33 // RUN: %{run} %t.out --type float --channels 1 32x33 --semaphores // RUN: %{run} %t.out --type float --channels 4 32x33 --semaphores @@ -93,9 +88,8 @@ // RUN: %{run} %t.out --type int32 --channels 4 --sampled 32x33 --semaphores // RUN: %{run} %t.out --type int16 --channels 4 --sampled 32x33 --semaphores // RUN: %{run} %t.out --type uint8 --channels 2 --sampled 32x33 --semaphores -// RUN: %{run} %t.out --type unorm8 --channels 4 --sampled 32x33 --semaphores +// RUN-IF: (!gpu-intel-dg2 && !arch-intel_gpu_bmg_g21), %{run} %t.out --type unorm8 --channels 4 --sampled 32x33 --semaphores -// clang-format off /* Vulkan/SYCL 2D Image Read Test (Sampled + Unsampled) @@ -135,6 +129,7 @@ #include #include #include +#include // --------------------------------------------------------- // SYCL TYPE MAPPING HELPERS @@ -245,7 +240,15 @@ int runTest( // SYCL Import and Verification namespace syclexp = sycl::ext::oneapi::experimental; try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; // Import Memory (Platform Specific) #ifdef _WIN32 diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_3d.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_3d.cpp index 9b19aba506ebc..c92afa758ad34 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_3d.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_read_3d.cpp @@ -44,6 +44,7 @@ #include #include #include +#include // --------------------------------------------------------- // SYCL TYPE MAPPING HELPERS @@ -138,7 +139,15 @@ int runTest( // SYCL Import and Verification namespace syclexp = sycl::ext::oneapi::experimental; try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; // Import Memory (Platform Specific) #ifdef _WIN32 diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_1d_unsampled.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_1d_unsampled.cpp index aa8532eaf61a5..adba096a55b6c 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_1d_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_1d_unsampled.cpp @@ -99,6 +99,7 @@ #include #include #include +#include namespace syclexp = sycl::ext::oneapi::experimental; @@ -212,7 +213,15 @@ int runTest( } try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; // IMPORT MEMORY #ifdef _WIN32 @@ -313,97 +322,99 @@ int runTest( syclexp::destroy_image_handle(unsampledHandle, q.get_device(), q.get_context()); syclexp::release_external_memory(extMem, q.get_device(), q.get_context()); + + // Vulkan Verify + vkDeviceWaitIdle(vkCtx.device); + VkBuffer verifyBuffer; + VkDeviceMemory verifyMem; + size_t dataSize = width * channels * sizeof(T); + VkBufferCreateInfo bi = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bi.size = dataSize; + bi.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + vkCreateBuffer(vkCtx.device, &bi, nullptr, &verifyBuffer); + VkMemoryRequirements req; + vkGetBufferMemoryRequirements(vkCtx.device, verifyBuffer, &req); + VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + ai.allocationSize = req.size; + ai.memoryTypeIndex = + findMemoryType(vkCtx.physicalDevice, req.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + vkAllocateMemory(vkCtx.device, &ai, nullptr, &verifyMem); + vkBindBufferMemory(vkCtx.device, verifyBuffer, verifyMem, 0); + + { + VkCommandPoolCreateInfo poolInfo = { + VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; + poolInfo.queueFamilyIndex = vkCtx.queueFamilyIndex; + VkCommandPool pool; + vkCreateCommandPool(vkCtx.device, &poolInfo, nullptr, &pool); + VkCommandBuffer cmd; + VkCommandBufferAllocateInfo ca = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; + ca.commandPool = pool; + ca.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + ca.commandBufferCount = 1; + vkAllocateCommandBuffers(vkCtx.device, &ca, &cmd); + VkCommandBufferBeginInfo bi = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + vkBeginCommandBuffer(cmd, &bi); + VkBufferImageCopy reg = {}; + reg.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + reg.imageExtent = extent; + vkCmdCopyImageToBuffer(cmd, imgRes.image, VK_IMAGE_LAYOUT_GENERAL, + verifyBuffer, 1, ®); + vkEndCommandBuffer(cmd); + VkSubmitInfo si = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + si.commandBufferCount = 1; + si.pCommandBuffers = &cmd; + std::vector waitStages = { + VK_PIPELINE_STAGE_TRANSFER_BIT}; + if (useSemaphores) { + si.waitSemaphoreCount = 1; + si.pWaitSemaphores = &vkSem; + si.pWaitDstStageMask = waitStages.data(); + } + vkQueueSubmit(vkCtx.queue, 1, &si, VK_NULL_HANDLE); + vkQueueWaitIdle(vkCtx.queue); + vkDestroyCommandPool(vkCtx.device, pool, nullptr); + } + + void *ptr; + vkMapMemory(vkCtx.device, verifyMem, 0, dataSize, 0, &ptr); + T *vData = (T *)ptr; + bool passed = true; + int errorCount = 0; + for (size_t i = 0; i < width * channels; ++i) { + T expected = generateTestValue(i / channels, i % channels, width); + if (!checkValue(vData[i], expected)) { + passed = false; + if (errorCount++ < 5) + std::cout << "Mismatch at " << i << " Got: " << (double)vData[i] + << " Exp: " << (double)expected << std::endl; + } + } + vkUnmapMemory(vkCtx.device, verifyMem); + if (passed) + std::cout << "SUCCESS!" << std::endl; + else + std::cout << "FAILURE! (" << errorCount << " errors)" << std::endl; + + vkDestroyBuffer(vkCtx.device, verifyBuffer, nullptr); + vkFreeMemory(vkCtx.device, verifyMem, nullptr); if (useSemaphores) { syclexp::release_external_semaphore(extSem, q.get_device(), q.get_context()); + vkDestroySemaphore(vkCtx.device, vkSem, nullptr); } + cleanupVulkan(vkCtx, imgRes); + return passed ? 0 : 1; + } catch (std::exception &e) { std::cerr << "SYCL Exception: " << e.what() << std::endl; + cleanupVulkan(vkCtx, imgRes); return 1; } - - // Vulkan Verify - vkDeviceWaitIdle(vkCtx.device); - VkBuffer verifyBuffer; - VkDeviceMemory verifyMem; - size_t dataSize = width * channels * sizeof(T); - VkBufferCreateInfo bi = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - bi.size = dataSize; - bi.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; - vkCreateBuffer(vkCtx.device, &bi, nullptr, &verifyBuffer); - VkMemoryRequirements req; - vkGetBufferMemoryRequirements(vkCtx.device, verifyBuffer, &req); - VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; - ai.allocationSize = req.size; - ai.memoryTypeIndex = findMemoryType(vkCtx.physicalDevice, req.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - vkAllocateMemory(vkCtx.device, &ai, nullptr, &verifyMem); - vkBindBufferMemory(vkCtx.device, verifyBuffer, verifyMem, 0); - - { - VkCommandPoolCreateInfo poolInfo = { - VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; - poolInfo.queueFamilyIndex = vkCtx.queueFamilyIndex; - VkCommandPool pool; - vkCreateCommandPool(vkCtx.device, &poolInfo, nullptr, &pool); - VkCommandBuffer cmd; - VkCommandBufferAllocateInfo ca = { - VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; - ca.commandPool = pool; - ca.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - ca.commandBufferCount = 1; - vkAllocateCommandBuffers(vkCtx.device, &ca, &cmd); - VkCommandBufferBeginInfo bi = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; - vkBeginCommandBuffer(cmd, &bi); - VkBufferImageCopy reg = {}; - reg.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - reg.imageExtent = extent; - vkCmdCopyImageToBuffer(cmd, imgRes.image, VK_IMAGE_LAYOUT_GENERAL, - verifyBuffer, 1, ®); - vkEndCommandBuffer(cmd); - VkSubmitInfo si = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; - si.commandBufferCount = 1; - si.pCommandBuffers = &cmd; - std::vector waitStages = { - VK_PIPELINE_STAGE_TRANSFER_BIT}; - if (useSemaphores) { - si.waitSemaphoreCount = 1; - si.pWaitSemaphores = &vkSem; - si.pWaitDstStageMask = waitStages.data(); - } - vkQueueSubmit(vkCtx.queue, 1, &si, VK_NULL_HANDLE); - vkQueueWaitIdle(vkCtx.queue); - vkDestroyCommandPool(vkCtx.device, pool, nullptr); - } - - void *ptr; - vkMapMemory(vkCtx.device, verifyMem, 0, dataSize, 0, &ptr); - T *vData = (T *)ptr; - bool passed = true; - int errorCount = 0; - for (size_t i = 0; i < width * channels; ++i) { - T expected = generateTestValue(i / channels, i % channels, width); - if (!checkValue(vData[i], expected)) { - passed = false; - if (errorCount++ < 5) - std::cout << "Mismatch at " << i << " Got: " << (double)vData[i] - << " Exp: " << (double)expected << std::endl; - } - } - vkUnmapMemory(vkCtx.device, verifyMem); - if (passed) - std::cout << "SUCCESS!" << std::endl; - else - std::cout << "FAILURE! (" << errorCount << " errors)" << std::endl; - - vkDestroyBuffer(vkCtx.device, verifyBuffer, nullptr); - vkFreeMemory(vkCtx.device, verifyMem, nullptr); - if (useSemaphores) - vkDestroySemaphore(vkCtx.device, vkSem, nullptr); - cleanupVulkan(vkCtx, imgRes); - - return passed ? 0 : 1; } int main(int argc, char **argv) { diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_2d_unsampled.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_2d_unsampled.cpp index 3644fd78b7fd0..14a8ec67c267a 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_2d_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_2d_unsampled.cpp @@ -7,12 +7,6 @@ // UNSUPPORTED: linux // UNSUPPORTED-TRACKER: GSD-12357 -// XFAIL: windows && gpu-intel-dg2 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21985 - -// UNSUPPORTED: windows && arch-intel_gpu_bmg_g21 -// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21986 - /* Run all the vulkan formats through a write test. Note this is unsampled only, you can't "write" with the image sampler. @@ -96,6 +90,7 @@ #include #include #include +#include namespace syclexp = sycl::ext::oneapi::experimental; @@ -208,7 +203,15 @@ int runTest( } try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; // IMPORT MEMORY #ifdef _WIN32 @@ -324,100 +327,103 @@ int runTest( syclexp::destroy_image_handle(unsampledHandle, q.get_device(), q.get_context()); syclexp::release_external_memory(extMem, q.get_device(), q.get_context()); + + // Vulkan Verify + vkDeviceWaitIdle(vkCtx.device); + VkBuffer verifyBuffer; + VkDeviceMemory verifyMem; + size_t dataSize = width * height * channels * sizeof(T); + VkBufferCreateInfo bi = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bi.size = dataSize; + bi.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + vkCreateBuffer(vkCtx.device, &bi, nullptr, &verifyBuffer); + VkMemoryRequirements req; + vkGetBufferMemoryRequirements(vkCtx.device, verifyBuffer, &req); + VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + ai.allocationSize = req.size; + ai.memoryTypeIndex = + findMemoryType(vkCtx.physicalDevice, req.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + vkAllocateMemory(vkCtx.device, &ai, nullptr, &verifyMem); + vkBindBufferMemory(vkCtx.device, verifyBuffer, verifyMem, 0); + + { + VkCommandPoolCreateInfo poolInfo = { + VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; + poolInfo.queueFamilyIndex = vkCtx.queueFamilyIndex; + VkCommandPool pool; + vkCreateCommandPool(vkCtx.device, &poolInfo, nullptr, &pool); + VkCommandBuffer cmd; + VkCommandBufferAllocateInfo ca = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; + ca.commandPool = pool; + ca.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + ca.commandBufferCount = 1; + vkAllocateCommandBuffers(vkCtx.device, &ca, &cmd); + VkCommandBufferBeginInfo bi = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + vkBeginCommandBuffer(cmd, &bi); + VkBufferImageCopy reg = {}; + reg.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + reg.imageExtent = extent; + vkCmdCopyImageToBuffer(cmd, imgRes.image, VK_IMAGE_LAYOUT_GENERAL, + verifyBuffer, 1, ®); + vkEndCommandBuffer(cmd); + VkSubmitInfo si = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + si.commandBufferCount = 1; + si.pCommandBuffers = &cmd; + std::vector waitStages = { + VK_PIPELINE_STAGE_TRANSFER_BIT}; + if (useSemaphores) { + si.waitSemaphoreCount = 1; + si.pWaitSemaphores = &vkSem; + si.pWaitDstStageMask = waitStages.data(); + } + vkQueueSubmit(vkCtx.queue, 1, &si, VK_NULL_HANDLE); + vkQueueWaitIdle(vkCtx.queue); + vkDestroyCommandPool(vkCtx.device, pool, nullptr); + } + + void *ptr; + vkMapMemory(vkCtx.device, verifyMem, 0, dataSize, 0, &ptr); + T *vData = (T *)ptr; + bool passed = true; + int errorCount = 0; + size_t totalPixels = width * height; + + for (size_t i = 0; i < totalPixels * channels; ++i) { + size_t pixelIdx = i / channels; + int ch = i % channels; + T expected = generateTestValue(pixelIdx, ch, totalPixels); + if (!checkValue(vData[i], expected)) { + passed = false; + if (errorCount++ < 5) + std::cout << "Mismatch at " << i << " Got: " << (double)vData[i] + << " Exp: " << (double)expected << std::endl; + } + } + vkUnmapMemory(vkCtx.device, verifyMem); + if (passed) + std::cout << "SUCCESS!" << std::endl; + else + std::cout << "FAILURE! (" << errorCount << " errors)" << std::endl; + + vkDestroyBuffer(vkCtx.device, verifyBuffer, nullptr); + vkFreeMemory(vkCtx.device, verifyMem, nullptr); if (useSemaphores) { syclexp::release_external_semaphore(extSem, q.get_device(), q.get_context()); + vkDestroySemaphore(vkCtx.device, vkSem, nullptr); } + cleanupVulkan(vkCtx, imgRes); + return passed ? 0 : 1; + } catch (std::exception &e) { std::cerr << "SYCL Exception: " << e.what() << std::endl; + cleanupVulkan(vkCtx, imgRes); return 1; } - - // Vulkan Verify - vkDeviceWaitIdle(vkCtx.device); - VkBuffer verifyBuffer; - VkDeviceMemory verifyMem; - size_t dataSize = width * height * channels * sizeof(T); - VkBufferCreateInfo bi = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - bi.size = dataSize; - bi.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; - vkCreateBuffer(vkCtx.device, &bi, nullptr, &verifyBuffer); - VkMemoryRequirements req; - vkGetBufferMemoryRequirements(vkCtx.device, verifyBuffer, &req); - VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; - ai.allocationSize = req.size; - ai.memoryTypeIndex = findMemoryType(vkCtx.physicalDevice, req.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - vkAllocateMemory(vkCtx.device, &ai, nullptr, &verifyMem); - vkBindBufferMemory(vkCtx.device, verifyBuffer, verifyMem, 0); - - { - VkCommandPoolCreateInfo poolInfo = { - VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; - poolInfo.queueFamilyIndex = vkCtx.queueFamilyIndex; - VkCommandPool pool; - vkCreateCommandPool(vkCtx.device, &poolInfo, nullptr, &pool); - VkCommandBuffer cmd; - VkCommandBufferAllocateInfo ca = { - VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; - ca.commandPool = pool; - ca.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - ca.commandBufferCount = 1; - vkAllocateCommandBuffers(vkCtx.device, &ca, &cmd); - VkCommandBufferBeginInfo bi = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; - vkBeginCommandBuffer(cmd, &bi); - VkBufferImageCopy reg = {}; - reg.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - reg.imageExtent = extent; - vkCmdCopyImageToBuffer(cmd, imgRes.image, VK_IMAGE_LAYOUT_GENERAL, - verifyBuffer, 1, ®); - vkEndCommandBuffer(cmd); - VkSubmitInfo si = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; - si.commandBufferCount = 1; - si.pCommandBuffers = &cmd; - std::vector waitStages = { - VK_PIPELINE_STAGE_TRANSFER_BIT}; - if (useSemaphores) { - si.waitSemaphoreCount = 1; - si.pWaitSemaphores = &vkSem; - si.pWaitDstStageMask = waitStages.data(); - } - vkQueueSubmit(vkCtx.queue, 1, &si, VK_NULL_HANDLE); - vkQueueWaitIdle(vkCtx.queue); - vkDestroyCommandPool(vkCtx.device, pool, nullptr); - } - - void *ptr; - vkMapMemory(vkCtx.device, verifyMem, 0, dataSize, 0, &ptr); - T *vData = (T *)ptr; - bool passed = true; - int errorCount = 0; - size_t totalPixels = width * height; - - for (size_t i = 0; i < totalPixels * channels; ++i) { - size_t pixelIdx = i / channels; - int ch = i % channels; - T expected = generateTestValue(pixelIdx, ch, totalPixels); - if (!checkValue(vData[i], expected)) { - passed = false; - if (errorCount++ < 5) - std::cout << "Mismatch at " << i << " Got: " << (double)vData[i] - << " Exp: " << (double)expected << std::endl; - } - } - vkUnmapMemory(vkCtx.device, verifyMem); - if (passed) - std::cout << "SUCCESS!" << std::endl; - else - std::cout << "FAILURE! (" << errorCount << " errors)" << std::endl; - - vkDestroyBuffer(vkCtx.device, verifyBuffer, nullptr); - vkFreeMemory(vkCtx.device, verifyMem, nullptr); - if (useSemaphores) - vkDestroySemaphore(vkCtx.device, vkSem, nullptr); - cleanupVulkan(vkCtx, imgRes); - return passed ? 0 : 1; } int main(int argc, char **argv) { diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_3d_unsampled.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_3d_unsampled.cpp index f3e1b5458e07b..17526a62bc7f3 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_3d_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_interop_write_3d_unsampled.cpp @@ -40,6 +40,7 @@ #include #include #include +#include namespace syclexp = sycl::ext::oneapi::experimental; @@ -152,7 +153,15 @@ int runTest( } try { - sycl::queue q; + // Bindless image interop requires an in-order queue (per spec). External + // semaphore ops additionally require immediate command lists; see + // sycl_ext_oneapi_bindless_images.asciidoc. + sycl::property_list qProps = + useSemaphores ? sycl::property_list{sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue:: + immediate_command_list{}} + : sycl::property_list{sycl::property::queue::in_order{}}; + sycl::queue q{qProps}; // IMPORT MEMORY #ifdef _WIN32 @@ -267,97 +276,101 @@ int runTest( syclexp::destroy_image_handle(unsampledHandle, q.get_device(), q.get_context()); syclexp::release_external_memory(extMem, q.get_device(), q.get_context()); + + // Vulkan Verify + vkDeviceWaitIdle(vkCtx.device); + VkBuffer verifyBuffer; + VkDeviceMemory verifyMem; + size_t dataSize = width * height * depth * channels * sizeof(T); + VkBufferCreateInfo bi = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bi.size = dataSize; + bi.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + vkCreateBuffer(vkCtx.device, &bi, nullptr, &verifyBuffer); + VkMemoryRequirements req; + vkGetBufferMemoryRequirements(vkCtx.device, verifyBuffer, &req); + VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + ai.allocationSize = req.size; + ai.memoryTypeIndex = + findMemoryType(vkCtx.physicalDevice, req.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + vkAllocateMemory(vkCtx.device, &ai, nullptr, &verifyMem); + vkBindBufferMemory(vkCtx.device, verifyBuffer, verifyMem, 0); + + { + VkCommandPoolCreateInfo poolInfo = { + VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; + poolInfo.queueFamilyIndex = vkCtx.queueFamilyIndex; + VkCommandPool pool; + vkCreateCommandPool(vkCtx.device, &poolInfo, nullptr, &pool); + VkCommandBuffer cmd; + VkCommandBufferAllocateInfo ca = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; + ca.commandPool = pool; + ca.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + ca.commandBufferCount = 1; + vkAllocateCommandBuffers(vkCtx.device, &ca, &cmd); + VkCommandBufferBeginInfo bi = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + vkBeginCommandBuffer(cmd, &bi); + VkBufferImageCopy reg = {}; + reg.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + reg.imageExtent = extent; + vkCmdCopyImageToBuffer(cmd, imgRes.image, VK_IMAGE_LAYOUT_GENERAL, + verifyBuffer, 1, ®); + vkEndCommandBuffer(cmd); + VkSubmitInfo si = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + si.commandBufferCount = 1; + si.pCommandBuffers = &cmd; + std::vector waitStages = { + VK_PIPELINE_STAGE_TRANSFER_BIT}; + if (useSemaphores) { + si.waitSemaphoreCount = 1; + si.pWaitSemaphores = &vkSem; + si.pWaitDstStageMask = waitStages.data(); + } + vkQueueSubmit(vkCtx.queue, 1, &si, VK_NULL_HANDLE); + vkQueueWaitIdle(vkCtx.queue); + vkDestroyCommandPool(vkCtx.device, pool, nullptr); + } + + void *ptr; + vkMapMemory(vkCtx.device, verifyMem, 0, dataSize, 0, &ptr); + T *vData = (T *)ptr; + bool passed = true; + int errorCount = 0; + size_t totalPixels = width * height * depth; + for (size_t i = 0; i < totalPixels * channels; ++i) { + T expected = + generateTestValue(i / channels, i % channels, totalPixels); + if (!checkValue(vData[i], expected)) { + passed = false; + if (errorCount++ < 5) + std::cout << "Mismatch at " << i << " Got: " << (double)vData[i] + << " Exp: " << (double)expected << std::endl; + } + } + vkUnmapMemory(vkCtx.device, verifyMem); + if (passed) + std::cout << "SUCCESS!" << std::endl; + else + std::cout << "FAILURE! (" << errorCount << " errors)" << std::endl; + + vkDestroyBuffer(vkCtx.device, verifyBuffer, nullptr); + vkFreeMemory(vkCtx.device, verifyMem, nullptr); if (useSemaphores) { syclexp::release_external_semaphore(extSem, q.get_device(), q.get_context()); + vkDestroySemaphore(vkCtx.device, vkSem, nullptr); } + cleanupVulkan(vkCtx, imgRes); + return passed ? 0 : 1; + } catch (std::exception &e) { std::cerr << "SYCL Exception: " << e.what() << std::endl; + cleanupVulkan(vkCtx, imgRes); return 1; } - - // Vulkan Verify - vkDeviceWaitIdle(vkCtx.device); - VkBuffer verifyBuffer; - VkDeviceMemory verifyMem; - size_t dataSize = width * height * depth * channels * sizeof(T); - VkBufferCreateInfo bi = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - bi.size = dataSize; - bi.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; - vkCreateBuffer(vkCtx.device, &bi, nullptr, &verifyBuffer); - VkMemoryRequirements req; - vkGetBufferMemoryRequirements(vkCtx.device, verifyBuffer, &req); - VkMemoryAllocateInfo ai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; - ai.allocationSize = req.size; - ai.memoryTypeIndex = findMemoryType(vkCtx.physicalDevice, req.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - vkAllocateMemory(vkCtx.device, &ai, nullptr, &verifyMem); - vkBindBufferMemory(vkCtx.device, verifyBuffer, verifyMem, 0); - - { - VkCommandPoolCreateInfo poolInfo = { - VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; - poolInfo.queueFamilyIndex = vkCtx.queueFamilyIndex; - VkCommandPool pool; - vkCreateCommandPool(vkCtx.device, &poolInfo, nullptr, &pool); - VkCommandBuffer cmd; - VkCommandBufferAllocateInfo ca = { - VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; - ca.commandPool = pool; - ca.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - ca.commandBufferCount = 1; - vkAllocateCommandBuffers(vkCtx.device, &ca, &cmd); - VkCommandBufferBeginInfo bi = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; - vkBeginCommandBuffer(cmd, &bi); - VkBufferImageCopy reg = {}; - reg.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - reg.imageExtent = extent; - vkCmdCopyImageToBuffer(cmd, imgRes.image, VK_IMAGE_LAYOUT_GENERAL, - verifyBuffer, 1, ®); - vkEndCommandBuffer(cmd); - VkSubmitInfo si = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; - si.commandBufferCount = 1; - si.pCommandBuffers = &cmd; - std::vector waitStages = { - VK_PIPELINE_STAGE_TRANSFER_BIT}; - if (useSemaphores) { - si.waitSemaphoreCount = 1; - si.pWaitSemaphores = &vkSem; - si.pWaitDstStageMask = waitStages.data(); - } - vkQueueSubmit(vkCtx.queue, 1, &si, VK_NULL_HANDLE); - vkQueueWaitIdle(vkCtx.queue); - vkDestroyCommandPool(vkCtx.device, pool, nullptr); - } - - void *ptr; - vkMapMemory(vkCtx.device, verifyMem, 0, dataSize, 0, &ptr); - T *vData = (T *)ptr; - bool passed = true; - int errorCount = 0; - size_t totalPixels = width * height * depth; - for (size_t i = 0; i < totalPixels * channels; ++i) { - T expected = generateTestValue(i / channels, i % channels, totalPixels); - if (!checkValue(vData[i], expected)) { - passed = false; - if (errorCount++ < 5) - std::cout << "Mismatch at " << i << " Got: " << (double)vData[i] - << " Exp: " << (double)expected << std::endl; - } - } - vkUnmapMemory(vkCtx.device, verifyMem); - if (passed) - std::cout << "SUCCESS!" << std::endl; - else - std::cout << "FAILURE! (" << errorCount << " errors)" << std::endl; - - vkDestroyBuffer(vkCtx.device, verifyBuffer, nullptr); - vkFreeMemory(vkCtx.device, verifyMem, nullptr); - if (useSemaphores) - vkDestroySemaphore(vkCtx.device, vkSem, nullptr); - cleanupVulkan(vkCtx, imgRes); - return passed ? 0 : 1; } int main(int argc, char **argv) { diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_unsampled_timeline_semaphore.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_unsampled_timeline_semaphore.cpp index c7060b7bfd48c..efaa62054e181 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_unsampled_timeline_semaphore.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_image_unsampled_timeline_semaphore.cpp @@ -5,9 +5,6 @@ // UNSUPPORTED: linux && (gpu-intel-dg2 || arch-intel_gpu_mtl_u) // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21764 -// XFAIL: windows && gpu-intel-dg2 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/21985 - // RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes %} // RUN: %{run} %t.out --type float --channels 1 @@ -60,6 +57,7 @@ #include #include #include +#include namespace syclexp = sycl::ext::oneapi::experimental; @@ -174,7 +172,11 @@ int runTest( uint64_t syclSignalVal = 2; try { - sycl::queue q; + // External semaphore ops require an in-order queue backed by immediate + // command lists (see sycl_ext_oneapi_bindless_images.asciidoc). + sycl::queue q{ + {sycl::property::queue::in_order{}, + sycl::ext::intel::property::queue::immediate_command_list{}}}; #ifdef _WIN32 HANDLE memHandle = getMemHandle(vkCtx, imgResc.memory);