diff --git a/libdevice/sanitizer/asan_rtl.cpp b/libdevice/sanitizer/asan_rtl.cpp index 3fcb0f12f53b5..2aa365d50633e 100644 --- a/libdevice/sanitizer/asan_rtl.cpp +++ b/libdevice/sanitizer/asan_rtl.cpp @@ -1029,4 +1029,70 @@ ASAN_MEMMOVE(1) ASAN_MEMMOVE(3) ASAN_MEMMOVE(4) +static void Block2DCheck(uptr surface_base, uptr block_ptr, int element_size, + int block_width, int block_height, int block_count, + int surface_pitch, int coord_x, int coord_y, + bool is_load, const char __SYCL_CONSTANT__ *file, + uint32_t line, const char __SYCL_CONSTANT__ *func) { + // Per SPV_INTEL_2d_block_io spec: + // element_size: bytes per element + // block_width: elements per row + // surface_pitch: bytes between rows + // coord_x: element offset, coord_y: row offset + int row_width_bytes = block_width * element_size * block_count; + size_t block_size = (size_t)row_width_bytes * block_height; + + { + DebugInfo debug{block_ptr, /*as=*/4, block_size, is_load, file, func, line}; + if (auto poisoned_addr = + IsRegionPoisoned(block_ptr, 4, block_size, &debug)) { + ReportAccessError(poisoned_addr, 4, false, &debug); + return; + } + } + + uptr start_addr = surface_base + (uptr)coord_y * surface_pitch + + (uptr)coord_x * element_size; + for (int row = 0; row < block_height; row++) { + uptr row_addr = start_addr + (uptr)row * surface_pitch; + DebugInfo debug{row_addr, /*as=*/4, (size_t)row_width_bytes, !is_load, file, + func, line}; + if (auto poisoned_addr = + IsRegionPoisoned(row_addr, 4, (size_t)row_width_bytes, &debug)) { + ReportAccessError(poisoned_addr, 4, false, &debug); + return; + } + } +} + +DEVICE_EXTERN_C_NOINLINE void __asan_block2d_load_check( + __attribute__((address_space(4))) const char *src_base_ptr, char *dst_ptr, + int element_size, int block_width, int block_height, int block_count, + int surface_width, int surface_height, int surface_pitch, int coord_x, + int coord_y, const char __SYCL_CONSTANT__ *file, uint32_t line, + const char __SYCL_CONSTANT__ *func) { + if (!__AsanLaunchInfo) + return; + if (__spirv_BuiltInSubgroupLocalInvocationId() != 0) + return; + Block2DCheck((uptr)src_base_ptr, (uptr)dst_ptr, element_size, block_width, + block_height, block_count, surface_pitch, coord_x, coord_y, + /*is_load=*/true, file, line, func); +} + +DEVICE_EXTERN_C_NOINLINE void __asan_block2d_store_check( + __attribute__((address_space(4))) const char *dst_base_ptr, char *src_ptr, + int element_size, int block_width, int block_height, int block_count, + int surface_width, int surface_height, int surface_pitch, int coord_x, + int coord_y, const char __SYCL_CONSTANT__ *file, uint32_t line, + const char __SYCL_CONSTANT__ *func) { + if (!__AsanLaunchInfo) + return; + if (__spirv_BuiltInSubgroupLocalInvocationId() != 0) + return; + Block2DCheck((uptr)dst_base_ptr, (uptr)src_ptr, element_size, block_width, + block_height, block_count, surface_pitch, coord_x, coord_y, + /*is_load=*/false, file, line, func); +} + #endif // __SPIR__ || __SPIRV__ diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index a5c1d7316e177..4c0dcdfbe19f8 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -900,6 +900,7 @@ struct AddressSanitizer { Value *SizeArgument, uint32_t Exp, RuntimeCallInserter &RTCI); void instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI); + void instrumentBlock2DOp(CallBase *CB, RuntimeCallInserter &RTCI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB, uint32_t AddressSpace = kSpirOffloadPrivateAS); bool suppressInstrumentationSiteForDebug(int &Instrumented); @@ -977,6 +978,7 @@ struct AddressSanitizer { FunctionCallee AsanMemcpyAS[kNumberOfAddressSpace][kNumberOfAddressSpace], AsanMemmoveAS[kNumberOfAddressSpace][kNumberOfAddressSpace], AsanMemsetAS[kNumberOfAddressSpace]; + FunctionCallee AsanSGBlock2DLoadCheck, AsanSGBlock2DStoreCheck; Value *LocalDynamicShadow = nullptr; const StackSafetyGlobalInfo *SSGI; DenseMap ProcessedAllocas; @@ -1999,6 +2001,50 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI, MI->eraseFromParent(); } +void AddressSanitizer::instrumentBlock2DOp(CallBase *CB, + RuntimeCallInserter &RTCI) { + InstrumentationIRBuilder IRB(CB); + Function *Callee = CB->getCalledFunction(); + StringRef Name = Callee->getName(); + bool IsStore = Name.contains("__spirv_Subgroup2DBlockStore"); + + // Per SPIRVBuiltins.td, both load and store have 10 args: + // Load: (elem_size, width, height, count, src_base_ptr, surf_w, surf_h, + // surf_pitch, <2xi32> coord, dst_ptr) + // Store: (elem_size, width, height, count, src_ptr, dst_base_ptr, surf_w, + // surf_h, surf_pitch, <2xi32> coord) + Value *BasePtr = IsStore ? CB->getArgOperand(5) : CB->getArgOperand(4); + Value *BlockPtr = IsStore ? CB->getArgOperand(4) : CB->getArgOperand(9); + Value *ElemSize = CB->getArgOperand(0); + Value *BlockWidth = CB->getArgOperand(1); + Value *BlockHeight = CB->getArgOperand(2); + Value *BlockCount = CB->getArgOperand(3); + Value *SurfWidth = IsStore ? CB->getArgOperand(6) : CB->getArgOperand(5); + Value *SurfHeight = IsStore ? CB->getArgOperand(7) : CB->getArgOperand(6); + Value *SurfPitch = IsStore ? CB->getArgOperand(8) : CB->getArgOperand(7); + Value *Coord = IsStore ? CB->getArgOperand(9) : CB->getArgOperand(8); + + Value *CoordX = IRB.CreateExtractElement(Coord, uint64_t(0)); + Value *CoordY = IRB.CreateExtractElement(Coord, uint64_t(1)); + + SmallVector Args; + Args.push_back(BasePtr); + Args.push_back(BlockPtr); + Args.push_back(ElemSize); + Args.push_back(BlockWidth); + Args.push_back(BlockHeight); + Args.push_back(BlockCount); + Args.push_back(SurfWidth); + Args.push_back(SurfHeight); + Args.push_back(SurfPitch); + Args.push_back(CoordX); + Args.push_back(CoordY); + AppendDebugInfoToArgs(CB, Args); + + RTCI.createRuntimeCall( + IRB, IsStore ? AsanSGBlock2DStoreCheck : AsanSGBlock2DLoadCheck, Args); +} + /// Check if we want (and can) handle this alloca. bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { auto [It, Inserted] = ProcessedAllocas.try_emplace(&AI); @@ -3857,6 +3903,32 @@ void AddressSanitizer::initializeCallbacks(const TargetLibraryInfo *TLI) { IRB.getInt32Ty(), Int8PtrTy); } } + + // 2D block load/store checks + // __asan_block2d_{load,store}_check( + // ptr base_ptr, // surface base address (generic AS 4) + // ptr block_ptr, // dst for load, src for store (generic AS 4) + // i32 element_size, // bytes per element + // i32 block_width, // elements per row + // i32 block_height, // rows per block + // i32 block_count, // number of blocks + // i32 surface_width, // surface width in bytes + // i32 surface_height, // surface height in rows + // i32 surface_pitch, // bytes between rows + // i32 coord_x, // element offset + // i32 coord_y, // row offset + // ptr file, i32 line, ptr func) + auto *GenericPtrTy = PointerType::get(*C, kSpirOffloadGenericAS); + auto GetBlock2DCheckFunc = [&](StringRef Name) { + return M.getOrInsertFunction( + Name, IRB.getVoidTy(), GenericPtrTy, GenericPtrTy, IRB.getInt32Ty(), + IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32Ty(), + IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32Ty(), + IRB.getInt32Ty(), IRB.getInt32Ty(), Int8PtrTy, IRB.getInt32Ty(), + Int8PtrTy); + }; + AsanSGBlock2DLoadCheck = GetBlock2DCheckFunc("__asan_block2d_load_check"); + AsanSGBlock2DStoreCheck = GetBlock2DCheckFunc("__asan_block2d_store_check"); } const std::string MemIntrinCallbackPrefix = @@ -4057,6 +4129,7 @@ bool AddressSanitizer::instrumentFunction(Function &F, SmallPtrSet TempsToInstrument; SmallVector OperandsToInstrument; SmallVector IntrinToInstrument; + SmallVector Block2DToInstrument; SmallVector NoReturnCalls; SmallVector AllBlocks; SmallVector PointerComparisonsOrSubtracts; @@ -4103,9 +4176,18 @@ bool AddressSanitizer::instrumentFunction(Function &F, NumInsnsPerBB++; } else { if (auto *CB = dyn_cast(&Inst)) { - // On device side, the only non return cases should be *.trap or - // assert, and none of these cases need to be handles. - if (!TargetTriple.isSPIROrSPIRV()) { + if (TargetTriple.isSPIROrSPIRV()) { + if (Function *F = CB->getCalledFunction()) { + StringRef Name = F->getName(); + if (Name.contains("__spirv_Subgroup2DBlockLoad") || + Name.contains("__spirv_Subgroup2DBlockStore")) { + Block2DToInstrument.push_back(CB); + NumInsnsPerBB++; + } + } + } else { + // On device side, the only non return cases should be *.trap or + // assert, and none of these cases need to be handles. // A call inside BB. TempsToInstrument.clear(); if (CB->doesNotReturn()) @@ -4138,6 +4220,11 @@ bool AddressSanitizer::instrumentFunction(Function &F, instrumentMemIntrinsic(Inst, RTCI); FunctionModified = true; } + for (auto *CB : Block2DToInstrument) { + if (!suppressInstrumentationSiteForDebug(NumInstrumented)) + instrumentBlock2DOp(CB, RTCI); + FunctionModified = true; + } FunctionStackPoisoner FSP(F, *this, RTCI); bool ChangedStack = FSP.runOnFunction(); diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/block2d_load_store.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/block2d_load_store.ll new file mode 100644 index 0000000000000..8325fc08a7f99 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/block2d_load_store.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-use-after-return=never -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +declare dso_local spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPKviiiDv2_iPv(i32 noundef, i32 noundef, i32 noundef, i32 noundef, ptr addrspace(4) noundef, i32 noundef, i32 noundef, i32 noundef, <2 x i32> noundef, ptr addrspace(4) noundef) local_unnamed_addr + +declare dso_local spir_func void @_Z33__spirv_Subgroup2DBlockStoreINTELiiiiPKviPviiiDv2_i(i32 noundef, i32 noundef, i32 noundef, i32 noundef, ptr addrspace(4) noundef, ptr addrspace(4) noundef, i32 noundef, i32 noundef, i32 noundef, <2 x i32> noundef) local_unnamed_addr + +; Test: Function without sanitize_address should NOT be instrumented +define weak_odr dso_local spir_kernel void @test_block2d_no_sanitize(ptr addrspace(4) %base, ptr addrspace(4) %dst) { +; CHECK-LABEL: define weak_odr dso_local spir_kernel void @test_block2d_no_sanitize( +; CHECK-SAME: ptr addrspace(4) [[BASE:%.*]], ptr addrspace(4) [[DST:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: store ptr addrspace(1) null, ptr addrspace(3) @__AsanLaunchInfo, align 8 +; CHECK-NEXT: [[__PRIVATE_BASE:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @__asan_set_private_base(ptr [[__PRIVATE_BASE]]) +; CHECK-NEXT: [[COORD:%.*]] = insertelement <2 x i32> poison, i32 0, i64 0 +; CHECK-NEXT: [[COORD1:%.*]] = insertelement <2 x i32> [[COORD]], i32 0, i64 1 +; CHECK-NEXT: call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPKviiiDv2_iPv(i32 16, i32 32, i32 32, i32 1, ptr addrspace(4) [[BASE]], i32 2047, i32 1023, i32 2047, <2 x i32> [[COORD1]], ptr addrspace(4) [[DST]]) +; CHECK-NEXT: ret void +; +entry: + %coord = insertelement <2 x i32> poison, i32 0, i64 0 + %coord1 = insertelement <2 x i32> %coord, i32 0, i64 1 + call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPKviiiDv2_iPv(i32 16, i32 32, i32 32, i32 1, ptr addrspace(4) %base, i32 2047, i32 1023, i32 2047, <2 x i32> %coord1, ptr addrspace(4) %dst) + ret void +} + +; Test: 2D block load should be instrumented with __asan_block2d_load_check +; Function Attrs: sanitize_address +define weak_odr dso_local spir_kernel void @test_block2d_load(ptr addrspace(4) %base, ptr addrspace(4) %dst) #0 { +; CHECK-LABEL: define weak_odr dso_local spir_kernel void @test_block2d_load( +; CHECK-SAME: ptr addrspace(4) [[BASE:%.*]], ptr addrspace(4) [[DST:%.*]], ptr addrspace(1) noundef [[__ASAN_LAUNCH:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: store ptr addrspace(1) [[__ASAN_LAUNCH]], ptr addrspace(3) @__AsanLaunchInfo, align 8 +; CHECK-NEXT: [[__PRIVATE_BASE:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @__asan_set_private_base(ptr [[__PRIVATE_BASE]]) +; CHECK-NEXT: [[COORD:%.*]] = insertelement <2 x i32> poison, i32 0, i64 0 +; CHECK-NEXT: [[COORD1:%.*]] = insertelement <2 x i32> [[COORD]], i32 0, i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[COORD1]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[COORD1]], i64 1 +; CHECK-NEXT: call void @__asan_block2d_load_check(ptr addrspace(4) [[BASE]], ptr addrspace(4) [[DST]], i32 16, i32 32, i32 32, i32 1, i32 2047, i32 1023, i32 2047, i32 [[TMP0]], i32 [[TMP1]], ptr addrspace(2) null, i32 0, ptr addrspace(2) @__asan_kernel) +; CHECK-NEXT: call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPKviiiDv2_iPv(i32 16, i32 32, i32 32, i32 1, ptr addrspace(4) [[BASE]], i32 2047, i32 1023, i32 2047, <2 x i32> [[COORD1]], ptr addrspace(4) [[DST]]) +; CHECK-NEXT: ret void +; +entry: + %coord = insertelement <2 x i32> poison, i32 0, i64 0 + %coord1 = insertelement <2 x i32> %coord, i32 0, i64 1 + call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPKviiiDv2_iPv(i32 16, i32 32, i32 32, i32 1, ptr addrspace(4) %base, i32 2047, i32 1023, i32 2047, <2 x i32> %coord1, ptr addrspace(4) %dst) + ret void +} + +; Test: 2D block store should be instrumented with __asan_block2d_store_check +; Function Attrs: sanitize_address +define weak_odr dso_local spir_kernel void @test_block2d_store(ptr addrspace(4) %src, ptr addrspace(4) %base) #0 { +; CHECK-LABEL: define weak_odr dso_local spir_kernel void @test_block2d_store( +; CHECK-SAME: ptr addrspace(4) [[SRC:%.*]], ptr addrspace(4) [[BASE:%.*]], ptr addrspace(1) noundef [[__ASAN_LAUNCH:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: store ptr addrspace(1) [[__ASAN_LAUNCH]], ptr addrspace(3) @__AsanLaunchInfo, align 8 +; CHECK-NEXT: [[__PRIVATE_BASE:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @__asan_set_private_base(ptr [[__PRIVATE_BASE]]) +; CHECK-NEXT: [[COORD:%.*]] = insertelement <2 x i32> poison, i32 64, i64 0 +; CHECK-NEXT: [[COORD1:%.*]] = insertelement <2 x i32> [[COORD]], i32 128, i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[COORD1]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[COORD1]], i64 1 +; CHECK-NEXT: call void @__asan_block2d_store_check(ptr addrspace(4) [[BASE]], ptr addrspace(4) [[SRC]], i32 16, i32 32, i32 8, i32 1, i32 2047, i32 1023, i32 2047, i32 [[TMP0]], i32 [[TMP1]], ptr addrspace(2) null, i32 0, ptr addrspace(2) @__asan_kernel.1) +; CHECK-NEXT: call spir_func void @_Z33__spirv_Subgroup2DBlockStoreINTELiiiiPKviPviiiDv2_i(i32 16, i32 32, i32 8, i32 1, ptr addrspace(4) [[SRC]], ptr addrspace(4) [[BASE]], i32 2047, i32 1023, i32 2047, <2 x i32> [[COORD1]]) +; CHECK-NEXT: ret void +; +entry: + %coord = insertelement <2 x i32> poison, i32 64, i64 0 + %coord1 = insertelement <2 x i32> %coord, i32 128, i64 1 + call spir_func void @_Z33__spirv_Subgroup2DBlockStoreINTELiiiiPKviPviiiDv2_i(i32 16, i32 32, i32 8, i32 1, ptr addrspace(4) %src, ptr addrspace(4) %base, i32 2047, i32 1023, i32 2047, <2 x i32> %coord1) + ret void +} + +; Test: 2D block load with non-zero coords extracts x and y correctly +; Function Attrs: sanitize_address +define weak_odr dso_local spir_kernel void @test_block2d_load_nonzero_coord(ptr addrspace(4) %base, ptr addrspace(4) %dst) #0 { +; CHECK-LABEL: define weak_odr dso_local spir_kernel void @test_block2d_load_nonzero_coord( +; CHECK-SAME: ptr addrspace(4) [[BASE:%.*]], ptr addrspace(4) [[DST:%.*]], ptr addrspace(1) noundef [[__ASAN_LAUNCH:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: store ptr addrspace(1) [[__ASAN_LAUNCH]], ptr addrspace(3) @__AsanLaunchInfo, align 8 +; CHECK-NEXT: [[__PRIVATE_BASE:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @__asan_set_private_base(ptr [[__PRIVATE_BASE]]) +; CHECK-NEXT: [[COORD:%.*]] = insertelement <2 x i32> poison, i32 16, i64 0 +; CHECK-NEXT: [[COORD1:%.*]] = insertelement <2 x i32> [[COORD]], i32 32, i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[COORD1]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[COORD1]], i64 1 +; CHECK-NEXT: call void @__asan_block2d_load_check(ptr addrspace(4) [[BASE]], ptr addrspace(4) [[DST]], i32 2, i32 16, i32 32, i32 2, i32 1023, i32 511, i32 1023, i32 [[TMP0]], i32 [[TMP1]], ptr addrspace(2) null, i32 0, ptr addrspace(2) @__asan_kernel.2) +; CHECK-NEXT: call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPKviiiDv2_iPv(i32 2, i32 16, i32 32, i32 2, ptr addrspace(4) [[BASE]], i32 1023, i32 511, i32 1023, <2 x i32> [[COORD1]], ptr addrspace(4) [[DST]]) +; CHECK-NEXT: ret void +; +entry: + %coord = insertelement <2 x i32> poison, i32 16, i64 0 + %coord1 = insertelement <2 x i32> %coord, i32 32, i64 1 + call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPKviiiDv2_iPv(i32 2, i32 16, i32 32, i32 2, ptr addrspace(4) %base, i32 1023, i32 511, i32 1023, <2 x i32> %coord1, ptr addrspace(4) %dst) + ret void +} + +attributes #0 = { sanitize_address } diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/block2d_load.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/block2d_load.cpp new file mode 100644 index 0000000000000..a7fba251218b3 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/block2d_load.cpp @@ -0,0 +1,65 @@ +// REQUIRES: linux, gpu && level_zero && arch-intel_gpu_pvc +// RUN: %{build} %device_asan_flags -O2 -g -DOOB_SRC -Xspirv-translator -spirv-ext=+SPV_INTEL_2d_block_io -Xs "-options ' -cl-intel-enable-auto-large-GRF-mode'" -o %t1.out +// RUN: %{run} not --crash %t1.out 2>&1 | FileCheck --check-prefix=CHECK,CHECK-SRC %s +// RUN: %{build} %device_asan_flags -O2 -g -DOOB_DST -Xspirv-translator -spirv-ext=+SPV_INTEL_2d_block_io -Xs "-options ' -cl-intel-enable-auto-large-GRF-mode'" -o %t2.out +// RUN: %{run} not --crash %t2.out 2>&1 | FileCheck --check-prefix=CHECK,CHECK-DST %s + +// Test that ASAN detects out-of-bounds access from 2D block load operations. +// The __spirv_Subgroup2DBlockLoadINTEL builtin is called directly to trigger +// the ASAN interception without requiring ESIMD kernel mode. + +#include +#include +#include + +typedef int int2 __attribute__((ext_vector_type(2))); + +#ifdef __SYCL_DEVICE_ONLY__ +SYCL_EXTERNAL __attribute__((convergent)) void __spirv_Subgroup2DBlockLoadINTEL( + int elem_size, int block_width, int block_height, int block_count, + const void *base_ptr, int surface_width, int surface_height, + int surface_pitch, int2 coord, void *dst); +#else +void __spirv_Subgroup2DBlockLoadINTEL(int, int, int, int, const void *, int, + int, int, int2, void *) {} +#endif + +int main() { + sycl::queue Q(sycl::gpu_selector_v); + + constexpr int Width = 16; + constexpr int Height = 32; +#ifdef OOB_SRC + auto *A = sycl::malloc_device(Width * Height - 1, Q); +#else + auto *A = sycl::malloc_device(Width * Height, Q); +#endif + + Q.submit([&](sycl::handler &h) { + h.parallel_for( + sycl::nd_range<1>(32, 32), [=](sycl::nd_item<1> item) { + if (item.get_sub_group().get_local_linear_id() == 0) { +#ifdef OOB_SRC + int dst_buf[Width * Height]; +#else + int dst_buf[Width * Height - 1]; +#endif + int2 coord = {0, 0}; + __spirv_Subgroup2DBlockLoadINTEL( + /*elem_size=*/2, /*block_width=*/16, /*block_height=*/32, + /*block_count=*/2, /*base_ptr=*/A, + /*surface_width=*/Width * (int)sizeof(int), + /*surface_height=*/Height, + /*surface_pitch=*/Width * (int)sizeof(int), coord, + /*dst=*/dst_buf); + } + }); + }); + Q.wait(); + // CHECK: ERROR: DeviceSanitizer: out-of-bounds-access + // CHECK-SRC: {{READ of size .* at kernel <.*block2d_oob_load>}} + // CHECK-DST: {{WRITE of size .* at kernel <.*block2d_oob_load>}} + + sycl::free(A, Q); + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/block2d_store.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/block2d_store.cpp new file mode 100644 index 0000000000000..d4f4c371ae4ff --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/block2d_store.cpp @@ -0,0 +1,66 @@ +// REQUIRES: linux, gpu && level_zero && arch-intel_gpu_pvc +// RUN: %{build} %device_asan_flags -O2 -g -DOOB_SRC -Xspirv-translator -spirv-ext=+SPV_INTEL_2d_block_io -Xs "-options ' -cl-intel-enable-auto-large-GRF-mode'" -o %t1.out +// RUN: %{run} not --crash %t1.out 2>&1 | FileCheck --check-prefix=CHECK,CHECK-SRC %s +// RUN: %{build} %device_asan_flags -O2 -g -DOOB_DST -Xspirv-translator -spirv-ext=+SPV_INTEL_2d_block_io -Xs "-options ' -cl-intel-enable-auto-large-GRF-mode'" -o %t2.out +// RUN: %{run} not --crash %t2.out 2>&1 | FileCheck --check-prefix=CHECK,CHECK-DST %s + +// Test that ASAN detects out-of-bounds access from 2D block store operations. + +#include +#include +#include + +typedef int int2 __attribute__((ext_vector_type(2))); + +#ifdef __SYCL_DEVICE_ONLY__ +SYCL_EXTERNAL __attribute__((convergent)) void +__spirv_Subgroup2DBlockStoreINTEL(int elem_size, int block_width, + int block_height, int block_count, + void *src_ptr, const void *base_ptr, + int surface_width, int surface_height, + int surface_pitch, int2 coord); +#else +void __spirv_Subgroup2DBlockStoreINTEL(int, int, int, int, void *, const void *, + int, int, int, int2) {} +#endif + +int main() { + sycl::queue Q(sycl::gpu_selector_v); + + constexpr int Width = 16; + constexpr int Height = 8; +#ifdef OOB_SRC + auto *A = sycl::malloc_device(Width * Height, Q); +#else + auto *A = sycl::malloc_device(Width * Height - 1, Q); +#endif + + Q.submit([&](sycl::handler &h) { + h.parallel_for( + sycl::nd_range<1>(32, 32), [=](sycl::nd_item<1> item) { + if (item.get_sub_group().get_local_linear_id() == 0) { +#ifdef OOB_SRC + uint16_t src_buf[Width * Height - 1] = {}; +#else + uint16_t src_buf[Width * Height] = {}; +#endif + // Store a 16-element-wide x 2-row block starting at coord (0, 0) + // Row 1 at y=1 is out-of-bounds (surface_height=1, max y=0) + int2 coord = {0, 0}; + __spirv_Subgroup2DBlockStoreINTEL( + /*elem_size=*/2, /*block_width=*/16, /*block_height=*/8, + /*block_count=*/1, /*src_ptr=*/src_buf, /*base_ptr=*/A, + /*surface_width=*/Width * (int)sizeof(uint16_t), + /*surface_height=*/Height, + /*surface_pitch=*/Width * (int)sizeof(uint16_t), coord); + } + }); + }); + Q.wait(); + // CHECK: ERROR: DeviceSanitizer: out-of-bounds-access + // CHECK-SRC: {{READ of size .* at kernel <.*block2d_oob_store>}} + // CHECK-DST: {{WRITE of size .* at kernel <.*block2d_oob_store>}} + + sycl::free(A, Q); + return 0; +}