diff --git a/MicroBenchmarks/CMakeLists.txt b/MicroBenchmarks/CMakeLists.txt
index 08d3dd44f5d5..c16a0850e1d5 100644
--- a/MicroBenchmarks/CMakeLists.txt
+++ b/MicroBenchmarks/CMakeLists.txt
@@ -8,5 +8,6 @@ add_subdirectory(harris)
 add_subdirectory(ImageProcessing)
 add_subdirectory(LoopInterchange)
+add_subdirectory(LoopSplit)
 add_subdirectory(LoopVectorization)
 add_subdirectory(MemFunctions)
 add_subdirectory(SLPVectorization)
diff --git a/MicroBenchmarks/LoopSplit/CMakeLists.txt b/MicroBenchmarks/LoopSplit/CMakeLists.txt
new file mode 100644
index 000000000000..a8f5493eb93f
--- /dev/null
+++ b/MicroBenchmarks/LoopSplit/CMakeLists.txt
@@ -0,0 +1,13 @@
+# LoopSplit microbenchmark for #pragma omp split counts(...).
+#
+# Configure test-suite with a Clang that supports -fopenmp and -fopenmp-version=60.
+
+# main.cpp writes LoopSplit.txt; compare it against the reference output.
+llvm_test_run(WORKDIR %S)
+llvm_test_verify(%b/${FPCMP} %S/LoopSplit.reference_output %S/LoopSplit.txt)
+
+llvm_test_executable(LoopSplit main.cpp)
+llvm_test_data(LoopSplit LoopSplit.reference_output)
+
+target_compile_options(LoopSplit PRIVATE -fopenmp -fopenmp-version=60)
+target_link_libraries(LoopSplit benchmark)
diff --git a/MicroBenchmarks/LoopSplit/LoopSplit.reference_output b/MicroBenchmarks/LoopSplit/LoopSplit.reference_output
new file mode 100644
index 000000000000..19b08f93990a
--- /dev/null
+++ b/MicroBenchmarks/LoopSplit/LoopSplit.reference_output
@@ -0,0 +1 @@
+test1: 19999999900000000
diff --git a/MicroBenchmarks/LoopSplit/main.cpp b/MicroBenchmarks/LoopSplit/main.cpp
new file mode 100644
index 000000000000..ad481b0c5dd0
--- /dev/null
+++ b/MicroBenchmarks/LoopSplit/main.cpp
@@ -0,0 +1,68 @@
+// Microbenchmark for #pragma omp split counts(...).
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+
+#include "benchmark/benchmark.h"
+
+// Large N for measurable runs; lit verification uses same kernel once.
+constexpr int N = 200000000;
+
+// Kernel: sum 0..(N-1) with the loop split into four segments.
+// The total, N*(N-1)/2, is about 2e16, so the accumulator must be 64 bits wide;
+// `long` is only 32 bits on LLP64 and ILP32 targets, hence `long long`.
+static long long run_split() {
+  long long sum = 0;
+#pragma omp split counts(50000000, 50000000, 50000000, omp_fill)
+  for (int i = 0; i < N; ++i)
+    sum += i;
+  return sum;
+}
+
+// Baseline: same loop without split (for comparison).
+static long long run_baseline() {
+  long long sum = 0;
+  for (int i = 0; i < N; ++i)
+    sum += i;
+  return sum;
+}
+
+// Writes the split kernel's result to LoopSplit.txt for lit verification, then
+// runs the registered benchmarks. Returns EXIT_FAILURE if the file cannot be
+// opened or written, EXIT_SUCCESS otherwise.
+int main(int argc, char *argv[]) {
+  benchmark::Initialize(&argc, argv);
+
+  // Run kernel once and write result for lit verification.
+  std::ofstream myfile("LoopSplit.txt");
+  if (!myfile.is_open())
+    return EXIT_FAILURE;
+  myfile << "test1: " << run_split() << "\n";
+  myfile.close();
+  // close() flushes the buffer and sets failbit if that fails.
+  if (myfile.fail())
+    return EXIT_FAILURE;
+
+  benchmark::RunSpecifiedBenchmarks();
+  return EXIT_SUCCESS;
+}
+
+// Times the split kernel. Each result goes through DoNotOptimize so the call is
+// kept; results are not accumulated, since a running total of ~2e16 values
+// would overflow a signed 64-bit integer after a few hundred iterations.
+static void BM_Split(benchmark::State &state) {
+  for (auto _ : state) {
+    long long result = run_split();
+    benchmark::DoNotOptimize(result);
+  }
+}
+BENCHMARK(BM_Split)->Unit(benchmark::kMicrosecond)->MinTime(2.0);
+
+// Times the unsplit baseline kernel the same way, for comparison.
+static void BM_Baseline(benchmark::State &state) {
+  for (auto _ : state) {
+    long long result = run_baseline();
+    benchmark::DoNotOptimize(result);
+  }
+}
+BENCHMARK(BM_Baseline)->Unit(benchmark::kMicrosecond)->MinTime(2.0);