Skip to content

Commit 60d8e6f

Browse files
MattPDMatt P. Dziubinski
and
Matt P. Dziubinski
authored
[OpenMP][SIMD][FIX] Use conservative "omp simd ordered" lowering (#123867)
A proposed fix for #95611 [OpenMP][SIMD] ordered has no effect in a loop SIMD region as of LLVM 18.1.0 Changes: - Implement new lowering behavior: Conservatively serialize "omp simd" loops that have `omp simd ordered` directive to prevent incorrect vectorization (which results in incorrect execution behavior of the miscompiled program). Implementation outline: - We start with the optimistic default initial value of `LoopStack.setParallel(/Enable=/true);` in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D)`. - We only disable the loop parallel memory access assumption with `if (HasOrderedDirective) LoopStack.setParallel(/Enable=/false);` using the `HasOrderedDirective` (which tests for the presence of an `OMPOrderedDirective`). - This results in no longer incorrectly vectorizing the loop when the `omp simd ordered` directive is present. Motivation: We'd like to prevent incorrect vectorization of the loops marked with the `#pragma omp ordered simd` directive which has previously resulted in miscompiled code. At the same time, we'd like the usage outside of the `#pragma omp ordered simd` context to remain unaffected: Note that in the test "clang/test/OpenMP/ordered_codegen.cpp" we only "lose" the `!llvm.access.group` metadata in `foo_simd` alone. This is conservative, in that it's possible some of the loops would be possible to vectorize, but we prefer to avoid miscompilation of the loops that are currently illegal to vectorize. A concrete example follows: ```cpp // "test.c" #include <float.h> #include <math.h> #include <omp.h> #include <stdio.h> #include <stdlib.h> #include <time.h> int compare_float(float x1, float x2, float scalar) { const float diff = fabsf(x1 - x2); x1 = fabsf(x1); x2 = fabsf(x2); const float l = (x2 > x1) ? x2 : x1; if (diff <= l * scalar * FLT_EPSILON) return 1; else return 0; } #define ARRAY_SIZE 256 __attribute__((noinline)) void initialization_loop( float X[ARRAY_SIZE][ARRAY_SIZE], float Y[ARRAY_SIZE][ARRAY_SIZE]) { const float max = 1000.0; srand(time(NULL)); for (int r = 0; r < ARRAY_SIZE; r++) { for (int c = 0; c < ARRAY_SIZE; c++) { X[r][c] = ((float)rand() / (float)(RAND_MAX)) * max; Y[r][c] = X[r][c]; } } } __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE]) { for (int r = 1; r < ARRAY_SIZE; ++r) { for (int c = 1; c < ARRAY_SIZE; ++c) { #pragma omp simd for (int k = 2; k < ARRAY_SIZE; ++k) { #pragma omp ordered simd X[r][k] = X[r][k - 2] + sinf((float)(r / c)); } } } } __attribute__((noinline)) int comparison_loop(float X[ARRAY_SIZE][ARRAY_SIZE], float Y[ARRAY_SIZE][ARRAY_SIZE]) { int totalErrors_simd = 0; const float scalar = 1.0; for (int r = 1; r < ARRAY_SIZE; ++r) { for (int c = 1; c < ARRAY_SIZE; ++c) { for (int k = 2; k < ARRAY_SIZE; ++k) { Y[r][k] = Y[r][k - 2] + sinf((float)(r / c)); } } // check row for simd update for (int k = 0; k < ARRAY_SIZE; ++k) { if (!compare_float(X[r][k], Y[r][k], scalar)) { ++totalErrors_simd; } } } return totalErrors_simd; } int main(void) { float X[ARRAY_SIZE][ARRAY_SIZE]; float Y[ARRAY_SIZE][ARRAY_SIZE]; initialization_loop(X, Y); omp_simd_loop(X); const int totalErrors_simd = comparison_loop(X, Y); if (totalErrors_simd) { fprintf(stdout, "totalErrors_simd: %d \n", totalErrors_simd); fprintf(stdout, "%s : %d - FAIL: error in ordered simd computation.\n", __FILE__, __LINE__); } else { fprintf(stdout, "Success!\n"); } return totalErrors_simd; } ``` Before: ``` $ clang -fopenmp-simd -O3 -ffast-math -lm test.c -o test && ./test totalErrors_simd: 15408 test.c : 76 - FAIL: error in ordered simd computation. ``` clang 19.1.0: https://godbolt.org/z/6EvhxqEhe After: ``` $ clang -fopenmp-simd -O3 -ffast-math test.c -o test && ./test Success! ``` Co-authored-by: Matt P. Dziubinski <[email protected]>
1 parent 0bfcb76 commit 60d8e6f

File tree

3 files changed

+302
-116
lines changed

3 files changed

+302
-116
lines changed

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2457,10 +2457,86 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
24572457
}
24582458
}
24592459

2460+
// Check for the presence of an `OMPOrderedDirective`,
2461+
// i.e., `ordered` in `#pragma omp ordered simd`.
2462+
//
2463+
// Consider the following source code:
2464+
// ```
2465+
// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
2466+
// {
2467+
// for (int r = 1; r < ARRAY_SIZE; ++r) {
2468+
// for (int c = 1; c < ARRAY_SIZE; ++c) {
2469+
// #pragma omp simd
2470+
// for (int k = 2; k < ARRAY_SIZE; ++k) {
2471+
// #pragma omp ordered simd
2472+
// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
2473+
// }
2474+
// }
2475+
// }
2476+
// }
2477+
// ```
2478+
//
2479+
// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
2480+
// &D)`. By examining `D.dump()` we have the following AST containing
2481+
// `OMPOrderedDirective`:
2482+
//
2483+
// ```
2484+
// OMPSimdDirective 0x1c32950
2485+
// `-CapturedStmt 0x1c32028
2486+
// |-CapturedDecl 0x1c310e8
2487+
// | |-ForStmt 0x1c31e30
2488+
// | | |-DeclStmt 0x1c31298
2489+
// | | | `-VarDecl 0x1c31208 used k 'int' cinit
2490+
// | | | `-IntegerLiteral 0x1c31278 'int' 2
2491+
// | | |-<<<NULL>>>
2492+
// | | |-BinaryOperator 0x1c31308 'int' '<'
2493+
// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
2494+
// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
2495+
// | | | `-IntegerLiteral 0x1c312d0 'int' 256
2496+
// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
2497+
// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
2498+
// | | `-CompoundStmt 0x1c31e18
2499+
// | | `-OMPOrderedDirective 0x1c31dd8
2500+
// | | |-OMPSimdClause 0x1c31380
2501+
// | | `-CapturedStmt 0x1c31cd0
2502+
// ```
2503+
//
2504+
// Note the presence of `OMPOrderedDirective` above:
2505+
// It's (transitively) nested in a `CapturedStmt` representing the pragma
2506+
// annotated compound statement. Thus, we need to consider this nesting and
2507+
// include checking the `getCapturedStmt` in this case.
2508+
static bool hasOrderedDirective(const Stmt *S) {
2509+
if (isa<OMPOrderedDirective>(S))
2510+
return true;
2511+
2512+
if (const auto *CS = dyn_cast<CapturedStmt>(S))
2513+
return hasOrderedDirective(CS->getCapturedStmt());
2514+
2515+
for (const Stmt *Child : S->children()) {
2516+
if (Child && hasOrderedDirective(Child))
2517+
return true;
2518+
}
2519+
2520+
return false;
2521+
}
2522+
2523+
static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
2524+
LoopInfoStack &LoopStack) {
2525+
// Check for the presence of an `OMPOrderedDirective`
2526+
// i.e., `ordered` in `#pragma omp ordered simd`
2527+
bool HasOrderedDirective = hasOrderedDirective(&AssociatedStmt);
2528+
// If present then conservatively disable loop vectorization
2529+
// analogously to how `emitSimdlenSafelenClause` does.
2530+
if (HasOrderedDirective)
2531+
LoopStack.setParallel(/*Enable=*/false);
2532+
}
2533+
24602534
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
24612535
// Walk clauses and process safelen/lastprivate.
24622536
LoopStack.setParallel(/*Enable=*/true);
24632537
LoopStack.setVectorizeEnable();
2538+
const Stmt *AssociatedStmt = D.getAssociatedStmt();
2539+
applyConservativeSimdOrderedDirective(*AssociatedStmt, LoopStack);
24642540
emitSimdlenSafelenClause(*this, D);
24652541
if (const auto *C = D.getSingleClause<OMPOrderClause>())
24662542
if (C->getKind() == OMPC_ORDER_concurrent)

0 commit comments

Comments
 (0)