[flang][OpenMP] Rewrite standalone loop (without bind) directives to simd #122632

Merged: 1 commit, Jan 21, 2025
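In short, a standalone `omp.loop` (one with no `bind` clause that is not nested inside a `teams` or `parallel` construct) is now handled by the generic-loop conversion pass instead of hitting a not-yet-implemented error. A minimal before/after sketch, modeled on the standalone case removed from `generic-loop-rewriting-todo.mlir` below (value names are illustrative, not taken from the patch):

// Before running fir-opt --omp-generic-loop-conversion:
omp.loop {
  omp.loop_nest (%iv) : i32 = (%c0) to (%c10) inclusive step (%c1) {
    omp.yield
  }
}

// After the pass, the same loop nest is wrapped in `omp.simd` instead:
omp.simd {
  omp.loop_nest (%iv) : i32 = (%c0) to (%c10) inclusive step (%c1) {
    omp.yield
  }
}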
87 changes: 81 additions & 6 deletions flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -30,19 +30,39 @@ class GenericLoopConversionPattern
: public mlir::OpConversionPattern<mlir::omp::LoopOp> {
public:
enum class GenericLoopCombinedInfo {
None,
Standalone,
TargetTeamsLoop,
TargetParallelLoop
};

using mlir::OpConversionPattern<mlir::omp::LoopOp>::OpConversionPattern;

explicit GenericLoopConversionPattern(mlir::MLIRContext *ctx)
: mlir::OpConversionPattern<mlir::omp::LoopOp>{ctx} {
// Enable rewrite recursion to make sure nested `loop` directives are
// handled.
this->setHasBoundedRewriteRecursion(true);
}

mlir::LogicalResult
matchAndRewrite(mlir::omp::LoopOp loopOp, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
assert(mlir::succeeded(checkLoopConversionSupportStatus(loopOp)));

rewriteToDistributeParallelDo(loopOp, rewriter);
GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);

switch (combinedInfo) {
case GenericLoopCombinedInfo::Standalone:
rewriteToSimdLoop(loopOp, rewriter);
break;
case GenericLoopCombinedInfo::TargetParallelLoop:
llvm_unreachable("not yet implemented: `parallel loop` directive");
break;
case GenericLoopCombinedInfo::TargetTeamsLoop:
rewriteToDistributeParallelDo(loopOp, rewriter);
break;
}

rewriter.eraseOp(loopOp);
return mlir::success();
}
@@ -52,9 +72,8 @@ class GenericLoopConversionPattern
GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);

switch (combinedInfo) {
case GenericLoopCombinedInfo::None:
return loopOp.emitError(
"not yet implemented: Standalone `omp loop` directive");
case GenericLoopCombinedInfo::Standalone:
break;
case GenericLoopCombinedInfo::TargetParallelLoop:
return loopOp.emitError(
"not yet implemented: Combined `omp target parallel loop` directive");
@@ -86,7 +105,7 @@ class GenericLoopConversionPattern
static GenericLoopCombinedInfo
findGenericLoopCombineInfo(mlir::omp::LoopOp loopOp) {
mlir::Operation *parentOp = loopOp->getParentOp();
GenericLoopCombinedInfo result = GenericLoopCombinedInfo::None;
GenericLoopCombinedInfo result = GenericLoopCombinedInfo::Standalone;

if (auto teamsOp = mlir::dyn_cast_if_present<mlir::omp::TeamsOp>(parentOp))
if (mlir::isa_and_present<mlir::omp::TargetOp>(teamsOp->getParentOp()))
@@ -100,6 +119,62 @@ class GenericLoopConversionPattern
return result;
}

/// Rewrites standalone `loop` directives to equivalent `simd` constructs.
/// The reasoning behind this decision is that according to the spec (version
/// 5.2, section 11.7.1):
///
/// "If the bind clause is not specified on a construct for which it may be
/// specified and the construct is closely nested inside a teams or parallel
/// construct, the effect is as if binding is teams or parallel. If none of
/// those conditions hold, the binding region is not defined."
///
/// which means that standalone `loop` directives have an undefined binding
/// region. Moreover, the spec says (in the next paragraph):
///
/// "The specified binding region determines the binding thread set.
/// Specifically, if the binding region is a teams region, then the binding
/// thread set is the set of initial threads that are executing that region
/// while if the binding region is a parallel region, then the binding thread
/// set is the team of threads that are executing that region. If the binding
/// region is not defined, then the binding thread set is the encountering
/// thread."
///
/// which means that the binding thread set for a standalone `loop` directive
/// is only the encountering thread.
///
/// Since the encountering thread is the binding thread (set) for a
/// standalone `loop` directive, the best we can do in such a case is to "simd"
/// the directive.
void rewriteToSimdLoop(mlir::omp::LoopOp loopOp,
mlir::ConversionPatternRewriter &rewriter) const {
loopOp.emitWarning("Detected standalone OpenMP `loop` directive; the "
"associated loop will be rewritten to `simd`.");
mlir::omp::SimdOperands simdClauseOps;
simdClauseOps.privateVars = loopOp.getPrivateVars();

auto privateSyms = loopOp.getPrivateSyms();
if (privateSyms)
simdClauseOps.privateSyms.assign(privateSyms->begin(),
privateSyms->end());

Fortran::common::openmp::EntryBlockArgs simdArgs;
simdArgs.priv.vars = simdClauseOps.privateVars;

auto simdOp =
rewriter.create<mlir::omp::SimdOp>(loopOp.getLoc(), simdClauseOps);
mlir::Block *simdBlock =
genEntryBlock(rewriter, simdArgs, simdOp.getRegion());

mlir::IRMapping mapper;
mlir::Block &loopBlock = *loopOp.getRegion().begin();

for (auto [loopOpArg, simdopArg] :
llvm::zip_equal(loopBlock.getArguments(), simdBlock->getArguments()))
mapper.map(loopOpArg, simdopArg);

rewriter.clone(*loopOp.begin(), mapper);
}

void rewriteToDistributeParallelDo(
mlir::omp::LoopOp loopOp,
mlir::ConversionPatternRewriter &rewriter) const {
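For reference, `rewriteToSimdLoop` above forwards the `loop` op's `private` variables and privatizer symbols onto the new `simd` op and remaps the entry-block arguments when cloning the region. A hand-written sketch of the resulting IR (privatizer symbol and SSA names are illustrative, not from the patch), matching the CHECK lines in the updated test below:

// Input: a standalone loop with a privatized induction variable.
omp.loop private(@i_privatizer %1#0 -> %arg0 : !fir.ref<i32>) {
  omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
    omp.yield
  }
}

// Output: the same private clause and block argument carried over to omp.simd.
omp.simd private(@i_privatizer %1#0 -> %arg0 : !fir.ref<i32>) {
  omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
    omp.yield
  }
}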
45 changes: 42 additions & 3 deletions flang/test/Lower/OpenMP/loop-directive.f90
@@ -11,7 +11,7 @@
subroutine test_no_clauses()
integer :: i, j, dummy = 1

! CHECK: omp.loop private(@[[I_PRIV]] %{{.*}}#0 -> %[[ARG:.*]] : !fir.ref<i32>) {
! CHECK: omp.simd private(@[[I_PRIV]] %{{.*}}#0 -> %[[ARG:.*]] : !fir.ref<i32>) {
! CHECK-NEXT: omp.loop_nest (%[[IV:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
! CHECK: %[[ARG_DECL:.*]]:2 = hlfir.declare %[[ARG]]
! CHECK: fir.store %[[IV]] to %[[ARG_DECL]]#1 : !fir.ref<i32>
@@ -27,7 +27,7 @@ subroutine test_no_clauses()
! CHECK-LABEL: func.func @_QPtest_collapse
subroutine test_collapse()
integer :: i, j, dummy = 1
! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}}, @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}}, @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
! CHECK-NEXT: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 {{.*}} {
! CHECK: }
! CHECK: }
@@ -43,7 +43,7 @@ subroutine test_collapse()
! CHECK-LABEL: func.func @_QPtest_private
subroutine test_private()
integer :: i, dummy = 1
! CHECK: omp.loop private(@[[DUMMY_PRIV]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
! CHECK: omp.simd private(@[[DUMMY_PRIV]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_privateEdummy"}
! CHECK: %{{.*}} = fir.load %[[DUMMY_DECL]]#0
@@ -100,3 +100,42 @@ subroutine test_bind()
end do
!$omp end loop
end subroutine

! CHECK-LABEL: func.func @_QPtest_nested_directives
subroutine test_nested_directives
implicit none
integer, parameter :: N = 100000
integer a(N), b(N), c(N)
integer j,i, num, flag;
num = N

! CHECK: omp.teams {

! Verify the first `loop` directive was combined with `target teams` into
! `target teams distribute parallel do`.
! CHECK: omp.parallel {{.*}} {
! CHECK: omp.distribute {
! CHECK: omp.wsloop {
! CHECK: omp.loop_nest {{.*}} {

! Verify the second `loop` directive was rewritten to `simd`.
! CHECK: omp.simd {{.*}} {
! CHECK: omp.loop_nest {{.*}} {
! CHECK: }
! CHECK: }

! CHECK: }
! CHECK: } {omp.composite}
! CHECK: } {omp.composite}
! CHECK: } {omp.composite}
! CHECK: }
!$omp target teams map(to: a,b) map(from: c)
!$omp loop
do j=1,1000
!$omp loop
do i=1,N
c(i) = a(i) * b(i)
end do
end do
!$omp end target teams
end subroutine
13 changes: 0 additions & 13 deletions flang/test/Transforms/generic-loop-rewriting-todo.mlir
@@ -1,18 +1,5 @@
// RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s

func.func @_QPtarget_loop() {
%c0 = arith.constant 0 : i32
%c10 = arith.constant 10 : i32
%c1 = arith.constant 1 : i32
// expected-error@below {{not yet implemented: Standalone `omp loop` directive}}
omp.loop {
omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
omp.yield
}
}
return
}

func.func @_QPtarget_parallel_loop() {
omp.target {
omp.parallel {