Skip to content

Add -fexperimental-fuse-loops (loop fusion) to clang and flang #142686

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ CODEGENOPT(TimeTrace , 1, 0) ///< Set when -ftime-trace is enabled.
VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500) ///< Minimum time granularity (in microseconds),
///< traced by time profiler
CODEGENOPT(InterchangeLoops , 1, 0) ///< Run loop-interchange.
CODEGENOPT(FuseLoops , 1, 0) ///< Run loop-fuse.
CODEGENOPT(UnrollLoops , 1, 0) ///< Control whether loops are unrolled.
CODEGENOPT(RerollLoops , 1, 0) ///< Control whether loops are rerolled.
CODEGENOPT(NoUseJumpTables , 1, 0) ///< Set when -fno-jump-tables is enabled.
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4189,6 +4189,10 @@ def floop_interchange : Flag<["-"], "floop-interchange">, Group<f_Group>,
HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>,
HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
// Positive flag enabling the loop fuse pass; visible to the clang and flang
// drivers as well as cc1/fc1. Note that the record is named floop_fuse while
// the command-line spelling is -fexperimental-fuse-loops.
def floop_fuse : Flag<["-"], "fexperimental-fuse-loops">, Group<f_Group>,
HelpText<"Enable the loop fuse pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
// Negative counterpart of the flag above, spelled -fno-experimental-fuse-loops.
def fno_loop_fuse: Flag<["-"], "fno-experimental-fuse-loops">, Group<f_Group>,
HelpText<"Disable the loop fuse pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
PipelineTuningOptions PTO;
PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;
PTO.LoopInterchange = CodeGenOpts.InterchangeLoops;
PTO.LoopFuse = CodeGenOpts.FuseLoops;
// For historical reasons, loop interleaving is set to mirror setting for loop
// unrolling.
PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
Expand Down Expand Up @@ -1339,6 +1340,7 @@ runThinLTOBackend(CompilerInstance &CI, ModuleSummaryIndex *CombinedIndex,
Conf.SampleProfile = std::move(SampleProfile);
Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
Conf.PTO.LoopInterchange = CGOpts.InterchangeLoops;
Conf.PTO.LoopFuse = CGOpts.FuseLoops;
// For historical reasons, loop interleaving is set to mirror setting for loop
// unrolling.
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7030,6 +7030,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_unroll_loops);
Args.AddLastArg(CmdArgs, options::OPT_floop_interchange,
options::OPT_fno_loop_interchange);
Args.AddLastArg(CmdArgs, options::OPT_floop_fuse, options::OPT_fno_loop_fuse);

Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ);

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChains/CommonArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3157,7 +3157,7 @@ void tools::handleVectorizeSLPArgs(const ArgList &Args,

void tools::handleInterchangeLoopsArgs(const ArgList &Args,
ArgStringList &CmdArgs) {
// FIXME: instead of relying on shouldEnableVectorizerAtOLevel, we may want to
// FIXME: Instead of relying on shouldEnableVectorizerAtOLevel, we may want to
// implement a separate function to infer loop interchange from opt level.
// For now, enable loop-interchange at the same opt levels as loop-vectorize.
bool EnableInterchange = shouldEnableVectorizerAtOLevel(Args, false);
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ void Flang::addCodegenOptions(const ArgList &Args,
!stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
CmdArgs.push_back("-fstack-arrays");

Args.AddLastArg(CmdArgs, options::OPT_floop_fuse, options::OPT_fno_loop_fuse);

handleInterchangeLoopsArgs(Args, CmdArgs);
handleVectorizeLoopsArgs(Args, CmdArgs);
handleVectorizeSLPArgs(Args, CmdArgs);
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1648,6 +1648,11 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts,
else
GenerateArg(Consumer, OPT_fno_loop_interchange);

if (Opts.FuseLoops)
GenerateArg(Consumer, OPT_floop_fuse);
else
GenerateArg(Consumer, OPT_fno_loop_fuse);

if (!Opts.BinutilsVersion.empty())
GenerateArg(Consumer, OPT_fbinutils_version_EQ, Opts.BinutilsVersion);

Expand Down Expand Up @@ -1963,6 +1968,7 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
(Opts.OptimizationLevel > 1));
Opts.InterchangeLoops =
Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false);
Opts.FuseLoops = Args.hasFlag(OPT_floop_fuse, OPT_fno_loop_fuse, false);
Opts.BinutilsVersion =
std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ));

Expand Down
7 changes: 7 additions & 0 deletions clang/test/Driver/clang_f_opts.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@
// CHECK-INTERCHANGE-LOOPS: "-floop-interchange"
// CHECK-NO-INTERCHANGE-LOOPS: "-fno-loop-interchange"

// RUN: %clang -### -S -fexperimental-fuse-loops %s 2>&1 | FileCheck -check-prefix=CHECK-FUSE-LOOPS %s
// RUN: %clang -### -S -fno-experimental-fuse-loops %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FUSE-LOOPS %s
// RUN: %clang -### -S -fno-experimental-fuse-loops -fexperimental-fuse-loops %s 2>&1 | FileCheck -check-prefix=CHECK-FUSE-LOOPS %s
// RUN: %clang -### -S -fexperimental-fuse-loops -fno-experimental-fuse-loops %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FUSE-LOOPS %s
// CHECK-FUSE-LOOPS: "-fexperimental-fuse-loops"
// CHECK-NO-FUSE-LOOPS: "-fno-experimental-fuse-loops"

// RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s
// CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate"

Expand Down
1 change: 1 addition & 0 deletions flang/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ page](https://llvm.org/releases/).

* -floop-interchange is now recognized by flang.
* -floop-interchange is enabled by default at -O2 and above.
* -fexperimental-fuse-loops is now recognized by flang.

## Windows Support

Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Frontend/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization.
CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange.
CODEGENOPT(FuseLoops, 1, 0) ///< Enable loop fuse.
CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
Expand Down
3 changes: 3 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
if (args.getLastArg(clang::driver::options::OPT_floop_interchange))
opts.InterchangeLoops = 1;

// Honor whichever of -fexperimental-fuse-loops / -fno-experimental-fuse-loops
// comes last. Both spellings are accepted by fc1, so checking only for the
// positive flag would leave fusion enabled even when a later negative flag
// was meant to turn it off.
if (args.hasFlag(clang::driver::options::OPT_floop_fuse,
                 clang::driver::options::OPT_fno_loop_fuse,
                 /*Default=*/false))
  opts.FuseLoops = 1;

if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
opts.VectorizeLoop = 1;

Expand Down
1 change: 1 addition & 0 deletions flang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,7 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
pto.LoopUnrolling = opts.UnrollLoops;
pto.LoopInterchange = opts.InterchangeLoops;
pto.LoopFuse = opts.FuseLoops;
pto.LoopInterleaving = opts.UnrollLoops;
pto.LoopVectorization = opts.VectorizeLoop;
pto.SLPVectorization = opts.VectorizeSLP;
Expand Down
17 changes: 17 additions & 0 deletions flang/test/Driver/loop-fuse.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
! Test for the -fexperimental-fuse-loops / -fno-experimental-fuse-loops flags.
!
! Driver checks (-### output): the quoted positive flag must show up when it
! is passed explicitly, and must not show up when the negative flag or only
! an optimization level (-O0 through -Oz) is given.
! RUN: %flang -### -S -fexperimental-fuse-loops %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE %s
! RUN: %flang -### -S -fno-experimental-fuse-loops %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! CHECK-LOOP-FUSE: "-fexperimental-fuse-loops"
! CHECK-NO-LOOP-FUSE-NOT: "-fexperimental-fuse-loops"
! Frontend checks (%flang_fc1 at -O2 with -print-pipeline-passes): the printed
! pipeline must mention loop-fusion with the positive flag and must not
! mention it with the negative flag.
! RUN: %flang_fc1 -emit-llvm -O2 -fexperimental-fuse-loops -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE-PASS %s
! RUN: %flang_fc1 -emit-llvm -O2 -fno-experimental-fuse-loops -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE-PASS %s
! CHECK-LOOP-FUSE-PASS: loop-fusion
! CHECK-NO-LOOP-FUSE-PASS-NOT: loop-fusion

! Empty program: the checks above only inspect the command line and the
! printed pass pipeline, so no Fortran code is needed.
program test
end program
3 changes: 3 additions & 0 deletions llvm/include/llvm/Passes/PassBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ class PipelineTuningOptions {
/// false.
bool LoopInterchange;

/// Tuning option to enable/disable the loop fusion pass (LoopFusePass). Its
/// default value is that of the flag: `-enable-loopfuse` (false unless the
/// flag is given).
bool LoopFuse;

/// Tuning option to forget all SCEV loops in LoopUnroll. Its default value
/// is that of the flag: `-forget-scev-loop-unroll`.
bool ForgetAllSCEVInLoopUnroll;
Expand Down
13 changes: 12 additions & 1 deletion llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/Transforms/Scalar/LoopFlatten.h"
#include "llvm/Transforms/Scalar/LoopFuse.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopInterchange.h"
Expand Down Expand Up @@ -205,6 +206,10 @@ static cl::opt<bool>
EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
cl::desc("Enable the LoopInterchange Pass"));

// Hidden option `-enable-loopfuse`, off by default. Its value is copied into
// PipelineTuningOptions::LoopFuse by the PipelineTuningOptions constructor, so
// it acts as the default for frontends that do not set LoopFuse themselves.
static cl::opt<bool> EnableLoopFuse("enable-loopfuse", cl::init(false),
cl::Hidden,
cl::desc("Enable the LoopFuse Pass"));

static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
cl::init(false), cl::Hidden,
cl::desc("Enable Unroll And Jam Pass"));
Expand Down Expand Up @@ -314,6 +319,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
SLPVectorization = false;
LoopUnrolling = true;
LoopInterchange = EnableLoopInterchange;
LoopFuse = EnableLoopFuse;
ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
Expand Down Expand Up @@ -518,6 +524,9 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,

invokeLoopOptimizerEndEPCallbacks(LPM2, Level);

if (PTO.LoopFuse)
FPM.addPass(LoopFusePass());

FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
/*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
Expand Down Expand Up @@ -709,6 +718,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,

invokeLoopOptimizerEndEPCallbacks(LPM2, Level);

if (PTO.LoopFuse)
FPM.addPass(LoopFusePass());

FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
/*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
Expand Down Expand Up @@ -2112,7 +2124,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
LPM.addPass(LoopFlattenPass());
LPM.addPass(IndVarSimplifyPass());
LPM.addPass(LoopDeletionPass());
// FIXME: Add loop interchange.

// Unroll small loops and perform peeling.
LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
Expand Down