llvm · matthias-springer · Oct 29, 2024 · Oct 27, 2024
diff --git a/mlir/docs/Bufferization.md b/mlir/docs/Bufferization.md
@@ -579,7 +579,6 @@ The code, slightly simplified and annotated, is reproduced here:
   // Partial bufferization passes.
   pm.addPass(createTensorConstantBufferizePass());
   pm.addNestedPass<func::FuncOp>(createTCPBufferizePass()); // Bufferizes the downstream `tcp` dialect.
-  pm.addNestedPass<func::FuncOp>(createSCFBufferizePass());
   pm.addNestedPass<func::FuncOp>(createLinalgBufferizePass());
   pm.addNestedPass<func::FuncOp>(createTensorBufferizePass());
   pm.addPass(createFuncBufferizePass());
@@ -596,7 +595,7 @@ must be module passes because they make changes to the top-level module.
 
 The bulk of the bufferization work is done by the function passes. Most of these
 passes are provided as part of the upstream MLIR distribution and bufferize
-their respective dialects (e.g. `scf-bufferize` bufferizes the `scf` dialect).
+their respective dialects (e.g. `abc-bufferize` bufferizes the `abc` dialect).
 The `tcp-bufferize` pass is an exception -- it is a partial bufferization pass
 used to bufferize the downstream `tcp` dialect, and fits in perfectly with all
 the other passes provided upstream.
@@ -694,20 +693,6 @@ which helps with this in general.
 
 ### Other partial bufferization examples
 
--   `scf-bufferize`
-    ([code](https://github.com/llvm/llvm-project/blob/bc8acf2ce8ad6e8c9b1d97b2e02d3f4ad26e1d9d/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp#L1),
-    [test](https://github.com/llvm/llvm-project/blob/bc8acf2ce8ad6e8c9b1d97b2e02d3f4ad26e1d9d/mlir/test/Dialect/SCF/bufferize.mlir#L1))
-
-    -   Bufferizes ops from the `scf` dialect.
-    -   This is an example of how to bufferize ops that implement
-        `RegionBranchOpInterface` (that is, they use regions to represent
-        control flow).
-    -   The bulk of the work is done by
-        `lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp`
-        ([code](https://github.com/llvm/llvm-project/blob/daaaed6bb89044ac58a23f1bb1ccdd12342a5a58/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp#L1)),
-        which is well-commented and covers how to correctly convert ops that
-        contain regions.
-
 -   `func-bufferize`
     ([code](https://github.com/llvm/llvm-project/blob/2f5715dc78328215d51d5664c72c632a6dac1046/mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp#L1),
     [test](https://github.com/llvm/llvm-project/blob/2f5715dc78328215d51d5664c72c632a6dac1046/mlir/test/Dialect/Func/func-bufferize.mlir#L1))

diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
@@ -20,9 +20,6 @@ namespace mlir {
 #define GEN_PASS_DECL
 #include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
 
-/// Creates a pass that bufferizes the SCF dialect.
-std::unique_ptr<Pass> createSCFBufferizePass();
-
 /// Creates a pass that specializes for loop for unrolling and
 /// vectorization.
 std::unique_ptr<Pass> createForLoopSpecializationPass();

diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
@@ -11,13 +11,6 @@
 
 include "mlir/Pass/PassBase.td"
 
-def SCFBufferize : Pass<"scf-bufferize"> {
-  let summary = "Bufferize the scf dialect.";
-  let constructor = "mlir::createSCFBufferizePass()";
-  let dependentDialects = ["bufferization::BufferizationDialect",
-                           "memref::MemRefDialect"];
-}
-
 // Note: Making these canonicalization patterns would require a dependency
 // of the SCF dialect on the Affine/Tensor/MemRef dialects or vice versa.
 def SCFForLoopCanonicalization

@@ -649,7 +649,8 @@ struct ForOpInterface
     if (failed(bufferizableOp.resolveTensorOpOperandConflicts(rewriter, state)))
       return failure();
 
-    if (!state.getOptions().enforceAliasingInvariants)
+    if (!state.getOptions().enforceAliasingInvariants ||
+        state.getOptions().copyBeforeWrite)
       return success();
 
     // According to the `getAliasing...` implementations, a bufferized OpResult
@@ -889,7 +890,8 @@ struct WhileOpInterface
     if (failed(bufferizableOp.resolveTensorOpOperandConflicts(rewriter, state)))
       return failure();
 
-    if (!state.getOptions().enforceAliasingInvariants)
+    if (!state.getOptions().enforceAliasingInvariants ||
+        state.getOptions().copyBeforeWrite)
       return success();
 
     // According to the `getAliasing...` implementations, a bufferized OpResult

diff --git a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp
diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
@@ -1,7 +1,6 @@
 add_mlir_dialect_library(MLIRSCFTransforms
   BufferDeallocationOpInterfaceImpl.cpp
   BufferizableOpInterfaceImpl.cpp
-  Bufferize.cpp
   ForallToFor.cpp
   ForallToParallel.cpp
   ForToWhile.cpp

diff --git a/mlir/test/Dialect/SCF/bufferize.mlir b/mlir/test/Dialect/SCF/bufferize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -scf-bufferize | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="dialect-filter=scf,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -split-input-file | FileCheck %s
 
 // CHECK-LABEL:   func @if(
 // CHECK-SAME:             %[[PRED:.*]]: i1,
@@ -23,15 +23,21 @@ func.func @if(%pred: i1, %true_val: tensor<?xf32>, %false_val: tensor<?xf32>) ->
   return %0 : tensor<?xf32>
 }
 
+// -----
+
 // CHECK-LABEL:   func @for(
 // CHECK-SAME:              %[[TENSOR:.*]]: tensor<f32>,
 // CHECK-SAME:              %[[LB:.*]]: index, %[[UB:.*]]: index,
 // CHECK-SAME:              %[[STEP:.*]]: index) -> tensor<f32> {
 // CHECK:           %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
-// CHECK:           %[[RESULT_MEMREF:.*]] = scf.for %[[VAL_6:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF]]) -> (memref<f32>) {
+// Note: scf.for iter_args always bufferize to a memory write. This could be
+// optimized by analyzing the loop body.
+// CHECK:           %[[MEMREF_COPY:.*]] = memref.alloc()
+// CHECK:           memref.copy %[[MEMREF]], %[[MEMREF_COPY]]
+// CHECK:           %[[RESULT_MEMREF:.*]] = scf.for %{{.*}} = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF_COPY]]) -> (memref<f32>) {
 // CHECK:             scf.yield %[[ITER]] : memref<f32>
 // CHECK:           } {some_attr}
-// CHECK:           %[[VAL_8:.*]] = bufferization.to_tensor %[[VAL_9:.*]] : memref<f32>
+// CHECK:           %[[VAL_8:.*]] = bufferization.to_tensor %[[RESULT_MEMREF]] : memref<f32>
 // CHECK:           return %[[VAL_8]] : tensor<f32>
 // CHECK:         }
 func.func @for(%arg0: tensor<f32>, %lb: index, %ub: index, %step: index) -> tensor<f32> {
@@ -41,6 +47,8 @@ func.func @for(%arg0: tensor<f32>, %lb: index, %ub: index, %step: index) -> tens
   return %ret : tensor<f32>
 }
 
+// -----
+
 // Check whether this converts at all.
 //
 // It would previously fail altogether.
@@ -57,17 +65,23 @@ func.func @if_correct_recursive_legalization_behavior(%pred: i1, %tensor: tensor
   return %0 : tensor<f32>
 }
 
+// -----
+
 // CHECK-LABEL:   func @for_correct_recursive_legalization_behavior(
 // CHECK-SAME:                                                      %[[TENSOR:.*]]: tensor<f32>,
 // CHECK-SAME:                                                      %[[INDEX:.*]]: index) -> tensor<f32> {
 // CHECK:           %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
-// CHECK:           %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF]]) -> (memref<f32>) {
+// Note: scf.for iter_args always bufferize to a memory write. This could be
+// optimized by analyzing the loop body.
+// CHECK:           %[[MEMREF_COPY:.*]] = memref.alloc()
+// CHECK:           memref.copy %[[MEMREF]], %[[MEMREF_COPY]]
+// CHECK:           %[[RESULT:.*]] = scf.for %{{.*}} = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF_COPY]]) -> (memref<f32>) {
 // CHECK:             %[[TENSOR_ITER:.*]] = bufferization.to_tensor %[[MEMREF_ITER]] : memref<f32>
 // CHECK:             %[[TENSOR_MUNGED:.*]] = "test.munge_tensor"(%[[TENSOR_ITER]]) : (tensor<f32>) -> tensor<f32>
 // CHECK:             %[[MEMREF_MUNGED:.*]] = bufferization.to_memref %[[TENSOR_MUNGED]] : memref<f32>
 // CHECK:             scf.yield %[[MEMREF_MUNGED]] : memref<f32>
 // CHECK:           }
-// CHECK:           %[[TENSOR:.*]] = bufferization.to_tensor %[[RESULT:.*]] : memref<f32>
+// CHECK:           %[[TENSOR:.*]] = bufferization.to_tensor %[[RESULT]] : memref<f32>
 // CHECK:           return %[[TENSOR]] : tensor<f32>
 // CHECK:         }
 func.func @for_correct_recursive_legalization_behavior(%arg0: tensor<f32>, %index: index) -> tensor<f32> {
@@ -78,11 +92,17 @@ func.func @for_correct_recursive_legalization_behavior(%arg0: tensor<f32>, %inde
   return %ret : tensor<f32>
 }
 
+// -----
+
 // CHECK-LABEL:   func @bufferize_while(
 // CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64, %[[ARG2:.*]]: tensor<f32>
 // CHECK: %[[M:.*]] = bufferization.to_memref %[[ARG2]] : memref<f32>
-// CHECK: %[[RES1:.*]]:3 = scf.while (%{{.*}} = %[[ARG0]], %{{.*}} = %[[M]]) : (i64, memref<f32>) -> (i64, i64, memref<f32>)
-// CHECK: scf.condition(%{{.*}}) %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, memref<f32>
+// Note: scf.while iter_args always bufferize to a memory write. This could be
+// optimized by analyzing the loop body.
+// CHECK:           %[[MEMREF_COPY:.*]] = memref.alloc()
+// CHECK:           memref.copy %[[M]], %[[MEMREF_COPY]]
+// CHECK: %[[RES1:.*]]:3 = scf.while (%{{.*}} = %[[ARG0]], %[[ITER:.*]] = %[[MEMREF_COPY]]) : (i64, memref<f32>) -> (i64, i64, memref<f32>)
+// CHECK: scf.condition(%{{.*}}) %{{.*}}, %{{.*}}, %[[ITER]] : i64, i64, memref<f32>
 // CHECK: ^bb0(%{{.*}}: i64, %{{.*}}: i64, %{{.*}}: memref<f32>):
 // CHECK: scf.yield %{{.*}}, %{{.*}} : i64, memref<f32>
 // CHECK:  %[[RES2:.*]] = bufferization.to_tensor %[[RES1]]#2 : memref<f32>