
Commit cb3306c

Fix up booleans to be explicitly set on a branch. Added a break to a loop.
Parent: 845498b


torch/csrc/jit/codegen/cuda/scheduler.cpp

Lines changed: 6 additions & 0 deletions
@@ -349,14 +349,17 @@ ReductionParams reductionHeuristic(
     inputs_consumed_per_block_iter *= rparams.block_dim_y_;
     red_elems_per_thread = ceilDiv(red_elems_per_thread, rparams.block_dim_y_);
     rparams.cross_warp_ = true;
+    rparams.mul_reds_per_blk_ = false;
     // Do multiple reductions per block
   } else {
+    rparams.cross_warp_ = false;
     rparams.mul_reds_per_blk_ = true;
     outputs_produced_per_block_iter *= rparams.block_dim_y_;
   }
 
   // 5. Distributing work across blocks
 
+  // WARNING: Current device for codegen may not be the target device
   int device_max_threads_per_multiprocessor =
       at::cuda::getCurrentDeviceProperties()->maxThreadsPerMultiProcessor;
   int device_multiprocessor_count =
@@ -402,10 +405,13 @@ bool scheduleReduction(Fusion* fusion, const at::ArrayRef<c10::IValue> inputs) {
   // 2D at this point to make the issue easier, right now.
 
   // Find Reduction TensorView
+  // TODO: This is making an assumption there is only one reduction
+  // in a kernel. This will not be true in the long run.
   TensorView* red_tv = nullptr;
   for (auto& expr : fusion->exprs(/*from_outputs_only*/ true)) {
     if (expr->type() == ExprType::ReductionOp) {
       red_tv = static_cast<TensorView*>(expr->output(0));
+      break;
     }
   }
   if (red_tv == nullptr) { // No reduction found
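This hunk adds a break so the scan stops at the first ReductionOp, which matches the new TODO about the single-reduction assumption. The sketch below reproduces only the loop shape with stand-in types; Expr, ExprType, and the expression list here are not the real torch::jit::fuser API.

// Standalone sketch of the "find the first match, then break" pattern.
#include <iostream>
#include <vector>

enum class ExprType { UnaryOp, BinaryOp, ReductionOp };

struct Expr {
  ExprType type;
  int output_id;
};

int main() {
  std::vector<Expr> exprs = {
      {ExprType::UnaryOp, 0},
      {ExprType::ReductionOp, 1},
      {ExprType::ReductionOp, 2}};

  const Expr* red = nullptr;
  for (const auto& e : exprs) {
    if (e.type == ExprType::ReductionOp) {
      red = &e;
      break; // stop at the first reduction found
    }
  }

  if (red == nullptr) {
    std::cout << "no reduction found\n";
  } else {
    std::cout << "first reduction output: " << red->output_id << "\n"; // prints 1, not 2
  }
  return 0;
}

Without the break the loop would keep scanning and the pointer would end up on the last reduction rather than the first; under the single-reduction assumption noted in the TODO, the break also avoids needless iteration once a match is found.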
