Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion torch/csrc/jit/codegen/cuda/fusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <torch/csrc/jit/codegen/cuda/ir_all_nodes.h>
#include <torch/csrc/jit/codegen/cuda/ir_printer.h>
#include <torch/csrc/jit/codegen/cuda/kernel.h>
#include <torch/csrc/jit/codegen/cuda/lower2device.h>

namespace torch {
namespace jit {
Expand Down Expand Up @@ -218,10 +219,16 @@ void Fusion::print() {
std::cout << "}\n";
}

void Fusion::printKernel() {
FusionGuard fg(this);
GPULower lower(this);
lower.printKernel(std::cout);
}

// Print every expression returned by exprs(true) to std::cout.
void Fusion::printMath() {
  // Hold a FusionGuard on this fusion while printing (presumably marks it as
  // the active fusion -- TODO confirm).
  FusionGuard fg(this);
  // No explicit line break or flush is written per expression; presumably the
  // stream operator for expressions ends its own line -- TODO confirm.
  for (auto expr : exprs(true)) {
    std::cout << expr;
  }
}

void Fusion::printTransforms() {
Expand Down
3 changes: 2 additions & 1 deletion torch/csrc/jit/codegen/cuda/fusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ struct TORCH_CUDA_API Fusion : public IRInputOutput {
void printMath();
// Print transformations used in fusion (can be very verbose)
void printTransforms();

// Lower the fusion and print the resulting kernel (the definition in
// fusion.cpp lowers with GPULower and writes to std::cout)
void printKernel();
// Register the Val with this fusion
StmtNameType registerVal(Val* val);

Expand Down
18 changes: 12 additions & 6 deletions torch/csrc/jit/codegen/cuda/lower_unroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,24 @@ Bool* getPredicate(TensorView* tv, std::vector<Val*> inds_, Bool* thread_pred) {
inds_.size() == tv->nDims() ||
inds_.size() == tv->domain()->noReductions().size());

// Do we need to adjust for reduction axes?
bool reductions = inds_.size() != tv->nDims();

std::vector<Val*> inds;
if (inds_.size() < tv->nDims()) {
size_t i_ = 0;
for (size_t i = 0; i < tv->nDims() && i_ < inds_.size(); i++) {
if (tv->axis(i)->isReduction())
if (reductions) {
for (size_t ind_i = 0, tv_i = 0; tv_i < tv->nDims();) {
if (tv->axis(tv_i++)->isReduction()) {
inds.push_back(new Int(0));
else
inds.push_back(inds_[i_++]);
} else {
TORCH_INTERNAL_ASSERT(
ind_i < inds_.size(), "Ran out of indices to generate predicate.");
inds.push_back(inds_[ind_i++]);
}
}
} else {
inds = inds_;
}

if (tv->nDims() > inds.size()) {
for (decltype(tv->nDims()) i{0}; i < tv->nDims(); i++) {
if (tv->axis(i)->isReduction())
Expand Down
4 changes: 4 additions & 0 deletions torch/csrc/jit/codegen/cuda/lower_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,10 @@ struct FirstInnerMostScope : private OptInDispatch {

FirstInnerMostScope fims;
Expr* inner = fims.getInner(scope);

if (inner == nullptr)
return scope;

while (fims.getInner(inner) != nullptr)
inner = fims.getInner(inner);
return inner;
Expand Down