Skip to content

Commit 83dbf56

Browse files
authored
Patch dropout fix (#1898)
1 parent 69d3519 commit 83dbf56

File tree

3 files changed

+45
-12
lines changed

3 files changed

+45
-12
lines changed

torch/csrc/jit/codegen/cuda/codegen.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,7 @@ class CudaKernelGenerator : private OptOutConstDispatch {
708708
if (!print_inline_) {
709709
if (op_type == UnaryOpType::RandLike) {
710710
auto out_tv = uop->out()->as<kir::TensorIndex>()->view();
711-
auto index = genTensorIndex(uop->out()->as<kir::TensorIndex>());
711+
auto index = genTensorIndex(uop->in()->as<kir::TensorIndex>());
712712
int multiple = out_tv->getDataType() == DataType::Double ? 2 : 4;
713713
indent() << "nvfuser_index_t subseq" << uop->name() << " = (" << index
714714
<< ") / " << multiple << ";\n";

torch/csrc/jit/codegen/cuda/lower_index.cpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -94,20 +94,19 @@ void IndexLowering::handle(const kir::ForLoop* for_loop) {
9494

9595
// TODO: use a separate IR node to represent rand like
9696
void IndexLowering::lowerRandLike(const UnaryOp* uop) {
97-
// TODO: not using this input any more, remove
98-
// when making RandLike a no-input op.
99-
const auto in = lowerSrcIndex(uop->in(), uop->out());
100-
101-
// Default path for scalar output.
102-
Val* out = uop->out();
103-
10497
// Write random tensor indices into the consumer
10598
// tensor index if the output is a tensor.
10699
auto out_tv = dynamic_cast<TensorView*>(uop->out());
107-
if (out_tv != nullptr) {
108-
out = SimplifyingIrBuilder::create<kir::TensorIndex>(
109-
out_tv, Index::getRandomTensorStridedIndices(out_tv, for_loops_));
110-
}
100+
TORCH_INTERNAL_ASSERT(out_tv != nullptr, "rand scalar not yet supported");
101+
102+
// TODO: `in` is used here as a placeholder for the random tensor index.
103+
// The new rand op will need to keep a slot for this index when
104+
// randlike is separated from the unary op.
105+
auto in = SimplifyingIrBuilder::create<kir::TensorIndex>(
106+
out_tv, Index::getRandomTensorStridedIndices(out_tv, for_loops_));
107+
108+
// TensorIndex for writing randlike output.
109+
const auto out = lowerDstIndex(uop->out());
111110

112111
pushBack(IrBuilder::create<UnaryOp>(
113112
UnaryOpType::RandLike, out, in, uop->getRNGOffset()));

torch/csrc/jit/codegen/cuda/test/test_gpu_rng.cu

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,40 @@ TEST_F(NVFuserTest, FusionRNGValidateWithCURand_CUDA) {
133133
}
134134
}
135135
136+
TEST_F(NVFuserTest, FusionRNGSimpleValidateWithCURand_CUDA) {
137+
int64_t size = 128;
138+
auto dtype = kFloat;
139+
std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
140+
auto fusion = fusion_ptr.get();
141+
FusionGuard fg(fusion);
142+
143+
TensorView* tv0 = makeSymbolicTensor(1, aten_to_data_type(dtype));
144+
fusion->addInput(tv0);
145+
auto tv1 = randlike(tv0);
146+
auto tv2 = set(tv1);
147+
fusion->addOutput(tv2);
148+
149+
tv2->split(0, 8);
150+
tv2->axis(0)->parallelize(ParallelType::TIDx);
151+
152+
tv0->computeAt(tv2, 1);
153+
154+
auto options = at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
155+
at::Tensor t0 = at::zeros({size}, options);
156+
157+
FusionExecutor fe;
158+
fe.compileFusion(fusion, {t0});
159+
160+
at::manual_seed(0);
161+
auto cg_outputs = fe.runFusion({t0});
162+
auto out = cg_outputs[0];
163+
164+
at::manual_seed(0);
165+
auto ref = generate_uniform(size, dtype);
166+
167+
testValidate(fusion, {out}, {t0}, {ref}, __LINE__, __FILE__);
168+
}
169+
136170
TEST_F(NVFuserTest, FusionBroadcastingRNG_CUDA) {
137171
for (auto dtype : {kFloat, kDouble}) {
138172
std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();

0 commit comments

Comments
 (0)