61
61
#include < c10/core/SymIntArrayRef.h>
62
62
#include < utility>
63
63
#include < vector>
64
+ #include < iostream>
64
65
65
66
// Upper bound used by the `input.dim() <= MIOPEN_DIM_MAX` check in
// _select_batch_norm_backend.
static constexpr int MIOPEN_DIM_MAX{5};
66
67
@@ -514,8 +515,8 @@ BatchNormBackend _select_batch_norm_backend(
514
515
input.is_cuda ()
515
516
&& input.dim () <= MIOPEN_DIM_MAX
516
517
&& input.scalar_type () != at::kDouble
517
- && input.scalar_type () != at::kBFloat16
518
518
&& (weight.scalar_type () != at::kHalf )
519
+ && (weight.scalar_type () != at::kBFloat16 )
519
520
&& weight.defined () && bias.defined ()
520
521
&& ((running_mean.defined () && running_var.defined ())
521
522
|| (!running_mean.defined () && !running_var.defined () && training))
@@ -531,6 +532,7 @@ BatchNormBackend _select_batch_norm_backend(
531
532
return BatchNormBackend::Native;
532
533
}
533
534
535
+ bool PYTORCH_MIOPEN_EXTRA_LOGGING = c10::utils::check_env(" PYTORCH_MIOPEN_EXTRA_LOGGING" ).value_or(false );
534
536
535
537
// _batch_norm_impl_index(_backward) are used in the JIT to be able to keep the run-time selection
536
538
// of backends, while enabling it to keep the information about the used backend, so that it can
@@ -541,6 +543,20 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, int64_t> _batch_norm_impl_index(
541
543
const Tensor& input, const std::optional<Tensor>& weight_opt /* optional */ , const std::optional<Tensor>& bias_opt /* optional */ , const std::optional<Tensor>& running_mean_opt /* optional */ , const std::optional<Tensor>& running_var_opt /* optional */ ,
542
544
bool training, double momentum, double eps, bool cudnn_enabled) {
543
545
// See [Note: hacky wrapper removal for optional tensor]
546
+ if (PYTORCH_MIOPEN_EXTRA_LOGGING)
547
+ std :: cout
548
+ << " PYTORCH_MIOPEN_EXTRA_LOGGING: ********************* _batch_norm_impl_index"
549
+ << " input=" << input.scalar_type ()
550
+ << " weight=" << (weight_opt.has_value () ? weight_opt.value ().scalar_type () : at::ScalarType::Undefined)
551
+ << " bias=" << (bias_opt.has_value () ? bias_opt.value ().scalar_type () : at::ScalarType::Undefined)
552
+ << " running_mean=" << (running_mean_opt.has_value () ? running_mean_opt.value ().scalar_type () : at::ScalarType::Undefined)
553
+ << " running_var=" << (running_var_opt.has_value () ? running_var_opt.value ().scalar_type () : at::ScalarType::Undefined)
554
+ << " training=" << training
555
+ // << " momentum=" << momentum
556
+ // << " eps=" << eps
557
+ << " cudnn_enabled=" << cudnn_enabled
558
+ << std::endl;
559
+
544
560
c10::MaybeOwned<Tensor> weight_maybe_owned = at::borrow_from_optional_tensor (weight_opt);
545
561
const Tensor& weight = *weight_maybe_owned;
546
562
const Tensor& bias = c10::value_or_else (bias_opt, [] {return Tensor ();});
@@ -600,7 +616,24 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, int64_t> _batch_norm_impl_index(
600
616
601
617
Tensor reserve = at::empty ({0 }, input.options ().dtype (kByte ));
602
618
619
+ if (PYTORCH_MIOPEN_EXTRA_LOGGING)
620
+ std::cout
621
+ << " PYTORCH_MIOPEN_EXTRA_LOGGING: ********************* _batch_norm_impl_index (use_miopen)"
622
+ << " use_miopen=" << (backend == BatchNormBackend::Miopen)
623
+ << " cudnn_enabled=" << cudnn_enabled
624
+ << " dim=" << input.dim ()
625
+ << " memory_format=" << input.suggest_memory_format ()
626
+ << " input.dtype=" << input.scalar_type ()
627
+ << " weight.dtype=" << (weight.defined ()?" +" :" -" ) << weight.scalar_type ()
628
+ << " bias.dtype=" << (bias.defined ()?" +" :" -" ) << bias.scalar_type ()
629
+ << " running_mean.dtype=" << (running_mean.defined ()?" +" :" -" ) << running_mean.scalar_type ()
630
+ << " running_var.dtype=" << (running_mean.defined ()?" +" :" -" ) << running_mean.scalar_type ()
631
+ << " training=" << training
632
+ << std::endl;
633
+
603
634
if (backend == BatchNormBackend::Miopen) {
635
+ if (PYTORCH_MIOPEN_EXTRA_LOGGING)
636
+ std::cout << " PYTORCH_MIOPEN_EXTRA_LOGGING: ********************* _batch_norm_impl_index (calling miopen_batch_norm)" << std::endl;
604
637
return std::tuple_cat (
605
638
at::miopen_batch_norm (
606
639
input.contiguous (), weight.contiguous (), bias.contiguous (),
@@ -623,6 +656,8 @@ std::tuple<Tensor, Tensor, Tensor> _batch_norm_impl_index_backward(
623
656
const Tensor& input, const Tensor& grad_output, const std::optional<Tensor>& weight_opt /* optional */ , const std::optional<Tensor>& running_mean_opt /* optional */ , const std::optional<Tensor>& running_var_opt /* optional */ , const std::optional<Tensor>& save_mean_opt /* optional */ , const std::optional<Tensor>& save_var_transform_opt /* optional */ ,
624
657
bool train, double epsilon, std::array<bool , 3 > output_mask, const Tensor &reservedSpace) {
625
658
// See [Note: hacky wrapper removal for optional tensor]
659
+ if (PYTORCH_MIOPEN_EXTRA_LOGGING)
660
+ std :: cout << " PYTORCH_MIOPEN_EXTRA_LOGGING: ********************* _batch_norm_impl_index_backward" << std::endl;
626
661
c10::MaybeOwned<Tensor> weight_maybe_owned = at::borrow_from_optional_tensor (weight_opt);
627
662
const Tensor& weight = *weight_maybe_owned;
628
663
const Tensor& running_mean = c10::value_or_else (running_mean_opt, [] {return Tensor ();});
@@ -653,12 +688,16 @@ std::tuple<Tensor, Tensor, Tensor> _batch_norm_impl_index_backward(
653
688
654
689
// backward in inference mode is not supported in cudnn, fallback to native
655
690
if (impl_index == 0 || (!train)) {
691
+ if (PYTORCH_MIOPEN_EXTRA_LOGGING)
692
+ std :: cout << " PYTORCH_MIOPEN_EXTRA_LOGGING: ********************* _batch_norm_impl_index_backward (calling native_batch_norm_backward)" << std::endl;
656
693
return at::native_batch_norm_backward (grad_output, input, weight, running_mean, running_var, save_mean, save_var_transform, train, epsilon, output_mask);
657
694
} else if (impl_index == 1 ) {
658
695
// TODO: _batch_norm_impl_index_backward is only used in JIT. cudnn NHWC
659
696
// format conversion is done inside cudnn_batch_norm_backward instead
660
697
return at::cudnn_batch_norm_backward (input, grad_output, weight, running_mean, running_var, save_mean, save_var_transform, epsilon, reservedSpace);
661
698
} else if (impl_index == 2 ) {
699
+ if (PYTORCH_MIOPEN_EXTRA_LOGGING)
700
+ std :: cout << " PYTORCH_MIOPEN_EXTRA_LOGGING: ********************* _batch_norm_impl_index_backward (calling miopen_batch_norm_backward)" << std::endl;
662
701
return at::miopen_batch_norm_backward (input, grad_output, weight, running_mean, running_var, save_mean, save_var_transform, epsilon);
663
702
}
664
703
TORCH_INTERNAL_ASSERT (false , " Unsupported impl_index in _batch_norm_impl_index_backward: " , impl_index);
@@ -669,6 +708,20 @@ Tensor batch_norm(
669
708
const Tensor& input, const std::optional<Tensor>& weight_opt, const std::optional<Tensor>& bias_opt,
670
709
const std::optional<Tensor>& running_mean_opt, const std::optional<Tensor>& running_var_opt,
671
710
bool training, double momentum, double eps, bool cudnn_enabled) {
711
+ if (PYTORCH_MIOPEN_EXTRA_LOGGING)
712
+ std :: cout
713
+ << " PYTORCH_MIOPEN_EXTRA_LOGGING: ********************* batch_norm"
714
+ << " input=" << input.scalar_type ()
715
+ << " weight=" << (weight_opt.has_value () ? weight_opt.value ().scalar_type () : at::ScalarType::Undefined)
716
+ << " bias=" << (bias_opt.has_value () ? bias_opt.value ().scalar_type () : at::ScalarType::Undefined)
717
+ << " running_mean=" << (running_mean_opt.has_value () ? running_mean_opt.value ().scalar_type () : at::ScalarType::Undefined)
718
+ << " running_var=" << (running_var_opt.has_value () ? running_var_opt.value ().scalar_type () : at::ScalarType::Undefined)
719
+ << " training=" << training
720
+ // << " momentum=" << momentum
721
+ // << " eps=" << eps
722
+ << " cudnn_enabled=" << cudnn_enabled
723
+ << std::endl;
724
+
672
725
const Tensor& weight = c10::value_or_else (weight_opt, [] {return Tensor ();});
673
726
const Tensor& bias = c10::value_or_else (bias_opt, [] {return Tensor ();});
674
727
const Tensor& running_mean = c10::value_or_else (running_mean_opt, [] {return Tensor ();});
0 commit comments