
Commit 35348da

Rasmus Diederichsen authored and facebook-github-bot committed
WIP: Include note on cudnn determinism in each function backed by cudnn (pytorch#11434)
Summary: Ping ezyang. This addresses your comment in ROCm#114. Strangely, when running the doc build (`make html`) none of my changes are actually showing; could you point out what I'm doing wrong? Once pytorch#11329 is merged it might make sense to link to the reproducibility note everywhere.

Pull Request resolved: pytorch#11434
Differential Revision: D9751208
Pulled By: ezyang
fbshipit-source-id: cc672472449564ff099323c39603e8ff2b2d35c9
1 parent 54107ae commit 35348da

File tree: 4 files changed, +38 additions, -0 deletions


docs/source/cudnn_deterministic.rst

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+.. note::
+
+    In some circumstances when using the CUDA backend with CuDNN, this operator
+    may select a nondeterministic algorithm to increase performance. If this is
+    undesirable, you can try to make the operation deterministic (potentially at
+    a performance cost) by setting ``torch.backends.cudnn.deterministic =
+    True``.
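In user code, the setting named by the new note is a pair of global backend flags applied before any cuDNN-backed ops run. A minimal sketch, assuming a PyTorch install (the flags can be set even without a CUDA device; they only take effect on the cuDNN path):

```python
import torch

# Ask cuDNN to select only deterministic convolution algorithms
# (potentially at a performance cost), per the note above.
torch.backends.cudnn.deterministic = True

# Benchmark/autotune mode can pick different algorithms between runs,
# so disabling it also helps run-to-run reproducibility.
torch.backends.cudnn.benchmark = False

print(torch.backends.cudnn.deterministic)  # True
```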

torch/nn/functional.py

Lines changed: 14 additions & 0 deletions
@@ -67,6 +67,8 @@ def legacy_get_enum(size_average, reduce, emit_warning=True):
     See :class:`~torch.nn.Conv1d` for details and output shape.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         input: input tensor of shape :math:`(\text{minibatch} \times \text{in\_channels} \times iW)`
         weight: filters of shape :math:`(\text{out\_channels} \times \frac{\text{in\_channels}}{\text{groups}} \times kW)`
@@ -95,6 +97,8 @@ def legacy_get_enum(size_average, reduce, emit_warning=True):
     See :class:`~torch.nn.Conv2d` for details and output shape.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         input: input tensor of shape :math:`(\text{minibatch} \times \text{in\_channels} \times iH \times iW)`
         weight: filters of shape :math:`(\text{out\_channels} \times \frac{\text{in\_channels}}{\text{groups}} \times kH \times kW)`
@@ -124,6 +128,8 @@ def legacy_get_enum(size_average, reduce, emit_warning=True):
     See :class:`~torch.nn.Conv3d` for details and output shape.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         input: input tensor of shape :math:`(\text{minibatch} \times \text{in\_channels} \times iT \times iH \times iW)`
         weight: filters of shape :math:`(\text{out\_channels} \times \frac{\text{in\_channels}}{\text{groups}} \times kT \times kH \times kW)`
@@ -152,6 +158,8 @@ def legacy_get_enum(size_average, reduce, emit_warning=True):
     See :class:`~torch.nn.ConvTranspose1d` for details and output shape.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         input: input tensor of shape :math:`(\text{minibatch} \times \text{in\_channels} \times iW)`
         weight: filters of shape :math:`(\text{in\_channels} \times \frac{\text{out\_channels}}{\text{groups}} \times kW)`
@@ -183,6 +191,8 @@ def legacy_get_enum(size_average, reduce, emit_warning=True):
     See :class:`~torch.nn.ConvTranspose2d` for details and output shape.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         input: input tensor of shape :math:`(\text{minibatch} \times \text{in\_channels} \times iH \times iW)`
         weight: filters of shape :math:`(\text{in\_channels} \times \frac{\text{out\_channels}}{\text{groups}} \times kH \times kW)`
@@ -216,6 +226,8 @@ def legacy_get_enum(size_average, reduce, emit_warning=True):
     See :class:`~torch.nn.ConvTranspose3d` for details and output shape.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         input: input tensor of shape :math:`(\text{minibatch} \times \text{in\_channels} \times iT \times iH \times iW)`
         weight: filters of shape :math:`(\text{in\_channels} \times \frac{\text{out\_channels}}{\text{groups}} \times kT \times kH \times kW)`
@@ -1438,6 +1450,8 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0,
     See :class:`~torch.nn.CTCLoss` for details.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         log_probs: :math:`(T, N, C)` where `C = number of characters in alphabet including blank`,
             `T = input length`, and `N = batch size`.
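The functional interface that these docstring additions annotate is called as below; a minimal CPU-side sketch (the determinism caveat itself only matters when the call dispatches to the CUDA/cuDNN path):

```python
import torch
import torch.nn.functional as F

torch.manual_seed(0)
x = torch.randn(1, 3, 8, 8)   # (minibatch, in_channels, iH, iW)
w = torch.randn(6, 3, 3, 3)   # (out_channels, in_channels/groups, kH, kW)

# Same-size output thanks to padding=1 with a 3x3 kernel.
out = F.conv2d(x, w, padding=1)
print(out.shape)  # torch.Size([1, 6, 8, 8])
```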

torch/nn/modules/conv.py

Lines changed: 12 additions & 0 deletions
@@ -120,6 +120,8 @@ class Conv1d(_ConvNd):
         a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments
         :math:`(C_\text{in}=C_{in}, C_\text{out}=C_{in} \times K, ..., \text{groups}=C_{in})`.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         in_channels (int): Number of channels in the input image
         out_channels (int): Number of channels produced by the convolution
@@ -243,6 +245,8 @@ class Conv2d(_ConvNd):
         a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments
         :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         in_channels (int): Number of channels in the input image
         out_channels (int): Number of channels produced by the convolution
@@ -366,6 +370,8 @@ class Conv3d(_ConvNd):
         a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments
         :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         in_channels (int): Number of channels in the input image
         out_channels (int): Number of channels produced by the convolution
@@ -527,6 +533,8 @@ class ConvTranspose1d(_ConvTransposeMixin, _ConvNd):
     that :attr:`output_padding` is only used to find output shape, but does
     not actually add zero-padding to output.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         in_channels (int): Number of channels in the input image
         out_channels (int): Number of channels produced by the convolution
@@ -637,6 +645,8 @@ class ConvTranspose2d(_ConvTransposeMixin, _ConvNd):
     that :attr:`output_padding` is only used to find output shape, but does
     not actually add zero-padding to output.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         in_channels (int): Number of channels in the input image
         out_channels (int): Number of channels produced by the convolution
@@ -777,6 +787,8 @@ class ConvTranspose3d(_ConvTransposeMixin, _ConvNd):
     that :attr:`output_padding` is only used to find output shape, but does
     not actually add zero-padding to output.
 
+    .. include:: cudnn_deterministic.rst
+
     Args:
         in_channels (int): Number of channels in the input image
         out_channels (int): Number of channels produced by the convolution
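The module classes annotated above wrap the same functional calls; a minimal sketch of the `nn.Conv2d` case (shapes chosen arbitrarily for illustration):

```python
import torch
import torch.nn as nn

torch.manual_seed(0)

# 3 input channels -> 6 output channels, 3x3 kernel, same-size output.
conv = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, padding=1)
y = conv(torch.randn(1, 3, 8, 8))
print(y.shape)  # torch.Size([1, 6, 8, 8])
```

On a CUDA device this forward (and its backward) is where cuDNN may pick a nondeterministic algorithm, which is why the note is included per class.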

torch/nn/modules/loss.py

Lines changed: 5 additions & 0 deletions
@@ -1177,6 +1177,11 @@ class CTCLoss(_Loss):
         dtype :attr:`torch.int32`.
 
         The regular implementation uses the (more common in PyTorch) `torch.long` dtype.
+
+
+    .. include:: cudnn_deterministic.rst
+
+
     """
 
     def __init__(self, blank=0, reduction='elementwise_mean'):
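A minimal sketch of the `CTCLoss` usage the annotated docstring describes, on the regular (non-cuDNN) path with `torch.long` targets; sizes are illustrative, and note that the `'elementwise_mean'` reduction at this commit was later renamed `'mean'`:

```python
import torch
import torch.nn as nn

torch.manual_seed(0)
T, N, C, S = 50, 2, 20, 10  # input length, batch, classes incl. blank, target length

log_probs = torch.randn(T, N, C).log_softmax(2)            # (T, N, C)
targets = torch.randint(1, C, (N, S), dtype=torch.long)    # blank=0 excluded from targets
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.full((N,), S, dtype=torch.long)

ctc = nn.CTCLoss(blank=0)
loss = ctc(log_probs, targets, input_lengths, target_lengths)
print(loss.dim())  # 0 (scalar after reduction)
```

When inputs are on a CUDA device and the cuDNN criteria are met (e.g. `torch.int32` lengths), this is the call affected by the determinism note.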

Comments (0)