From 44df6819e1841d00ec639c1b5f260f3e04d29b91 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 25 Sep 2020 11:13:59 -0700
Subject: [PATCH 01/20] Torch setup.py

---
 ml-agents/setup.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index 0cfa25ff79..758f5b217e 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -63,13 +63,11 @@ def run(self):
         "Pillow>=4.2.1",
         "protobuf>=3.6",
         "pyyaml>=3.1.0",
-        "tensorflow>=1.14,<3.0",
+        # Windows ver. of PyTorch doesn't work from PyPi, use PyTorch server
+        "torch>=1.6.0",
         "cattrs>=1.0.0",
         "attrs>=19.3.0",
         'pypiwin32==223;platform_system=="Windows"',
-        # We don't actually need six, but tensorflow does, and pip seems
-        # to get confused and install the wrong version.
-        "six>=1.12.0",
     ],
     python_requires=">=3.6.1",
     entry_points={
@@ -79,5 +77,5 @@ def run(self):
         ]
     },
     cmdclass={"verify": VerifyVersionCommand},
-    extras_require={"torch": ["torch>=1.5.0"]},
+    extras_require={"tensorflow": ["tensorflow>=1.14,<3.0", "six>=1.12.0"]},
 )

From 9df44fd9b4c6ae0106d5e45c02c137bb3832e594 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 25 Sep 2020 16:07:20 -0700
Subject: [PATCH 02/20] Set torch to default

---
 ml-agents/mlagents/tf_utils/__init__.py       |  1 +
 ml-agents/mlagents/tf_utils/tf.py             | 42 +++++++++++-------
 ml-agents/mlagents/torch_utils/__init__.py    |  1 -
 ml-agents/mlagents/torch_utils/torch.py       | 43 ++++++-------------
 ml-agents/mlagents/trainers/ppo/trainer.py    | 16 +++----
 ml-agents/mlagents/trainers/sac/trainer.py    | 20 ++++-----
 .../mlagents/trainers/trainer/rl_trainer.py   | 21 ++++-----
 .../mlagents/trainers/trainer_controller.py   |  7 +--
 8 files changed, 75 insertions(+), 76 deletions(-)

diff --git a/ml-agents/mlagents/tf_utils/__init__.py b/ml-agents/mlagents/tf_utils/__init__.py
index 2acce8bb8c..b128304716 100644
--- a/ml-agents/mlagents/tf_utils/__init__.py
+++ b/ml-agents/mlagents/tf_utils/__init__.py
@@ -1,3 +1,4 @@
 from mlagents.tf_utils.tf import tf as tf  # noqa
 from mlagents.tf_utils.tf import set_warnings_enabled  # noqa
 from mlagents.tf_utils.tf import generate_session_config  # noqa
+from mlagents.tf_utils.tf import is_available  # noqa
diff --git a/ml-agents/mlagents/tf_utils/tf.py b/ml-agents/mlagents/tf_utils/tf.py
index 0cbd2d4145..7bb2d3544b 100644
--- a/ml-agents/mlagents/tf_utils/tf.py
+++ b/ml-agents/mlagents/tf_utils/tf.py
@@ -1,24 +1,35 @@
 # This should be the only place that we import tensorflow directly.
 # Everywhere else is caught by the banned-modules setting for flake8
-import tensorflow as tf  # noqa I201
+
 from distutils.version import LooseVersion
 
+try:
+    import tensorflow as tf  # noqa I201
 
-# LooseVersion handles things "1.2.3a" or "4.5.6-rc7" fairly sensibly.
-_is_tensorflow2 = LooseVersion(tf.__version__) >= LooseVersion("2.0.0")
+    # LooseVersion handles things "1.2.3a" or "4.5.6-rc7" fairly sensibly.
+    _is_tensorflow2 = LooseVersion(tf.__version__) >= LooseVersion("2.0.0")
 
-if _is_tensorflow2:
-    import tensorflow.compat.v1 as tf
+    if _is_tensorflow2:
+        import tensorflow.compat.v1 as tf
 
-    tf.disable_v2_behavior()
-    tf_logging = tf.logging
-else:
-    try:
-        # Newer versions of tf 1.x will complain that tf.logging is deprecated
-        tf_logging = tf.compat.v1.logging
-    except AttributeError:
-        # Fall back to the safe import, even if it might generate a warning or two.
+        tf.disable_v2_behavior()
         tf_logging = tf.logging
+    else:
+        try:
+            # Newer versions of tf 1.x will complain that tf.logging is deprecated
+            tf_logging = tf.compat.v1.logging
+        except AttributeError:
+            # Fall back to the safe import, even if it might generate a warning or two.
+            tf_logging = tf.logging
+except ImportError:
+    tf = None
+
+
+def is_available():
+    """
+    Returns whether TensorFlow is available in this Python environment
+    """
+    return tf is not None
 
 
 def set_warnings_enabled(is_enabled: bool) -> None:
@@ -26,8 +37,9 @@ def set_warnings_enabled(is_enabled: bool) -> None:
     Enable or disable tensorflow warnings (notably, this disables deprecation warnings.
     :param is_enabled:
     """
-    level = tf_logging.WARN if is_enabled else tf_logging.ERROR
-    tf_logging.set_verbosity(level)
+    if is_available():
+        level = tf_logging.WARN if is_enabled else tf_logging.ERROR
+        tf_logging.set_verbosity(level)
 
 
 def generate_session_config() -> tf.ConfigProto:
diff --git a/ml-agents/mlagents/torch_utils/__init__.py b/ml-agents/mlagents/torch_utils/__init__.py
index 509739171e..9ba35a3500 100644
--- a/ml-agents/mlagents/torch_utils/__init__.py
+++ b/ml-agents/mlagents/torch_utils/__init__.py
@@ -1,4 +1,3 @@
 from mlagents.torch_utils.torch import torch as torch  # noqa
 from mlagents.torch_utils.torch import nn  # noqa
-from mlagents.torch_utils.torch import is_available  # noqa
 from mlagents.torch_utils.torch import default_device  # noqa
diff --git a/ml-agents/mlagents/torch_utils/torch.py b/ml-agents/mlagents/torch_utils/torch.py
index f2fd8d18aa..98d7ae15cd 100644
--- a/ml-agents/mlagents/torch_utils/torch.py
+++ b/ml-agents/mlagents/torch_utils/torch.py
@@ -2,38 +2,23 @@
 
 from mlagents.torch_utils import cpu_utils
 
-# Detect availability of torch package here.
-# NOTE: this try/except is temporary until torch is required for ML-Agents.
-try:
-    # This should be the only place that we import torch directly.
-    # Everywhere else is caught by the banned-modules setting for flake8
-    import torch  # noqa I201
+# This should be the only place that we import torch directly.
+# Everywhere else is caught by the banned-modules setting for flake8
+import torch  # noqa I201
 
-    torch.set_num_threads(cpu_utils.get_num_threads_to_use())
-    os.environ["KMP_BLOCKTIME"] = "0"
+torch.set_num_threads(cpu_utils.get_num_threads_to_use())
+os.environ["KMP_BLOCKTIME"] = "0"
 
-    # Known PyLint compatibility with PyTorch https://github.com/pytorch/pytorch/issues/701
-    # pylint: disable=E1101
-    if torch.cuda.is_available():
-        torch.set_default_tensor_type(torch.cuda.FloatTensor)
-        device = torch.device("cuda")
-    else:
-        torch.set_default_tensor_type(torch.FloatTensor)
-        device = torch.device("cpu")
-    nn = torch.nn
-    # pylint: disable=E1101
-except ImportError:
-    torch = None
-    nn = None
-    device = None
+# Known PyLint compatibility with PyTorch https://github.com/pytorch/pytorch/issues/701
+# pylint: disable=E1101
+if torch.cuda.is_available():
+    torch.set_default_tensor_type(torch.cuda.FloatTensor)
+    device = torch.device("cuda")
+else:
+    torch.set_default_tensor_type(torch.FloatTensor)
+    device = torch.device("cpu")
+nn = torch.nn
 
 
 def default_device():
     return device
-
-
-def is_available():
-    """
-    Returns whether Torch is available in this Python environment
-    """
-    return torch is not None
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index 0ba6ee9de6..b117984aa7 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -11,20 +11,20 @@
 from mlagents_envs.base_env import BehaviorSpec
 from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.policy import Policy
-from mlagents.trainers.policy.tf_policy import TFPolicy
-from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
+from mlagents.trainers.policy.torch_policy import TorchPolicy
+from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
 from mlagents.trainers.trajectory import Trajectory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
 from mlagents.trainers.components.reward_signals import RewardSignal
-from mlagents import torch_utils
+from mlagents import tf_utils
 
-if torch_utils.is_available():
-    from mlagents.trainers.policy.torch_policy import TorchPolicy
-    from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
+if tf_utils.is_available():
+    from mlagents.trainers.policy.tf_policy import TFPolicy
+    from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
 else:
-    TorchPolicy = None  # type: ignore
-    TorchPPOOptimizer = None  # type: ignore
+    TFPolicy = None  # type: ignore
+    PPOOptimizer = None  # type: ignore
 
 
 logger = get_logger(__name__)
diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py
index 220f6205d6..bbd7d6ee6c 100644
--- a/ml-agents/mlagents/trainers/sac/trainer.py
+++ b/ml-agents/mlagents/trainers/sac/trainer.py
@@ -12,22 +12,22 @@
 from mlagents_envs.logging_util import get_logger
 from mlagents_envs.timers import timed
 from mlagents_envs.base_env import BehaviorSpec
-from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.policy import Policy
-from mlagents.trainers.sac.optimizer_tf import SACOptimizer
 from mlagents.trainers.trainer.rl_trainer import RLTrainer
+from mlagents.trainers.policy.torch_policy import TorchPolicy
+from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
 from mlagents.trainers.trajectory import Trajectory, SplitObservations
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType
 from mlagents.trainers.components.reward_signals import RewardSignal
-from mlagents import torch_utils
+from mlagents import tf_utils
 
-if torch_utils.is_available():
-    from mlagents.trainers.policy.torch_policy import TorchPolicy
-    from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
+if tf_utils.is_available():
+    from mlagents.trainers.policy.tf_policy import TFPolicy
+    from mlagents.trainers.sac.optimizer_tf import SACOptimizer
 else:
-    TorchPolicy = None  # type: ignore
-    TorchSACOptimizer = None  # type: ignore
+    TFPolicy = None  # type: ignore
+    SACOptimizer = None  # type: ignore
 
 logger = get_logger(__name__)
 
@@ -71,7 +71,7 @@ def __init__(
 
         self.seed = seed
         self.policy: Policy = None  # type: ignore
-        self.optimizer: SACOptimizer = None  # type: ignore
+        self.optimizer: TorchSACOptimizer = None  # type: ignore
         self.hyperparameters: SACSettings = cast(
             SACSettings, trainer_settings.hyperparameters
         )
@@ -378,7 +378,7 @@ def _update_reward_signals(self) -> None:
             for stat, stat_list in batch_update_stats.items():
                 self._stats_reporter.add_stat(stat, np.mean(stat_list))
 
-    def create_sac_optimizer(self) -> SACOptimizer:
+    def create_sac_optimizer(self) -> TorchSACOptimizer:
         if self.framework == FrameworkType.PYTORCH:
             return TorchSACOptimizer(  # type: ignore
                 cast(TorchPolicy, self.policy), self.trainer_settings  # type: ignore
diff --git a/ml-agents/mlagents/trainers/trainer/rl_trainer.py b/ml-agents/mlagents/trainers/trainer/rl_trainer.py
index da8c172615..6a6f4af303 100644
--- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py
+++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py
@@ -17,23 +17,24 @@
 from mlagents_envs.timers import hierarchical_timer
 from mlagents_envs.base_env import BehaviorSpec
 from mlagents.trainers.policy.policy import Policy
-from mlagents.trainers.policy.tf_policy import TFPolicy
+from mlagents.trainers.policy.torch_policy import TorchPolicy
+from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.agent_processor import AgentManagerQueue
 from mlagents.trainers.trajectory import Trajectory
 from mlagents.trainers.settings import TrainerSettings, FrameworkType
 from mlagents.trainers.stats import StatsPropertyType
 from mlagents.trainers.model_saver.model_saver import BaseModelSaver
-from mlagents.trainers.model_saver.tf_model_saver import TFModelSaver
+
 from mlagents.trainers.exception import UnityTrainerException
-from mlagents import torch_utils
+from mlagents import tf_utils
 
-if torch_utils.is_available():
-    from mlagents.trainers.policy.torch_policy import TorchPolicy
-    from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver
+if tf_utils.is_available():
+    from mlagents.trainers.policy.tf_policy import TFPolicy
+    from mlagents.trainers.model_saver.tf_model_saver import TFModelSaver
 else:
-    TorchPolicy = None  # type: ignore
-    TorchSaver = None  # type: ignore
+    TFPolicy = None  # type: ignore
+    TFModelSaver = None  # type: ignore
 
 RewardSignalResults = Dict[str, RewardSignalResult]
 
@@ -59,9 +60,9 @@ def __init__(self, *args, **kwargs):
             StatsPropertyType.HYPERPARAMETERS, self.trainer_settings.as_dict()
         )
         self.framework = self.trainer_settings.framework
-        if self.framework == FrameworkType.PYTORCH and not torch_utils.is_available():
+        if self.framework == FrameworkType.TENSORFLOW and not tf_utils.is_available():
             raise UnityTrainerException(
-                "To use the experimental PyTorch backend, install the PyTorch Python package first."
+                "To use the TensorFlow backend, install the TensorFlow Python package first."
             )
 
         logger.debug(f"Using framework {self.framework.value}")
diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py
index 3be4a6b43b..c161d4b3f5 100644
--- a/ml-agents/mlagents/trainers/trainer_controller.py
+++ b/ml-agents/mlagents/trainers/trainer_controller.py
@@ -9,6 +9,7 @@
 
 import numpy as np
 from mlagents.tf_utils import tf
+from mlagents import tf_utils
 
 from mlagents_envs.logging_util import get_logger
 from mlagents.trainers.env_manager import EnvManager, EnvironmentStep
@@ -66,9 +67,9 @@ def __init__(
         self.trainer_threads: List[threading.Thread] = []
         self.kill_trainers = False
         np.random.seed(training_seed)
-        tf.set_random_seed(training_seed)
-        if torch_utils.is_available():
-            torch_utils.torch.manual_seed(training_seed)
+        if tf_utils.is_available():
+            tf.set_random_seed(training_seed)
+        torch_utils.torch.manual_seed(training_seed)
         self.rank = get_rank()
 
     @timed

From 19a88ee4c2e8d03874775589085b3ce2a01dd482 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 25 Sep 2020 16:17:33 -0700
Subject: [PATCH 03/20] Make torch default in setup.py

---
 ml-agents/setup.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index 758f5b217e..996738cfe5 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -1,5 +1,7 @@
 import os
 import sys
+import pkg_resources
+from distutils.version import LooseVersion
 
 from setuptools import setup, find_packages
 from setuptools.command.install import install
@@ -63,8 +65,8 @@ def run(self):
         "Pillow>=4.2.1",
         "protobuf>=3.6",
         "pyyaml>=3.1.0",
-        # Windows ver. of PyTorch doesn't work from PyPi, use PyTorch server
-        "torch>=1.6.0",
+        # Windows ver. of PyTorch doesn't work from PyPi
+        'torch>=1.6.0;platform_system!="Windows"',
         "cattrs>=1.0.0",
         "attrs>=19.3.0",
         'pypiwin32==223;platform_system=="Windows"',
@@ -79,3 +81,16 @@ def run(self):
     cmdclass={"verify": VerifyVersionCommand},
     extras_require={"tensorflow": ["tensorflow>=1.14,<3.0", "six>=1.12.0"]},
 )
+
+# Check that torch version 1.6.0 or later has been installed. If not, refer
+# user to the PyTorch webpage for install instructions.
+torch_pkg = None
+try:
+    torch_pkg = pkg_resources.get_distribution("torch")
+except pkg_resources.DistributionNotFound:
+    pass
+assert torch_pkg is not None and LooseVersion(torch_pkg.version) >= LooseVersion(
+    "1.6.0"
+), "A compatible version of PyTorch was not installed. Please visit the PyTorch homepage \
+    (https://pytorch.org/get-started/locally/) and follow the instructions to install. \
+    Version 1.6.0 and later are supported."

From f5761f6e27d4a2cd4c6dff6deee3b5045d5bd847 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 25 Sep 2020 16:30:59 -0700
Subject: [PATCH 04/20] Remove indents

---
 ml-agents/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index 996738cfe5..d098a89a56 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -92,5 +92,5 @@ def run(self):
 assert torch_pkg is not None and LooseVersion(torch_pkg.version) >= LooseVersion(
     "1.6.0"
 ), "A compatible version of PyTorch was not installed. Please visit the PyTorch homepage \
-    (https://pytorch.org/get-started/locally/) and follow the instructions to install. \
-    Version 1.6.0 and later are supported."
+(https://pytorch.org/get-started/locally/) and follow the instructions to install. \
+Version 1.6.0 and later are supported."

From c0d9b81ca5a84653086c56eb4c6eb037dea1cbe6 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 25 Sep 2020 16:49:38 -0700
Subject: [PATCH 05/20] Remove other instances of TF being used

---
 ml-agents/mlagents/tf_utils/tf.py             | 21 +++++++-----
 ml-agents/mlagents/trainers/learn.py          |  5 +--
 ml-agents/mlagents/trainers/ppo/trainer.py    | 22 +++++++------
 ml-agents/mlagents/trainers/sac/trainer.py    | 33 ++++++++++---------
 .../mlagents/trainers/trainer/rl_trainer.py   | 12 +++----
 .../mlagents/trainers/trainer_controller.py   |  3 +-
 .../mlagents/trainers/training_status.py      | 12 +++++--
 7 files changed, 61 insertions(+), 47 deletions(-)

diff --git a/ml-agents/mlagents/tf_utils/tf.py b/ml-agents/mlagents/tf_utils/tf.py
index 7bb2d3544b..457cf01a0e 100644
--- a/ml-agents/mlagents/tf_utils/tf.py
+++ b/ml-agents/mlagents/tf_utils/tf.py
@@ -42,16 +42,19 @@ def set_warnings_enabled(is_enabled: bool) -> None:
         tf_logging.set_verbosity(level)
 
 
-def generate_session_config() -> tf.ConfigProto:
+def generate_session_config() -> "tf.ConfigProto":
     """
     Generate a ConfigProto to use for ML-Agents that doesn't consume all of the GPU memory
     and allows for soft placement in the case of multi-GPU.
     """
-    config = tf.ConfigProto()
-    config.gpu_options.allow_growth = True
-    # For multi-GPU training, set allow_soft_placement to True to allow
-    # placing the operation into an alternative device automatically
-    # to prevent from exceptions if the device doesn't suppport the operation
-    # or the device does not exist
-    config.allow_soft_placement = True
-    return config
+    if is_available():
+        config = tf.ConfigProto()
+        config.gpu_options.allow_growth = True
+        # For multi-GPU training, set allow_soft_placement to True to allow
+        # placing the operation into an alternative device automatically
+        # to prevent from exceptions if the device doesn't suppport the operation
+        # or the device does not exist
+        config.allow_soft_placement = True
+        return config
+    else:
+        return None
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index 88edfc49cc..a779b28819 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -1,4 +1,5 @@
 # # Unity ML-Agents Toolkit
+from mlagents import torch_utils
 import yaml
 
 import os
@@ -46,7 +47,7 @@ def get_version_string() -> str:
   ml-agents: {mlagents.trainers.__version__},
   ml-agents-envs: {mlagents_envs.__version__},
   Communicator API: {UnityEnvironment.API_VERSION},
-  TensorFlow: {tf_utils.tf.__version__}"""
+  PyTorch: {torch_utils.torch.__version__}"""
 
 
 def parse_command_line(argv: Optional[List[str]] = None) -> RunOptions:
@@ -264,7 +265,7 @@ def run_cli(options: RunOptions) -> None:
     add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
     add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
     add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
-    add_timer_metadata("tensorflow_version", tf_utils.tf.__version__)
+    add_timer_metadata("pytorch_version", torch_utils.torch.__version__)
     add_timer_metadata("numpy_version", np.__version__)
 
     if options.env_settings.seed == -1:
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index b117984aa7..7fd9e384fe 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -16,7 +16,9 @@
 from mlagents.trainers.trajectory import Trajectory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
-from mlagents.trainers.components.reward_signals import RewardSignal
+from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
+    BaseRewardProvider,
+)
 from mlagents import tf_utils
 
 if tf_utils.is_available():
@@ -90,14 +92,14 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
 
         for name, v in value_estimates.items():
             agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
-            if isinstance(self.optimizer.reward_signals[name], RewardSignal):
+            if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider):
                 self._stats_reporter.add_stat(
-                    self.optimizer.reward_signals[name].value_name, np.mean(v)
+                    f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
+                    np.mean(v),
                 )
             else:
                 self._stats_reporter.add_stat(
-                    f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
-                    np.mean(v),
+                    self.optimizer.reward_signals[name].value_name, np.mean(v)
                 )
 
         # Evaluate all reward functions
@@ -105,15 +107,15 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             agent_buffer_trajectory["environment_rewards"]
         )
         for name, reward_signal in self.optimizer.reward_signals.items():
-            if isinstance(reward_signal, RewardSignal):
-                evaluate_result = reward_signal.evaluate_batch(
-                    agent_buffer_trajectory
-                ).scaled_reward
-            else:
+            if isinstance(reward_signal, BaseRewardProvider):
                 evaluate_result = (
                     reward_signal.evaluate(agent_buffer_trajectory)
                     * reward_signal.strength
                 )
+            else:
+                evaluate_result = reward_signal.evaluate_batch(
+                    agent_buffer_trajectory
+                ).scaled_reward
             agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
             # Report the reward signals
             self.collected_rewards[name][agent_id] += np.sum(evaluate_result)
diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py
index bbd7d6ee6c..3550d69323 100644
--- a/ml-agents/mlagents/trainers/sac/trainer.py
+++ b/ml-agents/mlagents/trainers/sac/trainer.py
@@ -19,7 +19,7 @@
 from mlagents.trainers.trajectory import Trajectory, SplitObservations
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType
-from mlagents.trainers.components.reward_signals import RewardSignal
+from mlagents.trainers.torch.components.reward_providers import BaseRewardProvider
 from mlagents import tf_utils
 
 if tf_utils.is_available():
@@ -149,15 +149,16 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             agent_buffer_trajectory["environment_rewards"]
         )
         for name, reward_signal in self.optimizer.reward_signals.items():
-            if isinstance(reward_signal, RewardSignal):
-                evaluate_result = reward_signal.evaluate_batch(
-                    agent_buffer_trajectory
-                ).scaled_reward
-            else:
+            if isinstance(reward_signal, BaseRewardProvider):
                 evaluate_result = (
                     reward_signal.evaluate(agent_buffer_trajectory)
                     * reward_signal.strength
                 )
+            else:
+                evaluate_result = reward_signal.evaluate_batch(
+                    agent_buffer_trajectory
+                ).scaled_reward
+
             # Report the reward signals
             self.collected_rewards[name][agent_id] += np.sum(evaluate_result)
 
@@ -166,14 +167,14 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             agent_buffer_trajectory, trajectory.next_obs, trajectory.done_reached
         )
         for name, v in value_estimates.items():
-            if isinstance(self.optimizer.reward_signals[name], RewardSignal):
+            if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider):
                 self._stats_reporter.add_stat(
-                    self.optimizer.reward_signals[name].value_name, np.mean(v)
+                    f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
+                    np.mean(v),
                 )
             else:
                 self._stats_reporter.add_stat(
-                    f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
-                    np.mean(v),
+                    self.optimizer.reward_signals[name].value_name, np.mean(v)
                 )
 
         # Bootstrap using the last step rather than the bootstrap step if max step is reached.
@@ -301,14 +302,14 @@ def _update_sac_policy(self) -> bool:
                 )
                 # Get rewards for each reward
                 for name, signal in self.optimizer.reward_signals.items():
-                    if isinstance(signal, RewardSignal):
-                        sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
-                            sampled_minibatch
-                        ).scaled_reward
-                    else:
+                    if isinstance(signal, BaseRewardProvider):
                         sampled_minibatch[f"{name}_rewards"] = (
                             signal.evaluate(sampled_minibatch) * signal.strength
                         )
+                    else:
+                        sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
+                            sampled_minibatch
+                        ).scaled_reward
 
                 update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
                 for stat_name, value in update_stats.items():
@@ -355,7 +356,7 @@ def _update_reward_signals(self) -> None:
             reward_signal_minibatches = {}
             for name, signal in self.optimizer.reward_signals.items():
                 logger.debug(f"Updating {name} at step {self.step}")
-                if isinstance(signal, RewardSignal):
+                if not isinstance(signal, BaseRewardProvider):
                     # Some signals don't need a minibatch to be sampled - so we don't!
                     if signal.update_dict:
                         reward_signal_minibatches[name] = buffer.sample_mini_batch(
diff --git a/ml-agents/mlagents/trainers/trainer/rl_trainer.py b/ml-agents/mlagents/trainers/trainer/rl_trainer.py
index 6a6f4af303..1ab6618e30 100644
--- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py
+++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py
@@ -13,7 +13,9 @@
 from mlagents.trainers.optimizer import Optimizer
 from mlagents.trainers.buffer import AgentBuffer
 from mlagents.trainers.trainer import Trainer
-from mlagents.trainers.components.reward_signals import RewardSignalResult, RewardSignal
+from mlagents.trainers.torch.components.reward_providers.base_reward_provider import (
+    BaseRewardProvider,
+)
 from mlagents_envs.timers import hierarchical_timer
 from mlagents_envs.base_env import BehaviorSpec
 from mlagents.trainers.policy.policy import Policy
@@ -36,8 +38,6 @@
     TFPolicy = None  # type: ignore
     TFModelSaver = None  # type: ignore
 
-RewardSignalResults = Dict[str, RewardSignalResult]
-
 logger = get_logger(__name__)
 
 
@@ -94,14 +94,14 @@ def _update_end_episode_stats(self, agent_id: str, optimizer: Optimizer) -> None
                 self.reward_buffer.appendleft(rewards.get(agent_id, 0))
                 rewards[agent_id] = 0
             else:
-                if isinstance(optimizer.reward_signals[name], RewardSignal):
+                if isinstance(optimizer.reward_signals[name], BaseRewardProvider):
                     self.stats_reporter.add_stat(
-                        optimizer.reward_signals[name].stat_name,
+                        f"Policy/{optimizer.reward_signals[name].name.capitalize()} Reward",
                         rewards.get(agent_id, 0),
                     )
                 else:
                     self.stats_reporter.add_stat(
-                        f"Policy/{optimizer.reward_signals[name].name.capitalize()} Reward",
+                        optimizer.reward_signals[name].stat_name,
                         rewards.get(agent_id, 0),
                     )
                 rewards[agent_id] = 0
diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py
index c161d4b3f5..5153e745f8 100644
--- a/ml-agents/mlagents/trainers/trainer_controller.py
+++ b/ml-agents/mlagents/trainers/trainer_controller.py
@@ -167,7 +167,8 @@ def _create_trainers_and_managers(
     @timed
     def start_learning(self, env_manager: EnvManager) -> None:
         self._create_output_path(self.output_path)
-        tf.reset_default_graph()
+        if tf_utils.is_available():
+            tf.reset_default_graph()
         try:
             # Initial reset
             self._reset_env(env_manager)
diff --git a/ml-agents/mlagents/trainers/training_status.py b/ml-agents/mlagents/trainers/training_status.py
index cc8841c11c..466d67d595 100644
--- a/ml-agents/mlagents/trainers/training_status.py
+++ b/ml-agents/mlagents/trainers/training_status.py
@@ -5,14 +5,15 @@
 import attr
 import cattr
 
-from mlagents.tf_utils import tf
+from mlagents.torch_utils import torch
+from mlagents.tf_utils import tf, is_available
 from mlagents_envs.logging_util import get_logger
 from mlagents.trainers import __version__
 from mlagents.trainers.exception import TrainerError
 
 logger = get_logger(__name__)
 
-STATUS_FORMAT_VERSION = "0.1.0"
+STATUS_FORMAT_VERSION = "0.2.0"
 
 
 class StatusType(Enum):
@@ -26,7 +27,8 @@ class StatusType(Enum):
 class StatusMetaData:
     stats_format_version: str = STATUS_FORMAT_VERSION
     mlagents_version: str = __version__
-    tensorflow_version: str = tf.__version__
+    torch_version: str = torch.__version__
+    tensorflow_version: str = tf.__version__ if is_available() else -1
 
     def to_dict(self) -> Dict[str, str]:
         return cattr.unstructure(self)
@@ -49,6 +51,10 @@ def check_compatibility(self, other: "StatusMetaData") -> None:
             logger.warning(
                 "Tensorflow checkpoint was saved with a different version of Tensorflow. Model may not resume properly."
             )
+        if self.torch_version != other.torch_version:
+            logger.warning(
+                "PyTorch checkpoint was saved with a different version of PyTorch. Model may not resume properly."
+            )
 
 
 class GlobalTrainingStatus:

From 978c52fa7ceb710ad3bb40377f309257a09ef77a Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 25 Sep 2020 17:06:47 -0700
Subject: [PATCH 06/20] Add tensorboard to setup.py

---
 ml-agents/setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index d098a89a56..5887f28c82 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -67,6 +67,7 @@ def run(self):
         "pyyaml>=3.1.0",
         # Windows ver. of PyTorch doesn't work from PyPi
         'torch>=1.6.0;platform_system!="Windows"',
+        "tensorboard>=1.14",
         "cattrs>=1.0.0",
         "attrs>=19.3.0",
         'pypiwin32==223;platform_system=="Windows"',

From c7303f0a6190243981f8bdd360a4852d67c7db63 Mon Sep 17 00:00:00 2001
From: Vincent-Pierre BERGES <vincentpierre@unity3d.com>
Date: Wed, 30 Sep 2020 17:21:55 -0700
Subject: [PATCH 07/20] Adding correct setup commands for verifying torch is
 installed (#4524)

* Adding correct setup commands for verifying torch is installed

* Editing the test_requirements to add tf and remove torch
---
 ml-agents/setup.py    | 51 +++++++++++++++++++++++++++++++------------
 test_requirements.txt |  4 ++--
 2 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index 5887f28c82..0c1f0a3515 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -5,6 +5,7 @@
 
 from setuptools import setup, find_packages
 from setuptools.command.install import install
+from setuptools.command.develop import develop
 import mlagents.trainers
 
 VERSION = mlagents.trainers.__version__
@@ -32,6 +33,37 @@ def run(self):
             sys.exit(info)
 
 
+def verify_torch_installed():
+    # Check that torch version 1.6.0 or later has been installed. If not, refer
+    # user to the PyTorch webpage for install instructions.
+    torch_pkg = None
+    try:
+        torch_pkg = pkg_resources.get_distribution("torch")
+    except pkg_resources.DistributionNotFound:
+        pass
+    assert torch_pkg is not None and LooseVersion(torch_pkg.version) >= LooseVersion(
+        "1.6.0"
+    ), (
+        "A compatible version of PyTorch was not installed. Please visit the PyTorch homepage ",
+        "(https://pytorch.org/get-started/locally/) and follow the instructions to install. ",
+        "Version 1.6.0 and later are supported.",
+    )
+
+
+class VerifyTorchInstallCommand(install):
+    description = "verify that Torch is installed"
+
+    def run(self):
+        verify_torch_installed()
+
+
+class VerifyTorchDevelopCommand(develop):
+    description = "verify that Torch is installed"
+
+    def run(self):
+        verify_torch_installed()
+
+
 # Get the long description from the README file
 with open(os.path.join(here, "README.md"), encoding="utf-8") as f:
     long_description = f.read()
@@ -79,19 +111,10 @@ def run(self):
             "mlagents-run-experiment=mlagents.trainers.run_experiment:main",
         ]
     },
-    cmdclass={"verify": VerifyVersionCommand},
+    cmdclass={
+        "verify": VerifyVersionCommand,
+        "install": VerifyTorchInstallCommand,
+        "develop": VerifyTorchDevelopCommand,
+    },
     extras_require={"tensorflow": ["tensorflow>=1.14,<3.0", "six>=1.12.0"]},
 )
-
-# Check that torch version 1.6.0 or later has been installed. If not, refer
-# user to the PyTorch webpage for install instructions.
-torch_pkg = None
-try:
-    torch_pkg = pkg_resources.get_distribution("torch")
-except pkg_resources.DistributionNotFound:
-    pass
-assert torch_pkg is not None and LooseVersion(torch_pkg.version) >= LooseVersion(
-    "1.6.0"
-), "A compatible version of PyTorch was not installed. Please visit the PyTorch homepage \
-(https://pytorch.org/get-started/locally/) and follow the instructions to install. \
-Version 1.6.0 and later are supported."
diff --git a/test_requirements.txt b/test_requirements.txt
index 656ae427a4..b08f286fa7 100644
--- a/test_requirements.txt
+++ b/test_requirements.txt
@@ -3,7 +3,7 @@ pytest>4.0.0,<6.0.0
 pytest-cov==2.6.1
 pytest-xdist==1.34.0
 
-# PyTorch tests are here for the time being, before they are used in the codebase.
-torch>=1.5.0
+# Tensorflow tests are here for the time being, before they are used in the codebase.
+tensorflow>=1.14,<3.0
 
 tf2onnx>=1.5.5

From 86faff298c21d0582dc105a995ce47be7a72a899 Mon Sep 17 00:00:00 2001
From: Vincent-Pierre BERGES <vincentpierre@unity3d.com>
Date: Thu, 1 Oct 2020 15:05:42 -0700
Subject: [PATCH 08/20] Develop torchdefault raise outside setup (#4530)

* Torch not imported error to raise at first usage

* Torch not imported error to raise at first usage
---
 ml-agents/mlagents/torch_utils/torch.py | 23 ++++++++++++++
 ml-agents/setup.py                      | 40 +------------------------
 2 files changed, 24 insertions(+), 39 deletions(-)

diff --git a/ml-agents/mlagents/torch_utils/torch.py b/ml-agents/mlagents/torch_utils/torch.py
index 98d7ae15cd..5664a0776b 100644
--- a/ml-agents/mlagents/torch_utils/torch.py
+++ b/ml-agents/mlagents/torch_utils/torch.py
@@ -1,11 +1,34 @@
 import os
 
+from distutils.version import LooseVersion
+import pkg_resources
 from mlagents.torch_utils import cpu_utils
 
+
+def assert_torch_installed():
+    # Check that torch version 1.6.0 or later has been installed. If not, refer
+    # user to the PyTorch webpage for install instructions.
+    torch_pkg = None
+    try:
+        torch_pkg = pkg_resources.get_distribution("torch")
+    except pkg_resources.DistributionNotFound:
+        pass
+    assert torch_pkg is not None and LooseVersion(torch_pkg.version) >= LooseVersion(
+        "1.6.0"
+    ), (
+        "A compatible version of PyTorch was not installed. Please visit the PyTorch homepage "
+        + "(https://pytorch.org/get-started/locally/) and follow the instructions to install. "
+        + "Version 1.6.0 and later are supported."
+    )
+
+
+assert_torch_installed()
+
 # This should be the only place that we import torch directly.
 # Everywhere else is caught by the banned-modules setting for flake8
 import torch  # noqa I201
 
+
 torch.set_num_threads(cpu_utils.get_num_threads_to_use())
 os.environ["KMP_BLOCKTIME"] = "0"
 
diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index 0c1f0a3515..e6df549ed7 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -1,11 +1,8 @@
 import os
 import sys
-import pkg_resources
-from distutils.version import LooseVersion
 
 from setuptools import setup, find_packages
 from setuptools.command.install import install
-from setuptools.command.develop import develop
 import mlagents.trainers
 
 VERSION = mlagents.trainers.__version__
@@ -33,37 +30,6 @@ def run(self):
             sys.exit(info)
 
 
-def verify_torch_installed():
-    # Check that torch version 1.6.0 or later has been installed. If not, refer
-    # user to the PyTorch webpage for install instructions.
-    torch_pkg = None
-    try:
-        torch_pkg = pkg_resources.get_distribution("torch")
-    except pkg_resources.DistributionNotFound:
-        pass
-    assert torch_pkg is not None and LooseVersion(torch_pkg.version) >= LooseVersion(
-        "1.6.0"
-    ), (
-        "A compatible version of PyTorch was not installed. Please visit the PyTorch homepage ",
-        "(https://pytorch.org/get-started/locally/) and follow the instructions to install. ",
-        "Version 1.6.0 and later are supported.",
-    )
-
-
-class VerifyTorchInstallCommand(install):
-    description = "verify that Torch is installed"
-
-    def run(self):
-        verify_torch_installed()
-
-
-class VerifyTorchDevelopCommand(develop):
-    description = "verify that Torch is installed"
-
-    def run(self):
-        verify_torch_installed()
-
-
 # Get the long description from the README file
 with open(os.path.join(here, "README.md"), encoding="utf-8") as f:
     long_description = f.read()
@@ -111,10 +77,6 @@ def run(self):
             "mlagents-run-experiment=mlagents.trainers.run_experiment:main",
         ]
     },
-    cmdclass={
-        "verify": VerifyVersionCommand,
-        "install": VerifyTorchInstallCommand,
-        "develop": VerifyTorchDevelopCommand,
-    },
+    cmdclass={"verify": VerifyVersionCommand},
     extras_require={"tensorflow": ["tensorflow>=1.14,<3.0", "six>=1.12.0"]},
 )

From 3c216001f4049d0d3f58a9bd2e1a2904440db521 Mon Sep 17 00:00:00 2001
From: Ervin T <ervin@unity3d.com>
Date: Mon, 19 Oct 2020 15:22:08 -0700
Subject: [PATCH 09/20] [refactor] Use PyTorch TensorBoard utils (#4518)

* Convert stats writer to use PyTorch TB support

* Use common function to print params

* Update test

* Bump tensorboard to 1.15 to fix the tests

* putting tensorboard 1.15.0 as min version requirement

Co-authored-by: vincentpierre <vincentpierre@unity3d.com>
---
 ml-agents/mlagents/trainers/stats.py          | 86 +++++++------------
 .../mlagents/trainers/tests/test_stats.py     | 20 ++---
 ml-agents/setup.py                            |  2 +-
 test_constraints_min_version.txt              |  1 +
 4 files changed, 39 insertions(+), 70 deletions(-)

diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py
index fe74dd84f6..053b44986b 100644
--- a/ml-agents/mlagents/trainers/stats.py
+++ b/ml-agents/mlagents/trainers/stats.py
@@ -1,6 +1,6 @@
 from collections import defaultdict
 from enum import Enum
-from typing import List, Dict, NamedTuple, Any, Optional
+from typing import List, Dict, NamedTuple, Any
 import numpy as np
 import abc
 import os
@@ -9,13 +9,34 @@
 
 from mlagents_envs.logging_util import get_logger
 from mlagents_envs.timers import set_gauge
-from mlagents.tf_utils import tf, generate_session_config
+from torch.utils.tensorboard import SummaryWriter
 from mlagents.tf_utils.globals import get_rank
 
 
 logger = get_logger(__name__)
 
 
+def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str:
+    """
+    Takes a parameter dictionary and converts it to a human-readable string.
+    Recurses if there are multiple levels of dict. Used to print out hyperparameters.
+    param: param_dict: A Dictionary of key, value parameters.
+    return: A string version of this dictionary.
+    """
+    if not isinstance(param_dict, dict):
+        return str(param_dict)
+    else:
+        append_newline = "\n" if num_tabs > 0 else ""
+        return append_newline + "\n".join(
+            [
+                "\t"
+                + "  " * num_tabs
+                + "{}:\t{}".format(x, _dict_to_str(param_dict[x], num_tabs + 1))
+                for x in param_dict
+            ]
+        )
+
+
 class StatsSummary(NamedTuple):
     mean: float
     std: float
@@ -123,35 +144,13 @@ def add_property(
         if property_type == StatsPropertyType.HYPERPARAMETERS:
             logger.info(
                 """Hyperparameters for behavior name {}: \n{}""".format(
-                    category, self._dict_to_str(value, 0)
+                    category, _dict_to_str(value, 0)
                 )
             )
         elif property_type == StatsPropertyType.SELF_PLAY:
             assert isinstance(value, bool)
             self.self_play = value
 
-    def _dict_to_str(self, param_dict: Dict[str, Any], num_tabs: int) -> str:
-        """
-        Takes a parameter dictionary and converts it to a human-readable string.
-        Recurses if there are multiple levels of dict. Used to print out hyperparameters.
-        param: param_dict: A Dictionary of key, value parameters.
-        return: A string version of this dictionary.
-        """
-        if not isinstance(param_dict, dict):
-            return str(param_dict)
-        else:
-            append_newline = "\n" if num_tabs > 0 else ""
-            return append_newline + "\n".join(
-                [
-                    "\t"
-                    + "  " * num_tabs
-                    + "{}:\t{}".format(
-                        x, self._dict_to_str(param_dict[x], num_tabs + 1)
-                    )
-                    for x in param_dict
-                ]
-            )
-
 
 class TensorboardWriter(StatsWriter):
     def __init__(self, base_dir: str, clear_past_data: bool = False):
@@ -162,7 +161,7 @@ def __init__(self, base_dir: str, clear_past_data: bool = False):
         :param clear_past_data: Whether or not to clean up existing Tensorboard files associated with the base_dir and
             category.
         """
-        self.summary_writers: Dict[str, tf.summary.FileWriter] = {}
+        self.summary_writers: Dict[str, SummaryWriter] = {}
         self.base_dir: str = base_dir
         self._clear_past_data = clear_past_data
 
@@ -171,9 +170,7 @@ def write_stats(
     ) -> None:
         self._maybe_create_summary_writer(category)
         for key, value in values.items():
-            summary = tf.Summary()
-            summary.value.add(tag=f"{key}", simple_value=value.mean)
-            self.summary_writers[category].add_summary(summary, step)
+            self.summary_writers[category].add_scalar(f"{key}", value.mean, step)
             self.summary_writers[category].flush()
 
     def _maybe_create_summary_writer(self, category: str) -> None:
@@ -184,7 +181,7 @@ def _maybe_create_summary_writer(self, category: str) -> None:
             os.makedirs(filewriter_dir, exist_ok=True)
             if self._clear_past_data:
                 self._delete_all_events_files(filewriter_dir)
-            self.summary_writers[category] = tf.summary.FileWriter(filewriter_dir)
+            self.summary_writers[category] = SummaryWriter(filewriter_dir)
 
     def _delete_all_events_files(self, directory_name: str) -> None:
         for file_name in os.listdir(directory_name):
@@ -206,34 +203,11 @@ def add_property(
     ) -> None:
         if property_type == StatsPropertyType.HYPERPARAMETERS:
             assert isinstance(value, dict)
-            summary = self._dict_to_tensorboard("Hyperparameters", value)
+            summary = _dict_to_str(value, 0)
             self._maybe_create_summary_writer(category)
             if summary is not None:
-                self.summary_writers[category].add_summary(summary, 0)
-
-    def _dict_to_tensorboard(
-        self, name: str, input_dict: Dict[str, Any]
-    ) -> Optional[bytes]:
-        """
-        Convert a dict to a Tensorboard-encoded string.
-        :param name: The name of the text.
-        :param input_dict: A dictionary that will be displayed in a table on Tensorboard.
-        """
-        try:
-            with tf.Session(config=generate_session_config()) as sess:
-                s_op = tf.summary.text(
-                    name,
-                    tf.convert_to_tensor(
-                        [[str(x), str(input_dict[x])] for x in input_dict]
-                    ),
-                )
-                s = sess.run(s_op)
-                return s
-        except Exception:
-            logger.warning(
-                f"Could not write {name} summary for Tensorboard: {input_dict}"
-            )
-            return None
+                self.summary_writers[category].add_text("Hyperparameters", summary)
+                self.summary_writers[category].flush()
 
 
 class StatsReporter:
diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py
index 0fed8210de..7a81ac684b 100644
--- a/ml-agents/mlagents/trainers/tests/test_stats.py
+++ b/ml-agents/mlagents/trainers/tests/test_stats.py
@@ -68,9 +68,8 @@ def test_stat_reporter_property():
     )
 
 
-@mock.patch("mlagents.tf_utils.tf.Summary")
-@mock.patch("mlagents.tf_utils.tf.summary.FileWriter")
-def test_tensorboard_writer(mock_filewriter, mock_summary):
+@mock.patch("mlagents.trainers.stats.SummaryWriter")
+def test_tensorboard_writer(mock_summary):
     # Test write_stats
     category = "category1"
     with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
@@ -83,22 +82,17 @@ def test_tensorboard_writer(mock_filewriter, mock_summary):
             basedir=base_dir, category=category
         )
         assert os.path.exists(filewriter_dir)
-        mock_filewriter.assert_called_once_with(filewriter_dir)
+        mock_summary.assert_called_once_with(filewriter_dir)
 
         # Test that the filewriter was written to and the summary was added.
-        mock_summary.return_value.value.add.assert_called_once_with(
-            tag="key1", simple_value=1.0
-        )
-        mock_filewriter.return_value.add_summary.assert_called_once_with(
-            mock_summary.return_value, 10
-        )
-        mock_filewriter.return_value.flush.assert_called_once()
+        mock_summary.return_value.add_scalar.assert_called_once_with("key1", 1.0, 10)
+        mock_summary.return_value.flush.assert_called_once()
 
         # Test hyperparameter writing - no good way to parse the TB string though.
         tb_writer.add_property(
             "category1", StatsPropertyType.HYPERPARAMETERS, {"example": 1.0}
         )
-        assert mock_filewriter.return_value.add_summary.call_count > 1
+        assert mock_summary.return_value.add_text.call_count >= 1
 
 
 def test_tensorboard_writer_clear(tmp_path):
@@ -153,7 +147,7 @@ def test_console_writer(self):
                 },
                 10,
             )
-            # Test hyperparameter writing - no good way to parse the TB string though.
+            # Test hyperparameter writing
             console_writer.add_property(
                 "category1", StatsPropertyType.HYPERPARAMETERS, {"example": 1.0}
             )
diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index 7528245155..d8119bdc4a 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -65,7 +65,7 @@ def run(self):
         "pyyaml>=3.1.0",
         # Windows ver. of PyTorch doesn't work from PyPi
         'torch>=1.6.0;platform_system!="Windows"',
-        "tensorboard>=1.14",
+        "tensorboard>=1.15",
         "cattrs>=1.0.0",
         "attrs>=19.3.0",
         'pypiwin32==223;platform_system=="Windows"',
diff --git a/test_constraints_min_version.txt b/test_constraints_min_version.txt
index b30759c89d..a6dac3fcf9 100644
--- a/test_constraints_min_version.txt
+++ b/test_constraints_min_version.txt
@@ -5,3 +5,4 @@ Pillow==4.2.1
 protobuf==3.6
 tensorflow==1.14.0
 h5py==2.9.0
+tensorboard==1.15.0

From fe0cfbf06e7be412690f37269957657cec1b2923 Mon Sep 17 00:00:00 2001
From: Vincent-Pierre BERGES <vincentpierre@unity3d.com>
Date: Mon, 19 Oct 2020 17:18:54 -0700
Subject: [PATCH 10/20] [Docs] Initial documentation changes for making Torch
 the default (#4561)

* Initial commit

* Forgotten doc

* Removing the `Installation-Anaconda-Windows.md` as it is deprecated

* Re-adding the deprecated Installation-Anaconda-Windows.md but leaving it unchanged

* more references to tensorflow removed

* Update README.md

Co-authored-by: Ervin T. <ervin@unity3d.com>

* Change references to .nn to .onnx in docs (#4583)

Co-authored-by: Ervin T. <ervin@unity3d.com>
---
 .github/ISSUE_TEMPLATE/bug_report.md          |  2 +-
 README.md                                     |  2 +-
 docs/Background-Machine-Learning.md           |  2 +-
 ...nd-TensorFlow.md => Background-PyTorch.md} | 18 +++++-----
 docs/Getting-Started.md                       | 10 +++---
 docs/Installation.md                          |  9 ++---
 docs/Learning-Environment-Executable.md       |  4 +--
 docs/ML-Agents-Overview.md                    |  8 ++---
 docs/Readme.md                                |  2 +-
 docs/Training-Configuration-File.md           |  2 +-
 docs/Training-ML-Agents.md                    | 35 +------------------
 docs/Unity-Inference-Engine.md                |  6 ++--
 12 files changed, 32 insertions(+), 68 deletions(-)
 rename docs/{Background-TensorFlow.md => Background-PyTorch.md} (73%)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index f77eb279ef..2ea57e3e1c 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -27,7 +27,7 @@ If applicable, add screenshots to help explain your problem.
 - Unity Version: [e.g. Unity 2020.1f1]
 - OS + version: [e.g. Windows 10]
 - _ML-Agents version_: (e.g. ML-Agents v0.8, or latest `develop` branch from source)
-- _TensorFlow version_: (you can run `pip3 show tensorflow` to get this)
+- _Torch version_: (you can run `pip3 show torch` to get this)
 - _Environment_: (which example environment you used to reproduce the error)
 
 **NOTE:** We are unable to help reproduce bugs with custom environments.  Please attempt to reproduce your issue with one of the example environments, or provide a minimal patch to one of the environments needed to reproduce the issue.
diff --git a/README.md b/README.md
index 28dd7d5b52..a8929018c4 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
 project that enables games and simulations to serve as environments for
 training intelligent agents. Agents can be trained using reinforcement learning,
 imitation learning, neuroevolution, or other machine learning methods through a
-simple-to-use Python API. We also provide implementations (based on TensorFlow)
+simple-to-use Python API. We also provide implementations (based on PyTorch)
 of state-of-the-art algorithms to enable game developers and hobbyists to easily
 train intelligent agents for 2D, 3D and VR/AR games. These trained agents can be
 used for multiple purposes, including controlling NPC behavior (in a variety of
diff --git a/docs/Background-Machine-Learning.md b/docs/Background-Machine-Learning.md
index 95298131e6..c225a23093 100644
--- a/docs/Background-Machine-Learning.md
+++ b/docs/Background-Machine-Learning.md
@@ -194,4 +194,4 @@ we can learn policies for very complex environments (a complex environment is
 one where the number of observations an agent perceives and the number of
 actions they can take are large). Many of the algorithms we provide in ML-Agents
 use some form of deep learning, built on top of the open-source library,
-[TensorFlow](Background-TensorFlow.md).
+[PyTorch](Background-PyTorch.md).
diff --git a/docs/Background-TensorFlow.md b/docs/Background-PyTorch.md
similarity index 73%
rename from docs/Background-TensorFlow.md
rename to docs/Background-PyTorch.md
index dee339b12e..f63a066745 100644
--- a/docs/Background-TensorFlow.md
+++ b/docs/Background-PyTorch.md
@@ -1,29 +1,29 @@
-# Background: TensorFlow
+# Background: PyTorch
 
 As discussed in our
 [machine learning background page](Background-Machine-Learning.md), many of the
 algorithms we provide in the ML-Agents Toolkit leverage some form of deep
 learning. More specifically, our implementations are built on top of the
-open-source library [TensorFlow](https://www.tensorflow.org/). In this page we
-provide a brief overview of TensorFlow, in addition to TensorFlow-related tools
+open-source library [PyTorch](https://pytorch.org/). In this page we
+provide a brief overview of PyTorch and TensorBoard
 that we leverage within the ML-Agents Toolkit.
 
-## TensorFlow
+## PyTorch
 
-[TensorFlow](https://www.tensorflow.org/) is an open source library for
+[PyTorch](https://pytorch.org/) is an open source library for
 performing computations using data flow graphs, the underlying representation of
 deep learning models. It facilitates training and inference on CPUs and GPUs in
 a desktop, server, or mobile device. Within the ML-Agents Toolkit, when you
-train the behavior of an agent, the output is a model (.nn) file that you can
+train the behavior of an agent, the output is a model (.onnx) file that you can
 then associate with an Agent. Unless you implement a new algorithm, the use of
-TensorFlow is mostly abstracted away and behind the scenes.
+PyTorch is mostly abstracted away and behind the scenes.
 
 ## TensorBoard
 
-One component of training models with TensorFlow is setting the values of
+One component of training models with PyTorch is setting the values of
 certain model attributes (called _hyperparameters_). Finding the right values of
 these hyperparameters can require a few iterations. Consequently, we leverage a
-visualization tool within TensorFlow called
+visualization tool called
 [TensorBoard](https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard).
 It allows the visualization of certain agent attributes (e.g. reward) throughout
 training which can be helpful in both building intuitions for the different
diff --git a/docs/Getting-Started.md b/docs/Getting-Started.md
index 5732135208..096c5a6270 100644
--- a/docs/Getting-Started.md
+++ b/docs/Getting-Started.md
@@ -91,7 +91,7 @@ itself to keep the ball balanced on its head.
 
 ## Running a pre-trained model
 
-We include pre-trained models for our agents (`.nn` files) and we use the
+We include pre-trained models for our agents (`.onnx` files) and we use the
 [Unity Inference Engine](Unity-Inference-Engine.md) to run these models inside
 Unity. In this section, we will use the pre-trained model for the 3D Ball
 example.
@@ -124,7 +124,7 @@ example.
 
 ## Training a new model with Reinforcement Learning
 
-While we provide pre-trained `.nn` files for the agents in this environment, any
+While we provide pre-trained models for the agents in this environment, any
 environment you make yourself will require training agents from scratch to
 generate a new model file. In this section we will demonstrate how to use the
 reinforcement learning algorithms that are part of the ML-Agents Python package
@@ -229,7 +229,7 @@ Once the training process completes, and the training process saves the model
 use it with compatible Agents (the Agents that generated the model). **Note:**
 Do not just close the Unity Window once the `Saved Model` message appears.
 Either wait for the training process to close the window or press `Ctrl+C` at
-the command-line prompt. If you close the window manually, the `.nn` file
+the command-line prompt. If you close the window manually, the `.onnx` file
 containing the trained model is not exported into the ml-agents folder.
 
 If you've quit the training early using `Ctrl+C` and want to resume training,
@@ -239,7 +239,7 @@ run the same command again, appending the `--resume` flag:
 mlagents-learn config/ppo/3DBall.yaml --run-id=first3DBallRun --resume
 ```
 
-Your trained model will be at `results/<run-identifier>/<behavior_name>.nn` where
+Your trained model will be at `results/<run-identifier>/<behavior_name>.onnx` where
 `<behavior_name>` is the name of the `Behavior Name` of the agents corresponding
 to the model. This file corresponds to your model's latest checkpoint. You can
 now embed this trained model into your Agents by following the steps below,
@@ -249,7 +249,7 @@ which is similar to the steps described [above](#running-a-pre-trained-model).
    `Project/Assets/ML-Agents/Examples/3DBall/TFModels/`.
 1. Open the Unity Editor, and select the **3DBall** scene as described above.
 1. Select the **3DBall** prefab Agent object.
-1. Drag the `<behavior_name>.nn` file from the Project window of the Editor to
+1. Drag the `<behavior_name>.onnx` file from the Project window of the Editor to
    the **Model** placeholder in the **Ball3DAgent** inspector window.
 1. Press the **Play** button at the top of the Editor.
 
diff --git a/docs/Installation.md b/docs/Installation.md
index 1c0a2295fb..e5f2b542b1 100644
--- a/docs/Installation.md
+++ b/docs/Installation.md
@@ -44,11 +44,6 @@ If your Python environment doesn't include `pip3`, see these
 [instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers)
 on installing it.
 
-Although we do not provide support for Anaconda installation on Windows, the
-previous
-[Windows Anaconda Installation (Deprecated) guide](Installation-Anaconda-Windows.md)
-is still available.
-
 ### Clone the ML-Agents Toolkit Repository (Optional)
 
 Now that you have installed Unity and Python, you can now install the Unity and
@@ -128,6 +123,7 @@ To install the `mlagents` Python package, activate your virtual environment and
 run from the command line:
 
 ```sh
+pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html
 pip3 install mlagents
 ```
 
@@ -138,7 +134,7 @@ line parameters you can use with `mlagents-learn`.
 
 By installing the `mlagents` package, the dependencies listed in the
 [setup.py file](../ml-agents/setup.py) are also installed. These include
-[TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support).
+[PyTorch](Background-PyTorch.md) (Requires a CPU w/ AVX support).
 
 #### Advanced: Local Installation for Development
 
@@ -148,6 +144,7 @@ this, you will need to install `mlagents` and `mlagents_envs` separately. From
 the repository's root directory, run:
 
 ```sh
+pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html
 pip3 install -e ./ml-agents-envs
 pip3 install -e ./ml-agents
 ```
diff --git a/docs/Learning-Environment-Executable.md b/docs/Learning-Environment-Executable.md
index 30a28918f0..8f354bb131 100644
--- a/docs/Learning-Environment-Executable.md
+++ b/docs/Learning-Environment-Executable.md
@@ -171,7 +171,7 @@ INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 10000. Mean Reward: 2
 ```
 
 You can press Ctrl+C to stop the training, and your trained model will be at
-`results/<run-identifier>/<behavior_name>.nn`, which corresponds to your model's
+`results/<run-identifier>/<behavior_name>.onnx`, which corresponds to your model's
 latest checkpoint. (**Note:** There is a known bug on Windows that causes the
 saving of the model to fail when you early terminate the training, it's
 recommended to wait until Step has reached the max_steps parameter you set in
@@ -182,6 +182,6 @@ following the steps below:
    `Project/Assets/ML-Agents/Examples/3DBall/TFModels/`.
 1. Open the Unity Editor, and select the **3DBall** scene as described above.
 1. Select the **3DBall** prefab from the Project window and select **Agent**.
-1. Drag the `<behavior_name>.nn` file from the Project window of the Editor to
+1. Drag the `<behavior_name>.onnx` file from the Project window of the Editor to
    the **Model** placeholder in the **Ball3DAgent** inspector window.
 1. Press the **Play** button at the top of the Editor.
diff --git a/docs/ML-Agents-Overview.md b/docs/ML-Agents-Overview.md
index 022955c7df..44841f3feb 100644
--- a/docs/ML-Agents-Overview.md
+++ b/docs/ML-Agents-Overview.md
@@ -35,7 +35,7 @@ open-source project that enables games and simulations to serve as environments
 for training intelligent agents. Agents can be trained using reinforcement
 learning, imitation learning, neuroevolution, or other machine learning methods
 through a simple-to-use Python API. We also provide implementations (based on
-TensorFlow) of state-of-the-art algorithms to enable game developers and
+PyTorch) of state-of-the-art algorithms to enable game developers and
 hobbyists to easily train intelligent agents for 2D, 3D and VR/AR games. These
 trained agents can be used for multiple purposes, including controlling NPC
 behavior (in a variety of settings such as multi-agent and adversarial),
@@ -51,9 +51,9 @@ transition to the ML-Agents Toolkit easier, we provide several background pages
 that include overviews and helpful resources on the
 [Unity Engine](Background-Unity.md),
 [machine learning](Background-Machine-Learning.md) and
-[TensorFlow](Background-TensorFlow.md). We **strongly** recommend browsing the
+[PyTorch](Background-PyTorch.md). We **strongly** recommend browsing the
 relevant background pages if you're not familiar with a Unity scene, basic
-machine learning concepts or have not previously heard of TensorFlow.
+machine learning concepts or have not previously heard of PyTorch.
 
 The remainder of this page contains a deep dive into ML-Agents, its key
 components, different training modes and scenarios. By the end of it, you should
@@ -280,7 +280,7 @@ for additional information.
 
 ### Custom Training and Inference
 
-In the previous mode, the Agents were used for training to generate a TensorFlow
+In the previous mode, the Agents were used for training to generate a PyTorch
 model that the Agents can later use. However, any user of the ML-Agents Toolkit
 can leverage their own algorithms for training. In this case, the behaviors of
 all the Agents in the scene will be controlled within Python. You can even turn
diff --git a/docs/Readme.md b/docs/Readme.md
index ced395f6c9..ad65859f21 100644
--- a/docs/Readme.md
+++ b/docs/Readme.md
@@ -11,7 +11,7 @@
 - [ML-Agents Toolkit Overview](ML-Agents-Overview.md)
   - [Background: Unity](Background-Unity.md)
   - [Background: Machine Learning](Background-Machine-Learning.md)
-  - [Background: TensorFlow](Background-TensorFlow.md)
+  - [Background: PyTorch](Background-PyTorch.md)
 - [Example Environments](Learning-Environment-Examples.md)
 
 ## Creating Learning Environments
diff --git a/docs/Training-Configuration-File.md b/docs/Training-Configuration-File.md
index 3e36660b64..5bf4f27216 100644
--- a/docs/Training-Configuration-File.md
+++ b/docs/Training-Configuration-File.md
@@ -32,7 +32,7 @@ choice of the trainer (which we review on subsequent sections).
 | `time_horizon`           | (default = `64`) How many steps of experience to collect per-agent before adding it to the experience buffer. When this limit is reached before the end of an episode, a value estimate is used to predict the overall expected reward from the agent's current state. As such, this parameter trades off between a less biased, but higher variance estimate (long time horizon) and more biased, but less varied estimate (short time horizon). In cases where there are frequent rewards within an episode, or episodes are prohibitively large, a smaller number can be more ideal. This number should be large enough to capture all the important behavior within a sequence of an agent's actions. <br><br> Typical range: `32` - `2048` |
 | `max_steps`              | (default = `500000`) Total number of steps (i.e., observation collected and action taken) that must be taken in the environment (or across all environments if using multiple in parallel) before ending the training process. If you have multiple agents with the same behavior name within your environment, all steps taken by those agents will contribute to the same `max_steps` count. <br><br>Typical range: `5e5` - `1e7`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
 | `keep_checkpoints`         | (default = `5`) The maximum number of model checkpoints to keep. Checkpoints are saved after the number of steps specified by the checkpoint_interval option. Once the maximum number of checkpoints has been reached, the oldest checkpoint is deleted when saving a new checkpoint. |
-| `checkpoint_interval`         | (default = `500000`) The number of experiences collected between each checkpoint by the trainer. A maximum of `keep_checkpoints` checkpoints are saved before old ones are deleted. Each checkpoint saves the `.nn` (and `.onnx` if applicable) files in `results/` folder.|
+| `checkpoint_interval`         | (default = `500000`) The number of experiences collected between each checkpoint by the trainer. A maximum of `keep_checkpoints` checkpoints are saved before old ones are deleted. Each checkpoint saves the `.onnx` (and `.nn` if using TensorFlow) files in the `results/` folder.|
 | `init_path`              | (default = None) Initialize trainer from a previously saved model. Note that the prior run should have used the same trainer configurations as the current run, and have been saved with the same version of ML-Agents. <br><br>You should provide the full path to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`. This option is provided in case you want to initialize different behaviors from different runs; in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize all models from the same run.                                                                                                                                  |
 | `threaded`               | (default = `true`) By default, model updates can happen while the environment is being stepped. This violates the [on-policy](https://spinningup.openai.com/en/latest/user/algorithms.html#the-on-policy-algorithms) assumption of PPO slightly in exchange for a training speedup. To maintain the strict on-policyness of PPO, you can disable parallel updates by setting `threaded` to `false`. There is usually no reason to turn `threaded` off for SAC.                                                                                                                                                                                                                                                       |
 | `hyperparameters -> learning_rate`          | (default = `3e-4`) Initial learning rate for gradient descent. Corresponds to the strength of each gradient descent update step. This should typically be decreased if training is unstable, and the reward does not consistently increase. <br><br>Typical range: `1e-5` - `1e-3`                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md
index 035ad5a255..67fb4d18f7 100644
--- a/docs/Training-ML-Agents.md
+++ b/docs/Training-ML-Agents.md
@@ -16,7 +16,6 @@
     - [Curriculum Learning](#curriculum)
       - [Training with a Curriculum](#training-with-a-curriculum)
   - [Training Using Concurrent Unity Instances](#training-using-concurrent-unity-instances)
-  - [Using PyTorch (Experimental)](#using-pytorch-experimental)
 
 For a broad overview of reinforcement learning, imitation learning and all the
 training scenarios, methods and options within the ML-Agents Toolkit, see
@@ -88,7 +87,7 @@ in the `results/<run-identifier>` folder:
    values. See [Using TensorBoard](Using-Tensorboard.md) for more details on how
    to visualize the training metrics.
 1. Models: these contain the model checkpoints that
-   are updated throughout training and the final model file (`.nn`). This final
+   are updated throughout training and the final model file (`.onnx`). This final
    model file is generated once either when training completes or is
    interrupted.
 1. Timers file (under `results/<run-identifier>/run_logs`): this contains aggregated
@@ -556,35 +555,3 @@ Some considerations:
 - **Result Variation Using Concurrent Unity Instances** - If you keep all the
   hyperparameters the same, but change `--num-envs=<n>`, the results and model
   would likely change.
-
-### Using PyTorch (Experimental)
-
-ML-Agents, by default, uses TensorFlow as its backend, but experimental support
-for PyTorch has been added. To use PyTorch, the `torch` Python package must
-be installed, and PyTorch must be enabled for your trainer.
-
-#### Installing PyTorch
-
-If you've already installed ML-Agents, follow the
-[official PyTorch install instructions](https://pytorch.org/get-started/locally/) for
-your platform and configuration. Note that on Windows, you may also need Microsoft's
-[Visual C++ Redistributable](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads) if you don't have it already.
-
-If you're installing or upgrading ML-Agents on Linux or Mac, you can also run
-`pip3 install mlagents[torch]` instead of `pip3 install mlagents`
-during [installation](Installation.md). On Windows, install ML-Agents first and then
-separately install PyTorch.
-
-#### Enabling PyTorch
-
-PyTorch can be enabled in one of two ways. First, by adding `--torch` to the
-`mlagents-learn` command. This will make all behaviors train with PyTorch.
-
-Second, by changing the `framework` option for your agent behavior in the
-configuration YAML as below. This will use PyTorch just for that behavior.
-
-```yaml
-behaviors:
-  YourAgentBehavior:
-    framework: pytorch
-```
diff --git a/docs/Unity-Inference-Engine.md b/docs/Unity-Inference-Engine.md
index c4ff47409e..3c66a6c67f 100644
--- a/docs/Unity-Inference-Engine.md
+++ b/docs/Unity-Inference-Engine.md
@@ -29,9 +29,9 @@ There are currently two supported model formats:
   [industry-standard open format](https://onnx.ai/about.html) produced by the
   [tf2onnx package](https://github.com/onnx/tensorflow-onnx).
 
-Export to ONNX is currently considered beta. To enable it, make sure
-`tf2onnx>=1.5.5` is installed in pip. tf2onnx does not currently support
-tensorflow 2.0.0 or later, or earlier than 1.12.0.
+Export to ONNX is used if using PyTorch (the default). To enable it
+while using TensorFlow, make sure `tf2onnx>=1.5.5` is installed in pip.
+tf2onnx does not currently support tensorflow 2.0.0 or later, or earlier than 1.12.0.
 
 ## Using the Unity Inference Engine
 

From 03f7e794ab03c3d4b22c2a155e76a778ed36e63b Mon Sep 17 00:00:00 2001
From: Ervin T <ervin@unity3d.com>
Date: Mon, 19 Oct 2020 17:35:06 -0700
Subject: [PATCH 11/20] [refactor] Add --tensorflow, enable Torch as default
 setting (#4582)

* Add --tensorflow option

* Switch framework to Pytorch default

* Update changelog

* Re-add --torch

* Edit warning
---
 com.unity.ml-agents/CHANGELOG.md               |  5 +++++
 ml-agents/mlagents/trainers/cli_utils.py       | 11 +++++++++--
 ml-agents/mlagents/trainers/learn.py           |  1 +
 ml-agents/mlagents/trainers/settings.py        |  2 +-
 .../trainers/trainer/trainer_factory.py        | 18 +++++++++++++++++-
 5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index 3cce0ea8ec..c991623c13 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -12,6 +12,11 @@ and this project adheres to
 ### Major Changes
 #### com.unity.ml-agents (C#)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
+ - PyTorch trainers are now the default. See the
+ [installation docs](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Installation.md) for
+ more information on installing PyTorch. For the time being, TensorFlow is still available;
+ you can use the TensorFlow backend by adding `--tensorflow` to the CLI, or
+ adding `framework: tensorflow` in the configuration YAML. (#4517)
 
 ### Minor Changes
 #### com.unity.ml-agents (C#)
diff --git a/ml-agents/mlagents/trainers/cli_utils.py b/ml-agents/mlagents/trainers/cli_utils.py
index 8f82751d3f..9acae72b7e 100644
--- a/ml-agents/mlagents/trainers/cli_utils.py
+++ b/ml-agents/mlagents/trainers/cli_utils.py
@@ -172,8 +172,15 @@ def _create_parser() -> argparse.ArgumentParser:
         "--torch",
         default=False,
         action=DetectDefaultStoreTrue,
-        help="(Experimental) Use the PyTorch framework instead of TensorFlow. Install PyTorch "
-        "before using this option",
+        help="Use the PyTorch framework. Note that this option is not required anymore as PyTorch is the "
+        "default framework, and will be removed in the next release.",
+    )
+    argparser.add_argument(
+        "--tensorflow",
+        default=False,
+        action=DetectDefaultStoreTrue,
+        help="(Deprecated) Use the TensorFlow framework instead of PyTorch. Install TensorFlow "
+        "before using this option.",
     )
 
     eng_conf = argparser.add_argument_group(title="Engine Configuration")
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index e87d23e37f..27cf0f2731 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -136,6 +136,7 @@ def run_training(run_seed: int, options: RunOptions) -> None:
             init_path=maybe_init_path,
             multi_gpu=False,
             force_torch="torch" in DetectDefault.non_default_args,
+            force_tensorflow="tensorflow" in DetectDefault.non_default_args,
         )
         # Create controller and begin training.
         tc = TrainerController(
diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py
index ce06456e7d..0147256c1e 100644
--- a/ml-agents/mlagents/trainers/settings.py
+++ b/ml-agents/mlagents/trainers/settings.py
@@ -620,7 +620,7 @@ def _set_default_hyperparameters(self):
     threaded: bool = True
     self_play: Optional[SelfPlaySettings] = None
     behavioral_cloning: Optional[BehavioralCloningSettings] = None
-    framework: FrameworkType = FrameworkType.TENSORFLOW
+    framework: FrameworkType = FrameworkType.PYTORCH
 
     cattr.register_structure_hook(
         Dict[RewardSignalType, RewardSignalSettings], RewardSignalSettings.structure
diff --git a/ml-agents/mlagents/trainers/trainer/trainer_factory.py b/ml-agents/mlagents/trainers/trainer/trainer_factory.py
index daea1ccd0c..78419b6062 100644
--- a/ml-agents/mlagents/trainers/trainer/trainer_factory.py
+++ b/ml-agents/mlagents/trainers/trainer/trainer_factory.py
@@ -27,6 +27,7 @@ def __init__(
         init_path: str = None,
         multi_gpu: bool = False,
         force_torch: bool = False,
+        force_tensorflow: bool = False,
     ):
         """
         The TrainerFactory generates the Trainers based on the configuration passed as
@@ -45,7 +46,9 @@ def __init__(
         :param init_path: Path from which to load model.
         :param multi_gpu: If True, multi-gpu will be used. (currently not available)
         :param force_torch: If True, the Trainers will all use the PyTorch framework
-        instead of the TensorFlow framework.
+        instead of what is specified in the config YAML.
+        :param force_tensorflow: If True, the Trainers will all use the TensorFlow
+        framework.
         """
         self.trainer_config = trainer_config
         self.output_path = output_path
@@ -57,6 +60,7 @@ def __init__(
         self.multi_gpu = multi_gpu
         self.ghost_controller = GhostController()
         self._force_torch = force_torch
+        self._force_tf = force_tensorflow
 
     def generate(self, behavior_name: str) -> Trainer:
         if behavior_name not in self.trainer_config.keys():
@@ -67,6 +71,18 @@ def generate(self, behavior_name: str) -> Trainer:
         trainer_settings = self.trainer_config[behavior_name]
         if self._force_torch:
             trainer_settings.framework = FrameworkType.PYTORCH
+            logger.warning(
+                "Note that specifying --torch is not required anymore as PyTorch is the default framework."
+            )
+        if self._force_tf:
+            trainer_settings.framework = FrameworkType.TENSORFLOW
+            logger.warning(
+                "Setting the framework to TensorFlow. TensorFlow trainers will be deprecated in the future."
+            )
+            if self._force_torch:
+                logger.warning(
+                    "Both --torch and --tensorflow CLI options were specified. Using TensorFlow."
+                )
         return TrainerFactory._initialize_trainer(
             trainer_settings,
             behavior_name,

From ad958cae9413dbaea3093f7eed4ed0d4d55c7e47 Mon Sep 17 00:00:00 2001
From: Ervin T <ervin@unity3d.com>
Date: Mon, 19 Oct 2020 18:23:25 -0700
Subject: [PATCH 12/20] Modify Yamato tests (#4584)

---
 ml-agents/tests/yamato/training_int_tests.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py
index 2bec3c7435..5ef043b0a7 100644
--- a/ml-agents/tests/yamato/training_int_tests.py
+++ b/ml-agents/tests/yamato/training_int_tests.py
@@ -26,11 +26,10 @@ def run_training(python_version: str, csharp_version: str) -> bool:
         f"Running training with python={python_version or latest} and c#={csharp_version or latest}"
     )
     output_dir = "models" if python_version else "results"
-    nn_file_expected = f"./{output_dir}/{run_id}/3DBall.nn"
     onnx_file_expected = f"./{output_dir}/{run_id}/3DBall.onnx"
     frozen_graph_file_expected = f"./{output_dir}/{run_id}/3DBall/frozen_graph_def.pb"
 
-    if os.path.exists(nn_file_expected):
+    if os.path.exists(onnx_file_expected):
         # Should never happen - make sure nothing leftover from an old test.
         print("Artifacts from previous build found!")
         return False
@@ -96,21 +95,16 @@ def run_training(python_version: str, csharp_version: str) -> bool:
     if csharp_version is None and python_version is None:
         model_artifacts_dir = os.path.join(get_base_output_path(), "models")
         os.makedirs(model_artifacts_dir, exist_ok=True)
-        shutil.copy(nn_file_expected, model_artifacts_dir)
         shutil.copy(onnx_file_expected, model_artifacts_dir)
         shutil.copy(frozen_graph_file_expected, model_artifacts_dir)
 
-    if (
-        res.returncode != 0
-        or not os.path.exists(nn_file_expected)
-        or not os.path.exists(onnx_file_expected)
-    ):
+    if res.returncode != 0 or not os.path.exists(onnx_file_expected):
         print("mlagents-learn run FAILED!")
         return False
 
     if csharp_version is None and python_version is None:
         # Use abs path so that loading doesn't get confused
-        model_path = os.path.abspath(os.path.dirname(nn_file_expected))
+        model_path = os.path.abspath(os.path.dirname(onnx_file_expected))
         for extension in ["nn", "onnx"]:
             inference_ok = run_inference(env_path, model_path, extension)
             if not inference_ok:

From 78bd7401fede37ccbe30b0a7e7f04b16b5fb9bba Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Mon, 19 Oct 2020 18:51:59 -0700
Subject: [PATCH 13/20] Don't check for PB file in Yamato inference

---
 ml-agents/tests/yamato/training_int_tests.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py
index 2bec3c7435..78bd7b0840 100644
--- a/ml-agents/tests/yamato/training_int_tests.py
+++ b/ml-agents/tests/yamato/training_int_tests.py
@@ -28,7 +28,6 @@ def run_training(python_version: str, csharp_version: str) -> bool:
     output_dir = "models" if python_version else "results"
     nn_file_expected = f"./{output_dir}/{run_id}/3DBall.nn"
     onnx_file_expected = f"./{output_dir}/{run_id}/3DBall.onnx"
-    frozen_graph_file_expected = f"./{output_dir}/{run_id}/3DBall/frozen_graph_def.pb"
 
     if os.path.exists(nn_file_expected):
         # Should never happen - make sure nothing leftover from an old test.
@@ -98,7 +97,6 @@ def run_training(python_version: str, csharp_version: str) -> bool:
         os.makedirs(model_artifacts_dir, exist_ok=True)
         shutil.copy(nn_file_expected, model_artifacts_dir)
         shutil.copy(onnx_file_expected, model_artifacts_dir)
-        shutil.copy(frozen_graph_file_expected, model_artifacts_dir)
 
     if (
         res.returncode != 0

From 5ab0ca0bc1e90973247776cbaed2bc6a7a2a0ff8 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Mon, 19 Oct 2020 19:41:29 -0700
Subject: [PATCH 14/20] Only run inference on ONNX

---
 ml-agents/tests/yamato/training_int_tests.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py
index 501de3e839..c22e3d1d39 100644
--- a/ml-agents/tests/yamato/training_int_tests.py
+++ b/ml-agents/tests/yamato/training_int_tests.py
@@ -103,10 +103,9 @@ def run_training(python_version: str, csharp_version: str) -> bool:
     if csharp_version is None and python_version is None:
         # Use abs path so that loading doesn't get confused
         model_path = os.path.abspath(os.path.dirname(onnx_file_expected))
-        for extension in ["nn", "onnx"]:
-            inference_ok = run_inference(env_path, model_path, extension)
-            if not inference_ok:
-                return False
+        inference_ok = run_inference(env_path, model_path, "onnx")
+        if not inference_ok:
+            return False
 
     print("mlagents-learn run SUCCEEDED!")
     return True

From 45e197e45b6b28121eb1714d2d331be4983ebf10 Mon Sep 17 00:00:00 2001
From: Ervin T <ervin@unity3d.com>
Date: Tue, 20 Oct 2020 16:28:17 -0700
Subject: [PATCH 15/20] Update docs/Unity-Inference-Engine.md with correct
 tf2onnx versions

Co-authored-by: Chris Elion <chris.elion@unity3d.com>
---
 docs/Unity-Inference-Engine.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/Unity-Inference-Engine.md b/docs/Unity-Inference-Engine.md
index 3c66a6c67f..89707b8e64 100644
--- a/docs/Unity-Inference-Engine.md
+++ b/docs/Unity-Inference-Engine.md
@@ -30,8 +30,7 @@ There are currently two supported model formats:
   [tf2onnx package](https://github.com/onnx/tensorflow-onnx).
 
 Export to ONNX is used if using PyTorch (the default). To enable it
-while using TensorFlow, make sure `tf2onnx>=1.5.5` is installed in pip.
-tf2onnx does not currently support tensorflow 2.0.0 or later, or earlier than 1.12.0.
+while using TensorFlow, make sure `tf2onnx>=1.6.1` is installed in pip.
 
 ## Using the Unity Inference Engine
 

From b8b91e107cc02a18bf069c1af61bc4169a9a73f8 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Tue, 20 Oct 2020 16:33:07 -0700
Subject: [PATCH 16/20] Add reward signal class comments

---
 ml-agents/mlagents/trainers/ppo/trainer.py | 3 ++-
 ml-agents/mlagents/trainers/sac/trainer.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index 7fd9e384fe..06f1b877cc 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -107,12 +107,13 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             agent_buffer_trajectory["environment_rewards"]
         )
         for name, reward_signal in self.optimizer.reward_signals.items():
+            # BaseRewardProvider is a PyTorch-based reward signal
             if isinstance(reward_signal, BaseRewardProvider):
                 evaluate_result = (
                     reward_signal.evaluate(agent_buffer_trajectory)
                     * reward_signal.strength
                 )
-            else:
+            else:  # reward_signal is a TensorFlow-based RewardSignal class
                 evaluate_result = reward_signal.evaluate_batch(
                     agent_buffer_trajectory
                 ).scaled_reward
diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py
index 74effedc97..2debda8215 100644
--- a/ml-agents/mlagents/trainers/sac/trainer.py
+++ b/ml-agents/mlagents/trainers/sac/trainer.py
@@ -302,11 +302,12 @@ def _update_sac_policy(self) -> bool:
                 )
                 # Get rewards for each reward
                 for name, signal in self.optimizer.reward_signals.items():
+                    # BaseRewardProvider is a PyTorch-based reward signal
                     if isinstance(signal, BaseRewardProvider):
                         sampled_minibatch[f"{name}_rewards"] = (
                             signal.evaluate(sampled_minibatch) * signal.strength
                         )
-                    else:
+                    else:  # signal is a TensorFlow-based RewardSignal class
                         sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
                             sampled_minibatch
                         ).scaled_reward

From 7f7c573a7d82db604aef8c1ff19333f1f7d77b15 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Tue, 20 Oct 2020 16:43:21 -0700
Subject: [PATCH 17/20] More descriptive import of is_available

---
 ml-agents/mlagents/trainers/training_status.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/training_status.py b/ml-agents/mlagents/trainers/training_status.py
index 466d67d595..3eff30b84a 100644
--- a/ml-agents/mlagents/trainers/training_status.py
+++ b/ml-agents/mlagents/trainers/training_status.py
@@ -6,7 +6,7 @@
 import cattr
 
 from mlagents.torch_utils import torch
-from mlagents.tf_utils import tf, is_available
+from mlagents.tf_utils import tf, is_available as tf_is_available
 from mlagents_envs.logging_util import get_logger
 from mlagents.trainers import __version__
 from mlagents.trainers.exception import TrainerError
@@ -28,7 +28,7 @@ class StatusMetaData:
     stats_format_version: str = STATUS_FORMAT_VERSION
     mlagents_version: str = __version__
     torch_version: str = torch.__version__
-    tensorflow_version: str = tf.__version__ if is_available() else -1
+    tensorflow_version: str = tf.__version__ if tf_is_available() else -1
 
     def to_dict(self) -> Dict[str, str]:
         return cattr.unstructure(self)

From 2063d71b251f9b9f7db94912ea7cc54d118c6814 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Tue, 20 Oct 2020 16:50:28 -0700
Subject: [PATCH 18/20] Updated installation instructions for PyTorch

---
 docs/Installation.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/docs/Installation.md b/docs/Installation.md
index e5f2b542b1..6d543b8da4 100644
--- a/docs/Installation.md
+++ b/docs/Installation.md
@@ -119,11 +119,24 @@ Virtual Environments. Virtual Environments provide a mechanism for isolating the
 dependencies for each project and are supported on Mac / Windows / Linux. We
 offer a dedicated [guide on Virtual Environments](Using-Virtual-Environment.md).
 
+#### (Windows) Installing PyTorch
+
+On Windows, you'll have to install the PyTorch package separately prior to
+installing ML-Agents. Activate your virtual environment and run from the command line:
+
+```sh
+pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html
+```
+
+See the [PyTorch installation guide](https://pytorch.org/get-started/locally/) for
+more installation options and versions.
+
+#### Installing `mlagents`
+
 To install the `mlagents` Python package, activate your virtual environment and
 run from the command line:
 
 ```sh
-pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html
 pip3 install mlagents
 ```
 

From 7814a255c4b9c3d6970cc90a1eac1f8f8eeb6c87 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Tue, 20 Oct 2020 16:54:23 -0700
Subject: [PATCH 19/20] More reward signal comments

---
 ml-agents/mlagents/trainers/sac/trainer.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py
index 2debda8215..c9e43b9443 100644
--- a/ml-agents/mlagents/trainers/sac/trainer.py
+++ b/ml-agents/mlagents/trainers/sac/trainer.py
@@ -149,12 +149,13 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             agent_buffer_trajectory["environment_rewards"]
         )
         for name, reward_signal in self.optimizer.reward_signals.items():
+            # BaseRewardProvider is a PyTorch-based reward signal
             if isinstance(reward_signal, BaseRewardProvider):
                 evaluate_result = (
                     reward_signal.evaluate(agent_buffer_trajectory)
                     * reward_signal.strength
                 )
-            else:
+            else:  # reward_signal uses TensorFlow
                 evaluate_result = reward_signal.evaluate_batch(
                     agent_buffer_trajectory
                 ).scaled_reward
@@ -167,12 +168,13 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             agent_buffer_trajectory, trajectory.next_obs, trajectory.done_reached
         )
         for name, v in value_estimates.items():
+            # BaseRewardProvider is a PyTorch-based reward signal
             if isinstance(self.optimizer.reward_signals[name], BaseRewardProvider):
                 self._stats_reporter.add_stat(
                     f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
                     np.mean(v),
                 )
-            else:
+            else:  # TensorFlow reward signal
                 self._stats_reporter.add_stat(
                     self.optimizer.reward_signals[name].value_name, np.mean(v)
                 )
@@ -357,6 +359,7 @@ def _update_reward_signals(self) -> None:
             reward_signal_minibatches = {}
             for name, signal in self.optimizer.reward_signals.items():
                 logger.debug(f"Updating {name} at step {self.step}")
+                # BaseRewardProvider is a PyTorch-based reward signal
                 if not isinstance(signal, BaseRewardProvider):
                     # Some signals don't need a minibatch to be sampled - so we don't!
                     if signal.update_dict:
@@ -364,7 +367,7 @@ def _update_reward_signals(self) -> None:
                             self.hyperparameters.batch_size,
                             sequence_length=self.policy.sequence_length,
                         )
-                else:
+                else:  # PyTorch reward signal (BaseRewardProvider)
                     if name != "extrinsic":
                         reward_signal_minibatches[name] = buffer.sample_mini_batch(
                             self.hyperparameters.batch_size,

From 9197835f1856d3221f9817654539ee5e7149e454 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Tue, 20 Oct 2020 16:56:06 -0700
Subject: [PATCH 20/20] More Windows instructions

---
 docs/Installation.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/Installation.md b/docs/Installation.md
index 6d543b8da4..af94287d64 100644
--- a/docs/Installation.md
+++ b/docs/Installation.md
@@ -128,8 +128,10 @@ installing ML-Agents. Activate your virtual environment and run from the command
 pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html
 ```
 
-See the [PyTorch installation guide](https://pytorch.org/get-started/locally/) for
-more installation options and versions.
+Note that on Windows, you may also need Microsoft's
+[Visual C++ Redistributable](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads)
+if you don't have it already. See the [PyTorch installation guide](https://pytorch.org/get-started/locally/)
+for more installation options and versions.
 
 #### Installing `mlagents`