[bug-fix] Use float32 when converting np.ndarray to torch.tensor, cap Torch version to 1.7.x (#4610)

Ervin T · web-flow · commit 75f8a2bcc72f · 2020-10-29T10:32:52.000-07:00
* Use float64 in GAIL tests

* Use float32 when converting np arrays by default

* Enforce torch 1.7.x or below (requirement becomes `torch>=1.6.0,<1.8.0`)

* Add comment about Windows install

* Adjust tests
diff --git a/docs/Installation.md b/docs/Installation.md
@@ -125,7 +125,7 @@ On Windows, you'll have to install the PyTorch package separately prior to
 installing ML-Agents. Activate your virtual environment and run from the command line:
 
 ```sh
-pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html
+pip3 install torch==1.7.0 -f https://download.pytorch.org/whl/torch_stable.html
 ```
 
 Note that on Windows, you may also need Microsoft's
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py b/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
@@ -128,7 +128,7 @@ def test_reward_decreases_vail(
         RewardSignalType.GAIL, behavior_spec, gail_settings
     )
 
-    for _ in range(200):
+    for _ in range(300):
         gail_rp.update(buffer_policy)
         reward_expert = gail_rp.evaluate(buffer_expert)[0]
         reward_policy = gail_rp.evaluate(buffer_policy)[0]
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py b/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py
@@ -9,10 +9,12 @@ def create_agent_buffer(
 ) -> AgentBuffer:
     buffer = AgentBuffer()
     curr_observations = [
-        np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
+        np.random.normal(size=shape).astype(np.float32)
+        for shape in behavior_spec.observation_shapes
     ]
     next_observations = [
-        np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
+        np.random.normal(size=shape).astype(np.float32)
+        for shape in behavior_spec.observation_shapes
     ]
     action = behavior_spec.action_spec.random_action(1)[0, :]
     for _ in range(number):
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
@@ -101,7 +101,7 @@ def test_polynomial_decay():
 
 def test_list_to_tensor():
     # Test converting pure list
-    unconverted_list = [[1, 2], [1, 3], [1, 4]]
+    unconverted_list = [[1.0, 2], [1, 3], [1, 4]]
     tensor = ModelUtils.list_to_tensor(unconverted_list)
     # Should be equivalent to torch.tensor conversion
     assert torch.equal(tensor, torch.tensor(unconverted_list))
@@ -116,7 +116,7 @@ def test_list_to_tensor():
     list_of_np = [np.asarray(_el) for _el in unconverted_list]
     tensor = ModelUtils.list_to_tensor(list_of_np)
     # Should be equivalent to torch.tensor conversion
-    assert torch.equal(tensor, torch.tensor(unconverted_list))
+    assert torch.equal(tensor, torch.tensor(unconverted_list, dtype=torch.float32))
 
 
 def test_break_into_branches():
diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py
@@ -194,7 +194,7 @@ def create_input_processors(
 
     @staticmethod
     def list_to_tensor(
-        ndarray_list: List[np.ndarray], dtype: Optional[torch.dtype] = None
+        ndarray_list: List[np.ndarray], dtype: Optional[torch.dtype] = torch.float32
     ) -> torch.Tensor:
         """
         Converts a list of numpy arrays into a tensor. MUCH faster than
diff --git a/ml-agents/setup.py b/ml-agents/setup.py
@@ -63,8 +63,9 @@ def run(self):
         "Pillow>=4.2.1",
         "protobuf>=3.6",
         "pyyaml>=3.1.0",
-        # Windows ver. of PyTorch doesn't work from PyPi
-        'torch>=1.6.0;platform_system!="Windows"',
+        # Windows ver. of PyTorch doesn't work from PyPi. Installation:
+        # https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Installation.md#windows-installing-pytorch
+        'torch>=1.6.0,<1.8.0;platform_system!="Windows"',
         "tensorboard>=1.15",
         "cattrs>=1.0.0",
         "attrs>=19.3.0",

Original file line number	Diff line number	Diff line change
`@@ -128,7 +128,7 @@ def test_reward_decreases_vail(`
`128`	`128`	`RewardSignalType.GAIL, behavior_spec, gail_settings`
`129`	`129`	`)`
`130`	`130`
`131`		`- for _ in range(200):`
	`131`	`+ for _ in range(300):`
`132`	`132`	`gail_rp.update(buffer_policy)`
`133`	`133`	`reward_expert = gail_rp.evaluate(buffer_expert)[0]`
`134`	`134`	`reward_policy = gail_rp.evaluate(buffer_policy)[0]`