diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distributions.py b/ml-agents/mlagents/trainers/tests/torch/test_distributions.py
index e9ef77529c..b2c6afaf0f 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_distributions.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_distributions.py
@@ -105,8 +105,8 @@ def test_gaussian_dist_instance():
         assert log_prob == pytest.approx(-0.919, abs=0.01)
 
     for ent in dist_instance.entropy().flatten():
-        # entropy of standard normal at 0
-        assert ent == pytest.approx(2.83, abs=0.01)
+        # entropy of standard normal at 0, based on 1/2 + ln(sqrt(2pi)sigma)
+        assert ent == pytest.approx(1.42, abs=0.01)
 
 
 def test_tanh_gaussian_dist_instance():
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
index b9a58c4617..3b9ca5e425 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_utils.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
@@ -149,7 +149,7 @@ def test_get_probs_and_entropy():
 
     for ent in entropies.flatten():
         # entropy of standard normal at 0
-        assert ent == pytest.approx(2.83, abs=0.01)
+        assert ent == pytest.approx(1.42, abs=0.01)
 
     # Test continuous
     # Add two dists to the list.
diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py
index c83ae4649e..2f5954fbad 100644
--- a/ml-agents/mlagents/trainers/torch/distributions.py
+++ b/ml-agents/mlagents/trainers/torch/distributions.py
@@ -66,7 +66,9 @@ def pdf(self, value):
         return torch.exp(log_prob)
 
     def entropy(self):
-        return torch.log(2 * math.pi * math.e * self.std + EPSILON)
+        # Differential entropy of a Gaussian is 1/2 * ln(2*pi*e*sigma^2), so
+        # the standard deviation must be squared before taking the log.
+        return 0.5 * torch.log(2 * math.pi * math.e * self.std ** 2 + EPSILON)
 
 
 class TanhGaussianDistInstance(GaussianDistInstance):