
[add-fire] Revert unneeded changes back to master #4389


Merged: 2 commits, Aug 20, 2020
@@ -177,7 +177,7 @@ def _create_dc_critic(
name="old_probabilities",
)

# Break old log log_probs into separate branches
# Break old log probs into separate branches
Contributor:

Are you sure about this one?

Contributor Author:

Yeah, this is what it is in master.

old_log_prob_branches = ModelUtils.break_into_branches(
self.all_old_log_probs, self.policy.act_size
)
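
For context on this hunk, a minimal NumPy sketch of what breaking a flat [batch, sum(act_size)] tensor into per-branch slices amounts to; this is an illustration with made-up sizes, not the actual ModelUtils.break_into_branches implementation:

import numpy as np

def break_into_branches_sketch(concatenated, act_size):
    # Split a [batch, sum(act_size)] array into one [batch, size] slice per branch.
    offsets = np.cumsum([0] + list(act_size))
    return [concatenated[:, offsets[i]:offsets[i + 1]] for i in range(len(act_size))]

# Example: two action branches with 3 and 2 options each, batch of 2.
flat_log_probs = np.arange(10, dtype=float).reshape(2, 5)
branches = break_into_branches_sketch(flat_log_probs, [3, 2])
print([b.shape for b in branches])  # [(2, 3), (2, 2)]
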
ml-agents/mlagents/trainers/ppo/trainer.py (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.policy import Policy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
ml-agents/mlagents/trainers/saver/saver.py (10 changes: 5 additions & 5 deletions)
@@ -34,23 +34,23 @@ def _register_optimizer(self, optimizer):
pass

@abc.abstractmethod
def save_checkpoint(self, behavior_name: str, step: int) -> str:
def save_checkpoint(self, brain_name: str, step: int) -> str:
"""
Checkpoints the policy on disk.
:param checkpoint_path: filepath to write the checkpoint
:param behavior_name: Behavior name of behavior to be trained
:param brain_name: Brain name of brain to be trained
"""
pass

@abc.abstractmethod
def export(self, output_filepath: str, behavior_name: str) -> None:
def export(self, output_filepath: str, brain_name: str) -> None:
"""
Saves the serialized model, given a path and behavior name.
Saves the serialized model, given a path and brain name.
This method will save the policy graph to the given filepath. The path
should be provided without an extension as multiple serialized model formats
may be generated as a result.
:param output_filepath: path (without suffix) for the model file(s)
:param behavior_name: Behavior name of behavior to be trained.
:param brain_name: Brain name of brain to be trained.
"""
pass

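For reference, a rough usage sketch of this abstract interface after the rename; the saver object, the "3DBall" behavior name, and the step count are placeholders, not code from this PR:

# Assumes `saver` is a concrete saver (e.g. TFSaver or TorchSaver) with a policy registered.
checkpoint_path = saver.save_checkpoint(brain_name="3DBall", step=50000)
# The returned path has no extension; export may write several serialized formats.
saver.export(output_filepath=checkpoint_path, brain_name="3DBall")
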
ml-agents/mlagents/trainers/saver/tf_saver.py (11 changes: 5 additions & 6 deletions)
@@ -55,8 +55,8 @@ def _register_policy(self, policy: TFPolicy) -> None:
with self.policy.graph.as_default():
self.tf_saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)

def save_checkpoint(self, behavior_name: str, step: int) -> str:
checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}")
def save_checkpoint(self, brain_name: str, step: int) -> str:
checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}")
# Save the TF checkpoint and graph definition
if self.graph:
with self.graph.as_default():
@@ -66,16 +66,16 @@ def save_checkpoint(self, behavior_name: str, step: int) -> str:
self.graph, self.model_path, "raw_graph_def.pb", as_text=False
)
# also save the policy so we have optimized model files for each checkpoint
self.export(checkpoint_path, behavior_name)
self.export(checkpoint_path, brain_name)
return checkpoint_path

def export(self, output_filepath: str, behavior_name: str) -> None:
def export(self, output_filepath: str, brain_name: str) -> None:
# save model if there is only one worker or
# only on worker-0 if there are multiple workers
if self.policy and self.policy.rank is not None and self.policy.rank != 0:
return
export_policy_model(
self.model_path, output_filepath, behavior_name, self.graph, self.sess
self.model_path, output_filepath, brain_name, self.graph, self.sess
)

def initialize_or_load(self, policy: Optional[TFPolicy] = None) -> None:
@@ -94,7 +94,6 @@ def initialize_or_load(self, policy: Optional[TFPolicy] = None) -> None:
self._load_graph(policy, self.model_path, reset_global_steps=reset_steps)
else:
policy.initialize()

TFPolicy.broadcast_global_variables(0)

def _load_graph(
ml-agents/mlagents/trainers/saver/torch_saver.py (8 changes: 4 additions & 4 deletions)
@@ -45,19 +45,19 @@ def register(self, module: Union[TorchPolicy, TorchOptimizer]) -> None:
self.policy = module
self.exporter = ModelSerializer(self.policy)

def save_checkpoint(self, behavior_name: str, step: int) -> str:
def save_checkpoint(self, brain_name: str, step: int) -> str:
if not os.path.exists(self.model_path):
os.makedirs(self.model_path)
checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}")
checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}")
state_dict = {
name: module.state_dict() for name, module in self.modules.items()
}
torch.save(state_dict, f"{checkpoint_path}.pt")
torch.save(state_dict, os.path.join(self.model_path, "checkpoint.pt"))
self.export(checkpoint_path, behavior_name)
self.export(checkpoint_path, brain_name)
return checkpoint_path

def export(self, output_filepath: str, behavior_name: str) -> None:
def export(self, output_filepath: str, brain_name: str) -> None:
if self.exporter is not None:
self.exporter.export_policy_model(output_filepath)

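As an aside, a small sketch of reading back a checkpoint written this way; the file path and module names are illustrative, and the exact keys depend on which modules the saver registered:

import torch

# The checkpoint is a dict of {module_name: module.state_dict()} as written above.
state_dict = torch.load("results/3DBall/3DBall-50000.pt", map_location="cpu")
for module_name, params in state_dict.items():
    n_values = sum(p.numel() for p in params.values() if hasattr(p, "numel"))
    print(module_name, n_values)
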
ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes: 1 addition & 1 deletion)
@@ -9,7 +9,7 @@

from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb
ml-agents/mlagents/trainers/tests/test_reward_signals.py (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@
import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG
from mlagents.trainers.settings import (
GAILSettings,
ml-agents/mlagents/trainers/tests/test_saver.py (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.test_nn_policy import create_policy_mock
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.ppo.optimizer import PPOOptimizer


def test_register(tmp_path):
ml-agents/mlagents/trainers/tf/models.py (4 changes: 2 additions & 2 deletions)
@@ -510,8 +510,8 @@ def create_discrete_action_masking_layer(
:param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
:param action_size: A list containing the number of possible actions for each branch
:return: The action output dimension [batch_size, num_branches], the concatenated
normalized log_probs (after softmax)
and the concatenated normalized log log_probs
normalized probs (after softmax)
and the concatenated normalized log probs
"""
branch_masks = ModelUtils.break_into_branches(action_masks, action_size)
raw_probs = [
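
To illustrate the docstring being corrected here, a plain NumPy sketch of a masked, per-branch softmax that yields normalized probs and log probs; it mirrors the idea only, not the TF implementation:

import numpy as np

def masked_branch_probs_sketch(logits, action_masks, action_size):
    # logits and action_masks: [batch, sum(action_size)]; masks are 1 for allowed actions.
    offsets = np.cumsum([0] + list(action_size))
    probs, log_probs = [], []
    for i in range(len(action_size)):
        branch_logits = logits[:, offsets[i]:offsets[i + 1]]
        branch_mask = action_masks[:, offsets[i]:offsets[i + 1]]
        exp = np.exp(branch_logits - branch_logits.max(axis=1, keepdims=True)) * branch_mask
        branch_probs = exp / exp.sum(axis=1, keepdims=True)
        probs.append(branch_probs)
        log_probs.append(np.log(branch_probs + 1e-10))  # epsilon avoids log(0) on masked actions
    return np.concatenate(probs, axis=1), np.concatenate(log_probs, axis=1)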