diff --git a/ml-agents/mlagents/trainers/ppo/optimizer_tf.py b/ml-agents/mlagents/trainers/ppo/optimizer.py
similarity index 99%
rename from ml-agents/mlagents/trainers/ppo/optimizer_tf.py
rename to ml-agents/mlagents/trainers/ppo/optimizer.py
index 05ce4503c8..e77ea21c5a 100644
--- a/ml-agents/mlagents/trainers/ppo/optimizer_tf.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -177,7 +177,7 @@ def _create_dc_critic(
             name="old_probabilities",
         )
 
-        # Break old log log_probs into separate branches
+        # Break old log probs into separate branches
         old_log_prob_branches = ModelUtils.break_into_branches(
             self.all_old_log_probs, self.policy.act_size
         )
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index c2a712a02f..6c1f3bb54c 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -12,7 +12,7 @@
 from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.policy import Policy
 from mlagents.trainers.policy.tf_policy import TFPolicy
-from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
+from mlagents.trainers.ppo.optimizer import PPOOptimizer
 from mlagents.trainers.trajectory import Trajectory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
diff --git a/ml-agents/mlagents/trainers/saver/saver.py b/ml-agents/mlagents/trainers/saver/saver.py
index 2a3366017a..95ff91bf9e 100644
--- a/ml-agents/mlagents/trainers/saver/saver.py
+++ b/ml-agents/mlagents/trainers/saver/saver.py
@@ -34,23 +34,23 @@ def _register_optimizer(self, optimizer):
         pass
 
     @abc.abstractmethod
-    def save_checkpoint(self, behavior_name: str, step: int) -> str:
+    def save_checkpoint(self, brain_name: str, step: int) -> str:
         """
         Checkpoints the policy on disk.
 
         :param checkpoint_path: filepath to write the checkpoint
-        :param behavior_name: Behavior name of behavior to be trained
+        :param brain_name: Brain name of brain to be trained
         """
         pass
 
     @abc.abstractmethod
-    def export(self, output_filepath: str, behavior_name: str) -> None:
+    def export(self, output_filepath: str, brain_name: str) -> None:
         """
-        Saves the serialized model, given a path and behavior name.
+        Saves the serialized model, given a path and brain name.
         This method will save the policy graph to the given filepath. The path
         should be provided without an extension as multiple serialized model formats
         may be generated as a result.
         :param output_filepath: path (without suffix) for the model file(s)
-        :param behavior_name: Behavior name of behavior to be trained.
+        :param brain_name: Brain name of brain to be trained.
""" pass diff --git a/ml-agents/mlagents/trainers/saver/tf_saver.py b/ml-agents/mlagents/trainers/saver/tf_saver.py index 60d73e188b..d0f95d0509 100644 --- a/ml-agents/mlagents/trainers/saver/tf_saver.py +++ b/ml-agents/mlagents/trainers/saver/tf_saver.py @@ -55,8 +55,8 @@ def _register_policy(self, policy: TFPolicy) -> None: with self.policy.graph.as_default(): self.tf_saver = tf.train.Saver(max_to_keep=self._keep_checkpoints) - def save_checkpoint(self, behavior_name: str, step: int) -> str: - checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}") + def save_checkpoint(self, brain_name: str, step: int) -> str: + checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}") # Save the TF checkpoint and graph definition if self.graph: with self.graph.as_default(): @@ -66,16 +66,16 @@ def save_checkpoint(self, behavior_name: str, step: int) -> str: self.graph, self.model_path, "raw_graph_def.pb", as_text=False ) # also save the policy so we have optimized model files for each checkpoint - self.export(checkpoint_path, behavior_name) + self.export(checkpoint_path, brain_name) return checkpoint_path - def export(self, output_filepath: str, behavior_name: str) -> None: + def export(self, output_filepath: str, brain_name: str) -> None: # save model if there is only one worker or # only on worker-0 if there are multiple workers if self.policy and self.policy.rank is not None and self.policy.rank != 0: return export_policy_model( - self.model_path, output_filepath, behavior_name, self.graph, self.sess + self.model_path, output_filepath, brain_name, self.graph, self.sess ) def initialize_or_load(self, policy: Optional[TFPolicy] = None) -> None: @@ -94,7 +94,6 @@ def initialize_or_load(self, policy: Optional[TFPolicy] = None) -> None: self._load_graph(policy, self.model_path, reset_global_steps=reset_steps) else: policy.initialize() - TFPolicy.broadcast_global_variables(0) def _load_graph( diff --git a/ml-agents/mlagents/trainers/saver/torch_saver.py b/ml-agents/mlagents/trainers/saver/torch_saver.py index 58c5810455..ce54cdc136 100644 --- a/ml-agents/mlagents/trainers/saver/torch_saver.py +++ b/ml-agents/mlagents/trainers/saver/torch_saver.py @@ -45,19 +45,19 @@ def register(self, module: Union[TorchPolicy, TorchOptimizer]) -> None: self.policy = module self.exporter = ModelSerializer(self.policy) - def save_checkpoint(self, behavior_name: str, step: int) -> str: + def save_checkpoint(self, brain_name: str, step: int) -> str: if not os.path.exists(self.model_path): os.makedirs(self.model_path) - checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}") + checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}") state_dict = { name: module.state_dict() for name, module in self.modules.items() } torch.save(state_dict, f"{checkpoint_path}.pt") torch.save(state_dict, os.path.join(self.model_path, "checkpoint.pt")) - self.export(checkpoint_path, behavior_name) + self.export(checkpoint_path, brain_name) return checkpoint_path - def export(self, output_filepath: str, behavior_name: str) -> None: + def export(self, output_filepath: str, brain_name: str) -> None: if self.exporter is not None: self.exporter.export_policy_model(output_filepath) diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index 0675ff693c..2d39949427 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -9,7 +9,7 @@ from mlagents.trainers.trainer.rl_trainer 
import RLTrainer from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards -from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer +from mlagents.trainers.ppo.optimizer import PPOOptimizer from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.agent_processor import AgentManagerQueue from mlagents.trainers.tests import mock_brain as mb diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py index 32138d1e60..4adc931a9e 100644 --- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py +++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py @@ -4,7 +4,7 @@ import mlagents.trainers.tests.mock_brain as mb from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.sac.optimizer import SACOptimizer -from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer +from mlagents.trainers.ppo.optimizer import PPOOptimizer from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG from mlagents.trainers.settings import ( GAILSettings, diff --git a/ml-agents/mlagents/trainers/tests/test_saver.py b/ml-agents/mlagents/trainers/tests/test_saver.py index c0bf2b8d40..023626bc5f 100644 --- a/ml-agents/mlagents/trainers/tests/test_saver.py +++ b/ml-agents/mlagents/trainers/tests/test_saver.py @@ -12,7 +12,7 @@ from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.tests import mock_brain as mb from mlagents.trainers.tests.test_nn_policy import create_policy_mock -from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer +from mlagents.trainers.ppo.optimizer import PPOOptimizer def test_register(tmp_path): diff --git a/ml-agents/mlagents/trainers/tf/models.py b/ml-agents/mlagents/trainers/tf/models.py index be7d4f8c10..7c5d0770ad 100644 --- a/ml-agents/mlagents/trainers/tf/models.py +++ b/ml-agents/mlagents/trainers/tf/models.py @@ -510,8 +510,8 @@ def create_discrete_action_masking_layer( :param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action] :param action_size: A list containing the number of possible actions for each branch :return: The action output dimension [batch_size, num_branches], the concatenated - normalized log_probs (after softmax) - and the concatenated normalized log log_probs + normalized probs (after softmax) + and the concatenated normalized log probs """ branch_masks = ModelUtils.break_into_branches(action_masks, action_size) raw_probs = [
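
Note on the renamed saver API: the sketch below is illustrative only (the SketchTorchSaver class, its simplified register() signature, and the "3DBall" name are invented for the example, and export() is stubbed out), but it mirrors the checkpoint flow that torch_saver.py implements after this diff. save_checkpoint(brain_name, step) builds a "<brain_name>-<step>" path under model_path, writes the registered modules' state dicts to that step-stamped file and to a rolling checkpoint.pt, then hands the path to export().

# Minimal sketch of the checkpoint flow touched by this diff; not the ml-agents implementation.
import os
from typing import Dict

import torch
from torch import nn


class SketchTorchSaver:
    def __init__(self, model_path: str):
        self.model_path = model_path
        # In the real saver, register() accepts a TorchPolicy/TorchOptimizer and
        # pulls its modules; here we register named nn.Modules directly.
        self.modules: Dict[str, nn.Module] = {}

    def register(self, name: str, module: nn.Module) -> None:
        self.modules[name] = module

    def save_checkpoint(self, brain_name: str, step: int) -> str:
        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)
        # Checkpoints are keyed by brain name and step, e.g. "3DBall-1000.pt".
        checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}")
        state_dict = {
            name: module.state_dict() for name, module in self.modules.items()
        }
        torch.save(state_dict, f"{checkpoint_path}.pt")
        # A rolling copy makes "resume from latest" trivial.
        torch.save(state_dict, os.path.join(self.model_path, "checkpoint.pt"))
        self.export(checkpoint_path, brain_name)
        return checkpoint_path

    def export(self, output_filepath: str, brain_name: str) -> None:
        # The real saver hands off to ModelSerializer / export_policy_model here.
        pass


if __name__ == "__main__":
    saver = SketchTorchSaver("results/3DBall")
    saver.register("policy", nn.Linear(8, 2))
    print(saver.save_checkpoint("3DBall", step=1000))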