diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index c2a712a02f..fbcfb56f1e 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -216,9 +216,10 @@ def create_tf_policy( create_graph: bool = False, ) -> TFPolicy: """ - Creates a PPO policy to trainers list of policies. + Creates a policy with a Tensorflow backend and PPO hyperparameters + :param parsed_behavior_id: :param behavior_spec: specifications for policy construction - :param create_graph: whether to create the graph when policy is constructed + :param create_graph: whether to create the Tensorflow graph on construction :return policy """ policy = TFPolicy( @@ -234,9 +235,9 @@ def create_torch_policy( self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec ) -> TorchPolicy: """ - Creates a PPO policy to trainers list of policies. + Creates a policy with a PyTorch backend and PPO hyperparameters :param parsed_behavior_id: - :param brain_parameters: specifications for policy construction + :param behavior_spec: specifications for policy construction :return policy """ policy = TorchPolicy( diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 11cc6762c5..6be7eb9524 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -233,6 +233,13 @@ def create_tf_policy( behavior_spec: BehaviorSpec, create_graph: bool = False, ) -> TFPolicy: + """ + Creates a policy with a Tensorflow backend and SAC hyperparameters + :param parsed_behavior_id: + :param behavior_spec: specifications for policy construction + :param create_graph: whether to create the Tensorflow graph on construction + :return policy + """ policy = TFPolicy( self.seed, behavior_spec, @@ -248,7 +255,7 @@ def create_torch_policy( self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec ) -> TorchPolicy: """ - Creates a PPO policy to trainers list of policies. + Creates a policy with a PyTorch backend and SAC hyperparameters :param parsed_behavior_id: :param behavior_spec: specifications for policy construction :return policy