From 880ceab4d7d7e6faeb63f1e57f1d1cbb716907b2 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Nov 2019 14:47:03 -0800 Subject: [PATCH 1/4] ignore attribute-defined-outside-init in multi_gpu_policy --- .pre-commit-config.yaml | 4 ++-- ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2a1861765c..4eca1ab48e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -66,9 +66,9 @@ repos: .*_pb2_grpc.py| .*/tests/.* )$ - require_serial: true + args: [--score=n] -# "Local" hooks, see https://pre-commit.com/#repository-local-hooks + # "Local" hooks, see https://pre-commit.com/#repository-local-hooks - repo: local hooks: - id: markdown-link-check diff --git a/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py b/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py index 98f2e3eb5b..14c11b4512 100644 --- a/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py +++ b/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py @@ -1,3 +1,4 @@ +# pylint: disable=attribute-defined-outside-init import logging import tensorflow as tf From 2360219f697186efe4ef917f6685df936995e010 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Nov 2019 14:48:08 -0800 Subject: [PATCH 2/4] undo yaml whitespace --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4eca1ab48e..10fec8dd60 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -68,7 +68,7 @@ repos: )$ args: [--score=n] - # "Local" hooks, see https://pre-commit.com/#repository-local-hooks +# "Local" hooks, see https://pre-commit.com/#repository-local-hooks - repo: local hooks: - id: markdown-link-check From 8cf3b4e7a3e5ba8821363a71f2d7cab40a86d75a Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Nov 2019 15:10:10 -0800 Subject: [PATCH 3/4] declare attributes before super.init --- 
.../mlagents/trainers/ppo/multi_gpu_policy.py | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py b/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py index 14c11b4512..401ee176f7 100644 --- a/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py +++ b/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py @@ -1,12 +1,14 @@ -# pylint: disable=attribute-defined-outside-init import logging +from typing import Any, Dict, List, Optional import tensorflow as tf from tensorflow.python.client import device_lib +from mlagents.envs.brain import BrainParameters from mlagents.envs.timers import timed from mlagents.trainers.models import EncoderType, LearningRateSchedule from mlagents.trainers.ppo.policy import PPOPolicy from mlagents.trainers.ppo.models import PPOModel +from mlagents.trainers.components.reward_signals import RewardSignal from mlagents.trainers.components.reward_signals.reward_signal_factory import ( create_reward_signal, ) @@ -18,6 +20,23 @@ class MultiGpuPPOPolicy(PPOPolicy): + def __init__( + self, + seed: int, + brain: BrainParameters, + trainer_params: Dict[str, Any], + is_training: bool, + load: bool, + ): + self.towers: List[PPOModel] = [] + self.devices: List[str] = [] + self.model: Optional[PPOModel] = None + self.total_policy_loss: Optional[tf.Tensor] = None + self.reward_signal_towers: List[Dict[str, RewardSignal]] = [] + self.reward_signals: Dict[str, RewardSignal] = {} + + super().__init__(seed, brain, trainer_params, is_training, load) + def create_model( self, brain, trainer_params, reward_signal_configs, is_training, load, seed ): @@ -30,6 +49,7 @@ def create_model( """ self.devices = get_devices() self.towers = [] + with self.graph.as_default(): with tf.variable_scope("", reuse=tf.AUTO_REUSE): for device in self.devices: @@ -106,7 +126,6 @@ def create_reward_signals(self, reward_signal_configs): Create reward signals :param reward_signal_configs: Reward signal 
config. """ - self.reward_signal_towers = [] with self.graph.as_default(): with tf.variable_scope(TOWER_SCOPE_NAME, reuse=tf.AUTO_REUSE): for device_id, device in enumerate(self.devices): @@ -191,7 +210,7 @@ def average_gradients(self, tower_grads): return average_grads -def get_devices(): +def get_devices() -> List[str]: """ Get all available GPU devices """ From 15eb74d57e9314478dc4ae491f7a4a0edff73a84 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Nov 2019 15:18:11 -0800 Subject: [PATCH 4/4] remove redundant declaration --- ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py b/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py index 401ee176f7..7791579aad 100644 --- a/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py +++ b/ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py @@ -48,7 +48,6 @@ def create_model( :param seed: Random seed. """ self.devices = get_devices() - self.towers = [] with self.graph.as_default(): with tf.variable_scope("", reuse=tf.AUTO_REUSE):