fix attribute-defined-outside-init pylint warnings #2876

Merged
4 commits merged on Nov 8, 2019
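
For context, pylint's attribute-defined-outside-init check (message W0201) fires when an instance attribute is only ever assigned in methods other than __init__. The pattern used in this PR is to declare those attributes in __init__ with placeholder values before calling super().__init__(), since the base constructor is what invokes the hooks that actually populate them. A minimal sketch of the idea, with illustrative names rather than the real ml-agents classes:

    from typing import List, Optional


    class BasePolicy:
        def __init__(self) -> None:
            self.create_model()  # base constructor calls a hook the subclass overrides

        def create_model(self) -> None:
            pass


    class BeforeFix(BasePolicy):
        def create_model(self) -> None:
            self.towers: List[str] = []  # W0201: attribute-defined-outside-init


    class AfterFix(BasePolicy):
        def __init__(self) -> None:
            self.towers: List[str] = []       # attributes declared up front ...
            self.model: Optional[str] = None
            super().__init__()                # ... before the base constructor runs create_model()

        def create_model(self) -> None:
            self.towers.append("tower_0")     # populate, rather than define, the attribute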
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -66,7 +66,7 @@ repos:
.*_pb2_grpc.py|
.*/tests/.*
)$
require_serial: true
args: [--score=n]

# "Local" hooks, see https://pre-commit.com/#repository-local-hooks
- repo: local
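A note on the .pre-commit-config.yaml change: --score=n disables pylint's evaluation score report (the "Your code has been rated at ..." line), so the hook's output is limited to actual warnings and errors. Running pylint --score=n <files> by hand (an illustrative invocation, not taken from the hook definition) gives the same terse output locally.
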
25 changes: 22 additions & 3 deletions ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py
@@ -1,11 +1,14 @@
import logging
from typing import Any, Dict, List, Optional

import tensorflow as tf
from tensorflow.python.client import device_lib
from mlagents.envs.brain import BrainParameters
from mlagents.envs.timers import timed
from mlagents.trainers.models import EncoderType, LearningRateSchedule
from mlagents.trainers.ppo.policy import PPOPolicy
from mlagents.trainers.ppo.models import PPOModel
from mlagents.trainers.components.reward_signals import RewardSignal
from mlagents.trainers.components.reward_signals.reward_signal_factory import (
create_reward_signal,
)
@@ -17,6 +20,23 @@


class MultiGpuPPOPolicy(PPOPolicy):
def __init__(
self,
seed: int,
brain: BrainParameters,
trainer_params: Dict[str, Any],
is_training: bool,
load: bool,
):
self.towers: List[PPOModel] = []
self.devices: List[str] = []
self.model: Optional[PPOModel] = None
self.total_policy_loss: Optional[tf.Tensor] = None
self.reward_signal_towers: List[Dict[str, RewardSignal]] = []
self.reward_signals: Dict[str, RewardSignal] = {}

super().__init__(seed, brain, trainer_params, is_training, load)

def create_model(
self, brain, trainer_params, reward_signal_configs, is_training, load, seed
):
Expand All @@ -28,7 +48,7 @@ def create_model(
:param seed: Random seed.
"""
self.devices = get_devices()
self.towers = []

with self.graph.as_default():
with tf.variable_scope("", reuse=tf.AUTO_REUSE):
for device in self.devices:
@@ -105,7 +125,6 @@ def create_reward_signals(self, reward_signal_configs):
Create reward signals
:param reward_signal_configs: Reward signal config.
"""
self.reward_signal_towers = []
with self.graph.as_default():
with tf.variable_scope(TOWER_SCOPE_NAME, reuse=tf.AUTO_REUSE):
for device_id, device in enumerate(self.devices):
@@ -190,7 +209,7 @@ def average_gradients(self, tower_grads):
return average_grads


def get_devices():
def get_devices() -> List[str]:
"""
Get all available GPU devices
"""
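
One way to confirm locally that the warnings are gone after a change like this is to run pylint with only that check enabled. A small sketch (not part of the PR) that drives pylint programmatically against the edited module:

    from pylint.lint import Run

    try:
        # Enable only the check this PR addresses and lint the edited module.
        Run([
            "--disable=all",
            "--enable=attribute-defined-outside-init",
            "ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py",
        ])
    except SystemExit as status:
        # pylint exits with code 0 when none of the enabled messages were emitted.
        print("pylint exit code:", status.code)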