diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 05c0614310..94bc24bf07 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -43,7 +43,7 @@ repos:
     rev: v2.7.0
     hooks:
       - id: pyupgrade
-        args: [--py3-plus]
+        args: [--py3-plus, --py36-plus]
         exclude: .*barracuda.py

   - repo: https://github.com/pre-commit/pre-commit-hooks
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index 8bce6addc6..733dcba37b 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to
 ### Major Changes
 #### com.unity.ml-agents (C#)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
+The minimum supported python version for ml-agents-envs was changed to 3.6.1. (#4244)

 ### Minor Changes
 #### com.unity.ml-agents (C#)
diff --git a/gym-unity/setup.py b/gym-unity/setup.py
index 4f6975e88b..228958f0d2 100755
--- a/gym-unity/setup.py
+++ b/gym-unity/setup.py
@@ -38,6 +38,6 @@ def run(self):
     author_email="ML-Agents@unity3d.com",
     url="https://github.com/Unity-Technologies/ml-agents",
     packages=find_packages(),
-    install_requires=["gym", "mlagents_envs=={}".format(VERSION)],
+    install_requires=["gym", f"mlagents_envs=={VERSION}"],
     cmdclass={"verify": VerifyVersionCommand},
 )
diff --git a/ml-agents-envs/mlagents_envs/base_env.py b/ml-agents-envs/mlagents_envs/base_env.py
index 528ec50e0a..c0032f0ee6 100644
--- a/ml-agents-envs/mlagents_envs/base_env.py
+++ b/ml-agents-envs/mlagents_envs/base_env.py
@@ -109,9 +109,7 @@ def __getitem__(self, agent_id: AgentId) -> DecisionStep:
         :returns: The DecisionStep
         """
         if agent_id not in self.agent_id_to_index:
-            raise KeyError(
-                "agent_id {} is not present in the DecisionSteps".format(agent_id)
-            )
+            raise KeyError(f"agent_id {agent_id} is not present in the DecisionSteps")
         agent_index = self._agent_id_to_index[agent_id]  # type: ignore
         agent_obs = []
         for batched_obs in self.obs:
@@ -214,9 +212,7 @@ def __getitem__(self, agent_id: AgentId) -> TerminalStep:
         specific agent
         """
         if agent_id not in self.agent_id_to_index:
-            raise KeyError(
-                "agent_id {} is not present in the TerminalSteps".format(agent_id)
-            )
+            raise KeyError(f"agent_id {agent_id} is not present in the TerminalSteps")
         agent_index = self._agent_id_to_index[agent_id]  # type: ignore
         agent_obs = []
         for batched_obs in self.obs:
diff --git a/ml-agents-envs/mlagents_envs/env_utils.py b/ml-agents-envs/mlagents_envs/env_utils.py
index 7af5fbae55..8ff21f0ae3 100644
--- a/ml-agents-envs/mlagents_envs/env_utils.py
+++ b/ml-agents-envs/mlagents_envs/env_utils.py
@@ -27,7 +27,7 @@ def validate_environment_path(env_path: str) -> Optional[str]:
         .replace(".x86", "")
     )
     true_filename = os.path.basename(os.path.normpath(env_path))
-    get_logger(__name__).debug("The true file name is {}".format(true_filename))
+    get_logger(__name__).debug(f"The true file name is {true_filename}")

     if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
         return None
@@ -86,7 +86,7 @@ def launch_executable(file_name: str, args: List[str]) -> subprocess.Popen:
             f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
         )
     else:
-        get_logger(__name__).debug("This is the launch string {}".format(launch_string))
+        get_logger(__name__).debug(f"This is the launch string {launch_string}")
     # Launch Unity environment
     subprocess_args = [launch_string] + args
     try:
diff --git a/ml-agents-envs/setup.py b/ml-agents-envs/setup.py
index 96ef768b78..be9cab13e6 100644
--- a/ml-agents-envs/setup.py
+++ b/ml-agents-envs/setup.py
@@ -53,6 +53,6 @@ def run(self):
         "protobuf>=3.6",
         "pyyaml>=3.1.0",
     ],
-    python_requires=">=3.5",
+    python_requires=">=3.6.1",
     cmdclass={"verify": VerifyVersionCommand},
 )
diff --git a/ml-agents/mlagents/trainers/buffer.py b/ml-agents/mlagents/trainers/buffer.py
index 2d24ec9665..87fd160d8f 100644
--- a/ml-agents/mlagents/trainers/buffer.py
+++ b/ml-agents/mlagents/trainers/buffer.py
@@ -275,7 +275,7 @@ def resequence_and_append(
         key_list = list(self.keys())
         if not self.check_length(key_list):
             raise BufferException(
-                "The length of the fields {} were not of same length".format(key_list)
+                f"The length of the fields {key_list} were not of same length"
             )
         for field_key in key_list:
             target_buffer[field_key].extend(
diff --git a/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py b/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
index decff58043..684d111094 100644
--- a/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
+++ b/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
@@ -51,7 +51,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
                     self.encoding_size,
                     ModelUtils.swish,
                     1,
-                    "curiosity_stream_{}_visual_obs_encoder".format(i),
+                    f"curiosity_stream_{i}_visual_obs_encoder",
                     False,
                 )

@@ -60,7 +60,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
                     self.encoding_size,
                     ModelUtils.swish,
                     1,
-                    "curiosity_stream_{}_visual_obs_encoder".format(i),
+                    f"curiosity_stream_{i}_visual_obs_encoder",
                     True,
                 )
                 visual_encoders.append(encoded_visual)
diff --git a/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py b/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
index 8e5554499a..fcb0a11d6b 100644
--- a/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
+++ b/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
@@ -124,7 +124,7 @@ def make_inputs(self) -> None:
                     self.encoding_size,
                     ModelUtils.swish,
                     1,
-                    "gail_stream_{}_visual_obs_encoder".format(i),
+                    f"gail_stream_{i}_visual_obs_encoder",
                     False,
                 )

@@ -133,7 +133,7 @@ def make_inputs(self) -> None:
                     self.encoding_size,
                     ModelUtils.swish,
                     1,
-                    "gail_stream_{}_visual_obs_encoder".format(i),
+                    f"gail_stream_{i}_visual_obs_encoder",
                     True,
                 )
                 visual_policy_encoders.append(encoded_policy_visual)
diff --git a/ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py b/ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py
index f8ddc6ab4a..bb29eaa10b 100644
--- a/ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py
+++ b/ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py
@@ -31,7 +31,7 @@ def create_reward_signal(
     """
     rcls = NAME_TO_CLASS.get(name)
     if not rcls:
-        raise UnityTrainerException("Unknown reward signal type {}".format(name))
+        raise UnityTrainerException(f"Unknown reward signal type {name}")

     class_inst = rcls(policy, settings)
     return class_inst
diff --git a/ml-agents/mlagents/trainers/ghost/controller.py b/ml-agents/mlagents/trainers/ghost/controller.py
index 7434c6b076..84901e14f6 100644
--- a/ml-agents/mlagents/trainers/ghost/controller.py
+++ b/ml-agents/mlagents/trainers/ghost/controller.py
@@ -69,9 +69,7 @@ def change_training_team(self, step: int) -> None:
         """
         self._queue.append(self._learning_team)
         self._learning_team = self._queue.popleft()
-        logger.debug(
-            "Learning team {} swapped on step {}".format(self._learning_team, step)
-        )
+        logger.debug(f"Learning team {self._learning_team} swapped on step {step}")
         self._changed_training_team = True

     # Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and
diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 5f24ac390b..7fae0d9a6b 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -82,7 +82,7 @@ def create_schedule(
                 parameter, global_step, max_step, min_value, power=1.0
             )
         else:
-            raise UnityTrainerException("The schedule {} is invalid.".format(schedule))
+            raise UnityTrainerException(f"The schedule {schedule} is invalid.")
         return parameter_rate

     @staticmethod
@@ -290,7 +290,7 @@ def create_vector_observation_encoder(
                 h_size,
                 activation=activation,
                 reuse=reuse,
-                name="hidden_{}".format(i),
+                name=f"hidden_{i}",
                 kernel_initializer=tf.initializers.variance_scaling(1.0),
             )
         return hidden
@@ -656,7 +656,7 @@ def create_value_heads(
         """
         value_heads = {}
         for name in stream_names:
-            value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
+            value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
             value_heads[name] = value
         value = tf.reduce_mean(list(value_heads.values()), 0)
         return value_heads, value
diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py
index cd77d073aa..b23a819b04 100644
--- a/ml-agents/mlagents/trainers/policy/tf_policy.py
+++ b/ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -182,9 +182,7 @@ def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None
                 )
             )
         else:
-            logger.info(
-                "Resuming training from step {}.".format(self.get_current_step())
-            )
+            logger.info(f"Resuming training from step {self.get_current_step()}.")

     def initialize_or_load(self):
         # If there is an initialize path, load from that. Else, load from the set model path.
diff --git a/ml-agents/mlagents/trainers/ppo/optimizer.py b/ml-agents/mlagents/trainers/ppo/optimizer.py
index 52cb7c0584..0df43eeb1f 100644
--- a/ml-agents/mlagents/trainers/ppo/optimizer.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -229,10 +229,10 @@ def _create_losses(
         self.old_values = {}
         for name in value_heads.keys():
             returns_holder = tf.placeholder(
-                shape=[None], dtype=tf.float32, name="{}_returns".format(name)
+                shape=[None], dtype=tf.float32, name=f"{name}_returns"
             )
             old_value = tf.placeholder(
-                shape=[None], dtype=tf.float32, name="{}_value_estimate".format(name)
+                shape=[None], dtype=tf.float32, name=f"{name}_value_estimate"
             )
             self.returns_holders[name] = returns_holder
             self.old_values[name] = old_value
@@ -334,12 +334,8 @@ def _construct_feed_dict(
             self.all_old_log_probs: mini_batch["action_probs"],
         }
         for name in self.reward_signals:
-            feed_dict[self.returns_holders[name]] = mini_batch[
-                "{}_returns".format(name)
-            ]
-            feed_dict[self.old_values[name]] = mini_batch[
-                "{}_value_estimates".format(name)
-            ]
+            feed_dict[self.returns_holders[name]] = mini_batch[f"{name}_returns"]
+            feed_dict[self.old_values[name]] = mini_batch[f"{name}_value_estimates"]

         if self.policy.output_pre is not None and "actions_pre" in mini_batch:
             feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index 6ac85103ac..f6b7af3a27 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -75,7 +75,7 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             trajectory.done_reached and not trajectory.interrupted,
         )
         for name, v in value_estimates.items():
-            agent_buffer_trajectory["{}_value_estimates".format(name)].extend(v)
+            agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
             self._stats_reporter.add_stat(
                 self.optimizer.reward_signals[name].value_name, np.mean(v)
             )
@@ -88,7 +88,7 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             evaluate_result = reward_signal.evaluate_batch(
                 agent_buffer_trajectory
             ).scaled_reward
-            agent_buffer_trajectory["{}_rewards".format(name)].extend(evaluate_result)
+            agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
             # Report the reward signals
             self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

@@ -98,11 +98,9 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
         for name in self.optimizer.reward_signals:
             bootstrap_value = value_next[name]

-            local_rewards = agent_buffer_trajectory[
-                "{}_rewards".format(name)
-            ].get_batch()
+            local_rewards = agent_buffer_trajectory[f"{name}_rewards"].get_batch()
             local_value_estimates = agent_buffer_trajectory[
-                "{}_value_estimates".format(name)
+                f"{name}_value_estimates"
             ].get_batch()
             local_advantage = get_gae(
                 rewards=local_rewards,
@@ -113,8 +111,8 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
             )
             local_return = local_advantage + local_value_estimates
             # This is later use as target for the different value estimates
-            agent_buffer_trajectory["{}_returns".format(name)].set(local_return)
-            agent_buffer_trajectory["{}_advantage".format(name)].set(local_advantage)
+            agent_buffer_trajectory[f"{name}_returns"].set(local_return)
+            agent_buffer_trajectory[f"{name}_advantage"].set(local_advantage)
             tmp_advantages.append(local_advantage)
             tmp_returns.append(local_return)
diff --git a/ml-agents/mlagents/trainers/sac/network.py b/ml-agents/mlagents/trainers/sac/network.py
index 947963e06d..c411e4d555 100644
--- a/ml-agents/mlagents/trainers/sac/network.py
+++ b/ml-agents/mlagents/trainers/sac/network.py
@@ -99,7 +99,7 @@ def create_value_heads(self, stream_names, hidden_input):
         """
         self.value_heads = {}
         for name in stream_names:
-            value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
+            value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
             self.value_heads[name] = value
         self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

@@ -244,7 +244,7 @@ def create_q_heads(

         q1_heads = {}
         for name in stream_names:
-            _q1 = tf.layers.dense(q1_hidden, num_outputs, name="{}_q1".format(name))
+            _q1 = tf.layers.dense(q1_hidden, num_outputs, name=f"{name}_q1")
             q1_heads[name] = _q1
         q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)

@@ -263,7 +263,7 @@ def create_q_heads(

         q2_heads = {}
         for name in stream_names:
-            _q2 = tf.layers.dense(q2_hidden, num_outputs, name="{}_q2".format(name))
+            _q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
             q2_heads[name] = _q2
         q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
diff --git a/ml-agents/mlagents/trainers/sac/optimizer.py b/ml-agents/mlagents/trainers/sac/optimizer.py
index 6213acedd2..2d8b94067a 100644
--- a/ml-agents/mlagents/trainers/sac/optimizer.py
+++ b/ml-agents/mlagents/trainers/sac/optimizer.py
@@ -271,7 +271,7 @@ def _create_losses(
             )

             rewards_holder = tf.placeholder(
-                shape=[None], dtype=tf.float32, name="{}_rewards".format(name)
+                shape=[None], dtype=tf.float32, name=f"{name}_rewards"
             )
             self.rewards_holders[name] = rewards_holder

@@ -607,7 +607,7 @@ def _construct_feed_dict(
             self.policy.mask_input: batch["masks"] * burn_in_mask,
         }
         for name in self.reward_signals:
-            feed_dict[self.rewards_holders[name]] = batch["{}_rewards".format(name)]
+            feed_dict[self.rewards_holders[name]] = batch[f"{name}_rewards"]

         if self.policy.use_continuous_act:
             feed_dict[self.policy_network.external_action_in] = batch["actions"]
diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py
index 850c40f783..e8cc361850 100644
--- a/ml-agents/mlagents/trainers/sac/trainer.py
+++ b/ml-agents/mlagents/trainers/sac/trainer.py
@@ -100,7 +100,7 @@ def save_replay_buffer(self) -> None:
         Save the training buffer's update buffer to a pickle file.
         """
         filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5")
-        logger.info("Saving Experience Replay Buffer to {}".format(filename))
+        logger.info(f"Saving Experience Replay Buffer to {filename}")
         with open(filename, "wb") as file_object:
             self.update_buffer.save_to_file(file_object)

@@ -109,7 +109,7 @@ def load_replay_buffer(self) -> None:
         Loads the last saved replay buffer from a file.
         """
         filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5")
-        logger.info("Loading Experience Replay Buffer from {}".format(filename))
+        logger.info(f"Loading Experience Replay Buffer from {filename}")
         with open(filename, "rb+") as file_object:
             self.update_buffer.load_from_file(file_object)
         logger.info(
@@ -239,7 +239,7 @@ def _update_sac_policy(self) -> bool:
         while (
             self.step - self.hyperparameters.buffer_init_steps
         ) / self.update_steps > self.steps_per_update:
-            logger.debug("Updating SAC policy at step {}".format(self.step))
+            logger.debug(f"Updating SAC policy at step {self.step}")
             buffer = self.update_buffer
             if self.update_buffer.num_experiences >= self.hyperparameters.batch_size:
                 sampled_minibatch = buffer.sample_mini_batch(
@@ -248,9 +248,9 @@ def _update_sac_policy(self) -> bool:
                 )
                 # Get rewards for each reward
                 for name, signal in self.optimizer.reward_signals.items():
-                    sampled_minibatch[
-                        "{}_rewards".format(name)
-                    ] = signal.evaluate_batch(sampled_minibatch).scaled_reward
+                    sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
+                        sampled_minibatch
+                    ).scaled_reward

                 update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
                 for stat_name, value in update_stats.items():
@@ -296,7 +296,7 @@ def _update_reward_signals(self) -> None:
         # Get minibatches for reward signal update if needed
         reward_signal_minibatches = {}
         for name, signal in self.optimizer.reward_signals.items():
-            logger.debug("Updating {} at step {}".format(name, self.step))
+            logger.debug(f"Updating {name} at step {self.step}")
             # Some signals don't need a minibatch to be sampled - so we don't!
             if signal.update_dict:
                 reward_signal_minibatches[name] = buffer.sample_mini_batch(
diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py
index bd568c4f6c..0b0c272cc5 100644
--- a/ml-agents/mlagents/trainers/stats.py
+++ b/ml-agents/mlagents/trainers/stats.py
@@ -114,7 +114,7 @@ def write_stats(
             )
             if self.self_play and "Self-play/ELO" in values:
                 elo_stats = values["Self-play/ELO"]
-                logger.info("{} ELO: {:0.3f}. ".format(category, elo_stats.mean))
+                logger.info(f"{category} ELO: {elo_stats.mean:0.3f}. ")
         else:
             logger.info(
                 "{}: Step: {}. No episode was completed since last summary. {}".format(
@@ -177,7 +177,7 @@ def write_stats(
         self._maybe_create_summary_writer(category)
         for key, value in values.items():
             summary = tf.Summary()
-            summary.value.add(tag="{}".format(key), simple_value=value.mean)
+            summary.value.add(tag=f"{key}", simple_value=value.mean)
             self.summary_writers[category].add_summary(summary, step)
             self.summary_writers[category].flush()

@@ -195,7 +195,7 @@ def _delete_all_events_files(self, directory_name: str) -> None:
         for file_name in os.listdir(directory_name):
             if file_name.startswith("events.out"):
                 logger.warning(
-                    "{} was left over from a previous run. Deleting.".format(file_name)
+                    f"{file_name} was left over from a previous run. Deleting."
                 )
                 full_fname = os.path.join(directory_name, file_name)
                 try:
diff --git a/ml-agents/mlagents/trainers/tests/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/test_simple_rl.py
index 82b6a11c7c..bf75a3b9b4 100644
--- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py
+++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py
@@ -80,7 +80,7 @@ def default_reward_processor(rewards, last_n_rewards=5):
     rewards_to_use = rewards[-last_n_rewards:]
     # For debugging tests
-    print("Last {} rewards:".format(last_n_rewards), rewards_to_use)
+    print(f"Last {last_n_rewards} rewards:", rewards_to_use)
     return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()


diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index 8bc6a8a1bb..248bdefa89 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -58,7 +58,7 @@ def run(self):
         # Test-only dependencies should go in test_requirements.txt, not here.
         "grpcio>=1.11.0",
         "h5py>=2.9.0",
-        "mlagents_envs=={}".format(VERSION),
+        f"mlagents_envs=={VERSION}",
         "numpy>=1.13.3,<2.0",
         "Pillow>=4.2.1",
         "protobuf>=3.6",
diff --git a/ml-agents/tests/yamato/check_coverage_percent.py b/ml-agents/tests/yamato/check_coverage_percent.py
index 025db1375a..50809bc832 100644
--- a/ml-agents/tests/yamato/check_coverage_percent.py
+++ b/ml-agents/tests/yamato/check_coverage_percent.py
@@ -17,7 +17,7 @@ def check_coverage(root_dir, min_percentage):
            summary_xml = os.path.join(dirpath, SUMMARY_XML_FILENAME)
            break
     if not summary_xml:
-        print("Couldn't find {} in root directory".format(SUMMARY_XML_FILENAME))
+        print(f"Couldn't find {SUMMARY_XML_FILENAME} in root directory")
         sys.exit(1)

     with open(summary_xml) as f:
diff --git a/ml-agents/tests/yamato/scripts/run_gym.py b/ml-agents/tests/yamato/scripts/run_gym.py
index c13c981f00..fabf9031d4 100644
--- a/ml-agents/tests/yamato/scripts/run_gym.py
+++ b/ml-agents/tests/yamato/scripts/run_gym.py
@@ -21,7 +21,7 @@ def test_run_environment(env_name):

         if len(env.observation_space.shape) == 1:
             # Examine the initial vector observation
-            print("Agent observations look like: \n{}".format(initial_observations))
+            print(f"Agent observations look like: \n{initial_observations}")

         for _episode in range(10):
             env.reset()
@@ -31,7 +31,7 @@ def test_run_environment(env_name):
                 actions = env.action_space.sample()
                 obs, reward, done, _ = env.step(actions)
                 episode_rewards += reward
-            print("Total reward this episode: {}".format(episode_rewards))
+            print(f"Total reward this episode: {episode_rewards}")
     finally:
         env.close()
diff --git a/ml-agents/tests/yamato/scripts/run_llapi.py b/ml-agents/tests/yamato/scripts/run_llapi.py
index 289fa8d8bc..b0dedff84f 100644
--- a/ml-agents/tests/yamato/scripts/run_llapi.py
+++ b/ml-agents/tests/yamato/scripts/run_llapi.py
@@ -82,7 +82,7 @@ def test_run_environment(env_name):
                 if tracked_agent in terminal_steps:
                     episode_rewards += terminal_steps[tracked_agent].reward
                     done = True
-            print("Total reward this episode: {}".format(episode_rewards))
+            print(f"Total reward this episode: {episode_rewards}")
     finally:
         env.close()
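Note (not part of the diff above): a minimal sketch of the rewrite pattern this change applies throughout the codebase. With the minimum Python version raised to 3.6.1, pyupgrade's `--py36-plus` flag rewrites `str.format()` calls as f-strings on commit. The format string below is taken from the `sac/trainer.py` hunk; the variable values are hypothetical and only for illustration.

```python
# Illustrative sketch of the .format() -> f-string conversion performed by
# `pyupgrade --py36-plus` across this PR. The format string comes from the
# sac/trainer.py hunk; the variable values here are made up.
name = "gail"
step = 5000

old_style = "Updating {} at step {}".format(name, step)  # valid on Python 3.5
new_style = f"Updating {name} at step {step}"  # f-strings require Python >= 3.6

assert old_style == new_style == "Updating gail at step 5000"
```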