diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py index ded18ba3d6..eeb4877b14 100644 --- a/ml-agents/mlagents/trainers/env_manager.py +++ b/ml-agents/mlagents/trainers/env_manager.py @@ -67,6 +67,15 @@ def reset(self, config: Dict = None) -> int: self.first_step_infos = self._reset_env(config) return len(self.first_step_infos) + @abstractmethod + def set_env_parameters(self, config: Dict = None) -> None: + """ + Sends environment parameter settings to C# via the + EnvironmentParametersSidechannel. + :param config: Dict of environment parameter keys and values + """ + pass + @property @abstractmethod def external_brains(self) -> Dict[BehaviorName, BrainParameters]: diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index ead4b92cd8..0ef42e37d0 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -205,7 +205,7 @@ def maybe_add_samplers( for offset, v in enumerate(sampler_config.values()): if v.seed == -1: v.seed = run_seed + offset - env.reset(config=sampler_config) + env.set_env_parameters(config=sampler_config) def try_create_meta_curriculum( diff --git a/ml-agents/mlagents/trainers/simple_env_manager.py b/ml-agents/mlagents/trainers/simple_env_manager.py index 98cdfbbe99..4f7ed16b15 100644 --- a/ml-agents/mlagents/trainers/simple_env_manager.py +++ b/ml-agents/mlagents/trainers/simple_env_manager.py @@ -43,16 +43,24 @@ def _step(self) -> List[EnvironmentStep]: def _reset_env( self, config: Dict[BehaviorName, float] = None ) -> List[EnvironmentStep]: # type: ignore + self.set_env_parameters(config) + self.env.reset() + all_step_result = self._generate_all_results() + self.previous_step = EnvironmentStep(all_step_result, 0, {}, {}) + return [self.previous_step] + + def set_env_parameters(self, config: Dict = None) -> None: + """ + Sends environment parameter settings to C# via the + EnvironmentParametersSidechannel. 
+ :param config: Dict of environment parameter keys and values + """ if config is not None: for k, v in config.items(): if isinstance(v, float): self.env_params.set_float_parameter(k, v) elif isinstance(v, ParameterRandomizationSettings): v.apply(k, self.env_params) - self.env.reset() - all_step_result = self._generate_all_results() - self.previous_step = EnvironmentStep(all_step_result, 0, {}, {}) - return [self.previous_step] @property def external_brains(self) -> Dict[BehaviorName, BrainParameters]: diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index 8bf2e4e771..f50fc52747 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -46,7 +46,7 @@ class EnvironmentCommand(enum.Enum): STEP = 1 EXTERNAL_BRAINS = 2 - GET_PROPERTIES = 3 + ENVIRONMENT_PARAMETERS = 3 RESET = 4 CLOSE = 5 ENV_EXITED = 6 @@ -174,12 +174,13 @@ def external_brains(): reset_timers() elif req.cmd == EnvironmentCommand.EXTERNAL_BRAINS: _send_response(EnvironmentCommand.EXTERNAL_BRAINS, external_brains()) - elif req.cmd == EnvironmentCommand.RESET: + elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS: for k, v in req.payload.items(): if isinstance(v, float): env_parameters.set_float_parameter(k, v) elif isinstance(v, ParameterRandomizationSettings): v.apply(k, env_parameters) + elif req.cmd == EnvironmentCommand.RESET: env.reset() all_step_result = _generate_all_results() _send_response(EnvironmentCommand.RESET, all_step_result) @@ -291,6 +292,8 @@ def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]: if not self.step_queue.empty(): step = self.step_queue.get_nowait() self.env_workers[step.worker_id].waiting = False + # Send config to environment + self.set_env_parameters(config) # First enqueue reset commands for all workers so that they reset in parallel for ew in self.env_workers: ew.send(EnvironmentCommand.RESET, 
config) @@ -299,6 +302,15 @@ def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]: ew.previous_step = EnvironmentStep(ew.recv().payload, ew.worker_id, {}, {}) return list(map(lambda ew: ew.previous_step, self.env_workers)) + def set_env_parameters(self, config: Dict = None) -> None: + """ + Sends environment parameter settings to C# via the + EnvironmentParametersSidechannel for each worker. + :param config: Dict of environment parameter keys and values + """ + for ew in self.env_workers: + ew.send(EnvironmentCommand.ENVIRONMENT_PARAMETERS, config) + @property def external_brains(self) -> Dict[BehaviorName, BrainParameters]: self.env_workers[0].send(EnvironmentCommand.EXTERNAL_BRAINS)