diff --git a/.github/workflows/regressions.yml b/.github/workflows/regressions.yml index 46414dadc9..8bb0addcf4 100644 --- a/.github/workflows/regressions.yml +++ b/.github/workflows/regressions.yml @@ -327,7 +327,7 @@ jobs: && github.event.action == 'labeled' && github.event.label.name == 'regression-tests' ) - uses: peter-evans/find-comment@v1 + uses: peter-evans/find-comment@v2 id: comment_finder with: issue-number: ${{ github.event.pull_request.number }} diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index b4bb87fafd..5d24ae0627 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -9,7 +9,7 @@ jobs: stale: runs-on: ubuntu-latest steps: - - uses: actions/stale@v4 + - uses: actions/stale@v5 with: days-before-stale: 60 days-before-close: 7 diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 3902aded6e..8726ab8c6d 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -244,7 +244,7 @@ def __init__( if isinstance(disable_evaluator_output, Iterable): disable_evaluator_output = list(disable_evaluator_output) # Incase iterator - allowed = set(["model", "cv_model", "y_optimization", "y_test", "y_valid"]) + allowed = set(["model", "cv_model", "y_optimization", "y_test"]) unknown = allowed - set(disable_evaluator_output) if any(unknown): raise ValueError( @@ -656,273 +656,282 @@ def fit( # By default try to use the TCP logging port or get a new port self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT - self._logger = self._get_logger(dataset_name) - # The first thing we have to do is create the logger to update the backend - self._backend.setup_logger(self._logger_port) + # Once we start the logging server, it starts in a new process + # If an error occurs then we want to make sure that we exit cleanly + # and shut it down, else it might hang + # https://github.com/automl/auto-sklearn/issues/1480 + try: + self._logger = self._get_logger(dataset_name) - if not only_return_configuration_space: - # If only querying the configuration space, we do not save the start time - # The start time internally checks for the fit() method to execute only once - # But this does not apply when only querying the configuration space - self._backend.save_start_time(self._seed) + # The first thing we have to do is create the logger to update the backend + self._backend.setup_logger(self._logger_port) - self._stopwatch = StopWatch() + if not only_return_configuration_space: + # If only querying the configuration space, we do not save the start + # time The start time internally checks for the fit() method to execute + # only once but this does not apply when only querying the configuration + # space + self._backend.save_start_time(self._seed) - # Make sure that input is valid - # Performs Ordinal one hot encoding to the target - # both for train and test data - self.InputValidator = InputValidator( - is_classification=is_classification, - feat_type=feat_type, - logger_port=self._logger_port, - allow_string_features=self.allow_string_features, - ) - self.InputValidator.fit(X_train=X, y_train=y, X_test=X_test, y_test=y_test) - X, y = self.InputValidator.transform(X, y) + self._stopwatch = StopWatch() - if X_test is not None and y_test is not None: - X_test, y_test = self.InputValidator.transform(X_test, y_test) + # Make sure that input is valid + # Performs Ordinal one hot encoding to the target + # both for train and test data + self.InputValidator = InputValidator( + is_classification=is_classification, + feat_type=feat_type, + 
logger_port=self._logger_port, + allow_string_features=self.allow_string_features, + ) + self.InputValidator.fit(X_train=X, y_train=y, X_test=X_test, y_test=y_test) + X, y = self.InputValidator.transform(X, y) - # We don't support size reduction on pandas type object yet - if ( - self._dataset_compression is not None - and not isinstance(X, pd.DataFrame) - and not (isinstance(y, pd.Series) or isinstance(y, pd.DataFrame)) - ): - methods = self._dataset_compression["methods"] - memory_allocation = self._dataset_compression["memory_allocation"] - - # Remove precision reduction if we can't perform it - if "precision" in methods and X.dtype not in supported_precision_reductions: - methods = [method for method in methods if method != "precision"] - - with warnings_to(self._logger): - X, y = reduce_dataset_size_if_too_large( - X=X, - y=y, - memory_limit=self._memory_limit, - is_classification=is_classification, - random_state=self._seed, - operations=methods, - memory_allocation=memory_allocation, - ) + if X_test is not None and y_test is not None: + X_test, y_test = self.InputValidator.transform(X_test, y_test) - # Check the re-sampling strategy - try: + # We don't support size reduction on pandas type object yet + if ( + self._dataset_compression is not None + and not isinstance(X, pd.DataFrame) + and not (isinstance(y, pd.Series) or isinstance(y, pd.DataFrame)) + ): + methods = self._dataset_compression["methods"] + memory_allocation = self._dataset_compression["memory_allocation"] + + # Remove precision reduction if we can't perform it + if ( + "precision" in methods + and X.dtype not in supported_precision_reductions + ): + methods = [method for method in methods if method != "precision"] + + with warnings_to(self._logger): + X, y = reduce_dataset_size_if_too_large( + X=X, + y=y, + memory_limit=self._memory_limit, + is_classification=is_classification, + random_state=self._seed, + operations=methods, + memory_allocation=memory_allocation, + ) + + # Check the re-sampling strategy self._check_resampling_strategy( X=X, y=y, task=self._task, ) - except Exception as e: - self._fit_cleanup() - raise e - - # Reset learnt stuff - self.models_ = None - self.cv_models_ = None - self.ensemble_ = None - # The metric must exist as of this point - # It can be provided in the constructor, or automatically - # defined in the estimator fit call - if isinstance(self._metrics, Sequence): - for entry in self._metrics: - if not isinstance(entry, Scorer): - raise ValueError( - "Metric {entry} must be instance of autosklearn.metrics.Scorer." - ) - else: - raise ValueError( - "Metric must be a sequence of instances of " - "autosklearn.metrics.Scorer." - ) + # Reset learnt stuff + self.models_ = None + self.cv_models_ = None + self.ensemble_ = None - # If no dask client was provided, we create one, so that we can - # start a ensemble process in parallel to smbo optimize - if self._dask_client is None and ( - self._ensemble_class is not None - or self._n_jobs is not None - and self._n_jobs > 1 - ): - self._create_dask_client() - else: - self._is_dask_client_internally_created = False + # The metric must exist as of this point + # It can be provided in the constructor, or automatically + # defined in the estimator fit call + if isinstance(self._metrics, Sequence): + for entry in self._metrics: + if not isinstance(entry, Scorer): + raise ValueError( + f"Metric {entry} must be instance of" + " autosklearn.metrics.Scorer." 
+ ) + else: + raise ValueError( + "Metric must be a sequence of instances of " + "autosklearn.metrics.Scorer." + ) - self._dataset_name = dataset_name - self._stopwatch.start(self._dataset_name) + # If no dask client was provided, we create one, so that we can + # start a ensemble process in parallel to smbo optimize + if self._dask_client is None and ( + self._ensemble_class is not None + or self._n_jobs is not None + and self._n_jobs > 1 + ): + self._create_dask_client() + else: + self._is_dask_client_internally_created = False - # Take the feature types from the validator - self._feat_type = self.InputValidator.feature_validator.feat_type + self._dataset_name = dataset_name + self._stopwatch.start(self._dataset_name) - self._log_fit_setup() + # Take the feature types from the validator + self._feat_type = self.InputValidator.feature_validator.feat_type - # == Pickle the data manager to speed up loading - with self._stopwatch.time("Save Datamanager"): - datamanager = XYDataManager( - X, - y, - X_test=X_test, - y_test=y_test, - task=self._task, - feat_type=self._feat_type, - dataset_name=dataset_name, - ) + self._log_fit_setup() - self._backend._make_internals_directory() - self._label_num = datamanager.info["label_num"] - - self._backend.save_datamanager(datamanager) - - # = Create a searchspace - # Do this before One Hot Encoding to make sure that it creates a - # search space for a dense classifier even if one hot encoding would - # make it sparse (tradeoff; if one hot encoding would make it sparse, - # densifier and truncatedSVD would probably lead to a MemoryError, - # like this we can't use some of the preprocessing methods in case - # the data became sparse) - with self._stopwatch.time("Create Search space"): - self.configuration_space, configspace_path = self._create_search_space( - self._backend.temporary_directory, - self._backend, - datamanager, - include=self._include, - exclude=self._exclude, - ) - - if only_return_configuration_space: - self._fit_cleanup() - return self.configuration_space - - # == Perform dummy predictions - with self._stopwatch.time("Dummy predictions"): - self.num_run += 1 - self._do_dummy_prediction() - - # == RUN ensemble builder - # Do this before calculating the meta-features to make sure that the - # dummy predictions are actually included in the ensemble even if - # calculating the meta-features takes very long - with self._stopwatch.time("Run Ensemble Builder"): - - elapsed_time = self._stopwatch.time_since(self._dataset_name, "start") - - time_left_for_ensembles = max(0, self._time_for_task - elapsed_time) - proc_ensemble = None - if time_left_for_ensembles <= 0: - # Fit only raises error when an ensemble class is given but - # time_left_for_ensembles is zero. - if self._ensemble_class is not None: - raise ValueError( - "Not starting ensemble builder because there " - "is no time left. Try increasing the value " - "of time_left_for_this_task." - ) - elif self._ensemble_class is None: - self._logger.info( - "Not starting ensemble builder because no ensemble class is given." 
- ) - else: - self._logger.info( - "Start Ensemble with %5.2fsec time left" % time_left_for_ensembles - ) - - proc_ensemble = EnsembleBuilderManager( - start_time=time.time(), - time_left_for_ensembles=time_left_for_ensembles, - backend=copy.deepcopy(self._backend), - dataset_name=dataset_name, + # == Pickle the data manager to speed up loading + with self._stopwatch.time("Save Datamanager"): + datamanager = XYDataManager( + X, + y, + X_test=X_test, + y_test=y_test, task=self._task, - metrics=self._metrics, - ensemble_class=self._ensemble_class, - ensemble_kwargs=self._ensemble_kwargs, - ensemble_nbest=self._ensemble_nbest, - max_models_on_disc=self._max_models_on_disc, - seed=self._seed, - precision=self.precision, - max_iterations=self._max_ensemble_build_iterations, - read_at_most=self._read_at_most, - memory_limit=self._memory_limit, - random_state=self._seed, - logger_port=self._logger_port, - pynisher_context=self._multiprocessing_context, + feat_type=self._feat_type, + dataset_name=dataset_name, ) - # kill the datamanager as it will be re-loaded anyways from sub processes - try: - del self._datamanager - except Exception: - pass + self._backend._make_internals_directory() + self._label_num = datamanager.info["label_num"] + + self._backend.save_datamanager(datamanager) + + # = Create a searchspace + # Do this before One Hot Encoding to make sure that it creates a + # search space for a dense classifier even if one hot encoding would + # make it sparse (tradeoff; if one hot encoding would make it sparse, + # densifier and truncatedSVD would probably lead to a MemoryError, + # like this we can't use some of the preprocessing methods in case + # the data became sparse) + with self._stopwatch.time("Create Search space"): + self.configuration_space, configspace_path = self._create_search_space( + self._backend.temporary_directory, + self._backend, + datamanager, + include=self._include, + exclude=self._exclude, + ) - # => RUN SMAC - with self._stopwatch.time("Run SMAC"): - elapsed_time = self._stopwatch.time_since(self._dataset_name, "start") - time_left = self._time_for_task - elapsed_time - - if self._logger: - self._logger.info("Start SMAC with %5.2fsec time left" % time_left) - if time_left <= 0: - self._logger.warning("Not starting SMAC because there is no time left.") - _proc_smac = None - self._budget_type = None - else: - if ( - self._per_run_time_limit is None - or self._per_run_time_limit > time_left - ): - self._logger.warning( - "Time limit for a single run is higher than total time " - "limit. Capping the limit for a single run to the total " - "time given to SMAC (%f)" % time_left + if only_return_configuration_space: + return self.configuration_space + + # == Perform dummy predictions + with self._stopwatch.time("Dummy predictions"): + self.num_run += 1 + self._do_dummy_prediction() + + # == RUN ensemble builder + # Do this before calculating the meta-features to make sure that the + # dummy predictions are actually included in the ensemble even if + # calculating the meta-features takes very long + with self._stopwatch.time("Run Ensemble Builder"): + + elapsed_time = self._stopwatch.time_since(self._dataset_name, "start") + + time_left_for_ensembles = max(0, self._time_for_task - elapsed_time) + proc_ensemble = None + if time_left_for_ensembles <= 0: + # Fit only raises error when an ensemble class is given but + # time_left_for_ensembles is zero. + if self._ensemble_class is not None: + raise ValueError( + "Not starting ensemble builder because there " + "is no time left. 
Try increasing the value " + "of time_left_for_this_task." + ) + elif self._ensemble_class is None: + self._logger.info( + "No ensemble buildin because no ensemble class was given." ) - per_run_time_limit = time_left else: - per_run_time_limit = self._per_run_time_limit + self._logger.info( + "Start Ensemble with %5.2fsec time left" + % time_left_for_ensembles + ) + + proc_ensemble = EnsembleBuilderManager( + start_time=time.time(), + time_left_for_ensembles=time_left_for_ensembles, + backend=copy.deepcopy(self._backend), + dataset_name=dataset_name, + task=self._task, + metrics=self._metrics, + ensemble_class=self._ensemble_class, + ensemble_kwargs=self._ensemble_kwargs, + ensemble_nbest=self._ensemble_nbest, + max_models_on_disc=self._max_models_on_disc, + seed=self._seed, + precision=self.precision, + max_iterations=self._max_ensemble_build_iterations, + read_at_most=self._read_at_most, + memory_limit=self._memory_limit, + random_state=self._seed, + logger_port=self._logger_port, + pynisher_context=self._multiprocessing_context, + ) - # Make sure that at least 2 models are created for the ensemble process - num_models = time_left // per_run_time_limit - if num_models < 2: - per_run_time_limit = time_left // 2 + # kill the datamanager as it will be re-loaded anyways from sub processes + try: + del self._datamanager + except Exception: + pass + + # => RUN SMAC + with self._stopwatch.time("Run SMAC"): + elapsed_time = self._stopwatch.time_since(self._dataset_name, "start") + time_left = self._time_for_task - elapsed_time + + if self._logger: + self._logger.info("Start SMAC with %5.2fsec time left" % time_left) + if time_left <= 0: self._logger.warning( - "Capping the per_run_time_limit to {} to have " - "time for a least 2 models in each process.".format( - per_run_time_limit - ) + "Not starting SMAC because there is no time left." ) + _proc_smac = None + self._budget_type = None + else: + if ( + self._per_run_time_limit is None + or self._per_run_time_limit > time_left + ): + self._logger.warning( + "Time limit for a single run is higher than total time " + "limit. 
Capping the limit for a single run to the total " + "time given to SMAC (%f)" % time_left + ) + per_run_time_limit = time_left + else: + per_run_time_limit = self._per_run_time_limit + + # At least 2 models are created for the ensemble process + num_models = time_left // per_run_time_limit + if num_models < 2: + per_run_time_limit = time_left // 2 + self._logger.warning( + "Capping the per_run_time_limit to {} to have " + "time for a least 2 models in each process.".format( + per_run_time_limit + ) + ) - _proc_smac = AutoMLSMBO( - config_space=self.configuration_space, - dataset_name=self._dataset_name, - backend=self._backend, - total_walltime_limit=time_left, - func_eval_time_limit=per_run_time_limit, - memory_limit=self._memory_limit, - data_memory_limit=self._data_memory_limit, - stopwatch=self._stopwatch, - n_jobs=self._n_jobs, - dask_client=self._dask_client, - start_num_run=self.num_run, - num_metalearning_cfgs=self._initial_configurations_via_metalearning, - config_file=configspace_path, - seed=self._seed, - metadata_directory=self._metadata_directory, - metrics=self._metrics, - resampling_strategy=self._resampling_strategy, - resampling_strategy_args=self._resampling_strategy_arguments, - include=self._include, - exclude=self._exclude, - disable_file_output=self._disable_evaluator_output, - get_smac_object_callback=self._get_smac_object_callback, - smac_scenario_args=self._smac_scenario_args, - scoring_functions=self._scoring_functions, - port=self._logger_port, - pynisher_context=self._multiprocessing_context, - ensemble_callback=proc_ensemble, - trials_callback=self._get_trials_callback, - ) + n_meta_configs = self._initial_configurations_via_metalearning + _proc_smac = AutoMLSMBO( + config_space=self.configuration_space, + dataset_name=self._dataset_name, + backend=self._backend, + total_walltime_limit=time_left, + func_eval_time_limit=per_run_time_limit, + memory_limit=self._memory_limit, + data_memory_limit=self._data_memory_limit, + stopwatch=self._stopwatch, + n_jobs=self._n_jobs, + dask_client=self._dask_client, + start_num_run=self.num_run, + num_metalearning_cfgs=n_meta_configs, + config_file=configspace_path, + seed=self._seed, + metadata_directory=self._metadata_directory, + metrics=self._metrics, + resampling_strategy=self._resampling_strategy, + resampling_strategy_args=self._resampling_strategy_arguments, + include=self._include, + exclude=self._exclude, + disable_file_output=self._disable_evaluator_output, + get_smac_object_callback=self._get_smac_object_callback, + smac_scenario_args=self._smac_scenario_args, + scoring_functions=self._scoring_functions, + port=self._logger_port, + pynisher_context=self._multiprocessing_context, + ensemble_callback=proc_ensemble, + trials_callback=self._get_trials_callback, + ) - try: ( self.runhistory_, self.trajectory_, @@ -938,42 +947,49 @@ def fit( ] with open(trajectory_filename, "w") as fh: json.dump(saveable_trajectory, fh) - except Exception as e: - self._logger.exception(e) - raise - - self._logger.info("Starting shutdown...") - # Wait until the ensemble process is finished to avoid shutting down - # while the ensemble builder tries to access the data - if proc_ensemble is not None: - self.ensemble_performance_history = list(proc_ensemble.history) - - if len(proc_ensemble.futures) > 0: - # Now we need to wait for the future to return as it cannot be cancelled - # while it is running: https://stackoverflow.com/a/49203129 - self._logger.info( - "Ensemble script still running, waiting for it to finish." 
- ) - result = proc_ensemble.futures.pop().result() - if result: - ensemble_history, _ = result - self.ensemble_performance_history.extend(ensemble_history) - self._logger.info("Ensemble script finished, continue shutdown.") - - # save the ensemble performance history file - if len(self.ensemble_performance_history) > 0: - pd.DataFrame(self.ensemble_performance_history).to_json( - os.path.join( - self._backend.internals_directory, "ensemble_history.json" + + self._logger.info("Starting shutdown...") + # Wait until the ensemble process is finished to avoid shutting down + # while the ensemble builder tries to access the data + if proc_ensemble is not None: + self.ensemble_performance_history = list(proc_ensemble.history) + + if len(proc_ensemble.futures) > 0: + # Now we wait for the future to return as it cannot be cancelled + # while it is running: https://stackoverflow.com/a/49203129 + self._logger.info( + "Ensemble script still running, waiting for it to finish." + ) + result = proc_ensemble.futures.pop().result() + if result: + ensemble_history, _ = result + self.ensemble_performance_history.extend(ensemble_history) + self._logger.info("Ensemble script finished, continue shutdown.") + + # save the ensemble performance history file + if len(self.ensemble_performance_history) > 0: + pd.DataFrame(self.ensemble_performance_history).to_json( + os.path.join( + self._backend.internals_directory, "ensemble_history.json" + ) ) - ) - if load_models: - self._logger.info("Loading models...") - self._load_models() - self._logger.info("Finished loading models...") + if load_models: + self._logger.info("Loading models...") + self._load_models() + self._logger.info("Finished loading models...") + + # The whole logic above from where we begin the logging server is capture + # in a try: finally: so that if something goes wrong, we at least close + # down the logging server, preventing it from hanging and not closing + # until ctrl+c is pressed + except Exception as e: + # This will be called before the _fit_cleanup + self._logger.exception(e) + raise e + finally: + self._fit_cleanup() - self._fit_cleanup() self.fitted = True return self diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 96e7f07d26..5afd8c597c 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -262,8 +262,8 @@ def __init__( list are: * ``'y_optimization'`` : do not save the predictions for the - optimization/validation set, which would later on be used to build - an ensemble. + optimization set, which would later on be used to build an ensemble. 
+ * ``model`` : do not save any model files smac_scenario_args : dict, optional (None) diff --git a/autosklearn/evaluation/__init__.py b/autosklearn/evaluation/__init__.py index 9563f4ef8b..ba17513ae0 100644 --- a/autosklearn/evaluation/__init__.py +++ b/autosklearn/evaluation/__init__.py @@ -230,14 +230,7 @@ def __init__( self.memory_limit = memory_limit dm = self.backend.load_datamanager() - if "X_valid" in dm.data and "Y_valid" in dm.data: - self._get_validation_loss = True - else: - self._get_validation_loss = False - if "X_test" in dm.data and "Y_test" in dm.data: - self._get_test_loss = True - else: - self._get_test_loss = False + self._get_test_loss = "X_test" in dm.data and "Y_test" in dm.data self.port = port self.pynisher_context = pynisher_context @@ -533,21 +526,6 @@ def run( additional_run_info["train_learning_curve"] = train_learning_curve additional_run_info["learning_curve_runtime"] = learning_curve_runtime - if self._get_validation_loss: - validation_learning_curve = ( - autosklearn.evaluation.util.extract_learning_curve( - info, - "validation_loss", - ) - ) - if len(validation_learning_curve) > 1: - additional_run_info[ - "validation_learning_curve" - ] = validation_learning_curve - additional_run_info[ - "learning_curve_runtime" - ] = learning_curve_runtime - if self._get_test_loss: test_learning_curve = ( autosklearn.evaluation.util.extract_learning_curve( diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index 6a189a86a0..b97f588a45 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -220,8 +220,6 @@ def __init__( self.include = include self.exclude = exclude - self.X_valid = self.datamanager.data.get("X_valid") - self.y_valid = self.datamanager.data.get("Y_valid") self.X_test = self.datamanager.data.get("X_test") self.y_test = self.datamanager.data.get("Y_test") @@ -359,7 +357,6 @@ def finish_up( loss: Union[Dict[str, float], float], train_loss: Optional[Dict[str, float]], opt_pred: np.ndarray, - valid_pred: np.ndarray, test_pred: np.ndarray, additional_run_info: Optional[TYPE_ADDITIONAL_INFO], file_output: bool, @@ -382,19 +379,12 @@ def finish_up( self.duration = time.time() - self.starttime if file_output: - file_out_loss, additional_run_info_ = self.file_output( - opt_pred, - valid_pred, - test_pred, - ) + file_out_loss, additional_run_info_ = self.file_output(opt_pred, test_pred) else: file_out_loss = None additional_run_info_ = {} - validation_loss, test_loss = self.calculate_auxiliary_losses( - valid_pred, - test_pred, - ) + test_loss = self.calculate_auxiliary_losses(test_pred) if file_out_loss is not None: return self.duration, file_out_loss, self.seed, additional_run_info_ @@ -424,8 +414,6 @@ def finish_up( additional_run_info["train_loss"] = [ train_loss[metric.name] for metric in self.metrics ] - if validation_loss is not None: - additional_run_info["validation_loss"] = validation_loss if test_loss is not None: additional_run_info["test_loss"] = test_loss @@ -442,41 +430,22 @@ def finish_up( def calculate_auxiliary_losses( self, - Y_valid_pred: np.ndarray, - Y_test_pred: np.ndarray, - ) -> Tuple[Optional[float | Sequence[float]], Optional[float | Sequence[float]]]: - if Y_valid_pred is not None: - if self.y_valid is not None: - validation_loss: Optional[Union[float, Dict[str, float]]] = self._loss( - self.y_valid, Y_valid_pred - ) - if len(self.metrics) == 1: - validation_loss = validation_loss[self.metrics[0].name] - else: - validation_loss = 
None - else: - validation_loss = None + Y_test_pred: np.ndarray | None, + ) -> float | dict[str, float] | None: + if Y_test_pred is None or self.y_test is None: + return None - if Y_test_pred is not None: - if self.y_test is not None: - test_loss: Optional[Union[float, Dict[str, float]]] = self._loss( - self.y_test, Y_test_pred - ) - if len(self.metrics) == 1: - test_loss = test_loss[self.metrics[0].name] - else: - test_loss = None - else: - test_loss = None + test_loss = self._loss(self.y_test, Y_test_pred) + if len(self.metrics) == 1: + test_loss = test_loss[self.metrics[0].name] - return validation_loss, test_loss + return test_loss def file_output( self, Y_optimization_pred: np.ndarray, - Y_valid_pred: np.ndarray, Y_test_pred: np.ndarray, - ) -> Tuple[Optional[float], Dict[str, Union[str, int, float, List, Dict, Tuple]]]: + ) -> tuple[float | None, dict[str, Any]]: # Abort if self.Y_optimization is None # self.Y_optimization can be None if we use partial-cv, then, # obviously no output should be saved. @@ -496,12 +465,7 @@ def file_output( ) # Abort if predictions contain NaNs - for y, s in [ - # Y_train_pred deleted here. Fix unittest accordingly. - [Y_optimization_pred, "optimization"], - [Y_valid_pred, "validation"], - [Y_test_pred, "test"], - ]: + for y, s in [(Y_optimization_pred, "optimization"), (Y_test_pred, "test")]: if y is not None and not np.all(np.isfinite(y)): return ( 1.0, @@ -553,14 +517,13 @@ def file_output( budget=self.budget, model=self.model if "model" not in self.disable_file_output else None, cv_model=models if "cv_model" not in self.disable_file_output else None, + # TODO: below line needs to be deleted once backend is updated + valid_predictions=None, ensemble_predictions=( Y_optimization_pred if "y_optimization" not in self.disable_file_output else None ), - valid_predictions=( - Y_valid_pred if "y_valid" not in self.disable_file_output else None - ), test_predictions=( Y_test_pred if "y_test" not in self.disable_file_output else None ), diff --git a/autosklearn/evaluation/test_evaluator.py b/autosklearn/evaluation/test_evaluator.py index e76186aa06..d624c1a44d 100644 --- a/autosklearn/evaluation/test_evaluator.py +++ b/autosklearn/evaluation/test_evaluator.py @@ -67,7 +67,6 @@ def fit_predict_and_loss(self) -> None: loss=loss, train_loss=None, opt_pred=Y_pred, - valid_pred=None, test_pred=None, file_output=False, final_call=True, @@ -78,7 +77,6 @@ def fit_predict_and_loss(self) -> None: def predict_and_loss( self, train: bool = False ) -> Tuple[Union[Dict[str, float], float], np.array, Any, Any]: - if train: Y_pred = self.predict_function( self.X_train, self.model, self.task_type, self.Y_train diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index a8433c2136..f19db473bf 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -316,7 +316,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_train_pred = [None] * self.num_cv_folds Y_optimization_pred = [None] * self.num_cv_folds - Y_valid_pred = [None] * self.num_cv_folds Y_test_pred = [None] * self.num_cv_folds train_splits = [None] * self.num_cv_folds @@ -417,7 +416,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: **fit_params_array[i], ) - (train_pred, opt_pred, valid_pred, test_pred) = self._predict( + (train_pred, opt_pred, test_pred) = self._predict( model, train_indices=train_indices, test_indices=test_indices, @@ -425,7 +424,6 @@ def fit_predict_and_loss(self, 
iterative: bool = False) -> None: Y_train_pred[i] = train_pred Y_optimization_pred[i] = opt_pred - Y_valid_pred[i] = valid_pred Y_test_pred[i] = test_pred train_splits[i] = train_indices @@ -499,20 +497,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: X_targets = concat_data(X_targets, num_cv_folds=self.num_cv_folds) Y_targets = concat_data(Y_targets, num_cv_folds=self.num_cv_folds) - if self.X_valid is not None: - Y_valid_preds = np.array( - [ - Y_valid_pred[i] - for i in range(self.num_cv_folds) - if Y_valid_pred[i] is not None - ] - ) - # Average the predictions of several models - if len(Y_valid_preds.shape) == 3: - Y_valid_preds = np.nanmean(Y_valid_preds, axis=0) - else: - Y_valid_preds = None - if self.X_test is not None: Y_test_preds = np.array( [ @@ -544,7 +528,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: loss=opt_loss, train_loss=train_loss, opt_pred=Y_optimization_pred_concat, - valid_pred=Y_valid_preds, test_pred=Y_test_preds, additional_run_info=additional_run_info, file_output=True, @@ -558,7 +541,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_train_pred = [None] * self.num_cv_folds Y_optimization_pred = [None] * self.num_cv_folds - Y_valid_pred = [None] * self.num_cv_folds Y_test_pred = [None] * self.num_cv_folds train_splits = [None] * self.num_cv_folds @@ -586,7 +568,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: ( train_pred, opt_pred, - valid_pred, test_pred, additional_run_info, ) = self._partial_fit_and_predict_standard( @@ -599,7 +580,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: ( train_pred, opt_pred, - valid_pred, test_pred, additional_run_info, ) = self._partial_fit_and_predict_budget( @@ -622,7 +602,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_train_pred[i] = train_pred Y_optimization_pred[i] = opt_pred - Y_valid_pred[i] = valid_pred Y_test_pred[i] = test_pred train_splits[i] = train_split @@ -683,18 +662,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: X_targets = concat_data(X_targets, num_cv_folds=self.num_cv_folds) Y_targets = concat_data(Y_targets, num_cv_folds=self.num_cv_folds) - if self.X_valid is not None: - Y_valid_pred = np.array( - [ - Y_valid_pred[i] - for i in range(self.num_cv_folds) - if Y_valid_pred[i] is not None - ] - ) - # Average the predictions of several models - if len(np.shape(Y_valid_pred)) == 3: - Y_valid_pred = np.nanmean(Y_valid_pred, axis=0) - if self.X_test is not None: Y_test_pred = np.array( [ @@ -746,7 +713,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: loss=opt_loss, train_loss=train_loss, opt_pred=Y_optimization_pred, - valid_pred=Y_valid_pred if self.X_valid is not None else None, test_pred=Y_test_pred if self.X_test is not None else None, additional_run_info=additional_run_info, file_output=True, @@ -793,7 +759,6 @@ def partial_fit_predict_and_loss(self, fold: int, iterative: bool = False) -> No ( train_pred, opt_pred, - valid_pred, test_pred, additional_run_info, ) = self._partial_fit_and_predict_standard( @@ -819,7 +784,6 @@ def partial_fit_predict_and_loss(self, fold: int, iterative: bool = False) -> No loss=loss, train_loss=train_loss, opt_pred=opt_pred, - valid_pred=valid_pred, test_pred=test_pred, file_output=False, final_call=True, @@ -883,12 +847,7 @@ def _partial_fit_and_predict_iterative( n_iter=n_iter, **fit_params, ) - ( - Y_train_pred, - Y_optimization_pred, - Y_valid_pred, - Y_test_pred, - ) = self._predict( + (Y_train_pred, 
Y_optimization_pred, Y_test_pred,) = self._predict( model, train_indices=train_indices, test_indices=test_indices, @@ -921,7 +880,6 @@ def _partial_fit_and_predict_iterative( loss=loss, train_loss=train_loss, opt_pred=Y_optimization_pred, - valid_pred=Y_valid_pred, test_pred=Y_test_pred, additional_run_info=additional_run_info, file_output=file_output, @@ -936,7 +894,6 @@ def _partial_fit_and_predict_iterative( ( Y_train_pred, Y_optimization_pred, - Y_valid_pred, Y_test_pred, additional_run_info, ) = self._partial_fit_and_predict_standard( @@ -962,7 +919,6 @@ def _partial_fit_and_predict_iterative( loss=loss, train_loss=train_loss, opt_pred=Y_optimization_pred, - valid_pred=Y_valid_pred, test_pred=Y_test_pred, additional_run_info=additional_run_info, file_output=file_output, @@ -980,7 +936,6 @@ def _partial_fit_and_predict_standard( ) -> Tuple[ PIPELINE_DATA_DTYPE, # train_pred PIPELINE_DATA_DTYPE, # opt_pred - PIPELINE_DATA_DTYPE, # valid_pred PIPELINE_DATA_DTYPE, # test_pred TYPE_ADDITIONAL_INFO, ]: @@ -1020,7 +975,7 @@ def _partial_fit_and_predict_standard( else self.Y_train[train_indices] ) - train_pred, opt_pred, valid_pred, test_pred = self._predict( + train_pred, opt_pred, test_pred = self._predict( model=model, train_indices=train_indices, test_indices=test_indices, @@ -1029,7 +984,6 @@ def _partial_fit_and_predict_standard( return ( train_pred, opt_pred, - valid_pred, test_pred, additional_run_info, ) @@ -1043,7 +997,6 @@ def _partial_fit_and_predict_budget( ) -> Tuple[ PIPELINE_DATA_DTYPE, # train_pred PIPELINE_DATA_DTYPE, # opt_pred - PIPELINE_DATA_DTYPE, # valid_pred PIPELINE_DATA_DTYPE, # test_pred TYPE_ADDITIONAL_INFO, ]: @@ -1073,7 +1026,7 @@ def _partial_fit_and_predict_budget( task_type=self.task_type, ) - train_pred, opt_pred, valid_pred, test_pred = self._predict( + train_pred, opt_pred, test_pred = self._predict( model, train_indices=train_indices, test_indices=test_indices, @@ -1088,19 +1041,13 @@ def _partial_fit_and_predict_budget( return ( train_pred, opt_pred, - valid_pred, test_pred, additional_run_info, ) def _predict( self, model: BaseEstimator, test_indices: List[int], train_indices: List[int] - ) -> Tuple[ - PIPELINE_DATA_DTYPE, - PIPELINE_DATA_DTYPE, - PIPELINE_DATA_DTYPE, - PIPELINE_DATA_DTYPE, - ]: + ) -> Tuple[PIPELINE_DATA_DTYPE, PIPELINE_DATA_DTYPE, PIPELINE_DATA_DTYPE]: train_pred = self.predict_function( self.X_train.iloc[train_indices] if hasattr(self.X_train, "iloc") @@ -1123,14 +1070,6 @@ def _predict( else self.Y_train[train_indices], ) - if self.X_valid is not None: - X_valid = self.X_valid.copy() - valid_pred = self.predict_function( - X_valid, model, self.task_type, self.Y_train[train_indices] - ) - else: - valid_pred = None - if self.X_test is not None: X_test = self.X_test.copy() test_pred = self.predict_function( @@ -1144,7 +1083,7 @@ def _predict( else: test_pred = None - return train_pred, opt_pred, valid_pred, test_pred + return train_pred, opt_pred, test_pred def get_splitter( self, D: AbstractDataManager diff --git a/test/test_evaluation/evaluation_util.py b/test/test_evaluation/evaluation_util.py index 62623a50ba..38040f2e4e 100644 --- a/test/test_evaluation/evaluation_util.py +++ b/test/test_evaluation/evaluation_util.py @@ -133,27 +133,14 @@ def get_multiclass_classification_datamanager(): np.random.shuffle(indices) X_train = X_train[indices] Y_train = Y_train[indices] - - X_valid = X_test[ - :25, - ] - Y_valid = Y_test[ - :25, - ] - X_test = X_test[ - 25:, - ] - Y_test = Y_test[ - 25:, - ] + X_test = X_test[25:] + Y_test = 
Y_test[25:] D = Dummy() D.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False, "label_num": 3} D.data = { "X_train": X_train, "Y_train": Y_train, - "X_valid": X_valid, - "Y_valid": Y_valid, "X_test": X_test, "Y_test": Y_test, } @@ -196,34 +183,16 @@ def get_multilabel_classification_datamanager(): Y_train = Y_train[indices] Y_train = np.array(convert_to_bin(Y_train, 3)) - # for i in range(Y_train_.shape[0]): - # Y_train_[:, Y_train[i]] = 1 - # Y_train = Y_train_ Y_test = np.array(convert_to_bin(Y_test, 3)) - # for i in range(Y_test_.shape[0]): - # Y_test_[:, Y_test[i]] = 1 - # Y_test = Y_test_ - X_valid = X_test[ - :25, - ] - Y_valid = Y_test[ - :25, - ] - X_test = X_test[ - 25:, - ] - Y_test = Y_test[ - 25:, - ] + X_test = X_test[25:] + Y_test = Y_test[25:] D = Dummy() D.info = {"task": MULTILABEL_CLASSIFICATION, "is_sparse": False, "label_num": 3} D.data = { "X_train": X_train, "Y_train": Y_train, - "X_valid": X_valid, - "Y_valid": Y_valid, "X_test": X_test, "Y_test": Y_test, } @@ -247,26 +216,14 @@ def get_binary_classification_datamanager(): X_test = X_test[eliminate_class_two] Y_test = Y_test[eliminate_class_two] - X_valid = X_test[ - :25, - ] - Y_valid = Y_test[ - :25, - ] - X_test = X_test[ - 25:, - ] - Y_test = Y_test[ - 25:, - ] + X_test = X_test[25:] + Y_test = Y_test[25:] D = Dummy() D.info = {"task": BINARY_CLASSIFICATION, "is_sparse": False, "label_num": 2} D.data = { "X_train": X_train, "Y_train": Y_train.reshape((-1, 1)), - "X_valid": X_valid, - "Y_valid": Y_valid.reshape((-1, 1)), "X_test": X_test, "Y_test": Y_test.reshape((-1, 1)), } @@ -282,26 +239,14 @@ def get_regression_datamanager(): X_train = X_train[indices] Y_train = Y_train[indices] - X_valid = X_test[ - :200, - ] - Y_valid = Y_test[ - :200, - ] - X_test = X_test[ - 200:, - ] - Y_test = Y_test[ - 200:, - ] + X_test = X_test[200:] + Y_test = Y_test[200:] D = Dummy() D.info = {"task": REGRESSION, "is_sparse": False, "label_num": 1} D.data = { "X_train": X_train, "Y_train": Y_train.reshape((-1, 1)), - "X_valid": X_valid, - "Y_valid": Y_valid.reshape((-1, 1)), "X_test": X_test, "Y_test": Y_test.reshape((-1, 1)), } @@ -334,8 +279,6 @@ def get_500_classes_datamanager(): D.data = { "X_train": X[:700], "Y_train": Y[:700], - "X_valid": X[700:710], - "Y_valid": Y[700:710], "X_test": X[710:], "Y_test": Y[710:], } diff --git a/test/test_evaluation/test_abstract_evaluator.py b/test/test_evaluation/test_abstract_evaluator.py index 7bd52c0f76..e2473d738b 100644 --- a/test/test_evaluation/test_abstract_evaluator.py +++ b/test/test_evaluation/test_abstract_evaluator.py @@ -71,7 +71,6 @@ def test_finish_up_model_predicts_NaN(self): ae.Y_optimization = rs.rand(33, 3) predictions_ensemble = rs.rand(33, 3) predictions_test = rs.rand(25, 3) - predictions_valid = rs.rand(25, 3) # NaNs in prediction ensemble predictions_ensemble[5, 2] = np.NaN @@ -79,7 +78,6 @@ def test_finish_up_model_predicts_NaN(self): loss=0.1, train_loss=0.1, opt_pred=predictions_ensemble, - valid_pred=predictions_valid, test_pred=predictions_test, additional_run_info=None, final_call=True, @@ -89,37 +87,15 @@ def test_finish_up_model_predicts_NaN(self): self.assertEqual(loss, 1.0) self.assertEqual( additional_run_info, - {"error": "Model predictions for optimization set " "contains NaNs."}, + {"error": "Model predictions for optimization set contains NaNs."}, ) - # NaNs in prediction validation - predictions_ensemble[5, 2] = 0.5 - predictions_valid[5, 2] = np.NaN - _, loss, _, additional_run_info = ae.finish_up( - loss=0.1, - train_loss=0.1, - 
opt_pred=predictions_ensemble, - valid_pred=predictions_valid, - test_pred=predictions_test, - additional_run_info=None, - final_call=True, - file_output=True, - status=StatusType.SUCCESS, - ) - self.assertEqual(loss, 1.0) - self.assertEqual( - additional_run_info, - {"error": "Model predictions for validation set " "contains NaNs."}, - ) - - # NaNs in prediction test - predictions_valid[5, 2] = 0.5 + predictions_ensemble = rs.rand(33, 3) predictions_test[5, 2] = np.NaN _, loss, _, additional_run_info = ae.finish_up( loss=0.1, train_loss=0.1, opt_pred=predictions_ensemble, - valid_pred=predictions_valid, test_pred=predictions_test, additional_run_info=None, final_call=True, @@ -129,9 +105,8 @@ def test_finish_up_model_predicts_NaN(self): self.assertEqual(loss, 1.0) self.assertEqual( additional_run_info, - {"error": "Model predictions for test set contains " "NaNs."}, + {"error": "Model predictions for test set contains NaNs."}, ) - self.assertEqual(self.backend_mock.save_predictions_as_npy.call_count, 0) def test_disable_file_output(self): @@ -150,11 +125,9 @@ def test_disable_file_output(self): predictions_ensemble = rs.rand(33, 3) predictions_test = rs.rand(25, 3) - predictions_valid = rs.rand(25, 3) loss_, additional_run_info_ = ae.file_output( predictions_ensemble, - predictions_valid, predictions_test, ) @@ -179,7 +152,6 @@ def test_disable_file_output(self): loss_, additional_run_info_ = ae.file_output( predictions_ensemble, - predictions_valid, predictions_test, ) @@ -211,11 +183,6 @@ def test_disable_file_output(self): "ensemble_predictions" ] ) - self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - "valid_predictions" - ] - ) self.assertIsNotNone( self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ "test_predictions" @@ -237,7 +204,6 @@ def test_disable_file_output(self): loss_, additional_run_info_ = ae.file_output( predictions_ensemble, - predictions_valid, predictions_test, ) @@ -249,11 +215,6 @@ def test_disable_file_output(self): "ensemble_predictions" ] ) - self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - "valid_predictions" - ] - ) self.assertIsNotNone( self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ "test_predictions" @@ -296,11 +257,9 @@ def test_file_output(self): ae.Y_optimization = rs.rand(33, 3) predictions_ensemble = rs.rand(33, 3) predictions_test = rs.rand(25, 3) - predictions_valid = rs.rand(25, 3) ae.file_output( Y_optimization_pred=predictions_ensemble, - Y_valid_pred=predictions_valid, Y_test_pred=predictions_test, ) diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py index 14c36f2afc..c8fe1c5f87 100644 --- a/test/test_evaluation/test_train_evaluator.py +++ b/test/test_evaluation/test_train_evaluator.py @@ -7,6 +7,7 @@ import shutil import sys import tempfile +from itertools import chain import numpy as np import sklearn.model_selection @@ -68,6 +69,24 @@ ) +class LossSideEffect(object): + """Some kind of re-used fixture for losses calculated""" + + def __init__(self): + # The 3 below is related to train, test, opt sets + self.losses = [ + {"accuracy": value} + for value in chain.from_iterable( + [i] * 3 for i in [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2] + ) + ] + self.iteration = 0 + + def side_effect(self, *args, **kwargs): + self.iteration += 1 + return self.losses[self.iteration - 1] + + class Dummy(object): def __init__(self): self.name = "dummy" @@ -159,15 +178,14 @@ def test_holdout(self, pipeline_mock): 
self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(result, 0.45833333333333337) self.assertEqual(pipeline_mock.fit.call_count, 1) - # four calls because of train, holdout, validation and test set - self.assertEqual(pipeline_mock.predict_proba.call_count, 4) + # four calls because of train, holdout and test set + self.assertEqual(pipeline_mock.predict_proba.call_count, 3) self.assertEqual(evaluator.file_output.call_count, 1) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 24) + self.assertEqual( - evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] - ) - self.assertEqual( - evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_test"].shape[0] ) self.assertEqual(evaluator.model.fit.call_count, 1) @@ -240,46 +258,12 @@ def configuration_fully_fitted(self): class LossSideEffect(object): def __init__(self): + # The 3 below is related to train, test, opt sets self.losses = [ {"accuracy": value} - for value in [ - 1.0, - 1.0, - 1.0, - 1.0, - 0.9, - 0.9, - 0.9, - 0.9, - 0.8, - 0.8, - 0.8, - 0.8, - 0.7, - 0.7, - 0.7, - 0.7, - 0.6, - 0.6, - 0.6, - 0.6, - 0.5, - 0.5, - 0.5, - 0.5, - 0.4, - 0.4, - 0.4, - 0.4, - 0.3, - 0.3, - 0.3, - 0.3, - 0.2, - 0.2, - 0.2, - 0.2, - ] + for value in chain.from_iterable( + [i] * 3 for i in [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2] + ) ] self.iteration = 0 @@ -310,16 +294,12 @@ def side_effect(self, *args, **kwargs): [cal[1]["n_iter"] for cal in pipeline_mock.iterative_fit.call_args_list], [2, 2, 4, 8, 16, 32, 64, 128, 256], ) - # 20 calls because of train, holdout, validation and test set - # and a total of five calls because of five iterations of fitting - self.assertEqual(evaluator.model.predict_proba.call_count, 36) - # 1/3 of 69 + + # 9 per split type + self.assertEqual(evaluator.model.predict_proba.call_count, 27) self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) self.assertEqual( - evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] - ) - self.assertEqual( - evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_test"].shape[0] ) self.assertEqual(evaluator.file_output.call_count, 9) self.assertEqual(evaluator.model.fit.call_count, 0) @@ -443,15 +423,14 @@ def side_effect(self, *args, **kwargs): self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(pipeline_mock.iterative_fit.call_count, 2) - # eight calls because of train, holdout, the validation and the test set + + # 6 calls because of train, holdout and test set # and a total of two calls each because of two iterations of fitting - self.assertEqual(evaluator.model.predict_proba.call_count, 8) + self.assertEqual(evaluator.model.predict_proba.call_count, 6) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) self.assertEqual( - evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] - ) - self.assertEqual( - evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_test"].shape[0] ) self.assertEqual(evaluator.file_output.call_count, 2) self.assertEqual(evaluator.model.fit.call_count, 0) @@ -504,14 +483,13 @@ def test_iterative_holdout_not_iterative(self, pipeline_mock): self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(pipeline_mock.iterative_fit.call_count, 0) - # four calls for 
train, opt, valid and test - self.assertEqual(evaluator.model.predict_proba.call_count, 4) + + # 3 calls for train, opt and test + self.assertEqual(evaluator.model.predict_proba.call_count, 3) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) self.assertEqual( - evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] - ) - self.assertEqual( - evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_test"].shape[0] ) self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(evaluator.model.fit.call_count, 1) @@ -563,17 +541,14 @@ def test_cv(self, pipeline_mock): self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(result, 0.463768115942029) self.assertEqual(pipeline_mock.fit.call_count, 5) - # Fifteen calls because of the training, holdout, validation and - # test set (4 sets x 5 folds = 20) - self.assertEqual(pipeline_mock.predict_proba.call_count, 20) + + # 15 calls because of the training (5), holdout (5) and test set (5) + self.assertEqual(pipeline_mock.predict_proba.call_count, 15) self.assertEqual( evaluator.file_output.call_args[0][0].shape[0], D.data["Y_train"].shape[0] ) self.assertEqual( - evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] - ) - self.assertEqual( - evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_test"].shape[0] ) # The model prior to fitting is saved, this cannot be directly tested # because of the way the mock module is used. Instead, we test whether @@ -629,7 +604,7 @@ def test_partial_cv(self, pipeline_mock): self.assertEqual(evaluator.file_output.call_count, 0) self.assertEqual(return_value["loss"], 0.5) self.assertEqual(pipeline_mock.fit.call_count, 1) - self.assertEqual(pipeline_mock.predict_proba.call_count, 4) + self.assertEqual(pipeline_mock.predict_proba.call_count, 3) # The model prior to fitting is saved, this cannot be directly tested # because of the way the mock module is used. 
Instead, we test whether # the if block in which model assignment is done is accessed @@ -703,55 +678,6 @@ def configuration_fully_fitted(self): evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) evaluator.file_output.return_value = (None, {}) - class LossSideEffect(object): - def __init__(self): - self.losses = [ - {"accuracy": value} - for value in [ - 1.0, - 1.0, - 1.0, - 1.0, - 0.9, - 0.9, - 0.9, - 0.9, - 0.8, - 0.8, - 0.8, - 0.8, - 0.7, - 0.7, - 0.7, - 0.7, - 0.6, - 0.6, - 0.6, - 0.6, - 0.5, - 0.5, - 0.5, - 0.5, - 0.4, - 0.4, - 0.4, - 0.4, - 0.3, - 0.3, - 0.3, - 0.3, - 0.2, - 0.2, - 0.2, - 0.2, - ] - ] - self.iteration = 0 - - def side_effect(self, *args, **kwargs): - self.iteration += 1 - return self.losses[self.iteration - 1] - evaluator._loss = unittest.mock.Mock() evaluator._loss.side_effect = LossSideEffect().side_effect @@ -773,13 +699,9 @@ def side_effect(self, *args, **kwargs): [cal[1]["n_iter"] for cal in pipeline_mock.iterative_fit.call_args_list], [2, 2, 4, 8, 16, 32, 64, 128, 256], ) - # fifteen calls because of the holdout, the validation and the test set - # and a total of five calls because of five iterations of fitting self.assertTrue(hasattr(evaluator, "model")) self.assertEqual(pipeline_mock.iterative_fit.call_count, 9) - # 20 calls because of train, holdout, the validation and the test set - # and a total of five calls because of five iterations of fitting - self.assertEqual(pipeline_mock.predict_proba.call_count, 36) + self.assertEqual(pipeline_mock.predict_proba.call_count, 27) @unittest.mock.patch.object(TrainEvaluator, "_loss") @unittest.mock.patch.object(TrainEvaluator, "_get_model") @@ -809,11 +731,7 @@ def test_file_output(self, loss_mock, model_mock): self.backend_mock.get_model_dir.return_value = True evaluator.model = "model" evaluator.Y_optimization = D.data["Y_train"] - return_value = evaluator.file_output( - D.data["Y_train"], - D.data["Y_valid"], - D.data["Y_test"], - ) + return_value = evaluator.file_output(D.data["Y_train"], D.data["Y_test"]) self.assertEqual(return_value, (None, {})) self.assertEqual(self.backend_mock.save_additional_data.call_count, 2) @@ -826,8 +744,8 @@ def test_file_output(self, loss_mock, model_mock): "budget", "model", "cv_model", + "valid_predictions", # TODO remove once backend updated "ensemble_predictions", - "valid_predictions", "test_predictions", }, ) @@ -839,11 +757,7 @@ def test_file_output(self, loss_mock, model_mock): ) evaluator.models = ["model2", "model2"] - return_value = evaluator.file_output( - D.data["Y_train"], - D.data["Y_valid"], - D.data["Y_test"], - ) + return_value = evaluator.file_output(D.data["Y_train"], D.data["Y_test"]) self.assertEqual(return_value, (None, {})) self.assertEqual(self.backend_mock.save_additional_data.call_count, 4) self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 2) @@ -855,8 +769,8 @@ def test_file_output(self, loss_mock, model_mock): "budget", "model", "cv_model", + "valid_predictions", # TODO remove once backend updated "ensemble_predictions", - "valid_predictions", "test_predictions", }, ) @@ -867,27 +781,8 @@ def test_file_output(self, loss_mock, model_mock): self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]["cv_model"] ) - # Check for not containing NaNs - that the models don't predict nonsense - # for unseen data - D.data["Y_valid"][0] = np.NaN - return_value = evaluator.file_output( - D.data["Y_train"], - D.data["Y_valid"], - D.data["Y_test"], - ) - self.assertEqual( - return_value, - ( - 1.0, - {"error": "Model predictions for 
validation set contains NaNs."}, - ), - ) D.data["Y_train"][0] = np.NaN - return_value = evaluator.file_output( - D.data["Y_train"], - D.data["Y_valid"], - D.data["Y_test"], - ) + return_value = evaluator.file_output(D.data["Y_train"], D.data["Y_test"]) self.assertEqual( return_value, ( @@ -1086,7 +981,6 @@ def test_fit_predict_and_loss_standard_additional_run_info( _partial_fit_and_predict_mock.return_value = ( np.array([[0.1, 0.9]] * 46), np.array([[0.1, 0.9]] * 23), - np.array([[0.1, 0.9]] * 25), np.array([[0.1, 0.9]] * 6), {"a": 5}, ) @@ -1129,7 +1023,6 @@ def __call__(self, *args, **kwargs): return ( np.array([[0.1, 0.9]] * 34), np.array([[0.1, 0.9]] * 35), - np.array([[0.1, 0.9]] * 25), np.array([[0.1, 0.9]] * 6), {"a": 5}, ) @@ -1137,7 +1030,6 @@ def __call__(self, *args, **kwargs): return ( np.array([[0.1, 0.9]] * 34), np.array([[0.1, 0.9]] * 34), - np.array([[0.1, 0.9]] * 25), np.array([[0.1, 0.9]] * 6), {"a": 5}, ) @@ -3070,7 +2962,6 @@ def test_eval_holdout_all_loss_functions(self): "recall_micro": 0.030303030303030276, "recall_weighted": 0.030303030303030276, "num_run": 1, - "validation_loss": 0.0, "test_loss": 0.04, "train_loss": 0.0, } @@ -3447,7 +3338,6 @@ def test_eval_cv_all_loss_functions(self): "recall_micro": 0.04999999999999997, "recall_weighted": 0.04999999999999997, "num_run": 1, - "validation_loss": 0.04, "test_loss": 0.04, "train_loss": 0.0, }
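
The automl.py hunk above moves the body of fit() into a try/except/finally so that _fit_cleanup() always runs and the logging server started for the run cannot be left hanging (see the inline comment pointing to issue #1480). Below is a minimal, self-contained sketch of that shutdown pattern; start_log_server and fit_like are hypothetical names used only for illustration and are not part of auto-sklearn's API.

# Illustrative sketch only: start_log_server/fit_like are hypothetical
# stand-ins for auto-sklearn's logger setup and _fit_cleanup().
import logging
import logging.handlers
import queue


def start_log_server():
    # A listener thread draining log records from a queue, standing in for
    # the TCP logging server that AutoML.fit() starts in a separate process.
    log_queue = queue.Queue()
    listener = logging.handlers.QueueListener(log_queue, logging.StreamHandler())
    listener.start()
    return log_queue, listener


def fit_like(work):
    log_queue, listener = start_log_server()
    try:
        work(log_queue)  # any exception raised here still reaches the caller ...
    except Exception:
        logging.getLogger(__name__).exception("fit failed")
        raise  # ... but only after the finally block has run
    finally:
        # Mirrors self._fit_cleanup(): always stop the logging machinery so the
        # process can exit instead of hanging until Ctrl+C.
        listener.stop()

Calling fit_like(lambda q: 1 / 0) logs the traceback, stops the listener, and re-raises the ZeroDivisionError, which is the behaviour the new try/except/finally in AutoML.fit() is aiming for.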
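
test_train_evaluator.py now shares a single LossSideEffect fixture built with itertools.chain instead of repeating the loss table inline; every loss value is repeated three times per fitting iteration (train, optimization and test predictions) rather than four, because the validation split no longer exists. A small standalone check of what that comprehension yields, using toy values rather than the fixture's full list:

from itertools import chain

# Each value is repeated once per remaining prediction set: train, opt, test.
values = [1.0, 0.9, 0.8]
losses = [
    {"accuracy": value}
    for value in chain.from_iterable([v] * 3 for v in values)
]
assert losses == [
    {"accuracy": 1.0}, {"accuracy": 1.0}, {"accuracy": 1.0},
    {"accuracy": 0.9}, {"accuracy": 0.9}, {"accuracy": 0.9},
    {"accuracy": 0.8}, {"accuracy": 0.8}, {"accuracy": 0.8},
]

This is also why the expected predict_proba call counts in the tests drop from multiples of four to multiples of three (for example 36 to 27, and 4 to 3 in the holdout tests).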