From 0a098212f88877a8ac8d4230137989ac32b5539c Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 1 Jun 2022 11:22:59 +0200 Subject: [PATCH 01/63] fixing the issue that metalearning tries to use every hp defined in the csv files. Also fixing the hps remain active bug. --- autosklearn/automl.py | 4 + .../metalearning/input/aslib_simple.py | 7 +- .../metalearning/metalearning/meta_base.py | 2 +- .../data_preprocessing/feature_type.py | 92 +++++++++++-------- 4 files changed, 65 insertions(+), 40 deletions(-) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 12e80b8e4e..f0dfed33c8 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -748,6 +748,10 @@ def fit( self._log_fit_setup() + # save feat_type to file + with open(f'{os.path.dirname(os.path.realpath(__file__))}/feat_type.json', 'w') as f: + json.dump(self._feat_type, f, indent=4) + # == Pickle the data manager to speed up loading with self._stopwatch.time("Save Datamanager"): datamanager = XYDataManager( diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py index 833242729d..c495c5cd69 100644 --- a/autosklearn/metalearning/input/aslib_simple.py +++ b/autosklearn/metalearning/input/aslib_simple.py @@ -8,10 +8,11 @@ class AlgorithmSelectionProblem(object): - def __init__(self, directory): + def __init__(self, directory, cs): self.logger = logging.getLogger(__name__) # Create data structures + self.cs = cs self.dir_ = directory self.algorithm_runs = None self.configurations = None @@ -147,9 +148,11 @@ def _read_configurations(self, filename): configuration = dict() algorithm_id = line["idx"] for hp_name, value in line.items(): + # Todo adapt to search space if not value or hp_name == "idx": continue - + if hp_name not in self.cs.get_hyperparameter_names(): + continue try: value = int(value) except Exception: diff --git a/autosklearn/metalearning/metalearning/meta_base.py b/autosklearn/metalearning/metalearning/meta_base.py index 
f193a61fef..61f16297fe 100644 --- a/autosklearn/metalearning/metalearning/meta_base.py +++ b/autosklearn/metalearning/metalearning/meta_base.py @@ -42,7 +42,7 @@ def __init__(self, configuration_space, aslib_directory, logger): self.configuration_space = configuration_space self.aslib_directory = aslib_directory - aslib_reader = aslib_simple.AlgorithmSelectionProblem(self.aslib_directory) + aslib_reader = aslib_simple.AlgorithmSelectionProblem(self.aslib_directory, self.configuration_space) self.metafeatures = aslib_reader.metafeatures self.algorithm_runs: OrderedDict[ str, pd.DataFrame diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index bd42d8a67a..f325e0c80c 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -1,6 +1,8 @@ from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np +import json +import os import sklearn.compose from ConfigSpace import Configuration from ConfigSpace.configuration_space import ConfigurationSpace @@ -64,6 +66,12 @@ def __init__( self.feat_type = feat_type self.force_sparse_output = force_sparse_output + # load global feat_type + f = open(f'{os.path.dirname(os.path.realpath(__file__))}/../../../feat_type.json') + self.feat_type = json.load(f) + + self._transformers: List[Tuple[str, AutoSklearnComponent]] = [] + # The pipeline that will be applied to the categorical features (i.e. 
columns) # of the dataset # Configuration of the data-preprocessor is different from the configuration of @@ -71,15 +79,18 @@ def __init__( # It is actually the call to set_hyperparameter who properly sets this argument # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed - self.categ_ppl = CategoricalPreprocessingPipeline( - config=None, - steps=pipeline, - dataset_properties=dataset_properties, - include=include, - exclude=exclude, - random_state=random_state, - init_params=init_params, - ) + self.categ_ppl = None + if "categorical" in self.feat_type.values(): + self.categ_ppl = CategoricalPreprocessingPipeline( + config=None, + steps=pipeline, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + ) + self._transformers.append(("categorical_transformer", self.categ_ppl)) # The pipeline that will be applied to the numerical features (i.e. columns) # of the dataset # Configuration of the data-preprocessor is different from the configuration of @@ -87,15 +98,18 @@ def __init__( # It is actually the call to set_hyperparameter who properly sets this argument # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed - self.numer_ppl = NumericalPreprocessingPipeline( - config=None, - steps=pipeline, - dataset_properties=dataset_properties, - include=include, - exclude=exclude, - random_state=random_state, - init_params=init_params, - ) + self.numer_ppl = None + if "numerical" in self.feat_type.values(): + self.numer_ppl = NumericalPreprocessingPipeline( + config=None, + steps=pipeline, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + ) + self._transformers.append(("numerical_transformer", self.numer_ppl)) # The pipeline that will be applied to the text features (i.e. 
columns) # of the dataset @@ -104,21 +118,19 @@ def __init__( # It is actually the call to set_hyperparameter who properly sets this argument # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed - self.txt_ppl = TextPreprocessingPipeline( - config=None, - steps=pipeline, - dataset_properties=dataset_properties, - include=include, - exclude=exclude, - random_state=random_state, - init_params=init_params, - ) + self.txt_ppl = None + if "string" in self.feat_type.values(): + self.txt_ppl = TextPreprocessingPipeline( + config=None, + steps=pipeline, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + ) + self._transformers.append(("text_transformer", self.txt_ppl)) - self._transformers: List[Tuple[str, AutoSklearnComponent]] = [ - ("categorical_transformer", self.categ_ppl), - ("numerical_transformer", self.numer_ppl), - ("text_transformer", self.txt_ppl), - ] if self.config: self.set_hyperparameters(self.config, init_params=init_params) self.column_transformer = column_transformer @@ -143,29 +155,35 @@ def fit( f"Train data has columns={expected} yet the" f" feat_types are feat={columns}" ) + transformer_lst = [] + categorical_features = [ key for key, value in self.feat_type.items() if value.lower() == "categorical" ] + if len(categorical_features) > 0: + transformer_lst.append(("categorical_transformer", self.categ_ppl, categorical_features)) + numerical_features = [ key for key, value in self.feat_type.items() if value.lower() == "numerical" ] + if len(numerical_features) > 0: + transformer_lst.append(("numerical_transformer", self.numer_ppl, numerical_features)) + text_features = [ key for key, value in self.feat_type.items() if value.lower() == "string" ] + if len(transformer_lst) > 0: + transformer_lst.append(("text_transformer", self.txt_ppl, text_features)) sklearn_transf_spec = [ (name, transformer, feature_columns) - for name, 
transformer, feature_columns in [ - ("categorical_transformer", self.categ_ppl, categorical_features), - ("numerical_transformer", self.numer_ppl, numerical_features), - ("text_transformer", self.txt_ppl, text_features), - ] + for name, transformer, feature_columns in transformer_lst if len(feature_columns) > 0 ] else: From 619ccb605f55c030a6e4bcacac6be4206dd31728 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 1 Jun 2022 13:51:11 +0200 Subject: [PATCH 02/63] fixing the issue that metalearning tries to use every hp defined in the csv files. Also fixing the hps remain active bug. --- .../data_preprocessing/feature_type.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index f325e0c80c..fc8a92b458 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -69,6 +69,11 @@ def __init__( # load global feat_type f = open(f'{os.path.dirname(os.path.realpath(__file__))}/../../../feat_type.json') self.feat_type = json.load(f) + is_number = True + for key in self.feat_type.keys(): + is_number *= key.isnumeric() + if is_number: + self.feat_type = {int(key): value for key, value in self.feat_type.items()} self._transformers: List[Tuple[str, AutoSklearnComponent]] = [] @@ -151,10 +156,14 @@ def fit( else: columns = set(range(n_feats)) if expected != columns: - raise ValueError( - f"Train data has columns={expected} yet the" - f" feat_types are feat={columns}" - ) + try: + # columns = [str(col) for col in columns] + pass + except: + raise ValueError( + f"Train data has columns={expected} yet the" + f" feat_types are feat={columns}" + ) transformer_lst = [] categorical_features = [ From 71715347af092515df6f8efd345e02144262d105 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 1 Jun 2022 14:32:06 +0200 Subject: 
[PATCH 03/63] fixing the issue that metalearning tries to use every hp defined in the csv files. Also fixing the hps remain active bug. --- .../pipeline/components/data_preprocessing/feature_type.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index fc8a92b458..d42739c180 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -145,9 +145,6 @@ def fit( ) -> "FeatTypeSplit": n_feats = X.shape[1] - categorical_features = [] - numerical_features = [] - text_features = [] if self.feat_type is not None: # Make sure that we are not missing any column! expected = set(self.feat_type.keys()) @@ -187,7 +184,7 @@ def fit( for key, value in self.feat_type.items() if value.lower() == "string" ] - if len(transformer_lst) > 0: + if len(text_features) > 0: transformer_lst.append(("text_transformer", self.txt_ppl, text_features)) sklearn_transf_spec = [ From 4631a91bbd56b1862633e17c9a60ddced2a08ac0 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 4 Jun 2022 14:10:31 +0200 Subject: [PATCH 04/63] fixing the issue that metalearning tries to use every hp defined in the csv files. Also fixing the hps remain active bug. 
--- autosklearn/automl.py | 2 +- autosklearn/pipeline/base.py | 34 ++++++++++++------ autosklearn/pipeline/classification.py | 10 ++++-- autosklearn/pipeline/components/base.py | 8 ++--- .../components/classification/__init__.py | 2 +- .../components/data_preprocessing/__init__.py | 6 ++-- .../data_preprocessing/balancing/balancing.py | 1 + .../categorical_encoding/__init__.py | 3 +- .../categorical_encoding/encoding.py | 1 + .../categorical_encoding/no_encoding.py | 1 + .../categorical_encoding/one_hot_encoding.py | 1 + .../category_shift/category_shift.py | 1 + .../data_preprocessing/feature_type.py | 36 ++++++++++++------- .../feature_type_categorical.py | 23 ++++++++---- .../feature_type_numerical.py | 20 +++++++---- .../data_preprocessing/feature_type_text.py | 26 +++++++++----- .../imputation/categorical_imputation.py | 1 + .../imputation/numerical_imputation.py | 1 + .../minority_coalescense/__init__.py | 8 ++++- .../minority_coalescer.py | 2 ++ .../minority_coalescense/no_coalescense.py | 1 + .../data_preprocessing/rescaling/__init__.py | 3 +- .../rescaling/abstract_rescaling.py | 1 + .../rescaling/quantile_transformer.py | 1 + .../rescaling/robust_scaler.py | 1 + .../text_encoding/__init__.py | 5 +-- .../text_encoding/bag_of_word_encoding.py | 1 + .../bag_of_word_encoding_distinct.py | 1 + .../text_encoding/tfidf_encoding.py | 1 + .../text_feature_reduction/truncated_svd.py | 1 + .../variance_threshold/variance_threshold.py | 1 + .../feature_preprocessing/__init__.py | 2 +- .../extra_trees_preproc_for_classification.py | 2 +- .../extra_trees_preproc_for_regression.py | 2 +- .../feature_agglomeration.py | 2 +- .../feature_preprocessing/kernel_pca.py | 2 +- .../feature_preprocessing/kitchen_sinks.py | 2 +- .../liblinear_svc_preprocessor.py | 2 +- .../feature_preprocessing/no_preprocessing.py | 2 +- .../feature_preprocessing/nystroem_sampler.py | 2 +- .../feature_preprocessing/polynomial.py | 2 +- .../random_trees_embedding.py | 2 +- 
.../select_percentile_classification.py | 2 +- .../select_percentile_regression.py | 2 +- .../select_rates_classification.py | 2 +- .../select_rates_regression.py | 2 +- .../feature_preprocessing/truncatedSVD.py | 2 +- .../components/regression/__init__.py | 2 +- .../pipeline/create_searchspace_util.py | 2 ++ autosklearn/pipeline/regression.py | 13 +++++-- autosklearn/util/pipeline.py | 26 +++++++------- .../40_advanced/example_text_preprocessing.py | 1 - 52 files changed, 186 insertions(+), 94 deletions(-) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index f0dfed33c8..6affb2d6b8 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -2234,7 +2234,7 @@ def _create_search_space( ) -> Tuple[ConfigurationSpace, str]: configspace_path = os.path.join(tmp_dir, "space.json") configuration_space = pipeline.get_configuration_space( - datamanager.info, + datamanager, include=include, exclude=exclude, ) diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 93c73b4716..1352fb44d9 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -34,6 +34,7 @@ class BasePipeline(Pipeline): def __init__( self, + feat_type, config=None, steps=None, dataset_properties=None, @@ -50,15 +51,16 @@ def __init__( dataset_properties if dataset_properties is not None else {} ) self.random_state = random_state + self.feat_type = feat_type if steps is None: - self.steps = self._get_pipeline_steps(dataset_properties=dataset_properties) + self.steps = self._get_pipeline_steps(feat_type=feat_type, dataset_properties=dataset_properties) else: self.steps = steps self._validate_include_exclude_params() - self.config_space = self.get_hyperparameter_search_space() + self.config_space = self.get_hyperparameter_search_space(feat_type=feat_type) if config is None: self.config = self.config_space.get_default_configuration() @@ -82,8 +84,10 @@ def __init__( ) self.config = config - self.set_hyperparameters(self.config, 
init_params=init_params) + self.set_hyperparameters(self.config, feat_type=feat_type, init_params=init_params) + with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: + f.write(f"base pip. self.steps: {self.steps}\n\n") super().__init__(steps=self.steps) self._additional_run_info = {} @@ -202,13 +206,16 @@ def predict(self, X, batch_size=None): return y - def set_hyperparameters(self, configuration, init_params=None): + def set_hyperparameters(self, configuration, feat_type, init_params=None): self.config = configuration for node_idx, n_ in enumerate(self.steps): node_name, node = n_ + with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: + f.write(f"node base: {type(node)}\n\n") sub_configuration_space = node.get_hyperparameter_search_space( + feat_type=feat_type, dataset_properties=self.dataset_properties ) sub_config_dict = {} @@ -235,8 +242,10 @@ def set_hyperparameters(self, configuration, init_params=None): if isinstance( node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline) ): + with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: + f.write(f"node: {type(node)}\n\n") node.set_hyperparameters( - configuration=sub_configuration, init_params=sub_init_params_dict + feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict ) else: raise NotImplementedError("Not supported yet!") @@ -247,7 +256,7 @@ def set_hyperparameters(self, configuration, init_params=None): return self - def get_hyperparameter_search_space(self, dataset_properties=None): + def get_hyperparameter_search_space(self, feat_type, dataset_properties=None): """Return the configuration space for the CASH problem. 
Returns @@ -258,6 +267,7 @@ def get_hyperparameter_search_space(self, dataset_properties=None): """ if not hasattr(self, "config_space") or self.config_space is None: self.config_space = self._get_hyperparameter_search_space( + feat_type=feat_type, include=self.include, exclude=self.exclude, dataset_properties=self.dataset_properties, @@ -265,7 +275,7 @@ def get_hyperparameter_search_space(self, dataset_properties=None): return self.config_space def _get_hyperparameter_search_space( - self, include=None, exclude=None, dataset_properties=None + self, feat_type, include=None, exclude=None, dataset_properties=None ): """Return the configuration space for the CASH problem. @@ -307,7 +317,7 @@ def _get_hyperparameter_search_space( raise NotImplementedError() def _get_base_search_space( - self, cs, dataset_properties, exclude, include, pipeline + self, feat_type, cs, dataset_properties, exclude, include, pipeline ): if include is None: if self.include is None: @@ -343,7 +353,7 @@ def _get_base_search_space( dataset_properties["signed"] = False matches = autosklearn.pipeline.create_searchspace_util.get_match_array( - pipeline, dataset_properties, include=include, exclude=exclude + pipeline=pipeline, dataset_properties=dataset_properties, include=include, exclude=exclude ) # Now we have only legal combinations at this step of the pipeline @@ -385,8 +395,10 @@ def _get_base_search_space( exclude.get(node_name), ) ) + with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: + f.write(f"node: {type(node)}\n\n") sub_config_space = node.get_hyperparameter_search_space( - dataset_properties, include=choices_list + feat_type=feat_type, dataset_properties=dataset_properties, include=choices_list ) cs.add_configuration_space(node_name, sub_config_space) @@ -505,7 +517,7 @@ def __repr__(self): return rval - def _get_pipeline_steps(self, dataset_properties): + def _get_pipeline_steps(self, dataset_properties, feat_type): raise NotImplementedError() def 
_get_estimator_hyperparameter_name(self): diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index 1686e02809..c74b336b4c 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -70,6 +70,7 @@ class SimpleClassificationPipeline(BasePipeline, ClassifierMixin): def __init__( self, + feat_type, config: Optional[Configuration] = None, steps=None, dataset_properties=None, @@ -84,6 +85,7 @@ def __init__( if "target_type" not in dataset_properties: dataset_properties["target_type"] = "classification" super().__init__( + feat_type=feat_type, config=config, steps=steps, dataset_properties=dataset_properties, @@ -166,7 +168,7 @@ def predict_proba(self, X, batch_size=None): return y def _get_hyperparameter_search_space( - self, include=None, exclude=None, dataset_properties=None + self, feat_type, include=None, exclude=None, dataset_properties=None ): """Create the hyperparameter configuration space. @@ -194,6 +196,7 @@ def _get_hyperparameter_search_space( cs = self._get_base_search_space( cs=cs, + feat_type=feat_type, dataset_properties=dataset_properties, exclude=exclude, include=include, @@ -344,7 +347,7 @@ def _get_hyperparameter_search_space( self.dataset_properties = dataset_properties return cs - def _get_pipeline_steps(self, dataset_properties): + def _get_pipeline_steps(self, dataset_properties, feat_type): steps = [] default_dataset_properties = {"target_type": "classification"} @@ -356,6 +359,7 @@ def _get_pipeline_steps(self, dataset_properties): [ "data_preprocessor", DataPreprocessorChoice( + feat_type=feat_type, dataset_properties=default_dataset_properties, random_state=self.random_state, ), @@ -364,6 +368,7 @@ def _get_pipeline_steps(self, dataset_properties): [ "feature_preprocessor", FeaturePreprocessorChoice( + feat_type=feat_type, dataset_properties=default_dataset_properties, random_state=self.random_state, ), @@ -371,6 +376,7 @@ def _get_pipeline_steps(self, 
dataset_properties): [ "classifier", ClassifierChoice( + feat_type=feat_type, dataset_properties=default_dataset_properties, random_state=self.random_state, ), diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py index c4a95df08c..98a2c22a80 100644 --- a/autosklearn/pipeline/components/base.py +++ b/autosklearn/pipeline/components/base.py @@ -136,7 +136,7 @@ def fit(self, X, y): for further information.""" raise NotImplementedError() - def set_hyperparameters(self, configuration, init_params=None): + def set_hyperparameters(self, configuration, feat_type, init_params=None): params = configuration.get_dictionary() for param, value in params.items(): @@ -339,7 +339,7 @@ def get_estimator(self): class AutoSklearnChoice(object): - def __init__(self, dataset_properties, random_state=None): + def __init__(self, dataset_properties, feat_type, random_state=None): """ Parameters ---------- @@ -414,7 +414,7 @@ def get_available_components( return components_dict - def set_hyperparameters(self, configuration, init_params=None): + def set_hyperparameters(self, configuration, feat_type, init_params=None): new_params = {} params = configuration.get_dictionary() @@ -438,7 +438,7 @@ def set_hyperparameters(self, configuration, init_params=None): return self def get_hyperparameter_search_space( - self, dataset_properties=None, default=None, include=None, exclude=None + self, feat_type, dataset_properties=None, default=None, include=None, exclude=None ): raise NotImplementedError() diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index c95334273a..073e7325e0 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -86,7 +86,7 @@ def get_available_components( return components_dict def get_hyperparameter_search_space( - self, dataset_properties=None, default=None, include=None, 
exclude=None + self, feat_type, dataset_properties=None, default=None, include=None, exclude=None ): if dataset_properties is None: dataset_properties = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index 5693efd441..bbe805e519 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -105,6 +105,7 @@ def get_available_components( def get_hyperparameter_search_space( self, + feat_type, dataset_properties: Optional[Dict] = None, default: str = None, include: Optional[Dict] = None, @@ -136,6 +137,7 @@ def get_hyperparameter_search_space( cs.add_hyperparameter(preprocessor) for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[name]( + feat_type=feat_type, dataset_properties=dataset_properties ).get_hyperparameter_search_space(dataset_properties) parent_hyperparameter = {"parent": preprocessor, "value": name} @@ -150,7 +152,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.choice.transform(X) def set_hyperparameters( - self, configuration: ConfigurationSpace, init_params: Optional[Dict] = None + self, feat_type, configuration: ConfigurationSpace, init_params: Optional[Dict] = None ) -> "DataPreprocessorChoice": config = {} params = configuration.get_dictionary() @@ -162,7 +164,7 @@ def set_hyperparameters( config[param] = value new_params = {} - feat_type = None + # feat_type = None if init_params is not None: for param, value in init_params.items(): param = param.replace(choice, "").split(":", 1)[-1] diff --git a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py index 721fe63fc5..7e04082112 100644 --- a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py +++ 
b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py @@ -139,6 +139,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: # TODO add replace by zero! diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py index 5d1647b24a..ae52062c2b 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py @@ -38,6 +38,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, @@ -86,7 +87,7 @@ def get_hyperparameter_search_space( return cs def set_hyperparameters( - self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None ) -> "OHEChoice": new_params = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py index 43d578219f..56d9ca16fa 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py @@ -69,6 +69,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py 
b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py index 028a4fb9c1..0a7eaaf802 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py @@ -44,6 +44,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py index 9b9ee87c81..c223f165d4 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py @@ -55,6 +55,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py index f2dc2bf304..5008f406d8 100644 --- a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py +++ b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py @@ -63,6 +63,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index d42739c180..0fcf3d98e6 100644 --- 
a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -67,16 +67,19 @@ def __init__( self.force_sparse_output = force_sparse_output # load global feat_type - f = open(f'{os.path.dirname(os.path.realpath(__file__))}/../../../feat_type.json') - self.feat_type = json.load(f) - is_number = True - for key in self.feat_type.keys(): - is_number *= key.isnumeric() - if is_number: - self.feat_type = {int(key): value for key, value in self.feat_type.items()} + # f = open(f'{os.path.dirname(os.path.realpath(__file__))}/../../../feat_type.json') + # self.feat_type = json.load(f) + # is_number = True + # for key in self.feat_type.keys(): + # is_number *= key.isnumeric() + # if is_number: + # self.feat_type = {int(key): value for key, value in self.feat_type.items()} self._transformers: List[Tuple[str, AutoSklearnComponent]] = [] + if self.feat_type is None: + raise ValueError("feat_type init requires feat_type") + # The pipeline that will be applied to the categorical features (i.e. 
columns) # of the dataset # Configuration of the data-preprocessor is different from the configuration of @@ -87,6 +90,7 @@ def __init__( self.categ_ppl = None if "categorical" in self.feat_type.values(): self.categ_ppl = CategoricalPreprocessingPipeline( + feat_type=self.feat_type, config=None, steps=pipeline, dataset_properties=dataset_properties, @@ -106,6 +110,7 @@ def __init__( self.numer_ppl = None if "numerical" in self.feat_type.values(): self.numer_ppl = NumericalPreprocessingPipeline( + feat_type=self.feat_type, config=None, steps=pipeline, dataset_properties=dataset_properties, @@ -126,6 +131,7 @@ def __init__( self.txt_ppl = None if "string" in self.feat_type.values(): self.txt_ppl = TextPreprocessingPipeline( + feat_type=self.feat_type, config=None, steps=pipeline, dataset_properties=dataset_properties, @@ -137,7 +143,7 @@ def __init__( self._transformers.append(("text_transformer", self.txt_ppl)) if self.config: - self.set_hyperparameters(self.config, init_params=init_params) + self.set_hyperparameters(feat_type=self.feat_type, configuration=self.config, init_params=init_params) self.column_transformer = column_transformer def fit( @@ -247,7 +253,7 @@ def get_properties( } def set_hyperparameters( - self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None ) -> "FeatTypeSplit": if init_params is not None and "feat_type" in init_params.keys(): self.feat_type = init_params["feat_type"] @@ -256,7 +262,8 @@ def set_hyperparameters( for transf_name, transf_op in self._transformers: sub_configuration_space = transf_op.get_hyperparameter_search_space( - dataset_properties=self.dataset_properties + dataset_properties=self.dataset_properties, + feat_type=feat_type ) sub_config_dict = {} for param in configuration: @@ -282,7 +289,7 @@ def set_hyperparameters( transf_op, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline) ): 
transf_op.set_hyperparameters( - configuration=sub_configuration, init_params=sub_init_params_dict + feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict ) else: raise NotImplementedError("Not supported yet!") @@ -291,17 +298,22 @@ def set_hyperparameters( def get_hyperparameter_search_space( self, + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: self.dataset_properties = dataset_properties cs = ConfigurationSpace() cs = FeatTypeSplit._get_hyperparameter_search_space_recursevely( - dataset_properties, cs, self._transformers + feat_type=feat_type, + dataset_properties=dataset_properties, + cs=cs, + transformer=self._transformers ) return cs @staticmethod def _get_hyperparameter_search_space_recursevely( + feat_type, dataset_properties: DATASET_PROPERTIES_TYPE, cs: ConfigurationSpace, transformer: BaseEstimator, diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py index dfdaf7af62..160384fac0 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py @@ -46,6 +46,7 @@ class CategoricalPreprocessingPipeline(BasePipeline): def __init__( self, + feat_type, config: Optional[Configuration] = None, steps: Optional[List[Tuple[str, BaseEstimator]]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -56,13 +57,14 @@ def __init__( ) -> None: self._output_dtype = np.int32 super().__init__( - config, - steps, - dataset_properties, - include, - exclude, - random_state, - init_params, + config=config, + steps=steps, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + feat_type=feat_type ) @staticmethod @@ -92,6 +94,7 @@ def get_properties( def 
_get_hyperparameter_search_space( self, + feat_type, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -108,8 +111,11 @@ def _get_hyperparameter_search_space( if dataset_properties is None or not isinstance(dataset_properties, dict): dataset_properties = dict() + with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: + f.write(f"pipeline (self.steps): {self.steps}\n\n") cs = self._get_base_search_space( cs=cs, + feat_type=feat_type, dataset_properties=dataset_properties, exclude=exclude, include=include, @@ -120,6 +126,7 @@ def _get_hyperparameter_search_space( def _get_pipeline_steps( self, + feat_type, dataset_properties: Optional[Dict[str, str]] = None, ) -> List[Tuple[str, BaseEstimator]]: steps = [] @@ -135,6 +142,7 @@ def _get_pipeline_steps( ( "category_coalescence", CoalescenseChoice( + feat_type=feat_type, dataset_properties=default_dataset_properties, random_state=self.random_state, ), @@ -142,6 +150,7 @@ def _get_pipeline_steps( ( "categorical_encoding", OHEChoice( + feat_type=feat_type, dataset_properties=default_dataset_properties, random_state=self.random_state, ), diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py index b50bf0d357..a50ede3985 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py @@ -39,6 +39,7 @@ class NumericalPreprocessingPipeline(BasePipeline): def __init__( self, + feat_type, config: Optional[Configuration] = None, steps: Optional[List[Tuple[str, BaseEstimator]]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -49,13 +50,14 @@ def __init__( ) -> None: self._output_dtype = np.int32 super().__init__( - config, - steps, - dataset_properties, - include, - exclude, 
- random_state, - init_params, + config=config, + steps=steps, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + feat_type=feat_type, ) @staticmethod @@ -85,6 +87,7 @@ def get_properties( def _get_hyperparameter_search_space( self, + feat_type, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -110,12 +113,14 @@ def _get_hyperparameter_search_space( exclude=exclude, include=include, pipeline=self.steps, + feat_type=feat_type, ) return cs def _get_pipeline_steps( self, + feat_type, dataset_properties: Optional[Dict[str, str]] = None, ) -> List[Tuple[str, BaseEstimator]]: steps = [] @@ -134,6 +139,7 @@ def _get_pipeline_steps( ( "rescaling", rescaling_components.RescalingChoice( + feat_type=feat_type, dataset_properties=default_dataset_properties, random_state=self.random_state, ), diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py index 8924d568a6..e3d7078de2 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py @@ -34,6 +34,7 @@ class TextPreprocessingPipeline(BasePipeline): def __init__( self, + feat_type, config: Optional[Configuration] = None, steps: Optional[List[Tuple[str, BaseEstimator]]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -44,13 +45,14 @@ def __init__( ) -> None: self._output_dtype = np.int32 super().__init__( - config, - steps, - dataset_properties, - include, - exclude, - random_state, - init_params, + config=config, + steps=steps, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + feat_type=feat_type ) @staticmethod @@ -79,6 +81,7 @@ def 
get_properties( def _get_hyperparameter_search_space( self, + feat_type, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -104,12 +107,14 @@ def _get_hyperparameter_search_space( exclude=exclude, include=include, pipeline=self.steps, + feat_type=feat_type, ) return cs def _get_pipeline_steps( self, + feat_type, dataset_properties: Optional[Dict[str, str]] = None, ) -> List[Tuple[str, BaseEstimator]]: steps = [] @@ -123,12 +128,15 @@ def _get_pipeline_steps( ( "text_encoding", BagOfWordChoice( - default_dataset_properties, random_state=self.random_state + feat_type=feat_type, + dataset_properties=default_dataset_properties, + random_state=self.random_state ), ), ( "text_feature_reduction", - TextFeatureReduction(random_state=self.random_state), + TextFeatureReduction( + random_state=self.random_state), ), ] ) diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py index 00b627daed..40f3d1e93a 100644 --- a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py +++ b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py @@ -91,6 +91,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py index d7d6a645ab..99ab3a33c0 100644 --- a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py +++ b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py @@ -62,6 +62,7 @@ def get_properties( 
@staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: # TODO add replace by zero! diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py index fbf999761c..b1148d8d94 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py @@ -38,6 +38,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, @@ -86,7 +87,7 @@ def get_hyperparameter_search_space( return cs def set_hyperparameters( - self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None ) -> "CoalescenseChoice": new_params = {} @@ -111,6 +112,11 @@ def set_hyperparameters( new_params["random_state"] = self.random_state self.new_params = new_params + with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: + f.write(f"minority_init self.get...:\n" + f"new_params: {new_params}\n" + f"choice: {self.get_components()[choice]}\n\n") + new_params["feat_type"] = feat_type self.choice = self.get_components()[choice](**new_params) return self diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py index 278cf0bfb9..9052188190 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py +++ 
b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py @@ -15,6 +15,7 @@ class MinorityCoalescer(AutoSklearnPreprocessingAlgorithm): def __init__( self, + feat_type, minimum_fraction: float = 0.01, random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: @@ -59,6 +60,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py index d05c146d98..8c6314988e 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py @@ -43,6 +43,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py index 2a9fbdb842..8f3caedd83 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py @@ -42,6 +42,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, @@ -74,7 +75,7 @@ def get_hyperparameter_search_space( for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[ name - 
].get_hyperparameter_search_space(dataset_properties) + ].get_hyperparameter_search_space(feat_type=feat_type, dataset_properties=dataset_properties) parent_hyperparameter = {"parent": preprocessor, "value": name} cs.add_configuration_space( name, diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py index 05e1a4e898..7955d90b28 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py @@ -38,6 +38,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py index 2611c0650d..0c840fb44f 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py @@ -62,6 +62,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py index af3b4c0558..9b4e01843d 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py @@ -59,6 +59,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + 
feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py index 990ad579ca..3cc71bc314 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py @@ -39,6 +39,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, @@ -75,7 +76,7 @@ def get_hyperparameter_search_space( for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[ name - ].get_hyperparameter_search_space(dataset_properties) + ].get_hyperparameter_search_space(feat_type=feat_type, dataset_properties=dataset_properties) parent_hyperparameter = {"parent": preprocessor, "value": name} cs.add_configuration_space( name, @@ -88,7 +89,7 @@ def get_hyperparameter_search_space( return cs def set_hyperparameters( - self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None ) -> "BagOfWordChoice": new_params = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py index b8a62ccd89..92692b1dda 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py @@ -95,6 +95,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + 
feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py index 90a43b0f48..b2e6c0598d 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py @@ -101,6 +101,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py index f20d24f769..577cddf7d2 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py @@ -100,6 +100,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py index beecefb028..1562b57249 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py @@ -74,6 +74,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, 
) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py index 365ae405a0..33f7a1a996 100644 --- a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py +++ b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py @@ -49,6 +49,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( + feat_type, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index cd52d6ad34..bff511fe6e 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -101,7 +101,7 @@ def get_available_components( return components_dict def get_hyperparameter_search_space( - self, dataset_properties=None, default=None, include=None, exclude=None + self, feat_type, dataset_properties=None, default=None, include=None, exclude=None ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py index dad45795b8..084fe8e40d 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py @@ -123,7 +123,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): cs = 
ConfigurationSpace() n_estimators = Constant("n_estimators", 100) diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py index 3287b837c5..23914b713f 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py @@ -125,7 +125,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): cs = ConfigurationSpace() n_estimators = Constant("n_estimators", 100) diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py index d51242de21..95ae7cee49 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py +++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py @@ -63,7 +63,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): cs = ConfigurationSpace() n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25) affinity = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py index 4e96bfb1c2..0fad8bc6b3 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py @@ -82,7 +82,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + 
def get_hyperparameter_search_space(feat_type, dataset_properties=None): n_components = UniformIntegerHyperparameter( "n_components", 10, 2000, default_value=100 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py index a81e9ddd78..22d1cfd248 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py @@ -69,7 +69,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): gamma = UniformFloatHyperparameter( "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py index 546c8742ad..f7ee87d80f 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py +++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py @@ -91,7 +91,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): cs = ConfigurationSpace() penalty = Constant("penalty", "l1") diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py index 550872d551..016a44dd7b 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py +++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py @@ -34,6 +34,6 @@ def get_properties(dataset_properties=None): } @staticmethod - def 
get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py index 097f59e0f1..24ca8977a0 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py +++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py @@ -94,7 +94,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): if dataset_properties is not None and ( dataset_properties.get("sparse") is True or dataset_properties.get("signed") is False diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py index bd5312bba0..d17e6d43a4 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py +++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py @@ -54,7 +54,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): # More than degree 3 is too expensive! 
degree = UniformIntegerHyperparameter("degree", 2, 3, 2) interaction_only = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py index 9daed1ae97..b6966b27ae 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py +++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py @@ -94,7 +94,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): n_estimators = UniformIntegerHyperparameter( name="n_estimators", lower=10, upper=100, default_value=10 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py index 3caa50b46d..fdb15c50bd 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py @@ -110,7 +110,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): percentile = UniformFloatHyperparameter( name="percentile", lower=1, upper=99, default_value=50 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py index e9343fead4..cdec8fe152 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py @@ -53,7 +53,7 @@ def 
get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): percentile = UniformFloatHyperparameter( "percentile", lower=1, upper=99, default_value=50 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py index 0c4768d000..cb002c9b6d 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py @@ -116,7 +116,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py index ffec19e6ec..e9a5f7b943 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py @@ -84,7 +84,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py index 4d6f6b7ca9..214b0346d2 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py +++ 
b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py @@ -48,7 +48,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type, dataset_properties=None): target_dim = UniformIntegerHyperparameter( "target_dim", 10, 256, default_value=128 ) diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py index 73033467a7..457d9813d3 100644 --- a/autosklearn/pipeline/components/regression/__init__.py +++ b/autosklearn/pipeline/components/regression/__init__.py @@ -79,7 +79,7 @@ def get_available_components( return components_dict def get_hyperparameter_search_space( - self, dataset_properties=None, default=None, include=None, exclude=None + self, feat_type, dataset_properties=None, default=None, include=None, exclude=None ): if include is not None and exclude is not None: raise ValueError( diff --git a/autosklearn/pipeline/create_searchspace_util.py b/autosklearn/pipeline/create_searchspace_util.py index dff69acc6e..d60c1ecaff 100644 --- a/autosklearn/pipeline/create_searchspace_util.py +++ b/autosklearn/pipeline/create_searchspace_util.py @@ -22,6 +22,8 @@ def get_match_array(pipeline, dataset_properties, include=None, exclude=None): node_i_choices = [] node_i_choices_names = [] all_nodes = [] + with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: + f.write(f"pipeline: {pipeline}\n\n") for node_name, node in pipeline: all_nodes.append(node) is_choice = hasattr(node, "get_available_components") diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py index 638f8ae3cb..090083b5ba 100644 --- a/autosklearn/pipeline/regression.py +++ b/autosklearn/pipeline/regression.py @@ -67,6 +67,7 @@ class SimpleRegressionPipeline(RegressorMixin, BasePipeline): def __init__( self, + feat_type, config: Optional[Configuration] = None, steps=None, 
dataset_properties=None, @@ -81,6 +82,7 @@ def __init__( if "target_type" not in dataset_properties: dataset_properties["target_type"] = "regression" super().__init__( + feat_type=feat_type, config=config, steps=steps, dataset_properties=dataset_properties, @@ -112,7 +114,7 @@ def predict(self, X, batch_size=None): return y def _get_hyperparameter_search_space( - self, include=None, exclude=None, dataset_properties=None + self, feat_type, include=None, exclude=None, dataset_properties=None ): """Return the configuration space for the CASH problem. @@ -149,6 +151,7 @@ def _get_hyperparameter_search_space( cs = self._get_base_search_space( cs=cs, + feat_type=feat_type, dataset_properties=dataset_properties, exclude=exclude, include=include, @@ -259,7 +262,7 @@ def _get_hyperparameter_search_space( def _get_estimator_components(self): return regression_components._regressors - def _get_pipeline_steps(self, dataset_properties, init_params=None): + def _get_pipeline_steps(self, feat_type, dataset_properties, init_params=None): steps = [] default_dataset_properties = {"target_type": "regression"} @@ -271,6 +274,7 @@ def _get_pipeline_steps(self, dataset_properties, init_params=None): [ "data_preprocessor", DataPreprocessorChoice( + feat_type=feat_type, dataset_properties=default_dataset_properties, random_state=self.random_state, ), @@ -278,6 +282,7 @@ def _get_pipeline_steps(self, dataset_properties, init_params=None): [ "feature_preprocessor", feature_preprocessing_components.FeaturePreprocessorChoice( + feat_type=feat_type, dataset_properties=default_dataset_properties, random_state=self.random_state, ), @@ -285,7 +290,9 @@ def _get_pipeline_steps(self, dataset_properties, init_params=None): [ "regressor", regression_components.RegressorChoice( - default_dataset_properties, random_state=self.random_state + feat_type=feat_type, + dataset_properties=default_dataset_properties, + random_state=self.random_state ), ], ] diff --git a/autosklearn/util/pipeline.py 
b/autosklearn/util/pipeline.py index d3291069f5..bc6a27a71c 100755 --- a/autosklearn/util/pipeline.py +++ b/autosklearn/util/pipeline.py @@ -18,7 +18,7 @@ def get_configuration_space( - info: Dict[str, Any], + datamanager: Dict[str, Any], include: Optional[Dict[str, List[str]]] = None, exclude: Optional[Dict[str, List[str]]] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, @@ -44,16 +44,16 @@ def get_configuration_space( ConfigurationSpace The configuration space for the pipeline """ - if info["task"] in REGRESSION_TASKS: - return _get_regression_configuration_space(info, include, exclude, random_state) + if datamanager.info["task"] in REGRESSION_TASKS: + return _get_regression_configuration_space(datamanager, include, exclude, random_state) else: return _get_classification_configuration_space( - info, include, exclude, random_state + datamanager, include, exclude, random_state ) def _get_regression_configuration_space( - info: Dict[str, Any], + datamanager: Dict[str, Any], include: Optional[Dict[str, List[str]]], exclude: Optional[Dict[str, List[str]]], random_state: Optional[Union[int, np.random.RandomState]] = None, @@ -79,28 +79,29 @@ def _get_regression_configuration_space( ConfigurationSpace The configuration space for the regression pipeline """ - task_type = info["task"] + task_type = datamanager.info["task"] sparse = False multioutput = False if task_type == MULTIOUTPUT_REGRESSION: multioutput = True - if info["is_sparse"] == 1: + if datamanager.info["is_sparse"] == 1: sparse = True dataset_properties = {"multioutput": multioutput, "sparse": sparse} configuration_space = SimpleRegressionPipeline( + feat_type=datamanager.feat_type, dataset_properties=dataset_properties, include=include, exclude=exclude, random_state=random_state, - ).get_hyperparameter_search_space() + ).get_hyperparameter_search_space(feat_type=datamanager.feat_type) return configuration_space def _get_classification_configuration_space( - info: Dict[str, Any], + 
datamanager: Dict[str, Any], include: Optional[Dict[str, List[str]]], exclude: Optional[Dict[str, List[str]]], random_state: Optional[Union[int, np.random.RandomState]] = None, @@ -126,7 +127,7 @@ def _get_classification_configuration_space( ConfigurationSpace The configuration space for the classification pipeline """ - task_type = info["task"] + task_type = datamanager.info["task"] multilabel = False multiclass = False @@ -139,7 +140,7 @@ def _get_classification_configuration_space( if task_type == BINARY_CLASSIFICATION: pass - if info["is_sparse"] == 1: + if datamanager.info["is_sparse"] == 1: sparse = True dataset_properties = { @@ -149,8 +150,9 @@ def _get_classification_configuration_space( } return SimpleClassificationPipeline( + feat_type=datamanager.feat_type, dataset_properties=dataset_properties, include=include, exclude=exclude, random_state=random_state, - ).get_hyperparameter_search_space() + ).get_hyperparameter_search_space(feat_type=datamanager.feat_type) diff --git a/examples/40_advanced/example_text_preprocessing.py b/examples/40_advanced/example_text_preprocessing.py index 7c65825b7b..ba7deffe03 100644 --- a/examples/40_advanced/example_text_preprocessing.py +++ b/examples/40_advanced/example_text_preprocessing.py @@ -59,7 +59,6 @@ automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=60, per_run_time_limit=30, - tmp_folder="/tmp/autosklearn_text_example_tmp", ) automl.fit(X_train, y_train, dataset_name="20_Newsgroups") # fit the automl model From 0bad1d402aead52664bc853b0fc968442476ec80 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 4 Jun 2022 14:16:21 +0200 Subject: [PATCH 05/63] fixing the issue that metalearning tries to use every hp defined in the csv files. Also fixing the hps remain active bug. 
--- autosklearn/automl.py | 4 ---- autosklearn/pipeline/base.py | 8 -------- .../data_preprocessing/feature_type_categorical.py | 2 -- .../data_preprocessing/minority_coalescense/__init__.py | 4 ---- autosklearn/pipeline/create_searchspace_util.py | 2 -- 5 files changed, 20 deletions(-) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 6affb2d6b8..df46ee4c4c 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -748,10 +748,6 @@ def fit( self._log_fit_setup() - # save feat_type to file - with open(f'{os.path.dirname(os.path.realpath(__file__))}/feat_type.json', 'w') as f: - json.dump(self._feat_type, f, indent=4) - # == Pickle the data manager to speed up loading with self._stopwatch.time("Save Datamanager"): datamanager = XYDataManager( diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 1352fb44d9..25247a0d76 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -86,8 +86,6 @@ def __init__( self.set_hyperparameters(self.config, feat_type=feat_type, init_params=init_params) - with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: - f.write(f"base pip. 
self.steps: {self.steps}\n\n") super().__init__(steps=self.steps) self._additional_run_info = {} @@ -212,8 +210,6 @@ def set_hyperparameters(self, configuration, feat_type, init_params=None): for node_idx, n_ in enumerate(self.steps): node_name, node = n_ - with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: - f.write(f"node base: {type(node)}\n\n") sub_configuration_space = node.get_hyperparameter_search_space( feat_type=feat_type, dataset_properties=self.dataset_properties @@ -242,8 +238,6 @@ def set_hyperparameters(self, configuration, feat_type, init_params=None): if isinstance( node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline) ): - with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: - f.write(f"node: {type(node)}\n\n") node.set_hyperparameters( feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict ) @@ -395,8 +389,6 @@ def _get_base_search_space( exclude.get(node_name), ) ) - with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: - f.write(f"node: {type(node)}\n\n") sub_config_space = node.get_hyperparameter_search_space( feat_type=feat_type, dataset_properties=dataset_properties, include=choices_list ) diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py index 160384fac0..599221187b 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py @@ -111,8 +111,6 @@ def _get_hyperparameter_search_space( if dataset_properties is None or not isinstance(dataset_properties, dict): dataset_properties = dict() - with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: - f.write(f"pipeline (self.steps): {self.steps}\n\n") cs = self._get_base_search_space( cs=cs, feat_type=feat_type, diff --git 
a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py index b1148d8d94..6eff766085 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py @@ -112,10 +112,6 @@ def set_hyperparameters( new_params["random_state"] = self.random_state self.new_params = new_params - with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: - f.write(f"minority_init self.get...:\n" - f"new_params: {new_params}\n" - f"choice: {self.get_components()[choice]}\n\n") new_params["feat_type"] = feat_type self.choice = self.get_components()[choice](**new_params) diff --git a/autosklearn/pipeline/create_searchspace_util.py b/autosklearn/pipeline/create_searchspace_util.py index d60c1ecaff..dff69acc6e 100644 --- a/autosklearn/pipeline/create_searchspace_util.py +++ b/autosklearn/pipeline/create_searchspace_util.py @@ -22,8 +22,6 @@ def get_match_array(pipeline, dataset_properties, include=None, exclude=None): node_i_choices = [] node_i_choices_names = [] all_nodes = [] - with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f: - f.write(f"pipeline: {pipeline}\n\n") for node_name, node in pipeline: all_nodes.append(node) is_choice = hasattr(node, "get_available_components") From 5d36fa5ed4bc19544d78a57c1a2327e98488aed7 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 7 Jun 2022 12:00:48 +0200 Subject: [PATCH 06/63] fixing ensemble builder --- autosklearn/evaluation/abstract_evaluator.py | 11 +++++++++-- autosklearn/evaluation/train_evaluator.py | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index efd87c6cc3..bc5fc6df2c 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ 
b/autosklearn/evaluation/abstract_evaluator.py @@ -45,6 +45,7 @@ def __init__( self, config: Configuration, random_state: Optional[Union[int, np.random.RandomState]], + feat_type, init_params: Optional[Dict[str, Any]] = None, dataset_properties: Dict[str, Any] = {}, include: Optional[List[str]] = None, @@ -61,6 +62,7 @@ def __init__( self.dataset_properties = dataset_properties self.include = include self.exclude = exclude + self.feat_type = feat_type def pre_transform( self, @@ -108,6 +110,7 @@ def __init__( self, config: Configuration, random_state: Optional[Union[int, np.random.RandomState]], + feat_type, init_params: Optional[Dict[str, Any]] = None, dataset_properties: Dict[str, Any] = {}, include: Optional[List[str]] = None, @@ -123,6 +126,7 @@ def __init__( self.dataset_properties = dataset_properties self.include = include self.exclude = exclude + self.feat_type = feat_type def pre_transform( self, @@ -217,6 +221,7 @@ def __init__( self.queue = queue self.datamanager = self.backend.load_datamanager() + self.feat_type = self.datamanager.feat_type self.include = include self.exclude = exclude @@ -296,11 +301,12 @@ def __init__( _addons[key].add_component(component) # Please mypy to prevent not defined attr - self.model = self._get_model() + self.model = self._get_model(feat_type=self.feat_type) - def _get_model(self) -> BaseEstimator: + def _get_model(self, feat_type) -> BaseEstimator: if not isinstance(self.configuration, Configuration): model = self.model_class( + feat_type=feat_type, config=self.configuration, random_state=self.seed, init_params=self._init_params, @@ -320,6 +326,7 @@ def _get_model(self) -> BaseEstimator: "multiclass": self.task_type == MULTICLASS_CLASSIFICATION, } model = self.model_class( + feat_type=feat_type, config=self.configuration, dataset_properties=dataset_properties, random_state=self.seed, diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index a8433c2136..cda7806b6c 100644 --- 
a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -247,6 +247,7 @@ def __init__( budget_type=budget_type, ) + self.feat_type = self.backend.load_datamanager().feat_type self.resampling_strategy = resampling_strategy if resampling_strategy_args is None: self.resampling_strategy_args = {} @@ -984,7 +985,7 @@ def _partial_fit_and_predict_standard( PIPELINE_DATA_DTYPE, # test_pred TYPE_ADDITIONAL_INFO, ]: - model = self._get_model() + model = self._get_model(feat_type=self.feat_type) self.indices[fold] = (train_indices, test_indices) From 8afbd975f9f02eae9d2f2a41e2d8f21422b84fcd Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 13:41:15 +0200 Subject: [PATCH 07/63] fixing ensemble builder --- autosklearn/evaluation/test_evaluator.py | 2 +- autosklearn/evaluation/train_evaluator.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/autosklearn/evaluation/test_evaluator.py b/autosklearn/evaluation/test_evaluator.py index e76186aa06..c658fb0cea 100644 --- a/autosklearn/evaluation/test_evaluator.py +++ b/autosklearn/evaluation/test_evaluator.py @@ -58,7 +58,7 @@ def __init__( self.X_test = self.datamanager.data.get("X_test") self.Y_test = self.datamanager.data.get("Y_test") - self.model = self._get_model() + self.model = self._get_model(self.feat_type) def fit_predict_and_loss(self) -> None: _fit_and_suppress_warnings(self.logger, self.model, self.X_train, self.Y_train) diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index cda7806b6c..228a685c37 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -306,7 +306,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: # Test if the model allows for an iterative fit, if not, # call this method again without the iterative argument - model = self._get_model() + model = self._get_model(self.feat_type) if not 
model.estimator_supports_iterative_fit(): self.fit_predict_and_loss(iterative=False) return @@ -321,7 +321,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_test_pred = [None] * self.num_cv_folds train_splits = [None] * self.num_cv_folds - self.models = [self._get_model() for i in range(self.num_cv_folds)] + self.models = [self._get_model(self.feat_type) for i in range(self.num_cv_folds)] iterations = [1] * self.num_cv_folds total_n_iterations = [0] * self.num_cv_folds # model.estimator_supports_iterative_fit -> true @@ -532,7 +532,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: self.Y_optimization = Y_targets self.Y_actual_train = Y_train_targets - self.model = self._get_model() + self.model = self._get_model(self.feat_type) status = StatusType.DONOTADVANCE if any( [ @@ -713,7 +713,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: self.Y_actual_train = Y_train_targets if self.num_cv_folds > 1: - self.model = self._get_model() + self.model = self._get_model(self.feat_type) # Bad style, but necessary for unit testing that self.model is # actually a new model self._added_empty_model = True @@ -835,7 +835,7 @@ def _partial_fit_and_predict_iterative( test_indices: List[int], add_model_to_self: bool, ) -> None: - model = self._get_model() + model = self._get_model(self.feat_type) self.indices[fold] = (train_indices, test_indices) @@ -1053,7 +1053,7 @@ def _partial_fit_and_predict_budget( # Add this statement for mypy assert self.budget is not None - model = self._get_model() + model = self._get_model(self.feat_type) self.indices[fold] = (train_indices, test_indices) self.X_targets[fold] = self.X_train[test_indices] self.Y_targets[fold] = self.Y_train[test_indices] From de09993285e785a16bcb1cc5f103936922aed037 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 14:29:49 +0200 Subject: [PATCH 08/63] fixing ensemble builder --- autosklearn/pipeline/base.py | 10 +++++----- 
autosklearn/pipeline/classification.py | 8 +++++--- autosklearn/pipeline/components/base.py | 6 +++--- .../components/classification/__init__.py | 2 +- .../components/data_preprocessing/__init__.py | 4 ++-- .../data_preprocessing/balancing/balancing.py | 2 +- .../categorical_encoding/__init__.py | 2 +- .../categorical_encoding/encoding.py | 2 +- .../categorical_encoding/no_encoding.py | 2 +- .../categorical_encoding/one_hot_encoding.py | 2 +- .../category_shift/category_shift.py | 2 +- .../data_preprocessing/feature_type.py | 16 +++++++--------- .../feature_type_categorical.py | 6 +++--- .../feature_type_numerical.py | 6 +++--- .../data_preprocessing/feature_type_text.py | 6 +++--- .../imputation/categorical_imputation.py | 2 +- .../imputation/numerical_imputation.py | 2 +- .../minority_coalescense/__init__.py | 18 ++++++++++-------- .../minority_coalescer.py | 2 +- .../minority_coalescense/no_coalescense.py | 2 +- .../data_preprocessing/rescaling/__init__.py | 4 ++-- .../rescaling/abstract_rescaling.py | 4 ++-- .../rescaling/quantile_transformer.py | 2 +- .../rescaling/robust_scaler.py | 2 +- .../text_encoding/__init__.py | 18 ++++++++++-------- .../text_encoding/bag_of_word_encoding.py | 2 +- .../bag_of_word_encoding_distinct.py | 2 +- .../text_encoding/tfidf_encoding.py | 2 +- .../text_feature_reduction/truncated_svd.py | 2 +- .../variance_threshold/variance_threshold.py | 2 +- .../feature_preprocessing/__init__.py | 2 +- .../extra_trees_preproc_for_classification.py | 2 +- .../extra_trees_preproc_for_regression.py | 2 +- .../feature_agglomeration.py | 2 +- .../feature_preprocessing/kernel_pca.py | 2 +- .../feature_preprocessing/kitchen_sinks.py | 2 +- .../liblinear_svc_preprocessor.py | 2 +- .../feature_preprocessing/no_preprocessing.py | 2 +- .../feature_preprocessing/nystroem_sampler.py | 2 +- .../components/feature_preprocessing/pca.py | 2 +- .../feature_preprocessing/polynomial.py | 2 +- .../random_trees_embedding.py | 2 +- 
.../select_percentile_classification.py | 2 +- .../select_percentile_regression.py | 2 +- .../select_rates_classification.py | 2 +- .../select_rates_regression.py | 2 +- .../feature_preprocessing/truncatedSVD.py | 2 +- autosklearn/pipeline/regression.py | 8 ++++---- autosklearn/util/pipeline.py | 19 ++++++++++--------- 49 files changed, 104 insertions(+), 99 deletions(-) diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 25247a0d76..9e25af6802 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -34,7 +34,7 @@ class BasePipeline(Pipeline): def __init__( self, - feat_type, + feat_type=None, config=None, steps=None, dataset_properties=None, @@ -204,7 +204,7 @@ def predict(self, X, batch_size=None): return y - def set_hyperparameters(self, configuration, feat_type, init_params=None): + def set_hyperparameters(self, configuration, feat_type=None, init_params=None): self.config = configuration for node_idx, n_ in enumerate(self.steps): @@ -250,7 +250,7 @@ def set_hyperparameters(self, configuration, feat_type, init_params=None): return self - def get_hyperparameter_search_space(self, feat_type, dataset_properties=None): + def get_hyperparameter_search_space(self, feat_type=None, dataset_properties=None): """Return the configuration space for the CASH problem. Returns @@ -269,7 +269,7 @@ def get_hyperparameter_search_space(self, feat_type, dataset_properties=None): return self.config_space def _get_hyperparameter_search_space( - self, feat_type, include=None, exclude=None, dataset_properties=None + self, feat_type=None, include=None, exclude=None, dataset_properties=None ): """Return the configuration space for the CASH problem. 
@@ -509,7 +509,7 @@ def __repr__(self): return rval - def _get_pipeline_steps(self, dataset_properties, feat_type): + def _get_pipeline_steps(self, dataset_properties, feat_type=None): raise NotImplementedError() def _get_estimator_hyperparameter_name(self): diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index c74b336b4c..7819cd9e8f 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -70,7 +70,7 @@ class SimpleClassificationPipeline(BasePipeline, ClassifierMixin): def __init__( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, config: Optional[Configuration] = None, steps=None, dataset_properties=None, @@ -168,12 +168,14 @@ def predict_proba(self, X, batch_size=None): return y def _get_hyperparameter_search_space( - self, feat_type, include=None, exclude=None, dataset_properties=None + self, feat_type=None, include=None, exclude=None, dataset_properties=None ): """Create the hyperparameter configuration space. 
Parameters ---------- + feat_type : dict, maps columns to their datatypes + include : dict (optional, default=None) Returns @@ -347,7 +349,7 @@ def _get_hyperparameter_search_space( self.dataset_properties = dataset_properties return cs - def _get_pipeline_steps(self, dataset_properties, feat_type): + def _get_pipeline_steps(self, dataset_properties, feat_type=None): steps = [] default_dataset_properties = {"target_type": "classification"} diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py index 98a2c22a80..59aa210248 100644 --- a/autosklearn/pipeline/components/base.py +++ b/autosklearn/pipeline/components/base.py @@ -136,7 +136,7 @@ def fit(self, X, y): for further information.""" raise NotImplementedError() - def set_hyperparameters(self, configuration, feat_type, init_params=None): + def set_hyperparameters(self, configuration, feat_type=None, init_params=None): params = configuration.get_dictionary() for param, value in params.items(): @@ -339,7 +339,7 @@ def get_estimator(self): class AutoSklearnChoice(object): - def __init__(self, dataset_properties, feat_type, random_state=None): + def __init__(self, dataset_properties, feat_type=None, random_state=None): """ Parameters ---------- @@ -414,7 +414,7 @@ def get_available_components( return components_dict - def set_hyperparameters(self, configuration, feat_type, init_params=None): + def set_hyperparameters(self, configuration, feat_type=None, init_params=None): new_params = {} params = configuration.get_dictionary() diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index 073e7325e0..e7d30e9e0b 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -86,7 +86,7 @@ def get_available_components( return components_dict def get_hyperparameter_search_space( - self, feat_type, dataset_properties=None,
default=None, include=None, exclude=None + self, feat_type=None, dataset_properties=None, default=None, include=None, exclude=None ): if dataset_properties is None: dataset_properties = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index bbe805e519..24b6788d63 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -105,7 +105,7 @@ def get_available_components( def get_hyperparameter_search_space( self, - feat_type, + feat_type=None, dataset_properties: Optional[Dict] = None, default: str = None, include: Optional[Dict] = None, @@ -152,7 +152,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.choice.transform(X) def set_hyperparameters( - self, feat_type, configuration: ConfigurationSpace, init_params: Optional[Dict] = None + self, configuration: ConfigurationSpace, init_params: Optional[Dict] = None, feat_type=None ) -> "DataPreprocessorChoice": config = {} params = configuration.get_dictionary() diff --git a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py index 7e04082112..d9a4958bde 100644 --- a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py +++ b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py @@ -139,7 +139,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type=None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: # TODO add replace by zero! 
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py index ae52062c2b..bb640b484b 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py @@ -38,7 +38,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, - feat_type, + feat_type=None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py index 56d9ca16fa..570ac0f730 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py @@ -69,7 +69,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type=None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py index 0a7eaaf802..9e356d9f41 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py @@ -44,7 +44,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = 
ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py index c223f165d4..f6afe06c8e 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py @@ -55,7 +55,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py index 5008f406d8..3af659331e 100644 --- a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py +++ b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py @@ -63,7 +63,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type=None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index 0fcf3d98e6..ceaec426ef 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -1,8 +1,6 @@ from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np -import json -import os import sklearn.compose from ConfigSpace import Configuration from ConfigSpace.configuration_space import ConfigurationSpace @@ -77,8 +75,8 @@ def __init__( self._transformers: 
List[Tuple[str, AutoSklearnComponent]] = [] - if self.feat_type is None: - raise ValueError("feat_type init requires feat_type") + # if self.feat_type is None: + # raise ValueError("feat_type init requires feat_type") # The pipeline that will be applied to the categorical features (i.e. columns) # of the dataset @@ -88,7 +86,7 @@ def __init__( # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed self.categ_ppl = None - if "categorical" in self.feat_type.values(): + if "categorical" in self.feat_type.values() or self.feat_type is None: self.categ_ppl = CategoricalPreprocessingPipeline( feat_type=self.feat_type, config=None, @@ -108,7 +106,7 @@ def __init__( # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed self.numer_ppl = None - if "numerical" in self.feat_type.values(): + if "numerical" in self.feat_type.values() or self.feat_type is None: self.numer_ppl = NumericalPreprocessingPipeline( feat_type=self.feat_type, config=None, @@ -129,7 +127,7 @@ def __init__( # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed self.txt_ppl = None - if "string" in self.feat_type.values(): + if "string" in self.feat_type.values() or self.feat_type is None: self.txt_ppl = TextPreprocessingPipeline( feat_type=self.feat_type, config=None, @@ -298,7 +296,7 @@ def set_hyperparameters( def get_hyperparameter_search_space( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: self.dataset_properties = dataset_properties @@ -313,10 +311,10 @@ def get_hyperparameter_search_space( @staticmethod def _get_hyperparameter_search_space_recursevely( - feat_type, dataset_properties: DATASET_PROPERTIES_TYPE, cs: ConfigurationSpace, transformer: BaseEstimator, + feat_type: Optional[Dict[Union[str, int], str]] = None ) -> ConfigurationSpace: for st_name, 
st_operation in transformer: if hasattr(st_operation, "get_hyperparameter_search_space"): diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py index 599221187b..5020a81b5b 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py @@ -46,7 +46,7 @@ class CategoricalPreprocessingPipeline(BasePipeline): def __init__( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, config: Optional[Configuration] = None, steps: Optional[List[Tuple[str, BaseEstimator]]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -94,7 +94,7 @@ def get_properties( def _get_hyperparameter_search_space( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -124,7 +124,7 @@ def _get_hyperparameter_search_space( def _get_pipeline_steps( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[Dict[str, str]] = None, ) -> List[Tuple[str, BaseEstimator]]: steps = [] diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py index a50ede3985..fbba3b9172 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py @@ -39,7 +39,7 @@ class NumericalPreprocessingPipeline(BasePipeline): def __init__( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, config: Optional[Configuration] = None, steps: Optional[List[Tuple[str, BaseEstimator]]] = None, 
dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -87,7 +87,7 @@ def get_properties( def _get_hyperparameter_search_space( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -120,7 +120,7 @@ def _get_hyperparameter_search_space( def _get_pipeline_steps( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[Dict[str, str]] = None, ) -> List[Tuple[str, BaseEstimator]]: steps = [] diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py index e3d7078de2..beb4c099e0 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py @@ -34,7 +34,7 @@ class TextPreprocessingPipeline(BasePipeline): def __init__( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, config: Optional[Configuration] = None, steps: Optional[List[Tuple[str, BaseEstimator]]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -81,7 +81,7 @@ def get_properties( def _get_hyperparameter_search_space( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -114,7 +114,7 @@ def _get_hyperparameter_search_space( def _get_pipeline_steps( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[Dict[str, str]] = None, ) -> List[Tuple[str, BaseEstimator]]: steps = [] diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py 
b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py index 40f3d1e93a..65a1542018 100644 --- a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py +++ b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py @@ -91,7 +91,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py index 99ab3a33c0..b5945ca6a1 100644 --- a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py +++ b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py @@ -62,7 +62,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: # TODO add replace by zero! 
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py index 6eff766085..61f8ebb83b 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union import os from collections import OrderedDict @@ -37,12 +37,12 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: return components def get_hyperparameter_search_space( - self, - feat_type, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, - default: Optional[str] = None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, + self, + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + default: Optional[str] = None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() @@ -87,7 +87,9 @@ def get_hyperparameter_search_space( return cs def set_hyperparameters( - self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + self, configuration: Configuration, + init_params: Optional[Dict[str, Any]] = None, + feat_type: Optional[Dict[Union[str, int], str]] = None ) -> "CoalescenseChoice": new_params = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py index 9052188190..a64bdbcc9d 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py +++ 
b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py @@ -15,7 +15,7 @@ class MinorityCoalescer(AutoSklearnPreprocessingAlgorithm): def __init__( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, minimum_fraction: float = 0.01, random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py index 8c6314988e..40bbe5beaa 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py @@ -43,7 +43,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py index 8f3caedd83..4eaddd0999 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional +from typing import Dict, Optional, Union import os from collections import OrderedDict @@ -42,7 +42,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py 
b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py index 7955d90b28..b0cc348673 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import Optional, Union, Dict import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace @@ -38,7 +38,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py index 0c840fb44f..a797a5769a 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py @@ -62,7 +62,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py index 9b4e01843d..b9d25235d3 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py @@ -59,7 +59,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] 
= None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py index 3cc71bc314..9c018894dc 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union import os from collections import OrderedDict @@ -38,12 +38,12 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: return components def get_hyperparameter_search_space( - self, - feat_type, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, - default: Optional[str] = None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, + self, + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + default: Optional[str] = None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() @@ -89,7 +89,9 @@ def get_hyperparameter_search_space( return cs def set_hyperparameters( - self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + self, configuration: Configuration, + init_params: Optional[Dict[str, Any]] = None, + feat_type: Optional[Dict[Union[str, int], str]] = None ) -> "BagOfWordChoice": new_params = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py index 92692b1dda..9810006ffa 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py +++ 
b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py @@ -95,7 +95,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py index b2e6c0598d..2f23276824 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py @@ -101,7 +101,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py index 577cddf7d2..36238b4fa8 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py @@ -100,7 +100,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py index 1562b57249..be3ab9b00a 100644 
--- a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py @@ -74,7 +74,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py index 33f7a1a996..9849e348ed 100644 --- a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py +++ b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py @@ -49,7 +49,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + ffeat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index bff511fe6e..0dbb4128d1 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -101,7 +101,7 @@ def get_available_components( return components_dict def get_hyperparameter_search_space( - self, feat_type, dataset_properties=None, default=None, include=None, exclude=None + self, feat_type=None, dataset_properties=None, default=None, include=None, exclude=None ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py 
b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py index 084fe8e40d..c45f9b1cf2 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py @@ -123,7 +123,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): cs = ConfigurationSpace() n_estimators = Constant("n_estimators", 100) diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py index 23914b713f..98a5a5700c 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py @@ -125,7 +125,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): cs = ConfigurationSpace() n_estimators = Constant("n_estimators", 100) diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py index 95ae7cee49..0076f14121 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py +++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py @@ -63,7 +63,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): cs 
= ConfigurationSpace() n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25) affinity = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py index 0fad8bc6b3..27b3446d57 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py @@ -82,7 +82,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): n_components = UniformIntegerHyperparameter( "n_components", 10, 2000, default_value=100 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py index 22d1cfd248..93673e75e9 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py @@ -69,7 +69,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): gamma = UniformFloatHyperparameter( "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py index f7ee87d80f..43135da483 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py +++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py @@ -91,7 +91,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, 
dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): cs = ConfigurationSpace() penalty = Constant("penalty", "l1") diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py index 016a44dd7b..0597cfcabe 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py +++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py @@ -34,6 +34,6 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py index 24ca8977a0..f93d8bbff3 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py +++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py @@ -94,7 +94,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): if dataset_properties is not None and ( dataset_properties.get("sparse") is True or dataset_properties.get("signed") is False diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py index a1ad9f3981..d86b38cf22 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py @@ -55,7 +55,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def 
get_hyperparameter_search_space(feat_type=None, dataset_properties=None): keep_variance = UniformFloatHyperparameter( "keep_variance", 0.5, 0.9999, default_value=0.9999 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py index d17e6d43a4..0d4b166f35 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py +++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py @@ -54,7 +54,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): # More than degree 3 is too expensive! degree = UniformIntegerHyperparameter("degree", 2, 3, 2) interaction_only = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py index b6966b27ae..60b7df0c3a 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py +++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py @@ -94,7 +94,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): n_estimators = UniformIntegerHyperparameter( name="n_estimators", lower=10, upper=100, default_value=10 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py index fdb15c50bd..3fa80f0ca1 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py +++ 
b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py @@ -110,7 +110,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): percentile = UniformFloatHyperparameter( name="percentile", lower=1, upper=99, default_value=50 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py index cdec8fe152..0f489f933f 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py @@ -53,7 +53,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): percentile = UniformFloatHyperparameter( "percentile", lower=1, upper=99, default_value=50 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py index cb002c9b6d..c21ff3d7cb 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py @@ -116,7 +116,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py 
b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py index e9a5f7b943..a708b18e9f 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py @@ -84,7 +84,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py index 214b0346d2..d515c9552a 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py +++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py @@ -48,7 +48,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type, dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): target_dim = UniformIntegerHyperparameter( "target_dim", 10, 256, default_value=128 ) diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py index 090083b5ba..cdeb73af45 100644 --- a/autosklearn/pipeline/regression.py +++ b/autosklearn/pipeline/regression.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import Optional, Union, Dict import copy from itertools import product @@ -67,7 +67,7 @@ class SimpleRegressionPipeline(RegressorMixin, BasePipeline): def __init__( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, config: Optional[Configuration] = None, steps=None, dataset_properties=None, @@ -114,7 +114,7 @@ def predict(self, X, batch_size=None): return y def _get_hyperparameter_search_space( - self, feat_type, 
include=None, exclude=None, dataset_properties=None + self, feat_type=None, include=None, exclude=None, dataset_properties=None ): """Return the configuration space for the CASH problem. @@ -262,7 +262,7 @@ def _get_hyperparameter_search_space( def _get_estimator_components(self): return regression_components._regressors - def _get_pipeline_steps(self, feat_type, dataset_properties, init_params=None): + def _get_pipeline_steps(self, dataset_properties, feat_type=None, init_params=None): steps = [] default_dataset_properties = {"target_type": "regression"} diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py index bc6a27a71c..61e52f27aa 100755 --- a/autosklearn/util/pipeline.py +++ b/autosklearn/util/pipeline.py @@ -13,12 +13,13 @@ ) from autosklearn.pipeline.classification import SimpleClassificationPipeline from autosklearn.pipeline.regression import SimpleRegressionPipeline +from autosklearn.data.xy_data_manager import XYDataManager __all__ = ["get_configuration_space"] def get_configuration_space( - datamanager: Dict[str, Any], + datamanager: XYDataManager, include: Optional[Dict[str, List[str]]] = None, exclude: Optional[Dict[str, List[str]]] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, @@ -27,8 +28,8 @@ def get_configuration_space( Parameters ---------- - info: Dict[str, Any] - Information about the dataset + datamanager: XYDataManager + XYDataManger object storing all important information about the dataset include: Optional[Dict[str, List[str]]] = None A dictionary of what components to include for each pipeline step @@ -53,7 +54,7 @@ def get_configuration_space( def _get_regression_configuration_space( - datamanager: Dict[str, Any], + datamanager: XYDataManager, include: Optional[Dict[str, List[str]]], exclude: Optional[Dict[str, List[str]]], random_state: Optional[Union[int, np.random.RandomState]] = None, @@ -62,8 +63,8 @@ def _get_regression_configuration_space( Parameters ---------- - info: Dict[str, Any] 
- Information about the dataset + datamanager: XYDataManager + XYDataManger object storing all important information about the dataset include: Optional[Dict[str, List[str]]] = None A dictionary of what components to include for each pipeline step @@ -101,7 +102,7 @@ def _get_regression_configuration_space( def _get_classification_configuration_space( - datamanager: Dict[str, Any], + datamanager: XYDataManager, include: Optional[Dict[str, List[str]]], exclude: Optional[Dict[str, List[str]]], random_state: Optional[Union[int, np.random.RandomState]] = None, @@ -110,8 +111,8 @@ def _get_classification_configuration_space( Parameters ---------- - info: Dict[str, Any] - Information about the dataset + datamanager: XYDataManager + XYDataManger object storing all important information about the dataset include: Optional[Dict[str, List[str]]] = None A dictionary of what components to include for each pipeline step From a5c9bad21022b9f9b4189bc00d1c470b7ec9ee0c Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 14:30:10 +0200 Subject: [PATCH 09/63] fixing ensemble builder --- autosklearn/pipeline/base.py | 5 ++++- .../minority_coalescense/minority_coalescer.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 9e25af6802..29f8702f26 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -282,6 +282,9 @@ def _get_hyperparameter_search_space( Parameters ---------- + feat_type: dict + python dictionary which maps the columns of the dataset to the data types + estimator_name : str Name of the estimator hyperparameter which will be used in the configuration space. 
For a classification task, this would be @@ -311,7 +314,7 @@ def _get_hyperparameter_search_space( raise NotImplementedError() def _get_base_search_space( - self, feat_type, cs, dataset_properties, exclude, include, pipeline + self, cs, dataset_properties, exclude, include, pipeline, feat_type=None ): if include is None: if self.include is None: diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py index a64bdbcc9d..737e8c85f1 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py @@ -60,7 +60,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() From 7f2f14b680bd62cb4f10d722960c765a5566c921 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 14:39:18 +0200 Subject: [PATCH 10/63] fixing ensemble builder --- autosklearn/pipeline/classification.py | 2 +- .../variance_threshold/variance_threshold.py | 2 +- .../test_data_preprocessing_numerical.py | 11 ++++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index 7819cd9e8f..4a6b267d1b 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import Optional, Union, Dict import copy from itertools import product diff --git a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py index 
9849e348ed..f11c07a2d2 100644 --- a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py +++ b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py @@ -49,7 +49,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - ffeat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py index d25cef2a2b..638a7e958d 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py @@ -11,13 +11,13 @@ class NumericalPreprocessingPipelineTest(unittest.TestCase): def test_data_type_consistency(self): X = np.random.rand(3, 4) - Y = NumericalPreprocessingPipeline().fit_transform(X) + Y = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X) self.assertFalse(sparse.issparse(Y)) X = sparse.csc_matrix( ([3.0, 6.0, 4.0, 5.0], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4) ) - Y = NumericalPreprocessingPipeline().fit_transform(X) + Y = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X) self.assertTrue(sparse.issparse(Y)) def test_fit_transform(self): @@ -37,12 +37,13 @@ def test_fit_transform(self): ] ) # noqa : matrix legibility # dense input - Yt = NumericalPreprocessingPipeline().fit_transform(X) + Yt = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X) np.testing.assert_array_almost_equal(Yt, Y1) # sparse input (uses with_mean=False) Y2 = 
np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]) / sdev X_sparse = sparse.csc_matrix(X) - Yt = NumericalPreprocessingPipeline().fit_transform(X_sparse) + Yt = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform( + X_sparse) np.testing.assert_array_almost_equal(Yt.todense(), Y2) def test_transform(self): @@ -51,7 +52,7 @@ def test_transform(self): ) # noqa : matrix legibility sdev = np.sqrt(2 / 3) # fit - NPP = NumericalPreprocessingPipeline() + NPP = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}) NPP.fit_transform(X1) # transform X2 = np.array([[1.0, 5.0, 8.0], [2.0, 6.0, 9.0], [3.0, 7.0, np.nan]]) From 68b051ec5f3882e8bd063d1bcb38d798be904053 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 19:22:31 +0200 Subject: [PATCH 11/63] fixing ensemble builder --- .../data_preprocessing/feature_type.py | 99 +++++++++---------- test/fixtures/ensembles.py | 23 ++--- .../data_preprocessing/test_balancing.py | 2 +- 3 files changed, 57 insertions(+), 67 deletions(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index ceaec426ef..10738aaed0 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -64,20 +64,6 @@ def __init__( self.feat_type = feat_type self.force_sparse_output = force_sparse_output - # load global feat_type - # f = open(f'{os.path.dirname(os.path.realpath(__file__))}/../../../feat_type.json') - # self.feat_type = json.load(f) - # is_number = True - # for key in self.feat_type.keys(): - # is_number *= key.isnumeric() - # if is_number: - # self.feat_type = {int(key): value for key, value in self.feat_type.items()} - - self._transformers: List[Tuple[str, AutoSklearnComponent]] = [] - - # if self.feat_type is None: - # raise ValueError("feat_type init requires 
feat_type") - # The pipeline that will be applied to the categorical features (i.e. columns) # of the dataset # Configuration of the data-preprocessor is different from the configuration of @@ -85,19 +71,16 @@ def __init__( # It is actually the call to set_hyperparameter who properly sets this argument # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed - self.categ_ppl = None - if "categorical" in self.feat_type.values() or self.feat_type is None: - self.categ_ppl = CategoricalPreprocessingPipeline( - feat_type=self.feat_type, - config=None, - steps=pipeline, - dataset_properties=dataset_properties, - include=include, - exclude=exclude, - random_state=random_state, - init_params=init_params, - ) - self._transformers.append(("categorical_transformer", self.categ_ppl)) + self.categ_ppl = CategoricalPreprocessingPipeline( + feat_type=self.feat_type, + config=None, + steps=pipeline, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + ) # The pipeline that will be applied to the numerical features (i.e. 
columns) # of the dataset # Configuration of the data-preprocessor is different from the configuration of @@ -105,19 +88,16 @@ def __init__( # It is actually the call to set_hyperparameter who properly sets this argument # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed - self.numer_ppl = None - if "numerical" in self.feat_type.values() or self.feat_type is None: - self.numer_ppl = NumericalPreprocessingPipeline( - feat_type=self.feat_type, - config=None, - steps=pipeline, - dataset_properties=dataset_properties, - include=include, - exclude=exclude, - random_state=random_state, - init_params=init_params, - ) - self._transformers.append(("numerical_transformer", self.numer_ppl)) + self.numer_ppl = NumericalPreprocessingPipeline( + feat_type=self.feat_type, + config=None, + steps=pipeline, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + ) # The pipeline that will be applied to the text features (i.e. 
columns) # of the dataset @@ -126,22 +106,31 @@ def __init__( # It is actually the call to set_hyperparameter who properly sets this argument # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed - self.txt_ppl = None - if "string" in self.feat_type.values() or self.feat_type is None: - self.txt_ppl = TextPreprocessingPipeline( - feat_type=self.feat_type, - config=None, - steps=pipeline, - dataset_properties=dataset_properties, - include=include, - exclude=exclude, - random_state=random_state, - init_params=init_params, - ) - self._transformers.append(("text_transformer", self.txt_ppl)) + self.txt_ppl = TextPreprocessingPipeline( + feat_type=self.feat_type, + config=None, + steps=pipeline, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + ) - if self.config: - self.set_hyperparameters(feat_type=self.feat_type, configuration=self.config, init_params=init_params) + if self.feat_type is None: + self._transformers: List[Tuple[str, AutoSklearnComponent]] = [("categorical_transformer", self.categ_ppl), + ("numerical_transformer", self.numer_ppl), + ("text_transformer", self.txt_ppl)] + else: + self._transformers: List[Tuple[str, AutoSklearnComponent]] = [] + if "categorical" in self.feat_type.values(): + self._transformers.append(("categorical_transformer", self.categ_ppl)) + if "numerical" in self.feat_type.values(): + self._transformers.append(("numerical_transformer", self.numer_ppl)) + if "string" in self.feat_type.values(): + self._transformers.append(("text_transformer", self.txt_ppl)) + if self.config: + self.set_hyperparameters(feat_type=self.feat_type, configuration=self.config, init_params=init_params) self.column_transformer = column_transformer def fit( diff --git a/test/fixtures/ensembles.py b/test/fixtures/ensembles.py index 467c53822f..82673d2c2d 100644 --- a/test/fixtures/ensembles.py +++ b/test/fixtures/ensembles.py @@ -36,15 
+36,15 @@ def make_voting_classifier() -> Callable[..., VotingClassifier]: """ def _make( - X: Optional[SUPPORTED_FEAT_TYPES] = None, - y: Optional[SUPPORTED_TARGET_TYPES] = None, - models: Optional[Collection[AutoSklearnClassificationAlgorithm]] = None, - seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED, + X: Optional[SUPPORTED_FEAT_TYPES] = None, + y: Optional[SUPPORTED_TARGET_TYPES] = None, + models: Optional[Collection[AutoSklearnClassificationAlgorithm]] = None, + seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED, ) -> VotingClassifier: assert not (X is None) ^ (y is None) - if not models: - models = [MyDummyClassifier(config=1, random_state=seed) for _ in range(5)] + models = [MyDummyClassifier(feat_type={i: "numerical" for i in range(4)}, config=1, random_state=seed) for _ + in range(5)] if X is not None: for model in models: @@ -73,15 +73,16 @@ def make_voting_regressor() -> Callable[..., VotingRegressor]: """ def _make( - X: Optional[SUPPORTED_FEAT_TYPES] = None, - y: Optional[SUPPORTED_TARGET_TYPES] = None, - models: Optional[Collection[AutoSklearnRegressionAlgorithm]] = None, - seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED, + X: Optional[SUPPORTED_FEAT_TYPES] = None, + y: Optional[SUPPORTED_TARGET_TYPES] = None, + models: Optional[Collection[AutoSklearnRegressionAlgorithm]] = None, + seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED, ) -> VotingRegressor: assert not (X is None) ^ (y is None) if not models: - models = [MyDummyRegressor(config=1, random_state=seed) for _ in range(5)] + models = [MyDummyRegressor(feat_type={i: "numerical" for i in range(4)}, config=1, random_state=seed) for _ + in range(5)] if X is not None: for model in models: diff --git a/test/test_pipeline/components/data_preprocessing/test_balancing.py b/test/test_pipeline/components/data_preprocessing/test_balancing.py index 6a76ce419c..a128559833 100644 --- a/test/test_pipeline/components/data_preprocessing/test_balancing.py +++ 
b/test/test_pipeline/components/data_preprocessing/test_balancing.py @@ -215,7 +215,7 @@ def test_weighting_effect(self): default._values["balancing:strategy"] = strategy classifier = SimpleClassificationPipeline( - default, random_state=1, include=include + config=default, random_state=1, include=include ) Xt, fit_params = classifier.fit_transformer(X_train, Y_train) classifier.fit_estimator(Xt, Y_train, **fit_params) From b940a974a068847fea21b64a8fc1ea1f0da58e5b Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 19:46:01 +0200 Subject: [PATCH 12/63] fixing ensemble builder --- test/test_evaluation/test_dummy_pipelines.py | 2 +- test/test_evaluation/test_test_evaluator.py | 10 +++++++--- test/test_evaluation/test_train_evaluator.py | 15 +++++++++------ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/test/test_evaluation/test_dummy_pipelines.py b/test/test_evaluation/test_dummy_pipelines.py index 8d1005e178..1d7b1b5f83 100644 --- a/test/test_evaluation/test_dummy_pipelines.py +++ b/test/test_evaluation/test_dummy_pipelines.py @@ -23,8 +23,8 @@ def test_dummy_pipeline(task_type: str) -> None: pytest.fail(task_type) return - estimator = estimator_class(config=1, random_state=0) X, y = data_maker(random_state=0) + estimator = estimator_class(feat_type={i: "numerical" for i in range(X.shape[0])}, config=1, random_state=0) estimator.fit(X, y) check_is_fitted(estimator) diff --git a/test/test_evaluation/test_test_evaluator.py b/test/test_evaluation/test_test_evaluator.py index 457661df03..79af5a112d 100644 --- a/test/test_evaluation/test_test_evaluator.py +++ b/test/test_evaluation/test_test_evaluator.py @@ -86,12 +86,16 @@ def test_datasets(self): self.assertTrue(np.isfinite(rval[0]["loss"])) +class DummyDatamanager(): + def __init__(self): + self.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False} + self.feat_type = {0: 'numerical', 1: 'Numerical', 2: 'numerical', 3: 'numerical'} + + class FunctionsTest(unittest.TestCase): def 
setUp(self): self.queue = multiprocessing.Queue() - self.configuration = get_configuration_space( - {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False} - ).get_default_configuration() + self.configuration = get_configuration_space(DummyDatamanager()).get_default_configuration() self.data = get_multiclass_classification_datamanager() self.tmp_dir = os.path.join(os.path.dirname(__file__), ".test_cv_functions") self.backend = unittest.mock.Mock(spec=Backend) diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py index 9413af5509..034453fa9d 100644 --- a/test/test_evaluation/test_train_evaluator.py +++ b/test/test_evaluation/test_train_evaluator.py @@ -2940,13 +2940,16 @@ def test_holdout_split_size(self, te_mock): self.assertEqual(len(train_samples), 6) self.assertEqual(len(test_samples), 3) +class DummyDatamanager(): + def __init__(self): + self.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False} + self.feat_type = {0: 'numerical', 1: 'Numerical', 2: 'numerical', 3: 'numerical'} + class FunctionsTest(unittest.TestCase): def setUp(self): self.queue = multiprocessing.Queue() - self.configuration = get_configuration_space( - {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False} - ).get_default_configuration() + self.configuration = get_configuration_space(DummyDatamanager()).get_default_configuration() self.data = get_multiclass_classification_datamanager() self.tmp_dir = os.path.join( os.path.dirname(__file__), ".test_holdout_functions" @@ -3205,7 +3208,7 @@ def test_eval_holdout_budget_iterations_multi_objective(self): def test_eval_holdout_budget_iterations_converged_multi_objective(self): configuration = get_configuration_space( exclude={"classifier": ["random_forest", "liblinear_svc"]}, - info={"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}, + datamanager=DummyDatamanager(), ).get_default_configuration() eval_holdout( queue=self.queue, @@ -3240,7 +3243,7 @@ def 
test_eval_holdout_budget_iterations_converged(self): } configuration = get_configuration_space( exclude={"classifier": ["random_forest", "liblinear_svc"]}, - info={"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}, + datamanager=DummyDatamanager(), ).get_default_configuration() eval_holdout( queue=self.queue, @@ -3357,7 +3360,7 @@ def test_eval_holdout_budget_mixed_iterations(self): def test_eval_holdout_budget_mixed_subsample(self): configuration = get_configuration_space( exclude={"classifier": ["random_forest"]}, - info={"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}, + datamanager=DummyDatamanager(), ).get_default_configuration() self.assertEqual(configuration["classifier:__choice__"], "liblinear_svc") eval_holdout( From f12419016bc05b426e36cf2c18ac7834f0245d20 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 20:06:59 +0200 Subject: [PATCH 13/63] fixing ensemble builder --- .../minority_coalescense/no_coalescense.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py index 40bbe5beaa..84025f3f17 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py @@ -10,12 +10,13 @@ class NoCoalescence(AutoSklearnPreprocessingAlgorithm): def __init__( - self, random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None, + feat_type: Optional[Dict[Union[str, int], str]] = None ) -> None: pass def fit( - self, X: np.array, y: Optional[PIPELINE_DATA_DTYPE] = None + self, X: np.array, y: Optional[PIPELINE_DATA_DTYPE] = None ) -> PIPELINE_DATA_DTYPE: self.preprocessor = "passthrough" return self @@ -25,7 +26,7 @@ def transform(self, X: 
PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: @staticmethod def get_properties( - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: return { "shortname": "no coalescence", @@ -43,8 +44,8 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() return cs From 91007492d0bf63db0a14a295f09c04393cd0fd39 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 20:20:02 +0200 Subject: [PATCH 14/63] fixing ensemble builder --- autosklearn/evaluation/train_evaluator.py | 4 +- .../metalearning/metalearning/meta_base.py | 4 +- autosklearn/pipeline/base.py | 24 +++++++---- autosklearn/pipeline/classification.py | 2 +- autosklearn/pipeline/components/base.py | 7 +++- .../components/classification/__init__.py | 7 +++- .../components/data_preprocessing/__init__.py | 8 ++-- .../categorical_encoding/__init__.py | 5 ++- .../data_preprocessing/feature_type.py | 42 +++++++++++++------ .../feature_type_categorical.py | 2 +- .../data_preprocessing/feature_type_text.py | 7 ++-- .../minority_coalescense/__init__.py | 19 +++++---- .../minority_coalescense/no_coalescense.py | 13 +++--- .../data_preprocessing/rescaling/__init__.py | 4 +- .../rescaling/abstract_rescaling.py | 2 +- .../text_encoding/__init__.py | 23 +++++----- .../feature_preprocessing/__init__.py | 7 +++- .../components/regression/__init__.py | 7 +++- autosklearn/pipeline/regression.py | 4 +- autosklearn/util/pipeline.py | 34 ++++++++------- test/fixtures/ensembles.py | 36 ++++++++++------ test/test_evaluation/test_dummy_pipelines.py | 4 +- test/test_evaluation/test_test_evaluator.py | 13 
++++-- test/test_evaluation/test_train_evaluator.py | 14 +++++-- .../test_data_preprocessing_numerical.py | 21 +++++++--- 25 files changed, 207 insertions(+), 106 deletions(-) diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index 228a685c37..c27b3c36f3 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -321,7 +321,9 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_test_pred = [None] * self.num_cv_folds train_splits = [None] * self.num_cv_folds - self.models = [self._get_model(self.feat_type) for i in range(self.num_cv_folds)] + self.models = [ + self._get_model(self.feat_type) for i in range(self.num_cv_folds) + ] iterations = [1] * self.num_cv_folds total_n_iterations = [0] * self.num_cv_folds # model.estimator_supports_iterative_fit -> true diff --git a/autosklearn/metalearning/metalearning/meta_base.py b/autosklearn/metalearning/metalearning/meta_base.py index 61f16297fe..a85ec6a279 100644 --- a/autosklearn/metalearning/metalearning/meta_base.py +++ b/autosklearn/metalearning/metalearning/meta_base.py @@ -42,7 +42,9 @@ def __init__(self, configuration_space, aslib_directory, logger): self.configuration_space = configuration_space self.aslib_directory = aslib_directory - aslib_reader = aslib_simple.AlgorithmSelectionProblem(self.aslib_directory, self.configuration_space) + aslib_reader = aslib_simple.AlgorithmSelectionProblem( + self.aslib_directory, self.configuration_space + ) self.metafeatures = aslib_reader.metafeatures self.algorithm_runs: OrderedDict[ str, pd.DataFrame diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 29f8702f26..eacc4ffacf 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -54,7 +54,9 @@ def __init__( self.feat_type = feat_type if steps is None: - self.steps = self._get_pipeline_steps(feat_type=feat_type, dataset_properties=dataset_properties) + self.steps = 
self._get_pipeline_steps( + feat_type=feat_type, dataset_properties=dataset_properties + ) else: self.steps = steps @@ -84,7 +86,9 @@ def __init__( ) self.config = config - self.set_hyperparameters(self.config, feat_type=feat_type, init_params=init_params) + self.set_hyperparameters( + self.config, feat_type=feat_type, init_params=init_params + ) super().__init__(steps=self.steps) @@ -211,8 +215,7 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None): node_name, node = n_ sub_configuration_space = node.get_hyperparameter_search_space( - feat_type=feat_type, - dataset_properties=self.dataset_properties + feat_type=feat_type, dataset_properties=self.dataset_properties ) sub_config_dict = {} for param in configuration: @@ -239,7 +242,9 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None): node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline) ): node.set_hyperparameters( - feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict + feat_type=feat_type, + configuration=sub_configuration, + init_params=sub_init_params_dict, ) else: raise NotImplementedError("Not supported yet!") @@ -350,7 +355,10 @@ def _get_base_search_space( dataset_properties["signed"] = False matches = autosklearn.pipeline.create_searchspace_util.get_match_array( - pipeline=pipeline, dataset_properties=dataset_properties, include=include, exclude=exclude + pipeline=pipeline, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, ) # Now we have only legal combinations at this step of the pipeline @@ -393,7 +401,9 @@ def _get_base_search_space( ) ) sub_config_space = node.get_hyperparameter_search_space( - feat_type=feat_type, dataset_properties=dataset_properties, include=choices_list + feat_type=feat_type, + dataset_properties=dataset_properties, + include=choices_list, ) cs.add_configuration_space(node_name, sub_config_space) diff --git a/autosklearn/pipeline/classification.py 
b/autosklearn/pipeline/classification.py index 4a6b267d1b..203d26877c 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -1,4 +1,4 @@ -from typing import Optional, Union, Dict +from typing import Dict, Optional, Union import copy from itertools import product diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py index 59aa210248..c4d4485a7d 100644 --- a/autosklearn/pipeline/components/base.py +++ b/autosklearn/pipeline/components/base.py @@ -438,7 +438,12 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None): return self def get_hyperparameter_search_space( - self, feat_type, dataset_properties=None, default=None, include=None, exclude=None + self, + feat_type, + dataset_properties=None, + default=None, + include=None, + exclude=None, ): raise NotImplementedError() diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index e7d30e9e0b..ae9a09ff66 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -86,7 +86,12 @@ def get_available_components( return components_dict def get_hyperparameter_search_space( - self, feat_type=None, dataset_properties=None, default=None, include=None, exclude=None + self, + feat_type=None, + dataset_properties=None, + default=None, + include=None, + exclude=None, ): if dataset_properties is None: dataset_properties = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index 24b6788d63..80d0d2fc9e 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -137,8 +137,7 @@ def get_hyperparameter_search_space( cs.add_hyperparameter(preprocessor) for name in available_preprocessors: 
preprocessor_configuration_space = available_preprocessors[name]( - feat_type=feat_type, - dataset_properties=dataset_properties + feat_type=feat_type, dataset_properties=dataset_properties ).get_hyperparameter_search_space(dataset_properties) parent_hyperparameter = {"parent": preprocessor, "value": name} cs.add_configuration_space( @@ -152,7 +151,10 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.choice.transform(X) def set_hyperparameters( - self, configuration: ConfigurationSpace, init_params: Optional[Dict] = None, feat_type=None + self, + configuration: ConfigurationSpace, + init_params: Optional[Dict] = None, + feat_type=None, ) -> "DataPreprocessorChoice": config = {} params = configuration.get_dictionary() diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py index bb640b484b..40ddb6e9c6 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py @@ -87,7 +87,10 @@ def get_hyperparameter_search_space( return cs def set_hyperparameters( - self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + self, + feat_type, + configuration: Configuration, + init_params: Optional[Dict[str, Any]] = None, ) -> "OHEChoice": new_params = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index 10738aaed0..8e7d2be98d 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -118,9 +118,11 @@ def __init__( ) if self.feat_type is None: - self._transformers: List[Tuple[str, AutoSklearnComponent]] = [("categorical_transformer", self.categ_ppl), - 
("numerical_transformer", self.numer_ppl), - ("text_transformer", self.txt_ppl)] + self._transformers: List[Tuple[str, AutoSklearnComponent]] = [ + ("categorical_transformer", self.categ_ppl), + ("numerical_transformer", self.numer_ppl), + ("text_transformer", self.txt_ppl), + ] else: self._transformers: List[Tuple[str, AutoSklearnComponent]] = [] if "categorical" in self.feat_type.values(): @@ -130,7 +132,11 @@ def __init__( if "string" in self.feat_type.values(): self._transformers.append(("text_transformer", self.txt_ppl)) if self.config: - self.set_hyperparameters(feat_type=self.feat_type, configuration=self.config, init_params=init_params) + self.set_hyperparameters( + feat_type=self.feat_type, + configuration=self.config, + init_params=init_params, + ) self.column_transformer = column_transformer def fit( @@ -162,7 +168,9 @@ def fit( if value.lower() == "categorical" ] if len(categorical_features) > 0: - transformer_lst.append(("categorical_transformer", self.categ_ppl, categorical_features)) + transformer_lst.append( + ("categorical_transformer", self.categ_ppl, categorical_features) + ) numerical_features = [ key @@ -170,7 +178,9 @@ def fit( if value.lower() == "numerical" ] if len(numerical_features) > 0: - transformer_lst.append(("numerical_transformer", self.numer_ppl, numerical_features)) + transformer_lst.append( + ("numerical_transformer", self.numer_ppl, numerical_features) + ) text_features = [ key @@ -178,7 +188,9 @@ def fit( if value.lower() == "string" ] if len(text_features) > 0: - transformer_lst.append(("text_transformer", self.txt_ppl, text_features)) + transformer_lst.append( + ("text_transformer", self.txt_ppl, text_features) + ) sklearn_transf_spec = [ (name, transformer, feature_columns) @@ -240,7 +252,10 @@ def get_properties( } def set_hyperparameters( - self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + self, + feat_type, + configuration: Configuration, + init_params: Optional[Dict[str, Any]] 
= None, ) -> "FeatTypeSplit": if init_params is not None and "feat_type" in init_params.keys(): self.feat_type = init_params["feat_type"] @@ -249,8 +264,7 @@ def set_hyperparameters( for transf_name, transf_op in self._transformers: sub_configuration_space = transf_op.get_hyperparameter_search_space( - dataset_properties=self.dataset_properties, - feat_type=feat_type + dataset_properties=self.dataset_properties, feat_type=feat_type ) sub_config_dict = {} for param in configuration: @@ -276,7 +290,9 @@ def set_hyperparameters( transf_op, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline) ): transf_op.set_hyperparameters( - feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict + feat_type=feat_type, + configuration=sub_configuration, + init_params=sub_init_params_dict, ) else: raise NotImplementedError("Not supported yet!") @@ -294,7 +310,7 @@ def get_hyperparameter_search_space( feat_type=feat_type, dataset_properties=dataset_properties, cs=cs, - transformer=self._transformers + transformer=self._transformers, ) return cs @@ -303,7 +319,7 @@ def _get_hyperparameter_search_space_recursevely( dataset_properties: DATASET_PROPERTIES_TYPE, cs: ConfigurationSpace, transformer: BaseEstimator, - feat_type: Optional[Dict[Union[str, int], str]] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, ) -> ConfigurationSpace: for st_name, st_operation in transformer: if hasattr(st_operation, "get_hyperparameter_search_space"): diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py index 5020a81b5b..5ad0aabe70 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py @@ -64,7 +64,7 @@ def __init__( exclude=exclude, random_state=random_state, init_params=init_params, - feat_type=feat_type + 
feat_type=feat_type, ) @staticmethod diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py index beb4c099e0..5cbb962ae5 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py @@ -52,7 +52,7 @@ def __init__( exclude=exclude, random_state=random_state, init_params=init_params, - feat_type=feat_type + feat_type=feat_type, ) @staticmethod @@ -130,13 +130,12 @@ def _get_pipeline_steps( BagOfWordChoice( feat_type=feat_type, dataset_properties=default_dataset_properties, - random_state=self.random_state + random_state=self.random_state, ), ), ( "text_feature_reduction", - TextFeatureReduction( - random_state=self.random_state), + TextFeatureReduction(random_state=self.random_state), ), ] ) diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py index 61f8ebb83b..438a4ce681 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py @@ -37,12 +37,12 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: return components def get_hyperparameter_search_space( - self, - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, - default: Optional[str] = None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, + self, + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + default: Optional[str] = None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() 
@@ -87,9 +87,10 @@ def get_hyperparameter_search_space( return cs def set_hyperparameters( - self, configuration: Configuration, - init_params: Optional[Dict[str, Any]] = None, - feat_type: Optional[Dict[Union[str, int], str]] = None + self, + configuration: Configuration, + init_params: Optional[Dict[str, Any]] = None, + feat_type: Optional[Dict[Union[str, int], str]] = None, ) -> "CoalescenseChoice": new_params = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py index 84025f3f17..433d9a8247 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py @@ -10,13 +10,14 @@ class NoCoalescence(AutoSklearnPreprocessingAlgorithm): def __init__( - self, random_state: Optional[Union[int, np.random.RandomState]] = None, - feat_type: Optional[Dict[Union[str, int], str]] = None + self, + random_state: Optional[Union[int, np.random.RandomState]] = None, + feat_type: Optional[Dict[Union[str, int], str]] = None, ) -> None: pass def fit( - self, X: np.array, y: Optional[PIPELINE_DATA_DTYPE] = None + self, X: np.array, y: Optional[PIPELINE_DATA_DTYPE] = None ) -> PIPELINE_DATA_DTYPE: self.preprocessor = "passthrough" return self @@ -26,7 +27,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: @staticmethod def get_properties( - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: return { "shortname": "no coalescence", @@ -44,8 +45,8 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: 
Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py index 4eaddd0999..d7b01c7a93 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py @@ -75,7 +75,9 @@ def get_hyperparameter_search_space( for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[ name - ].get_hyperparameter_search_space(feat_type=feat_type, dataset_properties=dataset_properties) + ].get_hyperparameter_search_space( + feat_type=feat_type, dataset_properties=dataset_properties + ) parent_hyperparameter = {"parent": preprocessor, "value": name} cs.add_configuration_space( name, diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py index b0cc348673..ba97eee886 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py @@ -1,4 +1,4 @@ -from typing import Optional, Union, Dict +from typing import Dict, Optional, Union import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py index 9c018894dc..325beec9fa 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py @@ -38,12 +38,12 @@ def get_components(cls: BaseEstimator) -> Dict[str, 
BaseEstimator]: return components def get_hyperparameter_search_space( - self, - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, - default: Optional[str] = None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, + self, + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + default: Optional[str] = None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() @@ -76,7 +76,9 @@ def get_hyperparameter_search_space( for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[ name - ].get_hyperparameter_search_space(feat_type=feat_type, dataset_properties=dataset_properties) + ].get_hyperparameter_search_space( + feat_type=feat_type, dataset_properties=dataset_properties + ) parent_hyperparameter = {"parent": preprocessor, "value": name} cs.add_configuration_space( name, @@ -89,9 +91,10 @@ def get_hyperparameter_search_space( return cs def set_hyperparameters( - self, configuration: Configuration, - init_params: Optional[Dict[str, Any]] = None, - feat_type: Optional[Dict[Union[str, int], str]] = None + self, + configuration: Configuration, + init_params: Optional[Dict[str, Any]] = None, + feat_type: Optional[Dict[Union[str, int], str]] = None, ) -> "BagOfWordChoice": new_params = {} diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index 0dbb4128d1..d4f9bc6662 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -101,7 +101,12 @@ def get_available_components( return components_dict def get_hyperparameter_search_space( - self, feat_type=None, dataset_properties=None, default=None, 
include=None, exclude=None + self, + feat_type=None, + dataset_properties=None, + default=None, + include=None, + exclude=None, ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py index 457d9813d3..08452b4809 100644 --- a/autosklearn/pipeline/components/regression/__init__.py +++ b/autosklearn/pipeline/components/regression/__init__.py @@ -79,7 +79,12 @@ def get_available_components( return components_dict def get_hyperparameter_search_space( - self, feat_type, dataset_properties=None, default=None, include=None, exclude=None + self, + feat_type, + dataset_properties=None, + default=None, + include=None, + exclude=None, ): if include is not None and exclude is not None: raise ValueError( diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py index cdeb73af45..529953cb18 100644 --- a/autosklearn/pipeline/regression.py +++ b/autosklearn/pipeline/regression.py @@ -1,4 +1,4 @@ -from typing import Optional, Union, Dict +from typing import Dict, Optional, Union import copy from itertools import product @@ -292,7 +292,7 @@ def _get_pipeline_steps(self, dataset_properties, feat_type=None, init_params=No regression_components.RegressorChoice( feat_type=feat_type, dataset_properties=default_dataset_properties, - random_state=self.random_state + random_state=self.random_state, ), ], ] diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py index 61e52f27aa..78f94c2309 100755 --- a/autosklearn/util/pipeline.py +++ b/autosklearn/util/pipeline.py @@ -11,9 +11,9 @@ MULTIOUTPUT_REGRESSION, REGRESSION_TASKS, ) +from autosklearn.data.xy_data_manager import XYDataManager from autosklearn.pipeline.classification import SimpleClassificationPipeline from autosklearn.pipeline.regression import SimpleRegressionPipeline -from autosklearn.data.xy_data_manager import XYDataManager __all__ = ["get_configuration_space"] @@ -46,7 +46,9 @@ def 
get_configuration_space( The configuration space for the pipeline """ if datamanager.info["task"] in REGRESSION_TASKS: - return _get_regression_configuration_space(datamanager, include, exclude, random_state) + return _get_regression_configuration_space( + datamanager, include, exclude, random_state + ) else: return _get_classification_configuration_space( datamanager, include, exclude, random_state @@ -109,24 +111,24 @@ def _get_classification_configuration_space( ) -> ConfigurationSpace: """Get the configuration of a classification pipeline given some dataset info - Parameters - ---------- - datamanager: XYDataManager - XYDataManger object storing all important information about the dataset + Parameters + ---------- + datamanager: XYDataManager + XYDataManger object storing all important information about the dataset - include: Optional[Dict[str, List[str]]] = None - A dictionary of what components to include for each pipeline step + include: Optional[Dict[str, List[str]]] = None + A dictionary of what components to include for each pipeline step - exclude: Optional[Dict[str, List[str]]] = None - A dictionary of what components to exclude for each pipeline step + exclude: Optional[Dict[str, List[str]]] = None + A dictionary of what components to exclude for each pipeline step - random_state: Optional[Union[int, np.random.Randomstate]] = None - The random state to use for seeding the ConfigSpace + random_state: Optional[Union[int, np.random.Randomstate]] = None + The random state to use for seeding the ConfigSpace - Returns - ------- - ConfigurationSpace - The configuration space for the classification pipeline + Returns + ------- + ConfigurationSpace + The configuration space for the classification pipeline """ task_type = datamanager.info["task"] diff --git a/test/fixtures/ensembles.py b/test/fixtures/ensembles.py index 82673d2c2d..32bb706eee 100644 --- a/test/fixtures/ensembles.py +++ b/test/fixtures/ensembles.py @@ -36,15 +36,21 @@ def make_voting_classifier() 
-> Callable[..., VotingClassifier]: """ def _make( - X: Optional[SUPPORTED_FEAT_TYPES] = None, - y: Optional[SUPPORTED_TARGET_TYPES] = None, - models: Optional[Collection[AutoSklearnClassificationAlgorithm]] = None, - seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED, + X: Optional[SUPPORTED_FEAT_TYPES] = None, + y: Optional[SUPPORTED_TARGET_TYPES] = None, + models: Optional[Collection[AutoSklearnClassificationAlgorithm]] = None, + seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED, ) -> VotingClassifier: assert not (X is None) ^ (y is None) if not models: - models = [MyDummyClassifier(feat_type={i: "numerical" for i in range(4)}, config=1, random_state=seed) for _ - in range(5)] + models = [ + MyDummyClassifier( + feat_type={i: "numerical" for i in range(4)}, + config=1, + random_state=seed, + ) + for _ in range(5) + ] if X is not None: for model in models: @@ -73,16 +79,22 @@ def make_voting_regressor() -> Callable[..., VotingRegressor]: """ def _make( - X: Optional[SUPPORTED_FEAT_TYPES] = None, - y: Optional[SUPPORTED_TARGET_TYPES] = None, - models: Optional[Collection[AutoSklearnRegressionAlgorithm]] = None, - seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED, + X: Optional[SUPPORTED_FEAT_TYPES] = None, + y: Optional[SUPPORTED_TARGET_TYPES] = None, + models: Optional[Collection[AutoSklearnRegressionAlgorithm]] = None, + seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED, ) -> VotingRegressor: assert not (X is None) ^ (y is None) if not models: - models = [MyDummyRegressor(feat_type={i: "numerical" for i in range(4)}, config=1, random_state=seed) for _ - in range(5)] + models = [ + MyDummyRegressor( + feat_type={i: "numerical" for i in range(4)}, + config=1, + random_state=seed, + ) + for _ in range(5) + ] if X is not None: for model in models: diff --git a/test/test_evaluation/test_dummy_pipelines.py b/test/test_evaluation/test_dummy_pipelines.py index 1d7b1b5f83..cc7ea3c284 100644 --- 
a/test/test_evaluation/test_dummy_pipelines.py +++ b/test/test_evaluation/test_dummy_pipelines.py @@ -24,7 +24,9 @@ def test_dummy_pipeline(task_type: str) -> None: return X, y = data_maker(random_state=0) - estimator = estimator_class(feat_type={i: "numerical" for i in range(X.shape[0])}, config=1, random_state=0) + estimator = estimator_class( + feat_type={i: "numerical" for i in range(X.shape[0])}, config=1, random_state=0 + ) estimator.fit(X, y) check_is_fitted(estimator) diff --git a/test/test_evaluation/test_test_evaluator.py b/test/test_evaluation/test_test_evaluator.py index 79af5a112d..3decc30753 100644 --- a/test/test_evaluation/test_test_evaluator.py +++ b/test/test_evaluation/test_test_evaluator.py @@ -86,16 +86,23 @@ def test_datasets(self): self.assertTrue(np.isfinite(rval[0]["loss"])) -class DummyDatamanager(): +class DummyDatamanager: def __init__(self): self.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False} - self.feat_type = {0: 'numerical', 1: 'Numerical', 2: 'numerical', 3: 'numerical'} + self.feat_type = { + 0: "numerical", + 1: "Numerical", + 2: "numerical", + 3: "numerical", + } class FunctionsTest(unittest.TestCase): def setUp(self): self.queue = multiprocessing.Queue() - self.configuration = get_configuration_space(DummyDatamanager()).get_default_configuration() + self.configuration = get_configuration_space( + DummyDatamanager() + ).get_default_configuration() self.data = get_multiclass_classification_datamanager() self.tmp_dir = os.path.join(os.path.dirname(__file__), ".test_cv_functions") self.backend = unittest.mock.Mock(spec=Backend) diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py index 034453fa9d..a8db34d832 100644 --- a/test/test_evaluation/test_train_evaluator.py +++ b/test/test_evaluation/test_train_evaluator.py @@ -2940,16 +2940,24 @@ def test_holdout_split_size(self, te_mock): self.assertEqual(len(train_samples), 6) self.assertEqual(len(test_samples), 3) -class 
DummyDatamanager(): + +class DummyDatamanager: def __init__(self): self.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False} - self.feat_type = {0: 'numerical', 1: 'Numerical', 2: 'numerical', 3: 'numerical'} + self.feat_type = { + 0: "numerical", + 1: "Numerical", + 2: "numerical", + 3: "numerical", + } class FunctionsTest(unittest.TestCase): def setUp(self): self.queue = multiprocessing.Queue() - self.configuration = get_configuration_space(DummyDatamanager()).get_default_configuration() + self.configuration = get_configuration_space( + DummyDatamanager() + ).get_default_configuration() self.data = get_multiclass_classification_datamanager() self.tmp_dir = os.path.join( os.path.dirname(__file__), ".test_holdout_functions" diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py index 638a7e958d..6110793c8c 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py @@ -11,13 +11,17 @@ class NumericalPreprocessingPipelineTest(unittest.TestCase): def test_data_type_consistency(self): X = np.random.rand(3, 4) - Y = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X) + Y = NumericalPreprocessingPipeline( + feat_type={0: "numerical", 1: "numerical", 2: "numerical"} + ).fit_transform(X) self.assertFalse(sparse.issparse(Y)) X = sparse.csc_matrix( ([3.0, 6.0, 4.0, 5.0], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4) ) - Y = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X) + Y = NumericalPreprocessingPipeline( + feat_type={0: "numerical", 1: "numerical", 2: "numerical"} + ).fit_transform(X) self.assertTrue(sparse.issparse(Y)) def test_fit_transform(self): @@ -37,13 +41,16 @@ def 
test_fit_transform(self): ] ) # noqa : matrix legibility # dense input - Yt = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X) + Yt = NumericalPreprocessingPipeline( + feat_type={0: "numerical", 1: "numerical", 2: "numerical"} + ).fit_transform(X) np.testing.assert_array_almost_equal(Yt, Y1) # sparse input (uses with_mean=False) Y2 = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]) / sdev X_sparse = sparse.csc_matrix(X) - Yt = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform( - X_sparse) + Yt = NumericalPreprocessingPipeline( + feat_type={0: "numerical", 1: "numerical", 2: "numerical"} + ).fit_transform(X_sparse) np.testing.assert_array_almost_equal(Yt.todense(), Y2) def test_transform(self): @@ -52,7 +59,9 @@ def test_transform(self): ) # noqa : matrix legibility sdev = np.sqrt(2 / 3) # fit - NPP = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}) + NPP = NumericalPreprocessingPipeline( + feat_type={0: "numerical", 1: "numerical", 2: "numerical"} + ) NPP.fit_transform(X1) # transform X2 = np.array([[1.0, 5.0, 8.0], [2.0, 6.0, 9.0], [3.0, 7.0, np.nan]]) From 3149f8e0d3f6c1266b763dbd084ea1288a5fe6d2 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 20:27:55 +0200 Subject: [PATCH 15/63] fixing ensemble builder --- .../pipeline/components/data_preprocessing/feature_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index 8e7d2be98d..95633afbcc 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -155,7 +155,7 @@ def fit( try: # columns = [str(col) for col in columns] pass - except: + except Exception as e: raise ValueError( f"Train data has 
columns={expected} yet the" f" feat_types are feat={columns}" From e9428077a7442e3e3a7322c8ebe48dce6f0f6ee1 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 20:28:46 +0200 Subject: [PATCH 16/63] fixing ensemble builder --- autosklearn/util/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py index 78f94c2309..623958e792 100755 --- a/autosklearn/util/pipeline.py +++ b/autosklearn/util/pipeline.py @@ -1,5 +1,5 @@ # -*- encoding: utf-8 -*- -from typing import Any, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace From a27ca67b6fb4572f911753b25159db8da3248193 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 21:17:19 +0200 Subject: [PATCH 17/63] fixing ensemble builder --- .../pipeline/components/data_preprocessing/feature_type.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index 95633afbcc..ad6b6cfe85 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -158,7 +158,8 @@ def fit( except Exception as e: raise ValueError( f"Train data has columns={expected} yet the" - f" feat_types are feat={columns}" + f" feat_types are feat={columns}\n" + f"Exception: {e}" ) transformer_lst = [] From ecb3801e80f6ec08688c585c364c6add489c86a9 Mon Sep 17 00:00:00 2001 From: lukas Date: Sat, 11 Jun 2022 22:07:17 +0200 Subject: [PATCH 18/63] fixing ensemble builder --- scripts/02_retrieve_metadata.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py index f87f65ecc4..931ad499f5 100644 --- a/scripts/02_retrieve_metadata.py +++ 
b/scripts/02_retrieve_metadata.py @@ -178,6 +178,11 @@ def write_output(outputs, configurations, output_dir, configuration_space, metri fh.write("%s: %s\n" % (key, description[key])) +class DummyDatamanager(): + def __init__(self, info): + self.info = info + self.feat_type = {"A1": "numerical"} + def main(): parser = ArgumentParser() @@ -220,7 +225,7 @@ def main(): ) configuration_space = pipeline.get_configuration_space( - {"is_sparse": sparse, "task": task} + DummyDatamanager({"is_sparse": sparse, "task": task}) ) outputs, configurations = retrieve_matadata( From 0f39c36f025341c9f4e185c166c47357d6b24476 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 14 Jun 2022 13:36:00 +0200 Subject: [PATCH 19/63] fixing ensemble builder --- autosklearn/experimental/askl2.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py index bc6f78764b..4d68a3bf6b 100644 --- a/autosklearn/experimental/askl2.py +++ b/autosklearn/experimental/askl2.py @@ -110,8 +110,9 @@ def __call__( initial_configurations = [] for member in self.portfolio.values(): try: + _member = {key: member[key] for key in member if key in scenario.cs.get_hyperparameter_names()} initial_configurations.append( - Configuration(configuration_space=scenario.cs, values=member) + Configuration(configuration_space=scenario.cs, values=_member) ) except ValueError: pass @@ -162,8 +163,9 @@ def __call__( initial_configurations = [] for member in self.portfolio.values(): try: + _member = {key: member[key] for key in member if key in scenario.cs.get_hyperparameter_names()} initial_configurations.append( - Configuration(configuration_space=scenario.cs, values=member) + Configuration(configuration_space=scenario.cs, values=_member) ) except ValueError: pass From cc0ffd2ecaa929796ae966b60a1aa1fa30b7d066 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 15 Jun 2022 11:16:52 +0200 Subject: [PATCH 20/63] fixing ensemble builder --- 
.../components/data_preprocessing/feature_type.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index ad6b6cfe85..919f0416ab 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -131,12 +131,14 @@ def __init__( self._transformers.append(("numerical_transformer", self.numer_ppl)) if "string" in self.feat_type.values(): self._transformers.append(("text_transformer", self.txt_ppl)) - if self.config: - self.set_hyperparameters( - feat_type=self.feat_type, - configuration=self.config, - init_params=init_params, - ) + + if self.config: + self.set_hyperparameters( + feat_type=self.feat_type, + configuration=self.config, + init_params=init_params, + ) + self.column_transformer = column_transformer def fit( From 561d40e02d6b505666d957c65a0c24332e5670c1 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 15 Jun 2022 11:23:56 +0200 Subject: [PATCH 21/63] fixing ensemble builder --- autosklearn/experimental/askl2.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py index 4d68a3bf6b..beb68d43e0 100644 --- a/autosklearn/experimental/askl2.py +++ b/autosklearn/experimental/askl2.py @@ -110,7 +110,11 @@ def __call__( initial_configurations = [] for member in self.portfolio.values(): try: - _member = {key: member[key] for key in member if key in scenario.cs.get_hyperparameter_names()} + _member = { + key: member[key] + for key in member + if key in scenario.cs.get_hyperparameter_names() + } initial_configurations.append( Configuration(configuration_space=scenario.cs, values=_member) ) @@ -163,7 +167,11 @@ def __call__( initial_configurations = [] for member in self.portfolio.values(): try: - _member = {key: member[key] 
for key in member if key in scenario.cs.get_hyperparameter_names()} + _member = { + key: member[key] + for key in member + if key in scenario.cs.get_hyperparameter_names() + } initial_configurations.append( Configuration(configuration_space=scenario.cs, values=_member) ) From f6cc8a5aacb797037feb4a5c106b03dda52773ee Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 15 Jun 2022 12:13:08 +0200 Subject: [PATCH 22/63] fixing ensemble builder --- .../components/feature_preprocessing/nystroem_sampler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py index f93d8bbff3..a236999b9f 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py +++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py @@ -51,10 +51,11 @@ def fit(self, X, Y=None): if self.kernel == "chi2": if scipy.sparse.issparse(X): X.data[X.data < 0] = 0.0 + X = X.todense() else: X[X < 0] = 0.0 - self.preprocessor.fit(X.astype(np.float64)) + self.preprocessor.fit(X) return self def transform(self, X): @@ -65,6 +66,7 @@ def transform(self, X): if self.kernel == "chi2": if scipy.sparse.issparse(X): X.data[X.data < 0] = 0.0 + X = X.todense() else: X[X < 0] = 0.0 From 37b08b8aadbf706d09536b32a6f2ee234dcee013 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 15 Jun 2022 12:50:04 +0200 Subject: [PATCH 23/63] fixing ensemble builder --- .../components/feature_preprocessing/nystroem_sampler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py index a236999b9f..456e326e83 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py +++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py @@ -1,4 +1,3 @@ -import 
numpy as np from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( From ed3da30b50a3337ab8b4178331772ae966ea7e98 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 15 Jun 2022 16:32:51 +0200 Subject: [PATCH 24/63] fix search space bug --- autosklearn/evaluation/abstract_evaluator.py | 4 ++-- .../pipeline/components/data_preprocessing/__init__.py | 9 ++++----- .../components/data_preprocessing/balancing/balancing.py | 2 +- .../data_preprocessing/categorical_encoding/__init__.py | 6 +++--- .../data_preprocessing/categorical_encoding/encoding.py | 2 +- .../data_preprocessing/category_shift/category_shift.py | 2 +- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index 4735db13e9..ab29c4519f 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -45,7 +45,7 @@ def __init__( self, config: Configuration, random_state: Optional[Union[int, np.random.RandomState]], - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, init_params: Optional[Dict[str, Any]] = None, dataset_properties: Dict[str, Any] = {}, include: Optional[List[str]] = None, @@ -110,7 +110,7 @@ def __init__( self, config: Configuration, random_state: Optional[Union[int, np.random.RandomState]], - feat_type, + feat_type: Optional[Dict[Union[str, int], str]] = None, init_params: Optional[Dict[str, Any]] = None, dataset_properties: Dict[str, Any] = {}, include: Optional[List[str]] = None, diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index 80d0d2fc9e..25a5342846 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -1,4 +1,4 @@ -from typing 
import Dict, Optional, Type +from typing import Dict, Optional, Type, Union import os from collections import OrderedDict @@ -105,7 +105,7 @@ def get_available_components( def get_hyperparameter_search_space( self, - feat_type=None, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[Dict] = None, default: str = None, include: Optional[Dict] = None, @@ -154,7 +154,7 @@ def set_hyperparameters( self, configuration: ConfigurationSpace, init_params: Optional[Dict] = None, - feat_type=None, + feat_type: Optional[Dict[Union[str, int], str]] = None, ) -> "DataPreprocessorChoice": config = {} params = configuration.get_dictionary() @@ -166,12 +166,11 @@ def set_hyperparameters( config[param] = value new_params = {} - # feat_type = None if init_params is not None: for param, value in init_params.items(): param = param.replace(choice, "").split(":", 1)[-1] if "feat_type" in param: - feat_type = value + continue else: new_params[param] = value self.choice = self.get_components()[choice]( diff --git a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py index d9a4958bde..2cdd112ba6 100644 --- a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py +++ b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py @@ -139,7 +139,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type=None, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: # TODO add replace by zero! 
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py index 40ddb6e9c6..887bff593e 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union import os from collections import OrderedDict @@ -38,7 +38,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, - feat_type=None, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, @@ -88,7 +88,7 @@ def get_hyperparameter_search_space( def set_hyperparameters( self, - feat_type, + feat_type: Optional[Dict[Union[str, int], str]], configuration: Configuration, init_params: Optional[Dict[str, Any]] = None, ) -> "OHEChoice": diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py index 570ac0f730..a8a2d0a89d 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py @@ -69,7 +69,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type=None, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py index 
3af659331e..2d5e5607bd 100644 --- a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py +++ b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py @@ -63,7 +63,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type=None, + feat_type: Optional[Dict[Union[str, int], str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() From 4690854fd571241cbe4637d267249cfd1619e63b Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 15 Jun 2022 17:30:32 +0200 Subject: [PATCH 25/63] fix search space bug --- autosklearn/pipeline/components/data_preprocessing/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index 25a5342846..9ebe1d0679 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -170,7 +170,7 @@ def set_hyperparameters( for param, value in init_params.items(): param = param.replace(choice, "").split(":", 1)[-1] if "feat_type" in param: - continue + feat_type = value else: new_params[param] = value self.choice = self.get_components()[choice]( From aff8c04eec8ac3ff91414368f53590a72a2aac65 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 16 Jun 2022 13:10:10 +0200 Subject: [PATCH 26/63] fix search space bug --- autosklearn/evaluation/abstract_evaluator.py | 2 +- autosklearn/evaluation/train_evaluator.py | 2 +- .../metalearning/input/aslib_simple.py | 5 ++- autosklearn/pipeline/base.py | 44 +++++++++++++------ autosklearn/pipeline/classification.py | 12 ++++- autosklearn/pipeline/components/base.py | 23 +++++++--- .../components/classification/__init__.py | 14 +++--- .../components/data_preprocessing/__init__.py | 2 +- .../categorical_encoding/__init__.py | 2 +- 
.../data_preprocessing/feature_type.py | 4 +- .../minority_coalescense/__init__.py | 2 +- .../feature_preprocessing/__init__.py | 13 +++--- .../extra_trees_preproc_for_classification.py | 8 +++- .../extra_trees_preproc_for_regression.py | 8 +++- .../feature_agglomeration.py | 8 +++- .../feature_preprocessing/kernel_pca.py | 8 +++- .../feature_preprocessing/kitchen_sinks.py | 8 +++- .../liblinear_svc_preprocessor.py | 8 +++- .../feature_preprocessing/no_preprocessing.py | 8 +++- .../components/feature_preprocessing/pca.py | 8 +++- .../feature_preprocessing/polynomial.py | 8 +++- .../random_trees_embedding.py | 8 +++- .../select_percentile_classification.py | 8 +++- .../select_percentile_regression.py | 8 +++- .../select_rates_classification.py | 8 +++- .../select_rates_regression.py | 3 ++ .../feature_preprocessing/truncatedSVD.py | 8 +++- .../components/regression/__init__.py | 14 +++--- autosklearn/pipeline/regression.py | 23 +++++++--- autosklearn/util/pipeline.py | 24 +++++----- .../data_preprocessing/test_scaling.py | 2 +- 31 files changed, 220 insertions(+), 83 deletions(-) diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index ab29c4519f..0638b0a57d 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -303,7 +303,7 @@ def __init__( # Please mypy to prevent not defined attr self.model = self._get_model(feat_type=self.feat_type) - def _get_model(self, feat_type) -> BaseEstimator: + def _get_model(self, feat_type: Optional[Dict[Union[str, int], str]]) -> BaseEstimator: if not isinstance(self.configuration, Configuration): model = self.model_class( feat_type=feat_type, diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index c27b3c36f3..eb5b735742 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -987,7 +987,7 @@ def 
_partial_fit_and_predict_standard( PIPELINE_DATA_DTYPE, # test_pred TYPE_ADDITIONAL_INFO, ]: - model = self._get_model(feat_type=self.feat_type) + model = self._get_model(self.feat_type) self.indices[fold] = (train_indices, test_indices) diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py index c495c5cd69..4d84e6fe97 100644 --- a/autosklearn/metalearning/input/aslib_simple.py +++ b/autosklearn/metalearning/input/aslib_simple.py @@ -1,6 +1,7 @@ import csv import logging import os +from ConfigSpace.configuration_space import ConfigurationSpace from collections import OrderedDict, defaultdict import arff @@ -8,7 +9,9 @@ class AlgorithmSelectionProblem(object): - def __init__(self, directory, cs): + def __init__(self, + directory: str, + cs: ConfigurationSpace): self.logger = logging.getLogger(__name__) # Create data structures diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 29305f3b77..d1aba5e138 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -1,9 +1,9 @@ from abc import ABCMeta -from typing import Dict, Union +from typing import Dict, Union, Optional, Any import numpy as np import scipy.sparse -from ConfigSpace import Configuration +from ConfigSpace import Configuration, ConfigurationSpace from sklearn.pipeline import Pipeline import autosklearn.pipeline.create_searchspace_util @@ -34,14 +34,14 @@ class BasePipeline(Pipeline): def __init__( self, - feat_type=None, - config=None, + feat_type: Optional[Dict[Union[str, int], str]] = None, + config: Optional[Configuration] = None, steps=None, - dataset_properties=None, - include=None, - exclude=None, - random_state=None, - init_params=None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict[str, Any]] = 
None, ): self.init_params = init_params if init_params is not None else {} @@ -208,7 +208,10 @@ def predict(self, X, batch_size=None): return y - def set_hyperparameters(self, configuration, feat_type=None, init_params=None): + def set_hyperparameters(self, + configuration: Configuration, + feat_type: Optional[Dict[Union[str, int], str]] = None, + init_params: Optional[Dict[str, Any]] = None): self.config = configuration for node_idx, n_ in enumerate(self.steps): @@ -255,7 +258,9 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None): return self - def get_hyperparameter_search_space(self, feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space(self, + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None): """Return the configuration space for the CASH problem. Returns @@ -274,7 +279,11 @@ def get_hyperparameter_search_space(self, feat_type=None, dataset_properties=Non return self.config_space def _get_hyperparameter_search_space( - self, feat_type=None, include=None, exclude=None, dataset_properties=None + self, + feat_type: Optional[Dict[Union[str, int], str]] = None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None ): """Return the configuration space for the CASH problem. 
@@ -319,7 +328,13 @@ def _get_hyperparameter_search_space( raise NotImplementedError() def _get_base_search_space( - self, cs, dataset_properties, exclude, include, pipeline, feat_type=None + self, + cs: ConfigurationSpace, + dataset_properties: DATASET_PROPERTIES_TYPE, + include: Dict[str, str], + exclude: Dict[str, str], + pipeline, + feat_type: Optional[Dict[Union[str, int], str]] = None ): if include is None: if self.include is None: @@ -385,7 +400,8 @@ def _get_base_search_space( if not is_choice: cs.add_configuration_space( node_name, - node.get_hyperparameter_search_space(dataset_properties), + node.get_hyperparameter_search_space(dataset_properties=dataset_properties, + feat_type=feat_type), ) # If the node is a choice, we have to figure out which of its # choices are actually legal choices diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index 203d26877c..fece85823e 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -19,6 +19,8 @@ ) from autosklearn.pipeline.constants import SPARSE +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class SimpleClassificationPipeline(BasePipeline, ClassifierMixin): """This class implements the classification task. @@ -168,7 +170,11 @@ def predict_proba(self, X, batch_size=None): return y def _get_hyperparameter_search_space( - self, feat_type=None, include=None, exclude=None, dataset_properties=None + self, + feat_type: Optional[Dict[Union[str, int], str]] = None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None ): """Create the hyperparameter configuration space. 
@@ -349,7 +355,9 @@ def _get_hyperparameter_search_space( self.dataset_properties = dataset_properties return cs - def _get_pipeline_steps(self, dataset_properties, feat_type=None): + def _get_pipeline_steps(self, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE], + feat_type: Optional[Dict[Union[str, int], str]] = None): steps = [] default_dataset_properties = {"target_type": "classification"} diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py index c4d4485a7d..b2ff65db02 100644 --- a/autosklearn/pipeline/components/base.py +++ b/autosklearn/pipeline/components/base.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, Optional, Union, Any import importlib import inspect @@ -6,10 +6,13 @@ import sys from collections import OrderedDict +from ConfigSpace.configuration_space import Configuration from sklearn.base import BaseEstimator, TransformerMixin from autosklearn.pipeline.constants import SPARSE +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + _addons = dict() # type: Dict[str, 'ThirdPartyComponents'] @@ -98,7 +101,10 @@ def get_properties(dataset_properties=None): raise NotImplementedError() @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): """Return the configuration space of this classification algorithm. 
Parameters @@ -136,7 +142,10 @@ def fit(self, X, y): for further information.""" raise NotImplementedError() - def set_hyperparameters(self, configuration, feat_type=None, init_params=None): + def set_hyperparameters(self, + configuration: Configuration, + feat_type: Optional[Dict[Union[str, int], str]] = None, + init_params: Optional[Dict[str, Any]] = None): params = configuration.get_dictionary() for param, value in params.items(): @@ -439,11 +448,11 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None): def get_hyperparameter_search_space( self, - feat_type, - dataset_properties=None, + feat_type: Dict[Union[str, int], str], + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default=None, - include=None, - exclude=None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, ): raise NotImplementedError() diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index ae9a09ff66..605f44aa05 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -1,6 +1,6 @@ __author__ = "feurerm" -from typing import Type +from typing import Type, Dict, Union, Optional import os from collections import OrderedDict @@ -23,6 +23,8 @@ additional_components = ThirdPartyComponents(AutoSklearnClassificationAlgorithm) _addons["classification"] = additional_components +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + def add_classifier(classifier: Type[AutoSklearnClassificationAlgorithm]) -> None: additional_components.add_component(classifier) @@ -87,11 +89,11 @@ def get_available_components( def get_hyperparameter_search_space( self, - feat_type=None, - dataset_properties=None, + feat_type: Dict[Union[str, int], str], + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default=None, - include=None, - exclude=None, + include: 
Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, ): if dataset_properties is None: dataset_properties = {} @@ -131,7 +133,7 @@ def get_hyperparameter_search_space( for estimator_name in available_estimators.keys(): estimator_configuration_space = available_estimators[ estimator_name - ].get_hyperparameter_search_space(dataset_properties) + ].get_hyperparameter_search_space(dataset_properties=dataset_properties) parent_hyperparameter = {"parent": estimator, "value": estimator_name} cs.add_configuration_space( estimator_name, diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index 9ebe1d0679..848bbb5913 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -138,7 +138,7 @@ def get_hyperparameter_search_space( for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[name]( feat_type=feat_type, dataset_properties=dataset_properties - ).get_hyperparameter_search_space(dataset_properties) + ).get_hyperparameter_search_space(dataset_properties=dataset_properties) parent_hyperparameter = {"parent": preprocessor, "value": name} cs.add_configuration_space( name, diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py index 887bff593e..ba3a555027 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py @@ -74,7 +74,7 @@ def get_hyperparameter_search_space( for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[ name - ].get_hyperparameter_search_space(dataset_properties) + 
].get_hyperparameter_search_space(dataset_properties=dataset_properties) parent_hyperparameter = {"parent": preprocessor, "value": name} cs.add_configuration_space( name, diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index 919f0416ab..ac1978e80a 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -256,7 +256,7 @@ def get_properties( def set_hyperparameters( self, - feat_type, + feat_type: Dict[Union[str, int], str], configuration: Configuration, init_params: Optional[Dict[str, Any]] = None, ) -> "FeatTypeSplit": @@ -328,7 +328,7 @@ def _get_hyperparameter_search_space_recursevely( if hasattr(st_operation, "get_hyperparameter_search_space"): cs.add_configuration_space( st_name, - st_operation.get_hyperparameter_search_space(dataset_properties), + st_operation.get_hyperparameter_search_space(dataset_properties=dataset_properties), ) else: return FeatTypeSplit._get_hyperparameter_search_space_recursevely( diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py index 438a4ce681..bef957b7f4 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py @@ -74,7 +74,7 @@ def get_hyperparameter_search_space( for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[ name - ].get_hyperparameter_search_space(dataset_properties) + ].get_hyperparameter_search_space(dataset_properties=dataset_properties) parent_hyperparameter = {"parent": preprocessor, "value": name} cs.add_configuration_space( name, diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py 
b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index d4f9bc6662..db4b2983e0 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -1,4 +1,4 @@ -from typing import Type +from typing import Dict, Optional, Union, Type import os from collections import OrderedDict @@ -21,6 +21,7 @@ additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) _addons["feature_preprocessing"] = additional_components +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None: additional_components.add_component(preprocessor) @@ -102,11 +103,11 @@ def get_available_components( def get_hyperparameter_search_space( self, - feat_type=None, - dataset_properties=None, + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default=None, - include=None, - exclude=None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, ): cs = ConfigurationSpace() @@ -135,7 +136,7 @@ def get_hyperparameter_search_space( for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[ name - ].get_hyperparameter_search_space(dataset_properties) + ].get_hyperparameter_search_space(dataset_properties=dataset_properties) parent_hyperparameter = {"parent": preprocessor, "value": name} cs.add_configuration_space( name, diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py index c45f9b1cf2..d2c178adb2 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py @@ 
-1,3 +1,4 @@ +from typing import Dict, Union, Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -11,6 +12,8 @@ from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class ExtraTreesPreprocessorClassification(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -123,7 +126,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): cs = ConfigurationSpace() n_estimators = Constant("n_estimators", 100) diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py index 98a5a5700c..73fb9f3e11 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -12,6 +13,8 @@ from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class ExtraTreesPreprocessorRegression(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -125,7 +128,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def 
get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): cs = ConfigurationSpace() n_estimators = Constant("n_estimators", 100) diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py index 0076f14121..8af0e130e8 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py +++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.forbidden import ( @@ -13,6 +14,8 @@ from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class FeatureAgglomeration(AutoSklearnPreprocessingAlgorithm): def __init__(self, n_clusters, affinity, linkage, pooling_func, random_state=None): @@ -63,7 +66,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): cs = ConfigurationSpace() n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25) affinity = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py index 27b3446d57..bec513aec6 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py @@ -1,3 +1,4 @@ +from typing import Dict, 
Union, Optional import warnings import numpy as np @@ -12,6 +13,8 @@ from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class KernelPCA(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -82,7 +85,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): n_components = UniformIntegerHyperparameter( "n_components", 10, 2000, default_value=100 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py index 93673e75e9..6f9d6cd9f3 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional from typing import Optional, Union from ConfigSpace.configuration_space import ConfigurationSpace @@ -10,6 +11,8 @@ from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class RandomKitchenSinks(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -69,7 +72,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): gamma = UniformFloatHyperparameter( "gamma", 3.0517578125e-05, 8, default_value=1.0, 
log=True ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py index 43135da483..c7b770e7d2 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py +++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause from ConfigSpace.hyperparameters import ( @@ -10,6 +11,8 @@ from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class LibLinear_Preprocessor(AutoSklearnPreprocessingAlgorithm): # Liblinear is not deterministic as it uses a RNG inside @@ -91,7 +94,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): cs = ConfigurationSpace() penalty = Constant("penalty", "l1") diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py index 0597cfcabe..dc76a4a2a3 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py +++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py @@ -1,8 +1,11 @@ +from typing import Dict, Union, Optional from ConfigSpace.configuration_space import ConfigurationSpace from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, 
INPUT, SPARSE, UNSIGNED_DATA +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class NoPreprocessing(AutoSklearnPreprocessingAlgorithm): def __init__(self, random_state): @@ -34,6 +37,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py index d86b38cf22..761525c8da 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -9,6 +10,8 @@ from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class PCA(AutoSklearnPreprocessingAlgorithm): def __init__(self, keep_variance, whiten, random_state=None): @@ -55,7 +58,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): keep_variance = UniformFloatHyperparameter( "keep_variance", 0.5, 0.9999, default_value=0.9999 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py index 0d4b166f35..c563b1afbb 100644 --- 
a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py +++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -8,6 +9,8 @@ from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class PolynomialFeatures(AutoSklearnPreprocessingAlgorithm): def __init__(self, degree, interaction_only, include_bias, random_state=None): @@ -54,7 +57,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): # More than degree 3 is too expensive! 
degree = UniformIntegerHyperparameter("degree", 2, 3, 2) interaction_only = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py index 60b7df0c3a..c41b051e9b 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py +++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -10,6 +11,8 @@ from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class RandomTreesEmbedding(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -94,7 +97,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): n_estimators = UniformIntegerHyperparameter( name="n_estimators", lower=10, upper=100, default_value=10 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py index 3fa80f0ca1..b5265ba122 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional from functools import partial from ConfigSpace.configuration_space import ConfigurationSpace @@ -19,6 
+20,8 @@ UNSIGNED_DATA, ) +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class SelectPercentileClassification( SelectPercentileBase, AutoSklearnPreprocessingAlgorithm @@ -110,7 +113,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): percentile = UniformFloatHyperparameter( name="percentile", lower=1, upper=99, default_value=50 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py index 0f489f933f..021305777f 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional from functools import partial from ConfigSpace.configuration_space import ConfigurationSpace @@ -12,6 +13,8 @@ ) from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class SelectPercentileRegression( SelectPercentileBase, AutoSklearnPreprocessingAlgorithm @@ -53,7 +56,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): percentile = UniformFloatHyperparameter( "percentile", lower=1, upper=99, default_value=50 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py 
b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py index c21ff3d7cb..d93f8e9528 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional from functools import partial from ConfigSpace import NotEqualsCondition @@ -16,6 +17,8 @@ UNSIGNED_DATA, ) +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class SelectClassificationRates(AutoSklearnPreprocessingAlgorithm): def __init__(self, alpha, mode="fpr", score_func="chi2", random_state=None): @@ -116,7 +119,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py index a708b18e9f..fcc59ab2c7 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional from functools import partial from ConfigSpace import NotEqualsCondition @@ -10,6 +11,8 @@ from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class SelectRegressionRates(AutoSklearnPreprocessingAlgorithm): def __init__( diff --git 
a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py index d515c9552a..4c9c84bf4d 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py +++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py @@ -1,9 +1,12 @@ +from typing import Dict, Union, Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformIntegerHyperparameter from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class TruncatedSVD(AutoSklearnPreprocessingAlgorithm): def __init__(self, target_dim, random_state=None): @@ -48,7 +51,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): target_dim = UniformIntegerHyperparameter( "target_dim", 10, 256, default_value=128 ) diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py index 08452b4809..5773aec45b 100644 --- a/autosklearn/pipeline/components/regression/__init__.py +++ b/autosklearn/pipeline/components/regression/__init__.py @@ -1,4 +1,4 @@ -from typing import Type +from typing import Type, Dict, Union, Optional import os from collections import OrderedDict @@ -21,6 +21,8 @@ additional_components = ThirdPartyComponents(AutoSklearnRegressionAlgorithm) _addons["regression"] = additional_components +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + def add_regressor(regressor: Type[AutoSklearnRegressionAlgorithm]) -> None: 
additional_components.add_component(regressor) @@ -80,11 +82,11 @@ def get_available_components( def get_hyperparameter_search_space( self, - feat_type, - dataset_properties=None, + feat_type: Dict[Union[str, int], str], + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default=None, - include=None, - exclude=None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, ): if include is not None and exclude is not None: raise ValueError( @@ -121,7 +123,7 @@ def get_hyperparameter_search_space( for estimator_name in available_estimators.keys(): estimator_configuration_space = available_estimators[ estimator_name - ].get_hyperparameter_search_space(dataset_properties) + ].get_hyperparameter_search_space(dataset_properties=dataset_properties) parent_hyperparameter = {"parent": estimator, "value": estimator_name} cs.add_configuration_space( estimator_name, diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py index 529953cb18..d8358c5628 100644 --- a/autosklearn/pipeline/regression.py +++ b/autosklearn/pipeline/regression.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Dict, Optional, Union, Any import copy from itertools import product @@ -16,6 +16,8 @@ from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice from autosklearn.pipeline.constants import SPARSE +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class SimpleRegressionPipeline(RegressorMixin, BasePipeline): """This class implements the regression task. 
@@ -70,11 +72,11 @@ def __init__( feat_type: Optional[Dict[Union[str, int], str]] = None, config: Optional[Configuration] = None, steps=None, - dataset_properties=None, - include=None, - exclude=None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params=None, + init_params: Optional[Dict[str, Any]] = None, ): self._output_dtype = np.float32 if dataset_properties is None: @@ -114,7 +116,11 @@ def predict(self, X, batch_size=None): return y def _get_hyperparameter_search_space( - self, feat_type=None, include=None, exclude=None, dataset_properties=None + self, + feat_type: Optional[Dict[Union[str, int], str]] = None, + include: Optional[Dict[str, str]] = None, + exclude: Optional[Dict[str, str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None ): """Return the configuration space for the CASH problem. 
@@ -262,7 +268,10 @@ def _get_hyperparameter_search_space( def _get_estimator_components(self): return regression_components._regressors - def _get_pipeline_steps(self, dataset_properties, feat_type=None, init_params=None): + def _get_pipeline_steps(self, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE], + feat_type: Optional[Dict[Union[str, int], str]] = None, + init_params: Optional[Dict[str, Any]] = None): steps = [] default_dataset_properties = {"target_type": "regression"} diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py index 623958e792..6ff74b0fb7 100755 --- a/autosklearn/util/pipeline.py +++ b/autosklearn/util/pipeline.py @@ -111,24 +111,24 @@ def _get_classification_configuration_space( ) -> ConfigurationSpace: """Get the configuration of a classification pipeline given some dataset info - Parameters - ---------- + Parameters + ---------- datamanager: XYDataManager XYDataManger object storing all important information about the dataset - include: Optional[Dict[str, List[str]]] = None - A dictionary of what components to include for each pipeline step + include: Optional[Dict[str, List[str]]] = None + A dictionary of what components to include for each pipeline step - exclude: Optional[Dict[str, List[str]]] = None - A dictionary of what components to exclude for each pipeline step + exclude: Optional[Dict[str, List[str]]] = None + A dictionary of what components to exclude for each pipeline step - random_state: Optional[Union[int, np.random.Randomstate]] = None - The random state to use for seeding the ConfigSpace + random_state: Optional[Union[int, np.random.Randomstate]] = None + The random state to use for seeding the ConfigSpace - Returns - ------- - ConfigurationSpace - The configuration space for the classification pipeline + Returns + ------- + ConfigurationSpace + The configuration space for the classification pipeline """ task_type = datamanager.info["task"] diff --git 
a/test/test_pipeline/components/data_preprocessing/test_scaling.py b/test/test_pipeline/components/data_preprocessing/test_scaling.py index b87223d14d..faa5b3f1e1 100644 --- a/test/test_pipeline/components/data_preprocessing/test_scaling.py +++ b/test/test_pipeline/components/data_preprocessing/test_scaling.py @@ -19,7 +19,7 @@ def _test_helper(self, Preprocessor, dataset=None, make_sparse=False): original_X_train = X_train.copy() configuration_space = Preprocessor( dataset_properties - ).get_hyperparameter_search_space(dataset_properties) + ).get_hyperparameter_search_space(dataset_properties=dataset_properties) default = configuration_space.get_default_configuration() preprocessor = Preprocessor(dataset_properties, random_state=1) From e4e9fe33d6dc9410b9e5e5a5b28d2d9ca8d06f10 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 16 Jun 2022 13:10:26 +0200 Subject: [PATCH 27/63] fix search space bug --- .../components/feature_preprocessing/nystroem_sampler.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py index 456e326e83..29cc83603a 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py +++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py @@ -1,3 +1,4 @@ +from typing import Dict, Union, Optional from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -15,6 +16,8 @@ UNSIGNED_DATA, ) +DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + class Nystroem(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -65,7 +68,6 @@ def transform(self, X): if self.kernel == "chi2": if scipy.sparse.issparse(X): X.data[X.data < 0] = 0.0 - X = X.todense() else: X[X < 0] = 0.0 @@ -95,7 +97,10 @@ def 
get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): if dataset_properties is not None and ( dataset_properties.get("sparse") is True or dataset_properties.get("signed") is False From be72171f84d84fba65f0c637542bd62b29e89a6d Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 16 Jun 2022 13:27:16 +0200 Subject: [PATCH 28/63] fix search space bug --- autosklearn/evaluation/abstract_evaluator.py | 4 ++- .../metalearning/input/aslib_simple.py | 6 ++-- autosklearn/pipeline/base.py | 29 ++++++++++------- autosklearn/pipeline/classification.py | 20 ++++++------ autosklearn/pipeline/components/base.py | 16 ++++++---- .../components/classification/__init__.py | 2 +- .../data_preprocessing/feature_type.py | 32 ++++++++++--------- .../feature_preprocessing/__init__.py | 3 +- .../extra_trees_preproc_for_classification.py | 7 ++-- .../extra_trees_preproc_for_regression.py | 7 ++-- .../feature_agglomeration.py | 7 ++-- .../feature_preprocessing/kernel_pca.py | 7 ++-- .../feature_preprocessing/kitchen_sinks.py | 7 ++-- .../liblinear_svc_preprocessor.py | 7 ++-- .../feature_preprocessing/no_preprocessing.py | 7 ++-- .../feature_preprocessing/nystroem_sampler.py | 7 ++-- .../components/feature_preprocessing/pca.py | 7 ++-- .../feature_preprocessing/polynomial.py | 7 ++-- .../random_trees_embedding.py | 7 ++-- .../select_percentile_classification.py | 7 ++-- .../select_percentile_regression.py | 7 ++-- .../select_rates_classification.py | 7 ++-- .../select_rates_regression.py | 3 +- .../feature_preprocessing/truncatedSVD.py | 7 ++-- .../components/regression/__init__.py | 2 +- autosklearn/pipeline/regression.py | 20 ++++++------ 26 files changed, 135 insertions(+), 107 deletions(-) diff --git a/autosklearn/evaluation/abstract_evaluator.py 
b/autosklearn/evaluation/abstract_evaluator.py index 0638b0a57d..233165e2a9 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -303,7 +303,9 @@ def __init__( # Please mypy to prevent not defined attr self.model = self._get_model(feat_type=self.feat_type) - def _get_model(self, feat_type: Optional[Dict[Union[str, int], str]]) -> BaseEstimator: + def _get_model( + self, feat_type: Optional[Dict[Union[str, int], str]] + ) -> BaseEstimator: if not isinstance(self.configuration, Configuration): model = self.model_class( feat_type=feat_type, diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py index 4d84e6fe97..2a8d1e04d2 100644 --- a/autosklearn/metalearning/input/aslib_simple.py +++ b/autosklearn/metalearning/input/aslib_simple.py @@ -1,17 +1,15 @@ import csv import logging import os -from ConfigSpace.configuration_space import ConfigurationSpace from collections import OrderedDict, defaultdict import arff import pandas as pd +from ConfigSpace.configuration_space import ConfigurationSpace class AlgorithmSelectionProblem(object): - def __init__(self, - directory: str, - cs: ConfigurationSpace): + def __init__(self, directory: str, cs: ConfigurationSpace): self.logger = logging.getLogger(__name__) # Create data structures diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index d1aba5e138..9eba727c4b 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -1,5 +1,5 @@ from abc import ABCMeta -from typing import Dict, Union, Optional, Any +from typing import Any, Dict, Optional, Union import numpy as np import scipy.sparse @@ -208,10 +208,12 @@ def predict(self, X, batch_size=None): return y - def set_hyperparameters(self, - configuration: Configuration, - feat_type: Optional[Dict[Union[str, int], str]] = None, - init_params: Optional[Dict[str, Any]] = None): + def set_hyperparameters( + self, + configuration: 
Configuration, + feat_type: Optional[Dict[Union[str, int], str]] = None, + init_params: Optional[Dict[str, Any]] = None, + ): self.config = configuration for node_idx, n_ in enumerate(self.steps): @@ -258,9 +260,11 @@ def set_hyperparameters(self, return self - def get_hyperparameter_search_space(self, - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None): + def get_hyperparameter_search_space( + self, + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ): """Return the configuration space for the CASH problem. Returns @@ -283,7 +287,7 @@ def _get_hyperparameter_search_space( feat_type: Optional[Dict[Union[str, int], str]] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): """Return the configuration space for the CASH problem. 
@@ -334,7 +338,7 @@ def _get_base_search_space( include: Dict[str, str], exclude: Dict[str, str], pipeline, - feat_type: Optional[Dict[Union[str, int], str]] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, ): if include is None: if self.include is None: @@ -400,8 +404,9 @@ def _get_base_search_space( if not is_choice: cs.add_configuration_space( node_name, - node.get_hyperparameter_search_space(dataset_properties=dataset_properties, - feat_type=feat_type), + node.get_hyperparameter_search_space( + dataset_properties=dataset_properties, feat_type=feat_type + ), ) # If the node is a choice, we have to figure out which of its # choices are actually legal choices diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index fece85823e..9ad54c3e41 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Any, Dict, List, Optional, Union import copy from itertools import product @@ -75,11 +75,11 @@ def __init__( feat_type: Optional[Dict[Union[str, int], str]] = None, config: Optional[Configuration] = None, steps=None, - dataset_properties=None, - include=None, - exclude=None, + dataset_properties: Dict[str, bool] = None, + include: Optional[Dict[str, List[str]]] = None, + exclude: Optional[Dict[str, List[str]]] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params=None, + init_params: Optional[Dict[str, Any]] = None, ): self._output_dtype = np.int32 if dataset_properties is None: @@ -174,7 +174,7 @@ def _get_hyperparameter_search_space( feat_type: Optional[Dict[Union[str, int], str]] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): """Create the hyperparameter configuration space. 
@@ -355,9 +355,11 @@ def _get_hyperparameter_search_space( self.dataset_properties = dataset_properties return cs - def _get_pipeline_steps(self, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE], - feat_type: Optional[Dict[Union[str, int], str]] = None): + def _get_pipeline_steps( + self, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE], + feat_type: Optional[Dict[Union[str, int], str]] = None, + ): steps = [] default_dataset_properties = {"target_type": "classification"} diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py index b2ff65db02..7185926931 100644 --- a/autosklearn/pipeline/components/base.py +++ b/autosklearn/pipeline/components/base.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union, Any +from typing import Any, Dict, Optional, Union import importlib import inspect @@ -102,8 +102,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): """Return the configuration space of this classification algorithm. 
@@ -142,10 +142,12 @@ def fit(self, X, y): for further information.""" raise NotImplementedError() - def set_hyperparameters(self, - configuration: Configuration, - feat_type: Optional[Dict[Union[str, int], str]] = None, - init_params: Optional[Dict[str, Any]] = None): + def set_hyperparameters( + self, + configuration: Configuration, + feat_type: Optional[Dict[Union[str, int], str]] = None, + init_params: Optional[Dict[str, Any]] = None, + ): params = configuration.get_dictionary() for param, value in params.items(): diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index 605f44aa05..d1a7ea83cb 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -1,6 +1,6 @@ __author__ = "feurerm" -from typing import Type, Dict, Union, Optional +from typing import Dict, Optional, Type, Union import os from collections import OrderedDict diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index ac1978e80a..bd3164ced4 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -117,20 +117,20 @@ def __init__( init_params=init_params, ) - if self.feat_type is None: - self._transformers: List[Tuple[str, AutoSklearnComponent]] = [ - ("categorical_transformer", self.categ_ppl), - ("numerical_transformer", self.numer_ppl), - ("text_transformer", self.txt_ppl), - ] - else: - self._transformers: List[Tuple[str, AutoSklearnComponent]] = [] - if "categorical" in self.feat_type.values(): - self._transformers.append(("categorical_transformer", self.categ_ppl)) - if "numerical" in self.feat_type.values(): - self._transformers.append(("numerical_transformer", self.numer_ppl)) - if "string" in self.feat_type.values(): - 
self._transformers.append(("text_transformer", self.txt_ppl)) + # if self.feat_type is None: + # self._transformers: List[Tuple[str, AutoSklearnComponent]] = [ + # ("categorical_transformer", self.categ_ppl), + # ("numerical_transformer", self.numer_ppl), + # ("text_transformer", self.txt_ppl), + # ] + # else: + self._transformers: List[Tuple[str, AutoSklearnComponent]] = [] + if "categorical" in self.feat_type.values(): + self._transformers.append(("categorical_transformer", self.categ_ppl)) + if "numerical" in self.feat_type.values(): + self._transformers.append(("numerical_transformer", self.numer_ppl)) + if "string" in self.feat_type.values(): + self._transformers.append(("text_transformer", self.txt_ppl)) if self.config: self.set_hyperparameters( @@ -328,7 +328,9 @@ def _get_hyperparameter_search_space_recursevely( if hasattr(st_operation, "get_hyperparameter_search_space"): cs.add_configuration_space( st_name, - st_operation.get_hyperparameter_search_space(dataset_properties=dataset_properties), + st_operation.get_hyperparameter_search_space( + dataset_properties=dataset_properties + ), ) else: return FeatTypeSplit._get_hyperparameter_search_space_recursevely( diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index db4b2983e0..87b42ffe73 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union, Type +from typing import Dict, Optional, Type, Union import os from collections import OrderedDict @@ -23,6 +23,7 @@ DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] + def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None: additional_components.add_component(preprocessor) diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py 
b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py index d2c178adb2..8ef60a4629 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -127,8 +128,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py index 73fb9f3e11..215817e577 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -129,8 +130,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): cs = 
ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py index 8af0e130e8..5c6c8949d2 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py +++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.forbidden import ( @@ -67,8 +68,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): cs = ConfigurationSpace() n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25) diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py index bec513aec6..edbe399e10 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + import warnings import numpy as np @@ -86,8 +87,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): n_components = UniformIntegerHyperparameter( "n_components", 10, 2000, default_value=100 diff 
--git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py index 6f9d6cd9f3..59b7eb2418 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py @@ -1,5 +1,4 @@ -from typing import Dict, Union, Optional -from typing import Optional, Union +from typing import Dict, Optional, Union from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -73,8 +72,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): gamma = UniformFloatHyperparameter( "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py index c7b770e7d2..aa7658c732 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py +++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause from ConfigSpace.hyperparameters import ( @@ -95,8 +96,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, 
int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py index dc76a4a2a3..149fb96709 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py +++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + from ConfigSpace.configuration_space import ConfigurationSpace from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm @@ -38,8 +39,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py index 29cc83603a..23f39c7e4d 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py +++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -98,8 +99,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: 
Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): if dataset_properties is not None and ( dataset_properties.get("sparse") is True diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py index 761525c8da..dd4871a840 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -59,8 +60,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): keep_variance = UniformFloatHyperparameter( "keep_variance", 0.5, 0.9999, default_value=0.9999 diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py index c563b1afbb..1fab87a700 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py +++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -58,8 +59,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: 
Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): # More than degree 3 is too expensive! degree = UniformIntegerHyperparameter("degree", 2, 3, 2) diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py index c41b051e9b..c6f8d61647 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py +++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -98,8 +99,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): n_estimators = UniformIntegerHyperparameter( name="n_estimators", lower=10, upper=100, default_value=10 diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py index b5265ba122..08c6c929be 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + from functools import partial from ConfigSpace.configuration_space import ConfigurationSpace @@ -114,8 +115,8 @@ def get_properties(dataset_properties=None): @staticmethod def 
get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): percentile = UniformFloatHyperparameter( name="percentile", lower=1, upper=99, default_value=50 diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py index 021305777f..fe5fd8ad03 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + from functools import partial from ConfigSpace.configuration_space import ConfigurationSpace @@ -57,8 +58,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): percentile = UniformFloatHyperparameter( "percentile", lower=1, upper=99, default_value=50 diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py index d93f8e9528..9546c8e8c6 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + from functools import partial from ConfigSpace import 
NotEqualsCondition @@ -120,8 +121,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1 diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py index fcc59ab2c7..398ea6f23b 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Union + from functools import partial from ConfigSpace import NotEqualsCondition diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py index 4c9c84bf4d..78d52309ec 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py +++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py @@ -1,4 +1,5 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformIntegerHyperparameter @@ -52,8 +53,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[Dict[Union[str, int], str]] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): target_dim = 
UniformIntegerHyperparameter( "target_dim", 10, 256, default_value=128 diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py index 5773aec45b..b750d7fb1c 100644 --- a/autosklearn/pipeline/components/regression/__init__.py +++ b/autosklearn/pipeline/components/regression/__init__.py @@ -1,4 +1,4 @@ -from typing import Type, Dict, Union, Optional +from typing import Dict, Optional, Type, Union import os from collections import OrderedDict diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py index d8358c5628..9ffaf00c93 100644 --- a/autosklearn/pipeline/regression.py +++ b/autosklearn/pipeline/regression.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union, Any +from typing import Any, Dict, List, Optional, Union import copy from itertools import product @@ -72,9 +72,9 @@ def __init__( feat_type: Optional[Dict[Union[str, int], str]] = None, config: Optional[Configuration] = None, steps=None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, + dataset_properties: Dict[str, bool] = None, + include: Optional[Dict[str, List[str]]] = None, + exclude: Optional[Dict[str, List[str]]] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, init_params: Optional[Dict[str, Any]] = None, ): @@ -120,7 +120,7 @@ def _get_hyperparameter_search_space( feat_type: Optional[Dict[Union[str, int], str]] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): """Return the configuration space for the CASH problem. 
@@ -268,10 +268,12 @@ def _get_hyperparameter_search_space( def _get_estimator_components(self): return regression_components._regressors - def _get_pipeline_steps(self, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE], - feat_type: Optional[Dict[Union[str, int], str]] = None, - init_params: Optional[Dict[str, Any]] = None): + def _get_pipeline_steps( + self, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE], + feat_type: Optional[Dict[Union[str, int], str]] = None, + init_params: Optional[Dict[str, Any]] = None, + ): steps = [] default_dataset_properties = {"target_type": "regression"} From 34bb58fbcb5ca0f5d405b979274c779126045741 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 16 Jun 2022 14:45:43 +0200 Subject: [PATCH 29/63] fix search space bug --- .../data_preprocessing/feature_type.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index bd3164ced4..407b3ee626 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -117,20 +117,20 @@ def __init__( init_params=init_params, ) - # if self.feat_type is None: - # self._transformers: List[Tuple[str, AutoSklearnComponent]] = [ - # ("categorical_transformer", self.categ_ppl), - # ("numerical_transformer", self.numer_ppl), - # ("text_transformer", self.txt_ppl), - # ] - # else: - self._transformers: List[Tuple[str, AutoSklearnComponent]] = [] - if "categorical" in self.feat_type.values(): - self._transformers.append(("categorical_transformer", self.categ_ppl)) - if "numerical" in self.feat_type.values(): - self._transformers.append(("numerical_transformer", self.numer_ppl)) - if "string" in self.feat_type.values(): - self._transformers.append(("text_transformer", self.txt_ppl)) + if self.feat_type is None: + self._transformers: 
List[Tuple[str, AutoSklearnComponent]] = [ + ("categorical_transformer", self.categ_ppl), + ("numerical_transformer", self.numer_ppl), + ("text_transformer", self.txt_ppl), + ] + else: + self._transformers: List[Tuple[str, AutoSklearnComponent]] = [] + if "categorical" in self.feat_type.values(): + self._transformers.append(("categorical_transformer", self.categ_ppl)) + if "numerical" in self.feat_type.values(): + self._transformers.append(("numerical_transformer", self.numer_ppl)) + if "string" in self.feat_type.values(): + self._transformers.append(("text_transformer", self.txt_ppl)) if self.config: self.set_hyperparameters( From 6b0fdb49a1ef12b53b411102ffe11ace3fabffd1 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 16 Jun 2022 14:53:50 +0200 Subject: [PATCH 30/63] fix search space bug --- autosklearn/experimental/askl2.py | 6 ++++-- autosklearn/metalearning/input/aslib_simple.py | 3 ++- .../pipeline/components/data_preprocessing/feature_type.py | 3 +-- test/fixtures/ensembles.py | 4 ++-- test/test_evaluation/test_dummy_pipelines.py | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py index 24d02e544a..7984f2b33e 100644 --- a/autosklearn/experimental/askl2.py +++ b/autosklearn/experimental/askl2.py @@ -51,10 +51,11 @@ def __call__( initial_configurations = [] for member in self.portfolio.values(): try: + hp_names = scenario.cs.get_hyperparameter_names() _member = { key: member[key] for key in member - if key in scenario.cs.get_hyperparameter_names() + if key in hp_names } initial_configurations.append( Configuration(configuration_space=scenario.cs, values=_member) @@ -108,10 +109,11 @@ def __call__( initial_configurations = [] for member in self.portfolio.values(): try: + hp_names = scenario.cs.get_hyperparameter_names() _member = { key: member[key] for key in member - if key in scenario.cs.get_hyperparameter_names() + if key in hp_names } initial_configurations.append( 
Configuration(configuration_space=scenario.cs, values=_member) diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py index 2a8d1e04d2..a1724a3846 100644 --- a/autosklearn/metalearning/input/aslib_simple.py +++ b/autosklearn/metalearning/input/aslib_simple.py @@ -145,6 +145,7 @@ def _read_configurations(self, filename): csv_reader = csv.DictReader(fh) configurations = dict() + hp_names = self.cs.get_hyperparameter_names() for line in csv_reader: configuration = dict() algorithm_id = line["idx"] @@ -152,7 +153,7 @@ def _read_configurations(self, filename): # Todo adapt to search space if not value or hp_name == "idx": continue - if hp_name not in self.cs.get_hyperparameter_names(): + if hp_name not in hp_names: continue try: value = int(value) diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index 407b3ee626..9f2d33597d 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -155,8 +155,7 @@ def fit( columns = set(range(n_feats)) if expected != columns: try: - # columns = [str(col) for col in columns] - pass + columns = [str(col) for col in columns] except Exception as e: raise ValueError( f"Train data has columns={expected} yet the" diff --git a/test/fixtures/ensembles.py b/test/fixtures/ensembles.py index 32bb706eee..113974b256 100644 --- a/test/fixtures/ensembles.py +++ b/test/fixtures/ensembles.py @@ -45,7 +45,7 @@ def _make( if not models: models = [ MyDummyClassifier( - feat_type={i: "numerical" for i in range(4)}, + feat_type={i: "numerical" for i in range(X.shape[1])}, config=1, random_state=seed, ) @@ -89,7 +89,7 @@ def _make( if not models: models = [ MyDummyRegressor( - feat_type={i: "numerical" for i in range(4)}, + feat_type={i: "numerical" for i in range(X.shape[1])}, config=1, random_state=seed, ) diff 
--git a/test/test_evaluation/test_dummy_pipelines.py b/test/test_evaluation/test_dummy_pipelines.py index cc7ea3c284..c69578420f 100644 --- a/test/test_evaluation/test_dummy_pipelines.py +++ b/test/test_evaluation/test_dummy_pipelines.py @@ -25,7 +25,7 @@ def test_dummy_pipeline(task_type: str) -> None: X, y = data_maker(random_state=0) estimator = estimator_class( - feat_type={i: "numerical" for i in range(X.shape[0])}, config=1, random_state=0 + feat_type={i: "numerical" for i in range(X.shape[1])}, config=1, random_state=0 ) estimator.fit(X, y) check_is_fitted(estimator) From 9096ea30898a47978c99b372fdf960a0e4e3ba16 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 16 Jun 2022 15:00:52 +0200 Subject: [PATCH 31/63] fix search space bug --- test/test_pipeline/test_base.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/test/test_pipeline/test_base.py b/test/test_pipeline/test_base.py index f8cfe26912..33d57cd230 100644 --- a/test/test_pipeline/test_base.py +++ b/test/test_pipeline/test_base.py @@ -37,7 +37,11 @@ def test_get_hyperparameter_configuration_space_3choices(self): base = BasePipelineMock() cs = base._get_base_search_space( - cs, dataset_properties, exclude, include, pipeline + cs=cs, + dataset_properties=dataset_properties, + exclude=exclude, + include=include, + pipeline=pipeline ) self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13) @@ -51,7 +55,11 @@ def test_get_hyperparameter_configuration_space_3choices(self): dataset_properties = {"target_type": "classification", "signed": True} include = {"c": ["multinomial_nb"]} cs = base._get_base_search_space( - cs, dataset_properties, exclude, include, pipeline + cs=cs, + dataset_properties=dataset_properties, + exclude=exclude, + include=include, + pipeline=pipeline ) self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13) self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 10) @@ -66,7 +74,11 @@ def 
test_get_hyperparameter_configuration_space_3choices(self): dataset_properties = {"target_type": "classification", "signed": True} include = {} cs = base._get_base_search_space( - cs, dataset_properties, exclude, include, pipeline + cs=cs, + dataset_properties=dataset_properties, + exclude=exclude, + include=include, + pipeline=pipeline ) self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13) self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15) @@ -78,7 +90,11 @@ def test_get_hyperparameter_configuration_space_3choices(self): cs = ConfigSpace.configuration_space.ConfigurationSpace() dataset_properties = {"target_type": "classification", "sparse": True} cs = base._get_base_search_space( - cs, dataset_properties, exclude, include, pipeline + cs=cs, + dataset_properties=dataset_properties, + exclude=exclude, + include=include, + pipeline=pipeline ) self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 12) self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15) @@ -93,7 +109,11 @@ def test_get_hyperparameter_configuration_space_3choices(self): "signed": True, } cs = base._get_base_search_space( - cs, dataset_properties, exclude, include, pipeline + cs=cs, + dataset_properties=dataset_properties, + exclude=exclude, + include=include, + pipeline=pipeline ) self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 12) From 372d9791a0c9065a6c084fc8174bcc65b90ac724 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 16 Jun 2022 15:01:10 +0200 Subject: [PATCH 32/63] fix search space bug --- autosklearn/experimental/askl2.py | 12 ++---------- test/test_pipeline/test_base.py | 10 +++++----- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py index 7984f2b33e..078355dfbb 100644 --- a/autosklearn/experimental/askl2.py +++ b/autosklearn/experimental/askl2.py @@ -52,11 +52,7 @@ def __call__( for member in 
self.portfolio.values(): try: hp_names = scenario.cs.get_hyperparameter_names() - _member = { - key: member[key] - for key in member - if key in hp_names - } + _member = {key: member[key] for key in member if key in hp_names} initial_configurations.append( Configuration(configuration_space=scenario.cs, values=_member) ) @@ -110,11 +106,7 @@ def __call__( for member in self.portfolio.values(): try: hp_names = scenario.cs.get_hyperparameter_names() - _member = { - key: member[key] - for key in member - if key in hp_names - } + _member = {key: member[key] for key in member if key in hp_names} initial_configurations.append( Configuration(configuration_space=scenario.cs, values=_member) ) diff --git a/test/test_pipeline/test_base.py b/test/test_pipeline/test_base.py index 33d57cd230..af5123b4f7 100644 --- a/test/test_pipeline/test_base.py +++ b/test/test_pipeline/test_base.py @@ -41,7 +41,7 @@ def test_get_hyperparameter_configuration_space_3choices(self): dataset_properties=dataset_properties, exclude=exclude, include=include, - pipeline=pipeline + pipeline=pipeline, ) self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13) @@ -59,7 +59,7 @@ def test_get_hyperparameter_configuration_space_3choices(self): dataset_properties=dataset_properties, exclude=exclude, include=include, - pipeline=pipeline + pipeline=pipeline, ) self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13) self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 10) @@ -78,7 +78,7 @@ def test_get_hyperparameter_configuration_space_3choices(self): dataset_properties=dataset_properties, exclude=exclude, include=include, - pipeline=pipeline + pipeline=pipeline, ) self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13) self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15) @@ -94,7 +94,7 @@ def test_get_hyperparameter_configuration_space_3choices(self): dataset_properties=dataset_properties, exclude=exclude, 
include=include, - pipeline=pipeline + pipeline=pipeline, ) self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 12) self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15) @@ -113,7 +113,7 @@ def test_get_hyperparameter_configuration_space_3choices(self): dataset_properties=dataset_properties, exclude=exclude, include=include, - pipeline=pipeline + pipeline=pipeline, ) self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 12) From 3b1105e4bf7c14bc24c98828e7f95a5127ad47bd Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 16 Jun 2022 16:13:30 +0200 Subject: [PATCH 33/63] fix search space bug --- autosklearn/pipeline/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index 9ad54c3e41..6dd0d38d20 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -113,7 +113,7 @@ def fit_transformer(self, X, y, fit_params=None): ) _init_params.update(self.init_params) self.set_hyperparameters( - configuration=self.config, init_params=_init_params + feat_type=self.feat_type, configuration=self.config, init_params=_init_params ) if _fit_params is not None: From b090ecfb588d55cc0dd6d0c91e2d675246c32c2d Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 16 Jun 2022 16:14:14 +0200 Subject: [PATCH 34/63] fix search space bug --- autosklearn/pipeline/classification.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index 6dd0d38d20..61cdef9f4f 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -113,7 +113,9 @@ def fit_transformer(self, X, y, fit_params=None): ) _init_params.update(self.init_params) self.set_hyperparameters( - feat_type=self.feat_type, configuration=self.config, init_params=_init_params + feat_type=self.feat_type, + 
configuration=self.config, + init_params=_init_params, ) if _fit_params is not None: From 280d3d09f50c64baaf21c2c89edbeb6d3e106aab Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 4 Jul 2022 00:22:52 +0200 Subject: [PATCH 35/63] fix typing --- autosklearn/askl_typing.py | 3 +++ autosklearn/evaluation/abstract_evaluator.py | 7 ++++--- autosklearn/pipeline/base.py | 11 ++++++----- autosklearn/pipeline/classification.py | 7 ++++--- autosklearn/pipeline/components/base.py | 7 ++++--- .../pipeline/components/classification/__init__.py | 3 ++- .../components/data_preprocessing/__init__.py | 5 +++-- .../data_preprocessing/balancing/balancing.py | 3 ++- .../categorical_encoding/__init__.py | 5 +++-- .../categorical_encoding/encoding.py | 3 ++- .../categorical_encoding/no_encoding.py | 3 ++- .../categorical_encoding/one_hot_encoding.py | 3 ++- .../category_shift/category_shift.py | 3 ++- .../components/data_preprocessing/feature_type.py | 9 +++++---- .../data_preprocessing/feature_type_categorical.py | 7 ++++--- .../data_preprocessing/feature_type_numerical.py | 7 ++++--- .../data_preprocessing/feature_type_text.py | 7 ++++--- .../imputation/categorical_imputation.py | 3 ++- .../imputation/numerical_imputation.py | 3 ++- .../minority_coalescense/__init__.py | 5 +++-- .../minority_coalescense/minority_coalescer.py | 6 ++++-- .../minority_coalescense/no_coalescense.py | 5 +++-- .../data_preprocessing/rescaling/__init__.py | 3 ++- .../rescaling/abstract_rescaling.py | 3 ++- .../rescaling/quantile_transformer.py | 3 ++- .../data_preprocessing/rescaling/robust_scaler.py | 3 ++- .../data_preprocessing/text_encoding/__init__.py | 5 +++-- .../text_encoding/bag_of_word_encoding.py | 3 ++- .../text_encoding/bag_of_word_encoding_distinct.py | 3 ++- .../text_encoding/tfidf_encoding.py | 3 ++- .../text_feature_reduction/truncated_svd.py | 3 ++- .../variance_threshold/variance_threshold.py | 3 ++- .../components/feature_preprocessing/__init__.py | 3 ++- 
.../extra_trees_preproc_for_classification.py | 3 ++- .../extra_trees_preproc_for_regression.py | 3 ++- .../feature_preprocessing/feature_agglomeration.py | 3 ++- .../components/feature_preprocessing/kernel_pca.py | 3 ++- .../components/feature_preprocessing/kitchen_sinks.py | 3 ++- .../liblinear_svc_preprocessor.py | 3 ++- .../feature_preprocessing/no_preprocessing.py | 3 ++- .../feature_preprocessing/nystroem_sampler.py | 3 ++- .../pipeline/components/feature_preprocessing/pca.py | 3 ++- .../components/feature_preprocessing/polynomial.py | 3 ++- .../feature_preprocessing/random_trees_embedding.py | 3 ++- .../select_percentile_classification.py | 3 ++- .../select_percentile_regression.py | 3 ++- .../select_rates_classification.py | 3 ++- .../feature_preprocessing/select_rates_regression.py | 8 ++++++-- .../components/feature_preprocessing/truncatedSVD.py | 3 ++- .../pipeline/components/regression/__init__.py | 3 ++- autosklearn/pipeline/regression.py | 7 ++++--- 51 files changed, 135 insertions(+), 78 deletions(-) create mode 100644 autosklearn/askl_typing.py diff --git a/autosklearn/askl_typing.py b/autosklearn/askl_typing.py new file mode 100644 index 0000000000..61d01bef30 --- /dev/null +++ b/autosklearn/askl_typing.py @@ -0,0 +1,3 @@ +from typing import Dict, Union + +FEAT_TYPE_TYPE = Dict[Union[str, int], str] diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index 233165e2a9..99453807eb 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -17,6 +17,7 @@ import autosklearn.pipeline.classification import autosklearn.pipeline.regression +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.automl_common.common.utils.backend import Backend from autosklearn.constants import ( CLASSIFICATION_TASKS, @@ -45,7 +46,7 @@ def __init__( self, config: Configuration, random_state: Optional[Union[int, np.random.RandomState]], - feat_type: 
Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, init_params: Optional[Dict[str, Any]] = None, dataset_properties: Dict[str, Any] = {}, include: Optional[List[str]] = None, @@ -110,7 +111,7 @@ def __init__( self, config: Configuration, random_state: Optional[Union[int, np.random.RandomState]], - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, init_params: Optional[Dict[str, Any]] = None, dataset_properties: Dict[str, Any] = {}, include: Optional[List[str]] = None, @@ -304,7 +305,7 @@ def __init__( self.model = self._get_model(feat_type=self.feat_type) def _get_model( - self, feat_type: Optional[Dict[Union[str, int], str]] + self, feat_type: Optional[FEAT_TYPE_TYPE] ) -> BaseEstimator: if not isinstance(self.configuration, Configuration): model = self.model_class( diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 9eba727c4b..42a3fa029f 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -7,6 +7,7 @@ from sklearn.pipeline import Pipeline import autosklearn.pipeline.create_searchspace_util +from autosklearn.askl_typing import FEAT_TYPE_TYPE from .components.base import AutoSklearnChoice, AutoSklearnComponent @@ -34,7 +35,7 @@ class BasePipeline(Pipeline): def __init__( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, config: Optional[Configuration] = None, steps=None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -211,7 +212,7 @@ def predict(self, X, batch_size=None): def set_hyperparameters( self, configuration: Configuration, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, init_params: Optional[Dict[str, Any]] = None, ): self.config = configuration @@ -262,7 +263,7 @@ def set_hyperparameters( def get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], 
str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): """Return the configuration space for the CASH problem. @@ -284,7 +285,7 @@ def get_hyperparameter_search_space( def _get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -338,7 +339,7 @@ def _get_base_search_space( include: Dict[str, str], exclude: Dict[str, str], pipeline, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, ): if include is None: if self.include is None: diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index 61cdef9f4f..bbf56fa429 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -8,6 +8,7 @@ from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause from sklearn.base import ClassifierMixin +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import BasePipeline from autosklearn.pipeline.components.classification import ClassifierChoice from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice @@ -72,7 +73,7 @@ class SimpleClassificationPipeline(BasePipeline, ClassifierMixin): def __init__( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, config: Optional[Configuration] = None, steps=None, dataset_properties: Dict[str, bool] = None, @@ -173,7 +174,7 @@ def predict_proba(self, X, batch_size=None): def _get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, include: Optional[Dict[str, str]] = None, exclude: 
Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -360,7 +361,7 @@ def _get_hyperparameter_search_space( def _get_pipeline_steps( self, dataset_properties: Optional[DATASET_PROPERTIES_TYPE], - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, ): steps = [] diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py index 7185926931..4737dfb790 100644 --- a/autosklearn/pipeline/components/base.py +++ b/autosklearn/pipeline/components/base.py @@ -9,6 +9,7 @@ from ConfigSpace.configuration_space import Configuration from sklearn.base import BaseEstimator, TransformerMixin +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.constants import SPARSE DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] @@ -102,7 +103,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): """Return the configuration space of this classification algorithm. 
@@ -145,7 +146,7 @@ def fit(self, X, y): def set_hyperparameters( self, configuration: Configuration, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, init_params: Optional[Dict[str, Any]] = None, ): params = configuration.get_dictionary() @@ -450,7 +451,7 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None): def get_hyperparameter_search_space( self, - feat_type: Dict[Union[str, int], str], + feat_type: FEAT_TYPE_TYPE, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default=None, include: Optional[Dict[str, str]] = None, diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index d1a7ea83cb..1967eec874 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -8,6 +8,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter +from autosklearn.askl_typing import FEAT_TYPE_TYPE from ..base import ( AutoSklearnChoice, AutoSklearnClassificationAlgorithm, @@ -89,7 +90,7 @@ def get_available_components( def get_hyperparameter_search_space( self, - feat_type: Dict[Union[str, int], str], + feat_type: FEAT_TYPE_TYPE, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default=None, include: Optional[Dict[str, str]] = None, diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index 848bbb5913..72eeb51e07 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -6,6 +6,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter +from autosklearn.askl_typing import FEAT_TYPE_TYPE from 
autosklearn.pipeline.base import PIPELINE_DATA_DTYPE from ..base import ( @@ -105,7 +106,7 @@ def get_available_components( def get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[Dict] = None, default: str = None, include: Optional[Dict] = None, @@ -154,7 +155,7 @@ def set_hyperparameters( self, configuration: ConfigurationSpace, init_params: Optional[Dict] = None, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, ) -> "DataPreprocessorChoice": config = {} params = configuration.get_dictionary() diff --git a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py index 2cdd112ba6..106eb377f7 100644 --- a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py +++ b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py @@ -5,6 +5,7 @@ from ConfigSpace.hyperparameters import CategoricalHyperparameter from sklearn.base import BaseEstimator +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import ( @@ -139,7 +140,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: # TODO add replace by zero! 
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py index ba3a555027..188fc3fad2 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py @@ -8,6 +8,7 @@ from ConfigSpace.hyperparameters import CategoricalHyperparameter from sklearn.base import BaseEstimator +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from ...base import ( @@ -38,7 +39,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, @@ -88,7 +89,7 @@ def get_hyperparameter_search_space( def set_hyperparameters( self, - feat_type: Optional[Dict[Union[str, int], str]], + feat_type: FEAT_TYPE_TYPE, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None, ) -> "OHEChoice": diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py index a8a2d0a89d..7c904635f8 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py @@ -5,6 +5,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from sklearn.preprocessing import OrdinalEncoder +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from 
autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -69,7 +70,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py index 9e356d9f41..cead9331d4 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py @@ -3,6 +3,7 @@ import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -44,7 +45,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py index f6afe06c8e..989cf86680 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py +++ 
b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py @@ -5,6 +5,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from sklearn.preprocessing import OneHotEncoder as DenseOneHotEncoder +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -55,7 +56,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py index 2d5e5607bd..65ec36f7e7 100644 --- a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py +++ b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py @@ -4,6 +4,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace import autosklearn.pipeline.implementations.CategoryShift +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -63,7 +64,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git 
a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index 9f2d33597d..479af234ab 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -7,6 +7,7 @@ from scipy import sparse from sklearn.base import BaseEstimator +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.data.validation import SUPPORTED_FEAT_TYPES, SUPPORTED_TARGET_TYPES from autosklearn.pipeline.base import ( DATASET_PROPERTIES_TYPE, @@ -46,7 +47,7 @@ def __init__( exclude: Optional[Dict[str, str]] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, init_params: Optional[Dict[str, Any]] = None, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, force_sparse_output: bool = False, column_transformer: Optional[sklearn.compose.ColumnTransformer] = None, ): @@ -255,7 +256,7 @@ def get_properties( def set_hyperparameters( self, - feat_type: Dict[Union[str, int], str], + feat_type: FEAT_TYPE_TYPE, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None, ) -> "FeatTypeSplit": @@ -303,7 +304,7 @@ def set_hyperparameters( def get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: self.dataset_properties = dataset_properties @@ -321,7 +322,7 @@ def _get_hyperparameter_search_space_recursevely( dataset_properties: DATASET_PROPERTIES_TYPE, cs: ConfigurationSpace, transformer: BaseEstimator, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, ) -> ConfigurationSpace: for st_name, st_operation in transformer: if hasattr(st_operation, "get_hyperparameter_search_space"): diff --git 
a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py index 5ad0aabe70..07cfeb7fa5 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py @@ -4,6 +4,7 @@ from ConfigSpace.configuration_space import Configuration, ConfigurationSpace from sklearn.base import BaseEstimator +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, BasePipeline from autosklearn.pipeline.components.data_preprocessing.categorical_encoding import ( # noqa: E501 OHEChoice, @@ -46,7 +47,7 @@ class CategoricalPreprocessingPipeline(BasePipeline): def __init__( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, config: Optional[Configuration] = None, steps: Optional[List[Tuple[str, BaseEstimator]]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -94,7 +95,7 @@ def get_properties( def _get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -124,7 +125,7 @@ def _get_hyperparameter_search_space( def _get_pipeline_steps( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[Dict[str, str]] = None, ) -> List[Tuple[str, BaseEstimator]]: steps = [] diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py index fbba3b9172..5cc3f19561 100644 --- 
a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py @@ -4,6 +4,7 @@ from ConfigSpace.configuration_space import Configuration, ConfigurationSpace from sklearn.base import BaseEstimator +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, BasePipeline from autosklearn.pipeline.components.data_preprocessing import ( rescaling as rescaling_components, @@ -39,7 +40,7 @@ class NumericalPreprocessingPipeline(BasePipeline): def __init__( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, config: Optional[Configuration] = None, steps: Optional[List[Tuple[str, BaseEstimator]]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -87,7 +88,7 @@ def get_properties( def _get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -120,7 +121,7 @@ def _get_hyperparameter_search_space( def _get_pipeline_steps( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[Dict[str, str]] = None, ) -> List[Tuple[str, BaseEstimator]]: steps = [] diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py index 5cbb962ae5..e92ef09c03 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py @@ -4,6 +4,7 @@ from ConfigSpace.configuration_space import Configuration, ConfigurationSpace from sklearn.base import BaseEstimator +from 
autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, BasePipeline from autosklearn.pipeline.components.data_preprocessing.text_encoding import ( BagOfWordChoice, @@ -34,7 +35,7 @@ class TextPreprocessingPipeline(BasePipeline): def __init__( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, config: Optional[Configuration] = None, steps: Optional[List[Tuple[str, BaseEstimator]]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -81,7 +82,7 @@ def get_properties( def _get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -114,7 +115,7 @@ def _get_hyperparameter_search_space( def _get_pipeline_steps( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[Dict[str, str]] = None, ) -> List[Tuple[str, BaseEstimator]]: steps = [] diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py index 65a1542018..31b762eb60 100644 --- a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py +++ b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py @@ -4,6 +4,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from scipy.sparse import spmatrix +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, 
SPARSE, UNSIGNED_DATA @@ -91,7 +92,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py index b5945ca6a1..0d09b7bf11 100644 --- a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py +++ b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py @@ -4,6 +4,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -62,7 +63,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: # TODO add replace by zero! 
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py index bef957b7f4..8912c781d2 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py @@ -8,6 +8,7 @@ from ConfigSpace.hyperparameters import CategoricalHyperparameter from sklearn.base import BaseEstimator +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from ...base import ( @@ -38,7 +39,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, @@ -90,7 +91,7 @@ def set_hyperparameters( self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, ) -> "CoalescenseChoice": new_params = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py index 737e8c85f1..9c5dc6da56 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py @@ -5,6 +5,8 @@ from ConfigSpace.hyperparameters import UniformFloatHyperparameter import autosklearn.pipeline.implementations.MinorityCoalescer + +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import 
DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -15,7 +17,7 @@ class MinorityCoalescer(AutoSklearnPreprocessingAlgorithm): def __init__( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, minimum_fraction: float = 0.01, random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: @@ -60,7 +62,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py index 433d9a8247..f563371fe6 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py @@ -3,6 +3,7 @@ import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -12,7 +13,7 @@ class NoCoalescence(AutoSklearnPreprocessingAlgorithm): def __init__( self, random_state: Optional[Union[int, np.random.RandomState]] = None, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, ) -> None: pass @@ -45,7 +46,7 @@ def get_properties( @staticmethod def 
get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py index d7b01c7a93..957c985296 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py @@ -7,6 +7,7 @@ from ConfigSpace.hyperparameters import CategoricalHyperparameter from sklearn.base import BaseEstimator +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 Rescaling, @@ -42,7 +43,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py index ba97eee886..e567f5cd2b 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py @@ -5,6 +5,7 @@ from sklearn.base import BaseEstimator from sklearn.exceptions import NotFittedError +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from 
autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm @@ -38,7 +39,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py index a797a5769a..51beabcc7a 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py @@ -7,6 +7,7 @@ UniformIntegerHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 @@ -62,7 +63,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py index b9d25235d3..8762c1be96 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py @@ -6,6 +6,7 @@ from scipy import sparse from sklearn.exceptions import NotFittedError +from autosklearn.askl_typing import 
FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 @@ -59,7 +60,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py index 325beec9fa..1182cce461 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py @@ -8,6 +8,7 @@ from ConfigSpace.hyperparameters import CategoricalHyperparameter from sklearn.base import BaseEstimator +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from ...base import ( @@ -39,7 +40,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: def get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default: Optional[str] = None, include: Optional[Dict[str, str]] = None, @@ -94,7 +95,7 @@ def set_hyperparameters( self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, ) -> "BagOfWordChoice": new_params = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py 
b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py index 9810006ffa..a90b1c1fa4 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py @@ -9,6 +9,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from sklearn.feature_extraction.text import CountVectorizer +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -95,7 +96,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py index 2f23276824..de852b5d6b 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py @@ -8,6 +8,7 @@ from scipy.sparse import hstack from sklearn.feature_extraction.text import CountVectorizer +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -101,7 +102,7 @@ def get_properties( @staticmethod def 
get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py index 36238b4fa8..3956ec9eff 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py @@ -9,6 +9,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from sklearn.feature_extraction.text import TfidfVectorizer +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -100,7 +101,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py index be3ab9b00a..d6380e03dd 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py @@ -5,6 +5,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from sklearn.decomposition import TruncatedSVD +from autosklearn.askl_typing import FEAT_TYPE_TYPE from 
autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -74,7 +75,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py index f11c07a2d2..eb917d6915 100644 --- a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py +++ b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py @@ -4,6 +4,7 @@ import sklearn.feature_selection from ConfigSpace.configuration_space import ConfigurationSpace +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -49,7 +50,7 @@ def get_properties( @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ) -> ConfigurationSpace: cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index 87b42ffe73..22996b00a8 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ 
b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -6,6 +6,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter +from autosklearn.askl_typing import FEAT_TYPE_TYPE from ..base import ( AutoSklearnChoice, AutoSklearnPreprocessingAlgorithm, @@ -104,7 +105,7 @@ def get_available_components( def get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default=None, include: Optional[Dict[str, str]] = None, diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py index 8ef60a4629..f247fe1e90 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py @@ -9,6 +9,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none @@ -128,7 +129,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py index 215817e577..84e9d3afbc 100644 --- 
a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py @@ -10,6 +10,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none @@ -130,7 +131,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py index 5c6c8949d2..734beb834d 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py +++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py @@ -12,6 +12,7 @@ UniformIntegerHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA @@ -68,7 +69,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py index edbe399e10..5b7f04c2b4 100644 --- 
a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py @@ -11,6 +11,7 @@ UniformIntegerHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA @@ -87,7 +88,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): n_components = UniformIntegerHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py index 59b7eb2418..23382abcd8 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py @@ -7,6 +7,7 @@ ) from numpy.random import RandomState +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -72,7 +73,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): gamma = UniformFloatHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py index aa7658c732..59087ce249 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py +++ 
b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py @@ -8,6 +8,7 @@ UniformFloatHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none @@ -96,7 +97,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py index 149fb96709..979021a04a 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py +++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py @@ -2,6 +2,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -39,7 +40,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py index 23f39c7e4d..9190c8f715 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py +++ 
b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py @@ -8,6 +8,7 @@ UniformIntegerHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import ( DENSE, @@ -99,7 +100,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): if dataset_properties is not None and ( diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py index dd4871a840..ea1f5a29b0 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py @@ -7,6 +7,7 @@ UniformFloatHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool @@ -60,7 +61,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): keep_variance = UniformFloatHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py index 1fab87a700..b44c0c3240 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py +++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py @@ -6,6 +6,7 @@ UniformIntegerHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE 
from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool @@ -59,7 +60,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): # More than degree 3 is too expensive! diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py index c6f8d61647..785458aa8e 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py +++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py @@ -8,6 +8,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none @@ -99,7 +100,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): n_estimators = UniformIntegerHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py index 08c6c929be..0f6dcab0f6 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py +++ 
b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py @@ -9,6 +9,7 @@ UniformFloatHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.components.feature_preprocessing.select_percentile import ( SelectPercentileBase, @@ -115,7 +116,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): percentile = UniformFloatHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py index fe5fd8ad03..61a0c8598d 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py @@ -8,6 +8,7 @@ UniformFloatHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.components.feature_preprocessing.select_percentile import ( SelectPercentileBase, @@ -58,7 +59,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): percentile = UniformFloatHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py index 9546c8e8c6..1d52628766 100644 --- 
a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py @@ -9,6 +9,7 @@ UniformFloatHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import ( DENSE, @@ -121,7 +122,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): alpha = UniformFloatHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py index 398ea6f23b..9e6be1ddc8 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py @@ -1,4 +1,4 @@ -from typing import Dict, Union +from typing import Dict, Union, Optional from functools import partial @@ -9,6 +9,7 @@ UniformFloatHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -88,7 +89,10 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + ): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1 ) diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py 
b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py index 78d52309ec..77e49028eb 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py +++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py @@ -3,6 +3,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformIntegerHyperparameter +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -53,7 +54,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): target_dim = UniformIntegerHyperparameter( diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py index b750d7fb1c..f2c041d73b 100644 --- a/autosklearn/pipeline/components/regression/__init__.py +++ b/autosklearn/pipeline/components/regression/__init__.py @@ -6,6 +6,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter +from autosklearn.askl_typing import FEAT_TYPE_TYPE from ..base import ( AutoSklearnChoice, AutoSklearnRegressionAlgorithm, @@ -82,7 +83,7 @@ def get_available_components( def get_hyperparameter_search_space( self, - feat_type: Dict[Union[str, int], str], + feat_type: FEAT_TYPE_TYPE, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, default=None, include: Optional[Dict[str, str]] = None, diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py index 9ffaf00c93..43d0d8c1fa 100644 --- a/autosklearn/pipeline/regression.py +++ b/autosklearn/pipeline/regression.py @@ -8,6 +8,7 @@ 
from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause from sklearn.base import RegressorMixin +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import BasePipeline from autosklearn.pipeline.components import ( feature_preprocessing as feature_preprocessing_components, @@ -69,7 +70,7 @@ class SimpleRegressionPipeline(RegressorMixin, BasePipeline): def __init__( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, config: Optional[Configuration] = None, steps=None, dataset_properties: Dict[str, bool] = None, @@ -117,7 +118,7 @@ def predict(self, X, batch_size=None): def _get_hyperparameter_search_space( self, - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, @@ -271,7 +272,7 @@ def _get_estimator_components(self): def _get_pipeline_steps( self, dataset_properties: Optional[DATASET_PROPERTIES_TYPE], - feat_type: Optional[Dict[Union[str, int], str]] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, init_params: Optional[Dict[str, Any]] = None, ): steps = [] From f8df417a665db081dcb78e6ae642a35148781a83 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 4 Jul 2022 01:13:27 +0200 Subject: [PATCH 36/63] fixing pre-commit --- autosklearn/pipeline/components/data_preprocessing/__init__.py | 2 +- .../data_preprocessing/categorical_encoding/__init__.py | 2 +- .../data_preprocessing/minority_coalescense/__init__.py | 2 +- .../components/data_preprocessing/rescaling/__init__.py | 2 +- .../data_preprocessing/rescaling/abstract_rescaling.py | 2 +- .../components/data_preprocessing/text_encoding/__init__.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py 
b/autosklearn/pipeline/components/data_preprocessing/__init__.py index 72eeb51e07..5f89548377 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Type, Union +from typing import Dict, Optional, Type import os from collections import OrderedDict diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py index 188fc3fad2..5b1cf075b3 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional import os from collections import OrderedDict diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py index 8912c781d2..85002ec349 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional import os from collections import OrderedDict diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py index 957c985296..9f83881472 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Dict, Optional import os from collections import OrderedDict diff --git 
a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py index e567f5cd2b..0b3244cc62 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional, Union import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py index 1182cce461..bbfbf9196f 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional import os from collections import OrderedDict From 5b8f0d55edd611ef87836225f9258f81d241b8ae Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 4 Jul 2022 01:29:38 +0200 Subject: [PATCH 37/63] fixing pre-commit --- autosklearn/evaluation/abstract_evaluator.py | 4 +--- autosklearn/pipeline/components/classification/__init__.py | 1 + .../minority_coalescense/minority_coalescer.py | 1 - .../pipeline/components/feature_preprocessing/__init__.py | 1 + .../feature_preprocessing/select_rates_regression.py | 6 +++--- autosklearn/pipeline/components/regression/__init__.py | 1 + 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index 99453807eb..fbdfd4463e 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -304,9 +304,7 @@ def __init__( # Please mypy to prevent not defined attr self.model = 
self._get_model(feat_type=self.feat_type) - def _get_model( - self, feat_type: Optional[FEAT_TYPE_TYPE] - ) -> BaseEstimator: + def _get_model(self, feat_type: Optional[FEAT_TYPE_TYPE]) -> BaseEstimator: if not isinstance(self.configuration, Configuration): model = self.model_class( feat_type=feat_type, diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index 1967eec874..0233aee2b2 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -9,6 +9,7 @@ from ConfigSpace.hyperparameters import CategoricalHyperparameter from autosklearn.askl_typing import FEAT_TYPE_TYPE + from ..base import ( AutoSklearnChoice, AutoSklearnClassificationAlgorithm, diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py index 9c5dc6da56..2533e92e8d 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py @@ -5,7 +5,6 @@ from ConfigSpace.hyperparameters import UniformFloatHyperparameter import autosklearn.pipeline.implementations.MinorityCoalescer - from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index 22996b00a8..46cd476337 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -7,6 +7,7 @@ from ConfigSpace.hyperparameters 
import CategoricalHyperparameter from autosklearn.askl_typing import FEAT_TYPE_TYPE + from ..base import ( AutoSklearnChoice, AutoSklearnPreprocessingAlgorithm, diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py index 9e6be1ddc8..802a4c3267 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py @@ -1,4 +1,4 @@ -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union from functools import partial @@ -90,8 +90,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None + feat_type: Optional[FEAT_TYPE_TYPE] = None, + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, ): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1 diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py index f2c041d73b..806cef98f3 100644 --- a/autosklearn/pipeline/components/regression/__init__.py +++ b/autosklearn/pipeline/components/regression/__init__.py @@ -7,6 +7,7 @@ from ConfigSpace.hyperparameters import CategoricalHyperparameter from autosklearn.askl_typing import FEAT_TYPE_TYPE + from ..base import ( AutoSklearnChoice, AutoSklearnRegressionAlgorithm, From 4739c7fac1703206f1ca36bf907becffb77d1e84 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 4 Jul 2022 15:19:04 +0200 Subject: [PATCH 38/63] fixing pre-commit --- autosklearn/pipeline/classification.py | 20 +++++------ autosklearn/pipeline/components/base.py | 34 +++++++++++-------- .../components/classification/__init__.py | 10 +++--- .../components/data_preprocessing/__init__.py | 2 +- 
.../minority_coalescense/no_coalescense.py | 2 +- .../text_encoding/__init__.py | 2 +- .../feature_preprocessing/__init__.py | 10 +++--- .../extra_trees_preproc_for_classification.py | 7 ++-- .../extra_trees_preproc_for_regression.py | 7 ++-- .../feature_agglomeration.py | 7 ++-- .../feature_preprocessing/kernel_pca.py | 7 ++-- .../feature_preprocessing/kitchen_sinks.py | 7 ++-- .../liblinear_svc_preprocessor.py | 7 ++-- .../feature_preprocessing/no_preprocessing.py | 7 ++-- .../feature_preprocessing/nystroem_sampler.py | 8 ++--- .../components/feature_preprocessing/pca.py | 7 ++-- .../feature_preprocessing/polynomial.py | 7 ++-- .../random_trees_embedding.py | 7 ++-- .../select_percentile_classification.py | 7 ++-- .../select_percentile_regression.py | 7 ++-- .../select_rates_classification.py | 7 ++-- .../select_rates_regression.py | 7 ++-- .../feature_preprocessing/truncatedSVD.py | 7 ++-- .../components/regression/__init__.py | 10 +++--- autosklearn/pipeline/regression.py | 23 +++++-------- 25 files changed, 85 insertions(+), 141 deletions(-) diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index bbf56fa429..e99dcc2cff 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Union +from typing import Dict, Optional, Union import copy from itertools import product @@ -76,11 +76,11 @@ def __init__( feat_type: Optional[FEAT_TYPE_TYPE] = None, config: Optional[Configuration] = None, steps=None, - dataset_properties: Dict[str, bool] = None, - include: Optional[Dict[str, List[str]]] = None, - exclude: Optional[Dict[str, List[str]]] = None, + dataset_properties=None, + include=None, + exclude=None, random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict[str, Any]] = None, + init_params=None, ): self._output_dtype = np.int32 if dataset_properties is None: @@ -175,9 +175,9 @@ def 
predict_proba(self, X, batch_size=None): def _get_hyperparameter_search_space( self, feat_type: Optional[FEAT_TYPE_TYPE] = None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + include=None, + exclude=None, + dataset_properties=None, ): """Create the hyperparameter configuration space. @@ -359,9 +359,7 @@ def _get_hyperparameter_search_space( return cs def _get_pipeline_steps( - self, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE], - feat_type: Optional[FEAT_TYPE_TYPE] = None, + self, dataset_properties, feat_type: Optional[FEAT_TYPE_TYPE] = None ): steps = [] diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py index 4737dfb790..7b496842b2 100644 --- a/autosklearn/pipeline/components/base.py +++ b/autosklearn/pipeline/components/base.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Union +from typing import Dict, Optional import importlib import inspect @@ -6,14 +6,11 @@ import sys from collections import OrderedDict -from ConfigSpace.configuration_space import Configuration from sklearn.base import BaseEstimator, TransformerMixin from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.constants import SPARSE -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - _addons = dict() # type: Dict[str, 'ThirdPartyComponents'] @@ -103,14 +100,13 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): """Return the configuration space of this classification algorithm. 
Parameters ---------- - + feat_type : FEAT_TYPE_TYPE (default=None) dataset_properties : dict, optional (default=None) Returns @@ -145,9 +141,9 @@ def fit(self, X, y): def set_hyperparameters( self, - configuration: Configuration, + configuration, feat_type: Optional[FEAT_TYPE_TYPE] = None, - init_params: Optional[Dict[str, Any]] = None, + init_params=None, ): params = configuration.get_dictionary() @@ -351,7 +347,12 @@ def get_estimator(self): class AutoSklearnChoice(object): - def __init__(self, dataset_properties, feat_type=None, random_state=None): + def __init__( + self, + dataset_properties, + feat_type: Optional[FEAT_TYPE_TYPE] = None, + random_state=None, + ): """ Parameters ---------- @@ -426,7 +427,12 @@ def get_available_components( return components_dict - def set_hyperparameters(self, configuration, feat_type=None, init_params=None): + def set_hyperparameters( + self, + configuration, + feat_type: Optional[FEAT_TYPE_TYPE] = None, + init_params=None, + ): new_params = {} params = configuration.get_dictionary() @@ -452,10 +458,10 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None): def get_hyperparameter_search_space( self, feat_type: FEAT_TYPE_TYPE, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + dataset_properties=None, default=None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, + include=None, + exclude=None, ): raise NotImplementedError() diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index 0233aee2b2..6475fa0156 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -1,6 +1,6 @@ __author__ = "feurerm" -from typing import Dict, Optional, Type, Union +from typing import Type import os from collections import OrderedDict @@ -25,8 +25,6 @@ additional_components = 
ThirdPartyComponents(AutoSklearnClassificationAlgorithm) _addons["classification"] = additional_components -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - def add_classifier(classifier: Type[AutoSklearnClassificationAlgorithm]) -> None: additional_components.add_component(classifier) @@ -92,10 +90,10 @@ def get_available_components( def get_hyperparameter_search_space( self, feat_type: FEAT_TYPE_TYPE, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + dataset_properties=None, default=None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, + include=None, + exclude=None, ): if dataset_properties is None: dataset_properties = {} diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index 5f89548377..3cc968f7d1 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -154,8 +154,8 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: def set_hyperparameters( self, configuration: ConfigurationSpace, - init_params: Optional[Dict] = None, feat_type: Optional[FEAT_TYPE_TYPE] = None, + init_params: Optional[Dict] = None, ) -> "DataPreprocessorChoice": config = {} params = configuration.get_dictionary() diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py index f563371fe6..2732795649 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py @@ -12,8 +12,8 @@ class NoCoalescence(AutoSklearnPreprocessingAlgorithm): def __init__( self, - random_state: Optional[Union[int, np.random.RandomState]] = None, feat_type: Optional[FEAT_TYPE_TYPE] = 
None, + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: pass diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py index bbfbf9196f..75c173e181 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py @@ -94,8 +94,8 @@ def get_hyperparameter_search_space( def set_hyperparameters( self, configuration: Configuration, - init_params: Optional[Dict[str, Any]] = None, feat_type: Optional[FEAT_TYPE_TYPE] = None, + init_params: Optional[Dict[str, Any]] = None, ) -> "BagOfWordChoice": new_params = {} diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index 46cd476337..9a0bf69a30 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Type, Union +from typing import Optional, Type import os from collections import OrderedDict @@ -23,8 +23,6 @@ additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) _addons["feature_preprocessing"] = additional_components -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None: additional_components.add_component(preprocessor) @@ -107,10 +105,10 @@ def get_available_components( def get_hyperparameter_search_space( self, feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + dataset_properties=None, default=None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, + include=None, + exclude=None, ): cs = ConfigurationSpace() diff --git 
a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py index f247fe1e90..904004b201 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -14,8 +14,6 @@ from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class ExtraTreesPreprocessorClassification(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -129,8 +127,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py index 84e9d3afbc..10e741a44e 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace @@ -15,8 +15,6 @@ from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common 
import check_for_bool, check_none -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class ExtraTreesPreprocessorRegression(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -131,8 +129,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py index 734beb834d..2a8db4eaad 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py +++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace @@ -16,8 +16,6 @@ from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class FeatureAgglomeration(AutoSklearnPreprocessingAlgorithm): def __init__(self, n_clusters, affinity, linkage, pooling_func, random_state=None): @@ -69,8 +67,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): cs = ConfigurationSpace() n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25) diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py index 
5b7f04c2b4..08c72efb6f 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional import warnings @@ -15,8 +15,6 @@ from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class KernelPCA(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -88,8 +86,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): n_components = UniformIntegerHyperparameter( "n_components", 10, 2000, default_value=100 diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py index 23382abcd8..4e6a348f17 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional, Union from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -11,8 +11,6 @@ from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class RandomKitchenSinks(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -73,8 +71,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - 
dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): gamma = UniformFloatHyperparameter( "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py index 59087ce249..7031089e91 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py +++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause @@ -13,8 +13,6 @@ from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class LibLinear_Preprocessor(AutoSklearnPreprocessingAlgorithm): # Liblinear is not deterministic as it uses a RNG inside @@ -97,8 +95,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py index 979021a04a..38c11bdb58 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py +++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from 
ConfigSpace.configuration_space import ConfigurationSpace @@ -6,8 +6,6 @@ from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class NoPreprocessing(AutoSklearnPreprocessingAlgorithm): def __init__(self, random_state): @@ -40,8 +38,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py index 9190c8f715..a7dc227056 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py +++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace @@ -18,8 +18,6 @@ UNSIGNED_DATA, ) -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class Nystroem(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -55,7 +53,6 @@ def fit(self, X, Y=None): if self.kernel == "chi2": if scipy.sparse.issparse(X): X.data[X.data < 0] = 0.0 - X = X.todense() else: X[X < 0] = 0.0 @@ -100,8 +97,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): if dataset_properties is not None and ( dataset_properties.get("sparse") is True 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py index ea1f5a29b0..7c69f8eb80 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace @@ -12,8 +12,6 @@ from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class PCA(AutoSklearnPreprocessingAlgorithm): def __init__(self, keep_variance, whiten, random_state=None): @@ -61,8 +59,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): keep_variance = UniformFloatHyperparameter( "keep_variance", 0.5, 0.9999, default_value=0.9999 diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py index b44c0c3240..78e3ff2676 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py +++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -11,8 +11,6 @@ from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class PolynomialFeatures(AutoSklearnPreprocessingAlgorithm): def __init__(self, degree, 
interaction_only, include_bias, random_state=None): @@ -60,8 +58,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): # More than degree 3 is too expensive! degree = UniformIntegerHyperparameter("degree", 2, 3, 2) diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py index 785458aa8e..2b5aa340a9 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py +++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -13,8 +13,6 @@ from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class RandomTreesEmbedding(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -100,8 +98,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): n_estimators = UniformIntegerHyperparameter( name="n_estimators", lower=10, upper=100, default_value=10 diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py index 0f6dcab0f6..98495eaedb 100644 --- 
a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from functools import partial @@ -22,8 +22,6 @@ UNSIGNED_DATA, ) -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class SelectPercentileClassification( SelectPercentileBase, AutoSklearnPreprocessingAlgorithm @@ -116,8 +114,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): percentile = UniformFloatHyperparameter( name="percentile", lower=1, upper=99, default_value=50 diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py index 61a0c8598d..a653dc4a7e 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from functools import partial @@ -15,8 +15,6 @@ ) from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class SelectPercentileRegression( SelectPercentileBase, AutoSklearnPreprocessingAlgorithm @@ -59,8 +57,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): percentile = 
UniformFloatHyperparameter( "percentile", lower=1, upper=99, default_value=50 diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py index 1d52628766..3a728d753e 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from functools import partial @@ -19,8 +19,6 @@ UNSIGNED_DATA, ) -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class SelectClassificationRates(AutoSklearnPreprocessingAlgorithm): def __init__(self, alpha, mode="fpr", score_func="chi2", random_state=None): @@ -122,8 +120,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1 diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py index 802a4c3267..89c84905b2 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from functools import partial @@ -13,8 +13,6 @@ from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class 
SelectRegressionRates(AutoSklearnPreprocessingAlgorithm): def __init__( @@ -90,8 +88,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): alpha = UniformFloatHyperparameter( name="alpha", lower=0.01, upper=0.5, default_value=0.1 diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py index 77e49028eb..0c61e72c1c 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py +++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformIntegerHyperparameter @@ -7,8 +7,6 @@ from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class TruncatedSVD(AutoSklearnPreprocessingAlgorithm): def __init__(self, target_dim, random_state=None): @@ -54,8 +52,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space( - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): target_dim = UniformIntegerHyperparameter( "target_dim", 10, 256, default_value=128 diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py index 806cef98f3..0f693e2d08 100644 --- a/autosklearn/pipeline/components/regression/__init__.py +++ 
b/autosklearn/pipeline/components/regression/__init__.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Type, Union +from typing import Type import os from collections import OrderedDict @@ -23,8 +23,6 @@ additional_components = ThirdPartyComponents(AutoSklearnRegressionAlgorithm) _addons["regression"] = additional_components -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - def add_regressor(regressor: Type[AutoSklearnRegressionAlgorithm]) -> None: additional_components.add_component(regressor) @@ -85,10 +83,10 @@ def get_available_components( def get_hyperparameter_search_space( self, feat_type: FEAT_TYPE_TYPE, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + dataset_properties=None, default=None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, + include=None, + exclude=None, ): if include is not None and exclude is not None: raise ValueError( diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py index 43d0d8c1fa..0c42fa8e4d 100644 --- a/autosklearn/pipeline/regression.py +++ b/autosklearn/pipeline/regression.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Union +from typing import Optional, Union import copy from itertools import product @@ -17,8 +17,6 @@ from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice from autosklearn.pipeline.constants import SPARSE -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class SimpleRegressionPipeline(RegressorMixin, BasePipeline): """This class implements the regression task. 
@@ -73,11 +71,11 @@ def __init__( feat_type: Optional[FEAT_TYPE_TYPE] = None, config: Optional[Configuration] = None, steps=None, - dataset_properties: Dict[str, bool] = None, - include: Optional[Dict[str, List[str]]] = None, - exclude: Optional[Dict[str, List[str]]] = None, + dataset_properties=None, + include=None, + exclude=None, random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict[str, Any]] = None, + init_params=None, ): self._output_dtype = np.float32 if dataset_properties is None: @@ -119,9 +117,9 @@ def predict(self, X, batch_size=None): def _get_hyperparameter_search_space( self, feat_type: Optional[FEAT_TYPE_TYPE] = None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + include=None, + exclude=None, + dataset_properties=None, ): """Return the configuration space for the CASH problem. @@ -270,10 +268,7 @@ def _get_estimator_components(self): return regression_components._regressors def _get_pipeline_steps( - self, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE], - feat_type: Optional[FEAT_TYPE_TYPE] = None, - init_params: Optional[Dict[str, Any]] = None, + self, dataset_properties, feat_type: Optional[FEAT_TYPE_TYPE] = None ): steps = [] From a260e395ee9a161fcecf2f5b67dbf0da6c840ce7 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 4 Jul 2022 15:22:08 +0200 Subject: [PATCH 39/63] fixing pre-commit --- autosklearn/pipeline/base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 42a3fa029f..889c0c82ff 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -35,14 +35,14 @@ class BasePipeline(Pipeline): def __init__( self, + config=None, feat_type: Optional[FEAT_TYPE_TYPE] = None, - config: Optional[Configuration] = None, steps=None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = 
None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, - random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict[str, Any]] = None, + dataset_properties=None, + include=None, + exclude=None, + random_state=None, + init_params=None, ): self.init_params = init_params if init_params is not None else {} From 8341e822723279ef40e12a778b30b245c69174a3 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 4 Jul 2022 15:27:24 +0200 Subject: [PATCH 40/63] fixing pre-commit --- autosklearn/pipeline/base.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 889c0c82ff..b4647215c6 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -1,9 +1,9 @@ from abc import ABCMeta -from typing import Any, Dict, Optional, Union +from typing import Dict, Optional, Union import numpy as np import scipy.sparse -from ConfigSpace import Configuration, ConfigurationSpace +from ConfigSpace import Configuration from sklearn.pipeline import Pipeline import autosklearn.pipeline.create_searchspace_util @@ -211,9 +211,9 @@ def predict(self, X, batch_size=None): def set_hyperparameters( self, - configuration: Configuration, + configuration, feat_type: Optional[FEAT_TYPE_TYPE] = None, - init_params: Optional[Dict[str, Any]] = None, + init_params=None, ): self.config = configuration @@ -262,9 +262,7 @@ def set_hyperparameters( return self def get_hyperparameter_search_space( - self, - feat_type: Optional[FEAT_TYPE_TYPE] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + self, feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None ): """Return the configuration space for the CASH problem. 
@@ -286,9 +284,9 @@ def get_hyperparameter_search_space( def _get_hyperparameter_search_space( self, feat_type: Optional[FEAT_TYPE_TYPE] = None, - include: Optional[Dict[str, str]] = None, - exclude: Optional[Dict[str, str]] = None, - dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + include=None, + exclude=None, + dataset_properties=None, ): """Return the configuration space for the CASH problem. @@ -334,10 +332,10 @@ def _get_hyperparameter_search_space( def _get_base_search_space( self, - cs: ConfigurationSpace, - dataset_properties: DATASET_PROPERTIES_TYPE, - include: Dict[str, str], - exclude: Dict[str, str], + cs, + dataset_properties, + include, + exclude, pipeline, feat_type: Optional[FEAT_TYPE_TYPE] = None, ): @@ -544,7 +542,9 @@ def __repr__(self): return return_value - def _get_pipeline_steps(self, dataset_properties, feat_type=None): + def _get_pipeline_steps( + self, dataset_properties, feat_type: Optional[FEAT_TYPE_TYPE] = None + ): raise NotImplementedError() def _get_estimator_hyperparameter_name(self): From 6cf49dd577a59245c68cd02188868327ff7c532c Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 4 Jul 2022 15:29:42 +0200 Subject: [PATCH 41/63] fixing pre-commit --- autosklearn/pipeline/classification.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index e99dcc2cff..ba5a662a3f 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional, Union import copy from itertools import product @@ -20,8 +20,6 @@ ) from autosklearn.pipeline.constants import SPARSE -DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] - class SimpleClassificationPipeline(BasePipeline, ClassifierMixin): """This class implements the classification task. 
From d0f9d963df0caf38917cacd12171110a4609b6f0 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 4 Jul 2022 15:32:14 +0200 Subject: [PATCH 42/63] fixing pre-commit --- autosklearn/metalearning/input/aslib_simple.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py index a1724a3846..bbf4f44fb9 100644 --- a/autosklearn/metalearning/input/aslib_simple.py +++ b/autosklearn/metalearning/input/aslib_simple.py @@ -150,10 +150,11 @@ def _read_configurations(self, filename): configuration = dict() algorithm_id = line["idx"] for hp_name, value in line.items(): - # Todo adapt to search space if not value or hp_name == "idx": continue if hp_name not in hp_names: + # skip meta learning configuration + # if it is not existing in the current search space continue try: value = int(value) From adc011ef6f35bc8c750bad008d77ecebc5fd95f0 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 4 Jul 2022 17:10:35 +0200 Subject: [PATCH 43/63] fixing pre-commit --- .../pipeline/components/classification/bernoulli_nb.py | 7 ++++++- .../pipeline/components/classification/decision_tree.py | 7 ++++++- .../pipeline/components/classification/extra_trees.py | 7 ++++++- .../pipeline/components/classification/gaussian_nb.py | 7 ++++++- .../components/classification/gradient_boosting.py | 7 ++++++- .../components/classification/k_nearest_neighbors.py | 7 ++++++- autosklearn/pipeline/components/classification/lda.py | 7 ++++++- .../pipeline/components/classification/liblinear_svc.py | 7 ++++++- .../pipeline/components/classification/libsvm_svc.py | 7 ++++++- autosklearn/pipeline/components/classification/mlp.py | 7 ++++++- .../pipeline/components/classification/multinomial_nb.py | 7 ++++++- .../components/classification/passive_aggressive.py | 7 ++++++- autosklearn/pipeline/components/classification/qda.py | 7 ++++++- .../pipeline/components/classification/random_forest.py | 7 ++++++- 
autosklearn/pipeline/components/classification/sgd.py | 7 ++++++- .../pipeline/components/feature_preprocessing/densifier.py | 7 ++++++- .../pipeline/components/feature_preprocessing/fast_ica.py | 7 ++++++- autosklearn/pipeline/components/regression/adaboost.py | 7 ++++++- .../pipeline/components/regression/ard_regression.py | 7 ++++++- .../pipeline/components/regression/decision_tree.py | 7 ++++++- autosklearn/pipeline/components/regression/extra_trees.py | 7 ++++++- .../pipeline/components/regression/gaussian_process.py | 7 ++++++- .../pipeline/components/regression/gradient_boosting.py | 7 ++++++- .../pipeline/components/regression/k_nearest_neighbors.py | 7 ++++++- .../pipeline/components/regression/liblinear_svr.py | 7 ++++++- autosklearn/pipeline/components/regression/libsvm_svr.py | 7 ++++++- autosklearn/pipeline/components/regression/mlp.py | 7 ++++++- .../pipeline/components/regression/random_forest.py | 7 ++++++- autosklearn/pipeline/components/regression/sgd.py | 7 ++++++- examples/80_extending/example_extending_classification.py | 7 ++++++- .../80_extending/example_extending_data_preprocessor.py | 7 ++++++- examples/80_extending/example_extending_preprocessor.py | 7 ++++++- examples/80_extending/example_extending_regression.py | 7 ++++++- .../example_restrict_number_of_hyperparameters.py | 6 +++++- test/test_pipeline/components/regression/test_mlp.py | 2 +- 35 files changed, 204 insertions(+), 35 deletions(-) diff --git a/autosklearn/pipeline/components/classification/bernoulli_nb.py b/autosklearn/pipeline/components/classification/bernoulli_nb.py index 8271c5f602..de52bc939c 100644 --- a/autosklearn/pipeline/components/classification/bernoulli_nb.py +++ b/autosklearn/pipeline/components/classification/bernoulli_nb.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -5,6 +7,7 @@ UniformFloatHyperparameter, ) +from 
autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool @@ -64,7 +67,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() # the smoothing parameter is a non-negative float diff --git a/autosklearn/pipeline/components/classification/decision_tree.py b/autosklearn/pipeline/components/classification/decision_tree.py index fbfc6b7c6a..1369ecf906 100644 --- a/autosklearn/pipeline/components/classification/decision_tree.py +++ b/autosklearn/pipeline/components/classification/decision_tree.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -8,6 +10,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.pipeline.implementations.util import ( @@ -106,7 +109,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() criterion = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/classification/extra_trees.py b/autosklearn/pipeline/components/classification/extra_trees.py index 5c7ce1879a..36edd82584 100644 --- a/autosklearn/pipeline/components/classification/extra_trees.py +++ 
b/autosklearn/pipeline/components/classification/extra_trees.py @@ -1,3 +1,5 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -6,6 +8,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponentWithSampleWeight, @@ -156,7 +159,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() criterion = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/classification/gaussian_nb.py b/autosklearn/pipeline/components/classification/gaussian_nb.py index 8e978e9631..bf43f4e4a5 100644 --- a/autosklearn/pipeline/components/classification/gaussian_nb.py +++ b/autosklearn/pipeline/components/classification/gaussian_nb.py @@ -1,6 +1,9 @@ +from typing import Optional + import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA @@ -55,6 +58,8 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/classification/gradient_boosting.py b/autosklearn/pipeline/components/classification/gradient_boosting.py index 50b0b284bd..618028dff7 100644 --- a/autosklearn/pipeline/components/classification/gradient_boosting.py 
+++ b/autosklearn/pipeline/components/classification/gradient_boosting.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace @@ -9,6 +11,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponentWithSampleWeight, @@ -182,7 +185,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() loss = Constant("loss", "auto") learning_rate = UniformFloatHyperparameter( diff --git a/autosklearn/pipeline/components/classification/k_nearest_neighbors.py b/autosklearn/pipeline/components/classification/k_nearest_neighbors.py index fe55e0783d..d524bd42d9 100644 --- a/autosklearn/pipeline/components/classification/k_nearest_neighbors.py +++ b/autosklearn/pipeline/components/classification/k_nearest_neighbors.py @@ -1,9 +1,12 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, UniformIntegerHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA @@ -57,7 +60,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() n_neighbors = UniformIntegerHyperparameter( diff --git 
a/autosklearn/pipeline/components/classification/lda.py b/autosklearn/pipeline/components/classification/lda.py index 29a08f80b5..e7ebec290b 100644 --- a/autosklearn/pipeline/components/classification/lda.py +++ b/autosklearn/pipeline/components/classification/lda.py @@ -1,3 +1,5 @@ +from typing import Optional + from ConfigSpace.conditions import EqualsCondition from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -5,6 +7,7 @@ UniformFloatHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA from autosklearn.pipeline.implementations.util import softmax @@ -76,7 +79,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() shrinkage = CategoricalHyperparameter( "shrinkage", ["None", "auto", "manual"], default_value="None" diff --git a/autosklearn/pipeline/components/classification/liblinear_svc.py b/autosklearn/pipeline/components/classification/liblinear_svc.py index 3f57ef8f94..d1beb08837 100644 --- a/autosklearn/pipeline/components/classification/liblinear_svc.py +++ b/autosklearn/pipeline/components/classification/liblinear_svc.py @@ -1,3 +1,5 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause from ConfigSpace.hyperparameters import ( @@ -6,6 +8,7 @@ UniformFloatHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from 
autosklearn.pipeline.implementations.util import softmax @@ -104,7 +107,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() penalty = CategoricalHyperparameter("penalty", ["l1", "l2"], default_value="l2") diff --git a/autosklearn/pipeline/components/classification/libsvm_svc.py b/autosklearn/pipeline/components/classification/libsvm_svc.py index ba423161c1..43bd017c5c 100644 --- a/autosklearn/pipeline/components/classification/libsvm_svc.py +++ b/autosklearn/pipeline/components/classification/libsvm_svc.py @@ -1,3 +1,5 @@ +from typing import Optional + import resource import sys @@ -10,6 +12,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.pipeline.implementations.util import softmax @@ -138,7 +141,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0) # No linear kernel here, because we have liblinear kernel = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/classification/mlp.py b/autosklearn/pipeline/components/classification/mlp.py index f7001d7bc1..d8c95fa9d0 100644 --- a/autosklearn/pipeline/components/classification/mlp.py +++ b/autosklearn/pipeline/components/classification/mlp.py @@ -1,3 +1,5 @@ +from typing import Optional + import copy import numpy as np @@ -11,6 +13,7 @@ UnParametrizedHyperparameter, ) +from 
autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponent, @@ -203,7 +206,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() hidden_layer_depth = UniformIntegerHyperparameter( name="hidden_layer_depth", lower=1, upper=3, default_value=1 diff --git a/autosklearn/pipeline/components/classification/multinomial_nb.py b/autosklearn/pipeline/components/classification/multinomial_nb.py index 7b65be8a5c..dee1507f01 100644 --- a/autosklearn/pipeline/components/classification/multinomial_nb.py +++ b/autosklearn/pipeline/components/classification/multinomial_nb.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -5,6 +7,7 @@ UniformFloatHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SIGNED_DATA, SPARSE from autosklearn.util.common import check_for_bool @@ -76,7 +79,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() # the smoothing parameter is a non-negative float diff --git a/autosklearn/pipeline/components/classification/passive_aggressive.py b/autosklearn/pipeline/components/classification/passive_aggressive.py index 494ea7db06..97a11a0283 100644 --- a/autosklearn/pipeline/components/classification/passive_aggressive.py +++ 
b/autosklearn/pipeline/components/classification/passive_aggressive.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -6,6 +8,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponentWithSampleWeight, @@ -152,7 +155,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): C = UniformFloatHyperparameter("C", 1e-5, 10, 1.0, log=True) fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True") loss = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/classification/qda.py b/autosklearn/pipeline/components/classification/qda.py index 7b25858392..0b6f6f7653 100644 --- a/autosklearn/pipeline/components/classification/qda.py +++ b/autosklearn/pipeline/components/classification/qda.py @@ -1,7 +1,10 @@ +from typing import Optional + import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformFloatHyperparameter +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA from autosklearn.pipeline.implementations.util import softmax @@ -72,7 +75,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): reg_param = UniformFloatHyperparameter("reg_param", 0.0, 1.0, default_value=0.0) cs = 
ConfigurationSpace() cs.add_hyperparameter(reg_param) diff --git a/autosklearn/pipeline/components/classification/random_forest.py b/autosklearn/pipeline/components/classification/random_forest.py index 6ccd720b3a..892d8611d5 100644 --- a/autosklearn/pipeline/components/classification/random_forest.py +++ b/autosklearn/pipeline/components/classification/random_forest.py @@ -1,3 +1,5 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -6,6 +8,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponentWithSampleWeight, @@ -149,7 +152,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() criterion = CategoricalHyperparameter( "criterion", ["gini", "entropy"], default_value="gini" diff --git a/autosklearn/pipeline/components/classification/sgd.py b/autosklearn/pipeline/components/classification/sgd.py index 469c2605dd..5073f8ec20 100644 --- a/autosklearn/pipeline/components/classification/sgd.py +++ b/autosklearn/pipeline/components/classification/sgd.py @@ -1,3 +1,5 @@ +from typing import Optional + from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -6,6 +8,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponentWithSampleWeight, @@ -169,7 +172,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def 
get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() loss = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/feature_preprocessing/densifier.py b/autosklearn/pipeline/components/feature_preprocessing/densifier.py index f5c88ecadf..f571d6abee 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/densifier.py +++ b/autosklearn/pipeline/components/feature_preprocessing/densifier.py @@ -1,5 +1,8 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA @@ -36,6 +39,8 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py b/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py index 695ff3c2cc..fe23177fc9 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py +++ b/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py @@ -1,3 +1,5 @@ +from typing import Optional + import warnings from ConfigSpace.conditions import EqualsCondition @@ -7,6 +9,7 @@ UniformIntegerHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none @@ -74,7 +77,9 @@ def 
get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() n_components = UniformIntegerHyperparameter( diff --git a/autosklearn/pipeline/components/regression/adaboost.py b/autosklearn/pipeline/components/regression/adaboost.py index e78a57e6a2..8faae821c3 100644 --- a/autosklearn/pipeline/components/regression/adaboost.py +++ b/autosklearn/pipeline/components/regression/adaboost.py @@ -1,3 +1,5 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -5,6 +7,7 @@ UniformIntegerHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA @@ -62,7 +65,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() # base_estimator = Constant(name="base_estimator", value="None") diff --git a/autosklearn/pipeline/components/regression/ard_regression.py b/autosklearn/pipeline/components/regression/ard_regression.py index 219cb775af..758c4b04d7 100644 --- a/autosklearn/pipeline/components/regression/ard_regression.py +++ b/autosklearn/pipeline/components/regression/ard_regression.py @@ -1,9 +1,12 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( UniformFloatHyperparameter, UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base 
import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA from autosklearn.util.common import check_for_bool @@ -89,7 +92,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() n_iter = UnParametrizedHyperparameter("n_iter", value=300) tol = UniformFloatHyperparameter( diff --git a/autosklearn/pipeline/components/regression/decision_tree.py b/autosklearn/pipeline/components/regression/decision_tree.py index db59767587..80890889f9 100644 --- a/autosklearn/pipeline/components/regression/decision_tree.py +++ b/autosklearn/pipeline/components/regression/decision_tree.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -8,6 +10,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_none @@ -96,7 +99,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() criterion = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/regression/extra_trees.py b/autosklearn/pipeline/components/regression/extra_trees.py index c4646a2709..b1d8eeb00a 100644 --- a/autosklearn/pipeline/components/regression/extra_trees.py +++ b/autosklearn/pipeline/components/regression/extra_trees.py @@ -1,3 +1,5 @@ +from typing import Optional + from 
ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -6,6 +8,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnRegressionAlgorithm, IterativeComponent, @@ -148,7 +151,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() criterion = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/regression/gaussian_process.py b/autosklearn/pipeline/components/regression/gaussian_process.py index 1acf238cd1..d08a3b0239 100644 --- a/autosklearn/pipeline/components/regression/gaussian_process.py +++ b/autosklearn/pipeline/components/regression/gaussian_process.py @@ -1,6 +1,9 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformFloatHyperparameter +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA @@ -65,7 +68,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): alpha = UniformFloatHyperparameter( name="alpha", lower=1e-14, upper=1.0, default_value=1e-8, log=True ) diff --git a/autosklearn/pipeline/components/regression/gradient_boosting.py b/autosklearn/pipeline/components/regression/gradient_boosting.py index b7503f5fd0..16b7df965d 100644 --- a/autosklearn/pipeline/components/regression/gradient_boosting.py +++ 
b/autosklearn/pipeline/components/regression/gradient_boosting.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace @@ -9,6 +11,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnRegressionAlgorithm, IterativeComponent, @@ -166,7 +169,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() loss = CategoricalHyperparameter( "loss", ["least_squares"], default_value="least_squares" diff --git a/autosklearn/pipeline/components/regression/k_nearest_neighbors.py b/autosklearn/pipeline/components/regression/k_nearest_neighbors.py index 83c13cd191..c16e8a6404 100644 --- a/autosklearn/pipeline/components/regression/k_nearest_neighbors.py +++ b/autosklearn/pipeline/components/regression/k_nearest_neighbors.py @@ -1,9 +1,12 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, UniformIntegerHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA @@ -52,7 +55,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() n_neighbors = UniformIntegerHyperparameter( diff --git a/autosklearn/pipeline/components/regression/liblinear_svr.py 
b/autosklearn/pipeline/components/regression/liblinear_svr.py index e129331298..62e38c1551 100644 --- a/autosklearn/pipeline/components/regression/liblinear_svr.py +++ b/autosklearn/pipeline/components/regression/liblinear_svr.py @@ -1,3 +1,5 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause from ConfigSpace.hyperparameters import ( @@ -6,6 +8,7 @@ UniformFloatHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool @@ -83,7 +86,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0) loss = CategoricalHyperparameter( diff --git a/autosklearn/pipeline/components/regression/libsvm_svr.py b/autosklearn/pipeline/components/regression/libsvm_svr.py index d4173d7f01..c3ac98b1f9 100644 --- a/autosklearn/pipeline/components/regression/libsvm_svr.py +++ b/autosklearn/pipeline/components/regression/libsvm_svr.py @@ -1,3 +1,5 @@ +from typing import Optional + import resource import sys @@ -10,6 +12,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none @@ -149,7 +152,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def 
get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): C = UniformFloatHyperparameter( name="C", lower=0.03125, upper=32768, log=True, default_value=1.0 ) diff --git a/autosklearn/pipeline/components/regression/mlp.py b/autosklearn/pipeline/components/regression/mlp.py index 645c29403a..42ceff4556 100644 --- a/autosklearn/pipeline/components/regression/mlp.py +++ b/autosklearn/pipeline/components/regression/mlp.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from ConfigSpace.conditions import InCondition from ConfigSpace.configuration_space import ConfigurationSpace @@ -9,6 +11,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnRegressionAlgorithm, IterativeComponent, @@ -225,7 +228,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() hidden_layer_depth = UniformIntegerHyperparameter( name="hidden_layer_depth", lower=1, upper=3, default_value=1 diff --git a/autosklearn/pipeline/components/regression/random_forest.py b/autosklearn/pipeline/components/regression/random_forest.py index 128113fc43..043d62e16b 100644 --- a/autosklearn/pipeline/components/regression/random_forest.py +++ b/autosklearn/pipeline/components/regression/random_forest.py @@ -1,3 +1,5 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -6,6 +8,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnRegressionAlgorithm, IterativeComponent, @@ 
-135,7 +138,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() criterion = CategoricalHyperparameter( "criterion", ["mse", "friedman_mse", "mae"] diff --git a/autosklearn/pipeline/components/regression/sgd.py b/autosklearn/pipeline/components/regression/sgd.py index 3b3f939fa8..915e45169f 100644 --- a/autosklearn/pipeline/components/regression/sgd.py +++ b/autosklearn/pipeline/components/regression/sgd.py @@ -1,3 +1,5 @@ +from typing import Optional + from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -6,6 +8,7 @@ UnParametrizedHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import ( AutoSklearnRegressionAlgorithm, IterativeComponent, @@ -185,7 +188,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() loss = CategoricalHyperparameter( diff --git a/examples/80_extending/example_extending_classification.py b/examples/80_extending/example_extending_classification.py index b5112c022b..9e46b9e8cd 100644 --- a/examples/80_extending/example_extending_classification.py +++ b/examples/80_extending/example_extending_classification.py @@ -6,6 +6,7 @@ The following example demonstrates how to create a new classification component for using in auto-sklearn. 
""" +from typing import Optional from pprint import pprint from ConfigSpace.configuration_space import ConfigurationSpace @@ -16,6 +17,8 @@ ) import sklearn.metrics + +from autosklearn.askl_typing import FEAT_TYPE_TYPE import autosklearn.classification import autosklearn.pipeline.components.classification from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm @@ -100,7 +103,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() hidden_layer_depth = UniformIntegerHyperparameter( name="hidden_layer_depth", lower=1, upper=3, default_value=1 diff --git a/examples/80_extending/example_extending_data_preprocessor.py b/examples/80_extending/example_extending_data_preprocessor.py index aa5c443255..eb0325d9df 100644 --- a/examples/80_extending/example_extending_data_preprocessor.py +++ b/examples/80_extending/example_extending_data_preprocessor.py @@ -5,12 +5,15 @@ The following example demonstrates how to turn off data preprocessing step in auto-skearn. 
""" +from typing import Optional from pprint import pprint import autosklearn.classification import autosklearn.pipeline.components.data_preprocessing import sklearn.metrics from ConfigSpace.configuration_space import ConfigurationSpace + +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, INPUT from sklearn.datasets import load_breast_cancer @@ -49,7 +52,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): return ConfigurationSpace() # Return an empty configuration as there is None diff --git a/examples/80_extending/example_extending_preprocessor.py b/examples/80_extending/example_extending_preprocessor.py index 1eb3fc1daf..8516931780 100644 --- a/examples/80_extending/example_extending_preprocessor.py +++ b/examples/80_extending/example_extending_preprocessor.py @@ -7,6 +7,7 @@ discriminant analysis (LDA) algorithm from sklearn and use it as a preprocessor in auto-sklearn. 
""" +from typing import Optional from pprint import pprint from ConfigSpace.configuration_space import ConfigurationSpace @@ -17,6 +18,8 @@ from ConfigSpace.conditions import InCondition import sklearn.metrics + +from autosklearn.askl_typing import FEAT_TYPE_TYPE import autosklearn.classification import autosklearn.pipeline.components.feature_preprocessing from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm @@ -76,7 +79,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() solver = CategoricalHyperparameter( name="solver", choices=["svd", "lsqr", "eigen"], default_value="svd" diff --git a/examples/80_extending/example_extending_regression.py b/examples/80_extending/example_extending_regression.py index 4d6987a9db..ad2fb8850f 100644 --- a/examples/80_extending/example_extending_regression.py +++ b/examples/80_extending/example_extending_regression.py @@ -6,6 +6,7 @@ The following example demonstrates how to create a new regression component for using in auto-sklearn. 
""" +from typing import Optional from pprint import pprint from ConfigSpace.configuration_space import ConfigurationSpace @@ -17,6 +18,8 @@ from ConfigSpace.conditions import EqualsCondition import sklearn.metrics + +from autosklearn.askl_typing import FEAT_TYPE_TYPE import autosklearn.regression import autosklearn.pipeline.components.regression from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm @@ -86,7 +89,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() alpha = UniformFloatHyperparameter( name="alpha", lower=10**-5, upper=1, log=True, default_value=1.0 diff --git a/examples/80_extending/example_restrict_number_of_hyperparameters.py b/examples/80_extending/example_restrict_number_of_hyperparameters.py index d8bd2f4a98..a17aa128aa 100644 --- a/examples/80_extending/example_restrict_number_of_hyperparameters.py +++ b/examples/80_extending/example_restrict_number_of_hyperparameters.py @@ -7,6 +7,7 @@ component with a new component, implementing the same classifier, but with different hyperparameters . 
""" +from typing import Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -17,6 +18,7 @@ from sklearn.datasets import load_breast_cancer from sklearn.model_selection import train_test_split +from autosklearn.askl_typing import FEAT_TYPE_TYPE import autosklearn.classification import autosklearn.pipeline.components.classification from autosklearn.pipeline.components.classification import ( @@ -84,7 +86,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() # The maximum number of features used in the forest is calculated as m^max_features, where diff --git a/test/test_pipeline/components/regression/test_mlp.py b/test/test_pipeline/components/regression/test_mlp.py index 9e2a92acac..7052952302 100644 --- a/test/test_pipeline/components/regression/test_mlp.py +++ b/test/test_pipeline/components/regression/test_mlp.py @@ -47,7 +47,7 @@ class MLPComponentTest(BaseRegressionComponentTest): res["default_boston"] = 0.2750079862455884 res["default_boston_places"] = 1 res["boston_n_calls"] = [8, 9] - res["boston_iterative_n_iter"] = [236, 331] + res["boston_iterative_n_iter"] = [236, 327, 331] res["default_boston_iterative"] = res["default_boston"] res["default_boston_iterative_places"] = 1 res["default_boston_sparse"] = -0.10972947168054104 From 3402ed976b0a90f92fead60cebf6728c54330764 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 4 Jul 2022 17:12:38 +0200 Subject: [PATCH 44/63] fixing pre-commit --- test/test_pipeline/components/regression/test_mlp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_pipeline/components/regression/test_mlp.py b/test/test_pipeline/components/regression/test_mlp.py index 7052952302..5e89e7a452 100644 --- 
a/test/test_pipeline/components/regression/test_mlp.py +++ b/test/test_pipeline/components/regression/test_mlp.py @@ -47,7 +47,7 @@ class MLPComponentTest(BaseRegressionComponentTest): res["default_boston"] = 0.2750079862455884 res["default_boston_places"] = 1 res["boston_n_calls"] = [8, 9] - res["boston_iterative_n_iter"] = [236, 327, 331] + res["boston_iterative_n_iter"] = [236, 331, 327] res["default_boston_iterative"] = res["default_boston"] res["default_boston_iterative_places"] = 1 res["default_boston_sparse"] = -0.10972947168054104 From 3bf835e3a7b1d78424b2d0ebbb1c2e8bd4a154bd Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 6 Jul 2022 13:02:49 +0200 Subject: [PATCH 45/63] adding new test. Test if new configuration space is correct. --- .../test_data_preprocessing_feat_type.py | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py new file mode 100644 index 0000000000..3318f3e08b --- /dev/null +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py @@ -0,0 +1,128 @@ +from autosklearn.pipeline.components.data_preprocessing.feature_type import ( + FeatTypeSplit, +) + +import unittest + + +class PreprocessingPipelineFeatTypeTest(unittest.TestCase): + def test_single_type(self): + DPP = FeatTypeSplit(feat_type={"A": "numerical"}) + cs = DPP.get_hyperparameter_search_space( + feat_type={"A": "numerical"}, + dataset_properties={ + "task": 1, + "sparse": False, + "multilabel": False, + "multiclass": False, + "target_type": "classification", + "signed": False, + }, + ) + for key in cs.get_hyperparameters_dict().keys(): + self.assertNotIn("text", key.split(":")[0]) + self.assertNotIn("categorical", key.split(":")[0]) + + DPP = 
FeatTypeSplit(feat_type={"A": "categorical"}) + cs = DPP.get_hyperparameter_search_space( + feat_type={"A": "categorical"}, + dataset_properties={ + "task": 1, + "sparse": False, + "multilabel": False, + "multiclass": False, + "target_type": "classification", + "signed": False, + }, + ) + for key in cs.get_hyperparameters_dict().keys(): + self.assertNotIn("text", key.split(":")[0]) + self.assertNotIn("numerical", key.split(":")[0]) + + DPP = FeatTypeSplit(feat_type={"A": "string"}) + cs = DPP.get_hyperparameter_search_space( + feat_type={"A": "string"}, + dataset_properties={ + "task": 1, + "sparse": False, + "multilabel": False, + "multiclass": False, + "target_type": "classification", + "signed": False, + }, + ) + for key in cs.get_hyperparameters_dict().keys(): + self.assertNotIn("numerical", key.split(":")[0]) + self.assertNotIn("categorical", key.split(":")[0]) + + def test_dual_type(self): + DPP = FeatTypeSplit(feat_type={"A": "numerical", "B": "categorical"}) + cs = DPP.get_hyperparameter_search_space( + feat_type={"A": "numerical", "B": "categorical"}, + dataset_properties={ + "task": 1, + "sparse": False, + "multilabel": False, + "multiclass": False, + "target_type": "classification", + "signed": False, + }, + ) + for key in cs.get_hyperparameters_dict().keys(): + self.assertNotIn("text", key.split(":")[0]) + + DPP = FeatTypeSplit(feat_type={"A": "categorical", "B": "string"}) + cs = DPP.get_hyperparameter_search_space( + feat_type={"A": "categorical", "B": "string"}, + dataset_properties={ + "task": 1, + "sparse": False, + "multilabel": False, + "multiclass": False, + "target_type": "classification", + "signed": False, + }, + ) + for key in cs.get_hyperparameters_dict().keys(): + self.assertNotIn("numerical", key.split(":")[0]) + + DPP = FeatTypeSplit(feat_type={"A": "string", "B": "categorical"}) + cs = DPP.get_hyperparameter_search_space( + feat_type={"A": "string", "B": "categorical"}, + dataset_properties={ + "task": 1, + "sparse": False, + 
"multilabel": False, + "multiclass": False, + "target_type": "classification", + "signed": False, + }, + ) + for key in cs.get_hyperparameters_dict().keys(): + self.assertNotIn("numerical", key.split(":")[0]) + + def test_triple_type(self): + DPP = FeatTypeSplit( + feat_type={"A": "numerical", "B": "categorical", "C": "string"} + ) + cs = DPP.get_hyperparameter_search_space( + feat_type={"A": "numerical", "B": "categorical", "C": "string"}, + dataset_properties={ + "task": 1, + "sparse": False, + "multilabel": False, + "multiclass": False, + "target_type": "classification", + "signed": False, + }, + ) + truth_table = [False] * 3 + for key in cs.get_hyperparameters_dict().keys(): + if "text" in key.split(":")[0]: + truth_table[0] = True + elif "categorical" in key.split(":")[0]: + truth_table[1] = True + elif "numerical" in key.split(":")[0]: + truth_table[2] = True + + self.assertEqual(sum(truth_table), 3) From 869db4942495706982c4eb4f8ba881425764dcba Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 13:38:48 +0200 Subject: [PATCH 46/63] add new tests and fixing some issues from PR --- autosklearn/pipeline/base.py | 4 +- .../data_preprocessing/feature_type.py | 26 ++---- .../pyMetaLearn/test_metalearner.py | 28 ++++-- test/test_pipeline/test_base.py | 7 +- test/test_pipeline/test_classification.py | 87 +++++++++++++++---- 5 files changed, 106 insertions(+), 46 deletions(-) diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index b4647215c6..d138a32a84 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -221,7 +221,7 @@ def set_hyperparameters( node_name, node = n_ sub_configuration_space = node.get_hyperparameter_search_space( - feat_type=feat_type, dataset_properties=self.dataset_properties + feat_type=self.feat_type, dataset_properties=self.dataset_properties ) sub_config_dict = {} for param in configuration: @@ -248,7 +248,7 @@ def set_hyperparameters( node, (AutoSklearnChoice, AutoSklearnComponent, 
BasePipeline) ): node.set_hyperparameters( - feat_type=feat_type, + feat_type=self.feat_type, configuration=sub_configuration, init_params=sub_init_params_dict, ) diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index 479af234ab..11085a1f74 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -155,15 +155,11 @@ def fit( else: columns = set(range(n_feats)) if expected != columns: - try: - columns = [str(col) for col in columns] - except Exception as e: - raise ValueError( - f"Train data has columns={expected} yet the" - f" feat_types are feat={columns}\n" - f"Exception: {e}" - ) - transformer_lst = [] + raise ValueError( + f"Train data has columns={expected} yet the" + f" feat_types are feat={columns}" + ) + sklearn_transf_spec = [] categorical_features = [ key @@ -171,7 +167,7 @@ def fit( if value.lower() == "categorical" ] if len(categorical_features) > 0: - transformer_lst.append( + sklearn_transf_spec.append( ("categorical_transformer", self.categ_ppl, categorical_features) ) @@ -181,7 +177,7 @@ def fit( if value.lower() == "numerical" ] if len(numerical_features) > 0: - transformer_lst.append( + sklearn_transf_spec.append( ("numerical_transformer", self.numer_ppl, numerical_features) ) @@ -191,15 +187,9 @@ def fit( if value.lower() == "string" ] if len(text_features) > 0: - transformer_lst.append( + sklearn_transf_spec.append( ("text_transformer", self.txt_ppl, text_features) ) - - sklearn_transf_spec = [ - (name, transformer, feature_columns) - for name, transformer, feature_columns in transformer_lst - if len(feature_columns) > 0 - ] else: # self.feature_type == None assumes numerical case sklearn_transf_spec = [ diff --git a/test/test_metalearning/pyMetaLearn/test_metalearner.py b/test/test_metalearning/pyMetaLearn/test_metalearner.py index 
42d27d49da..9e7a54a77f 100644 --- a/test/test_metalearning/pyMetaLearn/test_metalearner.py +++ b/test/test_metalearning/pyMetaLearn/test_metalearner.py @@ -23,14 +23,26 @@ def setUp(self): data_dir = os.path.join(data_dir, "test_meta_base_data") os.chdir(data_dir) - pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline() - self.cs = pipeline.get_hyperparameter_search_space() - - self.logger = logging.getLogger() - meta_base = MetaBase(self.cs, data_dir, logger=self.logger) - self.meta_optimizer = metalearner.MetaLearningOptimizer( - "233", self.cs, meta_base, logger=self.logger - ) + for feat_type in [ + None, + {"A": "numerical"}, + {"A": "categorical"}, + {"A": "string"}, + {"A": "numerical", "B": "categorical"}, + {"A": "numerical", "B": "string"}, + {"A": "categorical", "B": "string"}, + {"A": "categorical", "B": "string", "C": "numerical"}, + ]: + pipeline = ( + autosklearn.pipeline.classification.SimpleClassificationPipeline() + ) + self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type) + + self.logger = logging.getLogger() + meta_base = MetaBase(self.cs, data_dir, logger=self.logger) + self.meta_optimizer = metalearner.MetaLearningOptimizer( + "233", self.cs, meta_base, logger=self.logger + ) def tearDown(self): os.chdir(self.cwd) diff --git a/test/test_pipeline/test_base.py b/test/test_pipeline/test_base.py index af5123b4f7..1b604caf58 100644 --- a/test/test_pipeline/test_base.py +++ b/test/test_pipeline/test_base.py @@ -1,17 +1,20 @@ +from typing import Optional + import ConfigSpace.configuration_space import autosklearn.pipeline.base import autosklearn.pipeline.components.base import autosklearn.pipeline.components.classification as classification import autosklearn.pipeline.components.feature_preprocessing as feature_preprocessing +from autosklearn.askl_typing import FEAT_TYPE_TYPE import unittest import unittest.mock class BasePipelineMock(autosklearn.pipeline.base.BasePipeline): - def __init__(self): - pass + def 
__init__(self, feat_type: Optional[FEAT_TYPE_TYPE] = None): + self.feat_type = feat_type class BaseTest(unittest.TestCase): diff --git a/test/test_pipeline/test_classification.py b/test/test_pipeline/test_classification.py index 7be8038119..eb127ad02d 100644 --- a/test/test_pipeline/test_classification.py +++ b/test/test_pipeline/test_classification.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Union +from typing import Any, Dict, Optional, Union import copy import itertools @@ -20,6 +20,7 @@ import autosklearn.pipeline.components.classification as classification_components import autosklearn.pipeline.components.feature_preprocessing as preprocessing_components +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.classification import SimpleClassificationPipeline from autosklearn.pipeline.components.base import ( AutoSklearnChoice, @@ -352,16 +353,6 @@ def test_configurations_categorical_data(self): ------- * All configurations should fit, predict and predict_proba successfully """ - pipeline = SimpleClassificationPipeline( - dataset_properties={"sparse": False}, - include={ - "feature_preprocessor": ["no_preprocessing"], - "classifier": ["sgd", "adaboost"], - }, - ) - - cs = pipeline.get_hyperparameter_search_space() - categorical_columns = [ True, True, @@ -407,6 +398,17 @@ def test_configurations_categorical_data(self): for i, is_categorical in enumerate(categorical_columns) } + pipeline = SimpleClassificationPipeline( + feat_type=categorical, + dataset_properties={"sparse": False}, + include={ + "feature_preprocessor": ["no_preprocessing"], + "classifier": ["sgd", "adaboost"], + }, + ) + + cs = pipeline.get_hyperparameter_search_space() + here = os.path.dirname(__file__) dataset_path = os.path.join( here, "components", "data_preprocessing", "dataset.pkl" @@ -429,7 +431,10 @@ def test_configurations_categorical_data(self): init_params = {"data_preprocessor:feat_type": categorical} self._test_configurations( - configurations_space=cs, 
dataset=data, init_params=init_params + configurations_space=cs, + dataset=data, + init_params=init_params, + feat_type=categorical, ) @unittest.mock.patch( @@ -459,7 +464,8 @@ def test_categorical_passed_to_one_hot_encoder(self, ohe_mock): feat_types = {0: "categorical", 1: "numerical"} cls = SimpleClassificationPipeline( - init_params={"data_preprocessor:feat_type": feat_types} + feat_type=feat_types, + init_params={"data_preprocessor:feat_type": feat_types}, ) init_args = ohe_mock.call_args[1]["init_params"] @@ -485,6 +491,7 @@ def _test_configurations( init_params: Dict[str, Any] = None, dataset_properties: Dict[str, Any] = None, n_samples: int = 10, + feat_type: Optional[FEAT_TYPE_TYPE] = None, ): """Tests a configuration space by taking multiple samples and fiting each before calling predict and predict_proba. @@ -560,7 +567,9 @@ def _test_configurations( init_params_ = copy.deepcopy(init_params) cls = SimpleClassificationPipeline( - dataset_properties=dataset_properties, init_params=init_params_ + feat_type=feat_type, + dataset_properties=dataset_properties, + init_params=init_params_, ) cls.set_hyperparameters(config, init_params=init_params_) @@ -659,7 +668,9 @@ def test_get_hyperparameter_search_space(self): * (n_hyperparameters - 4) different conditionals for the pipeline * 53 forbidden combinations """ - pipeline = SimpleClassificationPipeline() + pipeline = SimpleClassificationPipeline( + feat_type={"A": "numerical", "B": "categorical", "C": "string"} + ) cs = pipeline.get_hyperparameter_search_space() self.assertIsInstance(cs, ConfigurationSpace) @@ -897,7 +908,10 @@ def test_predict_proba_batched(self): perform near identically """ # Multiclass - cls = SimpleClassificationPipeline(include={"classifier": ["sgd"]}) + cls = SimpleClassificationPipeline( + feat_type={i: "numerical" for i in range(0, 64)}, + include={"classifier": ["sgd"]}, + ) X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits") with ignore_warnings(classifier_warnings): @@ 
-951,6 +965,7 @@ def test_predict_proba_batched_sparse(self): perform near identically """ cls = SimpleClassificationPipeline( + feat_type={i: "numerical" for i in range(0, 64)}, dataset_properties={"sparse": True, "multiclass": True}, include={"classifier": ["sgd"]}, ) @@ -1313,3 +1328,43 @@ def test_fit_instantiates_component(self): del preprocessing_components.additional_components.components[ "CrashPreprocessor" ] + + def test_get_hyperparameter_search_space_feat_type(self): + cs_mc = SimpleClassificationPipeline( + feat_type={"1": "numerical"} + ).get_hyperparameter_search_space(dataset_properties={"multiclass": True}) + self.assertNotIn("data_preprocessor:feature_type:categorical", str(cs_mc)) + self.assertNotIn("data_preprocessor:feature_type:text", str(cs_mc)) + + cs_mc = SimpleClassificationPipeline( + feat_type={"1": "categorical"} + ).get_hyperparameter_search_space(dataset_properties={"multilabel": True}) + self.assertNotIn("data_preprocessor:feature_type:numerical", str(cs_mc)) + self.assertNotIn("data_preprocessor:feature_type:text", str(cs_mc)) + + cs_mc = SimpleClassificationPipeline( + feat_type={"1": "string"} + ).get_hyperparameter_search_space(dataset_properties={"sparse": True}) + self.assertNotIn("data_preprocessor:feature_type:numerical", str(cs_mc)) + self.assertNotIn("data_preprocessor:feature_type:categorical", str(cs_mc)) + + cs_mc = SimpleClassificationPipeline( + feat_type={"1": "numerical", "2": "categorical"} + ).get_hyperparameter_search_space( + dataset_properties={"multilabel": True, "multiclass": True} + ) + self.assertNotIn("data_preprocessor:feature_type:text", str(cs_mc)) + + cs_mc = SimpleClassificationPipeline( + feat_type={"1": "numerical", "2": "string"} + ).get_hyperparameter_search_space( + dataset_properties={"multilabel": True, "multiclass": True} + ) + self.assertNotIn("data_preprocessor:feature_type:categorical", str(cs_mc)) + + cs_mc = SimpleClassificationPipeline( + feat_type={"1": "categorical", "2": "string"} + 
).get_hyperparameter_search_space( + dataset_properties={"multilabel": True, "multiclass": True} + ) + self.assertNotIn("data_preprocessor:feature_type:numerical", str(cs_mc)) From e546632de030a7f26b1bca666dfc35854c5773b0 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 13:57:49 +0200 Subject: [PATCH 47/63] add new tests and fixing some issues from PR --- autosklearn/pipeline/classification.py | 2 +- autosklearn/pipeline/regression.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index ba5a662a3f..332c076b9b 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -71,8 +71,8 @@ class SimpleClassificationPipeline(BasePipeline, ClassifierMixin): def __init__( self, - feat_type: Optional[FEAT_TYPE_TYPE] = None, config: Optional[Configuration] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, steps=None, dataset_properties=None, include=None, diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py index 0c42fa8e4d..dcc2fa3fcf 100644 --- a/autosklearn/pipeline/regression.py +++ b/autosklearn/pipeline/regression.py @@ -68,8 +68,8 @@ class SimpleRegressionPipeline(RegressorMixin, BasePipeline): def __init__( self, - feat_type: Optional[FEAT_TYPE_TYPE] = None, config: Optional[Configuration] = None, + feat_type: Optional[FEAT_TYPE_TYPE] = None, steps=None, dataset_properties=None, include=None, From 63b94847a18ec94828d5d6e8126ff9edbe6808cc Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 14:34:37 +0200 Subject: [PATCH 48/63] add new tests and fixing some issues from PR --- .../test_data_preprocessing_feat_type.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py index 
3318f3e08b..c5c5369ded 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py @@ -101,6 +101,21 @@ def test_dual_type(self): for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("numerical", key.split(":")[0]) + DPP = FeatTypeSplit(feat_type={"A": "string", "B": "numerical"}) + cs = DPP.get_hyperparameter_search_space( + feat_type={"A": "string", "B": "numerical"}, + dataset_properties={ + "task": 1, + "sparse": False, + "multilabel": False, + "multiclass": False, + "target_type": "classification", + "signed": False, + }, + ) + for key in cs.get_hyperparameters_dict().keys(): + self.assertNotIn("categorical", key.split(":")[0]) + def test_triple_type(self): DPP = FeatTypeSplit( feat_type={"A": "numerical", "B": "categorical", "C": "string"} From 4f47450d2b54fe3672fc38c304226a1eb7fa6a9c Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 14:38:48 +0200 Subject: [PATCH 49/63] add new tests and fixing some issues from PR --- autosklearn/metalearning/input/aslib_simple.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py index bbf4f44fb9..871cccd2c1 100644 --- a/autosklearn/metalearning/input/aslib_simple.py +++ b/autosklearn/metalearning/input/aslib_simple.py @@ -153,7 +153,7 @@ def _read_configurations(self, filename): if not value or hp_name == "idx": continue if hp_name not in hp_names: - # skip meta learning configuration + # skip hyperparameter # if it is not existing in the current search space continue try: From 2764037e2e4e0cdb209f42b5f5d464153b3e5e24 Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Fri, 8 Jul 2022 14:46:25 +0200 Subject: [PATCH 50/63] fix-1535-Exception-in-the-fit()-call-of-AutoSklearn (#1539) * Create PR * Fix test fixture --- test/fixtures/ensemble_building.py | 5 ++++- 1 
file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/fixtures/ensemble_building.py b/test/fixtures/ensemble_building.py index 548d1c5d72..20e771fd1a 100644 --- a/test/fixtures/ensemble_building.py +++ b/test/fixtures/ensemble_building.py @@ -231,7 +231,10 @@ def _make( # Hence, we take the y_train of the datamanager and use that as the # the targets if "Y_train" in datamanager.data: - backend.save_targets_ensemble(datamanager.data["Y_train"]) + backend.save_additional_data( + datamanager.data["Y_train"], + what="targets_ensemble", + ) return EnsembleBuilderManager( backend=backend, From 519ce4dcc79e7eb78b666d7f41a291f8950c17cd Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 15:36:32 +0200 Subject: [PATCH 51/63] add new tests and fixing some issues from PR --- autosklearn/util/pipeline.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py index 6ff74b0fb7..5d3b132343 100755 --- a/autosklearn/util/pipeline.py +++ b/autosklearn/util/pipeline.py @@ -11,7 +11,7 @@ MULTIOUTPUT_REGRESSION, REGRESSION_TASKS, ) -from autosklearn.data.xy_data_manager import XYDataManager +from autosklearn.data.abstract_data_manager import AbstractDataManager from autosklearn.pipeline.classification import SimpleClassificationPipeline from autosklearn.pipeline.regression import SimpleRegressionPipeline @@ -19,7 +19,7 @@ def get_configuration_space( - datamanager: XYDataManager, + datamanager: AbstractDataManager, include: Optional[Dict[str, List[str]]] = None, exclude: Optional[Dict[str, List[str]]] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, @@ -56,7 +56,7 @@ def get_configuration_space( def _get_regression_configuration_space( - datamanager: XYDataManager, + datamanager: AbstractDataManager, include: Optional[Dict[str, List[str]]], exclude: Optional[Dict[str, List[str]]], random_state: Optional[Union[int, np.random.RandomState]] = None, @@ -104,7 +104,7 @@ 
def _get_regression_configuration_space( def _get_classification_configuration_space( - datamanager: XYDataManager, + datamanager: AbstractDataManager, include: Optional[Dict[str, List[str]]], exclude: Optional[Dict[str, List[str]]], random_state: Optional[Union[int, np.random.RandomState]] = None, From 05d4ca85849fae89a532de99c777ea7517c77686 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 16:06:26 +0200 Subject: [PATCH 52/63] add new tests and fixing some issues from PR --- autosklearn/pipeline/base.py | 4 +-- .../pyMetaLearn/test_metalearner.py | 28 ++++++------------- .../test_data_preprocessing_feat_type.py | 15 ---------- 3 files changed, 10 insertions(+), 37 deletions(-) diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index d138a32a84..b4647215c6 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -221,7 +221,7 @@ def set_hyperparameters( node_name, node = n_ sub_configuration_space = node.get_hyperparameter_search_space( - feat_type=self.feat_type, dataset_properties=self.dataset_properties + feat_type=feat_type, dataset_properties=self.dataset_properties ) sub_config_dict = {} for param in configuration: @@ -248,7 +248,7 @@ def set_hyperparameters( node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline) ): node.set_hyperparameters( - feat_type=self.feat_type, + feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict, ) diff --git a/test/test_metalearning/pyMetaLearn/test_metalearner.py b/test/test_metalearning/pyMetaLearn/test_metalearner.py index 9e7a54a77f..42d27d49da 100644 --- a/test/test_metalearning/pyMetaLearn/test_metalearner.py +++ b/test/test_metalearning/pyMetaLearn/test_metalearner.py @@ -23,26 +23,14 @@ def setUp(self): data_dir = os.path.join(data_dir, "test_meta_base_data") os.chdir(data_dir) - for feat_type in [ - None, - {"A": "numerical"}, - {"A": "categorical"}, - {"A": "string"}, - {"A": "numerical", "B": "categorical"}, - {"A": 
"numerical", "B": "string"}, - {"A": "categorical", "B": "string"}, - {"A": "categorical", "B": "string", "C": "numerical"}, - ]: - pipeline = ( - autosklearn.pipeline.classification.SimpleClassificationPipeline() - ) - self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type) - - self.logger = logging.getLogger() - meta_base = MetaBase(self.cs, data_dir, logger=self.logger) - self.meta_optimizer = metalearner.MetaLearningOptimizer( - "233", self.cs, meta_base, logger=self.logger - ) + pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline() + self.cs = pipeline.get_hyperparameter_search_space() + + self.logger = logging.getLogger() + meta_base = MetaBase(self.cs, data_dir, logger=self.logger) + self.meta_optimizer = metalearner.MetaLearningOptimizer( + "233", self.cs, meta_base, logger=self.logger + ) def tearDown(self): os.chdir(self.cwd) diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py index c5c5369ded..6135883705 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py @@ -86,21 +86,6 @@ def test_dual_type(self): for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("numerical", key.split(":")[0]) - DPP = FeatTypeSplit(feat_type={"A": "string", "B": "categorical"}) - cs = DPP.get_hyperparameter_search_space( - feat_type={"A": "string", "B": "categorical"}, - dataset_properties={ - "task": 1, - "sparse": False, - "multilabel": False, - "multiclass": False, - "target_type": "classification", - "signed": False, - }, - ) - for key in cs.get_hyperparameters_dict().keys(): - self.assertNotIn("numerical", key.split(":")[0]) - DPP = FeatTypeSplit(feat_type={"A": "string", "B": "numerical"}) cs = DPP.get_hyperparameter_search_space( feat_type={"A": 
"string", "B": "numerical"}, From c6dbab3ea23c88a76d3c8375ba1af2f3cfb6c853 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 16:20:47 +0200 Subject: [PATCH 53/63] chancing metalearning test to check whether the configspace adapts to feat_type or not --- .../pyMetaLearn/test_metalearner.py | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/test/test_metalearning/pyMetaLearn/test_metalearner.py b/test/test_metalearning/pyMetaLearn/test_metalearner.py index 42d27d49da..2b7b5c51c6 100644 --- a/test/test_metalearning/pyMetaLearn/test_metalearner.py +++ b/test/test_metalearning/pyMetaLearn/test_metalearner.py @@ -23,14 +23,27 @@ def setUp(self): data_dir = os.path.join(data_dir, "test_meta_base_data") os.chdir(data_dir) - pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline() - self.cs = pipeline.get_hyperparameter_search_space() - - self.logger = logging.getLogger() - meta_base = MetaBase(self.cs, data_dir, logger=self.logger) - self.meta_optimizer = metalearner.MetaLearningOptimizer( - "233", self.cs, meta_base, logger=self.logger - ) + for feat_type, cs_size in [ + ({"A": "numerical"}, 165), + ({"A": "categorical"}, 162), + ({"A": "string"}, 174), + ({"A": "numerical", "B": "categorical"}, 168), + ({"A": "numerical", "B": "string"}, 180), + ({"A": "categorical", "B": "string"}, 177), + ({"A": "categorical", "B": "string", "C": "numerical"}, 183), + ]: + pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline( + feat_type=feat_type + ) + self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type) + # print(self.cs.get_default_configuration()) + + self.logger = logging.getLogger() + meta_base = MetaBase(self.cs, data_dir, logger=self.logger) + self.meta_optimizer = metalearner.MetaLearningOptimizer( + "233", self.cs, meta_base, logger=self.logger + ) + self.assertEqual(len(self.meta_optimizer.configuration_space), cs_size) def tearDown(self): os.chdir(self.cwd) From 
65b037829c5f643b7e59131883aeeb21e4e25873 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 16:29:46 +0200 Subject: [PATCH 54/63] in `test_data_preprocessing_feat_type.py` check for configuration space size --- .../test_data_preprocessing_feat_type.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py index 6135883705..0cf5ee6bd2 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py @@ -22,6 +22,7 @@ def test_single_type(self): for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("text", key.split(":")[0]) self.assertNotIn("categorical", key.split(":")[0]) + self.assertEqual(len(cs), 6) DPP = FeatTypeSplit(feat_type={"A": "categorical"}) cs = DPP.get_hyperparameter_search_space( @@ -38,6 +39,7 @@ def test_single_type(self): for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("text", key.split(":")[0]) self.assertNotIn("numerical", key.split(":")[0]) + self.assertEqual(len(cs), 3) DPP = FeatTypeSplit(feat_type={"A": "string"}) cs = DPP.get_hyperparameter_search_space( @@ -54,6 +56,7 @@ def test_single_type(self): for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("numerical", key.split(":")[0]) self.assertNotIn("categorical", key.split(":")[0]) + self.assertEqual(len(cs), 15) def test_dual_type(self): DPP = FeatTypeSplit(feat_type={"A": "numerical", "B": "categorical"}) @@ -70,6 +73,7 @@ def test_dual_type(self): ) for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("text", key.split(":")[0]) + self.assertEqual(len(cs), 9) DPP = FeatTypeSplit(feat_type={"A": "categorical", "B": "string"}) cs = DPP.get_hyperparameter_search_space( @@ -85,6 +89,7 @@ def test_dual_type(self): ) for key in 
cs.get_hyperparameters_dict().keys(): self.assertNotIn("numerical", key.split(":")[0]) + self.assertEqual(len(cs), 18) DPP = FeatTypeSplit(feat_type={"A": "string", "B": "numerical"}) cs = DPP.get_hyperparameter_search_space( @@ -100,6 +105,7 @@ def test_dual_type(self): ) for key in cs.get_hyperparameters_dict().keys(): self.assertNotIn("categorical", key.split(":")[0]) + self.assertEqual(len(cs), 21) def test_triple_type(self): DPP = FeatTypeSplit( @@ -126,3 +132,4 @@ def test_triple_type(self): truth_table[2] = True self.assertEqual(sum(truth_table), 3) + self.assertEqual(len(cs), 24) From f00aa76711ceb41aeeed957338ebee677923e63c Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 17:30:43 +0200 Subject: [PATCH 55/63] in `test_data_preprocessing_feat_type.py` check for configuration space size --- .../components/classification/__init__.py | 4 +- .../components/classification/adaboost.py | 7 +++- .../components/regression/__init__.py | 4 +- autosklearn/util/pipeline.py | 12 +++--- scripts/02_retrieve_metadata.py | 2 +- test/fixtures/ensembles.py | 4 +- .../pyMetaLearn/test_metalearner.py | 30 +++++--------- .../test_metalearning_configuration.py | 40 +++++++++++++++++++ 8 files changed, 70 insertions(+), 33 deletions(-) create mode 100644 test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index 6475fa0156..31fa2ea9ca 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -133,7 +133,9 @@ def get_hyperparameter_search_space( for estimator_name in available_estimators.keys(): estimator_configuration_space = available_estimators[ estimator_name - ].get_hyperparameter_search_space(dataset_properties=dataset_properties) + ].get_hyperparameter_search_space( + feat_type=feat_type, dataset_properties=dataset_properties + ) 
parent_hyperparameter = {"parent": estimator, "value": estimator_name} cs.add_configuration_space( estimator_name, diff --git a/autosklearn/pipeline/components/classification/adaboost.py b/autosklearn/pipeline/components/classification/adaboost.py index 3634f53956..08a9bc06bd 100644 --- a/autosklearn/pipeline/components/classification/adaboost.py +++ b/autosklearn/pipeline/components/classification/adaboost.py @@ -1,3 +1,5 @@ +from typing import Optional + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -5,6 +7,7 @@ UniformIntegerHyperparameter, ) +from autosklearn.askl_typing import FEAT_TYPE_TYPE from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA @@ -68,7 +71,9 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space( + feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None + ): cs = ConfigurationSpace() n_estimators = UniformIntegerHyperparameter( diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py index 0f693e2d08..9d1ef58650 100644 --- a/autosklearn/pipeline/components/regression/__init__.py +++ b/autosklearn/pipeline/components/regression/__init__.py @@ -123,7 +123,9 @@ def get_hyperparameter_search_space( for estimator_name in available_estimators.keys(): estimator_configuration_space = available_estimators[ estimator_name - ].get_hyperparameter_search_space(dataset_properties=dataset_properties) + ].get_hyperparameter_search_space( + feat_type=feat_type, dataset_properties=dataset_properties + ) parent_hyperparameter = {"parent": estimator, "value": estimator_name} cs.add_configuration_space( estimator_name, diff --git a/autosklearn/util/pipeline.py 
b/autosklearn/util/pipeline.py index 5d3b132343..f0a66a2a86 100755 --- a/autosklearn/util/pipeline.py +++ b/autosklearn/util/pipeline.py @@ -28,8 +28,8 @@ def get_configuration_space( Parameters ---------- - datamanager: XYDataManager - XYDataManger object storing all important information about the dataset + datamanager: AbstractDataManager + AbstractDataManager object storing all important information about the dataset include: Optional[Dict[str, List[str]]] = None A dictionary of what components to include for each pipeline step @@ -65,8 +65,8 @@ def _get_regression_configuration_space( Parameters ---------- - datamanager: XYDataManager - XYDataManger object storing all important information about the dataset + datamanager: AbstractDataManager + AbstractDataManager object storing all important information about the dataset include: Optional[Dict[str, List[str]]] = None A dictionary of what components to include for each pipeline step @@ -113,8 +113,8 @@ def _get_classification_configuration_space( Parameters ---------- - datamanager: XYDataManager - XYDataManger object storing all important information about the dataset + datamanager: AbstractDataManager + AbstractDataManager object storing all important information about the dataset include: Optional[Dict[str, List[str]]] = None A dictionary of what components to include for each pipeline step diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py index 931ad499f5..8c3a2e5468 100644 --- a/scripts/02_retrieve_metadata.py +++ b/scripts/02_retrieve_metadata.py @@ -181,7 +181,7 @@ def write_output(outputs, configurations, output_dir, configuration_space, metri class DummyDatamanager(): def __init__(self, info): self.info = info - self.feat_type = {"A1": "numerical"} + self.feat_type = None def main(): parser = ArgumentParser() diff --git a/test/fixtures/ensembles.py b/test/fixtures/ensembles.py index 113974b256..80a3425e2d 100644 --- a/test/fixtures/ensembles.py +++ 
b/test/fixtures/ensembles.py @@ -45,7 +45,7 @@ def _make( if not models: models = [ MyDummyClassifier( - feat_type={i: "numerical" for i in range(X.shape[1])}, + feat_type=None, config=1, random_state=seed, ) @@ -89,7 +89,7 @@ def _make( if not models: models = [ MyDummyRegressor( - feat_type={i: "numerical" for i in range(X.shape[1])}, + feat_type=None, config=1, random_state=seed, ) diff --git a/test/test_metalearning/pyMetaLearn/test_metalearner.py b/test/test_metalearning/pyMetaLearn/test_metalearner.py index 2b7b5c51c6..33ce3f9f88 100644 --- a/test/test_metalearning/pyMetaLearn/test_metalearner.py +++ b/test/test_metalearning/pyMetaLearn/test_metalearner.py @@ -23,27 +23,15 @@ def setUp(self): data_dir = os.path.join(data_dir, "test_meta_base_data") os.chdir(data_dir) - for feat_type, cs_size in [ - ({"A": "numerical"}, 165), - ({"A": "categorical"}, 162), - ({"A": "string"}, 174), - ({"A": "numerical", "B": "categorical"}, 168), - ({"A": "numerical", "B": "string"}, 180), - ({"A": "categorical", "B": "string"}, 177), - ({"A": "categorical", "B": "string", "C": "numerical"}, 183), - ]: - pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline( - feat_type=feat_type - ) - self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type) - # print(self.cs.get_default_configuration()) - - self.logger = logging.getLogger() - meta_base = MetaBase(self.cs, data_dir, logger=self.logger) - self.meta_optimizer = metalearner.MetaLearningOptimizer( - "233", self.cs, meta_base, logger=self.logger - ) - self.assertEqual(len(self.meta_optimizer.configuration_space), cs_size) + pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline() + self.cs = pipeline.get_hyperparameter_search_space() + # print(self.cs.get_default_configuration()) + + self.logger = logging.getLogger() + meta_base = MetaBase(self.cs, data_dir, logger=self.logger) + self.meta_optimizer = metalearner.MetaLearningOptimizer( + "233", self.cs, meta_base, 
logger=self.logger + ) def tearDown(self): os.chdir(self.cwd) diff --git a/test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py b/test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py new file mode 100644 index 0000000000..1e08805d87 --- /dev/null +++ b/test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py @@ -0,0 +1,40 @@ +import logging +import os + +import autosklearn.metalearning.optimizers.metalearn_optimizer.metalearner as metalearner # noqa: E501 +import autosklearn.pipeline.classification +from autosklearn.metalearning.metalearning.meta_base import MetaBase + +import unittest + +logging.basicConfig() + + +class MetalearningConfiguration(unittest.TestCase): + def test_metalearning_cs_size(self): + self.cwd = os.getcwd() + data_dir = os.path.dirname(__file__) + data_dir = os.path.join(data_dir, "test_meta_base_data") + os.chdir(data_dir) + + for feat_type, cs_size in [ + ({"A": "numerical"}, 165), + ({"A": "categorical"}, 162), + ({"A": "string"}, 174), + ({"A": "numerical", "B": "categorical"}, 168), + ({"A": "numerical", "B": "string"}, 180), + ({"A": "categorical", "B": "string"}, 177), + ({"A": "categorical", "B": "string", "C": "numerical"}, 183), + ]: + pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline( + feat_type=feat_type + ) + self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type) + # print(self.cs.get_default_configuration()) + + self.logger = logging.getLogger() + meta_base = MetaBase(self.cs, data_dir, logger=self.logger) + self.meta_optimizer = metalearner.MetaLearningOptimizer( + "233", self.cs, meta_base, logger=self.logger + ) + self.assertEqual(len(self.meta_optimizer.configuration_space), cs_size) From c43627eecdd4a8fd8d9ced9438161f75142cea1d Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 17:34:53 +0200 Subject: [PATCH 56/63] in `test_data_preprocessing_feat_type.py` check for configuration space size --- scripts/02_retrieve_metadata.py | 
5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py index 8c3a2e5468..fa104464f2 100644 --- a/scripts/02_retrieve_metadata.py +++ b/scripts/02_retrieve_metadata.py @@ -225,7 +225,10 @@ def main(): ) configuration_space = pipeline.get_configuration_space( - DummyDatamanager({"is_sparse": sparse, "task": task}) + DummyDatamanager( + info={"is_sparse": sparse, "task": task}, + feat_type={"A": "numerical", "B": "categorical"} + ) ) outputs, configurations = retrieve_matadata( From 1c26599118d81fccdad3830bd72b2ddf9dca42bc Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 17:41:07 +0200 Subject: [PATCH 57/63] include feedback from review --- test/fixtures/ensembles.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test/fixtures/ensembles.py b/test/fixtures/ensembles.py index 80a3425e2d..250841511a 100644 --- a/test/fixtures/ensembles.py +++ b/test/fixtures/ensembles.py @@ -5,7 +5,11 @@ import numpy as np from sklearn.ensemble import VotingClassifier, VotingRegressor -from autosklearn.data.validation import SUPPORTED_FEAT_TYPES, SUPPORTED_TARGET_TYPES +from autosklearn.data.validation import ( + SUPPORTED_FEAT_TYPES, + SUPPORTED_TARGET_TYPES, + InputValidator, +) from autosklearn.evaluation.abstract_evaluator import ( MyDummyClassifier, MyDummyRegressor, @@ -43,9 +47,10 @@ def _make( ) -> VotingClassifier: assert not (X is None) ^ (y is None) if not models: + validator = InputValidator(is_classification=True).fit(X, y) models = [ MyDummyClassifier( - feat_type=None, + feat_type=validator.feature_validator.feat_type, config=1, random_state=seed, ) @@ -87,9 +92,10 @@ def _make( assert not (X is None) ^ (y is None) if not models: + validator = InputValidator(is_classification=False).fit(X, y) models = [ MyDummyRegressor( - feat_type=None, + feat_type=validator.feature_validator.feat_type, config=1, random_state=seed, ) From 
0af7f9053c99540e8ee69f8dcf749a36968cbc16 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 8 Jul 2022 18:24:08 +0200 Subject: [PATCH 58/63] include feedback from review --- test/test_pipeline/test_classification.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/test/test_pipeline/test_classification.py b/test/test_pipeline/test_classification.py index eb127ad02d..94fd8c7a65 100644 --- a/test/test_pipeline/test_classification.py +++ b/test/test_pipeline/test_classification.py @@ -62,7 +62,7 @@ def get_properties(dataset_properties=None): } @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): + def get_hyperparameter_search_space(feat_type=None, dataset_properties=None): cs = ConfigurationSpace() return cs @@ -407,7 +407,7 @@ def test_configurations_categorical_data(self): }, ) - cs = pipeline.get_hyperparameter_search_space() + cs = pipeline.get_hyperparameter_search_space(feat_type=categorical) here = os.path.dirname(__file__) dataset_path = os.path.join( @@ -474,8 +474,11 @@ def test_categorical_passed_to_one_hot_encoder(self, ohe_mock): # Check through `set_hyperparameters` feat_types = {0: "categorical", 1: "categorical", 2: "numerical"} - default = cls.get_hyperparameter_search_space().get_default_configuration() + default = cls.get_hyperparameter_search_space( + feat_type=feat_types + ).get_default_configuration() cls.set_hyperparameters( + feat_type=feat_types, configuration=default, init_params={"data_preprocessor:feat_type": feat_types}, ) @@ -571,7 +574,9 @@ def _test_configurations( dataset_properties=dataset_properties, init_params=init_params_, ) - cls.set_hyperparameters(config, init_params=init_params_) + cls.set_hyperparameters( + config, init_params=init_params_, feat_type=feat_type + ) # First make sure that for this configuration, setting the parameters # does not mistakenly set the estimator as fitted From 5057b260624609c108ce69c9c40217269a430759 Mon Sep 17 00:00:00 2001 From: lukas Date: 
Fri, 8 Jul 2022 18:35:37 +0200 Subject: [PATCH 59/63] include feedback from review --- scripts/02_retrieve_metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py index fa104464f2..e0b20d1e61 100644 --- a/scripts/02_retrieve_metadata.py +++ b/scripts/02_retrieve_metadata.py @@ -179,9 +179,9 @@ def write_output(outputs, configurations, output_dir, configuration_space, metri class DummyDatamanager(): - def __init__(self, info): + def __init__(self, info, feat_type=None): self.info = info - self.feat_type = None + self.feat_type = feat_type def main(): parser = ArgumentParser() From 18ab29e3197686ebca73e49b282095c468bbd435 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 10 Jul 2022 16:29:38 +0200 Subject: [PATCH 60/63] adapted meta_data_generation search space --- scripts/02_retrieve_metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py index e0b20d1e61..574a24c87b 100644 --- a/scripts/02_retrieve_metadata.py +++ b/scripts/02_retrieve_metadata.py @@ -227,7 +227,7 @@ def main(): configuration_space = pipeline.get_configuration_space( DummyDatamanager( info={"is_sparse": sparse, "task": task}, - feat_type={"A": "numerical", "B": "categorical"} + feat_type={"A": "numerical"} ) ) From 12fe4495f905ce02424094f1006cdf8df809f351 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Jul 2022 11:46:23 +0200 Subject: [PATCH 61/63] Bump docker/build-push-action from 1 to 3 (#1515) * Bump docker/build-push-action from 1 to 3 Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 1 to 3. 
- [Release notes](https://github.com/docker/build-push-action/releases) - [Commits](https://github.com/docker/build-push-action/compare/v1...v3) --- updated-dependencies: - dependency-name: docker/build-push-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] * Update docker-publish.yml Replace password by token Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Matthias Feurer --- .github/workflows/docker-publish.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 4c4773700e..525004269e 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -26,9 +26,8 @@ jobs: run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" id: extract_branch - # Updating this to @v2 requires a github auth token - name: Push to GitHub Packages - uses: docker/build-push-action@v1 + uses: docker/build-push-action@v3 with: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -37,12 +36,11 @@ jobs: tag_with_ref: true tags: ${{ steps.extract_branch.outputs.branch }} - # Updating this to @v2 requires a github auth token - name: Push to Docker Hub - uses: docker/build-push-action@v1 + uses: docker/build-push-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + password: ${{ secrets.DOCKER_TOKEN }} repository: mfeurer/auto-sklearn tags: ${{ steps.extract_branch.outputs.branch }} From af9d46983c4680b710c79c7714ed0047077d02dc Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Sat, 16 Jul 2022 20:35:59 +0200 Subject: [PATCH 62/63] fix-1532-_ERROR_-asyncio.exceptions.CancelledError (#1540) * Create PR * Abstract out dask client types * Fix _ issue * Extend scope of dask_client in automl.py * Add docstring to dask module * Indent result addition * Add basic 
tests for Dask wrappers --- autosklearn/automl.py | 282 +++++++++++------------- autosklearn/util/dask.py | 142 ++++++++++++ test/test_automl/test_construction.py | 6 +- test/test_estimators/test_estimators.py | 2 - test/test_util/test_dask.py | 75 +++++++ 5 files changed, 347 insertions(+), 160 deletions(-) create mode 100644 autosklearn/util/dask.py create mode 100644 test/test_util/test_dask.py diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 278cd5c146..f76a03adec 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -21,7 +21,6 @@ import os import platform import sys -import tempfile import time import types import uuid @@ -37,7 +36,7 @@ import sklearn.utils from ConfigSpace.configuration_space import Configuration, ConfigurationSpace from ConfigSpace.read_and_write import json as cs_json -from dask.distributed import Client, LocalCluster +from dask.distributed import Client from scipy.sparse import spmatrix from sklearn.base import BaseEstimator from sklearn.dummy import DummyClassifier, DummyRegressor @@ -105,6 +104,7 @@ from autosklearn.pipeline.components.regression import RegressorChoice from autosklearn.smbo import AutoMLSMBO from autosklearn.util import RE_PATTERN, pipeline +from autosklearn.util.dask import Dask, LocalDask, UserDask from autosklearn.util.data import ( DatasetCompressionSpec, default_dataset_compression_arg, @@ -120,7 +120,6 @@ warnings_to, ) from autosklearn.util.parallel import preload_modules -from autosklearn.util.single_thread_client import SingleThreadedClient from autosklearn.util.smac_wrap import SMACCallback, SmacRunCallback from autosklearn.util.stopwatch import StopWatch @@ -299,21 +298,22 @@ def __init__( self._initial_configurations_via_metalearning = ( initial_configurations_via_metalearning ) + self._n_jobs = n_jobs self._scoring_functions = scoring_functions or [] self._resampling_strategy_arguments = resampling_strategy_arguments or {} + self._multiprocessing_context = "forkserver" # Single 
core, local runs should use fork to prevent the __main__ requirements # in examples. Nevertheless, multi-process runs have spawn as requirement to # reduce the possibility of a deadlock - if n_jobs == 1 and dask_client is None: - self._multiprocessing_context = "fork" - self._dask_client = SingleThreadedClient() - self._n_jobs = 1 + self._dask: Dask + if dask_client is not None: + self._dask = UserDask(client=dask_client) else: - self._multiprocessing_context = "forkserver" - self._dask_client = dask_client - self._n_jobs = n_jobs + self._dask = LocalDask(n_jobs=n_jobs) + if n_jobs == 1: + self._multiprocessing_context = "fork" # Create the backend self._backend: Backend = create( @@ -350,38 +350,6 @@ def __init__( self.num_run = 0 self.fitted = False - def _create_dask_client(self) -> None: - self._is_dask_client_internally_created = True - self._dask_client = Client( - LocalCluster( - n_workers=self._n_jobs, - processes=False, - threads_per_worker=1, - # We use the temporal directory to save the - # dask workers, because deleting workers takes - # more time than deleting backend directories - # This prevent an error saying that the worker - # file was deleted, so the client could not close - # the worker properly - local_directory=tempfile.gettempdir(), - # Memory is handled by the pynisher, not by the dask worker/nanny - memory_limit=0, - ), - # Heartbeat every 10s - heartbeat_interval=10000, - ) - - def _close_dask_client(self, force: bool = False) -> None: - if getattr(self, "_dask_client", None) is not None and ( - force or getattr(self, "_is_dask_client_internally_created", False) - ): - self._dask_client.shutdown() - self._dask_client.close() - del self._dask_client - self._dask_client = None - self._is_dask_client_internally_created = False - del self._is_dask_client_internally_created - def _get_logger(self, name: str) -> PicklableClientLogger: logger_name = "AutoML(%d):%s" % (self._seed, name) @@ -747,17 +715,6 @@ def fit( "autosklearn.metrics.Scorer." 
) - # If no dask client was provided, we create one, so that we can - # start a ensemble process in parallel to smbo optimize - if self._dask_client is None and ( - self._ensemble_class is not None - or self._n_jobs is not None - and self._n_jobs > 1 - ): - self._create_dask_client() - else: - self._is_dask_client_internally_created = False - self._dataset_name = dataset_name self._stopwatch.start(self._dataset_name) @@ -902,70 +859,85 @@ def fit( ) n_meta_configs = self._initial_configurations_via_metalearning - _proc_smac = AutoMLSMBO( - config_space=self.configuration_space, - dataset_name=self._dataset_name, - backend=self._backend, - total_walltime_limit=time_left, - func_eval_time_limit=per_run_time_limit, - memory_limit=self._memory_limit, - data_memory_limit=self._data_memory_limit, - stopwatch=self._stopwatch, - n_jobs=self._n_jobs, - dask_client=self._dask_client, - start_num_run=self.num_run, - num_metalearning_cfgs=n_meta_configs, - config_file=configspace_path, - seed=self._seed, - metadata_directory=self._metadata_directory, - metrics=self._metrics, - resampling_strategy=self._resampling_strategy, - resampling_strategy_args=self._resampling_strategy_arguments, - include=self._include, - exclude=self._exclude, - disable_file_output=self._disable_evaluator_output, - get_smac_object_callback=self._get_smac_object_callback, - smac_scenario_args=self._smac_scenario_args, - scoring_functions=self._scoring_functions, - port=self._logger_port, - pynisher_context=self._multiprocessing_context, - ensemble_callback=proc_ensemble, - trials_callback=self._get_trials_callback, - ) + with self._dask as dask_client: + resamp_args = self._resampling_strategy_arguments + _proc_smac = AutoMLSMBO( + config_space=self.configuration_space, + dataset_name=self._dataset_name, + backend=self._backend, + total_walltime_limit=time_left, + func_eval_time_limit=per_run_time_limit, + memory_limit=self._memory_limit, + data_memory_limit=self._data_memory_limit, + 
stopwatch=self._stopwatch, + n_jobs=self._n_jobs, + dask_client=dask_client, + start_num_run=self.num_run, + num_metalearning_cfgs=n_meta_configs, + config_file=configspace_path, + seed=self._seed, + metadata_directory=self._metadata_directory, + metrics=self._metrics, + resampling_strategy=self._resampling_strategy, + resampling_strategy_args=resamp_args, + include=self._include, + exclude=self._exclude, + disable_file_output=self._disable_evaluator_output, + get_smac_object_callback=self._get_smac_object_callback, + smac_scenario_args=self._smac_scenario_args, + scoring_functions=self._scoring_functions, + port=self._logger_port, + pynisher_context=self._multiprocessing_context, + ensemble_callback=proc_ensemble, + trials_callback=self._get_trials_callback, + ) - ( - self.runhistory_, - self.trajectory_, - self._budget_type, - ) = _proc_smac.run_smbo() - trajectory_filename = os.path.join( - self._backend.get_smac_output_directory_for_run(self._seed), - "trajectory.json", - ) - saveable_trajectory = [ - list(entry[:2]) + [entry[2].get_dictionary()] + list(entry[3:]) - for entry in self.trajectory_ - ] - with open(trajectory_filename, "w") as fh: - json.dump(saveable_trajectory, fh) - - self._logger.info("Starting shutdown...") - # Wait until the ensemble process is finished to avoid shutting down - # while the ensemble builder tries to access the data - if proc_ensemble is not None: - self.ensemble_performance_history = list(proc_ensemble.history) - - if len(proc_ensemble.futures) > 0: - # Now we wait for the future to return as it cannot be cancelled - # while it is running: https://stackoverflow.com/a/49203129 - self._logger.info( - "Ensemble script still running, waiting for it to finish." 
- ) - result = proc_ensemble.futures.pop().result() - if result: - ensemble_history, _ = result - self.ensemble_performance_history.extend(ensemble_history) - self._logger.info("Ensemble script finished, continue shutdown.") + ( + self.runhistory_, + self.trajectory_, + self._budget_type, + ) = _proc_smac.run_smbo() + + trajectory_filename = os.path.join( + self._backend.get_smac_output_directory_for_run(self._seed), + "trajectory.json", + ) + saveable_trajectory = [ + list(entry[:2]) + + [entry[2].get_dictionary()] + + list(entry[3:]) + for entry in self.trajectory_ + ] + with open(trajectory_filename, "w") as fh: + json.dump(saveable_trajectory, fh) + + self._logger.info("Starting shutdown...") + # Wait until the ensemble process is finished to avoid shutting + # down while the ensemble builder tries to access the data + if proc_ensemble is not None: + self.ensemble_performance_history = list( + proc_ensemble.history + ) + + if len(proc_ensemble.futures) > 0: + # Now we wait for the future to return as it cannot be + # cancelled while it is running + # * https://stackoverflow.com/a/49203129 + self._logger.info( + "Ensemble script still running," + " waiting for it to finish." + ) + result = proc_ensemble.futures.pop().result() + + if result: + ensemble_history, _ = result + self.ensemble_performance_history.extend( + ensemble_history + ) + + self._logger.info( + "Ensemble script finished, continue shutdown." 
+ ) # save the ensemble performance history file if len(self.ensemble_performance_history) > 0: @@ -1054,7 +1026,7 @@ def _log_fit_setup(self) -> None: self._logger.debug( " multiprocessing_context: %s", str(self._multiprocessing_context) ) - self._logger.debug(" dask_client: %s", str(self._dask_client)) + self._logger.debug(" dask_client: %s", str(self._dask)) self._logger.debug(" precision: %s", str(self.precision)) self._logger.debug( " disable_evaluator_output: %s", str(self._disable_evaluator_output) @@ -1090,7 +1062,6 @@ def __sklearn_is_fitted__(self) -> bool: def _fit_cleanup(self) -> None: self._logger.info("Closing the dask infrastructure") - self._close_dask_client() self._logger.info("Finished closing the dask infrastructure") # Clean up the logger @@ -1555,12 +1526,6 @@ def fit_ensemble( # Make sure that input is valid y = self.InputValidator.target_validator.transform(y) - # Create a client if needed - if self._dask_client is None: - self._create_dask_client() - else: - self._is_dask_client_internally_created = False - metrics = metrics if metrics is not None else self._metrics if not isinstance(metrics, Sequence): metrics = [metrics] @@ -1568,35 +1533,41 @@ def fit_ensemble( # Use the current thread to start the ensemble builder process # The function ensemble_builder_process will internally create a ensemble # builder in the provide dask client - manager = EnsembleBuilderManager( - start_time=time.time(), - time_left_for_ensembles=self._time_for_task, - backend=copy.deepcopy(self._backend), - dataset_name=dataset_name if dataset_name else self._dataset_name, - task=task if task else self._task, - metrics=metrics if metrics is not None else self._metrics, - ensemble_class=( - ensemble_class if ensemble_class is not None else self._ensemble_class - ), - ensemble_kwargs=( - ensemble_kwargs - if ensemble_kwargs is not None - else self._ensemble_kwargs - ), - ensemble_nbest=ensemble_nbest if ensemble_nbest else self._ensemble_nbest, - 
max_models_on_disc=self._max_models_on_disc, - seed=self._seed, - precision=precision if precision else self.precision, - max_iterations=1, - read_at_most=None, - memory_limit=self._memory_limit, - random_state=self._seed, - logger_port=self._logger_port, - pynisher_context=self._multiprocessing_context, - ) - manager.build_ensemble(self._dask_client) - future = manager.futures.pop() - result = future.result() + with self._dask as dask_client: + manager = EnsembleBuilderManager( + start_time=time.time(), + time_left_for_ensembles=self._time_for_task, + backend=copy.deepcopy(self._backend), + dataset_name=dataset_name if dataset_name else self._dataset_name, + task=task if task else self._task, + metrics=metrics if metrics is not None else self._metrics, + ensemble_class=( + ensemble_class + if ensemble_class is not None + else self._ensemble_class + ), + ensemble_kwargs=( + ensemble_kwargs + if ensemble_kwargs is not None + else self._ensemble_kwargs + ), + ensemble_nbest=ensemble_nbest + if ensemble_nbest + else self._ensemble_nbest, + max_models_on_disc=self._max_models_on_disc, + seed=self._seed, + precision=precision if precision else self.precision, + max_iterations=1, + read_at_most=None, + memory_limit=self._memory_limit, + random_state=self._seed, + logger_port=self._logger_port, + pynisher_context=self._multiprocessing_context, + ) + manager.build_ensemble(dask_client) + future = manager.futures.pop() + result = future.result() + if result is None: raise ValueError( "Error building the ensemble - please check the log file and command " @@ -1606,7 +1577,6 @@ def fit_ensemble( self._ensemble_class = ensemble_class self._load_models() - self._close_dask_client() return self def _load_models(self): @@ -2295,7 +2265,7 @@ def _create_search_space( def __getstate__(self) -> dict[str, Any]: # Cannot serialize a client! 
- self._dask_client = None + self._dask = None self.logging_server = None self.stop_logging_server = None return self.__dict__ @@ -2304,8 +2274,6 @@ def __del__(self) -> None: # Clean up the logger self._clean_logger() - self._close_dask_client() - class AutoMLClassifier(AutoML): diff --git a/autosklearn/util/dask.py b/autosklearn/util/dask.py new file mode 100644 index 0000000000..624fecfae9 --- /dev/null +++ b/autosklearn/util/dask.py @@ -0,0 +1,142 @@ +""" Provides simplified 2 use cases of dask that we consider + +1. A UserDask is when a user supplies a dask client, in which case +we don't close this down and leave it up to the user to control its lifetime. +2. A LocalDask is one we use when no user dask is supplied. In this case +we make sure to spin up and close down clients as needed. + +Both of these can be uniformly accessed as a context manager. + +.. code:: python + + # Locally controlled dask client + local_dask = LocalDask(n_jobs=2) + with local_dask as client: + # Do stuff with client + ... + + # `client` is shutdown properly + + # ---------------- + + # User controlled dask client + user_dask = UserDask(user_client) + + with user_dask as client: + # Do stuff with (client == user_client) + ... + + # `user_client` is still open and up to the user to close +""" +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any + +import tempfile + +from dask.distributed import Client, LocalCluster + +from autosklearn.util.single_thread_client import SingleThreadedClient + + +class Dask(ABC): + @abstractmethod + def client(self) -> Client: + """Should return a dask client""" + ... + + @abstractmethod + def close(self) -> None: + """Should close up any resources needed for the dask client""" + ... + + def __enter__(self) -> Client: + return self.client() + + def __exit__(self, *args: Any, **kwargs: Any) -> None: + self.close() + + @abstractmethod + def __repr__(self) -> str: + ... 
+ + +class UserDask(Dask): + """A dask instance created by a user""" + + def __init__(self, client: Client): + """ + Parameters + ---------- + client : Client + The client they passed in + """ + self._client = client + + def client(self) -> Client: + """The dask client""" + return self._client + + def close(self) -> None: + """Close the dask client""" + # We do nothing, it's user provided + pass + + def __repr__(self) -> str: + return "UserDask(...)" + + +class LocalDask(Dask): + def __init__(self, n_jobs: int | None = None) -> None: + self.n_jobs = n_jobs + self._client: Client | None = None + self._cluster: LocalCluster | None = None + + def client(self) -> Client: + """Creates a usable dask client or returns an existing one + + If there is not current client, because it has been closed, create + a new one. + * If ``n_jobs == 1``, create a ``SingleThreadedClient`` + * Else create a ``Client`` with a ``LocalCluster`` + """ + if self._client is not None: + return self._client + + if self.n_jobs == 1: + cluster = None + client = SingleThreadedClient() + else: + cluster = LocalCluster( + n_workers=self.n_jobs, + processes=False, + threads_per_worker=1, + # We use tmpdir to save the workers as deleting workers takes + # more time than deleting backend directories. 
+ # This prevent an error saying that the worker file was deleted, + # so the client could not close the worker properly + local_directory=tempfile.gettempdir(), + # Memory is handled by the pynisher, not by the dask worker/nanny + memory_limit=0, + ) + client = Client(cluster, heartbeat_interval=10000) # 10s + + self._client = client + self._cluster = cluster + return self._client + + def close(self) -> None: + """Closes any open dask client""" + if self._client is None: + return + + self._client.close() + if self._cluster is not None: + self._cluster.close() + + self._client = None + self._cluster = None + + def __repr__(self) -> str: + return f"LocalDask(n_jobs = {self.n_jobs})" diff --git a/test/test_automl/test_construction.py b/test/test_automl/test_construction.py index 5b68d35118..be6fe0e39b 100644 --- a/test/test_automl/test_construction.py +++ b/test/test_automl/test_construction.py @@ -2,6 +2,7 @@ from typing import Any, Dict, Optional, Union from autosklearn.automl import AutoML +from autosklearn.util.dask import LocalDask from autosklearn.util.data import default_dataset_compression_arg from autosklearn.util.single_thread_client import SingleThreadedClient @@ -87,4 +88,7 @@ def test_single_job_and_no_dask_client_sets_correct_multiprocessing_context() -> assert automl._multiprocessing_context == "fork" assert automl._n_jobs == 1 - assert isinstance(automl._dask_client, SingleThreadedClient) + assert isinstance(automl._dask, LocalDask) + + with automl._dask as client: + assert isinstance(client, SingleThreadedClient) diff --git a/test/test_estimators/test_estimators.py b/test/test_estimators/test_estimators.py index d0d3f28bdb..e1e33d684a 100644 --- a/test/test_estimators/test_estimators.py +++ b/test/test_estimators/test_estimators.py @@ -140,8 +140,6 @@ def __call__(self, *args, **kwargs): assert count_succeses(automl.cv_results_) > 0 assert includes_train_scores(automl.performance_over_time_.columns) is True assert 
performance_over_time_is_plausible(automl.performance_over_time_) is True - # For travis-ci it is important that the client no longer exists - assert automl.automl_._dask_client is None def test_feat_type_wrong_arguments(): diff --git a/test/test_util/test_dask.py b/test/test_util/test_dask.py new file mode 100644 index 0000000000..1dbc290500 --- /dev/null +++ b/test/test_util/test_dask.py @@ -0,0 +1,75 @@ +from pathlib import Path + +from dask.distributed import Client, LocalCluster + +from autosklearn.util.dask import LocalDask, UserDask + +import pytest + + +@pytest.mark.parametrize("n_jobs", [1, 2]) +def test_user_dask(tmp_path: Path, n_jobs: int) -> None: + """ + Expects + ------- + * A UserDask should not close the client after exiting context + """ + cluster = LocalCluster( + n_workers=n_jobs, + processes=False, + threads_per_worker=1, + local_directory=tmp_path, + ) + client = Client(cluster, heartbeat_interval=10000) + + # Active at creation + dask = UserDask(client) + + client_1 = None + with dask as user_client: + client_1 = user_client + assert user_client.status == "running" + + client_2 = None + with dask as user_client: + assert user_client.status == "running" + client_2 = user_client + + # Make sure they are the same client + assert id(client_1) == id(client_2) + + # Remains running after context + assert client_1.status == "running" + + cluster.close() + client.close() + + assert client.status == "closed" + + +def test_local_dask_creates_new_clients(tmp_path: Path) -> None: + """ + Expects + ------- + * A LocalDask should create new dask clusters at each context usage + """ + # We need 2 to use an actual dask client and not a SingleThreadedClient + local_dask = LocalDask(n_jobs=2) + + client_1 = None + with local_dask as client: + client_1 = client + assert client_1.status == "running" + + assert client_1.status == "closed" + + client_2 = None + with local_dask as client: + client_2 = client + assert client_2.status == "running" + + # Make sure 
they were different clients + assert id(client_1) != id(client_2) + + assert client_2.status == "closed" + assert client_1.status == "closed" From 0d8168c37412b27e38b3b3691e08fb0b09d5d618 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 3 Aug 2022 18:34:41 +0200 Subject: [PATCH 63/63] Update config space in meta-data generation script --- scripts/02_retrieve_metadata.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py index 574a24c87b..56a0395b9e 100644 --- a/scripts/02_retrieve_metadata.py +++ b/scripts/02_retrieve_metadata.py @@ -1,5 +1,4 @@ from argparse import ArgumentParser -from collections import defaultdict import csv import glob import itertools @@ -10,6 +9,7 @@ import numpy as np from ConfigSpace.configuration_space import Configuration +from ConfigSpace.util import deactivate_inactive_hyperparameters from autosklearn.constants import * from autosklearn.metrics import CLASSIFICATION_METRICS, REGRESSION_METRICS @@ -66,8 +66,18 @@ def retrieve_matadata( n_better += 1 try: + for hp in configuration_space.get_hyperparameters(): + if hp.name not in config: + config[hp.name] = hp.default_value + best_configuration = Configuration( - configuration_space=configuration_space, values=config + configuration_space=configuration_space, + values=config, + allow_inactive_with_values=True, + ) + best_configuration = deactivate_inactive_hyperparameters( + configuration=best_configuration, + configuration_space=configuration_space, ) best_value = score best_configuration_dir = validation_trajectory_file @@ -227,7 +237,7 @@ def main(): configuration_space = pipeline.get_configuration_space( DummyDatamanager( info={"is_sparse": sparse, "task": task}, - feat_type={"A": "numerical"} + feat_type={"A": "numerical", "B": "categorical"} ) )