From 0a098212f88877a8ac8d4230137989ac32b5539c Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Wed, 1 Jun 2022 11:22:59 +0200
Subject: [PATCH 01/63] fixing the issue that metalearning tries to use every
 hp defined in the csv files.

Also fixing the bug where hyperparameters (hps) remain active.
---
 autosklearn/automl.py                         |  4 +
 .../metalearning/input/aslib_simple.py        |  7 +-
 .../metalearning/metalearning/meta_base.py    |  2 +-
 .../data_preprocessing/feature_type.py        | 92 +++++++++++--------
 4 files changed, 65 insertions(+), 40 deletions(-)

diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index 12e80b8e4e..f0dfed33c8 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -748,6 +748,10 @@ def fit(
 
         self._log_fit_setup()
 
+        # save feat_type to file
+        with open(f'{os.path.dirname(os.path.realpath(__file__))}/feat_type.json', 'w') as f:
+            json.dump(self._feat_type, f, indent=4)
+
         # == Pickle the data manager to speed up loading
         with self._stopwatch.time("Save Datamanager"):
             datamanager = XYDataManager(
diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py
index 833242729d..c495c5cd69 100644
--- a/autosklearn/metalearning/input/aslib_simple.py
+++ b/autosklearn/metalearning/input/aslib_simple.py
@@ -8,10 +8,11 @@
 
 
 class AlgorithmSelectionProblem(object):
-    def __init__(self, directory):
+    def __init__(self, directory, cs):
         self.logger = logging.getLogger(__name__)
 
         # Create data structures
+        self.cs = cs
         self.dir_ = directory
         self.algorithm_runs = None
         self.configurations = None
@@ -147,9 +148,11 @@ def _read_configurations(self, filename):
                 configuration = dict()
                 algorithm_id = line["idx"]
                 for hp_name, value in line.items():
+                    # Todo adapt to search space
                     if not value or hp_name == "idx":
                         continue
-
+                    if hp_name not in self.cs.get_hyperparameter_names():
+                        continue
                     try:
                         value = int(value)
                     except Exception:
diff --git a/autosklearn/metalearning/metalearning/meta_base.py b/autosklearn/metalearning/metalearning/meta_base.py
index f193a61fef..61f16297fe 100644
--- a/autosklearn/metalearning/metalearning/meta_base.py
+++ b/autosklearn/metalearning/metalearning/meta_base.py
@@ -42,7 +42,7 @@ def __init__(self, configuration_space, aslib_directory, logger):
         self.configuration_space = configuration_space
         self.aslib_directory = aslib_directory
 
-        aslib_reader = aslib_simple.AlgorithmSelectionProblem(self.aslib_directory)
+        aslib_reader = aslib_simple.AlgorithmSelectionProblem(self.aslib_directory, self.configuration_space)
         self.metafeatures = aslib_reader.metafeatures
         self.algorithm_runs: OrderedDict[
             str, pd.DataFrame
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index bd42d8a67a..f325e0c80c 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -1,6 +1,8 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import numpy as np
+import json
+import os
 import sklearn.compose
 from ConfigSpace import Configuration
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -64,6 +66,12 @@ def __init__(
         self.feat_type = feat_type
         self.force_sparse_output = force_sparse_output
 
+        # load global feat_type
+        f = open(f'{os.path.dirname(os.path.realpath(__file__))}/../../../feat_type.json')
+        self.feat_type = json.load(f)
+
+        self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
+
         # The pipeline that will be applied to the categorical features (i.e. columns)
         # of the dataset
         # Configuration of the data-preprocessor is different from the configuration of
@@ -71,15 +79,18 @@ def __init__(
         # It is actually the call to set_hyperparameter who properly sets this argument
         # TODO: Extract the child configuration space from the FeatTypeSplit to the
         # pipeline if needed
-        self.categ_ppl = CategoricalPreprocessingPipeline(
-            config=None,
-            steps=pipeline,
-            dataset_properties=dataset_properties,
-            include=include,
-            exclude=exclude,
-            random_state=random_state,
-            init_params=init_params,
-        )
+        self.categ_ppl = None
+        if "categorical" in self.feat_type.values():
+            self.categ_ppl = CategoricalPreprocessingPipeline(
+                config=None,
+                steps=pipeline,
+                dataset_properties=dataset_properties,
+                include=include,
+                exclude=exclude,
+                random_state=random_state,
+                init_params=init_params,
+            )
+            self._transformers.append(("categorical_transformer", self.categ_ppl))
         # The pipeline that will be applied to the numerical features (i.e. columns)
         # of the dataset
         # Configuration of the data-preprocessor is different from the configuration of
@@ -87,15 +98,18 @@ def __init__(
         # It is actually the call to set_hyperparameter who properly sets this argument
         # TODO: Extract the child configuration space from the FeatTypeSplit to the
         # pipeline if needed
-        self.numer_ppl = NumericalPreprocessingPipeline(
-            config=None,
-            steps=pipeline,
-            dataset_properties=dataset_properties,
-            include=include,
-            exclude=exclude,
-            random_state=random_state,
-            init_params=init_params,
-        )
+        self.numer_ppl = None
+        if "numerical" in self.feat_type.values():
+            self.numer_ppl = NumericalPreprocessingPipeline(
+                config=None,
+                steps=pipeline,
+                dataset_properties=dataset_properties,
+                include=include,
+                exclude=exclude,
+                random_state=random_state,
+                init_params=init_params,
+            )
+            self._transformers.append(("numerical_transformer", self.numer_ppl))
 
         # The pipeline that will be applied to the text features (i.e. columns)
         # of the dataset
@@ -104,21 +118,19 @@ def __init__(
         # It is actually the call to set_hyperparameter who properly sets this argument
         # TODO: Extract the child configuration space from the FeatTypeSplit to the
         # pipeline if needed
-        self.txt_ppl = TextPreprocessingPipeline(
-            config=None,
-            steps=pipeline,
-            dataset_properties=dataset_properties,
-            include=include,
-            exclude=exclude,
-            random_state=random_state,
-            init_params=init_params,
-        )
+        self.txt_ppl = None
+        if "string" in self.feat_type.values():
+            self.txt_ppl = TextPreprocessingPipeline(
+                config=None,
+                steps=pipeline,
+                dataset_properties=dataset_properties,
+                include=include,
+                exclude=exclude,
+                random_state=random_state,
+                init_params=init_params,
+            )
+            self._transformers.append(("text_transformer", self.txt_ppl))
 
-        self._transformers: List[Tuple[str, AutoSklearnComponent]] = [
-            ("categorical_transformer", self.categ_ppl),
-            ("numerical_transformer", self.numer_ppl),
-            ("text_transformer", self.txt_ppl),
-        ]
         if self.config:
             self.set_hyperparameters(self.config, init_params=init_params)
         self.column_transformer = column_transformer
@@ -143,29 +155,35 @@ def fit(
                     f"Train data has columns={expected} yet the"
                     f" feat_types are feat={columns}"
                 )
+            transformer_lst = []
+
             categorical_features = [
                 key
                 for key, value in self.feat_type.items()
                 if value.lower() == "categorical"
             ]
+            if len(categorical_features) > 0:
+                transformer_lst.append(("categorical_transformer", self.categ_ppl, categorical_features))
+
             numerical_features = [
                 key
                 for key, value in self.feat_type.items()
                 if value.lower() == "numerical"
             ]
+            if len(numerical_features) > 0:
+                transformer_lst.append(("numerical_transformer", self.numer_ppl, numerical_features))
+
             text_features = [
                 key
                 for key, value in self.feat_type.items()
                 if value.lower() == "string"
             ]
+            if len(transformer_lst) > 0:
+                transformer_lst.append(("text_transformer", self.txt_ppl, text_features))
 
             sklearn_transf_spec = [
                 (name, transformer, feature_columns)
-                for name, transformer, feature_columns in [
-                    ("categorical_transformer", self.categ_ppl, categorical_features),
-                    ("numerical_transformer", self.numer_ppl, numerical_features),
-                    ("text_transformer", self.txt_ppl, text_features),
-                ]
+                for name, transformer, feature_columns in transformer_lst
                 if len(feature_columns) > 0
             ]
         else:

From 619ccb605f55c030a6e4bcacac6be4206dd31728 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Wed, 1 Jun 2022 13:51:11 +0200
Subject: [PATCH 02/63] fixing the issue that metalearning tries to use every
 hp defined in the csv files.

Also fixing the bug where hyperparameters (hps) remain active.
---
 .../data_preprocessing/feature_type.py          | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index f325e0c80c..fc8a92b458 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -69,6 +69,11 @@ def __init__(
         # load global feat_type
         f = open(f'{os.path.dirname(os.path.realpath(__file__))}/../../../feat_type.json')
         self.feat_type = json.load(f)
+        is_number = True
+        for key in self.feat_type.keys():
+            is_number *= key.isnumeric()
+        if is_number:
+            self.feat_type = {int(key): value for key, value in self.feat_type.items()}
 
         self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
 
@@ -151,10 +156,14 @@ def fit(
             else:
                 columns = set(range(n_feats))
             if expected != columns:
-                raise ValueError(
-                    f"Train data has columns={expected} yet the"
-                    f" feat_types are feat={columns}"
-                )
+                try:
+                    # columns = [str(col) for col in columns]
+                    pass
+                except:
+                    raise ValueError(
+                        f"Train data has columns={expected} yet the"
+                        f" feat_types are feat={columns}"
+                    )
             transformer_lst = []
 
             categorical_features = [

From 71715347af092515df6f8efd345e02144262d105 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Wed, 1 Jun 2022 14:32:06 +0200
Subject: [PATCH 03/63] fixing the issue that metalearning tries to use every
 hp defined in the csv files.

Also fixing the bug where hyperparameters (hps) remain active.
---
 .../pipeline/components/data_preprocessing/feature_type.py   | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index fc8a92b458..d42739c180 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -145,9 +145,6 @@ def fit(
     ) -> "FeatTypeSplit":
 
         n_feats = X.shape[1]
-        categorical_features = []
-        numerical_features = []
-        text_features = []
         if self.feat_type is not None:
             # Make sure that we are not missing any column!
             expected = set(self.feat_type.keys())
@@ -187,7 +184,7 @@ def fit(
                 for key, value in self.feat_type.items()
                 if value.lower() == "string"
             ]
-            if len(transformer_lst) > 0:
+            if len(text_features) > 0:
                 transformer_lst.append(("text_transformer", self.txt_ppl, text_features))
 
             sklearn_transf_spec = [

From 4631a91bbd56b1862633e17c9a60ddced2a08ac0 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 4 Jun 2022 14:10:31 +0200
Subject: [PATCH 04/63] fixing the issue that metalearning tries to use every
 hp defined in the csv files.

Also fixing the bug where hyperparameters (hps) remain active.
---
 autosklearn/automl.py                         |  2 +-
 autosklearn/pipeline/base.py                  | 34 ++++++++++++------
 autosklearn/pipeline/classification.py        | 10 ++++--
 autosklearn/pipeline/components/base.py       |  8 ++---
 .../components/classification/__init__.py     |  2 +-
 .../components/data_preprocessing/__init__.py |  6 ++--
 .../data_preprocessing/balancing/balancing.py |  1 +
 .../categorical_encoding/__init__.py          |  3 +-
 .../categorical_encoding/encoding.py          |  1 +
 .../categorical_encoding/no_encoding.py       |  1 +
 .../categorical_encoding/one_hot_encoding.py  |  1 +
 .../category_shift/category_shift.py          |  1 +
 .../data_preprocessing/feature_type.py        | 36 ++++++++++++-------
 .../feature_type_categorical.py               | 23 ++++++++----
 .../feature_type_numerical.py                 | 20 +++++++----
 .../data_preprocessing/feature_type_text.py   | 26 +++++++++-----
 .../imputation/categorical_imputation.py      |  1 +
 .../imputation/numerical_imputation.py        |  1 +
 .../minority_coalescense/__init__.py          |  8 ++++-
 .../minority_coalescer.py                     |  2 ++
 .../minority_coalescense/no_coalescense.py    |  1 +
 .../data_preprocessing/rescaling/__init__.py  |  3 +-
 .../rescaling/abstract_rescaling.py           |  1 +
 .../rescaling/quantile_transformer.py         |  1 +
 .../rescaling/robust_scaler.py                |  1 +
 .../text_encoding/__init__.py                 |  5 +--
 .../text_encoding/bag_of_word_encoding.py     |  1 +
 .../bag_of_word_encoding_distinct.py          |  1 +
 .../text_encoding/tfidf_encoding.py           |  1 +
 .../text_feature_reduction/truncated_svd.py   |  1 +
 .../variance_threshold/variance_threshold.py  |  1 +
 .../feature_preprocessing/__init__.py         |  2 +-
 .../extra_trees_preproc_for_classification.py |  2 +-
 .../extra_trees_preproc_for_regression.py     |  2 +-
 .../feature_agglomeration.py                  |  2 +-
 .../feature_preprocessing/kernel_pca.py       |  2 +-
 .../feature_preprocessing/kitchen_sinks.py    |  2 +-
 .../liblinear_svc_preprocessor.py             |  2 +-
 .../feature_preprocessing/no_preprocessing.py |  2 +-
 .../feature_preprocessing/nystroem_sampler.py |  2 +-
 .../feature_preprocessing/polynomial.py       |  2 +-
 .../random_trees_embedding.py                 |  2 +-
 .../select_percentile_classification.py       |  2 +-
 .../select_percentile_regression.py           |  2 +-
 .../select_rates_classification.py            |  2 +-
 .../select_rates_regression.py                |  2 +-
 .../feature_preprocessing/truncatedSVD.py     |  2 +-
 .../components/regression/__init__.py         |  2 +-
 .../pipeline/create_searchspace_util.py       |  2 ++
 autosklearn/pipeline/regression.py            | 13 +++++--
 autosklearn/util/pipeline.py                  | 26 +++++++-------
 .../40_advanced/example_text_preprocessing.py |  1 -
 52 files changed, 186 insertions(+), 94 deletions(-)

diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index f0dfed33c8..6affb2d6b8 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -2234,7 +2234,7 @@ def _create_search_space(
     ) -> Tuple[ConfigurationSpace, str]:
         configspace_path = os.path.join(tmp_dir, "space.json")
         configuration_space = pipeline.get_configuration_space(
-            datamanager.info,
+            datamanager,
             include=include,
             exclude=exclude,
         )
diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index 93c73b4716..1352fb44d9 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -34,6 +34,7 @@ class BasePipeline(Pipeline):
 
     def __init__(
         self,
+        feat_type,
         config=None,
         steps=None,
         dataset_properties=None,
@@ -50,15 +51,16 @@ def __init__(
             dataset_properties if dataset_properties is not None else {}
         )
         self.random_state = random_state
+        self.feat_type = feat_type
 
         if steps is None:
-            self.steps = self._get_pipeline_steps(dataset_properties=dataset_properties)
+            self.steps = self._get_pipeline_steps(feat_type=feat_type, dataset_properties=dataset_properties)
         else:
             self.steps = steps
 
         self._validate_include_exclude_params()
 
-        self.config_space = self.get_hyperparameter_search_space()
+        self.config_space = self.get_hyperparameter_search_space(feat_type=feat_type)
 
         if config is None:
             self.config = self.config_space.get_default_configuration()
@@ -82,8 +84,10 @@ def __init__(
                 )
             self.config = config
 
-        self.set_hyperparameters(self.config, init_params=init_params)
+        self.set_hyperparameters(self.config, feat_type=feat_type, init_params=init_params)
 
+        with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
+            f.write(f"base pip. self.steps: {self.steps}\n\n")
         super().__init__(steps=self.steps)
 
         self._additional_run_info = {}
@@ -202,13 +206,16 @@ def predict(self, X, batch_size=None):
 
                 return y
 
-    def set_hyperparameters(self, configuration, init_params=None):
+    def set_hyperparameters(self, configuration, feat_type, init_params=None):
         self.config = configuration
 
         for node_idx, n_ in enumerate(self.steps):
             node_name, node = n_
 
+            with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
+                f.write(f"node base: {type(node)}\n\n")
             sub_configuration_space = node.get_hyperparameter_search_space(
+                feat_type=feat_type,
                 dataset_properties=self.dataset_properties
             )
             sub_config_dict = {}
@@ -235,8 +242,10 @@ def set_hyperparameters(self, configuration, init_params=None):
             if isinstance(
                 node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline)
             ):
+                with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
+                    f.write(f"node: {type(node)}\n\n")
                 node.set_hyperparameters(
-                    configuration=sub_configuration, init_params=sub_init_params_dict
+                    feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict
                 )
             else:
                 raise NotImplementedError("Not supported yet!")
@@ -247,7 +256,7 @@ def set_hyperparameters(self, configuration, init_params=None):
 
         return self
 
-    def get_hyperparameter_search_space(self, dataset_properties=None):
+    def get_hyperparameter_search_space(self, feat_type, dataset_properties=None):
         """Return the configuration space for the CASH problem.
 
         Returns
@@ -258,6 +267,7 @@ def get_hyperparameter_search_space(self, dataset_properties=None):
         """
         if not hasattr(self, "config_space") or self.config_space is None:
             self.config_space = self._get_hyperparameter_search_space(
+                feat_type=feat_type,
                 include=self.include,
                 exclude=self.exclude,
                 dataset_properties=self.dataset_properties,
@@ -265,7 +275,7 @@ def get_hyperparameter_search_space(self, dataset_properties=None):
         return self.config_space
 
     def _get_hyperparameter_search_space(
-        self, include=None, exclude=None, dataset_properties=None
+        self, feat_type, include=None, exclude=None, dataset_properties=None
     ):
         """Return the configuration space for the CASH problem.
 
@@ -307,7 +317,7 @@ def _get_hyperparameter_search_space(
         raise NotImplementedError()
 
     def _get_base_search_space(
-        self, cs, dataset_properties, exclude, include, pipeline
+        self, feat_type, cs, dataset_properties, exclude, include, pipeline
     ):
         if include is None:
             if self.include is None:
@@ -343,7 +353,7 @@ def _get_base_search_space(
             dataset_properties["signed"] = False
 
         matches = autosklearn.pipeline.create_searchspace_util.get_match_array(
-            pipeline, dataset_properties, include=include, exclude=exclude
+            pipeline=pipeline, dataset_properties=dataset_properties, include=include, exclude=exclude
         )
 
         # Now we have only legal combinations at this step of the pipeline
@@ -385,8 +395,10 @@ def _get_base_search_space(
                         exclude.get(node_name),
                     )
                 )
+                with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
+                    f.write(f"node: {type(node)}\n\n")
                 sub_config_space = node.get_hyperparameter_search_space(
-                    dataset_properties, include=choices_list
+                    feat_type=feat_type, dataset_properties=dataset_properties, include=choices_list
                 )
                 cs.add_configuration_space(node_name, sub_config_space)
 
@@ -505,7 +517,7 @@ def __repr__(self):
 
         return rval
 
-    def _get_pipeline_steps(self, dataset_properties):
+    def _get_pipeline_steps(self, dataset_properties, feat_type):
         raise NotImplementedError()
 
     def _get_estimator_hyperparameter_name(self):
diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index 1686e02809..c74b336b4c 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -70,6 +70,7 @@ class SimpleClassificationPipeline(BasePipeline, ClassifierMixin):
 
     def __init__(
         self,
+        feat_type,
         config: Optional[Configuration] = None,
         steps=None,
         dataset_properties=None,
@@ -84,6 +85,7 @@ def __init__(
         if "target_type" not in dataset_properties:
             dataset_properties["target_type"] = "classification"
         super().__init__(
+            feat_type=feat_type,
             config=config,
             steps=steps,
             dataset_properties=dataset_properties,
@@ -166,7 +168,7 @@ def predict_proba(self, X, batch_size=None):
                 return y
 
     def _get_hyperparameter_search_space(
-        self, include=None, exclude=None, dataset_properties=None
+        self, feat_type, include=None, exclude=None, dataset_properties=None
     ):
         """Create the hyperparameter configuration space.
 
@@ -194,6 +196,7 @@ def _get_hyperparameter_search_space(
 
         cs = self._get_base_search_space(
             cs=cs,
+            feat_type=feat_type,
             dataset_properties=dataset_properties,
             exclude=exclude,
             include=include,
@@ -344,7 +347,7 @@ def _get_hyperparameter_search_space(
         self.dataset_properties = dataset_properties
         return cs
 
-    def _get_pipeline_steps(self, dataset_properties):
+    def _get_pipeline_steps(self, dataset_properties, feat_type):
         steps = []
 
         default_dataset_properties = {"target_type": "classification"}
@@ -356,6 +359,7 @@ def _get_pipeline_steps(self, dataset_properties):
                 [
                     "data_preprocessor",
                     DataPreprocessorChoice(
+                        feat_type=feat_type,
                         dataset_properties=default_dataset_properties,
                         random_state=self.random_state,
                     ),
@@ -364,6 +368,7 @@ def _get_pipeline_steps(self, dataset_properties):
                 [
                     "feature_preprocessor",
                     FeaturePreprocessorChoice(
+                        feat_type=feat_type,
                         dataset_properties=default_dataset_properties,
                         random_state=self.random_state,
                     ),
@@ -371,6 +376,7 @@ def _get_pipeline_steps(self, dataset_properties):
                 [
                     "classifier",
                     ClassifierChoice(
+                        feat_type=feat_type,
                         dataset_properties=default_dataset_properties,
                         random_state=self.random_state,
                     ),
diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py
index c4a95df08c..98a2c22a80 100644
--- a/autosklearn/pipeline/components/base.py
+++ b/autosklearn/pipeline/components/base.py
@@ -136,7 +136,7 @@ def fit(self, X, y):
         for further information."""
         raise NotImplementedError()
 
-    def set_hyperparameters(self, configuration, init_params=None):
+    def set_hyperparameters(self, configuration, feat_type, init_params=None):
         params = configuration.get_dictionary()
 
         for param, value in params.items():
@@ -339,7 +339,7 @@ def get_estimator(self):
 
 
 class AutoSklearnChoice(object):
-    def __init__(self, dataset_properties, random_state=None):
+    def __init__(self, dataset_properties, feat_type, random_state=None):
         """
         Parameters
         ----------
@@ -414,7 +414,7 @@ def get_available_components(
 
         return components_dict
 
-    def set_hyperparameters(self, configuration, init_params=None):
+    def set_hyperparameters(self, configuration, feat_type, init_params=None):
         new_params = {}
 
         params = configuration.get_dictionary()
@@ -438,7 +438,7 @@ def set_hyperparameters(self, configuration, init_params=None):
         return self
 
     def get_hyperparameter_search_space(
-        self, dataset_properties=None, default=None, include=None, exclude=None
+        self, feat_type, dataset_properties=None, default=None, include=None, exclude=None
     ):
         raise NotImplementedError()
 
diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
index c95334273a..073e7325e0 100644
--- a/autosklearn/pipeline/components/classification/__init__.py
+++ b/autosklearn/pipeline/components/classification/__init__.py
@@ -86,7 +86,7 @@ def get_available_components(
         return components_dict
 
     def get_hyperparameter_search_space(
-        self, dataset_properties=None, default=None, include=None, exclude=None
+        self, feat_type, dataset_properties=None, default=None, include=None, exclude=None
     ):
         if dataset_properties is None:
             dataset_properties = {}
diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py
index 5693efd441..bbe805e519 100644
--- a/autosklearn/pipeline/components/data_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py
@@ -105,6 +105,7 @@ def get_available_components(
 
     def get_hyperparameter_search_space(
         self,
+        feat_type,
         dataset_properties: Optional[Dict] = None,
         default: str = None,
         include: Optional[Dict] = None,
@@ -136,6 +137,7 @@ def get_hyperparameter_search_space(
         cs.add_hyperparameter(preprocessor)
         for name in available_preprocessors:
             preprocessor_configuration_space = available_preprocessors[name](
+                feat_type=feat_type,
                 dataset_properties=dataset_properties
             ).get_hyperparameter_search_space(dataset_properties)
             parent_hyperparameter = {"parent": preprocessor, "value": name}
@@ -150,7 +152,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
         return self.choice.transform(X)
 
     def set_hyperparameters(
-        self, configuration: ConfigurationSpace, init_params: Optional[Dict] = None
+        self, feat_type, configuration: ConfigurationSpace, init_params: Optional[Dict] = None
     ) -> "DataPreprocessorChoice":
         config = {}
         params = configuration.get_dictionary()
@@ -162,7 +164,7 @@ def set_hyperparameters(
             config[param] = value
 
         new_params = {}
-        feat_type = None
+        # feat_type = None
         if init_params is not None:
             for param, value in init_params.items():
                 param = param.replace(choice, "").split(":", 1)[-1]
diff --git a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
index 721fe63fc5..7e04082112 100644
--- a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
+++ b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
@@ -139,6 +139,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         # TODO add replace by zero!
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
index 5d1647b24a..ae52062c2b 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
@@ -38,6 +38,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
@@ -86,7 +87,7 @@ def get_hyperparameter_search_space(
         return cs
 
     def set_hyperparameters(
-        self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
+        self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
     ) -> "OHEChoice":
         new_params = {}
 
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
index 43d578219f..56d9ca16fa 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
@@ -69,6 +69,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py
index 028a4fb9c1..0a7eaaf802 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py
@@ -44,6 +44,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py
index 9b9ee87c81..c223f165d4 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py
@@ -55,6 +55,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
index f2dc2bf304..5008f406d8 100644
--- a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
+++ b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
@@ -63,6 +63,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index d42739c180..0fcf3d98e6 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -67,16 +67,19 @@ def __init__(
         self.force_sparse_output = force_sparse_output
 
         # load global feat_type
-        f = open(f'{os.path.dirname(os.path.realpath(__file__))}/../../../feat_type.json')
-        self.feat_type = json.load(f)
-        is_number = True
-        for key in self.feat_type.keys():
-            is_number *= key.isnumeric()
-        if is_number:
-            self.feat_type = {int(key): value for key, value in self.feat_type.items()}
+        # f = open(f'{os.path.dirname(os.path.realpath(__file__))}/../../../feat_type.json')
+        # self.feat_type = json.load(f)
+        # is_number = True
+        # for key in self.feat_type.keys():
+        #     is_number *= key.isnumeric()
+        # if is_number:
+        #     self.feat_type = {int(key): value for key, value in self.feat_type.items()}
 
         self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
 
+        if self.feat_type is None:
+            raise ValueError("feat_type init requires feat_type")
+
         # The pipeline that will be applied to the categorical features (i.e. columns)
         # of the dataset
         # Configuration of the data-preprocessor is different from the configuration of
@@ -87,6 +90,7 @@ def __init__(
         self.categ_ppl = None
         if "categorical" in self.feat_type.values():
             self.categ_ppl = CategoricalPreprocessingPipeline(
+                feat_type=self.feat_type,
                 config=None,
                 steps=pipeline,
                 dataset_properties=dataset_properties,
@@ -106,6 +110,7 @@ def __init__(
         self.numer_ppl = None
         if "numerical" in self.feat_type.values():
             self.numer_ppl = NumericalPreprocessingPipeline(
+                feat_type=self.feat_type,
                 config=None,
                 steps=pipeline,
                 dataset_properties=dataset_properties,
@@ -126,6 +131,7 @@ def __init__(
         self.txt_ppl = None
         if "string" in self.feat_type.values():
             self.txt_ppl = TextPreprocessingPipeline(
+                feat_type=self.feat_type,
                 config=None,
                 steps=pipeline,
                 dataset_properties=dataset_properties,
@@ -137,7 +143,7 @@ def __init__(
             self._transformers.append(("text_transformer", self.txt_ppl))
 
         if self.config:
-            self.set_hyperparameters(self.config, init_params=init_params)
+            self.set_hyperparameters(feat_type=self.feat_type, configuration=self.config, init_params=init_params)
         self.column_transformer = column_transformer
 
     def fit(
@@ -247,7 +253,7 @@ def get_properties(
         }
 
     def set_hyperparameters(
-        self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
+        self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
     ) -> "FeatTypeSplit":
         if init_params is not None and "feat_type" in init_params.keys():
             self.feat_type = init_params["feat_type"]
@@ -256,7 +262,8 @@ def set_hyperparameters(
 
         for transf_name, transf_op in self._transformers:
             sub_configuration_space = transf_op.get_hyperparameter_search_space(
-                dataset_properties=self.dataset_properties
+                dataset_properties=self.dataset_properties,
+                feat_type=feat_type
             )
             sub_config_dict = {}
             for param in configuration:
@@ -282,7 +289,7 @@ def set_hyperparameters(
                 transf_op, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline)
             ):
                 transf_op.set_hyperparameters(
-                    configuration=sub_configuration, init_params=sub_init_params_dict
+                    feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict
                 )
             else:
                 raise NotImplementedError("Not supported yet!")
@@ -291,17 +298,22 @@ def set_hyperparameters(
 
     def get_hyperparameter_search_space(
         self,
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         self.dataset_properties = dataset_properties
         cs = ConfigurationSpace()
         cs = FeatTypeSplit._get_hyperparameter_search_space_recursevely(
-            dataset_properties, cs, self._transformers
+            feat_type=feat_type,
+            dataset_properties=dataset_properties,
+            cs=cs,
+            transformer=self._transformers
         )
         return cs
 
     @staticmethod
     def _get_hyperparameter_search_space_recursevely(
+        feat_type,
         dataset_properties: DATASET_PROPERTIES_TYPE,
         cs: ConfigurationSpace,
         transformer: BaseEstimator,
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
index dfdaf7af62..160384fac0 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
@@ -46,6 +46,7 @@ class CategoricalPreprocessingPipeline(BasePipeline):
 
     def __init__(
         self,
+        feat_type,
         config: Optional[Configuration] = None,
         steps: Optional[List[Tuple[str, BaseEstimator]]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -56,13 +57,14 @@ def __init__(
     ) -> None:
         self._output_dtype = np.int32
         super().__init__(
-            config,
-            steps,
-            dataset_properties,
-            include,
-            exclude,
-            random_state,
-            init_params,
+            config=config,
+            steps=steps,
+            dataset_properties=dataset_properties,
+            include=include,
+            exclude=exclude,
+            random_state=random_state,
+            init_params=init_params,
+            feat_type=feat_type
         )
 
     @staticmethod
@@ -92,6 +94,7 @@ def get_properties(
 
     def _get_hyperparameter_search_space(
         self,
+        feat_type,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -108,8 +111,11 @@ def _get_hyperparameter_search_space(
         if dataset_properties is None or not isinstance(dataset_properties, dict):
             dataset_properties = dict()
 
+        with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
+            f.write(f"pipeline (self.steps): {self.steps}\n\n")
         cs = self._get_base_search_space(
             cs=cs,
+            feat_type=feat_type,
             dataset_properties=dataset_properties,
             exclude=exclude,
             include=include,
@@ -120,6 +126,7 @@ def _get_hyperparameter_search_space(
 
     def _get_pipeline_steps(
         self,
+        feat_type,
         dataset_properties: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[str, BaseEstimator]]:
         steps = []
@@ -135,6 +142,7 @@ def _get_pipeline_steps(
             (
                 "category_coalescence",
                 CoalescenseChoice(
+                    feat_type=feat_type,
                     dataset_properties=default_dataset_properties,
                     random_state=self.random_state,
                 ),
@@ -142,6 +150,7 @@ def _get_pipeline_steps(
             (
                 "categorical_encoding",
                 OHEChoice(
+                    feat_type=feat_type,
                     dataset_properties=default_dataset_properties,
                     random_state=self.random_state,
                 ),
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py
index b50bf0d357..a50ede3985 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py
@@ -39,6 +39,7 @@ class NumericalPreprocessingPipeline(BasePipeline):
 
     def __init__(
         self,
+        feat_type,
         config: Optional[Configuration] = None,
         steps: Optional[List[Tuple[str, BaseEstimator]]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -49,13 +50,14 @@ def __init__(
     ) -> None:
         self._output_dtype = np.int32
         super().__init__(
-            config,
-            steps,
-            dataset_properties,
-            include,
-            exclude,
-            random_state,
-            init_params,
+            config=config,
+            steps=steps,
+            dataset_properties=dataset_properties,
+            include=include,
+            exclude=exclude,
+            random_state=random_state,
+            init_params=init_params,
+            feat_type=feat_type,
         )
 
     @staticmethod
@@ -85,6 +87,7 @@ def get_properties(
 
     def _get_hyperparameter_search_space(
         self,
+        feat_type,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -110,12 +113,14 @@ def _get_hyperparameter_search_space(
             exclude=exclude,
             include=include,
             pipeline=self.steps,
+            feat_type=feat_type,
         )
 
         return cs
 
     def _get_pipeline_steps(
         self,
+        feat_type,
         dataset_properties: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[str, BaseEstimator]]:
         steps = []
@@ -134,6 +139,7 @@ def _get_pipeline_steps(
                 (
                     "rescaling",
                     rescaling_components.RescalingChoice(
+                        feat_type=feat_type,
                         dataset_properties=default_dataset_properties,
                         random_state=self.random_state,
                     ),
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
index 8924d568a6..e3d7078de2 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
@@ -34,6 +34,7 @@ class TextPreprocessingPipeline(BasePipeline):
 
     def __init__(
         self,
+        feat_type,
         config: Optional[Configuration] = None,
         steps: Optional[List[Tuple[str, BaseEstimator]]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -44,13 +45,14 @@ def __init__(
     ) -> None:
         self._output_dtype = np.int32
         super().__init__(
-            config,
-            steps,
-            dataset_properties,
-            include,
-            exclude,
-            random_state,
-            init_params,
+            config=config,
+            steps=steps,
+            dataset_properties=dataset_properties,
+            include=include,
+            exclude=exclude,
+            random_state=random_state,
+            init_params=init_params,
+            feat_type=feat_type
         )
 
     @staticmethod
@@ -79,6 +81,7 @@ def get_properties(
 
     def _get_hyperparameter_search_space(
         self,
+        feat_type,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -104,12 +107,14 @@ def _get_hyperparameter_search_space(
             exclude=exclude,
             include=include,
             pipeline=self.steps,
+            feat_type=feat_type,
         )
 
         return cs
 
     def _get_pipeline_steps(
         self,
+        feat_type,
         dataset_properties: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[str, BaseEstimator]]:
         steps = []
@@ -123,12 +128,15 @@ def _get_pipeline_steps(
                 (
                     "text_encoding",
                     BagOfWordChoice(
-                        default_dataset_properties, random_state=self.random_state
+                        feat_type=feat_type,
+                        dataset_properties=default_dataset_properties,
+                        random_state=self.random_state
                     ),
                 ),
                 (
                     "text_feature_reduction",
-                    TextFeatureReduction(random_state=self.random_state),
+                    TextFeatureReduction(
+                        random_state=self.random_state),
                 ),
             ]
         )
diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py
index 00b627daed..40f3d1e93a 100644
--- a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py
+++ b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py
@@ -91,6 +91,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py
index d7d6a645ab..99ab3a33c0 100644
--- a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py
+++ b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py
@@ -62,6 +62,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         # TODO add replace by zero!
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
index fbf999761c..b1148d8d94 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
@@ -38,6 +38,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
@@ -86,7 +87,7 @@ def get_hyperparameter_search_space(
         return cs
 
     def set_hyperparameters(
-        self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
+        self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
     ) -> "CoalescenseChoice":
         new_params = {}
 
@@ -111,6 +112,11 @@ def set_hyperparameters(
         new_params["random_state"] = self.random_state
 
         self.new_params = new_params
+        with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
+            f.write(f"minority_init self.get...:\n"
+                    f"new_params: {new_params}\n"
+                    f"choice: {self.get_components()[choice]}\n\n")
+        new_params["feat_type"] = feat_type
         self.choice = self.get_components()[choice](**new_params)
 
         return self
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
index 278cf0bfb9..9052188190 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
@@ -15,6 +15,7 @@ class MinorityCoalescer(AutoSklearnPreprocessingAlgorithm):
 
     def __init__(
         self,
+        feat_type,
         minimum_fraction: float = 0.01,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
     ) -> None:
@@ -59,6 +60,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
index d05c146d98..8c6314988e 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
@@ -43,6 +43,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
index 2a9fbdb842..8f3caedd83 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
@@ -42,6 +42,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
@@ -74,7 +75,7 @@ def get_hyperparameter_search_space(
         for name in available_preprocessors:
             preprocessor_configuration_space = available_preprocessors[
                 name
-            ].get_hyperparameter_search_space(dataset_properties)
+            ].get_hyperparameter_search_space(feat_type=feat_type, dataset_properties=dataset_properties)
             parent_hyperparameter = {"parent": preprocessor, "value": name}
             cs.add_configuration_space(
                 name,
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
index 05e1a4e898..7955d90b28 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
@@ -38,6 +38,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py
index 2611c0650d..0c840fb44f 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py
@@ -62,6 +62,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py
index af3b4c0558..9b4e01843d 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py
@@ -59,6 +59,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
index 990ad579ca..3cc71bc314 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
@@ -39,6 +39,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
@@ -75,7 +76,7 @@ def get_hyperparameter_search_space(
         for name in available_preprocessors:
             preprocessor_configuration_space = available_preprocessors[
                 name
-            ].get_hyperparameter_search_space(dataset_properties)
+            ].get_hyperparameter_search_space(feat_type=feat_type, dataset_properties=dataset_properties)
             parent_hyperparameter = {"parent": preprocessor, "value": name}
             cs.add_configuration_space(
                 name,
@@ -88,7 +89,7 @@ def get_hyperparameter_search_space(
         return cs
 
     def set_hyperparameters(
-        self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
+        self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
     ) -> "BagOfWordChoice":
         new_params = {}
 
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py
index b8a62ccd89..92692b1dda 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py
@@ -95,6 +95,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py
index 90a43b0f48..b2e6c0598d 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py
@@ -101,6 +101,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py
index f20d24f769..577cddf7d2 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py
@@ -100,6 +100,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py
index beecefb028..1562b57249 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py
@@ -74,6 +74,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
index 365ae405a0..33f7a1a996 100644
--- a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
+++ b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
@@ -49,6 +49,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
+        feat_type,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
index cd52d6ad34..bff511fe6e 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
@@ -101,7 +101,7 @@ def get_available_components(
         return components_dict
 
     def get_hyperparameter_search_space(
-        self, dataset_properties=None, default=None, include=None, exclude=None
+        self, feat_type, dataset_properties=None, default=None, include=None, exclude=None
     ):
         cs = ConfigurationSpace()
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
index dad45795b8..084fe8e40d 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
@@ -123,7 +123,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         cs = ConfigurationSpace()
 
         n_estimators = Constant("n_estimators", 100)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
index 3287b837c5..23914b713f 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
@@ -125,7 +125,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         cs = ConfigurationSpace()
 
         n_estimators = Constant("n_estimators", 100)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
index d51242de21..95ae7cee49 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
@@ -63,7 +63,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         cs = ConfigurationSpace()
         n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25)
         affinity = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
index 4e96bfb1c2..0fad8bc6b3 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
@@ -82,7 +82,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         n_components = UniformIntegerHyperparameter(
             "n_components", 10, 2000, default_value=100
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
index a81e9ddd78..22d1cfd248 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
@@ -69,7 +69,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         gamma = UniformFloatHyperparameter(
             "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
index 546c8742ad..f7ee87d80f 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
@@ -91,7 +91,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         cs = ConfigurationSpace()
 
         penalty = Constant("penalty", "l1")
diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
index 550872d551..016a44dd7b 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
@@ -34,6 +34,6 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         cs = ConfigurationSpace()
         return cs
diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
index 097f59e0f1..24ca8977a0 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
@@ -94,7 +94,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         if dataset_properties is not None and (
             dataset_properties.get("sparse") is True
             or dataset_properties.get("signed") is False
diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
index bd5312bba0..d17e6d43a4 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
@@ -54,7 +54,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         # More than degree 3 is too expensive!
         degree = UniformIntegerHyperparameter("degree", 2, 3, 2)
         interaction_only = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
index 9daed1ae97..b6966b27ae 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
@@ -94,7 +94,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         n_estimators = UniformIntegerHyperparameter(
             name="n_estimators", lower=10, upper=100, default_value=10
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
index 3caa50b46d..fdb15c50bd 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
@@ -110,7 +110,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         percentile = UniformFloatHyperparameter(
             name="percentile", lower=1, upper=99, default_value=50
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
index e9343fead4..cdec8fe152 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
@@ -53,7 +53,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         percentile = UniformFloatHyperparameter(
             "percentile", lower=1, upper=99, default_value=50
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
index 0c4768d000..cb002c9b6d 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
@@ -116,7 +116,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=0.01, upper=0.5, default_value=0.1
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
index ffec19e6ec..e9a5f7b943 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
@@ -84,7 +84,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=0.01, upper=0.5, default_value=0.1
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
index 4d6f6b7ca9..214b0346d2 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
@@ -48,7 +48,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
         target_dim = UniformIntegerHyperparameter(
             "target_dim", 10, 256, default_value=128
         )
diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py
index 73033467a7..457d9813d3 100644
--- a/autosklearn/pipeline/components/regression/__init__.py
+++ b/autosklearn/pipeline/components/regression/__init__.py
@@ -79,7 +79,7 @@ def get_available_components(
         return components_dict
 
     def get_hyperparameter_search_space(
-        self, dataset_properties=None, default=None, include=None, exclude=None
+        self, feat_type, dataset_properties=None, default=None, include=None, exclude=None
     ):
         if include is not None and exclude is not None:
             raise ValueError(
diff --git a/autosklearn/pipeline/create_searchspace_util.py b/autosklearn/pipeline/create_searchspace_util.py
index dff69acc6e..d60c1ecaff 100644
--- a/autosklearn/pipeline/create_searchspace_util.py
+++ b/autosklearn/pipeline/create_searchspace_util.py
@@ -22,6 +22,8 @@ def get_match_array(pipeline, dataset_properties, include=None, exclude=None):
     node_i_choices = []
     node_i_choices_names = []
     all_nodes = []
+    with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
+        f.write(f"pipeline: {pipeline}\n\n")
     for node_name, node in pipeline:
         all_nodes.append(node)
         is_choice = hasattr(node, "get_available_components")
diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py
index 638f8ae3cb..090083b5ba 100644
--- a/autosklearn/pipeline/regression.py
+++ b/autosklearn/pipeline/regression.py
@@ -67,6 +67,7 @@ class SimpleRegressionPipeline(RegressorMixin, BasePipeline):
 
     def __init__(
         self,
+        feat_type,
         config: Optional[Configuration] = None,
         steps=None,
         dataset_properties=None,
@@ -81,6 +82,7 @@ def __init__(
         if "target_type" not in dataset_properties:
             dataset_properties["target_type"] = "regression"
         super().__init__(
+            feat_type=feat_type,
             config=config,
             steps=steps,
             dataset_properties=dataset_properties,
@@ -112,7 +114,7 @@ def predict(self, X, batch_size=None):
         return y
 
     def _get_hyperparameter_search_space(
-        self, include=None, exclude=None, dataset_properties=None
+        self, feat_type, include=None, exclude=None, dataset_properties=None
     ):
         """Return the configuration space for the CASH problem.
 
@@ -149,6 +151,7 @@ def _get_hyperparameter_search_space(
 
         cs = self._get_base_search_space(
             cs=cs,
+            feat_type=feat_type,
             dataset_properties=dataset_properties,
             exclude=exclude,
             include=include,
@@ -259,7 +262,7 @@ def _get_hyperparameter_search_space(
     def _get_estimator_components(self):
         return regression_components._regressors
 
-    def _get_pipeline_steps(self, dataset_properties, init_params=None):
+    def _get_pipeline_steps(self, feat_type, dataset_properties, init_params=None):
         steps = []
 
         default_dataset_properties = {"target_type": "regression"}
@@ -271,6 +274,7 @@ def _get_pipeline_steps(self, dataset_properties, init_params=None):
                 [
                     "data_preprocessor",
                     DataPreprocessorChoice(
+                        feat_type=feat_type,
                         dataset_properties=default_dataset_properties,
                         random_state=self.random_state,
                     ),
@@ -278,6 +282,7 @@ def _get_pipeline_steps(self, dataset_properties, init_params=None):
                 [
                     "feature_preprocessor",
                     feature_preprocessing_components.FeaturePreprocessorChoice(
+                        feat_type=feat_type,
                         dataset_properties=default_dataset_properties,
                         random_state=self.random_state,
                     ),
@@ -285,7 +290,9 @@ def _get_pipeline_steps(self, dataset_properties, init_params=None):
                 [
                     "regressor",
                     regression_components.RegressorChoice(
-                        default_dataset_properties, random_state=self.random_state
+                        feat_type=feat_type,
+                        dataset_properties=default_dataset_properties,
+                        random_state=self.random_state
                     ),
                 ],
             ]
diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py
index d3291069f5..bc6a27a71c 100755
--- a/autosklearn/util/pipeline.py
+++ b/autosklearn/util/pipeline.py
@@ -18,7 +18,7 @@
 
 
 def get_configuration_space(
-    info: Dict[str, Any],
+    datamanager: Dict[str, Any],
     include: Optional[Dict[str, List[str]]] = None,
     exclude: Optional[Dict[str, List[str]]] = None,
     random_state: Optional[Union[int, np.random.RandomState]] = None,
@@ -44,16 +44,16 @@ def get_configuration_space(
     ConfigurationSpace
         The configuration space for the pipeline
     """
-    if info["task"] in REGRESSION_TASKS:
-        return _get_regression_configuration_space(info, include, exclude, random_state)
+    if datamanager.info["task"] in REGRESSION_TASKS:
+        return _get_regression_configuration_space(datamanager, include, exclude, random_state)
     else:
         return _get_classification_configuration_space(
-            info, include, exclude, random_state
+            datamanager, include, exclude, random_state
         )
 
 
 def _get_regression_configuration_space(
-    info: Dict[str, Any],
+    datamanager: Dict[str, Any],
     include: Optional[Dict[str, List[str]]],
     exclude: Optional[Dict[str, List[str]]],
     random_state: Optional[Union[int, np.random.RandomState]] = None,
@@ -79,28 +79,29 @@ def _get_regression_configuration_space(
     ConfigurationSpace
         The configuration space for the regression pipeline
     """
-    task_type = info["task"]
+    task_type = datamanager.info["task"]
     sparse = False
     multioutput = False
     if task_type == MULTIOUTPUT_REGRESSION:
         multioutput = True
 
-    if info["is_sparse"] == 1:
+    if datamanager.info["is_sparse"] == 1:
         sparse = True
 
     dataset_properties = {"multioutput": multioutput, "sparse": sparse}
 
     configuration_space = SimpleRegressionPipeline(
+        feat_type=datamanager.feat_type,
         dataset_properties=dataset_properties,
         include=include,
         exclude=exclude,
         random_state=random_state,
-    ).get_hyperparameter_search_space()
+    ).get_hyperparameter_search_space(feat_type=datamanager.feat_type)
     return configuration_space
 
 
 def _get_classification_configuration_space(
-    info: Dict[str, Any],
+    datamanager: Dict[str, Any],
     include: Optional[Dict[str, List[str]]],
     exclude: Optional[Dict[str, List[str]]],
     random_state: Optional[Union[int, np.random.RandomState]] = None,
@@ -126,7 +127,7 @@ def _get_classification_configuration_space(
     ConfigurationSpace
         The configuration space for the classification pipeline
     """
-    task_type = info["task"]
+    task_type = datamanager.info["task"]
 
     multilabel = False
     multiclass = False
@@ -139,7 +140,7 @@ def _get_classification_configuration_space(
     if task_type == BINARY_CLASSIFICATION:
         pass
 
-    if info["is_sparse"] == 1:
+    if datamanager.info["is_sparse"] == 1:
         sparse = True
 
     dataset_properties = {
@@ -149,8 +150,9 @@ def _get_classification_configuration_space(
     }
 
     return SimpleClassificationPipeline(
+        feat_type=datamanager.feat_type,
         dataset_properties=dataset_properties,
         include=include,
         exclude=exclude,
         random_state=random_state,
-    ).get_hyperparameter_search_space()
+    ).get_hyperparameter_search_space(feat_type=datamanager.feat_type)
diff --git a/examples/40_advanced/example_text_preprocessing.py b/examples/40_advanced/example_text_preprocessing.py
index 7c65825b7b..ba7deffe03 100644
--- a/examples/40_advanced/example_text_preprocessing.py
+++ b/examples/40_advanced/example_text_preprocessing.py
@@ -59,7 +59,6 @@
 automl = autosklearn.classification.AutoSklearnClassifier(
     time_left_for_this_task=60,
     per_run_time_limit=30,
-    tmp_folder="/tmp/autosklearn_text_example_tmp",
 )
 
 automl.fit(X_train, y_train, dataset_name="20_Newsgroups")  # fit the automl model

From 0bad1d402aead52664bc853b0fc968442476ec80 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 4 Jun 2022 14:16:21 +0200
Subject: [PATCH 05/63] fixing the issue that metalearning tries to use every
 hp defined in the csv files.

Also fixing the bug where hps remain active.
---
 autosklearn/automl.py                                     | 4 ----
 autosklearn/pipeline/base.py                              | 8 --------
 .../data_preprocessing/feature_type_categorical.py        | 2 --
 .../data_preprocessing/minority_coalescense/__init__.py   | 4 ----
 autosklearn/pipeline/create_searchspace_util.py           | 2 --
 5 files changed, 20 deletions(-)

diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index 6affb2d6b8..df46ee4c4c 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -748,10 +748,6 @@ def fit(
 
         self._log_fit_setup()
 
-        # save feat_type to file
-        with open(f'{os.path.dirname(os.path.realpath(__file__))}/feat_type.json', 'w') as f:
-            json.dump(self._feat_type, f, indent=4)
-
         # == Pickle the data manager to speed up loading
         with self._stopwatch.time("Save Datamanager"):
             datamanager = XYDataManager(
diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index 1352fb44d9..25247a0d76 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -86,8 +86,6 @@ def __init__(
 
         self.set_hyperparameters(self.config, feat_type=feat_type, init_params=init_params)
 
-        with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
-            f.write(f"base pip. self.steps: {self.steps}\n\n")
         super().__init__(steps=self.steps)
 
         self._additional_run_info = {}
@@ -212,8 +210,6 @@ def set_hyperparameters(self, configuration, feat_type, init_params=None):
         for node_idx, n_ in enumerate(self.steps):
             node_name, node = n_
 
-            with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
-                f.write(f"node base: {type(node)}\n\n")
             sub_configuration_space = node.get_hyperparameter_search_space(
                 feat_type=feat_type,
                 dataset_properties=self.dataset_properties
@@ -242,8 +238,6 @@ def set_hyperparameters(self, configuration, feat_type, init_params=None):
             if isinstance(
                 node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline)
             ):
-                with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
-                    f.write(f"node: {type(node)}\n\n")
                 node.set_hyperparameters(
                     feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict
                 )
@@ -395,8 +389,6 @@ def _get_base_search_space(
                         exclude.get(node_name),
                     )
                 )
-                with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
-                    f.write(f"node: {type(node)}\n\n")
                 sub_config_space = node.get_hyperparameter_search_space(
                     feat_type=feat_type, dataset_properties=dataset_properties, include=choices_list
                 )
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
index 160384fac0..599221187b 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
@@ -111,8 +111,6 @@ def _get_hyperparameter_search_space(
         if dataset_properties is None or not isinstance(dataset_properties, dict):
             dataset_properties = dict()
 
-        with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
-            f.write(f"pipeline (self.steps): {self.steps}\n\n")
         cs = self._get_base_search_space(
             cs=cs,
             feat_type=feat_type,
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
index b1148d8d94..6eff766085 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
@@ -112,10 +112,6 @@ def set_hyperparameters(
         new_params["random_state"] = self.random_state
 
         self.new_params = new_params
-        with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
-            f.write(f"minority_init self.get...:\n"
-                    f"new_params: {new_params}\n"
-                    f"choice: {self.get_components()[choice]}\n\n")
         new_params["feat_type"] = feat_type
         self.choice = self.get_components()[choice](**new_params)
 
diff --git a/autosklearn/pipeline/create_searchspace_util.py b/autosklearn/pipeline/create_searchspace_util.py
index d60c1ecaff..dff69acc6e 100644
--- a/autosklearn/pipeline/create_searchspace_util.py
+++ b/autosklearn/pipeline/create_searchspace_util.py
@@ -22,8 +22,6 @@ def get_match_array(pipeline, dataset_properties, include=None, exclude=None):
     node_i_choices = []
     node_i_choices_names = []
     all_nodes = []
-    with open("/home/lukas/PycharmProjects/AutoMLFork/log.txt", "a") as f:
-        f.write(f"pipeline: {pipeline}\n\n")
     for node_name, node in pipeline:
         all_nodes.append(node)
         is_choice = hasattr(node, "get_available_components")

From 5d36fa5ed4bc19544d78a57c1a2327e98488aed7 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Tue, 7 Jun 2022 12:00:48 +0200
Subject: [PATCH 06/63] fixing ensemble builder

---
 autosklearn/evaluation/abstract_evaluator.py | 11 +++++++++--
 autosklearn/evaluation/train_evaluator.py    |  3 ++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py
index efd87c6cc3..bc5fc6df2c 100644
--- a/autosklearn/evaluation/abstract_evaluator.py
+++ b/autosklearn/evaluation/abstract_evaluator.py
@@ -45,6 +45,7 @@ def __init__(
         self,
         config: Configuration,
         random_state: Optional[Union[int, np.random.RandomState]],
+        feat_type,
         init_params: Optional[Dict[str, Any]] = None,
         dataset_properties: Dict[str, Any] = {},
         include: Optional[List[str]] = None,
@@ -61,6 +62,7 @@ def __init__(
         self.dataset_properties = dataset_properties
         self.include = include
         self.exclude = exclude
+        self.feat_type = feat_type
 
     def pre_transform(
         self,
@@ -108,6 +110,7 @@ def __init__(
         self,
         config: Configuration,
         random_state: Optional[Union[int, np.random.RandomState]],
+        feat_type,
         init_params: Optional[Dict[str, Any]] = None,
         dataset_properties: Dict[str, Any] = {},
         include: Optional[List[str]] = None,
@@ -123,6 +126,7 @@ def __init__(
         self.dataset_properties = dataset_properties
         self.include = include
         self.exclude = exclude
+        self.feat_type = feat_type
 
     def pre_transform(
         self,
@@ -217,6 +221,7 @@ def __init__(
         self.queue = queue
 
         self.datamanager = self.backend.load_datamanager()
+        self.feat_type = self.datamanager.feat_type
         self.include = include
         self.exclude = exclude
 
@@ -296,11 +301,12 @@ def __init__(
                     _addons[key].add_component(component)
 
         # Please mypy to prevent not defined attr
-        self.model = self._get_model()
+        self.model = self._get_model(feat_type=self.feat_type)
 
-    def _get_model(self) -> BaseEstimator:
+    def _get_model(self, feat_type) -> BaseEstimator:
         if not isinstance(self.configuration, Configuration):
             model = self.model_class(
+                feat_type=feat_type,
                 config=self.configuration,
                 random_state=self.seed,
                 init_params=self._init_params,
@@ -320,6 +326,7 @@ def _get_model(self) -> BaseEstimator:
                     "multiclass": self.task_type == MULTICLASS_CLASSIFICATION,
                 }
             model = self.model_class(
+                feat_type=feat_type,
                 config=self.configuration,
                 dataset_properties=dataset_properties,
                 random_state=self.seed,
diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py
index a8433c2136..cda7806b6c 100644
--- a/autosklearn/evaluation/train_evaluator.py
+++ b/autosklearn/evaluation/train_evaluator.py
@@ -247,6 +247,7 @@ def __init__(
             budget_type=budget_type,
         )
 
+        self.feat_type = self.backend.load_datamanager().feat_type
         self.resampling_strategy = resampling_strategy
         if resampling_strategy_args is None:
             self.resampling_strategy_args = {}
@@ -984,7 +985,7 @@ def _partial_fit_and_predict_standard(
         PIPELINE_DATA_DTYPE,  # test_pred
         TYPE_ADDITIONAL_INFO,
     ]:
-        model = self._get_model()
+        model = self._get_model(feat_type=self.feat_type)
 
         self.indices[fold] = (train_indices, test_indices)
 

From 8afbd975f9f02eae9d2f2a41e2d8f21422b84fcd Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 13:41:15 +0200
Subject: [PATCH 07/63] fixing ensemble builder

---
 autosklearn/evaluation/test_evaluator.py  |  2 +-
 autosklearn/evaluation/train_evaluator.py | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/autosklearn/evaluation/test_evaluator.py b/autosklearn/evaluation/test_evaluator.py
index e76186aa06..c658fb0cea 100644
--- a/autosklearn/evaluation/test_evaluator.py
+++ b/autosklearn/evaluation/test_evaluator.py
@@ -58,7 +58,7 @@ def __init__(
         self.X_test = self.datamanager.data.get("X_test")
         self.Y_test = self.datamanager.data.get("Y_test")
 
-        self.model = self._get_model()
+        self.model = self._get_model(self.feat_type)
 
     def fit_predict_and_loss(self) -> None:
         _fit_and_suppress_warnings(self.logger, self.model, self.X_train, self.Y_train)
diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py
index cda7806b6c..228a685c37 100644
--- a/autosklearn/evaluation/train_evaluator.py
+++ b/autosklearn/evaluation/train_evaluator.py
@@ -306,7 +306,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
 
                 # Test if the model allows for an iterative fit, if not,
                 # call this method again without the iterative argument
-                model = self._get_model()
+                model = self._get_model(self.feat_type)
                 if not model.estimator_supports_iterative_fit():
                     self.fit_predict_and_loss(iterative=False)
                     return
@@ -321,7 +321,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                 Y_test_pred = [None] * self.num_cv_folds
                 train_splits = [None] * self.num_cv_folds
 
-                self.models = [self._get_model() for i in range(self.num_cv_folds)]
+                self.models = [self._get_model(self.feat_type) for i in range(self.num_cv_folds)]
                 iterations = [1] * self.num_cv_folds
                 total_n_iterations = [0] * self.num_cv_folds
                 # model.estimator_supports_iterative_fit -> true
@@ -532,7 +532,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                     self.Y_optimization = Y_targets
                     self.Y_actual_train = Y_train_targets
 
-                    self.model = self._get_model()
+                    self.model = self._get_model(self.feat_type)
                     status = StatusType.DONOTADVANCE
                     if any(
                         [
@@ -713,7 +713,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
             self.Y_actual_train = Y_train_targets
 
             if self.num_cv_folds > 1:
-                self.model = self._get_model()
+                self.model = self._get_model(self.feat_type)
                 # Bad style, but necessary for unit testing that self.model is
                 # actually a new model
                 self._added_empty_model = True
@@ -835,7 +835,7 @@ def _partial_fit_and_predict_iterative(
         test_indices: List[int],
         add_model_to_self: bool,
     ) -> None:
-        model = self._get_model()
+        model = self._get_model(self.feat_type)
 
         self.indices[fold] = (train_indices, test_indices)
 
@@ -1053,7 +1053,7 @@ def _partial_fit_and_predict_budget(
         # Add this statement for mypy
         assert self.budget is not None
 
-        model = self._get_model()
+        model = self._get_model(self.feat_type)
         self.indices[fold] = (train_indices, test_indices)
         self.X_targets[fold] = self.X_train[test_indices]
         self.Y_targets[fold] = self.Y_train[test_indices]

From de09993285e785a16bcb1cc5f103936922aed037 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 14:29:49 +0200
Subject: [PATCH 08/63] fixing ensemble builder

---
 autosklearn/pipeline/base.py                  | 10 +++++-----
 autosklearn/pipeline/classification.py        |  8 +++++---
 autosklearn/pipeline/components/base.py       |  6 +++---
 .../components/classification/__init__.py     |  2 +-
 .../components/data_preprocessing/__init__.py |  4 ++--
 .../data_preprocessing/balancing/balancing.py |  2 +-
 .../categorical_encoding/__init__.py          |  2 +-
 .../categorical_encoding/encoding.py          |  2 +-
 .../categorical_encoding/no_encoding.py       |  2 +-
 .../categorical_encoding/one_hot_encoding.py  |  2 +-
 .../category_shift/category_shift.py          |  2 +-
 .../data_preprocessing/feature_type.py        | 16 +++++++---------
 .../feature_type_categorical.py               |  6 +++---
 .../feature_type_numerical.py                 |  6 +++---
 .../data_preprocessing/feature_type_text.py   |  6 +++---
 .../imputation/categorical_imputation.py      |  2 +-
 .../imputation/numerical_imputation.py        |  2 +-
 .../minority_coalescense/__init__.py          | 18 ++++++++++--------
 .../minority_coalescer.py                     |  2 +-
 .../minority_coalescense/no_coalescense.py    |  2 +-
 .../data_preprocessing/rescaling/__init__.py  |  4 ++--
 .../rescaling/abstract_rescaling.py           |  4 ++--
 .../rescaling/quantile_transformer.py         |  2 +-
 .../rescaling/robust_scaler.py                |  2 +-
 .../text_encoding/__init__.py                 | 18 ++++++++++--------
 .../text_encoding/bag_of_word_encoding.py     |  2 +-
 .../bag_of_word_encoding_distinct.py          |  2 +-
 .../text_encoding/tfidf_encoding.py           |  2 +-
 .../text_feature_reduction/truncated_svd.py   |  2 +-
 .../variance_threshold/variance_threshold.py  |  2 +-
 .../feature_preprocessing/__init__.py         |  2 +-
 .../extra_trees_preproc_for_classification.py |  2 +-
 .../extra_trees_preproc_for_regression.py     |  2 +-
 .../feature_agglomeration.py                  |  2 +-
 .../feature_preprocessing/kernel_pca.py       |  2 +-
 .../feature_preprocessing/kitchen_sinks.py    |  2 +-
 .../liblinear_svc_preprocessor.py             |  2 +-
 .../feature_preprocessing/no_preprocessing.py |  2 +-
 .../feature_preprocessing/nystroem_sampler.py |  2 +-
 .../components/feature_preprocessing/pca.py   |  2 +-
 .../feature_preprocessing/polynomial.py       |  2 +-
 .../random_trees_embedding.py                 |  2 +-
 .../select_percentile_classification.py       |  2 +-
 .../select_percentile_regression.py           |  2 +-
 .../select_rates_classification.py            |  2 +-
 .../select_rates_regression.py                |  2 +-
 .../feature_preprocessing/truncatedSVD.py     |  2 +-
 autosklearn/pipeline/regression.py            |  8 ++++----
 autosklearn/util/pipeline.py                  | 19 ++++++++++---------
 49 files changed, 104 insertions(+), 99 deletions(-)

diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index 25247a0d76..9e25af6802 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -34,7 +34,7 @@ class BasePipeline(Pipeline):
 
     def __init__(
         self,
-        feat_type,
+        feat_type=None,
         config=None,
         steps=None,
         dataset_properties=None,
@@ -204,7 +204,7 @@ def predict(self, X, batch_size=None):
 
                 return y
 
-    def set_hyperparameters(self, configuration, feat_type, init_params=None):
+    def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
         self.config = configuration
 
         for node_idx, n_ in enumerate(self.steps):
@@ -250,7 +250,7 @@ def set_hyperparameters(self, configuration, feat_type, init_params=None):
 
         return self
 
-    def get_hyperparameter_search_space(self, feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(self, feat_type=None, dataset_properties=None):
         """Return the configuration space for the CASH problem.
 
         Returns
@@ -269,7 +269,7 @@ def get_hyperparameter_search_space(self, feat_type, dataset_properties=None):
         return self.config_space
 
     def _get_hyperparameter_search_space(
-        self, feat_type, include=None, exclude=None, dataset_properties=None
+        self, feat_type=None, include=None, exclude=None, dataset_properties=None
     ):
         """Return the configuration space for the CASH problem.
 
@@ -509,7 +509,7 @@ def __repr__(self):
 
         return rval
 
-    def _get_pipeline_steps(self, dataset_properties, feat_type):
+    def _get_pipeline_steps(self, dataset_properties, feat_type=None):
         raise NotImplementedError()
 
     def _get_estimator_hyperparameter_name(self):
diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index c74b336b4c..7819cd9e8f 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -70,7 +70,7 @@ class SimpleClassificationPipeline(BasePipeline, ClassifierMixin):
 
     def __init__(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         config: Optional[Configuration] = None,
         steps=None,
         dataset_properties=None,
@@ -168,12 +168,14 @@ def predict_proba(self, X, batch_size=None):
                 return y
 
     def _get_hyperparameter_search_space(
-        self, feat_type, include=None, exclude=None, dataset_properties=None
+        self, feat_type=None, include=None, exclude=None, dataset_properties=None
     ):
         """Create the hyperparameter configuration space.
 
         Parameters
         ----------
+        feat_type : dict, maps columns to their datatypes
+
         include : dict (optional, default=None)
 
         Returns
@@ -347,7 +349,7 @@ def _get_hyperparameter_search_space(
         self.dataset_properties = dataset_properties
         return cs
 
-    def _get_pipeline_steps(self, dataset_properties, feat_type):
+    def _get_pipeline_steps(self, dataset_properties, feat_type=None):
         steps = []
 
         default_dataset_properties = {"target_type": "classification"}
diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py
index 98a2c22a80..59aa210248 100644
--- a/autosklearn/pipeline/components/base.py
+++ b/autosklearn/pipeline/components/base.py
@@ -136,7 +136,7 @@ def fit(self, X, y):
         for further information."""
         raise NotImplementedError()
 
-    def set_hyperparameters(self, configuration, feat_type, init_params=None):
+    def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
         params = configuration.get_dictionary()
 
         for param, value in params.items():
@@ -339,7 +339,7 @@ def get_estimator(self):
 
 
 class AutoSklearnChoice(object):
-    def __init__(self, dataset_properties, feat_type, random_state=None):
+    def __init__(self, dataset_properties, feat_type=None, random_state=None):
         """
         Parameters
         ----------
@@ -414,7 +414,7 @@ def get_available_components(
 
         return components_dict
 
-    def set_hyperparameters(self, configuration, feat_type, init_params=None):
+    def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
         new_params = {}
 
         params = configuration.get_dictionary()
diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
index 073e7325e0..e7d30e9e0b 100644
--- a/autosklearn/pipeline/components/classification/__init__.py
+++ b/autosklearn/pipeline/components/classification/__init__.py
@@ -86,7 +86,7 @@ def get_available_components(
         return components_dict
 
     def get_hyperparameter_search_space(
-        self, feat_type, dataset_properties=None, default=None, include=None, exclude=None
+        self, feat_type=None, dataset_properties=None, default=None, include=None, exclude=None
     ):
         if dataset_properties is None:
             dataset_properties = {}
diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py
index bbe805e519..24b6788d63 100644
--- a/autosklearn/pipeline/components/data_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py
@@ -105,7 +105,7 @@ def get_available_components(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type,
+        feat_type=None,
         dataset_properties: Optional[Dict] = None,
         default: str = None,
         include: Optional[Dict] = None,
@@ -152,7 +152,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
         return self.choice.transform(X)
 
     def set_hyperparameters(
-        self, feat_type, configuration: ConfigurationSpace, init_params: Optional[Dict] = None
+        self, configuration: ConfigurationSpace, init_params: Optional[Dict] = None, feat_type=None
     ) -> "DataPreprocessorChoice":
         config = {}
         params = configuration.get_dictionary()
diff --git a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
index 7e04082112..d9a4958bde 100644
--- a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
+++ b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
@@ -139,7 +139,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type=None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         # TODO add replace by zero!
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
index ae52062c2b..bb640b484b 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
@@ -38,7 +38,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
-        feat_type,
+        feat_type=None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
index 56d9ca16fa..570ac0f730 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
@@ -69,7 +69,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type=None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py
index 0a7eaaf802..9e356d9f41 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py
@@ -44,7 +44,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py
index c223f165d4..f6afe06c8e 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py
@@ -55,7 +55,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
index 5008f406d8..3af659331e 100644
--- a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
+++ b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
@@ -63,7 +63,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type=None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index 0fcf3d98e6..ceaec426ef 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -1,8 +1,6 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import numpy as np
-import json
-import os
 import sklearn.compose
 from ConfigSpace import Configuration
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -77,8 +75,8 @@ def __init__(
 
         self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
 
-        if self.feat_type is None:
-            raise ValueError("feat_type init requires feat_type")
+        # if self.feat_type is None:
+        #     raise ValueError("feat_type init requires feat_type")
 
         # The pipeline that will be applied to the categorical features (i.e. columns)
         # of the dataset
@@ -88,7 +86,7 @@ def __init__(
         # TODO: Extract the child configuration space from the FeatTypeSplit to the
         # pipeline if needed
         self.categ_ppl = None
-        if "categorical" in self.feat_type.values():
+        if "categorical" in self.feat_type.values() or self.feat_type is None:
             self.categ_ppl = CategoricalPreprocessingPipeline(
                 feat_type=self.feat_type,
                 config=None,
@@ -108,7 +106,7 @@ def __init__(
         # TODO: Extract the child configuration space from the FeatTypeSplit to the
         # pipeline if needed
         self.numer_ppl = None
-        if "numerical" in self.feat_type.values():
+        if "numerical" in self.feat_type.values() or self.feat_type is None:
             self.numer_ppl = NumericalPreprocessingPipeline(
                 feat_type=self.feat_type,
                 config=None,
@@ -129,7 +127,7 @@ def __init__(
         # TODO: Extract the child configuration space from the FeatTypeSplit to the
         # pipeline if needed
         self.txt_ppl = None
-        if "string" in self.feat_type.values():
+        if "string" in self.feat_type.values() or self.feat_type is None:
             self.txt_ppl = TextPreprocessingPipeline(
                 feat_type=self.feat_type,
                 config=None,
@@ -298,7 +296,7 @@ def set_hyperparameters(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         self.dataset_properties = dataset_properties
@@ -313,10 +311,10 @@ def get_hyperparameter_search_space(
 
     @staticmethod
     def _get_hyperparameter_search_space_recursevely(
-        feat_type,
         dataset_properties: DATASET_PROPERTIES_TYPE,
         cs: ConfigurationSpace,
         transformer: BaseEstimator,
+        feat_type: Optional[Dict[Union[str, int], str]] = None
     ) -> ConfigurationSpace:
         for st_name, st_operation in transformer:
             if hasattr(st_operation, "get_hyperparameter_search_space"):
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
index 599221187b..5020a81b5b 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
@@ -46,7 +46,7 @@ class CategoricalPreprocessingPipeline(BasePipeline):
 
     def __init__(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         config: Optional[Configuration] = None,
         steps: Optional[List[Tuple[str, BaseEstimator]]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -94,7 +94,7 @@ def get_properties(
 
     def _get_hyperparameter_search_space(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -124,7 +124,7 @@ def _get_hyperparameter_search_space(
 
     def _get_pipeline_steps(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[str, BaseEstimator]]:
         steps = []
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py
index a50ede3985..fbba3b9172 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py
@@ -39,7 +39,7 @@ class NumericalPreprocessingPipeline(BasePipeline):
 
     def __init__(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         config: Optional[Configuration] = None,
         steps: Optional[List[Tuple[str, BaseEstimator]]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -87,7 +87,7 @@ def get_properties(
 
     def _get_hyperparameter_search_space(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -120,7 +120,7 @@ def _get_hyperparameter_search_space(
 
     def _get_pipeline_steps(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[str, BaseEstimator]]:
         steps = []
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
index e3d7078de2..beb4c099e0 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
@@ -34,7 +34,7 @@ class TextPreprocessingPipeline(BasePipeline):
 
     def __init__(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         config: Optional[Configuration] = None,
         steps: Optional[List[Tuple[str, BaseEstimator]]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -81,7 +81,7 @@ def get_properties(
 
     def _get_hyperparameter_search_space(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -114,7 +114,7 @@ def _get_hyperparameter_search_space(
 
     def _get_pipeline_steps(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[str, BaseEstimator]]:
         steps = []
diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py
index 40f3d1e93a..65a1542018 100644
--- a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py
+++ b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py
@@ -91,7 +91,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py
index 99ab3a33c0..b5945ca6a1 100644
--- a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py
+++ b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py
@@ -62,7 +62,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         # TODO add replace by zero!
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
index 6eff766085..61f8ebb83b 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 
 import os
 from collections import OrderedDict
@@ -37,12 +37,12 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
         return components
 
     def get_hyperparameter_search_space(
-        self,
-        feat_type,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
-        default: Optional[str] = None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
+            self,
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+            default: Optional[str] = None,
+            include: Optional[Dict[str, str]] = None,
+            exclude: Optional[Dict[str, str]] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
 
@@ -87,7 +87,9 @@ def get_hyperparameter_search_space(
         return cs
 
     def set_hyperparameters(
-        self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
+            self, configuration: Configuration,
+            init_params: Optional[Dict[str, Any]] = None,
+            feat_type: Optional[Dict[Union[str, int], str]] = None
     ) -> "CoalescenseChoice":
         new_params = {}
 
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
index 9052188190..a64bdbcc9d 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
@@ -15,7 +15,7 @@ class MinorityCoalescer(AutoSklearnPreprocessingAlgorithm):
 
     def __init__(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         minimum_fraction: float = 0.01,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
     ) -> None:
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
index 8c6314988e..40bbe5beaa 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
@@ -43,7 +43,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
index 8f3caedd83..4eaddd0999 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional
+from typing import Dict, Optional, Union
 
 import os
 from collections import OrderedDict
@@ -42,7 +42,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
index 7955d90b28..b0cc348673 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union
+from typing import Optional, Union, Dict
 
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -38,7 +38,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py
index 0c840fb44f..a797a5769a 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py
@@ -62,7 +62,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py
index 9b4e01843d..b9d25235d3 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py
@@ -59,7 +59,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
index 3cc71bc314..9c018894dc 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 
 import os
 from collections import OrderedDict
@@ -38,12 +38,12 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
         return components
 
     def get_hyperparameter_search_space(
-        self,
-        feat_type,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
-        default: Optional[str] = None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
+            self,
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+            default: Optional[str] = None,
+            include: Optional[Dict[str, str]] = None,
+            exclude: Optional[Dict[str, str]] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
 
@@ -89,7 +89,9 @@ def get_hyperparameter_search_space(
         return cs
 
     def set_hyperparameters(
-        self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
+            self, configuration: Configuration,
+            init_params: Optional[Dict[str, Any]] = None,
+            feat_type: Optional[Dict[Union[str, int], str]] = None
     ) -> "BagOfWordChoice":
         new_params = {}
 
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py
index 92692b1dda..9810006ffa 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py
@@ -95,7 +95,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py
index b2e6c0598d..2f23276824 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py
@@ -101,7 +101,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py
index 577cddf7d2..36238b4fa8 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py
@@ -100,7 +100,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py
index 1562b57249..be3ab9b00a 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py
@@ -74,7 +74,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
index 33f7a1a996..9849e348ed 100644
--- a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
+++ b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
@@ -49,7 +49,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        ffeat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
index bff511fe6e..0dbb4128d1 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
@@ -101,7 +101,7 @@ def get_available_components(
         return components_dict
 
     def get_hyperparameter_search_space(
-        self, feat_type, dataset_properties=None, default=None, include=None, exclude=None
+        self, feat_type=None, dataset_properties=None, default=None, include=None, exclude=None
     ):
         cs = ConfigurationSpace()
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
index 084fe8e40d..c45f9b1cf2 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
@@ -123,7 +123,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         cs = ConfigurationSpace()
 
         n_estimators = Constant("n_estimators", 100)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
index 23914b713f..98a5a5700c 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
@@ -125,7 +125,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         cs = ConfigurationSpace()
 
         n_estimators = Constant("n_estimators", 100)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
index 95ae7cee49..0076f14121 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
@@ -63,7 +63,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         cs = ConfigurationSpace()
         n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25)
         affinity = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
index 0fad8bc6b3..27b3446d57 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
@@ -82,7 +82,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         n_components = UniformIntegerHyperparameter(
             "n_components", 10, 2000, default_value=100
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
index 22d1cfd248..93673e75e9 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
@@ -69,7 +69,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         gamma = UniformFloatHyperparameter(
             "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
index f7ee87d80f..43135da483 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
@@ -91,7 +91,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         cs = ConfigurationSpace()
 
         penalty = Constant("penalty", "l1")
diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
index 016a44dd7b..0597cfcabe 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
@@ -34,6 +34,6 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         cs = ConfigurationSpace()
         return cs
diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
index 24ca8977a0..f93d8bbff3 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
@@ -94,7 +94,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         if dataset_properties is not None and (
             dataset_properties.get("sparse") is True
             or dataset_properties.get("signed") is False
diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py
index a1ad9f3981..d86b38cf22 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py
@@ -55,7 +55,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         keep_variance = UniformFloatHyperparameter(
             "keep_variance", 0.5, 0.9999, default_value=0.9999
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
index d17e6d43a4..0d4b166f35 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
@@ -54,7 +54,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         # More than degree 3 is too expensive!
         degree = UniformIntegerHyperparameter("degree", 2, 3, 2)
         interaction_only = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
index b6966b27ae..60b7df0c3a 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
@@ -94,7 +94,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         n_estimators = UniformIntegerHyperparameter(
             name="n_estimators", lower=10, upper=100, default_value=10
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
index fdb15c50bd..3fa80f0ca1 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
@@ -110,7 +110,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         percentile = UniformFloatHyperparameter(
             name="percentile", lower=1, upper=99, default_value=50
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
index cdec8fe152..0f489f933f 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
@@ -53,7 +53,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         percentile = UniformFloatHyperparameter(
             "percentile", lower=1, upper=99, default_value=50
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
index cb002c9b6d..c21ff3d7cb 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
@@ -116,7 +116,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=0.01, upper=0.5, default_value=0.1
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
index e9a5f7b943..a708b18e9f 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
@@ -84,7 +84,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=0.01, upper=0.5, default_value=0.1
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
index 214b0346d2..d515c9552a 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
@@ -48,7 +48,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type, dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         target_dim = UniformIntegerHyperparameter(
             "target_dim", 10, 256, default_value=128
         )
diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py
index 090083b5ba..cdeb73af45 100644
--- a/autosklearn/pipeline/regression.py
+++ b/autosklearn/pipeline/regression.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union
+from typing import Optional, Union, Dict
 
 import copy
 from itertools import product
@@ -67,7 +67,7 @@ class SimpleRegressionPipeline(RegressorMixin, BasePipeline):
 
     def __init__(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         config: Optional[Configuration] = None,
         steps=None,
         dataset_properties=None,
@@ -114,7 +114,7 @@ def predict(self, X, batch_size=None):
         return y
 
     def _get_hyperparameter_search_space(
-        self, feat_type, include=None, exclude=None, dataset_properties=None
+        self, feat_type=None, include=None, exclude=None, dataset_properties=None
     ):
         """Return the configuration space for the CASH problem.
 
@@ -262,7 +262,7 @@ def _get_hyperparameter_search_space(
     def _get_estimator_components(self):
         return regression_components._regressors
 
-    def _get_pipeline_steps(self, feat_type, dataset_properties, init_params=None):
+    def _get_pipeline_steps(self, dataset_properties, feat_type=None, init_params=None):
         steps = []
 
         default_dataset_properties = {"target_type": "regression"}
diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py
index bc6a27a71c..61e52f27aa 100755
--- a/autosklearn/util/pipeline.py
+++ b/autosklearn/util/pipeline.py
@@ -13,12 +13,13 @@
 )
 from autosklearn.pipeline.classification import SimpleClassificationPipeline
 from autosklearn.pipeline.regression import SimpleRegressionPipeline
+from autosklearn.data.xy_data_manager import XYDataManager
 
 __all__ = ["get_configuration_space"]
 
 
 def get_configuration_space(
-    datamanager: Dict[str, Any],
+    datamanager: XYDataManager,
     include: Optional[Dict[str, List[str]]] = None,
     exclude: Optional[Dict[str, List[str]]] = None,
     random_state: Optional[Union[int, np.random.RandomState]] = None,
@@ -27,8 +28,8 @@ def get_configuration_space(
 
     Parameters
     ----------
-    info: Dict[str, Any]
-        Information about the dataset
+    datamanager: XYDataManager
+        XYDataManger object storing all important information about the dataset
 
     include: Optional[Dict[str, List[str]]] = None
         A dictionary of what components to include for each pipeline step
@@ -53,7 +54,7 @@ def get_configuration_space(
 
 
 def _get_regression_configuration_space(
-    datamanager: Dict[str, Any],
+    datamanager: XYDataManager,
     include: Optional[Dict[str, List[str]]],
     exclude: Optional[Dict[str, List[str]]],
     random_state: Optional[Union[int, np.random.RandomState]] = None,
@@ -62,8 +63,8 @@ def _get_regression_configuration_space(
 
     Parameters
     ----------
-    info: Dict[str, Any]
-        Information about the dataset
+    datamanager: XYDataManager
+        XYDataManger object storing all important information about the dataset
 
     include: Optional[Dict[str, List[str]]] = None
         A dictionary of what components to include for each pipeline step
@@ -101,7 +102,7 @@ def _get_regression_configuration_space(
 
 
 def _get_classification_configuration_space(
-    datamanager: Dict[str, Any],
+    datamanager: XYDataManager,
     include: Optional[Dict[str, List[str]]],
     exclude: Optional[Dict[str, List[str]]],
     random_state: Optional[Union[int, np.random.RandomState]] = None,
@@ -110,8 +111,8 @@ def _get_classification_configuration_space(
 
     Parameters
     ----------
-    info: Dict[str, Any]
-        Information about the dataset
+   datamanager: XYDataManager
+        XYDataManger object storing all important information about the dataset
 
     include: Optional[Dict[str, List[str]]] = None
         A dictionary of what components to include for each pipeline step

From a5c9bad21022b9f9b4189bc00d1c470b7ec9ee0c Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 14:30:10 +0200
Subject: [PATCH 09/63] fixing ensemble builder

---
 autosklearn/pipeline/base.py                                 | 5 ++++-
 .../minority_coalescense/minority_coalescer.py               | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index 9e25af6802..29f8702f26 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -282,6 +282,9 @@ def _get_hyperparameter_search_space(
 
         Parameters
         ----------
+        feat_type: dict
+            python dictionary which maps the columns of the dataset to the data types
+
         estimator_name : str
             Name of the estimator hyperparameter which will be used in the
             configuration space. For a classification task, this would be
@@ -311,7 +314,7 @@ def _get_hyperparameter_search_space(
         raise NotImplementedError()
 
     def _get_base_search_space(
-        self, feat_type, cs, dataset_properties, exclude, include, pipeline
+        self, cs, dataset_properties, exclude, include, pipeline, feat_type=None
     ):
         if include is None:
             if self.include is None:
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
index a64bdbcc9d..737e8c85f1 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
@@ -60,7 +60,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()

From 7f2f14b680bd62cb4f10d722960c765a5566c921 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 14:39:18 +0200
Subject: [PATCH 10/63] fixing ensemble builder

---
 autosklearn/pipeline/classification.py                |  2 +-
 .../variance_threshold/variance_threshold.py          |  2 +-
 .../test_data_preprocessing_numerical.py              | 11 ++++++-----
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index 7819cd9e8f..4a6b267d1b 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union
+from typing import Optional, Union, Dict
 
 import copy
 from itertools import product
diff --git a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
index 9849e348ed..f11c07a2d2 100644
--- a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
+++ b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
@@ -49,7 +49,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        ffeat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py
index d25cef2a2b..638a7e958d 100644
--- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py
+++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py
@@ -11,13 +11,13 @@
 class NumericalPreprocessingPipelineTest(unittest.TestCase):
     def test_data_type_consistency(self):
         X = np.random.rand(3, 4)
-        Y = NumericalPreprocessingPipeline().fit_transform(X)
+        Y = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X)
         self.assertFalse(sparse.issparse(Y))
 
         X = sparse.csc_matrix(
             ([3.0, 6.0, 4.0, 5.0], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4)
         )
-        Y = NumericalPreprocessingPipeline().fit_transform(X)
+        Y = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X)
         self.assertTrue(sparse.issparse(Y))
 
     def test_fit_transform(self):
@@ -37,12 +37,13 @@ def test_fit_transform(self):
             ]
         )  # noqa : matrix legibility
         # dense input
-        Yt = NumericalPreprocessingPipeline().fit_transform(X)
+        Yt = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X)
         np.testing.assert_array_almost_equal(Yt, Y1)
         # sparse input (uses with_mean=False)
         Y2 = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]) / sdev
         X_sparse = sparse.csc_matrix(X)
-        Yt = NumericalPreprocessingPipeline().fit_transform(X_sparse)
+        Yt = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(
+            X_sparse)
         np.testing.assert_array_almost_equal(Yt.todense(), Y2)
 
     def test_transform(self):
@@ -51,7 +52,7 @@ def test_transform(self):
         )  # noqa : matrix legibility
         sdev = np.sqrt(2 / 3)
         # fit
-        NPP = NumericalPreprocessingPipeline()
+        NPP = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"})
         NPP.fit_transform(X1)
         # transform
         X2 = np.array([[1.0, 5.0, 8.0], [2.0, 6.0, 9.0], [3.0, 7.0, np.nan]])

From 68b051ec5f3882e8bd063d1bcb38d798be904053 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 19:22:31 +0200
Subject: [PATCH 11/63] fixing ensemble builder

---
 .../data_preprocessing/feature_type.py        | 99 +++++++++----------
 test/fixtures/ensembles.py                    | 23 ++---
 .../data_preprocessing/test_balancing.py      |  2 +-
 3 files changed, 57 insertions(+), 67 deletions(-)

diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index ceaec426ef..10738aaed0 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -64,20 +64,6 @@ def __init__(
         self.feat_type = feat_type
         self.force_sparse_output = force_sparse_output
 
-        # load global feat_type
-        # f = open(f'{os.path.dirname(os.path.realpath(__file__))}/../../../feat_type.json')
-        # self.feat_type = json.load(f)
-        # is_number = True
-        # for key in self.feat_type.keys():
-        #     is_number *= key.isnumeric()
-        # if is_number:
-        #     self.feat_type = {int(key): value for key, value in self.feat_type.items()}
-
-        self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
-
-        # if self.feat_type is None:
-        #     raise ValueError("feat_type init requires feat_type")
-
         # The pipeline that will be applied to the categorical features (i.e. columns)
         # of the dataset
         # Configuration of the data-preprocessor is different from the configuration of
@@ -85,19 +71,16 @@ def __init__(
         # It is actually the call to set_hyperparameter who properly sets this argument
         # TODO: Extract the child configuration space from the FeatTypeSplit to the
         # pipeline if needed
-        self.categ_ppl = None
-        if "categorical" in self.feat_type.values() or self.feat_type is None:
-            self.categ_ppl = CategoricalPreprocessingPipeline(
-                feat_type=self.feat_type,
-                config=None,
-                steps=pipeline,
-                dataset_properties=dataset_properties,
-                include=include,
-                exclude=exclude,
-                random_state=random_state,
-                init_params=init_params,
-            )
-            self._transformers.append(("categorical_transformer", self.categ_ppl))
+        self.categ_ppl = CategoricalPreprocessingPipeline(
+            feat_type=self.feat_type,
+            config=None,
+            steps=pipeline,
+            dataset_properties=dataset_properties,
+            include=include,
+            exclude=exclude,
+            random_state=random_state,
+            init_params=init_params,
+        )
         # The pipeline that will be applied to the numerical features (i.e. columns)
         # of the dataset
         # Configuration of the data-preprocessor is different from the configuration of
@@ -105,19 +88,16 @@ def __init__(
         # It is actually the call to set_hyperparameter who properly sets this argument
         # TODO: Extract the child configuration space from the FeatTypeSplit to the
         # pipeline if needed
-        self.numer_ppl = None
-        if "numerical" in self.feat_type.values() or self.feat_type is None:
-            self.numer_ppl = NumericalPreprocessingPipeline(
-                feat_type=self.feat_type,
-                config=None,
-                steps=pipeline,
-                dataset_properties=dataset_properties,
-                include=include,
-                exclude=exclude,
-                random_state=random_state,
-                init_params=init_params,
-            )
-            self._transformers.append(("numerical_transformer", self.numer_ppl))
+        self.numer_ppl = NumericalPreprocessingPipeline(
+            feat_type=self.feat_type,
+            config=None,
+            steps=pipeline,
+            dataset_properties=dataset_properties,
+            include=include,
+            exclude=exclude,
+            random_state=random_state,
+            init_params=init_params,
+        )
 
         # The pipeline that will be applied to the text features (i.e. columns)
         # of the dataset
@@ -126,22 +106,31 @@ def __init__(
         # It is actually the call to set_hyperparameter who properly sets this argument
         # TODO: Extract the child configuration space from the FeatTypeSplit to the
         # pipeline if needed
-        self.txt_ppl = None
-        if "string" in self.feat_type.values() or self.feat_type is None:
-            self.txt_ppl = TextPreprocessingPipeline(
-                feat_type=self.feat_type,
-                config=None,
-                steps=pipeline,
-                dataset_properties=dataset_properties,
-                include=include,
-                exclude=exclude,
-                random_state=random_state,
-                init_params=init_params,
-            )
-            self._transformers.append(("text_transformer", self.txt_ppl))
+        self.txt_ppl = TextPreprocessingPipeline(
+            feat_type=self.feat_type,
+            config=None,
+            steps=pipeline,
+            dataset_properties=dataset_properties,
+            include=include,
+            exclude=exclude,
+            random_state=random_state,
+            init_params=init_params,
+        )
 
-        if self.config:
-            self.set_hyperparameters(feat_type=self.feat_type, configuration=self.config, init_params=init_params)
+        if self.feat_type is None:
+            self._transformers: List[Tuple[str, AutoSklearnComponent]] = [("categorical_transformer", self.categ_ppl),
+                                                                          ("numerical_transformer", self.numer_ppl),
+                                                                          ("text_transformer", self.txt_ppl)]
+        else:
+            self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
+            if "categorical" in self.feat_type.values():
+                self._transformers.append(("categorical_transformer", self.categ_ppl))
+            if "numerical" in self.feat_type.values():
+                self._transformers.append(("numerical_transformer", self.numer_ppl))
+            if "string" in self.feat_type.values():
+                self._transformers.append(("text_transformer", self.txt_ppl))
+            if self.config:
+                self.set_hyperparameters(feat_type=self.feat_type, configuration=self.config, init_params=init_params)
         self.column_transformer = column_transformer
 
     def fit(
diff --git a/test/fixtures/ensembles.py b/test/fixtures/ensembles.py
index 467c53822f..82673d2c2d 100644
--- a/test/fixtures/ensembles.py
+++ b/test/fixtures/ensembles.py
@@ -36,15 +36,15 @@ def make_voting_classifier() -> Callable[..., VotingClassifier]:
     """
 
     def _make(
-        X: Optional[SUPPORTED_FEAT_TYPES] = None,
-        y: Optional[SUPPORTED_TARGET_TYPES] = None,
-        models: Optional[Collection[AutoSklearnClassificationAlgorithm]] = None,
-        seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED,
+            X: Optional[SUPPORTED_FEAT_TYPES] = None,
+            y: Optional[SUPPORTED_TARGET_TYPES] = None,
+            models: Optional[Collection[AutoSklearnClassificationAlgorithm]] = None,
+            seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED,
     ) -> VotingClassifier:
         assert not (X is None) ^ (y is None)
-
         if not models:
-            models = [MyDummyClassifier(config=1, random_state=seed) for _ in range(5)]
+            models = [MyDummyClassifier(feat_type={i: "numerical" for i in range(4)}, config=1, random_state=seed) for _
+                      in range(5)]
 
         if X is not None:
             for model in models:
@@ -73,15 +73,16 @@ def make_voting_regressor() -> Callable[..., VotingRegressor]:
     """
 
     def _make(
-        X: Optional[SUPPORTED_FEAT_TYPES] = None,
-        y: Optional[SUPPORTED_TARGET_TYPES] = None,
-        models: Optional[Collection[AutoSklearnRegressionAlgorithm]] = None,
-        seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED,
+            X: Optional[SUPPORTED_FEAT_TYPES] = None,
+            y: Optional[SUPPORTED_TARGET_TYPES] = None,
+            models: Optional[Collection[AutoSklearnRegressionAlgorithm]] = None,
+            seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED,
     ) -> VotingRegressor:
         assert not (X is None) ^ (y is None)
 
         if not models:
-            models = [MyDummyRegressor(config=1, random_state=seed) for _ in range(5)]
+            models = [MyDummyRegressor(feat_type={i: "numerical" for i in range(4)}, config=1, random_state=seed) for _
+                      in range(5)]
 
         if X is not None:
             for model in models:
diff --git a/test/test_pipeline/components/data_preprocessing/test_balancing.py b/test/test_pipeline/components/data_preprocessing/test_balancing.py
index 6a76ce419c..a128559833 100644
--- a/test/test_pipeline/components/data_preprocessing/test_balancing.py
+++ b/test/test_pipeline/components/data_preprocessing/test_balancing.py
@@ -215,7 +215,7 @@ def test_weighting_effect(self):
 
                 default._values["balancing:strategy"] = strategy
                 classifier = SimpleClassificationPipeline(
-                    default, random_state=1, include=include
+                    config=default, random_state=1, include=include
                 )
                 Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
                 classifier.fit_estimator(Xt, Y_train, **fit_params)

From b940a974a068847fea21b64a8fc1ea1f0da58e5b Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 19:46:01 +0200
Subject: [PATCH 12/63] fixing ensemble builder

---
 test/test_evaluation/test_dummy_pipelines.py |  2 +-
 test/test_evaluation/test_test_evaluator.py  | 10 +++++++---
 test/test_evaluation/test_train_evaluator.py | 15 +++++++++------
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/test/test_evaluation/test_dummy_pipelines.py b/test/test_evaluation/test_dummy_pipelines.py
index 8d1005e178..1d7b1b5f83 100644
--- a/test/test_evaluation/test_dummy_pipelines.py
+++ b/test/test_evaluation/test_dummy_pipelines.py
@@ -23,8 +23,8 @@ def test_dummy_pipeline(task_type: str) -> None:
         pytest.fail(task_type)
         return
 
-    estimator = estimator_class(config=1, random_state=0)
     X, y = data_maker(random_state=0)
+    estimator = estimator_class(feat_type={i: "numerical" for i in range(X.shape[0])}, config=1, random_state=0)
     estimator.fit(X, y)
     check_is_fitted(estimator)
 
diff --git a/test/test_evaluation/test_test_evaluator.py b/test/test_evaluation/test_test_evaluator.py
index 457661df03..79af5a112d 100644
--- a/test/test_evaluation/test_test_evaluator.py
+++ b/test/test_evaluation/test_test_evaluator.py
@@ -86,12 +86,16 @@ def test_datasets(self):
                 self.assertTrue(np.isfinite(rval[0]["loss"]))
 
 
+class DummyDatamanager():
+    def __init__(self):
+        self.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}
+        self.feat_type = {0: 'numerical', 1: 'Numerical', 2: 'numerical', 3: 'numerical'}
+
+
 class FunctionsTest(unittest.TestCase):
     def setUp(self):
         self.queue = multiprocessing.Queue()
-        self.configuration = get_configuration_space(
-            {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}
-        ).get_default_configuration()
+        self.configuration = get_configuration_space(DummyDatamanager()).get_default_configuration()
         self.data = get_multiclass_classification_datamanager()
         self.tmp_dir = os.path.join(os.path.dirname(__file__), ".test_cv_functions")
         self.backend = unittest.mock.Mock(spec=Backend)
diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py
index 9413af5509..034453fa9d 100644
--- a/test/test_evaluation/test_train_evaluator.py
+++ b/test/test_evaluation/test_train_evaluator.py
@@ -2940,13 +2940,16 @@ def test_holdout_split_size(self, te_mock):
         self.assertEqual(len(train_samples), 6)
         self.assertEqual(len(test_samples), 3)
 
+class DummyDatamanager():
+    def __init__(self):
+        self.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}
+        self.feat_type = {0: 'numerical', 1: 'Numerical', 2: 'numerical', 3: 'numerical'}
+
 
 class FunctionsTest(unittest.TestCase):
     def setUp(self):
         self.queue = multiprocessing.Queue()
-        self.configuration = get_configuration_space(
-            {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}
-        ).get_default_configuration()
+        self.configuration = get_configuration_space(DummyDatamanager()).get_default_configuration()
         self.data = get_multiclass_classification_datamanager()
         self.tmp_dir = os.path.join(
             os.path.dirname(__file__), ".test_holdout_functions"
@@ -3205,7 +3208,7 @@ def test_eval_holdout_budget_iterations_multi_objective(self):
     def test_eval_holdout_budget_iterations_converged_multi_objective(self):
         configuration = get_configuration_space(
             exclude={"classifier": ["random_forest", "liblinear_svc"]},
-            info={"task": MULTICLASS_CLASSIFICATION, "is_sparse": False},
+            datamanager=DummyDatamanager(),
         ).get_default_configuration()
         eval_holdout(
             queue=self.queue,
@@ -3240,7 +3243,7 @@ def test_eval_holdout_budget_iterations_converged(self):
         }
         configuration = get_configuration_space(
             exclude={"classifier": ["random_forest", "liblinear_svc"]},
-            info={"task": MULTICLASS_CLASSIFICATION, "is_sparse": False},
+            datamanager=DummyDatamanager(),
         ).get_default_configuration()
         eval_holdout(
             queue=self.queue,
@@ -3357,7 +3360,7 @@ def test_eval_holdout_budget_mixed_iterations(self):
     def test_eval_holdout_budget_mixed_subsample(self):
         configuration = get_configuration_space(
             exclude={"classifier": ["random_forest"]},
-            info={"task": MULTICLASS_CLASSIFICATION, "is_sparse": False},
+            datamanager=DummyDatamanager(),
         ).get_default_configuration()
         self.assertEqual(configuration["classifier:__choice__"], "liblinear_svc")
         eval_holdout(

From f12419016bc05b426e36cf2c18ac7834f0245d20 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 20:06:59 +0200
Subject: [PATCH 13/63] Accept feat_type in NoCoalescence.__init__

---
 .../minority_coalescense/no_coalescense.py            | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
index 40bbe5beaa..84025f3f17 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
@@ -10,12 +10,13 @@
 
 class NoCoalescence(AutoSklearnPreprocessingAlgorithm):
     def __init__(
-        self, random_state: Optional[Union[int, np.random.RandomState]] = None
+            self, random_state: Optional[Union[int, np.random.RandomState]] = None,
+            feat_type: Optional[Dict[Union[str, int], str]] = None
     ) -> None:
         pass
 
     def fit(
-        self, X: np.array, y: Optional[PIPELINE_DATA_DTYPE] = None
+            self, X: np.array, y: Optional[PIPELINE_DATA_DTYPE] = None
     ) -> PIPELINE_DATA_DTYPE:
         self.preprocessor = "passthrough"
         return self
@@ -25,7 +26,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
 
     @staticmethod
     def get_properties(
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]:
         return {
             "shortname": "no coalescence",
@@ -43,8 +44,8 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
         return cs

From 91007492d0bf63db0a14a295f09c04393cd0fd39 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 20:20:02 +0200
Subject: [PATCH 14/63] fixing ensemble builder

---
 autosklearn/evaluation/train_evaluator.py     |  4 +-
 .../metalearning/metalearning/meta_base.py    |  4 +-
 autosklearn/pipeline/base.py                  | 24 +++++++----
 autosklearn/pipeline/classification.py        |  2 +-
 autosklearn/pipeline/components/base.py       |  7 +++-
 .../components/classification/__init__.py     |  7 +++-
 .../components/data_preprocessing/__init__.py |  8 ++--
 .../categorical_encoding/__init__.py          |  5 ++-
 .../data_preprocessing/feature_type.py        | 42 +++++++++++++------
 .../feature_type_categorical.py               |  2 +-
 .../data_preprocessing/feature_type_text.py   |  7 ++--
 .../minority_coalescense/__init__.py          | 19 +++++----
 .../minority_coalescense/no_coalescense.py    | 13 +++---
 .../data_preprocessing/rescaling/__init__.py  |  4 +-
 .../rescaling/abstract_rescaling.py           |  2 +-
 .../text_encoding/__init__.py                 | 23 +++++-----
 .../feature_preprocessing/__init__.py         |  7 +++-
 .../components/regression/__init__.py         |  7 +++-
 autosklearn/pipeline/regression.py            |  4 +-
 autosklearn/util/pipeline.py                  | 34 ++++++++-------
 test/fixtures/ensembles.py                    | 36 ++++++++++------
 test/test_evaluation/test_dummy_pipelines.py  |  4 +-
 test/test_evaluation/test_test_evaluator.py   | 13 ++++--
 test/test_evaluation/test_train_evaluator.py  | 14 +++++--
 .../test_data_preprocessing_numerical.py      | 21 +++++++---
 25 files changed, 207 insertions(+), 106 deletions(-)

diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py
index 228a685c37..c27b3c36f3 100644
--- a/autosklearn/evaluation/train_evaluator.py
+++ b/autosklearn/evaluation/train_evaluator.py
@@ -321,7 +321,9 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                 Y_test_pred = [None] * self.num_cv_folds
                 train_splits = [None] * self.num_cv_folds
 
-                self.models = [self._get_model(self.feat_type) for i in range(self.num_cv_folds)]
+                self.models = [
+                    self._get_model(self.feat_type) for i in range(self.num_cv_folds)
+                ]
                 iterations = [1] * self.num_cv_folds
                 total_n_iterations = [0] * self.num_cv_folds
                 # model.estimator_supports_iterative_fit -> true
diff --git a/autosklearn/metalearning/metalearning/meta_base.py b/autosklearn/metalearning/metalearning/meta_base.py
index 61f16297fe..a85ec6a279 100644
--- a/autosklearn/metalearning/metalearning/meta_base.py
+++ b/autosklearn/metalearning/metalearning/meta_base.py
@@ -42,7 +42,9 @@ def __init__(self, configuration_space, aslib_directory, logger):
         self.configuration_space = configuration_space
         self.aslib_directory = aslib_directory
 
-        aslib_reader = aslib_simple.AlgorithmSelectionProblem(self.aslib_directory, self.configuration_space)
+        aslib_reader = aslib_simple.AlgorithmSelectionProblem(
+            self.aslib_directory, self.configuration_space
+        )
         self.metafeatures = aslib_reader.metafeatures
         self.algorithm_runs: OrderedDict[
             str, pd.DataFrame
diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index 29f8702f26..eacc4ffacf 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -54,7 +54,9 @@ def __init__(
         self.feat_type = feat_type
 
         if steps is None:
-            self.steps = self._get_pipeline_steps(feat_type=feat_type, dataset_properties=dataset_properties)
+            self.steps = self._get_pipeline_steps(
+                feat_type=feat_type, dataset_properties=dataset_properties
+            )
         else:
             self.steps = steps
 
@@ -84,7 +86,9 @@ def __init__(
                 )
             self.config = config
 
-        self.set_hyperparameters(self.config, feat_type=feat_type, init_params=init_params)
+        self.set_hyperparameters(
+            self.config, feat_type=feat_type, init_params=init_params
+        )
 
         super().__init__(steps=self.steps)
 
@@ -211,8 +215,7 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
             node_name, node = n_
 
             sub_configuration_space = node.get_hyperparameter_search_space(
-                feat_type=feat_type,
-                dataset_properties=self.dataset_properties
+                feat_type=feat_type, dataset_properties=self.dataset_properties
             )
             sub_config_dict = {}
             for param in configuration:
@@ -239,7 +242,9 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
                 node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline)
             ):
                 node.set_hyperparameters(
-                    feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict
+                    feat_type=feat_type,
+                    configuration=sub_configuration,
+                    init_params=sub_init_params_dict,
                 )
             else:
                 raise NotImplementedError("Not supported yet!")
@@ -350,7 +355,10 @@ def _get_base_search_space(
             dataset_properties["signed"] = False
 
         matches = autosklearn.pipeline.create_searchspace_util.get_match_array(
-            pipeline=pipeline, dataset_properties=dataset_properties, include=include, exclude=exclude
+            pipeline=pipeline,
+            dataset_properties=dataset_properties,
+            include=include,
+            exclude=exclude,
         )
 
         # Now we have only legal combinations at this step of the pipeline
@@ -393,7 +401,9 @@ def _get_base_search_space(
                     )
                 )
                 sub_config_space = node.get_hyperparameter_search_space(
-                    feat_type=feat_type, dataset_properties=dataset_properties, include=choices_list
+                    feat_type=feat_type,
+                    dataset_properties=dataset_properties,
+                    include=choices_list,
                 )
                 cs.add_configuration_space(node_name, sub_config_space)
 
diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index 4a6b267d1b..203d26877c 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union, Dict
+from typing import Dict, Optional, Union
 
 import copy
 from itertools import product
diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py
index 59aa210248..c4d4485a7d 100644
--- a/autosklearn/pipeline/components/base.py
+++ b/autosklearn/pipeline/components/base.py
@@ -438,7 +438,12 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
         return self
 
     def get_hyperparameter_search_space(
-        self, feat_type, dataset_properties=None, default=None, include=None, exclude=None
+        self,
+        feat_type,
+        dataset_properties=None,
+        default=None,
+        include=None,
+        exclude=None,
     ):
         raise NotImplementedError()
 
diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
index e7d30e9e0b..ae9a09ff66 100644
--- a/autosklearn/pipeline/components/classification/__init__.py
+++ b/autosklearn/pipeline/components/classification/__init__.py
@@ -86,7 +86,12 @@ def get_available_components(
         return components_dict
 
     def get_hyperparameter_search_space(
-        self, feat_type=None, dataset_properties=None, default=None, include=None, exclude=None
+        self,
+        feat_type=None,
+        dataset_properties=None,
+        default=None,
+        include=None,
+        exclude=None,
     ):
         if dataset_properties is None:
             dataset_properties = {}
diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py
index 24b6788d63..80d0d2fc9e 100644
--- a/autosklearn/pipeline/components/data_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py
@@ -137,8 +137,7 @@ def get_hyperparameter_search_space(
         cs.add_hyperparameter(preprocessor)
         for name in available_preprocessors:
             preprocessor_configuration_space = available_preprocessors[name](
-                feat_type=feat_type,
-                dataset_properties=dataset_properties
+                feat_type=feat_type, dataset_properties=dataset_properties
             ).get_hyperparameter_search_space(dataset_properties)
             parent_hyperparameter = {"parent": preprocessor, "value": name}
             cs.add_configuration_space(
@@ -152,7 +151,10 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
         return self.choice.transform(X)
 
     def set_hyperparameters(
-        self, configuration: ConfigurationSpace, init_params: Optional[Dict] = None, feat_type=None
+        self,
+        configuration: ConfigurationSpace,
+        init_params: Optional[Dict] = None,
+        feat_type=None,
     ) -> "DataPreprocessorChoice":
         config = {}
         params = configuration.get_dictionary()
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
index bb640b484b..40ddb6e9c6 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
@@ -87,7 +87,10 @@ def get_hyperparameter_search_space(
         return cs
 
     def set_hyperparameters(
-        self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
+        self,
+        feat_type,
+        configuration: Configuration,
+        init_params: Optional[Dict[str, Any]] = None,
     ) -> "OHEChoice":
         new_params = {}
 
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index 10738aaed0..8e7d2be98d 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -118,9 +118,11 @@ def __init__(
         )
 
         if self.feat_type is None:
-            self._transformers: List[Tuple[str, AutoSklearnComponent]] = [("categorical_transformer", self.categ_ppl),
-                                                                          ("numerical_transformer", self.numer_ppl),
-                                                                          ("text_transformer", self.txt_ppl)]
+            self._transformers: List[Tuple[str, AutoSklearnComponent]] = [
+                ("categorical_transformer", self.categ_ppl),
+                ("numerical_transformer", self.numer_ppl),
+                ("text_transformer", self.txt_ppl),
+            ]
         else:
             self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
             if "categorical" in self.feat_type.values():
@@ -130,7 +132,11 @@ def __init__(
             if "string" in self.feat_type.values():
                 self._transformers.append(("text_transformer", self.txt_ppl))
             if self.config:
-                self.set_hyperparameters(feat_type=self.feat_type, configuration=self.config, init_params=init_params)
+                self.set_hyperparameters(
+                    feat_type=self.feat_type,
+                    configuration=self.config,
+                    init_params=init_params,
+                )
         self.column_transformer = column_transformer
 
     def fit(
@@ -162,7 +168,9 @@ def fit(
                 if value.lower() == "categorical"
             ]
             if len(categorical_features) > 0:
-                transformer_lst.append(("categorical_transformer", self.categ_ppl, categorical_features))
+                transformer_lst.append(
+                    ("categorical_transformer", self.categ_ppl, categorical_features)
+                )
 
             numerical_features = [
                 key
@@ -170,7 +178,9 @@ def fit(
                 if value.lower() == "numerical"
             ]
             if len(numerical_features) > 0:
-                transformer_lst.append(("numerical_transformer", self.numer_ppl, numerical_features))
+                transformer_lst.append(
+                    ("numerical_transformer", self.numer_ppl, numerical_features)
+                )
 
             text_features = [
                 key
@@ -178,7 +188,9 @@ def fit(
                 if value.lower() == "string"
             ]
             if len(text_features) > 0:
-                transformer_lst.append(("text_transformer", self.txt_ppl, text_features))
+                transformer_lst.append(
+                    ("text_transformer", self.txt_ppl, text_features)
+                )
 
             sklearn_transf_spec = [
                 (name, transformer, feature_columns)
@@ -240,7 +252,10 @@ def get_properties(
         }
 
     def set_hyperparameters(
-        self, feat_type, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None
+        self,
+        feat_type,
+        configuration: Configuration,
+        init_params: Optional[Dict[str, Any]] = None,
     ) -> "FeatTypeSplit":
         if init_params is not None and "feat_type" in init_params.keys():
             self.feat_type = init_params["feat_type"]
@@ -249,8 +264,7 @@ def set_hyperparameters(
 
         for transf_name, transf_op in self._transformers:
             sub_configuration_space = transf_op.get_hyperparameter_search_space(
-                dataset_properties=self.dataset_properties,
-                feat_type=feat_type
+                dataset_properties=self.dataset_properties, feat_type=feat_type
             )
             sub_config_dict = {}
             for param in configuration:
@@ -276,7 +290,9 @@ def set_hyperparameters(
                 transf_op, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline)
             ):
                 transf_op.set_hyperparameters(
-                    feat_type=feat_type, configuration=sub_configuration, init_params=sub_init_params_dict
+                    feat_type=feat_type,
+                    configuration=sub_configuration,
+                    init_params=sub_init_params_dict,
                 )
             else:
                 raise NotImplementedError("Not supported yet!")
@@ -294,7 +310,7 @@ def get_hyperparameter_search_space(
             feat_type=feat_type,
             dataset_properties=dataset_properties,
             cs=cs,
-            transformer=self._transformers
+            transformer=self._transformers,
         )
         return cs
 
@@ -303,7 +319,7 @@ def _get_hyperparameter_search_space_recursevely(
         dataset_properties: DATASET_PROPERTIES_TYPE,
         cs: ConfigurationSpace,
         transformer: BaseEstimator,
-        feat_type: Optional[Dict[Union[str, int], str]] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
     ) -> ConfigurationSpace:
         for st_name, st_operation in transformer:
             if hasattr(st_operation, "get_hyperparameter_search_space"):
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
index 5020a81b5b..5ad0aabe70 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
@@ -64,7 +64,7 @@ def __init__(
             exclude=exclude,
             random_state=random_state,
             init_params=init_params,
-            feat_type=feat_type
+            feat_type=feat_type,
         )
 
     @staticmethod
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
index beb4c099e0..5cbb962ae5 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
@@ -52,7 +52,7 @@ def __init__(
             exclude=exclude,
             random_state=random_state,
             init_params=init_params,
-            feat_type=feat_type
+            feat_type=feat_type,
         )
 
     @staticmethod
@@ -130,13 +130,12 @@ def _get_pipeline_steps(
                     BagOfWordChoice(
                         feat_type=feat_type,
                         dataset_properties=default_dataset_properties,
-                        random_state=self.random_state
+                        random_state=self.random_state,
                     ),
                 ),
                 (
                     "text_feature_reduction",
-                    TextFeatureReduction(
-                        random_state=self.random_state),
+                    TextFeatureReduction(random_state=self.random_state),
                 ),
             ]
         )
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
index 61f8ebb83b..438a4ce681 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
@@ -37,12 +37,12 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
         return components
 
     def get_hyperparameter_search_space(
-            self,
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
-            default: Optional[str] = None,
-            include: Optional[Dict[str, str]] = None,
-            exclude: Optional[Dict[str, str]] = None,
+        self,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        default: Optional[str] = None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
 
@@ -87,9 +87,10 @@ def get_hyperparameter_search_space(
         return cs
 
     def set_hyperparameters(
-            self, configuration: Configuration,
-            init_params: Optional[Dict[str, Any]] = None,
-            feat_type: Optional[Dict[Union[str, int], str]] = None
+        self,
+        configuration: Configuration,
+        init_params: Optional[Dict[str, Any]] = None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
     ) -> "CoalescenseChoice":
         new_params = {}
 
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
index 84025f3f17..433d9a8247 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
@@ -10,13 +10,14 @@
 
 class NoCoalescence(AutoSklearnPreprocessingAlgorithm):
     def __init__(
-            self, random_state: Optional[Union[int, np.random.RandomState]] = None,
-            feat_type: Optional[Dict[Union[str, int], str]] = None
+        self,
+        random_state: Optional[Union[int, np.random.RandomState]] = None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
     ) -> None:
         pass
 
     def fit(
-            self, X: np.array, y: Optional[PIPELINE_DATA_DTYPE] = None
+        self, X: np.array, y: Optional[PIPELINE_DATA_DTYPE] = None
     ) -> PIPELINE_DATA_DTYPE:
         self.preprocessor = "passthrough"
         return self
@@ -26,7 +27,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
 
     @staticmethod
     def get_properties(
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]:
         return {
             "shortname": "no coalescence",
@@ -44,8 +45,8 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
         return cs
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
index 4eaddd0999..d7b01c7a93 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
@@ -75,7 +75,9 @@ def get_hyperparameter_search_space(
         for name in available_preprocessors:
             preprocessor_configuration_space = available_preprocessors[
                 name
-            ].get_hyperparameter_search_space(feat_type=feat_type, dataset_properties=dataset_properties)
+            ].get_hyperparameter_search_space(
+                feat_type=feat_type, dataset_properties=dataset_properties
+            )
             parent_hyperparameter = {"parent": preprocessor, "value": name}
             cs.add_configuration_space(
                 name,
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
index b0cc348673..ba97eee886 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union, Dict
+from typing import Dict, Optional, Union
 
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
index 9c018894dc..325beec9fa 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
@@ -38,12 +38,12 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
         return components
 
     def get_hyperparameter_search_space(
-            self,
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
-            default: Optional[str] = None,
-            include: Optional[Dict[str, str]] = None,
-            exclude: Optional[Dict[str, str]] = None,
+        self,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        default: Optional[str] = None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
 
@@ -76,7 +76,9 @@ def get_hyperparameter_search_space(
         for name in available_preprocessors:
             preprocessor_configuration_space = available_preprocessors[
                 name
-            ].get_hyperparameter_search_space(feat_type=feat_type, dataset_properties=dataset_properties)
+            ].get_hyperparameter_search_space(
+                feat_type=feat_type, dataset_properties=dataset_properties
+            )
             parent_hyperparameter = {"parent": preprocessor, "value": name}
             cs.add_configuration_space(
                 name,
@@ -89,9 +91,10 @@ def get_hyperparameter_search_space(
         return cs
 
     def set_hyperparameters(
-            self, configuration: Configuration,
-            init_params: Optional[Dict[str, Any]] = None,
-            feat_type: Optional[Dict[Union[str, int], str]] = None
+        self,
+        configuration: Configuration,
+        init_params: Optional[Dict[str, Any]] = None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
     ) -> "BagOfWordChoice":
         new_params = {}
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
index 0dbb4128d1..d4f9bc6662 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
@@ -101,7 +101,12 @@ def get_available_components(
         return components_dict
 
     def get_hyperparameter_search_space(
-        self, feat_type=None, dataset_properties=None, default=None, include=None, exclude=None
+        self,
+        feat_type=None,
+        dataset_properties=None,
+        default=None,
+        include=None,
+        exclude=None,
     ):
         cs = ConfigurationSpace()
 
diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py
index 457d9813d3..08452b4809 100644
--- a/autosklearn/pipeline/components/regression/__init__.py
+++ b/autosklearn/pipeline/components/regression/__init__.py
@@ -79,7 +79,12 @@ def get_available_components(
         return components_dict
 
     def get_hyperparameter_search_space(
-        self, feat_type, dataset_properties=None, default=None, include=None, exclude=None
+        self,
+        feat_type,
+        dataset_properties=None,
+        default=None,
+        include=None,
+        exclude=None,
     ):
         if include is not None and exclude is not None:
             raise ValueError(
diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py
index cdeb73af45..529953cb18 100644
--- a/autosklearn/pipeline/regression.py
+++ b/autosklearn/pipeline/regression.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union, Dict
+from typing import Dict, Optional, Union
 
 import copy
 from itertools import product
@@ -292,7 +292,7 @@ def _get_pipeline_steps(self, dataset_properties, feat_type=None, init_params=No
                     regression_components.RegressorChoice(
                         feat_type=feat_type,
                         dataset_properties=default_dataset_properties,
-                        random_state=self.random_state
+                        random_state=self.random_state,
                     ),
                 ],
             ]
diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py
index 61e52f27aa..78f94c2309 100755
--- a/autosklearn/util/pipeline.py
+++ b/autosklearn/util/pipeline.py
@@ -11,9 +11,9 @@
     MULTIOUTPUT_REGRESSION,
     REGRESSION_TASKS,
 )
+from autosklearn.data.xy_data_manager import XYDataManager
 from autosklearn.pipeline.classification import SimpleClassificationPipeline
 from autosklearn.pipeline.regression import SimpleRegressionPipeline
-from autosklearn.data.xy_data_manager import XYDataManager
 
 __all__ = ["get_configuration_space"]
 
@@ -46,7 +46,9 @@ def get_configuration_space(
         The configuration space for the pipeline
     """
     if datamanager.info["task"] in REGRESSION_TASKS:
-        return _get_regression_configuration_space(datamanager, include, exclude, random_state)
+        return _get_regression_configuration_space(
+            datamanager, include, exclude, random_state
+        )
     else:
         return _get_classification_configuration_space(
             datamanager, include, exclude, random_state
@@ -109,24 +111,24 @@ def _get_classification_configuration_space(
 ) -> ConfigurationSpace:
     """Get the configuration of a classification pipeline given some dataset info
 
-    Parameters
-    ----------
-   datamanager: XYDataManager
-        XYDataManger object storing all important information about the dataset
+    Parameters
+    ----------
+    datamanager: XYDataManager
+        XYDataManager object storing all important information about the dataset
 
-    include: Optional[Dict[str, List[str]]] = None
-        A dictionary of what components to include for each pipeline step
+    include: Optional[Dict[str, List[str]]] = None
+        A dictionary of what components to include for each pipeline step
 
-    exclude: Optional[Dict[str, List[str]]] = None
-        A dictionary of what components to exclude for each pipeline step
+    exclude: Optional[Dict[str, List[str]]] = None
+        A dictionary of what components to exclude for each pipeline step
 
-    random_state: Optional[Union[int, np.random.Randomstate]] = None
-        The random state to use for seeding the ConfigSpace
+    random_state: Optional[Union[int, np.random.RandomState]] = None
+        The random state to use for seeding the ConfigSpace
 
-    Returns
-    -------
-    ConfigurationSpace
-        The configuration space for the classification pipeline
+    Returns
+    -------
+    ConfigurationSpace
+        The configuration space for the classification pipeline
     """
     task_type = datamanager.info["task"]
 
diff --git a/test/fixtures/ensembles.py b/test/fixtures/ensembles.py
index 82673d2c2d..32bb706eee 100644
--- a/test/fixtures/ensembles.py
+++ b/test/fixtures/ensembles.py
@@ -36,15 +36,21 @@ def make_voting_classifier() -> Callable[..., VotingClassifier]:
     """
 
     def _make(
-            X: Optional[SUPPORTED_FEAT_TYPES] = None,
-            y: Optional[SUPPORTED_TARGET_TYPES] = None,
-            models: Optional[Collection[AutoSklearnClassificationAlgorithm]] = None,
-            seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED,
+        X: Optional[SUPPORTED_FEAT_TYPES] = None,
+        y: Optional[SUPPORTED_TARGET_TYPES] = None,
+        models: Optional[Collection[AutoSklearnClassificationAlgorithm]] = None,
+        seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED,
     ) -> VotingClassifier:
         assert not (X is None) ^ (y is None)
         if not models:
-            models = [MyDummyClassifier(feat_type={i: "numerical" for i in range(4)}, config=1, random_state=seed) for _
-                      in range(5)]
+            models = [
+                MyDummyClassifier(
+                    feat_type={i: "numerical" for i in range(4)},
+                    config=1,
+                    random_state=seed,
+                )
+                for _ in range(5)
+            ]
 
         if X is not None:
             for model in models:
@@ -73,16 +79,22 @@ def make_voting_regressor() -> Callable[..., VotingRegressor]:
     """
 
     def _make(
-            X: Optional[SUPPORTED_FEAT_TYPES] = None,
-            y: Optional[SUPPORTED_TARGET_TYPES] = None,
-            models: Optional[Collection[AutoSklearnRegressionAlgorithm]] = None,
-            seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED,
+        X: Optional[SUPPORTED_FEAT_TYPES] = None,
+        y: Optional[SUPPORTED_TARGET_TYPES] = None,
+        models: Optional[Collection[AutoSklearnRegressionAlgorithm]] = None,
+        seed: Union[int, None, np.random.RandomState] = DEFAULT_SEED,
     ) -> VotingRegressor:
         assert not (X is None) ^ (y is None)
 
         if not models:
-            models = [MyDummyRegressor(feat_type={i: "numerical" for i in range(4)}, config=1, random_state=seed) for _
-                      in range(5)]
+            models = [
+                MyDummyRegressor(
+                    feat_type={i: "numerical" for i in range(4)},
+                    config=1,
+                    random_state=seed,
+                )
+                for _ in range(5)
+            ]
 
         if X is not None:
             for model in models:
diff --git a/test/test_evaluation/test_dummy_pipelines.py b/test/test_evaluation/test_dummy_pipelines.py
index 1d7b1b5f83..cc7ea3c284 100644
--- a/test/test_evaluation/test_dummy_pipelines.py
+++ b/test/test_evaluation/test_dummy_pipelines.py
@@ -24,7 +24,9 @@ def test_dummy_pipeline(task_type: str) -> None:
         return
 
     X, y = data_maker(random_state=0)
-    estimator = estimator_class(feat_type={i: "numerical" for i in range(X.shape[0])}, config=1, random_state=0)
+    estimator = estimator_class(
+        feat_type={i: "numerical" for i in range(X.shape[0])}, config=1, random_state=0
+    )
     estimator.fit(X, y)
     check_is_fitted(estimator)
 
diff --git a/test/test_evaluation/test_test_evaluator.py b/test/test_evaluation/test_test_evaluator.py
index 79af5a112d..3decc30753 100644
--- a/test/test_evaluation/test_test_evaluator.py
+++ b/test/test_evaluation/test_test_evaluator.py
@@ -86,16 +86,23 @@ def test_datasets(self):
                 self.assertTrue(np.isfinite(rval[0]["loss"]))
 
 
-class DummyDatamanager():
+class DummyDatamanager:
     def __init__(self):
         self.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}
-        self.feat_type = {0: 'numerical', 1: 'Numerical', 2: 'numerical', 3: 'numerical'}
+        self.feat_type = {
+            0: "numerical",
+            1: "Numerical",
+            2: "numerical",
+            3: "numerical",
+        }
 
 
 class FunctionsTest(unittest.TestCase):
     def setUp(self):
         self.queue = multiprocessing.Queue()
-        self.configuration = get_configuration_space(DummyDatamanager()).get_default_configuration()
+        self.configuration = get_configuration_space(
+            DummyDatamanager()
+        ).get_default_configuration()
         self.data = get_multiclass_classification_datamanager()
         self.tmp_dir = os.path.join(os.path.dirname(__file__), ".test_cv_functions")
         self.backend = unittest.mock.Mock(spec=Backend)
diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py
index 034453fa9d..a8db34d832 100644
--- a/test/test_evaluation/test_train_evaluator.py
+++ b/test/test_evaluation/test_train_evaluator.py
@@ -2940,16 +2940,24 @@ def test_holdout_split_size(self, te_mock):
         self.assertEqual(len(train_samples), 6)
         self.assertEqual(len(test_samples), 3)
 
-class DummyDatamanager():
+
+class DummyDatamanager:
     def __init__(self):
         self.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}
-        self.feat_type = {0: 'numerical', 1: 'Numerical', 2: 'numerical', 3: 'numerical'}
+        self.feat_type = {
+            0: "numerical",
+            1: "Numerical",
+            2: "numerical",
+            3: "numerical",
+        }
 
 
 class FunctionsTest(unittest.TestCase):
     def setUp(self):
         self.queue = multiprocessing.Queue()
-        self.configuration = get_configuration_space(DummyDatamanager()).get_default_configuration()
+        self.configuration = get_configuration_space(
+            DummyDatamanager()
+        ).get_default_configuration()
         self.data = get_multiclass_classification_datamanager()
         self.tmp_dir = os.path.join(
             os.path.dirname(__file__), ".test_holdout_functions"
diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py
index 638a7e958d..6110793c8c 100644
--- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py
+++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py
@@ -11,13 +11,17 @@
 class NumericalPreprocessingPipelineTest(unittest.TestCase):
     def test_data_type_consistency(self):
         X = np.random.rand(3, 4)
-        Y = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X)
+        Y = NumericalPreprocessingPipeline(
+            feat_type={0: "numerical", 1: "numerical", 2: "numerical"}
+        ).fit_transform(X)
         self.assertFalse(sparse.issparse(Y))
 
         X = sparse.csc_matrix(
             ([3.0, 6.0, 4.0, 5.0], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4)
         )
-        Y = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X)
+        Y = NumericalPreprocessingPipeline(
+            feat_type={0: "numerical", 1: "numerical", 2: "numerical"}
+        ).fit_transform(X)
         self.assertTrue(sparse.issparse(Y))
 
     def test_fit_transform(self):
@@ -37,13 +41,16 @@ def test_fit_transform(self):
             ]
         )  # noqa : matrix legibility
         # dense input
-        Yt = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(X)
+        Yt = NumericalPreprocessingPipeline(
+            feat_type={0: "numerical", 1: "numerical", 2: "numerical"}
+        ).fit_transform(X)
         np.testing.assert_array_almost_equal(Yt, Y1)
         # sparse input (uses with_mean=False)
         Y2 = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]) / sdev
         X_sparse = sparse.csc_matrix(X)
-        Yt = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"}).fit_transform(
-            X_sparse)
+        Yt = NumericalPreprocessingPipeline(
+            feat_type={0: "numerical", 1: "numerical", 2: "numerical"}
+        ).fit_transform(X_sparse)
         np.testing.assert_array_almost_equal(Yt.todense(), Y2)
 
     def test_transform(self):
@@ -52,7 +59,9 @@ def test_transform(self):
         )  # noqa : matrix legibility
         sdev = np.sqrt(2 / 3)
         # fit
-        NPP = NumericalPreprocessingPipeline(feat_type={0: "numerical", 1: "numerical", 2: "numerical"})
+        NPP = NumericalPreprocessingPipeline(
+            feat_type={0: "numerical", 1: "numerical", 2: "numerical"}
+        )
         NPP.fit_transform(X1)
         # transform
         X2 = np.array([[1.0, 5.0, 8.0], [2.0, 6.0, 9.0], [3.0, 7.0, np.nan]])

From 3149f8e0d3f6c1266b763dbd084ea1288a5fe6d2 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 20:27:55 +0200
Subject: [PATCH 15/63] fixing ensemble builder

---
 .../pipeline/components/data_preprocessing/feature_type.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index 8e7d2be98d..95633afbcc 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -155,7 +155,7 @@ def fit(
                 try:
                     # columns = [str(col) for col in columns]
                     pass
-                except:
+                except Exception as e:
                     raise ValueError(
                         f"Train data has columns={expected} yet the"
                         f" feat_types are feat={columns}"

From e9428077a7442e3e3a7322c8ebe48dce6f0f6ee1 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 20:28:46 +0200
Subject: [PATCH 16/63] fixing ensemble builder

---
 autosklearn/util/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py
index 78f94c2309..623958e792 100755
--- a/autosklearn/util/pipeline.py
+++ b/autosklearn/util/pipeline.py
@@ -1,5 +1,5 @@
 # -*- encoding: utf-8 -*-
-from typing import Any, Dict, List, Optional, Union
+from typing import Dict, List, Optional, Union
 
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace

From a27ca67b6fb4572f911753b25159db8da3248193 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 21:17:19 +0200
Subject: [PATCH 17/63] fixing ensemble builder

---
 .../pipeline/components/data_preprocessing/feature_type.py     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index 95633afbcc..ad6b6cfe85 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -158,7 +158,8 @@ def fit(
                 except Exception as e:
                     raise ValueError(
                         f"Train data has columns={expected} yet the"
-                        f" feat_types are feat={columns}"
+                        f" feat_types are feat={columns}\n"
+                        f"Exception: {e}"
                     )
             transformer_lst = []
 

From ecb3801e80f6ec08688c585c364c6add489c86a9 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sat, 11 Jun 2022 22:07:17 +0200
Subject: [PATCH 18/63] fixing ensemble builder

---
 scripts/02_retrieve_metadata.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py
index f87f65ecc4..931ad499f5 100644
--- a/scripts/02_retrieve_metadata.py
+++ b/scripts/02_retrieve_metadata.py
@@ -178,6 +178,11 @@ def write_output(outputs, configurations, output_dir, configuration_space, metri
             fh.write("%s: %s\n" % (key, description[key]))
 
 
+class DummyDatamanager():
+    def __init__(self, info):
+        self.info = info
+        self.feat_type = {"A1": "numerical"}
+
 def main():
     parser = ArgumentParser()
 
@@ -220,7 +225,7 @@ def main():
             )
 
             configuration_space = pipeline.get_configuration_space(
-                {"is_sparse": sparse, "task": task}
+                DummyDatamanager({"is_sparse": sparse, "task": task})
             )
 
             outputs, configurations = retrieve_matadata(

From 0f39c36f025341c9f4e185c166c47357d6b24476 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Tue, 14 Jun 2022 13:36:00 +0200
Subject: [PATCH 19/63] fixing ensemble builder

---
 autosklearn/experimental/askl2.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py
index bc6f78764b..4d68a3bf6b 100644
--- a/autosklearn/experimental/askl2.py
+++ b/autosklearn/experimental/askl2.py
@@ -110,8 +110,9 @@ def __call__(
         initial_configurations = []
         for member in self.portfolio.values():
             try:
+                _member = {key: member[key] for key in member if key in scenario.cs.get_hyperparameter_names()}
                 initial_configurations.append(
-                    Configuration(configuration_space=scenario.cs, values=member)
+                    Configuration(configuration_space=scenario.cs, values=_member)
                 )
             except ValueError:
                 pass
@@ -162,8 +163,9 @@ def __call__(
         initial_configurations = []
         for member in self.portfolio.values():
             try:
+                _member = {key: member[key] for key in member if key in scenario.cs.get_hyperparameter_names()}
                 initial_configurations.append(
-                    Configuration(configuration_space=scenario.cs, values=member)
+                    Configuration(configuration_space=scenario.cs, values=_member)
                 )
             except ValueError:
                 pass

From cc0ffd2ecaa929796ae966b60a1aa1fa30b7d066 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Wed, 15 Jun 2022 11:16:52 +0200
Subject: [PATCH 20/63] fixing ensemble builder

---
 .../components/data_preprocessing/feature_type.py  | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index ad6b6cfe85..919f0416ab 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -131,12 +131,14 @@ def __init__(
                 self._transformers.append(("numerical_transformer", self.numer_ppl))
             if "string" in self.feat_type.values():
                 self._transformers.append(("text_transformer", self.txt_ppl))
-            if self.config:
-                self.set_hyperparameters(
-                    feat_type=self.feat_type,
-                    configuration=self.config,
-                    init_params=init_params,
-                )
+
+        if self.config:
+            self.set_hyperparameters(
+                feat_type=self.feat_type,
+                configuration=self.config,
+                init_params=init_params,
+            )
+
         self.column_transformer = column_transformer
 
     def fit(

From 561d40e02d6b505666d957c65a0c24332e5670c1 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Wed, 15 Jun 2022 11:23:56 +0200
Subject: [PATCH 21/63] fixing ensemble builder

---
 autosklearn/experimental/askl2.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py
index 4d68a3bf6b..beb68d43e0 100644
--- a/autosklearn/experimental/askl2.py
+++ b/autosklearn/experimental/askl2.py
@@ -110,7 +110,11 @@ def __call__(
         initial_configurations = []
         for member in self.portfolio.values():
             try:
-                _member = {key: member[key] for key in member if key in scenario.cs.get_hyperparameter_names()}
+                _member = {
+                    key: member[key]
+                    for key in member
+                    if key in scenario.cs.get_hyperparameter_names()
+                }
                 initial_configurations.append(
                     Configuration(configuration_space=scenario.cs, values=_member)
                 )
@@ -163,7 +167,11 @@ def __call__(
         initial_configurations = []
         for member in self.portfolio.values():
             try:
-                _member = {key: member[key] for key in member if key in scenario.cs.get_hyperparameter_names()}
+                _member = {
+                    key: member[key]
+                    for key in member
+                    if key in scenario.cs.get_hyperparameter_names()
+                }
                 initial_configurations.append(
                     Configuration(configuration_space=scenario.cs, values=_member)
                 )

From f6cc8a5aacb797037feb4a5c106b03dda52773ee Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Wed, 15 Jun 2022 12:13:08 +0200
Subject: [PATCH 22/63] fixing ensemble builder

---
 .../components/feature_preprocessing/nystroem_sampler.py      | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
index f93d8bbff3..a236999b9f 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
@@ -51,10 +51,11 @@ def fit(self, X, Y=None):
         if self.kernel == "chi2":
             if scipy.sparse.issparse(X):
                 X.data[X.data < 0] = 0.0
+                X = X.todense()
             else:
                 X[X < 0] = 0.0
 
-        self.preprocessor.fit(X.astype(np.float64))
+        self.preprocessor.fit(X)
         return self
 
     def transform(self, X):
@@ -65,6 +66,7 @@ def transform(self, X):
         if self.kernel == "chi2":
             if scipy.sparse.issparse(X):
                 X.data[X.data < 0] = 0.0
+                X = X.todense()
             else:
                 X[X < 0] = 0.0
 

From 37b08b8aadbf706d09536b32a6f2ee234dcee013 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Wed, 15 Jun 2022 12:50:04 +0200
Subject: [PATCH 23/63] fixing ensemble builder

---
 .../components/feature_preprocessing/nystroem_sampler.py         | 1 -
 1 file changed, 1 deletion(-)

diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
index a236999b9f..456e326e83 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
@@ -1,4 +1,3 @@
-import numpy as np
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (

From ed3da30b50a3337ab8b4178331772ae966ea7e98 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Wed, 15 Jun 2022 16:32:51 +0200
Subject: [PATCH 24/63] fix search space bug

---
 autosklearn/evaluation/abstract_evaluator.py             | 4 ++--
 .../pipeline/components/data_preprocessing/__init__.py   | 9 ++++-----
 .../components/data_preprocessing/balancing/balancing.py | 2 +-
 .../data_preprocessing/categorical_encoding/__init__.py  | 6 +++---
 .../data_preprocessing/categorical_encoding/encoding.py  | 2 +-
 .../data_preprocessing/category_shift/category_shift.py  | 2 +-
 6 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py
index 4735db13e9..ab29c4519f 100644
--- a/autosklearn/evaluation/abstract_evaluator.py
+++ b/autosklearn/evaluation/abstract_evaluator.py
@@ -45,7 +45,7 @@ def __init__(
         self,
         config: Configuration,
         random_state: Optional[Union[int, np.random.RandomState]],
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         init_params: Optional[Dict[str, Any]] = None,
         dataset_properties: Dict[str, Any] = {},
         include: Optional[List[str]] = None,
@@ -110,7 +110,7 @@ def __init__(
         self,
         config: Configuration,
         random_state: Optional[Union[int, np.random.RandomState]],
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         init_params: Optional[Dict[str, Any]] = None,
         dataset_properties: Dict[str, Any] = {},
         include: Optional[List[str]] = None,
diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py
index 80d0d2fc9e..25a5342846 100644
--- a/autosklearn/pipeline/components/data_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Type
+from typing import Dict, Optional, Type, Union
 
 import os
 from collections import OrderedDict
@@ -105,7 +105,7 @@ def get_available_components(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type=None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[Dict] = None,
         default: str = None,
         include: Optional[Dict] = None,
@@ -154,7 +154,7 @@ def set_hyperparameters(
         self,
         configuration: ConfigurationSpace,
         init_params: Optional[Dict] = None,
-        feat_type=None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
     ) -> "DataPreprocessorChoice":
         config = {}
         params = configuration.get_dictionary()
@@ -166,12 +166,11 @@ def set_hyperparameters(
             config[param] = value
 
         new_params = {}
-        # feat_type = None
         if init_params is not None:
             for param, value in init_params.items():
                 param = param.replace(choice, "").split(":", 1)[-1]
                 if "feat_type" in param:
-                    feat_type = value
+                    continue
                 else:
                     new_params[param] = value
         self.choice = self.get_components()[choice](
diff --git a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
index d9a4958bde..2cdd112ba6 100644
--- a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
+++ b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
@@ -139,7 +139,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type=None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         # TODO add replace by zero!
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
index 40ddb6e9c6..887bff593e 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 
 import os
 from collections import OrderedDict
@@ -38,7 +38,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
-        feat_type=None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
@@ -88,7 +88,7 @@ def get_hyperparameter_search_space(
 
     def set_hyperparameters(
         self,
-        feat_type,
+        feat_type: Optional[Dict[Union[str, int], str]],
         configuration: Configuration,
         init_params: Optional[Dict[str, Any]] = None,
     ) -> "OHEChoice":
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
index 570ac0f730..a8a2d0a89d 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
@@ -69,7 +69,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type=None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
index 3af659331e..2d5e5607bd 100644
--- a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
+++ b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
@@ -63,7 +63,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type=None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()

From 4690854fd571241cbe4637d267249cfd1619e63b Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Wed, 15 Jun 2022 17:30:32 +0200
Subject: [PATCH 25/63] fix search space bug

---
 autosklearn/pipeline/components/data_preprocessing/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py
index 25a5342846..9ebe1d0679 100644
--- a/autosklearn/pipeline/components/data_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py
@@ -170,7 +170,7 @@ def set_hyperparameters(
             for param, value in init_params.items():
                 param = param.replace(choice, "").split(":", 1)[-1]
                 if "feat_type" in param:
-                    continue
+                    feat_type = value
                 else:
                     new_params[param] = value
         self.choice = self.get_components()[choice](

From aff8c04eec8ac3ff91414368f53590a72a2aac65 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Thu, 16 Jun 2022 13:10:10 +0200
Subject: [PATCH 26/63] fix search space bug

---
 autosklearn/evaluation/abstract_evaluator.py  |  2 +-
 autosklearn/evaluation/train_evaluator.py     |  2 +-
 .../metalearning/input/aslib_simple.py        |  5 ++-
 autosklearn/pipeline/base.py                  | 44 +++++++++++++------
 autosklearn/pipeline/classification.py        | 12 ++++-
 autosklearn/pipeline/components/base.py       | 23 +++++++---
 .../components/classification/__init__.py     | 14 +++---
 .../components/data_preprocessing/__init__.py |  2 +-
 .../categorical_encoding/__init__.py          |  2 +-
 .../data_preprocessing/feature_type.py        |  4 +-
 .../minority_coalescense/__init__.py          |  2 +-
 .../feature_preprocessing/__init__.py         | 13 +++---
 .../extra_trees_preproc_for_classification.py |  8 +++-
 .../extra_trees_preproc_for_regression.py     |  8 +++-
 .../feature_agglomeration.py                  |  8 +++-
 .../feature_preprocessing/kernel_pca.py       |  8 +++-
 .../feature_preprocessing/kitchen_sinks.py    |  8 +++-
 .../liblinear_svc_preprocessor.py             |  8 +++-
 .../feature_preprocessing/no_preprocessing.py |  8 +++-
 .../components/feature_preprocessing/pca.py   |  8 +++-
 .../feature_preprocessing/polynomial.py       |  8 +++-
 .../random_trees_embedding.py                 |  8 +++-
 .../select_percentile_classification.py       |  8 +++-
 .../select_percentile_regression.py           |  8 +++-
 .../select_rates_classification.py            |  8 +++-
 .../select_rates_regression.py                |  3 ++
 .../feature_preprocessing/truncatedSVD.py     |  8 +++-
 .../components/regression/__init__.py         | 14 +++---
 autosklearn/pipeline/regression.py            | 23 +++++++---
 autosklearn/util/pipeline.py                  | 24 +++++-----
 .../data_preprocessing/test_scaling.py        |  2 +-
 31 files changed, 220 insertions(+), 83 deletions(-)

diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py
index ab29c4519f..0638b0a57d 100644
--- a/autosklearn/evaluation/abstract_evaluator.py
+++ b/autosklearn/evaluation/abstract_evaluator.py
@@ -303,7 +303,7 @@ def __init__(
         # Please mypy to prevent not defined attr
         self.model = self._get_model(feat_type=self.feat_type)
 
-    def _get_model(self, feat_type) -> BaseEstimator:
+    def _get_model(self, feat_type: Optional[Dict[Union[str, int], str]]) -> BaseEstimator:
         if not isinstance(self.configuration, Configuration):
             model = self.model_class(
                 feat_type=feat_type,
diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py
index c27b3c36f3..eb5b735742 100644
--- a/autosklearn/evaluation/train_evaluator.py
+++ b/autosklearn/evaluation/train_evaluator.py
@@ -987,7 +987,7 @@ def _partial_fit_and_predict_standard(
         PIPELINE_DATA_DTYPE,  # test_pred
         TYPE_ADDITIONAL_INFO,
     ]:
-        model = self._get_model(feat_type=self.feat_type)
+        model = self._get_model(self.feat_type)
 
         self.indices[fold] = (train_indices, test_indices)
 
diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py
index c495c5cd69..4d84e6fe97 100644
--- a/autosklearn/metalearning/input/aslib_simple.py
+++ b/autosklearn/metalearning/input/aslib_simple.py
@@ -1,6 +1,7 @@
 import csv
 import logging
 import os
+from ConfigSpace.configuration_space import ConfigurationSpace
 from collections import OrderedDict, defaultdict
 
 import arff
@@ -8,7 +9,9 @@
 
 
 class AlgorithmSelectionProblem(object):
-    def __init__(self, directory, cs):
+    def __init__(self,
+                 directory: str,
+                 cs: ConfigurationSpace):
         self.logger = logging.getLogger(__name__)
 
         # Create data structures
diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index 29305f3b77..d1aba5e138 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -1,9 +1,9 @@
 from abc import ABCMeta
-from typing import Dict, Union
+from typing import Dict, Union, Optional, Any
 
 import numpy as np
 import scipy.sparse
-from ConfigSpace import Configuration
+from ConfigSpace import Configuration, ConfigurationSpace
 from sklearn.pipeline import Pipeline
 
 import autosklearn.pipeline.create_searchspace_util
@@ -34,14 +34,14 @@ class BasePipeline(Pipeline):
 
     def __init__(
         self,
-        feat_type=None,
-        config=None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        config: Optional[Configuration] = None,
         steps=None,
-        dataset_properties=None,
-        include=None,
-        exclude=None,
-        random_state=None,
-        init_params=None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
+        random_state: Optional[Union[int, np.random.RandomState]] = None,
+        init_params: Optional[Dict[str, Any]] = None,
     ):
 
         self.init_params = init_params if init_params is not None else {}
@@ -208,7 +208,10 @@ def predict(self, X, batch_size=None):
 
                 return y
 
-    def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
+    def set_hyperparameters(self,
+                            configuration: Configuration,
+                            feat_type: Optional[Dict[Union[str, int], str]] = None,
+                            init_params: Optional[Dict[str, Any]] = None):
         self.config = configuration
 
         for node_idx, n_ in enumerate(self.steps):
@@ -255,7 +258,9 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
 
         return self
 
-    def get_hyperparameter_search_space(self, feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(self,
+                                        feat_type: Optional[Dict[Union[str, int], str]] = None,
+                                        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None):
         """Return the configuration space for the CASH problem.
 
         Returns
@@ -274,7 +279,11 @@ def get_hyperparameter_search_space(self, feat_type=None, dataset_properties=Non
         return self.config_space
 
     def _get_hyperparameter_search_space(
-        self, feat_type=None, include=None, exclude=None, dataset_properties=None
+        self,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
     ):
         """Return the configuration space for the CASH problem.
 
@@ -319,7 +328,13 @@ def _get_hyperparameter_search_space(
         raise NotImplementedError()
 
     def _get_base_search_space(
-        self, cs, dataset_properties, exclude, include, pipeline, feat_type=None
+        self,
+        cs: ConfigurationSpace,
+        dataset_properties: DATASET_PROPERTIES_TYPE,
+        include: Dict[str, str],
+        exclude: Dict[str, str],
+        pipeline,
+        feat_type: Optional[Dict[Union[str, int], str]] = None
     ):
         if include is None:
             if self.include is None:
@@ -385,7 +400,8 @@ def _get_base_search_space(
             if not is_choice:
                 cs.add_configuration_space(
                     node_name,
-                    node.get_hyperparameter_search_space(dataset_properties),
+                    node.get_hyperparameter_search_space(dataset_properties=dataset_properties,
+                                                         feat_type=feat_type),
                 )
             # If the node is a choice, we have to figure out which of its
             #  choices are actually legal choices
diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index 203d26877c..fece85823e 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -19,6 +19,8 @@
 )
 from autosklearn.pipeline.constants import SPARSE
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class SimpleClassificationPipeline(BasePipeline, ClassifierMixin):
     """This class implements the classification task.
@@ -168,7 +170,11 @@ def predict_proba(self, X, batch_size=None):
                 return y
 
     def _get_hyperparameter_search_space(
-        self, feat_type=None, include=None, exclude=None, dataset_properties=None
+        self,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
     ):
         """Create the hyperparameter configuration space.
 
@@ -349,7 +355,9 @@ def _get_hyperparameter_search_space(
         self.dataset_properties = dataset_properties
         return cs
 
-    def _get_pipeline_steps(self, dataset_properties, feat_type=None):
+    def _get_pipeline_steps(self,
+                            dataset_properties: Optional[DATASET_PROPERTIES_TYPE],
+                            feat_type: Optional[Dict[Union[str, int], str]] = None):
         steps = []
 
         default_dataset_properties = {"target_type": "classification"}
diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py
index c4d4485a7d..b2ff65db02 100644
--- a/autosklearn/pipeline/components/base.py
+++ b/autosklearn/pipeline/components/base.py
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Dict, Optional, Union, Any
 
 import importlib
 import inspect
@@ -6,10 +6,13 @@
 import sys
 from collections import OrderedDict
 
+from ConfigSpace.configuration_space import Configuration
 from sklearn.base import BaseEstimator, TransformerMixin
 
 from autosklearn.pipeline.constants import SPARSE
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 _addons = dict()  # type: Dict[str, 'ThirdPartyComponents']
 
 
@@ -98,7 +101,10 @@ def get_properties(dataset_properties=None):
         raise NotImplementedError()
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         """Return the configuration space of this classification algorithm.
 
         Parameters
@@ -136,7 +142,10 @@ def fit(self, X, y):
         for further information."""
         raise NotImplementedError()
 
-    def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
+    def set_hyperparameters(self,
+                            configuration: Configuration,
+                            feat_type: Optional[Dict[Union[str, int], str]] = None,
+                            init_params: Optional[Dict[str, Any]] = None):
         params = configuration.get_dictionary()
 
         for param, value in params.items():
@@ -439,11 +448,11 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
 
     def get_hyperparameter_search_space(
         self,
-        feat_type,
-        dataset_properties=None,
+        feat_type: Dict[Union[str, int], str],
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default=None,
-        include=None,
-        exclude=None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
     ):
         raise NotImplementedError()
 
diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
index ae9a09ff66..605f44aa05 100644
--- a/autosklearn/pipeline/components/classification/__init__.py
+++ b/autosklearn/pipeline/components/classification/__init__.py
@@ -1,6 +1,6 @@
 __author__ = "feurerm"
 
-from typing import Type
+from typing import Type, Dict, Union, Optional
 
 import os
 from collections import OrderedDict
@@ -23,6 +23,8 @@
 additional_components = ThirdPartyComponents(AutoSklearnClassificationAlgorithm)
 _addons["classification"] = additional_components
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 def add_classifier(classifier: Type[AutoSklearnClassificationAlgorithm]) -> None:
     additional_components.add_component(classifier)
@@ -87,11 +89,11 @@ def get_available_components(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type=None,
-        dataset_properties=None,
+        feat_type: Dict[Union[str, int], str],
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default=None,
-        include=None,
-        exclude=None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
     ):
         if dataset_properties is None:
             dataset_properties = {}
@@ -131,7 +133,7 @@ def get_hyperparameter_search_space(
         for estimator_name in available_estimators.keys():
             estimator_configuration_space = available_estimators[
                 estimator_name
-            ].get_hyperparameter_search_space(dataset_properties)
+            ].get_hyperparameter_search_space(dataset_properties=dataset_properties)
             parent_hyperparameter = {"parent": estimator, "value": estimator_name}
             cs.add_configuration_space(
                 estimator_name,
diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py
index 9ebe1d0679..848bbb5913 100644
--- a/autosklearn/pipeline/components/data_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py
@@ -138,7 +138,7 @@ def get_hyperparameter_search_space(
         for name in available_preprocessors:
             preprocessor_configuration_space = available_preprocessors[name](
                 feat_type=feat_type, dataset_properties=dataset_properties
-            ).get_hyperparameter_search_space(dataset_properties)
+            ).get_hyperparameter_search_space(dataset_properties=dataset_properties)
             parent_hyperparameter = {"parent": preprocessor, "value": name}
             cs.add_configuration_space(
                 name,
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
index 887bff593e..ba3a555027 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
@@ -74,7 +74,7 @@ def get_hyperparameter_search_space(
         for name in available_preprocessors:
             preprocessor_configuration_space = available_preprocessors[
                 name
-            ].get_hyperparameter_search_space(dataset_properties)
+            ].get_hyperparameter_search_space(dataset_properties=dataset_properties)
             parent_hyperparameter = {"parent": preprocessor, "value": name}
             cs.add_configuration_space(
                 name,
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index 919f0416ab..ac1978e80a 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -256,7 +256,7 @@ def get_properties(
 
     def set_hyperparameters(
         self,
-        feat_type,
+        feat_type: Dict[Union[str, int], str],
         configuration: Configuration,
         init_params: Optional[Dict[str, Any]] = None,
     ) -> "FeatTypeSplit":
@@ -328,7 +328,7 @@ def _get_hyperparameter_search_space_recursevely(
             if hasattr(st_operation, "get_hyperparameter_search_space"):
                 cs.add_configuration_space(
                     st_name,
-                    st_operation.get_hyperparameter_search_space(dataset_properties),
+                    st_operation.get_hyperparameter_search_space(dataset_properties=dataset_properties),
                 )
             else:
                 return FeatTypeSplit._get_hyperparameter_search_space_recursevely(
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
index 438a4ce681..bef957b7f4 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
@@ -74,7 +74,7 @@ def get_hyperparameter_search_space(
         for name in available_preprocessors:
             preprocessor_configuration_space = available_preprocessors[
                 name
-            ].get_hyperparameter_search_space(dataset_properties)
+            ].get_hyperparameter_search_space(dataset_properties=dataset_properties)
             parent_hyperparameter = {"parent": preprocessor, "value": name}
             cs.add_configuration_space(
                 name,
diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
index d4f9bc6662..db4b2983e0 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
@@ -1,4 +1,4 @@
-from typing import Type
+from typing import Dict, Optional, Union, Type
 
 import os
 from collections import OrderedDict
@@ -21,6 +21,7 @@
 additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm)
 _addons["feature_preprocessing"] = additional_components
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
 
 def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None:
     additional_components.add_component(preprocessor)
@@ -102,11 +103,11 @@ def get_available_components(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type=None,
-        dataset_properties=None,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default=None,
-        include=None,
-        exclude=None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
     ):
         cs = ConfigurationSpace()
 
@@ -135,7 +136,7 @@ def get_hyperparameter_search_space(
         for name in available_preprocessors:
             preprocessor_configuration_space = available_preprocessors[
                 name
-            ].get_hyperparameter_search_space(dataset_properties)
+            ].get_hyperparameter_search_space(dataset_properties=dataset_properties)
             parent_hyperparameter = {"parent": preprocessor, "value": name}
             cs.add_configuration_space(
                 name,
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
index c45f9b1cf2..d2c178adb2 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -11,6 +12,8 @@
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class ExtraTreesPreprocessorClassification(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -123,7 +126,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         cs = ConfigurationSpace()
 
         n_estimators = Constant("n_estimators", 100)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
index 98a5a5700c..73fb9f3e11 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -12,6 +13,8 @@
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class ExtraTreesPreprocessorRegression(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -125,7 +128,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         cs = ConfigurationSpace()
 
         n_estimators = Constant("n_estimators", 100)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
index 0076f14121..8af0e130e8 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.forbidden import (
@@ -13,6 +14,8 @@
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class FeatureAgglomeration(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, n_clusters, affinity, linkage, pooling_func, random_state=None):
@@ -63,7 +66,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         cs = ConfigurationSpace()
         n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25)
         affinity = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
index 27b3446d57..bec513aec6 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 import warnings
 
 import numpy as np
@@ -12,6 +13,8 @@
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class KernelPCA(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -82,7 +85,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         n_components = UniformIntegerHyperparameter(
             "n_components", 10, 2000, default_value=100
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
index 93673e75e9..6f9d6cd9f3 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 from typing import Optional, Union
 
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -10,6 +11,8 @@
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class RandomKitchenSinks(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -69,7 +72,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         gamma = UniformFloatHyperparameter(
             "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
index 43135da483..c7b770e7d2 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause
 from ConfigSpace.hyperparameters import (
@@ -10,6 +11,8 @@
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class LibLinear_Preprocessor(AutoSklearnPreprocessingAlgorithm):
     # Liblinear is not deterministic as it uses a RNG inside
@@ -91,7 +94,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         cs = ConfigurationSpace()
 
         penalty = Constant("penalty", "l1")
diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
index 0597cfcabe..dc76a4a2a3 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
@@ -1,8 +1,11 @@
+from typing import Dict, Union, Optional
 from ConfigSpace.configuration_space import ConfigurationSpace
 
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class NoPreprocessing(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, random_state):
@@ -34,6 +37,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         cs = ConfigurationSpace()
         return cs
diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py
index d86b38cf22..761525c8da 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -9,6 +10,8 @@
 from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class PCA(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, keep_variance, whiten, random_state=None):
@@ -55,7 +58,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         keep_variance = UniformFloatHyperparameter(
             "keep_variance", 0.5, 0.9999, default_value=0.9999
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
index 0d4b166f35..c563b1afbb 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -8,6 +9,8 @@
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class PolynomialFeatures(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, degree, interaction_only, include_bias, random_state=None):
@@ -54,7 +57,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         # More than degree 3 is too expensive!
         degree = UniformIntegerHyperparameter("degree", 2, 3, 2)
         interaction_only = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
index 60b7df0c3a..c41b051e9b 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -10,6 +11,8 @@
 from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class RandomTreesEmbedding(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -94,7 +97,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         n_estimators = UniformIntegerHyperparameter(
             name="n_estimators", lower=10, upper=100, default_value=10
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
index 3fa80f0ca1..b5265ba122 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 from functools import partial
 
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -19,6 +20,8 @@
     UNSIGNED_DATA,
 )
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class SelectPercentileClassification(
     SelectPercentileBase, AutoSklearnPreprocessingAlgorithm
@@ -110,7 +113,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         percentile = UniformFloatHyperparameter(
             name="percentile", lower=1, upper=99, default_value=50
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
index 0f489f933f..021305777f 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 from functools import partial
 
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -12,6 +13,8 @@
 )
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class SelectPercentileRegression(
     SelectPercentileBase, AutoSklearnPreprocessingAlgorithm
@@ -53,7 +56,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         percentile = UniformFloatHyperparameter(
             "percentile", lower=1, upper=99, default_value=50
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
index c21ff3d7cb..d93f8e9528 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 from functools import partial
 
 from ConfigSpace import NotEqualsCondition
@@ -16,6 +17,8 @@
     UNSIGNED_DATA,
 )
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class SelectClassificationRates(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, alpha, mode="fpr", score_func="chi2", random_state=None):
@@ -116,7 +119,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=0.01, upper=0.5, default_value=0.1
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
index a708b18e9f..fcc59ab2c7 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 from functools import partial
 
 from ConfigSpace import NotEqualsCondition
@@ -10,6 +11,8 @@
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class SelectRegressionRates(AutoSklearnPreprocessingAlgorithm):
     def __init__(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
index d515c9552a..4c9c84bf4d 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
@@ -1,9 +1,12 @@
+from typing import Dict, Union, Optional
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import UniformIntegerHyperparameter
 
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class TruncatedSVD(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, target_dim, random_state=None):
@@ -48,7 +51,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         target_dim = UniformIntegerHyperparameter(
             "target_dim", 10, 256, default_value=128
         )
diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py
index 08452b4809..5773aec45b 100644
--- a/autosklearn/pipeline/components/regression/__init__.py
+++ b/autosklearn/pipeline/components/regression/__init__.py
@@ -1,4 +1,4 @@
-from typing import Type
+from typing import Type, Dict, Union, Optional
 
 import os
 from collections import OrderedDict
@@ -21,6 +21,8 @@
 additional_components = ThirdPartyComponents(AutoSklearnRegressionAlgorithm)
 _addons["regression"] = additional_components
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 def add_regressor(regressor: Type[AutoSklearnRegressionAlgorithm]) -> None:
     additional_components.add_component(regressor)
@@ -80,11 +82,11 @@ def get_available_components(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type,
-        dataset_properties=None,
+        feat_type: Dict[Union[str, int], str],
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default=None,
-        include=None,
-        exclude=None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
     ):
         if include is not None and exclude is not None:
             raise ValueError(
@@ -121,7 +123,7 @@ def get_hyperparameter_search_space(
         for estimator_name in available_estimators.keys():
             estimator_configuration_space = available_estimators[
                 estimator_name
-            ].get_hyperparameter_search_space(dataset_properties)
+            ].get_hyperparameter_search_space(dataset_properties=dataset_properties)
             parent_hyperparameter = {"parent": estimator, "value": estimator_name}
             cs.add_configuration_space(
                 estimator_name,
diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py
index 529953cb18..d8358c5628 100644
--- a/autosklearn/pipeline/regression.py
+++ b/autosklearn/pipeline/regression.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Dict, Optional, Union, Any
 
 import copy
 from itertools import product
@@ -16,6 +16,8 @@
 from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice
 from autosklearn.pipeline.constants import SPARSE
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class SimpleRegressionPipeline(RegressorMixin, BasePipeline):
     """This class implements the regression task.
@@ -70,11 +72,11 @@ def __init__(
         feat_type: Optional[Dict[Union[str, int], str]] = None,
         config: Optional[Configuration] = None,
         steps=None,
-        dataset_properties=None,
-        include=None,
-        exclude=None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
-        init_params=None,
+        init_params: Optional[Dict[str, Any]] = None,
     ):
         self._output_dtype = np.float32
         if dataset_properties is None:
@@ -114,7 +116,11 @@ def predict(self, X, batch_size=None):
         return y
 
     def _get_hyperparameter_search_space(
-        self, feat_type=None, include=None, exclude=None, dataset_properties=None
+        self,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        include: Optional[Dict[str, str]] = None,
+        exclude: Optional[Dict[str, str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
     ):
         """Return the configuration space for the CASH problem.
 
@@ -262,7 +268,10 @@ def _get_hyperparameter_search_space(
     def _get_estimator_components(self):
         return regression_components._regressors
 
-    def _get_pipeline_steps(self, dataset_properties, feat_type=None, init_params=None):
+    def _get_pipeline_steps(self,
+                            dataset_properties: Optional[DATASET_PROPERTIES_TYPE],
+                            feat_type: Optional[Dict[Union[str, int], str]] = None,
+                            init_params: Optional[Dict[str, Any]] = None):
         steps = []
 
         default_dataset_properties = {"target_type": "regression"}
diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py
index 623958e792..6ff74b0fb7 100755
--- a/autosklearn/util/pipeline.py
+++ b/autosklearn/util/pipeline.py
@@ -111,24 +111,24 @@ def _get_classification_configuration_space(
 ) -> ConfigurationSpace:
     """Get the configuration of a classification pipeline given some dataset info
 
-     Parameters
-     ----------
+    Parameters
+    ----------
     datamanager: XYDataManager
          XYDataManger object storing all important information about the dataset
 
-     include: Optional[Dict[str, List[str]]] = None
-         A dictionary of what components to include for each pipeline step
+    include: Optional[Dict[str, List[str]]] = None
+        A dictionary of what components to include for each pipeline step
 
-     exclude: Optional[Dict[str, List[str]]] = None
-         A dictionary of what components to exclude for each pipeline step
+    exclude: Optional[Dict[str, List[str]]] = None
+        A dictionary of what components to exclude for each pipeline step
 
-     random_state: Optional[Union[int, np.random.Randomstate]] = None
-         The random state to use for seeding the ConfigSpace
+    random_state: Optional[Union[int, np.random.RandomState]] = None
+        The random state to use for seeding the ConfigSpace
 
-     Returns
-     -------
-     ConfigurationSpace
-         The configuration space for the classification pipeline
+    Returns
+    -------
+    ConfigurationSpace
+        The configuration space for the classification pipeline
     """
     task_type = datamanager.info["task"]
 
diff --git a/test/test_pipeline/components/data_preprocessing/test_scaling.py b/test/test_pipeline/components/data_preprocessing/test_scaling.py
index b87223d14d..faa5b3f1e1 100644
--- a/test/test_pipeline/components/data_preprocessing/test_scaling.py
+++ b/test/test_pipeline/components/data_preprocessing/test_scaling.py
@@ -19,7 +19,7 @@ def _test_helper(self, Preprocessor, dataset=None, make_sparse=False):
         original_X_train = X_train.copy()
         configuration_space = Preprocessor(
             dataset_properties
-        ).get_hyperparameter_search_space(dataset_properties)
+        ).get_hyperparameter_search_space(dataset_properties=dataset_properties)
         default = configuration_space.get_default_configuration()
 
         preprocessor = Preprocessor(dataset_properties, random_state=1)

From e4e9fe33d6dc9410b9e5e5a5b28d2d9ca8d06f10 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Thu, 16 Jun 2022 13:10:26 +0200
Subject: [PATCH 27/63] fix search space bug

---
 .../components/feature_preprocessing/nystroem_sampler.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
index 456e326e83..29cc83603a 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
@@ -1,3 +1,4 @@
+from typing import Dict, Union, Optional
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -15,6 +16,8 @@
     UNSIGNED_DATA,
 )
 
+DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
+
 
 class Nystroem(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -65,7 +68,6 @@ def transform(self, X):
         if self.kernel == "chi2":
             if scipy.sparse.issparse(X):
                 X.data[X.data < 0] = 0.0
-                X = X.todense()
             else:
                 X[X < 0] = 0.0
 
@@ -95,7 +97,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[Dict[Union[str, int], str]] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         if dataset_properties is not None and (
             dataset_properties.get("sparse") is True
             or dataset_properties.get("signed") is False

From be72171f84d84fba65f0c637542bd62b29e89a6d Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Thu, 16 Jun 2022 13:27:16 +0200
Subject: [PATCH 28/63] fix search space bug

---
 autosklearn/evaluation/abstract_evaluator.py  |  4 ++-
 .../metalearning/input/aslib_simple.py        |  6 ++--
 autosklearn/pipeline/base.py                  | 29 ++++++++++-------
 autosklearn/pipeline/classification.py        | 20 ++++++------
 autosklearn/pipeline/components/base.py       | 16 ++++++----
 .../components/classification/__init__.py     |  2 +-
 .../data_preprocessing/feature_type.py        | 32 ++++++++++---------
 .../feature_preprocessing/__init__.py         |  3 +-
 .../extra_trees_preproc_for_classification.py |  7 ++--
 .../extra_trees_preproc_for_regression.py     |  7 ++--
 .../feature_agglomeration.py                  |  7 ++--
 .../feature_preprocessing/kernel_pca.py       |  7 ++--
 .../feature_preprocessing/kitchen_sinks.py    |  7 ++--
 .../liblinear_svc_preprocessor.py             |  7 ++--
 .../feature_preprocessing/no_preprocessing.py |  7 ++--
 .../feature_preprocessing/nystroem_sampler.py |  7 ++--
 .../components/feature_preprocessing/pca.py   |  7 ++--
 .../feature_preprocessing/polynomial.py       |  7 ++--
 .../random_trees_embedding.py                 |  7 ++--
 .../select_percentile_classification.py       |  7 ++--
 .../select_percentile_regression.py           |  7 ++--
 .../select_rates_classification.py            |  7 ++--
 .../select_rates_regression.py                |  3 +-
 .../feature_preprocessing/truncatedSVD.py     |  7 ++--
 .../components/regression/__init__.py         |  2 +-
 autosklearn/pipeline/regression.py            | 20 ++++++------
 26 files changed, 135 insertions(+), 107 deletions(-)

diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py
index 0638b0a57d..233165e2a9 100644
--- a/autosklearn/evaluation/abstract_evaluator.py
+++ b/autosklearn/evaluation/abstract_evaluator.py
@@ -303,7 +303,9 @@ def __init__(
         # Please mypy to prevent not defined attr
         self.model = self._get_model(feat_type=self.feat_type)
 
-    def _get_model(self, feat_type: Optional[Dict[Union[str, int], str]]) -> BaseEstimator:
+    def _get_model(
+        self, feat_type: Optional[Dict[Union[str, int], str]]
+    ) -> BaseEstimator:
         if not isinstance(self.configuration, Configuration):
             model = self.model_class(
                 feat_type=feat_type,
diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py
index 4d84e6fe97..2a8d1e04d2 100644
--- a/autosklearn/metalearning/input/aslib_simple.py
+++ b/autosklearn/metalearning/input/aslib_simple.py
@@ -1,17 +1,15 @@
 import csv
 import logging
 import os
-from ConfigSpace.configuration_space import ConfigurationSpace
 from collections import OrderedDict, defaultdict
 
 import arff
 import pandas as pd
+from ConfigSpace.configuration_space import ConfigurationSpace
 
 
 class AlgorithmSelectionProblem(object):
-    def __init__(self,
-                 directory: str,
-                 cs: ConfigurationSpace):
+    def __init__(self, directory: str, cs: ConfigurationSpace):
         self.logger = logging.getLogger(__name__)
 
         # Create data structures
diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index d1aba5e138..9eba727c4b 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -1,5 +1,5 @@
 from abc import ABCMeta
-from typing import Dict, Union, Optional, Any
+from typing import Any, Dict, Optional, Union
 
 import numpy as np
 import scipy.sparse
@@ -208,10 +208,12 @@ def predict(self, X, batch_size=None):
 
                 return y
 
-    def set_hyperparameters(self,
-                            configuration: Configuration,
-                            feat_type: Optional[Dict[Union[str, int], str]] = None,
-                            init_params: Optional[Dict[str, Any]] = None):
+    def set_hyperparameters(
+        self,
+        configuration: Configuration,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        init_params: Optional[Dict[str, Any]] = None,
+    ):
         self.config = configuration
 
         for node_idx, n_ in enumerate(self.steps):
@@ -258,9 +260,11 @@ def set_hyperparameters(self,
 
         return self
 
-    def get_hyperparameter_search_space(self,
-                                        feat_type: Optional[Dict[Union[str, int], str]] = None,
-                                        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None):
+    def get_hyperparameter_search_space(
+        self,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+    ):
         """Return the configuration space for the CASH problem.
 
         Returns
@@ -283,7 +287,7 @@ def _get_hyperparameter_search_space(
         feat_type: Optional[Dict[Union[str, int], str]] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         """Return the configuration space for the CASH problem.
 
@@ -334,7 +338,7 @@ def _get_base_search_space(
         include: Dict[str, str],
         exclude: Dict[str, str],
         pipeline,
-        feat_type: Optional[Dict[Union[str, int], str]] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
     ):
         if include is None:
             if self.include is None:
@@ -400,8 +404,9 @@ def _get_base_search_space(
             if not is_choice:
                 cs.add_configuration_space(
                     node_name,
-                    node.get_hyperparameter_search_space(dataset_properties=dataset_properties,
-                                                         feat_type=feat_type),
+                    node.get_hyperparameter_search_space(
+                        dataset_properties=dataset_properties, feat_type=feat_type
+                    ),
                 )
             # If the node is a choice, we have to figure out which of its
             #  choices are actually legal choices
diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index fece85823e..9ad54c3e41 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 import copy
 from itertools import product
@@ -75,11 +75,11 @@ def __init__(
         feat_type: Optional[Dict[Union[str, int], str]] = None,
         config: Optional[Configuration] = None,
         steps=None,
-        dataset_properties=None,
-        include=None,
-        exclude=None,
+        dataset_properties: Dict[str, bool] = None,
+        include: Optional[Dict[str, List[str]]] = None,
+        exclude: Optional[Dict[str, List[str]]] = None,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
-        init_params=None,
+        init_params: Optional[Dict[str, Any]] = None,
     ):
         self._output_dtype = np.int32
         if dataset_properties is None:
@@ -174,7 +174,7 @@ def _get_hyperparameter_search_space(
         feat_type: Optional[Dict[Union[str, int], str]] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         """Create the hyperparameter configuration space.
 
@@ -355,9 +355,11 @@ def _get_hyperparameter_search_space(
         self.dataset_properties = dataset_properties
         return cs
 
-    def _get_pipeline_steps(self,
-                            dataset_properties: Optional[DATASET_PROPERTIES_TYPE],
-                            feat_type: Optional[Dict[Union[str, int], str]] = None):
+    def _get_pipeline_steps(
+        self,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE],
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+    ):
         steps = []
 
         default_dataset_properties = {"target_type": "classification"}
diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py
index b2ff65db02..7185926931 100644
--- a/autosklearn/pipeline/components/base.py
+++ b/autosklearn/pipeline/components/base.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union, Any
+from typing import Any, Dict, Optional, Union
 
 import importlib
 import inspect
@@ -102,8 +102,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         """Return the configuration space of this classification algorithm.
 
@@ -142,10 +142,12 @@ def fit(self, X, y):
         for further information."""
         raise NotImplementedError()
 
-    def set_hyperparameters(self,
-                            configuration: Configuration,
-                            feat_type: Optional[Dict[Union[str, int], str]] = None,
-                            init_params: Optional[Dict[str, Any]] = None):
+    def set_hyperparameters(
+        self,
+        configuration: Configuration,
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        init_params: Optional[Dict[str, Any]] = None,
+    ):
         params = configuration.get_dictionary()
 
         for param, value in params.items():
diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
index 605f44aa05..d1a7ea83cb 100644
--- a/autosklearn/pipeline/components/classification/__init__.py
+++ b/autosklearn/pipeline/components/classification/__init__.py
@@ -1,6 +1,6 @@
 __author__ = "feurerm"
 
-from typing import Type, Dict, Union, Optional
+from typing import Dict, Optional, Type, Union
 
 import os
 from collections import OrderedDict
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index ac1978e80a..bd3164ced4 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -117,20 +117,20 @@ def __init__(
             init_params=init_params,
         )
 
-        if self.feat_type is None:
-            self._transformers: List[Tuple[str, AutoSklearnComponent]] = [
-                ("categorical_transformer", self.categ_ppl),
-                ("numerical_transformer", self.numer_ppl),
-                ("text_transformer", self.txt_ppl),
-            ]
-        else:
-            self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
-            if "categorical" in self.feat_type.values():
-                self._transformers.append(("categorical_transformer", self.categ_ppl))
-            if "numerical" in self.feat_type.values():
-                self._transformers.append(("numerical_transformer", self.numer_ppl))
-            if "string" in self.feat_type.values():
-                self._transformers.append(("text_transformer", self.txt_ppl))
+        # if self.feat_type is None:
+        #     self._transformers: List[Tuple[str, AutoSklearnComponent]] = [
+        #         ("categorical_transformer", self.categ_ppl),
+        #         ("numerical_transformer", self.numer_ppl),
+        #         ("text_transformer", self.txt_ppl),
+        #     ]
+        # else:
+        self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
+        if "categorical" in self.feat_type.values():
+            self._transformers.append(("categorical_transformer", self.categ_ppl))
+        if "numerical" in self.feat_type.values():
+            self._transformers.append(("numerical_transformer", self.numer_ppl))
+        if "string" in self.feat_type.values():
+            self._transformers.append(("text_transformer", self.txt_ppl))
 
         if self.config:
             self.set_hyperparameters(
@@ -328,7 +328,9 @@ def _get_hyperparameter_search_space_recursevely(
             if hasattr(st_operation, "get_hyperparameter_search_space"):
                 cs.add_configuration_space(
                     st_name,
-                    st_operation.get_hyperparameter_search_space(dataset_properties=dataset_properties),
+                    st_operation.get_hyperparameter_search_space(
+                        dataset_properties=dataset_properties
+                    ),
                 )
             else:
                 return FeatTypeSplit._get_hyperparameter_search_space_recursevely(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
index db4b2983e0..87b42ffe73 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union, Type
+from typing import Dict, Optional, Type, Union
 
 import os
 from collections import OrderedDict
@@ -23,6 +23,7 @@
 
 DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
 
+
 def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None:
     additional_components.add_component(preprocessor)
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
index d2c178adb2..8ef60a4629 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -127,8 +128,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         cs = ConfigurationSpace()
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
index 73fb9f3e11..215817e577 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -129,8 +130,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         cs = ConfigurationSpace()
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
index 8af0e130e8..5c6c8949d2 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.forbidden import (
@@ -67,8 +68,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         cs = ConfigurationSpace()
         n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
index bec513aec6..edbe399e10 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 import warnings
 
 import numpy as np
@@ -86,8 +87,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         n_components = UniformIntegerHyperparameter(
             "n_components", 10, 2000, default_value=100
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
index 6f9d6cd9f3..59b7eb2418 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
@@ -1,5 +1,4 @@
-from typing import Dict, Union, Optional
-from typing import Optional, Union
+from typing import Dict, Optional, Union
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -73,8 +72,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         gamma = UniformFloatHyperparameter(
             "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True
diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
index c7b770e7d2..aa7658c732 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause
 from ConfigSpace.hyperparameters import (
@@ -95,8 +96,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         cs = ConfigurationSpace()
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
index dc76a4a2a3..149fb96709 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
@@ -38,8 +39,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         cs = ConfigurationSpace()
         return cs
diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
index 29cc83603a..23f39c7e4d 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -98,8 +99,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         if dataset_properties is not None and (
             dataset_properties.get("sparse") is True
diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py
index 761525c8da..dd4871a840 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -59,8 +60,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         keep_variance = UniformFloatHyperparameter(
             "keep_variance", 0.5, 0.9999, default_value=0.9999
diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
index c563b1afbb..1fab87a700 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -58,8 +59,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         # More than degree 3 is too expensive!
         degree = UniformIntegerHyperparameter("degree", 2, 3, 2)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
index c41b051e9b..c6f8d61647 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -98,8 +99,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         n_estimators = UniformIntegerHyperparameter(
             name="n_estimators", lower=10, upper=100, default_value=10
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
index b5265ba122..08c6c929be 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 from functools import partial
 
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -114,8 +115,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         percentile = UniformFloatHyperparameter(
             name="percentile", lower=1, upper=99, default_value=50
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
index 021305777f..fe5fd8ad03 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 from functools import partial
 
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -57,8 +58,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         percentile = UniformFloatHyperparameter(
             "percentile", lower=1, upper=99, default_value=50
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
index d93f8e9528..9546c8e8c6 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 from functools import partial
 
 from ConfigSpace import NotEqualsCondition
@@ -120,8 +121,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=0.01, upper=0.5, default_value=0.1
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
index fcc59ab2c7..398ea6f23b 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Union
+
 from functools import partial
 
 from ConfigSpace import NotEqualsCondition
diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
index 4c9c84bf4d..78d52309ec 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
@@ -1,4 +1,5 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import UniformIntegerHyperparameter
 
@@ -52,8 +53,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[Dict[Union[str, int], str]] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         target_dim = UniformIntegerHyperparameter(
             "target_dim", 10, 256, default_value=128
diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py
index 5773aec45b..b750d7fb1c 100644
--- a/autosklearn/pipeline/components/regression/__init__.py
+++ b/autosklearn/pipeline/components/regression/__init__.py
@@ -1,4 +1,4 @@
-from typing import Type, Dict, Union, Optional
+from typing import Dict, Optional, Type, Union
 
 import os
 from collections import OrderedDict
diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py
index d8358c5628..9ffaf00c93 100644
--- a/autosklearn/pipeline/regression.py
+++ b/autosklearn/pipeline/regression.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union, Any
+from typing import Any, Dict, List, Optional, Union
 
 import copy
 from itertools import product
@@ -72,9 +72,9 @@ def __init__(
         feat_type: Optional[Dict[Union[str, int], str]] = None,
         config: Optional[Configuration] = None,
         steps=None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
+        dataset_properties: Dict[str, bool] = None,
+        include: Optional[Dict[str, List[str]]] = None,
+        exclude: Optional[Dict[str, List[str]]] = None,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
         init_params: Optional[Dict[str, Any]] = None,
     ):
@@ -120,7 +120,7 @@ def _get_hyperparameter_search_space(
         feat_type: Optional[Dict[Union[str, int], str]] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         """Return the configuration space for the CASH problem.
 
@@ -268,10 +268,12 @@ def _get_hyperparameter_search_space(
     def _get_estimator_components(self):
         return regression_components._regressors
 
-    def _get_pipeline_steps(self,
-                            dataset_properties: Optional[DATASET_PROPERTIES_TYPE],
-                            feat_type: Optional[Dict[Union[str, int], str]] = None,
-                            init_params: Optional[Dict[str, Any]] = None):
+    def _get_pipeline_steps(
+        self,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE],
+        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        init_params: Optional[Dict[str, Any]] = None,
+    ):
         steps = []
 
         default_dataset_properties = {"target_type": "regression"}

From 34bb58fbcb5ca0f5d405b979274c779126045741 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Thu, 16 Jun 2022 14:45:43 +0200
Subject: [PATCH 29/63] fix search space bug

---
 .../data_preprocessing/feature_type.py        | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index bd3164ced4..407b3ee626 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -117,20 +117,20 @@ def __init__(
             init_params=init_params,
         )
 
-        # if self.feat_type is None:
-        #     self._transformers: List[Tuple[str, AutoSklearnComponent]] = [
-        #         ("categorical_transformer", self.categ_ppl),
-        #         ("numerical_transformer", self.numer_ppl),
-        #         ("text_transformer", self.txt_ppl),
-        #     ]
-        # else:
-        self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
-        if "categorical" in self.feat_type.values():
-            self._transformers.append(("categorical_transformer", self.categ_ppl))
-        if "numerical" in self.feat_type.values():
-            self._transformers.append(("numerical_transformer", self.numer_ppl))
-        if "string" in self.feat_type.values():
-            self._transformers.append(("text_transformer", self.txt_ppl))
+        if self.feat_type is None:
+            self._transformers: List[Tuple[str, AutoSklearnComponent]] = [
+                ("categorical_transformer", self.categ_ppl),
+                ("numerical_transformer", self.numer_ppl),
+                ("text_transformer", self.txt_ppl),
+            ]
+        else:
+            self._transformers: List[Tuple[str, AutoSklearnComponent]] = []
+            if "categorical" in self.feat_type.values():
+                self._transformers.append(("categorical_transformer", self.categ_ppl))
+            if "numerical" in self.feat_type.values():
+                self._transformers.append(("numerical_transformer", self.numer_ppl))
+            if "string" in self.feat_type.values():
+                self._transformers.append(("text_transformer", self.txt_ppl))
 
         if self.config:
             self.set_hyperparameters(

From 6b0fdb49a1ef12b53b411102ffe11ace3fabffd1 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Thu, 16 Jun 2022 14:53:50 +0200
Subject: [PATCH 30/63] fix search space bug

---
 autosklearn/experimental/askl2.py                           | 6 ++++--
 autosklearn/metalearning/input/aslib_simple.py              | 3 ++-
 .../pipeline/components/data_preprocessing/feature_type.py  | 3 +--
 test/fixtures/ensembles.py                                  | 4 ++--
 test/test_evaluation/test_dummy_pipelines.py                | 2 +-
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py
index 24d02e544a..7984f2b33e 100644
--- a/autosklearn/experimental/askl2.py
+++ b/autosklearn/experimental/askl2.py
@@ -51,10 +51,11 @@ def __call__(
         initial_configurations = []
         for member in self.portfolio.values():
             try:
+                hp_names = scenario.cs.get_hyperparameter_names()
                 _member = {
                     key: member[key]
                     for key in member
-                    if key in scenario.cs.get_hyperparameter_names()
+                    if key in hp_names
                 }
                 initial_configurations.append(
                     Configuration(configuration_space=scenario.cs, values=_member)
@@ -108,10 +109,11 @@ def __call__(
         initial_configurations = []
         for member in self.portfolio.values():
             try:
+                hp_names = scenario.cs.get_hyperparameter_names()
                 _member = {
                     key: member[key]
                     for key in member
-                    if key in scenario.cs.get_hyperparameter_names()
+                    if key in hp_names
                 }
                 initial_configurations.append(
                     Configuration(configuration_space=scenario.cs, values=_member)
diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py
index 2a8d1e04d2..a1724a3846 100644
--- a/autosklearn/metalearning/input/aslib_simple.py
+++ b/autosklearn/metalearning/input/aslib_simple.py
@@ -145,6 +145,7 @@ def _read_configurations(self, filename):
             csv_reader = csv.DictReader(fh)
 
             configurations = dict()
+            hp_names = self.cs.get_hyperparameter_names()
             for line in csv_reader:
                 configuration = dict()
                 algorithm_id = line["idx"]
@@ -152,7 +153,7 @@ def _read_configurations(self, filename):
                     # Todo adapt to search space
                     if not value or hp_name == "idx":
                         continue
-                    if hp_name not in self.cs.get_hyperparameter_names():
+                    if hp_name not in hp_names:
                         continue
                     try:
                         value = int(value)
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index 407b3ee626..9f2d33597d 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -155,8 +155,7 @@ def fit(
                 columns = set(range(n_feats))
             if expected != columns:
                 try:
-                    # columns = [str(col) for col in columns]
-                    pass
+                    columns = [str(col) for col in columns]
                 except Exception as e:
                     raise ValueError(
                         f"Train data has columns={expected} yet the"
diff --git a/test/fixtures/ensembles.py b/test/fixtures/ensembles.py
index 32bb706eee..113974b256 100644
--- a/test/fixtures/ensembles.py
+++ b/test/fixtures/ensembles.py
@@ -45,7 +45,7 @@ def _make(
         if not models:
             models = [
                 MyDummyClassifier(
-                    feat_type={i: "numerical" for i in range(4)},
+                    feat_type={i: "numerical" for i in range(X.shape[1])},
                     config=1,
                     random_state=seed,
                 )
@@ -89,7 +89,7 @@ def _make(
         if not models:
             models = [
                 MyDummyRegressor(
-                    feat_type={i: "numerical" for i in range(4)},
+                    feat_type={i: "numerical" for i in range(X.shape[1])},
                     config=1,
                     random_state=seed,
                 )
diff --git a/test/test_evaluation/test_dummy_pipelines.py b/test/test_evaluation/test_dummy_pipelines.py
index cc7ea3c284..c69578420f 100644
--- a/test/test_evaluation/test_dummy_pipelines.py
+++ b/test/test_evaluation/test_dummy_pipelines.py
@@ -25,7 +25,7 @@ def test_dummy_pipeline(task_type: str) -> None:
 
     X, y = data_maker(random_state=0)
     estimator = estimator_class(
-        feat_type={i: "numerical" for i in range(X.shape[0])}, config=1, random_state=0
+        feat_type={i: "numerical" for i in range(X.shape[1])}, config=1, random_state=0
     )
     estimator.fit(X, y)
     check_is_fitted(estimator)

From 9096ea30898a47978c99b372fdf960a0e4e3ba16 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Thu, 16 Jun 2022 15:00:52 +0200
Subject: [PATCH 31/63] fix search space bug

---
 test/test_pipeline/test_base.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/test/test_pipeline/test_base.py b/test/test_pipeline/test_base.py
index f8cfe26912..33d57cd230 100644
--- a/test/test_pipeline/test_base.py
+++ b/test/test_pipeline/test_base.py
@@ -37,7 +37,11 @@ def test_get_hyperparameter_configuration_space_3choices(self):
 
         base = BasePipelineMock()
         cs = base._get_base_search_space(
-            cs, dataset_properties, exclude, include, pipeline
+            cs=cs,
+            dataset_properties=dataset_properties,
+            exclude=exclude,
+            include=include,
+            pipeline=pipeline
         )
 
         self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13)
@@ -51,7 +55,11 @@ def test_get_hyperparameter_configuration_space_3choices(self):
         dataset_properties = {"target_type": "classification", "signed": True}
         include = {"c": ["multinomial_nb"]}
         cs = base._get_base_search_space(
-            cs, dataset_properties, exclude, include, pipeline
+            cs=cs,
+            dataset_properties=dataset_properties,
+            exclude=exclude,
+            include=include,
+            pipeline=pipeline
         )
         self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13)
         self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 10)
@@ -66,7 +74,11 @@ def test_get_hyperparameter_configuration_space_3choices(self):
         dataset_properties = {"target_type": "classification", "signed": True}
         include = {}
         cs = base._get_base_search_space(
-            cs, dataset_properties, exclude, include, pipeline
+            cs=cs,
+            dataset_properties=dataset_properties,
+            exclude=exclude,
+            include=include,
+            pipeline=pipeline
         )
         self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13)
         self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15)
@@ -78,7 +90,11 @@ def test_get_hyperparameter_configuration_space_3choices(self):
         cs = ConfigSpace.configuration_space.ConfigurationSpace()
         dataset_properties = {"target_type": "classification", "sparse": True}
         cs = base._get_base_search_space(
-            cs, dataset_properties, exclude, include, pipeline
+            cs=cs,
+            dataset_properties=dataset_properties,
+            exclude=exclude,
+            include=include,
+            pipeline=pipeline
         )
         self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 12)
         self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15)
@@ -93,7 +109,11 @@ def test_get_hyperparameter_configuration_space_3choices(self):
             "signed": True,
         }
         cs = base._get_base_search_space(
-            cs, dataset_properties, exclude, include, pipeline
+            cs=cs,
+            dataset_properties=dataset_properties,
+            exclude=exclude,
+            include=include,
+            pipeline=pipeline
         )
 
         self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 12)

From 372d9791a0c9065a6c084fc8174bcc65b90ac724 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Thu, 16 Jun 2022 15:01:10 +0200
Subject: [PATCH 32/63] fix search space bug

---
 autosklearn/experimental/askl2.py | 12 ++----------
 test/test_pipeline/test_base.py   | 10 +++++-----
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py
index 7984f2b33e..078355dfbb 100644
--- a/autosklearn/experimental/askl2.py
+++ b/autosklearn/experimental/askl2.py
@@ -52,11 +52,7 @@ def __call__(
         for member in self.portfolio.values():
             try:
                 hp_names = scenario.cs.get_hyperparameter_names()
-                _member = {
-                    key: member[key]
-                    for key in member
-                    if key in hp_names
-                }
+                _member = {key: member[key] for key in member if key in hp_names}
                 initial_configurations.append(
                     Configuration(configuration_space=scenario.cs, values=_member)
                 )
@@ -110,11 +106,7 @@ def __call__(
         for member in self.portfolio.values():
             try:
                 hp_names = scenario.cs.get_hyperparameter_names()
-                _member = {
-                    key: member[key]
-                    for key in member
-                    if key in hp_names
-                }
+                _member = {key: member[key] for key in member if key in hp_names}
                 initial_configurations.append(
                     Configuration(configuration_space=scenario.cs, values=_member)
                 )
diff --git a/test/test_pipeline/test_base.py b/test/test_pipeline/test_base.py
index 33d57cd230..af5123b4f7 100644
--- a/test/test_pipeline/test_base.py
+++ b/test/test_pipeline/test_base.py
@@ -41,7 +41,7 @@ def test_get_hyperparameter_configuration_space_3choices(self):
             dataset_properties=dataset_properties,
             exclude=exclude,
             include=include,
-            pipeline=pipeline
+            pipeline=pipeline,
         )
 
         self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13)
@@ -59,7 +59,7 @@ def test_get_hyperparameter_configuration_space_3choices(self):
             dataset_properties=dataset_properties,
             exclude=exclude,
             include=include,
-            pipeline=pipeline
+            pipeline=pipeline,
         )
         self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13)
         self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 10)
@@ -78,7 +78,7 @@ def test_get_hyperparameter_configuration_space_3choices(self):
             dataset_properties=dataset_properties,
             exclude=exclude,
             include=include,
-            pipeline=pipeline
+            pipeline=pipeline,
         )
         self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13)
         self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15)
@@ -94,7 +94,7 @@ def test_get_hyperparameter_configuration_space_3choices(self):
             dataset_properties=dataset_properties,
             exclude=exclude,
             include=include,
-            pipeline=pipeline
+            pipeline=pipeline,
         )
         self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 12)
         self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15)
@@ -113,7 +113,7 @@ def test_get_hyperparameter_configuration_space_3choices(self):
             dataset_properties=dataset_properties,
             exclude=exclude,
             include=include,
-            pipeline=pipeline
+            pipeline=pipeline,
         )
 
         self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 12)

From 3b1105e4bf7c14bc24c98828e7f95a5127ad47bd Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Thu, 16 Jun 2022 16:13:30 +0200
Subject: [PATCH 33/63] fix search space bug

---
 autosklearn/pipeline/classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index 9ad54c3e41..6dd0d38d20 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -113,7 +113,7 @@ def fit_transformer(self, X, y, fit_params=None):
             )
             _init_params.update(self.init_params)
             self.set_hyperparameters(
-                configuration=self.config, init_params=_init_params
+                feat_type=self.feat_type, configuration=self.config, init_params=_init_params
             )
 
             if _fit_params is not None:

From b090ecfb588d55cc0dd6d0c91e2d675246c32c2d Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Thu, 16 Jun 2022 16:14:14 +0200
Subject: [PATCH 34/63] fix search space bug

---
 autosklearn/pipeline/classification.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index 6dd0d38d20..61cdef9f4f 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -113,7 +113,9 @@ def fit_transformer(self, X, y, fit_params=None):
             )
             _init_params.update(self.init_params)
             self.set_hyperparameters(
-                feat_type=self.feat_type, configuration=self.config, init_params=_init_params
+                feat_type=self.feat_type,
+                configuration=self.config,
+                init_params=_init_params,
             )
 
             if _fit_params is not None:

From 280d3d09f50c64baaf21c2c89edbeb6d3e106aab Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Mon, 4 Jul 2022 00:22:52 +0200
Subject: [PATCH 35/63] fix typing

---
 autosklearn/askl_typing.py                            |  3 +++
 autosklearn/evaluation/abstract_evaluator.py          |  7 ++++---
 autosklearn/pipeline/base.py                          | 11 ++++++-----
 autosklearn/pipeline/classification.py                |  7 ++++---
 autosklearn/pipeline/components/base.py               |  7 ++++---
 .../pipeline/components/classification/__init__.py    |  3 ++-
 .../components/data_preprocessing/__init__.py         |  5 +++--
 .../data_preprocessing/balancing/balancing.py         |  3 ++-
 .../categorical_encoding/__init__.py                  |  5 +++--
 .../categorical_encoding/encoding.py                  |  3 ++-
 .../categorical_encoding/no_encoding.py               |  3 ++-
 .../categorical_encoding/one_hot_encoding.py          |  3 ++-
 .../category_shift/category_shift.py                  |  3 ++-
 .../components/data_preprocessing/feature_type.py     |  9 +++++----
 .../data_preprocessing/feature_type_categorical.py    |  7 ++++---
 .../data_preprocessing/feature_type_numerical.py      |  7 ++++---
 .../data_preprocessing/feature_type_text.py           |  7 ++++---
 .../imputation/categorical_imputation.py              |  3 ++-
 .../imputation/numerical_imputation.py                |  3 ++-
 .../minority_coalescense/__init__.py                  |  5 +++--
 .../minority_coalescense/minority_coalescer.py        |  6 ++++--
 .../minority_coalescense/no_coalescense.py            |  5 +++--
 .../data_preprocessing/rescaling/__init__.py          |  3 ++-
 .../rescaling/abstract_rescaling.py                   |  3 ++-
 .../rescaling/quantile_transformer.py                 |  3 ++-
 .../data_preprocessing/rescaling/robust_scaler.py     |  3 ++-
 .../data_preprocessing/text_encoding/__init__.py      |  5 +++--
 .../text_encoding/bag_of_word_encoding.py             |  3 ++-
 .../text_encoding/bag_of_word_encoding_distinct.py    |  3 ++-
 .../text_encoding/tfidf_encoding.py                   |  3 ++-
 .../text_feature_reduction/truncated_svd.py           |  3 ++-
 .../variance_threshold/variance_threshold.py          |  3 ++-
 .../components/feature_preprocessing/__init__.py      |  3 ++-
 .../extra_trees_preproc_for_classification.py         |  3 ++-
 .../extra_trees_preproc_for_regression.py             |  3 ++-
 .../feature_preprocessing/feature_agglomeration.py    |  3 ++-
 .../components/feature_preprocessing/kernel_pca.py    |  3 ++-
 .../components/feature_preprocessing/kitchen_sinks.py |  3 ++-
 .../liblinear_svc_preprocessor.py                     |  3 ++-
 .../feature_preprocessing/no_preprocessing.py         |  3 ++-
 .../feature_preprocessing/nystroem_sampler.py         |  3 ++-
 .../pipeline/components/feature_preprocessing/pca.py  |  3 ++-
 .../components/feature_preprocessing/polynomial.py    |  3 ++-
 .../feature_preprocessing/random_trees_embedding.py   |  3 ++-
 .../select_percentile_classification.py               |  3 ++-
 .../select_percentile_regression.py                   |  3 ++-
 .../select_rates_classification.py                    |  3 ++-
 .../feature_preprocessing/select_rates_regression.py  |  8 ++++++--
 .../components/feature_preprocessing/truncatedSVD.py  |  3 ++-
 .../pipeline/components/regression/__init__.py        |  3 ++-
 autosklearn/pipeline/regression.py                    |  7 ++++---
 51 files changed, 135 insertions(+), 78 deletions(-)
 create mode 100644 autosklearn/askl_typing.py

diff --git a/autosklearn/askl_typing.py b/autosklearn/askl_typing.py
new file mode 100644
index 0000000000..61d01bef30
--- /dev/null
+++ b/autosklearn/askl_typing.py
@@ -0,0 +1,3 @@
+from typing import Dict, Union
+
+FEAT_TYPE_TYPE = Dict[Union[str, int], str]
diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py
index 233165e2a9..99453807eb 100644
--- a/autosklearn/evaluation/abstract_evaluator.py
+++ b/autosklearn/evaluation/abstract_evaluator.py
@@ -17,6 +17,7 @@
 
 import autosklearn.pipeline.classification
 import autosklearn.pipeline.regression
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.automl_common.common.utils.backend import Backend
 from autosklearn.constants import (
     CLASSIFICATION_TASKS,
@@ -45,7 +46,7 @@ def __init__(
         self,
         config: Configuration,
         random_state: Optional[Union[int, np.random.RandomState]],
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         init_params: Optional[Dict[str, Any]] = None,
         dataset_properties: Dict[str, Any] = {},
         include: Optional[List[str]] = None,
@@ -110,7 +111,7 @@ def __init__(
         self,
         config: Configuration,
         random_state: Optional[Union[int, np.random.RandomState]],
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         init_params: Optional[Dict[str, Any]] = None,
         dataset_properties: Dict[str, Any] = {},
         include: Optional[List[str]] = None,
@@ -304,7 +305,7 @@ def __init__(
         self.model = self._get_model(feat_type=self.feat_type)
 
     def _get_model(
-        self, feat_type: Optional[Dict[Union[str, int], str]]
+        self, feat_type: Optional[FEAT_TYPE_TYPE]
     ) -> BaseEstimator:
         if not isinstance(self.configuration, Configuration):
             model = self.model_class(
diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index 9eba727c4b..42a3fa029f 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -7,6 +7,7 @@
 from sklearn.pipeline import Pipeline
 
 import autosklearn.pipeline.create_searchspace_util
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 
 from .components.base import AutoSklearnChoice, AutoSklearnComponent
 
@@ -34,7 +35,7 @@ class BasePipeline(Pipeline):
 
     def __init__(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         config: Optional[Configuration] = None,
         steps=None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -211,7 +212,7 @@ def predict(self, X, batch_size=None):
     def set_hyperparameters(
         self,
         configuration: Configuration,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         init_params: Optional[Dict[str, Any]] = None,
     ):
         self.config = configuration
@@ -262,7 +263,7 @@ def set_hyperparameters(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         """Return the configuration space for the CASH problem.
@@ -284,7 +285,7 @@ def get_hyperparameter_search_space(
 
     def _get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -338,7 +339,7 @@ def _get_base_search_space(
         include: Dict[str, str],
         exclude: Dict[str, str],
         pipeline,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
     ):
         if include is None:
             if self.include is None:
diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index 61cdef9f4f..bbf56fa429 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -8,6 +8,7 @@
 from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause
 from sklearn.base import ClassifierMixin
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import BasePipeline
 from autosklearn.pipeline.components.classification import ClassifierChoice
 from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice
@@ -72,7 +73,7 @@ class SimpleClassificationPipeline(BasePipeline, ClassifierMixin):
 
     def __init__(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         config: Optional[Configuration] = None,
         steps=None,
         dataset_properties: Dict[str, bool] = None,
@@ -173,7 +174,7 @@ def predict_proba(self, X, batch_size=None):
 
     def _get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -360,7 +361,7 @@ def _get_hyperparameter_search_space(
     def _get_pipeline_steps(
         self,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE],
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
     ):
         steps = []
 
diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py
index 7185926931..4737dfb790 100644
--- a/autosklearn/pipeline/components/base.py
+++ b/autosklearn/pipeline/components/base.py
@@ -9,6 +9,7 @@
 from ConfigSpace.configuration_space import Configuration
 from sklearn.base import BaseEstimator, TransformerMixin
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.constants import SPARSE
 
 DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
@@ -102,7 +103,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         """Return the configuration space of this classification algorithm.
@@ -145,7 +146,7 @@ def fit(self, X, y):
     def set_hyperparameters(
         self,
         configuration: Configuration,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         init_params: Optional[Dict[str, Any]] = None,
     ):
         params = configuration.get_dictionary()
@@ -450,7 +451,7 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Dict[Union[str, int], str],
+        feat_type: FEAT_TYPE_TYPE,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default=None,
         include: Optional[Dict[str, str]] = None,
diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
index d1a7ea83cb..1967eec874 100644
--- a/autosklearn/pipeline/components/classification/__init__.py
+++ b/autosklearn/pipeline/components/classification/__init__.py
@@ -8,6 +8,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from ..base import (
     AutoSklearnChoice,
     AutoSklearnClassificationAlgorithm,
@@ -89,7 +90,7 @@ def get_available_components(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Dict[Union[str, int], str],
+        feat_type: FEAT_TYPE_TYPE,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default=None,
         include: Optional[Dict[str, str]] = None,
diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py
index 848bbb5913..72eeb51e07 100644
--- a/autosklearn/pipeline/components/data_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py
@@ -6,6 +6,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import PIPELINE_DATA_DTYPE
 
 from ..base import (
@@ -105,7 +106,7 @@ def get_available_components(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[Dict] = None,
         default: str = None,
         include: Optional[Dict] = None,
@@ -154,7 +155,7 @@ def set_hyperparameters(
         self,
         configuration: ConfigurationSpace,
         init_params: Optional[Dict] = None,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
     ) -> "DataPreprocessorChoice":
         config = {}
         params = configuration.get_dictionary()
diff --git a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
index 2cdd112ba6..106eb377f7 100644
--- a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
+++ b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
@@ -5,6 +5,7 @@
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 from sklearn.base import BaseEstimator
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import (
@@ -139,7 +140,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         # TODO add replace by zero!
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
index ba3a555027..188fc3fad2 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
@@ -8,6 +8,7 @@
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 from sklearn.base import BaseEstimator
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 
 from ...base import (
@@ -38,7 +39,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
@@ -88,7 +89,7 @@ def get_hyperparameter_search_space(
 
     def set_hyperparameters(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]],
+        feat_type: FEAT_TYPE_TYPE,
         configuration: Configuration,
         init_params: Optional[Dict[str, Any]] = None,
     ) -> "OHEChoice":
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
index a8a2d0a89d..7c904635f8 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py
@@ -5,6 +5,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from sklearn.preprocessing import OrdinalEncoder
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -69,7 +70,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py
index 9e356d9f41..cead9331d4 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py
@@ -3,6 +3,7 @@
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -44,7 +45,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py
index f6afe06c8e..989cf86680 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py
@@ -5,6 +5,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from sklearn.preprocessing import OneHotEncoder as DenseOneHotEncoder
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -55,7 +56,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
index 2d5e5607bd..65ec36f7e7 100644
--- a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
+++ b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
@@ -4,6 +4,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 
 import autosklearn.pipeline.implementations.CategoryShift
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -63,7 +64,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index 9f2d33597d..479af234ab 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -7,6 +7,7 @@
 from scipy import sparse
 from sklearn.base import BaseEstimator
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.data.validation import SUPPORTED_FEAT_TYPES, SUPPORTED_TARGET_TYPES
 from autosklearn.pipeline.base import (
     DATASET_PROPERTIES_TYPE,
@@ -46,7 +47,7 @@ def __init__(
         exclude: Optional[Dict[str, str]] = None,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
         init_params: Optional[Dict[str, Any]] = None,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         force_sparse_output: bool = False,
         column_transformer: Optional[sklearn.compose.ColumnTransformer] = None,
     ):
@@ -255,7 +256,7 @@ def get_properties(
 
     def set_hyperparameters(
         self,
-        feat_type: Dict[Union[str, int], str],
+        feat_type: FEAT_TYPE_TYPE,
         configuration: Configuration,
         init_params: Optional[Dict[str, Any]] = None,
     ) -> "FeatTypeSplit":
@@ -303,7 +304,7 @@ def set_hyperparameters(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         self.dataset_properties = dataset_properties
@@ -321,7 +322,7 @@ def _get_hyperparameter_search_space_recursevely(
         dataset_properties: DATASET_PROPERTIES_TYPE,
         cs: ConfigurationSpace,
         transformer: BaseEstimator,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
     ) -> ConfigurationSpace:
         for st_name, st_operation in transformer:
             if hasattr(st_operation, "get_hyperparameter_search_space"):
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
index 5ad0aabe70..07cfeb7fa5 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py
@@ -4,6 +4,7 @@
 from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
 from sklearn.base import BaseEstimator
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, BasePipeline
 from autosklearn.pipeline.components.data_preprocessing.categorical_encoding import (  # noqa: E501
     OHEChoice,
@@ -46,7 +47,7 @@ class CategoricalPreprocessingPipeline(BasePipeline):
 
     def __init__(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         config: Optional[Configuration] = None,
         steps: Optional[List[Tuple[str, BaseEstimator]]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -94,7 +95,7 @@ def get_properties(
 
     def _get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -124,7 +125,7 @@ def _get_hyperparameter_search_space(
 
     def _get_pipeline_steps(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[str, BaseEstimator]]:
         steps = []
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py
index fbba3b9172..5cc3f19561 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py
@@ -4,6 +4,7 @@
 from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
 from sklearn.base import BaseEstimator
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, BasePipeline
 from autosklearn.pipeline.components.data_preprocessing import (
     rescaling as rescaling_components,
@@ -39,7 +40,7 @@ class NumericalPreprocessingPipeline(BasePipeline):
 
     def __init__(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         config: Optional[Configuration] = None,
         steps: Optional[List[Tuple[str, BaseEstimator]]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -87,7 +88,7 @@ def get_properties(
 
     def _get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -120,7 +121,7 @@ def _get_hyperparameter_search_space(
 
     def _get_pipeline_steps(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[str, BaseEstimator]]:
         steps = []
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
index 5cbb962ae5..e92ef09c03 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py
@@ -4,6 +4,7 @@
 from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
 from sklearn.base import BaseEstimator
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, BasePipeline
 from autosklearn.pipeline.components.data_preprocessing.text_encoding import (
     BagOfWordChoice,
@@ -34,7 +35,7 @@ class TextPreprocessingPipeline(BasePipeline):
 
     def __init__(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         config: Optional[Configuration] = None,
         steps: Optional[List[Tuple[str, BaseEstimator]]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -81,7 +82,7 @@ def get_properties(
 
     def _get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -114,7 +115,7 @@ def _get_hyperparameter_search_space(
 
     def _get_pipeline_steps(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[str, BaseEstimator]]:
         steps = []
diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py
index 65a1542018..31b762eb60 100644
--- a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py
+++ b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py
@@ -4,6 +4,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from scipy.sparse import spmatrix
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -91,7 +92,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py
index b5945ca6a1..0d09b7bf11 100644
--- a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py
+++ b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py
@@ -4,6 +4,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -62,7 +63,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         # TODO add replace by zero!
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
index bef957b7f4..8912c781d2 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
@@ -8,6 +8,7 @@
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 from sklearn.base import BaseEstimator
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 
 from ...base import (
@@ -38,7 +39,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
@@ -90,7 +91,7 @@ def set_hyperparameters(
         self,
         configuration: Configuration,
         init_params: Optional[Dict[str, Any]] = None,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
     ) -> "CoalescenseChoice":
         new_params = {}
 
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
index 737e8c85f1..9c5dc6da56 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
@@ -5,6 +5,8 @@
 from ConfigSpace.hyperparameters import UniformFloatHyperparameter
 
 import autosklearn.pipeline.implementations.MinorityCoalescer
+
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -15,7 +17,7 @@ class MinorityCoalescer(AutoSklearnPreprocessingAlgorithm):
 
     def __init__(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         minimum_fraction: float = 0.01,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
     ) -> None:
@@ -60,7 +62,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
index 433d9a8247..f563371fe6 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
@@ -3,6 +3,7 @@
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -12,7 +13,7 @@ class NoCoalescence(AutoSklearnPreprocessingAlgorithm):
     def __init__(
         self,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
     ) -> None:
         pass
 
@@ -45,7 +46,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
index d7b01c7a93..957c985296 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
@@ -7,6 +7,7 @@
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 from sklearn.base import BaseEstimator
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import (  # noqa: E501
     Rescaling,
@@ -42,7 +43,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
index ba97eee886..e567f5cd2b 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
@@ -5,6 +5,7 @@
 from sklearn.base import BaseEstimator
 from sklearn.exceptions import NotFittedError
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 
@@ -38,7 +39,7 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py
index a797a5769a..51beabcc7a 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py
@@ -7,6 +7,7 @@
     UniformIntegerHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import (  # noqa: E501
@@ -62,7 +63,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py
index b9d25235d3..8762c1be96 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py
@@ -6,6 +6,7 @@
 from scipy import sparse
 from sklearn.exceptions import NotFittedError
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import (  # noqa: E501
@@ -59,7 +60,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
index 325beec9fa..1182cce461 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
@@ -8,6 +8,7 @@
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 from sklearn.base import BaseEstimator
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 
 from ...base import (
@@ -39,7 +40,7 @@ def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]:
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default: Optional[str] = None,
         include: Optional[Dict[str, str]] = None,
@@ -94,7 +95,7 @@ def set_hyperparameters(
         self,
         configuration: Configuration,
         init_params: Optional[Dict[str, Any]] = None,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
     ) -> "BagOfWordChoice":
         new_params = {}
 
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py
index 9810006ffa..a90b1c1fa4 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py
@@ -9,6 +9,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from sklearn.feature_extraction.text import CountVectorizer
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -95,7 +96,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py
index 2f23276824..de852b5d6b 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py
@@ -8,6 +8,7 @@
 from scipy.sparse import hstack
 from sklearn.feature_extraction.text import CountVectorizer
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -101,7 +102,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py
index 36238b4fa8..3956ec9eff 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py
@@ -9,6 +9,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from sklearn.feature_extraction.text import TfidfVectorizer
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -100,7 +101,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py
index be3ab9b00a..d6380e03dd 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_feature_reduction/truncated_svd.py
@@ -5,6 +5,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from sklearn.decomposition import TruncatedSVD
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -74,7 +75,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
index f11c07a2d2..eb917d6915 100644
--- a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
+++ b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py
@@ -4,6 +4,7 @@
 import sklearn.feature_selection
 from ConfigSpace.configuration_space import ConfigurationSpace
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
@@ -49,7 +50,7 @@ def get_properties(
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
index 87b42ffe73..22996b00a8 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
@@ -6,6 +6,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from ..base import (
     AutoSklearnChoice,
     AutoSklearnPreprocessingAlgorithm,
@@ -104,7 +105,7 @@ def get_available_components(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default=None,
         include: Optional[Dict[str, str]] = None,
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
index 8ef60a4629..f247fe1e90 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
@@ -9,6 +9,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
@@ -128,7 +129,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
index 215817e577..84e9d3afbc 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
@@ -10,6 +10,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
@@ -130,7 +131,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
index 5c6c8949d2..734beb834d 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
@@ -12,6 +12,7 @@
     UniformIntegerHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA
 
@@ -68,7 +69,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
index edbe399e10..5b7f04c2b4 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
@@ -11,6 +11,7 @@
     UniformIntegerHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA
 
@@ -87,7 +88,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         n_components = UniformIntegerHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
index 59b7eb2418..23382abcd8 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
@@ -7,6 +7,7 @@
 )
 from numpy.random import RandomState
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
@@ -72,7 +73,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         gamma = UniformFloatHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
index aa7658c732..59087ce249 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
@@ -8,6 +8,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
@@ -96,7 +97,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
index 149fb96709..979021a04a 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
@@ -2,6 +2,7 @@
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
@@ -39,7 +40,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         cs = ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
index 23f39c7e4d..9190c8f715 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
@@ -8,6 +8,7 @@
     UniformIntegerHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import (
     DENSE,
@@ -99,7 +100,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         if dataset_properties is not None and (
diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py
index dd4871a840..ea1f5a29b0 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py
@@ -7,6 +7,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool
@@ -60,7 +61,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         keep_variance = UniformFloatHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
index 1fab87a700..b44c0c3240 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
@@ -6,6 +6,7 @@
     UniformIntegerHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool
@@ -59,7 +60,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         # More than degree 3 is too expensive!
diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
index c6f8d61647..785458aa8e 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
@@ -8,6 +8,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
@@ -99,7 +100,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         n_estimators = UniformIntegerHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
index 08c6c929be..0f6dcab0f6 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
@@ -9,6 +9,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.components.feature_preprocessing.select_percentile import (
     SelectPercentileBase,
@@ -115,7 +116,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         percentile = UniformFloatHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
index fe5fd8ad03..61a0c8598d 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
@@ -8,6 +8,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.components.feature_preprocessing.select_percentile import (
     SelectPercentileBase,
@@ -58,7 +59,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         percentile = UniformFloatHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
index 9546c8e8c6..1d52628766 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
@@ -9,6 +9,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import (
     DENSE,
@@ -121,7 +122,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         alpha = UniformFloatHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
index 398ea6f23b..9e6be1ddc8 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
@@ -1,4 +1,4 @@
-from typing import Dict, Union
+from typing import Dict, Union, Optional
 
 from functools import partial
 
@@ -9,6 +9,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
@@ -88,7 +89,10 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
+    def get_hyperparameter_search_space(
+            feat_type: Optional[FEAT_TYPE_TYPE] = None,
+            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+    ):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=0.01, upper=0.5, default_value=0.1
         )
diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
index 78d52309ec..77e49028eb 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
@@ -3,6 +3,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import UniformIntegerHyperparameter
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
@@ -53,7 +54,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         target_dim = UniformIntegerHyperparameter(
diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py
index b750d7fb1c..f2c041d73b 100644
--- a/autosklearn/pipeline/components/regression/__init__.py
+++ b/autosklearn/pipeline/components/regression/__init__.py
@@ -6,6 +6,7 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from ..base import (
     AutoSklearnChoice,
     AutoSklearnRegressionAlgorithm,
@@ -82,7 +83,7 @@ def get_available_components(
 
     def get_hyperparameter_search_space(
         self,
-        feat_type: Dict[Union[str, int], str],
+        feat_type: FEAT_TYPE_TYPE,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
         default=None,
         include: Optional[Dict[str, str]] = None,
diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py
index 9ffaf00c93..43d0d8c1fa 100644
--- a/autosklearn/pipeline/regression.py
+++ b/autosklearn/pipeline/regression.py
@@ -8,6 +8,7 @@
 from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause
 from sklearn.base import RegressorMixin
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import BasePipeline
 from autosklearn.pipeline.components import (
     feature_preprocessing as feature_preprocessing_components,
@@ -69,7 +70,7 @@ class SimpleRegressionPipeline(RegressorMixin, BasePipeline):
 
     def __init__(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         config: Optional[Configuration] = None,
         steps=None,
         dataset_properties: Dict[str, bool] = None,
@@ -117,7 +118,7 @@ def predict(self, X, batch_size=None):
 
     def _get_hyperparameter_search_space(
         self,
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         include: Optional[Dict[str, str]] = None,
         exclude: Optional[Dict[str, str]] = None,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
@@ -271,7 +272,7 @@ def _get_estimator_components(self):
     def _get_pipeline_steps(
         self,
         dataset_properties: Optional[DATASET_PROPERTIES_TYPE],
-        feat_type: Optional[Dict[Union[str, int], str]] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         init_params: Optional[Dict[str, Any]] = None,
     ):
         steps = []

From f8df417a665db081dcb78e6ae642a35148781a83 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Mon, 4 Jul 2022 01:13:27 +0200
Subject: [PATCH 36/63] fixing pre-commit

---
 autosklearn/pipeline/components/data_preprocessing/__init__.py  | 2 +-
 .../data_preprocessing/categorical_encoding/__init__.py         | 2 +-
 .../data_preprocessing/minority_coalescense/__init__.py         | 2 +-
 .../components/data_preprocessing/rescaling/__init__.py         | 2 +-
 .../data_preprocessing/rescaling/abstract_rescaling.py          | 2 +-
 .../components/data_preprocessing/text_encoding/__init__.py     | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py
index 72eeb51e07..5f89548377 100644
--- a/autosklearn/pipeline/components/data_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Type, Union
+from typing import Dict, Optional, Type
 
 import os
 from collections import OrderedDict
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
index 188fc3fad2..5b1cf075b3 100644
--- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 import os
 from collections import OrderedDict
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
index 8912c781d2..85002ec349 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 import os
 from collections import OrderedDict
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
index 957c985296..9f83881472 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Dict, Optional
 
 import os
 from collections import OrderedDict
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
index e567f5cd2b..0b3244cc62 100644
--- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
+++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional, Union
 
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
index 1182cce461..bbfbf9196f 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional
 
 import os
 from collections import OrderedDict

From 5b8f0d55edd611ef87836225f9258f81d241b8ae Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Mon, 4 Jul 2022 01:29:38 +0200
Subject: [PATCH 37/63] fixing pre-commit

---
 autosklearn/evaluation/abstract_evaluator.py                | 4 +---
 autosklearn/pipeline/components/classification/__init__.py  | 1 +
 .../minority_coalescense/minority_coalescer.py              | 1 -
 .../pipeline/components/feature_preprocessing/__init__.py   | 1 +
 .../feature_preprocessing/select_rates_regression.py        | 6 +++---
 autosklearn/pipeline/components/regression/__init__.py      | 1 +
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py
index 99453807eb..fbdfd4463e 100644
--- a/autosklearn/evaluation/abstract_evaluator.py
+++ b/autosklearn/evaluation/abstract_evaluator.py
@@ -304,9 +304,7 @@ def __init__(
         # Please mypy to prevent not defined attr
         self.model = self._get_model(feat_type=self.feat_type)
 
-    def _get_model(
-        self, feat_type: Optional[FEAT_TYPE_TYPE]
-    ) -> BaseEstimator:
+    def _get_model(self, feat_type: Optional[FEAT_TYPE_TYPE]) -> BaseEstimator:
         if not isinstance(self.configuration, Configuration):
             model = self.model_class(
                 feat_type=feat_type,
diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
index 1967eec874..0233aee2b2 100644
--- a/autosklearn/pipeline/components/classification/__init__.py
+++ b/autosklearn/pipeline/components/classification/__init__.py
@@ -9,6 +9,7 @@
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 
 from autosklearn.askl_typing import FEAT_TYPE_TYPE
+
 from ..base import (
     AutoSklearnChoice,
     AutoSklearnClassificationAlgorithm,
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
index 9c5dc6da56..2533e92e8d 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py
@@ -5,7 +5,6 @@
 from ConfigSpace.hyperparameters import UniformFloatHyperparameter
 
 import autosklearn.pipeline.implementations.MinorityCoalescer
-
 from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
index 22996b00a8..46cd476337 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
@@ -7,6 +7,7 @@
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 
 from autosklearn.askl_typing import FEAT_TYPE_TYPE
+
 from ..base import (
     AutoSklearnChoice,
     AutoSklearnPreprocessingAlgorithm,
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
index 9e6be1ddc8..802a4c3267 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
@@ -1,4 +1,4 @@
-from typing import Dict, Union, Optional
+from typing import Dict, Optional, Union
 
 from functools import partial
 
@@ -90,8 +90,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-            feat_type: Optional[FEAT_TYPE_TYPE] = None,
-            dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
     ):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=0.01, upper=0.5, default_value=0.1
diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py
index f2c041d73b..806cef98f3 100644
--- a/autosklearn/pipeline/components/regression/__init__.py
+++ b/autosklearn/pipeline/components/regression/__init__.py
@@ -7,6 +7,7 @@
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
 
 from autosklearn.askl_typing import FEAT_TYPE_TYPE
+
 from ..base import (
     AutoSklearnChoice,
     AutoSklearnRegressionAlgorithm,

From 4739c7fac1703206f1ca36bf907becffb77d1e84 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Mon, 4 Jul 2022 15:19:04 +0200
Subject: [PATCH 38/63] fixing pre-commit

---
 autosklearn/pipeline/classification.py        | 20 +++++------
 autosklearn/pipeline/components/base.py       | 34 +++++++++++--------
 .../components/classification/__init__.py     | 10 +++---
 .../components/data_preprocessing/__init__.py |  2 +-
 .../minority_coalescense/no_coalescense.py    |  2 +-
 .../text_encoding/__init__.py                 |  2 +-
 .../feature_preprocessing/__init__.py         | 10 +++---
 .../extra_trees_preproc_for_classification.py |  7 ++--
 .../extra_trees_preproc_for_regression.py     |  7 ++--
 .../feature_agglomeration.py                  |  7 ++--
 .../feature_preprocessing/kernel_pca.py       |  7 ++--
 .../feature_preprocessing/kitchen_sinks.py    |  7 ++--
 .../liblinear_svc_preprocessor.py             |  7 ++--
 .../feature_preprocessing/no_preprocessing.py |  7 ++--
 .../feature_preprocessing/nystroem_sampler.py |  8 ++---
 .../components/feature_preprocessing/pca.py   |  7 ++--
 .../feature_preprocessing/polynomial.py       |  7 ++--
 .../random_trees_embedding.py                 |  7 ++--
 .../select_percentile_classification.py       |  7 ++--
 .../select_percentile_regression.py           |  7 ++--
 .../select_rates_classification.py            |  7 ++--
 .../select_rates_regression.py                |  7 ++--
 .../feature_preprocessing/truncatedSVD.py     |  7 ++--
 .../components/regression/__init__.py         | 10 +++---
 autosklearn/pipeline/regression.py            | 23 +++++--------
 25 files changed, 85 insertions(+), 141 deletions(-)

diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index bbf56fa429..e99dcc2cff 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Union
+from typing import Dict, Optional, Union
 
 import copy
 from itertools import product
@@ -76,11 +76,11 @@ def __init__(
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
         config: Optional[Configuration] = None,
         steps=None,
-        dataset_properties: Dict[str, bool] = None,
-        include: Optional[Dict[str, List[str]]] = None,
-        exclude: Optional[Dict[str, List[str]]] = None,
+        dataset_properties=None,
+        include=None,
+        exclude=None,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
-        init_params: Optional[Dict[str, Any]] = None,
+        init_params=None,
     ):
         self._output_dtype = np.int32
         if dataset_properties is None:
@@ -175,9 +175,9 @@ def predict_proba(self, X, batch_size=None):
     def _get_hyperparameter_search_space(
         self,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        include=None,
+        exclude=None,
+        dataset_properties=None,
     ):
         """Create the hyperparameter configuration space.
 
@@ -359,9 +359,7 @@ def _get_hyperparameter_search_space(
         return cs
 
     def _get_pipeline_steps(
-        self,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE],
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
+        self, dataset_properties, feat_type: Optional[FEAT_TYPE_TYPE] = None
     ):
         steps = []
 
diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py
index 4737dfb790..7b496842b2 100644
--- a/autosklearn/pipeline/components/base.py
+++ b/autosklearn/pipeline/components/base.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Dict, Optional
 
 import importlib
 import inspect
@@ -6,14 +6,11 @@
 import sys
 from collections import OrderedDict
 
-from ConfigSpace.configuration_space import Configuration
 from sklearn.base import BaseEstimator, TransformerMixin
 
 from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.constants import SPARSE
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 _addons = dict()  # type: Dict[str, 'ThirdPartyComponents']
 
 
@@ -103,14 +100,13 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         """Return the configuration space of this classification algorithm.
 
         Parameters
         ----------
-
+        feat_type : FEAT_TYPE_TYPE (default=None)
         dataset_properties : dict, optional (default=None)
 
         Returns
@@ -145,9 +141,9 @@ def fit(self, X, y):
 
     def set_hyperparameters(
         self,
-        configuration: Configuration,
+        configuration,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        init_params: Optional[Dict[str, Any]] = None,
+        init_params=None,
     ):
         params = configuration.get_dictionary()
 
@@ -351,7 +347,12 @@ def get_estimator(self):
 
 
 class AutoSklearnChoice(object):
-    def __init__(self, dataset_properties, feat_type=None, random_state=None):
+    def __init__(
+        self,
+        dataset_properties,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
+        random_state=None,
+    ):
         """
         Parameters
         ----------
@@ -426,7 +427,12 @@ def get_available_components(
 
         return components_dict
 
-    def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
+    def set_hyperparameters(
+        self,
+        configuration,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
+        init_params=None,
+    ):
         new_params = {}
 
         params = configuration.get_dictionary()
@@ -452,10 +458,10 @@ def set_hyperparameters(self, configuration, feat_type=None, init_params=None):
     def get_hyperparameter_search_space(
         self,
         feat_type: FEAT_TYPE_TYPE,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        dataset_properties=None,
         default=None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
+        include=None,
+        exclude=None,
     ):
         raise NotImplementedError()
 
diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
index 0233aee2b2..6475fa0156 100644
--- a/autosklearn/pipeline/components/classification/__init__.py
+++ b/autosklearn/pipeline/components/classification/__init__.py
@@ -1,6 +1,6 @@
 __author__ = "feurerm"
 
-from typing import Dict, Optional, Type, Union
+from typing import Type
 
 import os
 from collections import OrderedDict
@@ -25,8 +25,6 @@
 additional_components = ThirdPartyComponents(AutoSklearnClassificationAlgorithm)
 _addons["classification"] = additional_components
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 def add_classifier(classifier: Type[AutoSklearnClassificationAlgorithm]) -> None:
     additional_components.add_component(classifier)
@@ -92,10 +90,10 @@ def get_available_components(
     def get_hyperparameter_search_space(
         self,
         feat_type: FEAT_TYPE_TYPE,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        dataset_properties=None,
         default=None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
+        include=None,
+        exclude=None,
     ):
         if dataset_properties is None:
             dataset_properties = {}
diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py
index 5f89548377..3cc968f7d1 100644
--- a/autosklearn/pipeline/components/data_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py
@@ -154,8 +154,8 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
     def set_hyperparameters(
         self,
         configuration: ConfigurationSpace,
-        init_params: Optional[Dict] = None,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
+        init_params: Optional[Dict] = None,
     ) -> "DataPreprocessorChoice":
         config = {}
         params = configuration.get_dictionary()
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
index f563371fe6..2732795649 100644
--- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
+++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py
@@ -12,8 +12,8 @@
 class NoCoalescence(AutoSklearnPreprocessingAlgorithm):
     def __init__(
         self,
-        random_state: Optional[Union[int, np.random.RandomState]] = None,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
+        random_state: Optional[Union[int, np.random.RandomState]] = None,
     ) -> None:
         pass
 
diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
index bbfbf9196f..75c173e181 100644
--- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
+++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py
@@ -94,8 +94,8 @@ def get_hyperparameter_search_space(
     def set_hyperparameters(
         self,
         configuration: Configuration,
-        init_params: Optional[Dict[str, Any]] = None,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
+        init_params: Optional[Dict[str, Any]] = None,
     ) -> "BagOfWordChoice":
         new_params = {}
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
index 46cd476337..9a0bf69a30 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Type, Union
+from typing import Optional, Type
 
 import os
 from collections import OrderedDict
@@ -23,8 +23,6 @@
 additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm)
 _addons["feature_preprocessing"] = additional_components
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None:
     additional_components.add_component(preprocessor)
@@ -107,10 +105,10 @@ def get_available_components(
     def get_hyperparameter_search_space(
         self,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        dataset_properties=None,
         default=None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
+        include=None,
+        exclude=None,
     ):
         cs = ConfigurationSpace()
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
index f247fe1e90..904004b201 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -14,8 +14,6 @@
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class ExtraTreesPreprocessorClassification(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -129,8 +127,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         cs = ConfigurationSpace()
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
index 84e9d3afbc..10e741a44e 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -15,8 +15,6 @@
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class ExtraTreesPreprocessorRegression(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -131,8 +129,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         cs = ConfigurationSpace()
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
index 734beb834d..2a8db4eaad 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -16,8 +16,6 @@
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class FeatureAgglomeration(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, n_clusters, affinity, linkage, pooling_func, random_state=None):
@@ -69,8 +67,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         cs = ConfigurationSpace()
         n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
index 5b7f04c2b4..08c72efb6f 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 import warnings
 
@@ -15,8 +15,6 @@
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class KernelPCA(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -88,8 +86,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         n_components = UniformIntegerHyperparameter(
             "n_components", 10, 2000, default_value=100
diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
index 23382abcd8..4e6a348f17 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional, Union
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -11,8 +11,6 @@
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class RandomKitchenSinks(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -73,8 +71,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         gamma = UniformFloatHyperparameter(
             "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True
diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
index 59087ce249..7031089e91 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause
@@ -13,8 +13,6 @@
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class LibLinear_Preprocessor(AutoSklearnPreprocessingAlgorithm):
     # Liblinear is not deterministic as it uses a RNG inside
@@ -97,8 +95,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         cs = ConfigurationSpace()
 
diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
index 979021a04a..38c11bdb58 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 
@@ -6,8 +6,6 @@
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class NoPreprocessing(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, random_state):
@@ -40,8 +38,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         cs = ConfigurationSpace()
         return cs
diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
index 9190c8f715..a7dc227056 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -18,8 +18,6 @@
     UNSIGNED_DATA,
 )
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class Nystroem(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -55,7 +53,6 @@ def fit(self, X, Y=None):
         if self.kernel == "chi2":
             if scipy.sparse.issparse(X):
                 X.data[X.data < 0] = 0.0
-                X = X.todense()
             else:
                 X[X < 0] = 0.0
 
@@ -100,8 +97,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         if dataset_properties is not None and (
             dataset_properties.get("sparse") is True
diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py
index ea1f5a29b0..7c69f8eb80 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/pca.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -12,8 +12,6 @@
 from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class PCA(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, keep_variance, whiten, random_state=None):
@@ -61,8 +59,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         keep_variance = UniformFloatHyperparameter(
             "keep_variance", 0.5, 0.9999, default_value=0.9999
diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
index b44c0c3240..78e3ff2676 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -11,8 +11,6 @@
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class PolynomialFeatures(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, degree, interaction_only, include_bias, random_state=None):
@@ -60,8 +58,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         # More than degree 3 is too expensive!
         degree = UniformIntegerHyperparameter("degree", 2, 3, 2)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
index 785458aa8e..2b5aa340a9 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -13,8 +13,6 @@
 from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class RandomTreesEmbedding(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -100,8 +98,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         n_estimators = UniformIntegerHyperparameter(
             name="n_estimators", lower=10, upper=100, default_value=10
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
index 0f6dcab0f6..98495eaedb 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from functools import partial
 
@@ -22,8 +22,6 @@
     UNSIGNED_DATA,
 )
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class SelectPercentileClassification(
     SelectPercentileBase, AutoSklearnPreprocessingAlgorithm
@@ -116,8 +114,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         percentile = UniformFloatHyperparameter(
             name="percentile", lower=1, upper=99, default_value=50
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
index 61a0c8598d..a653dc4a7e 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from functools import partial
 
@@ -15,8 +15,6 @@
 )
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class SelectPercentileRegression(
     SelectPercentileBase, AutoSklearnPreprocessingAlgorithm
@@ -59,8 +57,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         percentile = UniformFloatHyperparameter(
             "percentile", lower=1, upper=99, default_value=50
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
index 1d52628766..3a728d753e 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from functools import partial
 
@@ -19,8 +19,6 @@
     UNSIGNED_DATA,
 )
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class SelectClassificationRates(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, alpha, mode="fpr", score_func="chi2", random_state=None):
@@ -122,8 +120,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=0.01, upper=0.5, default_value=0.1
diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
index 802a4c3267..89c84905b2 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from functools import partial
 
@@ -13,8 +13,6 @@
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class SelectRegressionRates(AutoSklearnPreprocessingAlgorithm):
     def __init__(
@@ -90,8 +88,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=0.01, upper=0.5, default_value=0.1
diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
index 77e49028eb..0c61e72c1c 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import UniformIntegerHyperparameter
@@ -7,8 +7,6 @@
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class TruncatedSVD(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, target_dim, random_state=None):
@@ -54,8 +52,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         target_dim = UniformIntegerHyperparameter(
             "target_dim", 10, 256, default_value=128
diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py
index 806cef98f3..0f693e2d08 100644
--- a/autosklearn/pipeline/components/regression/__init__.py
+++ b/autosklearn/pipeline/components/regression/__init__.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Type, Union
+from typing import Type
 
 import os
 from collections import OrderedDict
@@ -23,8 +23,6 @@
 additional_components = ThirdPartyComponents(AutoSklearnRegressionAlgorithm)
 _addons["regression"] = additional_components
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 def add_regressor(regressor: Type[AutoSklearnRegressionAlgorithm]) -> None:
     additional_components.add_component(regressor)
@@ -85,10 +83,10 @@ def get_available_components(
     def get_hyperparameter_search_space(
         self,
         feat_type: FEAT_TYPE_TYPE,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        dataset_properties=None,
         default=None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
+        include=None,
+        exclude=None,
     ):
         if include is not None and exclude is not None:
             raise ValueError(
diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py
index 43d0d8c1fa..0c42fa8e4d 100644
--- a/autosklearn/pipeline/regression.py
+++ b/autosklearn/pipeline/regression.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Union
+from typing import Optional, Union
 
 import copy
 from itertools import product
@@ -17,8 +17,6 @@
 from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice
 from autosklearn.pipeline.constants import SPARSE
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class SimpleRegressionPipeline(RegressorMixin, BasePipeline):
     """This class implements the regression task.
@@ -73,11 +71,11 @@ def __init__(
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
         config: Optional[Configuration] = None,
         steps=None,
-        dataset_properties: Dict[str, bool] = None,
-        include: Optional[Dict[str, List[str]]] = None,
-        exclude: Optional[Dict[str, List[str]]] = None,
+        dataset_properties=None,
+        include=None,
+        exclude=None,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
-        init_params: Optional[Dict[str, Any]] = None,
+        init_params=None,
     ):
         self._output_dtype = np.float32
         if dataset_properties is None:
@@ -119,9 +117,9 @@ def predict(self, X, batch_size=None):
     def _get_hyperparameter_search_space(
         self,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        include=None,
+        exclude=None,
+        dataset_properties=None,
     ):
         """Return the configuration space for the CASH problem.
 
@@ -270,10 +268,7 @@ def _get_estimator_components(self):
         return regression_components._regressors
 
     def _get_pipeline_steps(
-        self,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE],
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        init_params: Optional[Dict[str, Any]] = None,
+        self, dataset_properties, feat_type: Optional[FEAT_TYPE_TYPE] = None
     ):
         steps = []
 

From a260e395ee9a161fcecf2f5b67dbf0da6c840ce7 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Mon, 4 Jul 2022 15:22:08 +0200
Subject: [PATCH 39/63] fixing pre-commit

---
 autosklearn/pipeline/base.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index 42a3fa029f..889c0c82ff 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -35,14 +35,14 @@ class BasePipeline(Pipeline):
 
     def __init__(
         self,
+        config=None,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        config: Optional[Configuration] = None,
         steps=None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
-        random_state: Optional[Union[int, np.random.RandomState]] = None,
-        init_params: Optional[Dict[str, Any]] = None,
+        dataset_properties=None,
+        include=None,
+        exclude=None,
+        random_state=None,
+        init_params=None,
     ):
 
         self.init_params = init_params if init_params is not None else {}

From 8341e822723279ef40e12a778b30b245c69174a3 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Mon, 4 Jul 2022 15:27:24 +0200
Subject: [PATCH 40/63] fixing pre-commit

---
 autosklearn/pipeline/base.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index 889c0c82ff..b4647215c6 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -1,9 +1,9 @@
 from abc import ABCMeta
-from typing import Any, Dict, Optional, Union
+from typing import Dict, Optional, Union
 
 import numpy as np
 import scipy.sparse
-from ConfigSpace import Configuration, ConfigurationSpace
+from ConfigSpace import Configuration
 from sklearn.pipeline import Pipeline
 
 import autosklearn.pipeline.create_searchspace_util
@@ -211,9 +211,9 @@ def predict(self, X, batch_size=None):
 
     def set_hyperparameters(
         self,
-        configuration: Configuration,
+        configuration,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        init_params: Optional[Dict[str, Any]] = None,
+        init_params=None,
     ):
         self.config = configuration
 
@@ -262,9 +262,7 @@ def set_hyperparameters(
         return self
 
     def get_hyperparameter_search_space(
-        self,
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        self, feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
     ):
         """Return the configuration space for the CASH problem.
 
@@ -286,9 +284,9 @@ def get_hyperparameter_search_space(
     def _get_hyperparameter_search_space(
         self,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
-        include: Optional[Dict[str, str]] = None,
-        exclude: Optional[Dict[str, str]] = None,
-        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+        include=None,
+        exclude=None,
+        dataset_properties=None,
     ):
         """Return the configuration space for the CASH problem.
 
@@ -334,10 +332,10 @@ def _get_hyperparameter_search_space(
 
     def _get_base_search_space(
         self,
-        cs: ConfigurationSpace,
-        dataset_properties: DATASET_PROPERTIES_TYPE,
-        include: Dict[str, str],
-        exclude: Dict[str, str],
+        cs,
+        dataset_properties,
+        include,
+        exclude,
         pipeline,
         feat_type: Optional[FEAT_TYPE_TYPE] = None,
     ):
@@ -544,7 +542,9 @@ def __repr__(self):
 
         return return_value
 
-    def _get_pipeline_steps(self, dataset_properties, feat_type=None):
+    def _get_pipeline_steps(
+        self, dataset_properties, feat_type: Optional[FEAT_TYPE_TYPE] = None
+    ):
         raise NotImplementedError()
 
     def _get_estimator_hyperparameter_name(self):

From 6cf49dd577a59245c68cd02188868327ff7c532c Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Mon, 4 Jul 2022 15:29:42 +0200
Subject: [PATCH 41/63] fixing pre-commit

---
 autosklearn/pipeline/classification.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index e99dcc2cff..ba5a662a3f 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Union
+from typing import Optional, Union
 
 import copy
 from itertools import product
@@ -20,8 +20,6 @@
 )
 from autosklearn.pipeline.constants import SPARSE
 
-DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]]
-
 
 class SimpleClassificationPipeline(BasePipeline, ClassifierMixin):
     """This class implements the classification task.

From d0f9d963df0caf38917cacd12171110a4609b6f0 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Mon, 4 Jul 2022 15:32:14 +0200
Subject: [PATCH 42/63] fixing pre-commit

---
 autosklearn/metalearning/input/aslib_simple.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py
index a1724a3846..bbf4f44fb9 100644
--- a/autosklearn/metalearning/input/aslib_simple.py
+++ b/autosklearn/metalearning/input/aslib_simple.py
@@ -150,10 +150,11 @@ def _read_configurations(self, filename):
                 configuration = dict()
                 algorithm_id = line["idx"]
                 for hp_name, value in line.items():
-                    # Todo adapt to search space
                     if not value or hp_name == "idx":
                         continue
                     if hp_name not in hp_names:
+                        # skip meta learning configuration
+                        # if it is not existing in the current search space
                         continue
                     try:
                         value = int(value)

From adc011ef6f35bc8c750bad008d77ecebc5fd95f0 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Mon, 4 Jul 2022 17:10:35 +0200
Subject: [PATCH 43/63] fixing pre-commit

---
 .../pipeline/components/classification/bernoulli_nb.py     | 7 ++++++-
 .../pipeline/components/classification/decision_tree.py    | 7 ++++++-
 .../pipeline/components/classification/extra_trees.py      | 7 ++++++-
 .../pipeline/components/classification/gaussian_nb.py      | 7 ++++++-
 .../components/classification/gradient_boosting.py         | 7 ++++++-
 .../components/classification/k_nearest_neighbors.py       | 7 ++++++-
 autosklearn/pipeline/components/classification/lda.py      | 7 ++++++-
 .../pipeline/components/classification/liblinear_svc.py    | 7 ++++++-
 .../pipeline/components/classification/libsvm_svc.py       | 7 ++++++-
 autosklearn/pipeline/components/classification/mlp.py      | 7 ++++++-
 .../pipeline/components/classification/multinomial_nb.py   | 7 ++++++-
 .../components/classification/passive_aggressive.py        | 7 ++++++-
 autosklearn/pipeline/components/classification/qda.py      | 7 ++++++-
 .../pipeline/components/classification/random_forest.py    | 7 ++++++-
 autosklearn/pipeline/components/classification/sgd.py      | 7 ++++++-
 .../pipeline/components/feature_preprocessing/densifier.py | 7 ++++++-
 .../pipeline/components/feature_preprocessing/fast_ica.py  | 7 ++++++-
 autosklearn/pipeline/components/regression/adaboost.py     | 7 ++++++-
 .../pipeline/components/regression/ard_regression.py       | 7 ++++++-
 .../pipeline/components/regression/decision_tree.py        | 7 ++++++-
 autosklearn/pipeline/components/regression/extra_trees.py  | 7 ++++++-
 .../pipeline/components/regression/gaussian_process.py     | 7 ++++++-
 .../pipeline/components/regression/gradient_boosting.py    | 7 ++++++-
 .../pipeline/components/regression/k_nearest_neighbors.py  | 7 ++++++-
 .../pipeline/components/regression/liblinear_svr.py        | 7 ++++++-
 autosklearn/pipeline/components/regression/libsvm_svr.py   | 7 ++++++-
 autosklearn/pipeline/components/regression/mlp.py          | 7 ++++++-
 .../pipeline/components/regression/random_forest.py        | 7 ++++++-
 autosklearn/pipeline/components/regression/sgd.py          | 7 ++++++-
 examples/80_extending/example_extending_classification.py  | 7 ++++++-
 .../80_extending/example_extending_data_preprocessor.py    | 7 ++++++-
 examples/80_extending/example_extending_preprocessor.py    | 7 ++++++-
 examples/80_extending/example_extending_regression.py      | 7 ++++++-
 .../example_restrict_number_of_hyperparameters.py          | 6 +++++-
 test/test_pipeline/components/regression/test_mlp.py       | 2 +-
 35 files changed, 204 insertions(+), 35 deletions(-)

diff --git a/autosklearn/pipeline/components/classification/bernoulli_nb.py b/autosklearn/pipeline/components/classification/bernoulli_nb.py
index 8271c5f602..de52bc939c 100644
--- a/autosklearn/pipeline/components/classification/bernoulli_nb.py
+++ b/autosklearn/pipeline/components/classification/bernoulli_nb.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -5,6 +7,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool
@@ -64,7 +67,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         # the smoothing parameter is a non-negative float
diff --git a/autosklearn/pipeline/components/classification/decision_tree.py b/autosklearn/pipeline/components/classification/decision_tree.py
index fbfc6b7c6a..1369ecf906 100644
--- a/autosklearn/pipeline/components/classification/decision_tree.py
+++ b/autosklearn/pipeline/components/classification/decision_tree.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -8,6 +10,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 from autosklearn.pipeline.implementations.util import (
@@ -106,7 +109,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         criterion = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/classification/extra_trees.py b/autosklearn/pipeline/components/classification/extra_trees.py
index 5c7ce1879a..36edd82584 100644
--- a/autosklearn/pipeline/components/classification/extra_trees.py
+++ b/autosklearn/pipeline/components/classification/extra_trees.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -6,6 +8,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnClassificationAlgorithm,
     IterativeComponentWithSampleWeight,
@@ -156,7 +159,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         criterion = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/classification/gaussian_nb.py b/autosklearn/pipeline/components/classification/gaussian_nb.py
index 8e978e9631..bf43f4e4a5 100644
--- a/autosklearn/pipeline/components/classification/gaussian_nb.py
+++ b/autosklearn/pipeline/components/classification/gaussian_nb.py
@@ -1,6 +1,9 @@
+from typing import Optional
+
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA
 
@@ -55,6 +58,8 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         return cs
diff --git a/autosklearn/pipeline/components/classification/gradient_boosting.py b/autosklearn/pipeline/components/classification/gradient_boosting.py
index 50b0b284bd..618028dff7 100644
--- a/autosklearn/pipeline/components/classification/gradient_boosting.py
+++ b/autosklearn/pipeline/components/classification/gradient_boosting.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import numpy as np
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -9,6 +11,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnClassificationAlgorithm,
     IterativeComponentWithSampleWeight,
@@ -182,7 +185,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         loss = Constant("loss", "auto")
         learning_rate = UniformFloatHyperparameter(
diff --git a/autosklearn/pipeline/components/classification/k_nearest_neighbors.py b/autosklearn/pipeline/components/classification/k_nearest_neighbors.py
index fe55e0783d..d524bd42d9 100644
--- a/autosklearn/pipeline/components/classification/k_nearest_neighbors.py
+++ b/autosklearn/pipeline/components/classification/k_nearest_neighbors.py
@@ -1,9 +1,12 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
     UniformIntegerHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 
@@ -57,7 +60,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         n_neighbors = UniformIntegerHyperparameter(
diff --git a/autosklearn/pipeline/components/classification/lda.py b/autosklearn/pipeline/components/classification/lda.py
index 29a08f80b5..e7ebec290b 100644
--- a/autosklearn/pipeline/components/classification/lda.py
+++ b/autosklearn/pipeline/components/classification/lda.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.conditions import EqualsCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -5,6 +7,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA
 from autosklearn.pipeline.implementations.util import softmax
@@ -76,7 +79,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         shrinkage = CategoricalHyperparameter(
             "shrinkage", ["None", "auto", "manual"], default_value="None"
diff --git a/autosklearn/pipeline/components/classification/liblinear_svc.py b/autosklearn/pipeline/components/classification/liblinear_svc.py
index 3f57ef8f94..d1beb08837 100644
--- a/autosklearn/pipeline/components/classification/liblinear_svc.py
+++ b/autosklearn/pipeline/components/classification/liblinear_svc.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause
 from ConfigSpace.hyperparameters import (
@@ -6,6 +8,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 from autosklearn.pipeline.implementations.util import softmax
@@ -104,7 +107,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         penalty = CategoricalHyperparameter("penalty", ["l1", "l2"], default_value="l2")
diff --git a/autosklearn/pipeline/components/classification/libsvm_svc.py b/autosklearn/pipeline/components/classification/libsvm_svc.py
index ba423161c1..43bd017c5c 100644
--- a/autosklearn/pipeline/components/classification/libsvm_svc.py
+++ b/autosklearn/pipeline/components/classification/libsvm_svc.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import resource
 import sys
 
@@ -10,6 +12,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 from autosklearn.pipeline.implementations.util import softmax
@@ -138,7 +141,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0)
         # No linear kernel here, because we have liblinear
         kernel = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/classification/mlp.py b/autosklearn/pipeline/components/classification/mlp.py
index f7001d7bc1..d8c95fa9d0 100644
--- a/autosklearn/pipeline/components/classification/mlp.py
+++ b/autosklearn/pipeline/components/classification/mlp.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import copy
 
 import numpy as np
@@ -11,6 +13,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnClassificationAlgorithm,
     IterativeComponent,
@@ -203,7 +206,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         hidden_layer_depth = UniformIntegerHyperparameter(
             name="hidden_layer_depth", lower=1, upper=3, default_value=1
diff --git a/autosklearn/pipeline/components/classification/multinomial_nb.py b/autosklearn/pipeline/components/classification/multinomial_nb.py
index 7b65be8a5c..dee1507f01 100644
--- a/autosklearn/pipeline/components/classification/multinomial_nb.py
+++ b/autosklearn/pipeline/components/classification/multinomial_nb.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -5,6 +7,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SIGNED_DATA, SPARSE
 from autosklearn.util.common import check_for_bool
@@ -76,7 +79,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         # the smoothing parameter is a non-negative float
diff --git a/autosklearn/pipeline/components/classification/passive_aggressive.py b/autosklearn/pipeline/components/classification/passive_aggressive.py
index 494ea7db06..97a11a0283 100644
--- a/autosklearn/pipeline/components/classification/passive_aggressive.py
+++ b/autosklearn/pipeline/components/classification/passive_aggressive.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -6,6 +8,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnClassificationAlgorithm,
     IterativeComponentWithSampleWeight,
@@ -152,7 +155,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         C = UniformFloatHyperparameter("C", 1e-5, 10, 1.0, log=True)
         fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
         loss = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/classification/qda.py b/autosklearn/pipeline/components/classification/qda.py
index 7b25858392..0b6f6f7653 100644
--- a/autosklearn/pipeline/components/classification/qda.py
+++ b/autosklearn/pipeline/components/classification/qda.py
@@ -1,7 +1,10 @@
+from typing import Optional
+
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import UniformFloatHyperparameter
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA
 from autosklearn.pipeline.implementations.util import softmax
@@ -72,7 +75,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         reg_param = UniformFloatHyperparameter("reg_param", 0.0, 1.0, default_value=0.0)
         cs = ConfigurationSpace()
         cs.add_hyperparameter(reg_param)
diff --git a/autosklearn/pipeline/components/classification/random_forest.py b/autosklearn/pipeline/components/classification/random_forest.py
index 6ccd720b3a..892d8611d5 100644
--- a/autosklearn/pipeline/components/classification/random_forest.py
+++ b/autosklearn/pipeline/components/classification/random_forest.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -6,6 +8,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnClassificationAlgorithm,
     IterativeComponentWithSampleWeight,
@@ -149,7 +152,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         criterion = CategoricalHyperparameter(
             "criterion", ["gini", "entropy"], default_value="gini"
diff --git a/autosklearn/pipeline/components/classification/sgd.py b/autosklearn/pipeline/components/classification/sgd.py
index 469c2605dd..5073f8ec20 100644
--- a/autosklearn/pipeline/components/classification/sgd.py
+++ b/autosklearn/pipeline/components/classification/sgd.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -6,6 +8,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnClassificationAlgorithm,
     IterativeComponentWithSampleWeight,
@@ -169,7 +172,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         loss = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/feature_preprocessing/densifier.py b/autosklearn/pipeline/components/feature_preprocessing/densifier.py
index f5c88ecadf..f571d6abee 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/densifier.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/densifier.py
@@ -1,5 +1,8 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA
 
@@ -36,6 +39,8 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         return cs
diff --git a/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py b/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py
index 695ff3c2cc..fe23177fc9 100644
--- a/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py
+++ b/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import warnings
 
 from ConfigSpace.conditions import EqualsCondition
@@ -7,6 +9,7 @@
     UniformIntegerHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
@@ -74,7 +77,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         n_components = UniformIntegerHyperparameter(
diff --git a/autosklearn/pipeline/components/regression/adaboost.py b/autosklearn/pipeline/components/regression/adaboost.py
index e78a57e6a2..8faae821c3 100644
--- a/autosklearn/pipeline/components/regression/adaboost.py
+++ b/autosklearn/pipeline/components/regression/adaboost.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -5,6 +7,7 @@
     UniformIntegerHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 
@@ -62,7 +65,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         # base_estimator = Constant(name="base_estimator", value="None")
diff --git a/autosklearn/pipeline/components/regression/ard_regression.py b/autosklearn/pipeline/components/regression/ard_regression.py
index 219cb775af..758c4b04d7 100644
--- a/autosklearn/pipeline/components/regression/ard_regression.py
+++ b/autosklearn/pipeline/components/regression/ard_regression.py
@@ -1,9 +1,12 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     UniformFloatHyperparameter,
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool
@@ -89,7 +92,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         n_iter = UnParametrizedHyperparameter("n_iter", value=300)
         tol = UniformFloatHyperparameter(
diff --git a/autosklearn/pipeline/components/regression/decision_tree.py b/autosklearn/pipeline/components/regression/decision_tree.py
index db59767587..80890889f9 100644
--- a/autosklearn/pipeline/components/regression/decision_tree.py
+++ b/autosklearn/pipeline/components/regression/decision_tree.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import numpy as np
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -8,6 +10,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_none
@@ -96,7 +99,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         criterion = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/regression/extra_trees.py b/autosklearn/pipeline/components/regression/extra_trees.py
index c4646a2709..b1d8eeb00a 100644
--- a/autosklearn/pipeline/components/regression/extra_trees.py
+++ b/autosklearn/pipeline/components/regression/extra_trees.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -6,6 +8,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnRegressionAlgorithm,
     IterativeComponent,
@@ -148,7 +151,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         criterion = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/regression/gaussian_process.py b/autosklearn/pipeline/components/regression/gaussian_process.py
index 1acf238cd1..d08a3b0239 100644
--- a/autosklearn/pipeline/components/regression/gaussian_process.py
+++ b/autosklearn/pipeline/components/regression/gaussian_process.py
@@ -1,6 +1,9 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import UniformFloatHyperparameter
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA
 
@@ -65,7 +68,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=1e-14, upper=1.0, default_value=1e-8, log=True
         )
diff --git a/autosklearn/pipeline/components/regression/gradient_boosting.py b/autosklearn/pipeline/components/regression/gradient_boosting.py
index b7503f5fd0..16b7df965d 100644
--- a/autosklearn/pipeline/components/regression/gradient_boosting.py
+++ b/autosklearn/pipeline/components/regression/gradient_boosting.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import numpy as np
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -9,6 +11,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnRegressionAlgorithm,
     IterativeComponent,
@@ -166,7 +169,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         loss = CategoricalHyperparameter(
             "loss", ["least_squares"], default_value="least_squares"
diff --git a/autosklearn/pipeline/components/regression/k_nearest_neighbors.py b/autosklearn/pipeline/components/regression/k_nearest_neighbors.py
index 83c13cd191..c16e8a6404 100644
--- a/autosklearn/pipeline/components/regression/k_nearest_neighbors.py
+++ b/autosklearn/pipeline/components/regression/k_nearest_neighbors.py
@@ -1,9 +1,12 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
     UniformIntegerHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 
@@ -52,7 +55,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         n_neighbors = UniformIntegerHyperparameter(
diff --git a/autosklearn/pipeline/components/regression/liblinear_svr.py b/autosklearn/pipeline/components/regression/liblinear_svr.py
index e129331298..62e38c1551 100644
--- a/autosklearn/pipeline/components/regression/liblinear_svr.py
+++ b/autosklearn/pipeline/components/regression/liblinear_svr.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause
 from ConfigSpace.hyperparameters import (
@@ -6,6 +8,7 @@
     UniformFloatHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool
@@ -83,7 +86,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0)
         loss = CategoricalHyperparameter(
diff --git a/autosklearn/pipeline/components/regression/libsvm_svr.py b/autosklearn/pipeline/components/regression/libsvm_svr.py
index d4173d7f01..c3ac98b1f9 100644
--- a/autosklearn/pipeline/components/regression/libsvm_svr.py
+++ b/autosklearn/pipeline/components/regression/libsvm_svr.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import resource
 import sys
 
@@ -10,6 +12,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 from autosklearn.util.common import check_for_bool, check_none
@@ -149,7 +152,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         C = UniformFloatHyperparameter(
             name="C", lower=0.03125, upper=32768, log=True, default_value=1.0
         )
diff --git a/autosklearn/pipeline/components/regression/mlp.py b/autosklearn/pipeline/components/regression/mlp.py
index 645c29403a..42ceff4556 100644
--- a/autosklearn/pipeline/components/regression/mlp.py
+++ b/autosklearn/pipeline/components/regression/mlp.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import numpy as np
 from ConfigSpace.conditions import InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -9,6 +11,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnRegressionAlgorithm,
     IterativeComponent,
@@ -225,7 +228,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         hidden_layer_depth = UniformIntegerHyperparameter(
             name="hidden_layer_depth", lower=1, upper=3, default_value=1
diff --git a/autosklearn/pipeline/components/regression/random_forest.py b/autosklearn/pipeline/components/regression/random_forest.py
index 128113fc43..043d62e16b 100644
--- a/autosklearn/pipeline/components/regression/random_forest.py
+++ b/autosklearn/pipeline/components/regression/random_forest.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -6,6 +8,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnRegressionAlgorithm,
     IterativeComponent,
@@ -135,7 +138,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         criterion = CategoricalHyperparameter(
             "criterion", ["mse", "friedman_mse", "mae"]
diff --git a/autosklearn/pipeline/components/regression/sgd.py b/autosklearn/pipeline/components/regression/sgd.py
index 3b3f939fa8..915e45169f 100644
--- a/autosklearn/pipeline/components/regression/sgd.py
+++ b/autosklearn/pipeline/components/regression/sgd.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.conditions import EqualsCondition, InCondition
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -6,6 +8,7 @@
     UnParametrizedHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import (
     AutoSklearnRegressionAlgorithm,
     IterativeComponent,
@@ -185,7 +188,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         loss = CategoricalHyperparameter(
diff --git a/examples/80_extending/example_extending_classification.py b/examples/80_extending/example_extending_classification.py
index b5112c022b..9e46b9e8cd 100644
--- a/examples/80_extending/example_extending_classification.py
+++ b/examples/80_extending/example_extending_classification.py
@@ -6,6 +6,7 @@
 The following example demonstrates how to create a new classification
 component for using in auto-sklearn.
 """
+from typing import Optional
 from pprint import pprint
 
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -16,6 +17,8 @@
 )
 
 import sklearn.metrics
+
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 import autosklearn.classification
 import autosklearn.pipeline.components.classification
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
@@ -100,7 +103,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         hidden_layer_depth = UniformIntegerHyperparameter(
             name="hidden_layer_depth", lower=1, upper=3, default_value=1
diff --git a/examples/80_extending/example_extending_data_preprocessor.py b/examples/80_extending/example_extending_data_preprocessor.py
index aa5c443255..eb0325d9df 100644
--- a/examples/80_extending/example_extending_data_preprocessor.py
+++ b/examples/80_extending/example_extending_data_preprocessor.py
@@ -5,12 +5,15 @@
 
 The following example demonstrates how to turn off data preprocessing step in auto-skearn.
 """
+from typing import Optional
 from pprint import pprint
 
 import autosklearn.classification
 import autosklearn.pipeline.components.data_preprocessing
 import sklearn.metrics
 from ConfigSpace.configuration_space import ConfigurationSpace
+
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, INPUT
 from sklearn.datasets import load_breast_cancer
@@ -49,7 +52,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         return ConfigurationSpace()  # Return an empty configuration as there is None
 
 
diff --git a/examples/80_extending/example_extending_preprocessor.py b/examples/80_extending/example_extending_preprocessor.py
index 1eb3fc1daf..8516931780 100644
--- a/examples/80_extending/example_extending_preprocessor.py
+++ b/examples/80_extending/example_extending_preprocessor.py
@@ -7,6 +7,7 @@
 discriminant analysis (LDA) algorithm from sklearn and use it as a preprocessor
 in auto-sklearn.
 """
+from typing import Optional
 from pprint import pprint
 
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -17,6 +18,8 @@
 from ConfigSpace.conditions import InCondition
 
 import sklearn.metrics
+
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 import autosklearn.classification
 import autosklearn.pipeline.components.feature_preprocessing
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
@@ -76,7 +79,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         solver = CategoricalHyperparameter(
             name="solver", choices=["svd", "lsqr", "eigen"], default_value="svd"
diff --git a/examples/80_extending/example_extending_regression.py b/examples/80_extending/example_extending_regression.py
index 4d6987a9db..ad2fb8850f 100644
--- a/examples/80_extending/example_extending_regression.py
+++ b/examples/80_extending/example_extending_regression.py
@@ -6,6 +6,7 @@
 The following example demonstrates how to create a new regression
 component for using in auto-sklearn.
 """
+from typing import Optional
 from pprint import pprint
 
 from ConfigSpace.configuration_space import ConfigurationSpace
@@ -17,6 +18,8 @@
 from ConfigSpace.conditions import EqualsCondition
 
 import sklearn.metrics
+
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 import autosklearn.regression
 import autosklearn.pipeline.components.regression
 from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
@@ -86,7 +89,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
         alpha = UniformFloatHyperparameter(
             name="alpha", lower=10**-5, upper=1, log=True, default_value=1.0
diff --git a/examples/80_extending/example_restrict_number_of_hyperparameters.py b/examples/80_extending/example_restrict_number_of_hyperparameters.py
index d8bd2f4a98..a17aa128aa 100644
--- a/examples/80_extending/example_restrict_number_of_hyperparameters.py
+++ b/examples/80_extending/example_restrict_number_of_hyperparameters.py
@@ -7,6 +7,7 @@
 component with a new component, implementing the same classifier,
 but with different hyperparameters .
 """
+from typing import Optional
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
@@ -17,6 +18,7 @@
 from sklearn.datasets import load_breast_cancer
 from sklearn.model_selection import train_test_split
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 import autosklearn.classification
 import autosklearn.pipeline.components.classification
 from autosklearn.pipeline.components.classification import (
@@ -84,7 +86,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         # The maximum number of features used in the forest is calculated as m^max_features, where
diff --git a/test/test_pipeline/components/regression/test_mlp.py b/test/test_pipeline/components/regression/test_mlp.py
index 9e2a92acac..7052952302 100644
--- a/test/test_pipeline/components/regression/test_mlp.py
+++ b/test/test_pipeline/components/regression/test_mlp.py
@@ -47,7 +47,7 @@ class MLPComponentTest(BaseRegressionComponentTest):
     res["default_boston"] = 0.2750079862455884
     res["default_boston_places"] = 1
     res["boston_n_calls"] = [8, 9]
-    res["boston_iterative_n_iter"] = [236, 331]
+    res["boston_iterative_n_iter"] = [236, 327, 331]
     res["default_boston_iterative"] = res["default_boston"]
     res["default_boston_iterative_places"] = 1
     res["default_boston_sparse"] = -0.10972947168054104

From 3402ed976b0a90f92fead60cebf6728c54330764 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Mon, 4 Jul 2022 17:12:38 +0200
Subject: [PATCH 44/63] fixing pre-commit

---
 test/test_pipeline/components/regression/test_mlp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_pipeline/components/regression/test_mlp.py b/test/test_pipeline/components/regression/test_mlp.py
index 7052952302..5e89e7a452 100644
--- a/test/test_pipeline/components/regression/test_mlp.py
+++ b/test/test_pipeline/components/regression/test_mlp.py
@@ -47,7 +47,7 @@ class MLPComponentTest(BaseRegressionComponentTest):
     res["default_boston"] = 0.2750079862455884
     res["default_boston_places"] = 1
     res["boston_n_calls"] = [8, 9]
-    res["boston_iterative_n_iter"] = [236, 327, 331]
+    res["boston_iterative_n_iter"] = [236, 331, 327]
     res["default_boston_iterative"] = res["default_boston"]
     res["default_boston_iterative_places"] = 1
     res["default_boston_sparse"] = -0.10972947168054104

From 3bf835e3a7b1d78424b2d0ebbb1c2e8bd4a154bd Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Wed, 6 Jul 2022 13:02:49 +0200
Subject: [PATCH 45/63] adding new test. Test if new configuration space is
 correct.

---
 .../test_data_preprocessing_feat_type.py      | 128 ++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100644 test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py

diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
new file mode 100644
index 0000000000..3318f3e08b
--- /dev/null
+++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
@@ -0,0 +1,128 @@
+from autosklearn.pipeline.components.data_preprocessing.feature_type import (
+    FeatTypeSplit,
+)
+
+import unittest
+
+
+class PreprocessingPipelineFeatTypeTest(unittest.TestCase):
+    def test_single_type(self):
+        DPP = FeatTypeSplit(feat_type={"A": "numerical"})
+        cs = DPP.get_hyperparameter_search_space(
+            feat_type={"A": "numerical"},
+            dataset_properties={
+                "task": 1,
+                "sparse": False,
+                "multilabel": False,
+                "multiclass": False,
+                "target_type": "classification",
+                "signed": False,
+            },
+        )
+        for key in cs.get_hyperparameters_dict().keys():
+            self.assertNotIn("text", key.split(":")[0])
+            self.assertNotIn("categorical", key.split(":")[0])
+
+        DPP = FeatTypeSplit(feat_type={"A": "categorical"})
+        cs = DPP.get_hyperparameter_search_space(
+            feat_type={"A": "categorical"},
+            dataset_properties={
+                "task": 1,
+                "sparse": False,
+                "multilabel": False,
+                "multiclass": False,
+                "target_type": "classification",
+                "signed": False,
+            },
+        )
+        for key in cs.get_hyperparameters_dict().keys():
+            self.assertNotIn("text", key.split(":")[0])
+            self.assertNotIn("numerical", key.split(":")[0])
+
+        DPP = FeatTypeSplit(feat_type={"A": "string"})
+        cs = DPP.get_hyperparameter_search_space(
+            feat_type={"A": "string"},
+            dataset_properties={
+                "task": 1,
+                "sparse": False,
+                "multilabel": False,
+                "multiclass": False,
+                "target_type": "classification",
+                "signed": False,
+            },
+        )
+        for key in cs.get_hyperparameters_dict().keys():
+            self.assertNotIn("numerical", key.split(":")[0])
+            self.assertNotIn("categorical", key.split(":")[0])
+
+    def test_dual_type(self):
+        DPP = FeatTypeSplit(feat_type={"A": "numerical", "B": "categorical"})
+        cs = DPP.get_hyperparameter_search_space(
+            feat_type={"A": "numerical", "B": "categorical"},
+            dataset_properties={
+                "task": 1,
+                "sparse": False,
+                "multilabel": False,
+                "multiclass": False,
+                "target_type": "classification",
+                "signed": False,
+            },
+        )
+        for key in cs.get_hyperparameters_dict().keys():
+            self.assertNotIn("text", key.split(":")[0])
+
+        DPP = FeatTypeSplit(feat_type={"A": "categorical", "B": "string"})
+        cs = DPP.get_hyperparameter_search_space(
+            feat_type={"A": "categorical", "B": "string"},
+            dataset_properties={
+                "task": 1,
+                "sparse": False,
+                "multilabel": False,
+                "multiclass": False,
+                "target_type": "classification",
+                "signed": False,
+            },
+        )
+        for key in cs.get_hyperparameters_dict().keys():
+            self.assertNotIn("numerical", key.split(":")[0])
+
+        DPP = FeatTypeSplit(feat_type={"A": "string", "B": "categorical"})
+        cs = DPP.get_hyperparameter_search_space(
+            feat_type={"A": "string", "B": "categorical"},
+            dataset_properties={
+                "task": 1,
+                "sparse": False,
+                "multilabel": False,
+                "multiclass": False,
+                "target_type": "classification",
+                "signed": False,
+            },
+        )
+        for key in cs.get_hyperparameters_dict().keys():
+            self.assertNotIn("numerical", key.split(":")[0])
+
+    def test_triple_type(self):
+        DPP = FeatTypeSplit(
+            feat_type={"A": "numerical", "B": "categorical", "C": "string"}
+        )
+        cs = DPP.get_hyperparameter_search_space(
+            feat_type={"A": "numerical", "B": "categorical", "C": "string"},
+            dataset_properties={
+                "task": 1,
+                "sparse": False,
+                "multilabel": False,
+                "multiclass": False,
+                "target_type": "classification",
+                "signed": False,
+            },
+        )
+        truth_table = [False] * 3
+        for key in cs.get_hyperparameters_dict().keys():
+            if "text" in key.split(":")[0]:
+                truth_table[0] = True
+            elif "categorical" in key.split(":")[0]:
+                truth_table[1] = True
+            elif "numerical" in key.split(":")[0]:
+                truth_table[2] = True
+
+        self.assertEqual(sum(truth_table), 3)

From 869db4942495706982c4eb4f8ba881425764dcba Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 13:38:48 +0200
Subject: [PATCH 46/63] add new tests and fix some issues from PR

---
 autosklearn/pipeline/base.py                  |  4 +-
 .../data_preprocessing/feature_type.py        | 26 ++----
 .../pyMetaLearn/test_metalearner.py           | 28 ++++--
 test/test_pipeline/test_base.py               |  7 +-
 test/test_pipeline/test_classification.py     | 87 +++++++++++++++----
 5 files changed, 106 insertions(+), 46 deletions(-)

diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index b4647215c6..d138a32a84 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -221,7 +221,7 @@ def set_hyperparameters(
             node_name, node = n_
 
             sub_configuration_space = node.get_hyperparameter_search_space(
-                feat_type=feat_type, dataset_properties=self.dataset_properties
+                feat_type=self.feat_type, dataset_properties=self.dataset_properties
             )
             sub_config_dict = {}
             for param in configuration:
@@ -248,7 +248,7 @@ def set_hyperparameters(
                 node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline)
             ):
                 node.set_hyperparameters(
-                    feat_type=feat_type,
+                    feat_type=self.feat_type,
                     configuration=sub_configuration,
                     init_params=sub_init_params_dict,
                 )
diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
index 479af234ab..11085a1f74 100644
--- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py
+++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py
@@ -155,15 +155,11 @@ def fit(
             else:
                 columns = set(range(n_feats))
             if expected != columns:
-                try:
-                    columns = [str(col) for col in columns]
-                except Exception as e:
-                    raise ValueError(
-                        f"Train data has columns={expected} yet the"
-                        f" feat_types are feat={columns}\n"
-                        f"Exception: {e}"
-                    )
-            transformer_lst = []
+                raise ValueError(
+                    f"Train data has columns={expected} yet the"
+                    f" feat_types are feat={columns}"
+                )
+            sklearn_transf_spec = []
 
             categorical_features = [
                 key
@@ -171,7 +167,7 @@ def fit(
                 if value.lower() == "categorical"
             ]
             if len(categorical_features) > 0:
-                transformer_lst.append(
+                sklearn_transf_spec.append(
                     ("categorical_transformer", self.categ_ppl, categorical_features)
                 )
 
@@ -181,7 +177,7 @@ def fit(
                 if value.lower() == "numerical"
             ]
             if len(numerical_features) > 0:
-                transformer_lst.append(
+                sklearn_transf_spec.append(
                     ("numerical_transformer", self.numer_ppl, numerical_features)
                 )
 
@@ -191,15 +187,9 @@ def fit(
                 if value.lower() == "string"
             ]
             if len(text_features) > 0:
-                transformer_lst.append(
+                sklearn_transf_spec.append(
                     ("text_transformer", self.txt_ppl, text_features)
                 )
-
-            sklearn_transf_spec = [
-                (name, transformer, feature_columns)
-                for name, transformer, feature_columns in transformer_lst
-                if len(feature_columns) > 0
-            ]
         else:
             # self.feature_type == None assumes numerical case
             sklearn_transf_spec = [
diff --git a/test/test_metalearning/pyMetaLearn/test_metalearner.py b/test/test_metalearning/pyMetaLearn/test_metalearner.py
index 42d27d49da..9e7a54a77f 100644
--- a/test/test_metalearning/pyMetaLearn/test_metalearner.py
+++ b/test/test_metalearning/pyMetaLearn/test_metalearner.py
@@ -23,14 +23,26 @@ def setUp(self):
         data_dir = os.path.join(data_dir, "test_meta_base_data")
         os.chdir(data_dir)
 
-        pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline()
-        self.cs = pipeline.get_hyperparameter_search_space()
-
-        self.logger = logging.getLogger()
-        meta_base = MetaBase(self.cs, data_dir, logger=self.logger)
-        self.meta_optimizer = metalearner.MetaLearningOptimizer(
-            "233", self.cs, meta_base, logger=self.logger
-        )
+        for feat_type in [
+            None,
+            {"A": "numerical"},
+            {"A": "categorical"},
+            {"A": "string"},
+            {"A": "numerical", "B": "categorical"},
+            {"A": "numerical", "B": "string"},
+            {"A": "categorical", "B": "string"},
+            {"A": "categorical", "B": "string", "C": "numerical"},
+        ]:
+            pipeline = (
+                autosklearn.pipeline.classification.SimpleClassificationPipeline()
+            )
+            self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type)
+
+            self.logger = logging.getLogger()
+            meta_base = MetaBase(self.cs, data_dir, logger=self.logger)
+            self.meta_optimizer = metalearner.MetaLearningOptimizer(
+                "233", self.cs, meta_base, logger=self.logger
+            )
 
     def tearDown(self):
         os.chdir(self.cwd)
diff --git a/test/test_pipeline/test_base.py b/test/test_pipeline/test_base.py
index af5123b4f7..1b604caf58 100644
--- a/test/test_pipeline/test_base.py
+++ b/test/test_pipeline/test_base.py
@@ -1,17 +1,20 @@
+from typing import Optional
+
 import ConfigSpace.configuration_space
 
 import autosklearn.pipeline.base
 import autosklearn.pipeline.components.base
 import autosklearn.pipeline.components.classification as classification
 import autosklearn.pipeline.components.feature_preprocessing as feature_preprocessing
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 
 import unittest
 import unittest.mock
 
 
 class BasePipelineMock(autosklearn.pipeline.base.BasePipeline):
-    def __init__(self):
-        pass
+    def __init__(self, feat_type: Optional[FEAT_TYPE_TYPE] = None):
+        self.feat_type = feat_type
 
 
 class BaseTest(unittest.TestCase):
diff --git a/test/test_pipeline/test_classification.py b/test/test_pipeline/test_classification.py
index 7be8038119..eb127ad02d 100644
--- a/test/test_pipeline/test_classification.py
+++ b/test/test_pipeline/test_classification.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Union
+from typing import Any, Dict, Optional, Union
 
 import copy
 import itertools
@@ -20,6 +20,7 @@
 
 import autosklearn.pipeline.components.classification as classification_components
 import autosklearn.pipeline.components.feature_preprocessing as preprocessing_components
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.classification import SimpleClassificationPipeline
 from autosklearn.pipeline.components.base import (
     AutoSklearnChoice,
@@ -352,16 +353,6 @@ def test_configurations_categorical_data(self):
         -------
         * All configurations should fit, predict and predict_proba successfully
         """
-        pipeline = SimpleClassificationPipeline(
-            dataset_properties={"sparse": False},
-            include={
-                "feature_preprocessor": ["no_preprocessing"],
-                "classifier": ["sgd", "adaboost"],
-            },
-        )
-
-        cs = pipeline.get_hyperparameter_search_space()
-
         categorical_columns = [
             True,
             True,
@@ -407,6 +398,17 @@ def test_configurations_categorical_data(self):
             for i, is_categorical in enumerate(categorical_columns)
         }
 
+        pipeline = SimpleClassificationPipeline(
+            feat_type=categorical,
+            dataset_properties={"sparse": False},
+            include={
+                "feature_preprocessor": ["no_preprocessing"],
+                "classifier": ["sgd", "adaboost"],
+            },
+        )
+
+        cs = pipeline.get_hyperparameter_search_space()
+
         here = os.path.dirname(__file__)
         dataset_path = os.path.join(
             here, "components", "data_preprocessing", "dataset.pkl"
@@ -429,7 +431,10 @@ def test_configurations_categorical_data(self):
         init_params = {"data_preprocessor:feat_type": categorical}
 
         self._test_configurations(
-            configurations_space=cs, dataset=data, init_params=init_params
+            configurations_space=cs,
+            dataset=data,
+            init_params=init_params,
+            feat_type=categorical,
         )
 
     @unittest.mock.patch(
@@ -459,7 +464,8 @@ def test_categorical_passed_to_one_hot_encoder(self, ohe_mock):
             feat_types = {0: "categorical", 1: "numerical"}
 
             cls = SimpleClassificationPipeline(
-                init_params={"data_preprocessor:feat_type": feat_types}
+                feat_type=feat_types,
+                init_params={"data_preprocessor:feat_type": feat_types},
             )
 
             init_args = ohe_mock.call_args[1]["init_params"]
@@ -485,6 +491,7 @@ def _test_configurations(
         init_params: Dict[str, Any] = None,
         dataset_properties: Dict[str, Any] = None,
         n_samples: int = 10,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
     ):
         """Tests a configuration space by taking multiple samples and fiting each
         before calling predict and predict_proba.
@@ -560,7 +567,9 @@ def _test_configurations(
             init_params_ = copy.deepcopy(init_params)
 
             cls = SimpleClassificationPipeline(
-                dataset_properties=dataset_properties, init_params=init_params_
+                feat_type=feat_type,
+                dataset_properties=dataset_properties,
+                init_params=init_params_,
             )
             cls.set_hyperparameters(config, init_params=init_params_)
 
@@ -659,7 +668,9 @@ def test_get_hyperparameter_search_space(self):
         * (n_hyperparameters - 4) different conditionals for the pipeline
         * 53 forbidden combinations
         """
-        pipeline = SimpleClassificationPipeline()
+        pipeline = SimpleClassificationPipeline(
+            feat_type={"A": "numerical", "B": "categorical", "C": "string"}
+        )
         cs = pipeline.get_hyperparameter_search_space()
         self.assertIsInstance(cs, ConfigurationSpace)
 
@@ -897,7 +908,10 @@ def test_predict_proba_batched(self):
             perform near identically
         """
         # Multiclass
-        cls = SimpleClassificationPipeline(include={"classifier": ["sgd"]})
+        cls = SimpleClassificationPipeline(
+            feat_type={i: "numerical" for i in range(0, 64)},
+            include={"classifier": ["sgd"]},
+        )
         X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits")
 
         with ignore_warnings(classifier_warnings):
@@ -951,6 +965,7 @@ def test_predict_proba_batched_sparse(self):
             perform near identically
         """
         cls = SimpleClassificationPipeline(
+            feat_type={i: "numerical" for i in range(0, 64)},
             dataset_properties={"sparse": True, "multiclass": True},
             include={"classifier": ["sgd"]},
         )
@@ -1313,3 +1328,43 @@ def test_fit_instantiates_component(self):
         del preprocessing_components.additional_components.components[
             "CrashPreprocessor"
         ]
+
+    def test_get_hyperparameter_search_space_feat_type(self):
+        cs_mc = SimpleClassificationPipeline(
+            feat_type={"1": "numerical"}
+        ).get_hyperparameter_search_space(dataset_properties={"multiclass": True})
+        self.assertNotIn("data_preprocessor:feature_type:categorical", str(cs_mc))
+        self.assertNotIn("data_preprocessor:feature_type:text", str(cs_mc))
+
+        cs_mc = SimpleClassificationPipeline(
+            feat_type={"1": "categorical"}
+        ).get_hyperparameter_search_space(dataset_properties={"multilabel": True})
+        self.assertNotIn("data_preprocessor:feature_type:numerical", str(cs_mc))
+        self.assertNotIn("data_preprocessor:feature_type:text", str(cs_mc))
+
+        cs_mc = SimpleClassificationPipeline(
+            feat_type={"1": "string"}
+        ).get_hyperparameter_search_space(dataset_properties={"sparse": True})
+        self.assertNotIn("data_preprocessor:feature_type:numerical", str(cs_mc))
+        self.assertNotIn("data_preprocessor:feature_type:categorical", str(cs_mc))
+
+        cs_mc = SimpleClassificationPipeline(
+            feat_type={"1": "numerical", "2": "categorical"}
+        ).get_hyperparameter_search_space(
+            dataset_properties={"multilabel": True, "multiclass": True}
+        )
+        self.assertNotIn("data_preprocessor:feature_type:text", str(cs_mc))
+
+        cs_mc = SimpleClassificationPipeline(
+            feat_type={"1": "numerical", "2": "string"}
+        ).get_hyperparameter_search_space(
+            dataset_properties={"multilabel": True, "multiclass": True}
+        )
+        self.assertNotIn("data_preprocessor:feature_type:categorical", str(cs_mc))
+
+        cs_mc = SimpleClassificationPipeline(
+            feat_type={"1": "categorical", "2": "string"}
+        ).get_hyperparameter_search_space(
+            dataset_properties={"multilabel": True, "multiclass": True}
+        )
+        self.assertNotIn("data_preprocessor:feature_type:numerical", str(cs_mc))

From e546632de030a7f26b1bca666dfc35854c5773b0 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 13:57:49 +0200
Subject: [PATCH 47/63] add new tests and fix some issues from PR

---
 autosklearn/pipeline/classification.py | 2 +-
 autosklearn/pipeline/regression.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
index ba5a662a3f..332c076b9b 100644
--- a/autosklearn/pipeline/classification.py
+++ b/autosklearn/pipeline/classification.py
@@ -71,8 +71,8 @@ class SimpleClassificationPipeline(BasePipeline, ClassifierMixin):
 
     def __init__(
         self,
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         config: Optional[Configuration] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         steps=None,
         dataset_properties=None,
         include=None,
diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py
index 0c42fa8e4d..dcc2fa3fcf 100644
--- a/autosklearn/pipeline/regression.py
+++ b/autosklearn/pipeline/regression.py
@@ -68,8 +68,8 @@ class SimpleRegressionPipeline(RegressorMixin, BasePipeline):
 
     def __init__(
         self,
-        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         config: Optional[Configuration] = None,
+        feat_type: Optional[FEAT_TYPE_TYPE] = None,
         steps=None,
         dataset_properties=None,
         include=None,

From 63b94847a18ec94828d5d6e8126ff9edbe6808cc Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 14:34:37 +0200
Subject: [PATCH 48/63] add new tests and fix some issues from PR

---
 .../test_data_preprocessing_feat_type.py          | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
index 3318f3e08b..c5c5369ded 100644
--- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
+++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
@@ -101,6 +101,21 @@ def test_dual_type(self):
         for key in cs.get_hyperparameters_dict().keys():
             self.assertNotIn("numerical", key.split(":")[0])
 
+        DPP = FeatTypeSplit(feat_type={"A": "string", "B": "numerical"})
+        cs = DPP.get_hyperparameter_search_space(
+            feat_type={"A": "string", "B": "numerical"},
+            dataset_properties={
+                "task": 1,
+                "sparse": False,
+                "multilabel": False,
+                "multiclass": False,
+                "target_type": "classification",
+                "signed": False,
+            },
+        )
+        for key in cs.get_hyperparameters_dict().keys():
+            self.assertNotIn("categorical", key.split(":")[0])
+
     def test_triple_type(self):
         DPP = FeatTypeSplit(
             feat_type={"A": "numerical", "B": "categorical", "C": "string"}

From 4f47450d2b54fe3672fc38c304226a1eb7fa6a9c Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 14:38:48 +0200
Subject: [PATCH 49/63] add new tests and fix some issues from PR

---
 autosklearn/metalearning/input/aslib_simple.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py
index bbf4f44fb9..871cccd2c1 100644
--- a/autosklearn/metalearning/input/aslib_simple.py
+++ b/autosklearn/metalearning/input/aslib_simple.py
@@ -153,7 +153,7 @@ def _read_configurations(self, filename):
                     if not value or hp_name == "idx":
                         continue
                     if hp_name not in hp_names:
-                        # skip meta learning configuration
+                        # skip hyperparameter
                         # if it is not existing in the current search space
                         continue
                     try:

From 2764037e2e4e0cdb209f42b5f5d464153b3e5e24 Mon Sep 17 00:00:00 2001
From: Eddie Bergman <eddiebergmanhs@gmail.com>
Date: Fri, 8 Jul 2022 14:46:25 +0200
Subject: [PATCH 50/63] fix-1535-Exception-in-the-fit()-call-of-AutoSklearn
 (#1539)

* Create PR

* Fix test fixture
---
 test/fixtures/ensemble_building.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/fixtures/ensemble_building.py b/test/fixtures/ensemble_building.py
index 548d1c5d72..20e771fd1a 100644
--- a/test/fixtures/ensemble_building.py
+++ b/test/fixtures/ensemble_building.py
@@ -231,7 +231,10 @@ def _make(
             # Hence, we take the y_train of the datamanager and use that as the
             # the targets
             if "Y_train" in datamanager.data:
-                backend.save_targets_ensemble(datamanager.data["Y_train"])
+                backend.save_additional_data(
+                    datamanager.data["Y_train"],
+                    what="targets_ensemble",
+                )
 
         return EnsembleBuilderManager(
             backend=backend,

From 519ce4dcc79e7eb78b666d7f41a291f8950c17cd Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 15:36:32 +0200
Subject: [PATCH 51/63] add new tests and fix some issues from PR

---
 autosklearn/util/pipeline.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py
index 6ff74b0fb7..5d3b132343 100755
--- a/autosklearn/util/pipeline.py
+++ b/autosklearn/util/pipeline.py
@@ -11,7 +11,7 @@
     MULTIOUTPUT_REGRESSION,
     REGRESSION_TASKS,
 )
-from autosklearn.data.xy_data_manager import XYDataManager
+from autosklearn.data.abstract_data_manager import AbstractDataManager
 from autosklearn.pipeline.classification import SimpleClassificationPipeline
 from autosklearn.pipeline.regression import SimpleRegressionPipeline
 
@@ -19,7 +19,7 @@
 
 
 def get_configuration_space(
-    datamanager: XYDataManager,
+    datamanager: AbstractDataManager,
     include: Optional[Dict[str, List[str]]] = None,
     exclude: Optional[Dict[str, List[str]]] = None,
     random_state: Optional[Union[int, np.random.RandomState]] = None,
@@ -56,7 +56,7 @@ def get_configuration_space(
 
 
 def _get_regression_configuration_space(
-    datamanager: XYDataManager,
+    datamanager: AbstractDataManager,
     include: Optional[Dict[str, List[str]]],
     exclude: Optional[Dict[str, List[str]]],
     random_state: Optional[Union[int, np.random.RandomState]] = None,
@@ -104,7 +104,7 @@ def _get_regression_configuration_space(
 
 
 def _get_classification_configuration_space(
-    datamanager: XYDataManager,
+    datamanager: AbstractDataManager,
     include: Optional[Dict[str, List[str]]],
     exclude: Optional[Dict[str, List[str]]],
     random_state: Optional[Union[int, np.random.RandomState]] = None,

From 05d4ca85849fae89a532de99c777ea7517c77686 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 16:06:26 +0200
Subject: [PATCH 52/63] add new tests and fix some issues from PR

---
 autosklearn/pipeline/base.py                  |  4 +--
 .../pyMetaLearn/test_metalearner.py           | 28 ++++++-------------
 .../test_data_preprocessing_feat_type.py      | 15 ----------
 3 files changed, 10 insertions(+), 37 deletions(-)

diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
index d138a32a84..b4647215c6 100644
--- a/autosklearn/pipeline/base.py
+++ b/autosklearn/pipeline/base.py
@@ -221,7 +221,7 @@ def set_hyperparameters(
             node_name, node = n_
 
             sub_configuration_space = node.get_hyperparameter_search_space(
-                feat_type=self.feat_type, dataset_properties=self.dataset_properties
+                feat_type=feat_type, dataset_properties=self.dataset_properties
             )
             sub_config_dict = {}
             for param in configuration:
@@ -248,7 +248,7 @@ def set_hyperparameters(
                 node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline)
             ):
                 node.set_hyperparameters(
-                    feat_type=self.feat_type,
+                    feat_type=feat_type,
                     configuration=sub_configuration,
                     init_params=sub_init_params_dict,
                 )
diff --git a/test/test_metalearning/pyMetaLearn/test_metalearner.py b/test/test_metalearning/pyMetaLearn/test_metalearner.py
index 9e7a54a77f..42d27d49da 100644
--- a/test/test_metalearning/pyMetaLearn/test_metalearner.py
+++ b/test/test_metalearning/pyMetaLearn/test_metalearner.py
@@ -23,26 +23,14 @@ def setUp(self):
         data_dir = os.path.join(data_dir, "test_meta_base_data")
         os.chdir(data_dir)
 
-        for feat_type in [
-            None,
-            {"A": "numerical"},
-            {"A": "categorical"},
-            {"A": "string"},
-            {"A": "numerical", "B": "categorical"},
-            {"A": "numerical", "B": "string"},
-            {"A": "categorical", "B": "string"},
-            {"A": "categorical", "B": "string", "C": "numerical"},
-        ]:
-            pipeline = (
-                autosklearn.pipeline.classification.SimpleClassificationPipeline()
-            )
-            self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type)
-
-            self.logger = logging.getLogger()
-            meta_base = MetaBase(self.cs, data_dir, logger=self.logger)
-            self.meta_optimizer = metalearner.MetaLearningOptimizer(
-                "233", self.cs, meta_base, logger=self.logger
-            )
+        pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline()
+        self.cs = pipeline.get_hyperparameter_search_space()
+
+        self.logger = logging.getLogger()
+        meta_base = MetaBase(self.cs, data_dir, logger=self.logger)
+        self.meta_optimizer = metalearner.MetaLearningOptimizer(
+            "233", self.cs, meta_base, logger=self.logger
+        )
 
     def tearDown(self):
         os.chdir(self.cwd)
diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
index c5c5369ded..6135883705 100644
--- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
+++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
@@ -86,21 +86,6 @@ def test_dual_type(self):
         for key in cs.get_hyperparameters_dict().keys():
             self.assertNotIn("numerical", key.split(":")[0])
 
-        DPP = FeatTypeSplit(feat_type={"A": "string", "B": "categorical"})
-        cs = DPP.get_hyperparameter_search_space(
-            feat_type={"A": "string", "B": "categorical"},
-            dataset_properties={
-                "task": 1,
-                "sparse": False,
-                "multilabel": False,
-                "multiclass": False,
-                "target_type": "classification",
-                "signed": False,
-            },
-        )
-        for key in cs.get_hyperparameters_dict().keys():
-            self.assertNotIn("numerical", key.split(":")[0])
-
         DPP = FeatTypeSplit(feat_type={"A": "string", "B": "numerical"})
         cs = DPP.get_hyperparameter_search_space(
             feat_type={"A": "string", "B": "numerical"},

From c6dbab3ea23c88a76d3c8375ba1af2f3cfb6c853 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 16:20:47 +0200
Subject: [PATCH 53/63] changing metalearning test to check whether the
 configspace adapts to feat_type or not

---
 .../pyMetaLearn/test_metalearner.py           | 29 ++++++++++++++-----
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/test/test_metalearning/pyMetaLearn/test_metalearner.py b/test/test_metalearning/pyMetaLearn/test_metalearner.py
index 42d27d49da..2b7b5c51c6 100644
--- a/test/test_metalearning/pyMetaLearn/test_metalearner.py
+++ b/test/test_metalearning/pyMetaLearn/test_metalearner.py
@@ -23,14 +23,27 @@ def setUp(self):
         data_dir = os.path.join(data_dir, "test_meta_base_data")
         os.chdir(data_dir)
 
-        pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline()
-        self.cs = pipeline.get_hyperparameter_search_space()
-
-        self.logger = logging.getLogger()
-        meta_base = MetaBase(self.cs, data_dir, logger=self.logger)
-        self.meta_optimizer = metalearner.MetaLearningOptimizer(
-            "233", self.cs, meta_base, logger=self.logger
-        )
+        for feat_type, cs_size in [
+            ({"A": "numerical"}, 165),
+            ({"A": "categorical"}, 162),
+            ({"A": "string"}, 174),
+            ({"A": "numerical", "B": "categorical"}, 168),
+            ({"A": "numerical", "B": "string"}, 180),
+            ({"A": "categorical", "B": "string"}, 177),
+            ({"A": "categorical", "B": "string", "C": "numerical"}, 183),
+        ]:
+            pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline(
+                feat_type=feat_type
+            )
+            self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type)
+            # print(self.cs.get_default_configuration())
+
+            self.logger = logging.getLogger()
+            meta_base = MetaBase(self.cs, data_dir, logger=self.logger)
+            self.meta_optimizer = metalearner.MetaLearningOptimizer(
+                "233", self.cs, meta_base, logger=self.logger
+            )
+            self.assertEqual(len(self.meta_optimizer.configuration_space), cs_size)
 
     def tearDown(self):
         os.chdir(self.cwd)

From 65b037829c5f643b7e59131883aeeb21e4e25873 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 16:29:46 +0200
Subject: [PATCH 54/63] in `test_data_preprocessing_feat_type.py` check for
 configuration space size

---
 .../test_data_preprocessing_feat_type.py                   | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
index 6135883705..0cf5ee6bd2 100644
--- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
+++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_feat_type.py
@@ -22,6 +22,7 @@ def test_single_type(self):
         for key in cs.get_hyperparameters_dict().keys():
             self.assertNotIn("text", key.split(":")[0])
             self.assertNotIn("categorical", key.split(":")[0])
+        self.assertEqual(len(cs), 6)
 
         DPP = FeatTypeSplit(feat_type={"A": "categorical"})
         cs = DPP.get_hyperparameter_search_space(
@@ -38,6 +39,7 @@ def test_single_type(self):
         for key in cs.get_hyperparameters_dict().keys():
             self.assertNotIn("text", key.split(":")[0])
             self.assertNotIn("numerical", key.split(":")[0])
+        self.assertEqual(len(cs), 3)
 
         DPP = FeatTypeSplit(feat_type={"A": "string"})
         cs = DPP.get_hyperparameter_search_space(
@@ -54,6 +56,7 @@ def test_single_type(self):
         for key in cs.get_hyperparameters_dict().keys():
             self.assertNotIn("numerical", key.split(":")[0])
             self.assertNotIn("categorical", key.split(":")[0])
+        self.assertEqual(len(cs), 15)
 
     def test_dual_type(self):
         DPP = FeatTypeSplit(feat_type={"A": "numerical", "B": "categorical"})
@@ -70,6 +73,7 @@ def test_dual_type(self):
         )
         for key in cs.get_hyperparameters_dict().keys():
             self.assertNotIn("text", key.split(":")[0])
+        self.assertEqual(len(cs), 9)
 
         DPP = FeatTypeSplit(feat_type={"A": "categorical", "B": "string"})
         cs = DPP.get_hyperparameter_search_space(
@@ -85,6 +89,7 @@ def test_dual_type(self):
         )
         for key in cs.get_hyperparameters_dict().keys():
             self.assertNotIn("numerical", key.split(":")[0])
+        self.assertEqual(len(cs), 18)
 
         DPP = FeatTypeSplit(feat_type={"A": "string", "B": "numerical"})
         cs = DPP.get_hyperparameter_search_space(
@@ -100,6 +105,7 @@ def test_dual_type(self):
         )
         for key in cs.get_hyperparameters_dict().keys():
             self.assertNotIn("categorical", key.split(":")[0])
+        self.assertEqual(len(cs), 21)
 
     def test_triple_type(self):
         DPP = FeatTypeSplit(
@@ -126,3 +132,4 @@ def test_triple_type(self):
                 truth_table[2] = True
 
         self.assertEqual(sum(truth_table), 3)
+        self.assertEqual(len(cs), 24)

From f00aa76711ceb41aeeed957338ebee677923e63c Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 17:30:43 +0200
Subject: [PATCH 55/63] in `test_data_preprocessing_feat_type.py` check for
 configuration space size

---
 .../components/classification/__init__.py     |  4 +-
 .../components/classification/adaboost.py     |  7 +++-
 .../components/regression/__init__.py         |  4 +-
 autosklearn/util/pipeline.py                  | 12 +++---
 scripts/02_retrieve_metadata.py               |  2 +-
 test/fixtures/ensembles.py                    |  4 +-
 .../pyMetaLearn/test_metalearner.py           | 30 +++++---------
 .../test_metalearning_configuration.py        | 40 +++++++++++++++++++
 8 files changed, 70 insertions(+), 33 deletions(-)
 create mode 100644 test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py

diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
index 6475fa0156..31fa2ea9ca 100644
--- a/autosklearn/pipeline/components/classification/__init__.py
+++ b/autosklearn/pipeline/components/classification/__init__.py
@@ -133,7 +133,9 @@ def get_hyperparameter_search_space(
         for estimator_name in available_estimators.keys():
             estimator_configuration_space = available_estimators[
                 estimator_name
-            ].get_hyperparameter_search_space(dataset_properties=dataset_properties)
+            ].get_hyperparameter_search_space(
+                feat_type=feat_type, dataset_properties=dataset_properties
+            )
             parent_hyperparameter = {"parent": estimator, "value": estimator_name}
             cs.add_configuration_space(
                 estimator_name,
diff --git a/autosklearn/pipeline/components/classification/adaboost.py b/autosklearn/pipeline/components/classification/adaboost.py
index 3634f53956..08a9bc06bd 100644
--- a/autosklearn/pipeline/components/classification/adaboost.py
+++ b/autosklearn/pipeline/components/classification/adaboost.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import (
     CategoricalHyperparameter,
@@ -5,6 +7,7 @@
     UniformIntegerHyperparameter,
 )
 
+from autosklearn.askl_typing import FEAT_TYPE_TYPE
 from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
 from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA
 
@@ -68,7 +71,9 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(
+        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
+    ):
         cs = ConfigurationSpace()
 
         n_estimators = UniformIntegerHyperparameter(
diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py
index 0f693e2d08..9d1ef58650 100644
--- a/autosklearn/pipeline/components/regression/__init__.py
+++ b/autosklearn/pipeline/components/regression/__init__.py
@@ -123,7 +123,9 @@ def get_hyperparameter_search_space(
         for estimator_name in available_estimators.keys():
             estimator_configuration_space = available_estimators[
                 estimator_name
-            ].get_hyperparameter_search_space(dataset_properties=dataset_properties)
+            ].get_hyperparameter_search_space(
+                feat_type=feat_type, dataset_properties=dataset_properties
+            )
             parent_hyperparameter = {"parent": estimator, "value": estimator_name}
             cs.add_configuration_space(
                 estimator_name,
diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py
index 5d3b132343..f0a66a2a86 100755
--- a/autosklearn/util/pipeline.py
+++ b/autosklearn/util/pipeline.py
@@ -28,8 +28,8 @@ def get_configuration_space(
 
     Parameters
     ----------
-    datamanager: XYDataManager
-        XYDataManger object storing all important information about the dataset
+    datamanager: AbstractDataManager
+        AbstractDataManager object storing all important information about the dataset
 
     include: Optional[Dict[str, List[str]]] = None
         A dictionary of what components to include for each pipeline step
@@ -65,8 +65,8 @@ def _get_regression_configuration_space(
 
     Parameters
     ----------
-    datamanager: XYDataManager
-        XYDataManger object storing all important information about the dataset
+    datamanager: AbstractDataManager
+        AbstractDataManager object storing all important information about the dataset
 
     include: Optional[Dict[str, List[str]]] = None
         A dictionary of what components to include for each pipeline step
@@ -113,8 +113,8 @@ def _get_classification_configuration_space(
 
     Parameters
     ----------
-    datamanager: XYDataManager
-         XYDataManger object storing all important information about the dataset
+    datamanager: AbstractDataManager
+         AbstractDataManager object storing all important information about the dataset
 
     include: Optional[Dict[str, List[str]]] = None
         A dictionary of what components to include for each pipeline step
diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py
index 931ad499f5..8c3a2e5468 100644
--- a/scripts/02_retrieve_metadata.py
+++ b/scripts/02_retrieve_metadata.py
@@ -181,7 +181,7 @@ def write_output(outputs, configurations, output_dir, configuration_space, metri
 class DummyDatamanager():
     def __init__(self, info):
         self.info = info
-        self.feat_type = {"A1": "numerical"}
+        self.feat_type = None
 
 def main():
     parser = ArgumentParser()
diff --git a/test/fixtures/ensembles.py b/test/fixtures/ensembles.py
index 113974b256..80a3425e2d 100644
--- a/test/fixtures/ensembles.py
+++ b/test/fixtures/ensembles.py
@@ -45,7 +45,7 @@ def _make(
         if not models:
             models = [
                 MyDummyClassifier(
-                    feat_type={i: "numerical" for i in range(X.shape[1])},
+                    feat_type=None,
                     config=1,
                     random_state=seed,
                 )
@@ -89,7 +89,7 @@ def _make(
         if not models:
             models = [
                 MyDummyRegressor(
-                    feat_type={i: "numerical" for i in range(X.shape[1])},
+                    feat_type=None,
                     config=1,
                     random_state=seed,
                 )
diff --git a/test/test_metalearning/pyMetaLearn/test_metalearner.py b/test/test_metalearning/pyMetaLearn/test_metalearner.py
index 2b7b5c51c6..33ce3f9f88 100644
--- a/test/test_metalearning/pyMetaLearn/test_metalearner.py
+++ b/test/test_metalearning/pyMetaLearn/test_metalearner.py
@@ -23,27 +23,15 @@ def setUp(self):
         data_dir = os.path.join(data_dir, "test_meta_base_data")
         os.chdir(data_dir)
 
-        for feat_type, cs_size in [
-            ({"A": "numerical"}, 165),
-            ({"A": "categorical"}, 162),
-            ({"A": "string"}, 174),
-            ({"A": "numerical", "B": "categorical"}, 168),
-            ({"A": "numerical", "B": "string"}, 180),
-            ({"A": "categorical", "B": "string"}, 177),
-            ({"A": "categorical", "B": "string", "C": "numerical"}, 183),
-        ]:
-            pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline(
-                feat_type=feat_type
-            )
-            self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type)
-            # print(self.cs.get_default_configuration())
-
-            self.logger = logging.getLogger()
-            meta_base = MetaBase(self.cs, data_dir, logger=self.logger)
-            self.meta_optimizer = metalearner.MetaLearningOptimizer(
-                "233", self.cs, meta_base, logger=self.logger
-            )
-            self.assertEqual(len(self.meta_optimizer.configuration_space), cs_size)
+        pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline()
+        self.cs = pipeline.get_hyperparameter_search_space()
+        # print(self.cs.get_default_configuration())
+
+        self.logger = logging.getLogger()
+        meta_base = MetaBase(self.cs, data_dir, logger=self.logger)
+        self.meta_optimizer = metalearner.MetaLearningOptimizer(
+            "233", self.cs, meta_base, logger=self.logger
+        )
 
     def tearDown(self):
         os.chdir(self.cwd)
diff --git a/test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py b/test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py
new file mode 100644
index 0000000000..1e08805d87
--- /dev/null
+++ b/test/test_metalearning/pyMetaLearn/test_metalearning_configuration.py
@@ -0,0 +1,40 @@
+import logging
+import os
+
+import autosklearn.metalearning.optimizers.metalearn_optimizer.metalearner as metalearner  # noqa: E501
+import autosklearn.pipeline.classification
+from autosklearn.metalearning.metalearning.meta_base import MetaBase
+
+import unittest
+
+logging.basicConfig()
+
+
+class MetalearningConfiguration(unittest.TestCase):
+    def test_metalearning_cs_size(self):
+        self.cwd = os.getcwd()
+        data_dir = os.path.dirname(__file__)
+        data_dir = os.path.join(data_dir, "test_meta_base_data")
+        os.chdir(data_dir)
+
+        for feat_type, cs_size in [
+            ({"A": "numerical"}, 165),
+            ({"A": "categorical"}, 162),
+            ({"A": "string"}, 174),
+            ({"A": "numerical", "B": "categorical"}, 168),
+            ({"A": "numerical", "B": "string"}, 180),
+            ({"A": "categorical", "B": "string"}, 177),
+            ({"A": "categorical", "B": "string", "C": "numerical"}, 183),
+        ]:
+            pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline(
+                feat_type=feat_type
+            )
+            self.cs = pipeline.get_hyperparameter_search_space(feat_type=feat_type)
+            # print(self.cs.get_default_configuration())
+
+            self.logger = logging.getLogger()
+            meta_base = MetaBase(self.cs, data_dir, logger=self.logger)
+            self.meta_optimizer = metalearner.MetaLearningOptimizer(
+                "233", self.cs, meta_base, logger=self.logger
+            )
+            self.assertEqual(len(self.meta_optimizer.configuration_space), cs_size)

From c43627eecdd4a8fd8d9ced9438161f75142cea1d Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 17:34:53 +0200
Subject: [PATCH 56/63] in `test_data_preprocessing_feat_type.py` check for
 configuration space size

---
 scripts/02_retrieve_metadata.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py
index 8c3a2e5468..fa104464f2 100644
--- a/scripts/02_retrieve_metadata.py
+++ b/scripts/02_retrieve_metadata.py
@@ -225,7 +225,10 @@ def main():
             )
 
             configuration_space = pipeline.get_configuration_space(
-                DummyDatamanager({"is_sparse": sparse, "task": task})
+                DummyDatamanager(
+                    info={"is_sparse": sparse, "task": task},
+                    feat_type={"A": "numerical", "B": "categorical"}
+                )
             )
 
             outputs, configurations = retrieve_matadata(

From 1c26599118d81fccdad3830bd72b2ddf9dca42bc Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 17:41:07 +0200
Subject: [PATCH 57/63] include feedback from review

---
 test/fixtures/ensembles.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/test/fixtures/ensembles.py b/test/fixtures/ensembles.py
index 80a3425e2d..250841511a 100644
--- a/test/fixtures/ensembles.py
+++ b/test/fixtures/ensembles.py
@@ -5,7 +5,11 @@
 import numpy as np
 from sklearn.ensemble import VotingClassifier, VotingRegressor
 
-from autosklearn.data.validation import SUPPORTED_FEAT_TYPES, SUPPORTED_TARGET_TYPES
+from autosklearn.data.validation import (
+    SUPPORTED_FEAT_TYPES,
+    SUPPORTED_TARGET_TYPES,
+    InputValidator,
+)
 from autosklearn.evaluation.abstract_evaluator import (
     MyDummyClassifier,
     MyDummyRegressor,
@@ -43,9 +47,10 @@ def _make(
     ) -> VotingClassifier:
         assert not (X is None) ^ (y is None)
         if not models:
+            validator = InputValidator(is_classification=True).fit(X, y)
             models = [
                 MyDummyClassifier(
-                    feat_type=None,
+                    feat_type=validator.feature_validator.feat_type,
                     config=1,
                     random_state=seed,
                 )
@@ -87,9 +92,10 @@ def _make(
         assert not (X is None) ^ (y is None)
 
         if not models:
+            validator = InputValidator(is_classification=False).fit(X, y)
             models = [
                 MyDummyRegressor(
-                    feat_type=None,
+                    feat_type=validator.feature_validator.feat_type,
                     config=1,
                     random_state=seed,
                 )

From 0af7f9053c99540e8ee69f8dcf749a36968cbc16 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 18:24:08 +0200
Subject: [PATCH 58/63] include feedback from review

---
 test/test_pipeline/test_classification.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/test/test_pipeline/test_classification.py b/test/test_pipeline/test_classification.py
index eb127ad02d..94fd8c7a65 100644
--- a/test/test_pipeline/test_classification.py
+++ b/test/test_pipeline/test_classification.py
@@ -62,7 +62,7 @@ def get_properties(dataset_properties=None):
         }
 
     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties=None):
+    def get_hyperparameter_search_space(feat_type=None, dataset_properties=None):
         cs = ConfigurationSpace()
         return cs
 
@@ -407,7 +407,7 @@ def test_configurations_categorical_data(self):
             },
         )
 
-        cs = pipeline.get_hyperparameter_search_space()
+        cs = pipeline.get_hyperparameter_search_space(feat_type=categorical)
 
         here = os.path.dirname(__file__)
         dataset_path = os.path.join(
@@ -474,8 +474,11 @@ def test_categorical_passed_to_one_hot_encoder(self, ohe_mock):
             # Check through `set_hyperparameters`
             feat_types = {0: "categorical", 1: "categorical", 2: "numerical"}
 
-            default = cls.get_hyperparameter_search_space().get_default_configuration()
+            default = cls.get_hyperparameter_search_space(
+                feat_type=feat_types
+            ).get_default_configuration()
             cls.set_hyperparameters(
+                feat_type=feat_types,
                 configuration=default,
                 init_params={"data_preprocessor:feat_type": feat_types},
             )
@@ -571,7 +574,9 @@ def _test_configurations(
                 dataset_properties=dataset_properties,
                 init_params=init_params_,
             )
-            cls.set_hyperparameters(config, init_params=init_params_)
+            cls.set_hyperparameters(
+                config, init_params=init_params_, feat_type=feat_type
+            )
 
             # First make sure that for this configuration, setting the parameters
             # does not mistakenly set the estimator as fitted

From 5057b260624609c108ce69c9c40217269a430759 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Fri, 8 Jul 2022 18:35:37 +0200
Subject: [PATCH 59/63] include feedback from review

---
 scripts/02_retrieve_metadata.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py
index fa104464f2..e0b20d1e61 100644
--- a/scripts/02_retrieve_metadata.py
+++ b/scripts/02_retrieve_metadata.py
@@ -179,9 +179,9 @@ def write_output(outputs, configurations, output_dir, configuration_space, metri
 
 
 class DummyDatamanager():
-    def __init__(self, info):
+    def __init__(self, info, feat_type=None):
         self.info = info
-        self.feat_type = None
+        self.feat_type = feat_type
 
 def main():
     parser = ArgumentParser()

From 18ab29e3197686ebca73e49b282095c468bbd435 Mon Sep 17 00:00:00 2001
From: lukas <lukas.j.m.strack@gmail.com>
Date: Sun, 10 Jul 2022 16:29:38 +0200
Subject: [PATCH 60/63] adapted meta_data_generation search space

---
 scripts/02_retrieve_metadata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py
index e0b20d1e61..574a24c87b 100644
--- a/scripts/02_retrieve_metadata.py
+++ b/scripts/02_retrieve_metadata.py
@@ -227,7 +227,7 @@ def main():
             configuration_space = pipeline.get_configuration_space(
                 DummyDatamanager(
                     info={"is_sparse": sparse, "task": task},
-                    feat_type={"A": "numerical", "B": "categorical"}
+                    feat_type={"A": "numerical"}
                 )
             )
 

From 12fe4495f905ce02424094f1006cdf8df809f351 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 11 Jul 2022 11:46:23 +0200
Subject: [PATCH 61/63] Bump docker/build-push-action from 1 to 3 (#1515)

* Bump docker/build-push-action from 1 to 3

Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 1 to 3.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](https://github.com/docker/build-push-action/compare/v1...v3)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update docker-publish.yml

Replace password by token

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
---
 .github/workflows/docker-publish.yml | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index 4c4773700e..525004269e 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -26,9 +26,8 @@ jobs:
         run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
         id: extract_branch
 
-      # Updating this to @v2 requires a github auth token
       - name: Push to GitHub Packages
-        uses: docker/build-push-action@v1
+        uses: docker/build-push-action@v3
         with:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
@@ -37,12 +36,11 @@ jobs:
           tag_with_ref: true
           tags: ${{ steps.extract_branch.outputs.branch }}
 
-      # Updating this to @v2 requires a github auth token
       - name: Push to Docker Hub
-        uses: docker/build-push-action@v1
+        uses: docker/build-push-action@v3
         with:
           username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
+          password: ${{ secrets.DOCKER_TOKEN }}
           repository: mfeurer/auto-sklearn
           tags: ${{ steps.extract_branch.outputs.branch }}
 

From af9d46983c4680b710c79c7714ed0047077d02dc Mon Sep 17 00:00:00 2001
From: Eddie Bergman <eddiebergmanhs@gmail.com>
Date: Sat, 16 Jul 2022 20:35:59 +0200
Subject: [PATCH 62/63] fix-1532-_ERROR_-asyncio.exceptions.CancelledError
 (#1540)

* Create PR

* Abstract out dask client types

* Fix _ issue

* Extend scope of dask_client in automl.py

* Add docstring to dask module

* Indent result addition

* Add basic tests for Dask wrappers
---
 autosklearn/automl.py                   | 282 +++++++++++-------------
 autosklearn/util/dask.py                | 142 ++++++++++++
 test/test_automl/test_construction.py   |   6 +-
 test/test_estimators/test_estimators.py |   2 -
 test/test_util/test_dask.py             |  75 +++++++
 5 files changed, 347 insertions(+), 160 deletions(-)
 create mode 100644 autosklearn/util/dask.py
 create mode 100644 test/test_util/test_dask.py

diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index 278cd5c146..f76a03adec 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -21,7 +21,6 @@
 import os
 import platform
 import sys
-import tempfile
 import time
 import types
 import uuid
@@ -37,7 +36,7 @@
 import sklearn.utils
 from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
 from ConfigSpace.read_and_write import json as cs_json
-from dask.distributed import Client, LocalCluster
+from dask.distributed import Client
 from scipy.sparse import spmatrix
 from sklearn.base import BaseEstimator
 from sklearn.dummy import DummyClassifier, DummyRegressor
@@ -105,6 +104,7 @@
 from autosklearn.pipeline.components.regression import RegressorChoice
 from autosklearn.smbo import AutoMLSMBO
 from autosklearn.util import RE_PATTERN, pipeline
+from autosklearn.util.dask import Dask, LocalDask, UserDask
 from autosklearn.util.data import (
     DatasetCompressionSpec,
     default_dataset_compression_arg,
@@ -120,7 +120,6 @@
     warnings_to,
 )
 from autosklearn.util.parallel import preload_modules
-from autosklearn.util.single_thread_client import SingleThreadedClient
 from autosklearn.util.smac_wrap import SMACCallback, SmacRunCallback
 from autosklearn.util.stopwatch import StopWatch
 
@@ -299,21 +298,22 @@ def __init__(
         self._initial_configurations_via_metalearning = (
             initial_configurations_via_metalearning
         )
+        self._n_jobs = n_jobs
 
         self._scoring_functions = scoring_functions or []
         self._resampling_strategy_arguments = resampling_strategy_arguments or {}
+        self._multiprocessing_context = "forkserver"
 
         # Single core, local runs should use fork to prevent the __main__ requirements
         # in examples. Nevertheless, multi-process runs have spawn as requirement to
         # reduce the possibility of a deadlock
-        if n_jobs == 1 and dask_client is None:
-            self._multiprocessing_context = "fork"
-            self._dask_client = SingleThreadedClient()
-            self._n_jobs = 1
+        self._dask: Dask
+        if dask_client is not None:
+            self._dask = UserDask(client=dask_client)
         else:
-            self._multiprocessing_context = "forkserver"
-            self._dask_client = dask_client
-            self._n_jobs = n_jobs
+            self._dask = LocalDask(n_jobs=n_jobs)
+            if n_jobs == 1:
+                self._multiprocessing_context = "fork"
 
         # Create the backend
         self._backend: Backend = create(
@@ -350,38 +350,6 @@ def __init__(
         self.num_run = 0
         self.fitted = False
 
-    def _create_dask_client(self) -> None:
-        self._is_dask_client_internally_created = True
-        self._dask_client = Client(
-            LocalCluster(
-                n_workers=self._n_jobs,
-                processes=False,
-                threads_per_worker=1,
-                # We use the temporal directory to save the
-                # dask workers, because deleting workers takes
-                # more time than deleting backend directories
-                # This prevent an error saying that the worker
-                # file was deleted, so the client could not close
-                # the worker properly
-                local_directory=tempfile.gettempdir(),
-                # Memory is handled by the pynisher, not by the dask worker/nanny
-                memory_limit=0,
-            ),
-            # Heartbeat every 10s
-            heartbeat_interval=10000,
-        )
-
-    def _close_dask_client(self, force: bool = False) -> None:
-        if getattr(self, "_dask_client", None) is not None and (
-            force or getattr(self, "_is_dask_client_internally_created", False)
-        ):
-            self._dask_client.shutdown()
-            self._dask_client.close()
-            del self._dask_client
-            self._dask_client = None
-            self._is_dask_client_internally_created = False
-            del self._is_dask_client_internally_created
-
     def _get_logger(self, name: str) -> PicklableClientLogger:
         logger_name = "AutoML(%d):%s" % (self._seed, name)
 
@@ -747,17 +715,6 @@ def fit(
                     "autosklearn.metrics.Scorer."
                 )
 
-            # If no dask client was provided, we create one, so that we can
-            # start a ensemble process in parallel to smbo optimize
-            if self._dask_client is None and (
-                self._ensemble_class is not None
-                or self._n_jobs is not None
-                and self._n_jobs > 1
-            ):
-                self._create_dask_client()
-            else:
-                self._is_dask_client_internally_created = False
-
             self._dataset_name = dataset_name
             self._stopwatch.start(self._dataset_name)
 
@@ -902,70 +859,85 @@ def fit(
                         )
 
                     n_meta_configs = self._initial_configurations_via_metalearning
-                    _proc_smac = AutoMLSMBO(
-                        config_space=self.configuration_space,
-                        dataset_name=self._dataset_name,
-                        backend=self._backend,
-                        total_walltime_limit=time_left,
-                        func_eval_time_limit=per_run_time_limit,
-                        memory_limit=self._memory_limit,
-                        data_memory_limit=self._data_memory_limit,
-                        stopwatch=self._stopwatch,
-                        n_jobs=self._n_jobs,
-                        dask_client=self._dask_client,
-                        start_num_run=self.num_run,
-                        num_metalearning_cfgs=n_meta_configs,
-                        config_file=configspace_path,
-                        seed=self._seed,
-                        metadata_directory=self._metadata_directory,
-                        metrics=self._metrics,
-                        resampling_strategy=self._resampling_strategy,
-                        resampling_strategy_args=self._resampling_strategy_arguments,
-                        include=self._include,
-                        exclude=self._exclude,
-                        disable_file_output=self._disable_evaluator_output,
-                        get_smac_object_callback=self._get_smac_object_callback,
-                        smac_scenario_args=self._smac_scenario_args,
-                        scoring_functions=self._scoring_functions,
-                        port=self._logger_port,
-                        pynisher_context=self._multiprocessing_context,
-                        ensemble_callback=proc_ensemble,
-                        trials_callback=self._get_trials_callback,
-                    )
+                    with self._dask as dask_client:
+                        resamp_args = self._resampling_strategy_arguments
+                        _proc_smac = AutoMLSMBO(
+                            config_space=self.configuration_space,
+                            dataset_name=self._dataset_name,
+                            backend=self._backend,
+                            total_walltime_limit=time_left,
+                            func_eval_time_limit=per_run_time_limit,
+                            memory_limit=self._memory_limit,
+                            data_memory_limit=self._data_memory_limit,
+                            stopwatch=self._stopwatch,
+                            n_jobs=self._n_jobs,
+                            dask_client=dask_client,
+                            start_num_run=self.num_run,
+                            num_metalearning_cfgs=n_meta_configs,
+                            config_file=configspace_path,
+                            seed=self._seed,
+                            metadata_directory=self._metadata_directory,
+                            metrics=self._metrics,
+                            resampling_strategy=self._resampling_strategy,
+                            resampling_strategy_args=resamp_args,
+                            include=self._include,
+                            exclude=self._exclude,
+                            disable_file_output=self._disable_evaluator_output,
+                            get_smac_object_callback=self._get_smac_object_callback,
+                            smac_scenario_args=self._smac_scenario_args,
+                            scoring_functions=self._scoring_functions,
+                            port=self._logger_port,
+                            pynisher_context=self._multiprocessing_context,
+                            ensemble_callback=proc_ensemble,
+                            trials_callback=self._get_trials_callback,
+                        )
 
-                    (
-                        self.runhistory_,
-                        self.trajectory_,
-                        self._budget_type,
-                    ) = _proc_smac.run_smbo()
-                    trajectory_filename = os.path.join(
-                        self._backend.get_smac_output_directory_for_run(self._seed),
-                        "trajectory.json",
-                    )
-                    saveable_trajectory = [
-                        list(entry[:2]) + [entry[2].get_dictionary()] + list(entry[3:])
-                        for entry in self.trajectory_
-                    ]
-                    with open(trajectory_filename, "w") as fh:
-                        json.dump(saveable_trajectory, fh)
-
-            self._logger.info("Starting shutdown...")
-            # Wait until the ensemble process is finished to avoid shutting down
-            # while the ensemble builder tries to access the data
-            if proc_ensemble is not None:
-                self.ensemble_performance_history = list(proc_ensemble.history)
-
-                if len(proc_ensemble.futures) > 0:
-                    # Now we wait for the future to return as it cannot be cancelled
-                    # while it is running: https://stackoverflow.com/a/49203129
-                    self._logger.info(
-                        "Ensemble script still running, waiting for it to finish."
-                    )
-                    result = proc_ensemble.futures.pop().result()
-                    if result:
-                        ensemble_history, _ = result
-                        self.ensemble_performance_history.extend(ensemble_history)
-                    self._logger.info("Ensemble script finished, continue shutdown.")
+                        (
+                            self.runhistory_,
+                            self.trajectory_,
+                            self._budget_type,
+                        ) = _proc_smac.run_smbo()
+
+                        trajectory_filename = os.path.join(
+                            self._backend.get_smac_output_directory_for_run(self._seed),
+                            "trajectory.json",
+                        )
+                        saveable_trajectory = [
+                            list(entry[:2])
+                            + [entry[2].get_dictionary()]
+                            + list(entry[3:])
+                            for entry in self.trajectory_
+                        ]
+                        with open(trajectory_filename, "w") as fh:
+                            json.dump(saveable_trajectory, fh)
+
+                        self._logger.info("Starting shutdown...")
+                        # Wait until the ensemble process is finished to avoid shutting
+                        # down while the ensemble builder tries to access the data
+                        if proc_ensemble is not None:
+                            self.ensemble_performance_history = list(
+                                proc_ensemble.history
+                            )
+
+                            if len(proc_ensemble.futures) > 0:
+                                # Now we wait for the future to return as it cannot be
+                                # cancelled while it is running
+                                # * https://stackoverflow.com/a/49203129
+                                self._logger.info(
+                                    "Ensemble script still running,"
+                                    " waiting for it to finish."
+                                )
+                                result = proc_ensemble.futures.pop().result()
+                                # `result` only exists if a future was awaited
+                                if result:
+                                    ensemble_history, _ = result
+                                    self.ensemble_performance_history.extend(
+                                        ensemble_history
+                                    )
+
+                            self._logger.info(
+                                "Ensemble script finished, continue shutdown."
+                            )
 
                 # save the ensemble performance history file
                 if len(self.ensemble_performance_history) > 0:
@@ -1054,7 +1026,7 @@ def _log_fit_setup(self) -> None:
         self._logger.debug(
             "  multiprocessing_context: %s", str(self._multiprocessing_context)
         )
-        self._logger.debug("  dask_client: %s", str(self._dask_client))
+        self._logger.debug("  dask_client: %s", str(self._dask))
         self._logger.debug("  precision: %s", str(self.precision))
         self._logger.debug(
             "  disable_evaluator_output: %s", str(self._disable_evaluator_output)
@@ -1090,7 +1062,6 @@ def __sklearn_is_fitted__(self) -> bool:
 
     def _fit_cleanup(self) -> None:
         self._logger.info("Closing the dask infrastructure")
-        self._close_dask_client()
         self._logger.info("Finished closing the dask infrastructure")
 
         # Clean up the logger
@@ -1555,12 +1526,6 @@ def fit_ensemble(
         # Make sure that input is valid
         y = self.InputValidator.target_validator.transform(y)
 
-        # Create a client if needed
-        if self._dask_client is None:
-            self._create_dask_client()
-        else:
-            self._is_dask_client_internally_created = False
-
         metrics = metrics if metrics is not None else self._metrics
         if not isinstance(metrics, Sequence):
             metrics = [metrics]
@@ -1568,35 +1533,41 @@ def fit_ensemble(
         # Use the current thread to start the ensemble builder process
         # The function ensemble_builder_process will internally create a ensemble
         # builder in the provide dask client
-        manager = EnsembleBuilderManager(
-            start_time=time.time(),
-            time_left_for_ensembles=self._time_for_task,
-            backend=copy.deepcopy(self._backend),
-            dataset_name=dataset_name if dataset_name else self._dataset_name,
-            task=task if task else self._task,
-            metrics=metrics if metrics is not None else self._metrics,
-            ensemble_class=(
-                ensemble_class if ensemble_class is not None else self._ensemble_class
-            ),
-            ensemble_kwargs=(
-                ensemble_kwargs
-                if ensemble_kwargs is not None
-                else self._ensemble_kwargs
-            ),
-            ensemble_nbest=ensemble_nbest if ensemble_nbest else self._ensemble_nbest,
-            max_models_on_disc=self._max_models_on_disc,
-            seed=self._seed,
-            precision=precision if precision else self.precision,
-            max_iterations=1,
-            read_at_most=None,
-            memory_limit=self._memory_limit,
-            random_state=self._seed,
-            logger_port=self._logger_port,
-            pynisher_context=self._multiprocessing_context,
-        )
-        manager.build_ensemble(self._dask_client)
-        future = manager.futures.pop()
-        result = future.result()
+        with self._dask as dask_client:
+            manager = EnsembleBuilderManager(
+                start_time=time.time(),
+                time_left_for_ensembles=self._time_for_task,
+                backend=copy.deepcopy(self._backend),
+                dataset_name=dataset_name if dataset_name else self._dataset_name,
+                task=task if task else self._task,
+                metrics=metrics if metrics is not None else self._metrics,
+                ensemble_class=(
+                    ensemble_class
+                    if ensemble_class is not None
+                    else self._ensemble_class
+                ),
+                ensemble_kwargs=(
+                    ensemble_kwargs
+                    if ensemble_kwargs is not None
+                    else self._ensemble_kwargs
+                ),
+                ensemble_nbest=ensemble_nbest
+                if ensemble_nbest
+                else self._ensemble_nbest,
+                max_models_on_disc=self._max_models_on_disc,
+                seed=self._seed,
+                precision=precision if precision else self.precision,
+                max_iterations=1,
+                read_at_most=None,
+                memory_limit=self._memory_limit,
+                random_state=self._seed,
+                logger_port=self._logger_port,
+                pynisher_context=self._multiprocessing_context,
+            )
+            manager.build_ensemble(dask_client)
+            future = manager.futures.pop()
+            result = future.result()
+
         if result is None:
             raise ValueError(
                 "Error building the ensemble - please check the log file and command "
@@ -1606,7 +1577,6 @@ def fit_ensemble(
         self._ensemble_class = ensemble_class
 
         self._load_models()
-        self._close_dask_client()
         return self
 
     def _load_models(self):
@@ -2295,7 +2265,7 @@ def _create_search_space(
 
     def __getstate__(self) -> dict[str, Any]:
         # Cannot serialize a client!
-        self._dask_client = None
+        self._dask = LocalDask(n_jobs=self._n_jobs)  # picklable, no open client
         self.logging_server = None
         self.stop_logging_server = None
         return self.__dict__
@@ -2304,8 +2274,6 @@ def __del__(self) -> None:
         # Clean up the logger
         self._clean_logger()
 
-        self._close_dask_client()
-
 
 class AutoMLClassifier(AutoML):
 
diff --git a/autosklearn/util/dask.py b/autosklearn/util/dask.py
new file mode 100644
index 0000000000..624fecfae9
--- /dev/null
+++ b/autosklearn/util/dask.py
@@ -0,0 +1,142 @@
+"""Provides the two simplified use cases of dask that we consider
+
+1. A UserDask is when a user supplies a dask client, in which case
+we don't close this down and leave it up to the user to control its lifetime.
+2. A LocalDask is one we use when no user dask is supplied. In this case
+we make sure to spin up and close down clients as needed.
+
+Both of these can be uniformly accessed as a context manager.
+
+.. code:: python
+
+    # Locally controlled dask client
+    local_dask = LocalDask(n_jobs=2)
+    with local_dask as client:
+        # Do stuff with client
+        ...
+
+    # `client` is shutdown properly
+
+    # ----------------
+
+    # User controlled dask client
+    user_dask = UserDask(user_client)
+
+    with user_dask as client:
+        # Do stuff with (client == user_client)
+        ...
+
+    # `user_client` is still open and up to the user to close
+"""
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+import tempfile
+
+from dask.distributed import Client, LocalCluster
+
+from autosklearn.util.single_thread_client import SingleThreadedClient
+
+
+class Dask(ABC):
+    """Uniform context manager around a dask client and its lifetime"""
+
+    @abstractmethod
+    def client(self) -> Client:
+        """Should return a dask client"""
+
+    @abstractmethod
+    def close(self) -> None:
+        """Should close up any resources needed for the dask client"""
+
+    @abstractmethod
+    def __repr__(self) -> str:
+        """Should return a short description of this dask instance"""
+
+    def __enter__(self) -> Client:
+        return self.client()
+
+    def __exit__(self, *args: Any, **kwargs: Any) -> None:
+        self.close()
+
+
+class UserDask(Dask):
+    """Wraps a dask client whose lifetime is controlled by the user"""
+
+    def __init__(self, client: Client) -> None:
+        """
+        Parameters
+        ----------
+        client : Client
+            The already created client that the user handed to us
+        """
+        self._client = client
+
+    def __repr__(self) -> str:
+        return "UserDask(...)"
+
+    def client(self) -> Client:
+        """The client exactly as it was passed in by the user"""
+        return self._client
+
+    def close(self) -> None:
+        """Leave the client open, closing it is up to the user"""
+        # Intentionally a no-op, we never created this client
+        return None
+
+
+class LocalDask(Dask):
+    """A dask instance that we spin up and close down ourselves"""
+
+    def __init__(self, n_jobs: int | None = None) -> None:
+        self.n_jobs = n_jobs
+        self._client: Client | None = None
+        self._cluster: LocalCluster | None = None
+
+    def client(self) -> Client:
+        """Creates a usable dask client or returns an existing one
+
+        If there is no current client, e.g. because it was closed, create a new one.
+        * If ``n_jobs == 1``, create a ``SingleThreadedClient``
+        * Else create a ``Client`` with a ``LocalCluster``
+        """
+        if self._client is not None:
+            return self._client
+
+        if self.n_jobs == 1:
+            cluster = None
+            client = SingleThreadedClient()
+        else:
+            cluster = LocalCluster(
+                n_workers=self.n_jobs,
+                processes=False,
+                threads_per_worker=1,
+                # We use tmpdir to save the workers as deleting workers takes more time
+                # than deleting backend directories. This prevents an error saying the
+                # worker file was deleted, so the client could not close it properly
+                local_directory=tempfile.gettempdir(),
+                # Memory is handled by the pynisher, not by the dask worker/nanny
+                memory_limit=0,
+            )
+            client = Client(cluster, heartbeat_interval=10000)  # 10s
+
+        self._client = client
+        self._cluster = cluster
+        return self._client
+
+    def close(self) -> None:
+        """Closes any open dask client, and its cluster if there is one"""
+        # Drop both handles first so a failing close can't leave stale ones
+        client, cluster = self._client, self._cluster
+        self._client = self._cluster = None
+        try:
+            if client is not None:
+                client.close()
+        finally:
+            if cluster is not None:
+                cluster.close()
+
+    def __repr__(self) -> str:
+        return f"LocalDask(n_jobs = {self.n_jobs})"
diff --git a/test/test_automl/test_construction.py b/test/test_automl/test_construction.py
index 5b68d35118..be6fe0e39b 100644
--- a/test/test_automl/test_construction.py
+++ b/test/test_automl/test_construction.py
@@ -2,6 +2,7 @@
 from typing import Any, Dict, Optional, Union
 
 from autosklearn.automl import AutoML
+from autosklearn.util.dask import LocalDask
 from autosklearn.util.data import default_dataset_compression_arg
 from autosklearn.util.single_thread_client import SingleThreadedClient
 
@@ -87,4 +88,7 @@ def test_single_job_and_no_dask_client_sets_correct_multiprocessing_context() ->
 
     assert automl._multiprocessing_context == "fork"
     assert automl._n_jobs == 1
-    assert isinstance(automl._dask_client, SingleThreadedClient)
+    assert isinstance(automl._dask, LocalDask)
+
+    with automl._dask as client:
+        assert isinstance(client, SingleThreadedClient)
diff --git a/test/test_estimators/test_estimators.py b/test/test_estimators/test_estimators.py
index d0d3f28bdb..e1e33d684a 100644
--- a/test/test_estimators/test_estimators.py
+++ b/test/test_estimators/test_estimators.py
@@ -140,8 +140,6 @@ def __call__(self, *args, **kwargs):
     assert count_succeses(automl.cv_results_) > 0
     assert includes_train_scores(automl.performance_over_time_.columns) is True
     assert performance_over_time_is_plausible(automl.performance_over_time_) is True
-    # For travis-ci it is important that the client no longer exists
-    assert automl.automl_._dask_client is None
 
 
 def test_feat_type_wrong_arguments():
diff --git a/test/test_util/test_dask.py b/test/test_util/test_dask.py
new file mode 100644
index 0000000000..1dbc290500
--- /dev/null
+++ b/test/test_util/test_dask.py
@@ -0,0 +1,75 @@
+from pathlib import Path
+
+from dask.distributed import Client, LocalCluster
+
+from autosklearn.util.dask import LocalDask, UserDask
+
+import pytest
+
+
+@pytest.mark.parametrize("n_jobs", [1, 2])
+def test_user_dask(tmp_path: Path, n_jobs: int) -> None:
+    """
+    Expects
+    -------
+    * A UserDask should yield the same client in every context
+    * A UserDask should not close the client after exiting context
+    """
+    user_cluster = LocalCluster(
+        n_workers=n_jobs,
+        processes=False,
+        threads_per_worker=1,
+        local_directory=tmp_path,
+    )
+    client = Client(user_cluster, heartbeat_interval=10000)
+
+    # Active at creation
+    dask = UserDask(client)
+
+    seen = []
+    for _ in range(2):
+        with dask as user_client:
+            assert user_client.status == "running"
+            seen.append(user_client)
+
+    first, second = seen
+
+    # Make sure they are the same client
+    assert first is second
+
+    # Remains running after context
+    assert first.status == "running"
+
+    # Tearing the client down is left to its owner, here the test itself
+    user_cluster.close()
+    client.close()
+
+    assert client.status == "closed"
+
+
+def test_local_dask_creates_new_clients(tmp_path: Path) -> None:
+    """
+    Expects
+    -------
+    * A LocalDask should create a new dask client at each context usage
+    * A LocalDask should close its client when the context is exited
+    """
+    # We need 2 to use an actual dask client and not a SingleThreadedClient
+    local_dask = LocalDask(n_jobs=2)
+
+    with local_dask as first:
+        assert first.status == "running"
+
+    # Leaving the context shuts the client down
+    assert first.status == "closed"
+
+    # Entering again has to hand out a running client once more
+    with local_dask as second:
+        assert second.status == "running"
+
+    # Make sure they were different clients
+    assert first is not second
+
+    # Both clients end up closed, the first one stays closed
+    assert second.status == "closed"
+    assert first.status == "closed"

From 0d8168c37412b27e38b3b3691e08fb0b09d5d618 Mon Sep 17 00:00:00 2001
From: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
Date: Wed, 3 Aug 2022 18:34:41 +0200
Subject: [PATCH 63/63] Update config space in meta-data generation script

---
 scripts/02_retrieve_metadata.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/scripts/02_retrieve_metadata.py b/scripts/02_retrieve_metadata.py
index 574a24c87b..56a0395b9e 100644
--- a/scripts/02_retrieve_metadata.py
+++ b/scripts/02_retrieve_metadata.py
@@ -1,5 +1,4 @@
 from argparse import ArgumentParser
-from collections import defaultdict
 import csv
 import glob
 import itertools
@@ -10,6 +9,7 @@
 import numpy as np
 
 from ConfigSpace.configuration_space import Configuration
+from ConfigSpace.util import deactivate_inactive_hyperparameters
 
 from autosklearn.constants import *
 from autosklearn.metrics import CLASSIFICATION_METRICS, REGRESSION_METRICS
@@ -66,8 +66,18 @@ def retrieve_matadata(
                 n_better += 1
 
                 try:
+                    for hp in configuration_space.get_hyperparameters():
+                        if hp.name not in config:
+                            config[hp.name] = hp.default_value
+
                     best_configuration = Configuration(
-                        configuration_space=configuration_space, values=config
+                        configuration_space=configuration_space,
+                        values=config,
+                        allow_inactive_with_values=True,
+                    )
+                    best_configuration = deactivate_inactive_hyperparameters(
+                        configuration=best_configuration,
+                        configuration_space=configuration_space,
                     )
                     best_value = score
                     best_configuration_dir = validation_trajectory_file
@@ -227,7 +237,7 @@ def main():
             configuration_space = pipeline.get_configuration_space(
                 DummyDatamanager(
                     info={"is_sparse": sparse, "task": task},
-                    feat_type={"A": "numerical"}
+                    feat_type={"A": "numerical", "B": "categorical"}
                 )
             )