Skip to content

Commit 66b782a

Browse files
Louquinze, eddiebergman, dependabot[bot], mfeurer
committed
Fix hyperparameters (hps) remaining active & metalearning hyperparameter configuration (#1536)
* fixing the issue that metalearning tries to use every hp defined in the csv files. Also fixing the hps remain active bug. * fixing the issue that metalearning tries to use every hp defined in the csv files. Also fixing the hps remain active bug. * fixing the issue that metalearning tries to use every hp defined in the csv files. Also fixing the hps remain active bug. * fixing the issue that metalearning tries to use every hp defined in the csv files. Also fixing the hps remain active bug. * fixing the issue that metalearning tries to use every hp defined in the csv files. Also fixing the hps remain active bug. * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fixing ensemble builder * fix search space bug * fix search space bug * fix search space bug * fix search space bug * fix search space bug * fix search space bug * fix search space bug * fix search space bug * fix search space bug * fix search space bug * fix search space bug * fix typing * fixing pre-commit * fixing pre-commit * fixing pre-commit * fixing pre-commit * fixing pre-commit * fixing pre-commit * fixing pre-commit * fixing pre-commit * fixing pre-commit * adding new test. Test if new configuration space is correct. 
* add new tests and fixing some issues from PR * add new tests and fixing some issues from PR * add new tests and fixing some issues from PR * add new tests and fixing some issues from PR * fix-1535-Exception-in-the-fit()-call-of-AutoSklearn (#1539) * Create PR * Fix test fixture * add new tests and fixing some issues from PR * add new tests and fixing some issues from PR * changing metalearning test to check whether the configspace adapts to feat_type or not * in `test_data_preprocessing_feat_type.py` check for configuration space size * in `test_data_preprocessing_feat_type.py` check for configuration space size * in `test_data_preprocessing_feat_type.py` check for configuration space size * include feedback from review * include feedback from review * include feedback from review * adapted meta_data_generation search space * Bump docker/build-push-action from 1 to 3 (#1515) * Bump docker/build-push-action from 1 to 3 Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 1 to 3. - [Release notes](https://github.com/docker/build-push-action/releases) - [Commits](docker/build-push-action@v1...v3) --- updated-dependencies: - dependency-name: docker/build-push-action dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] <[email protected]> * Update docker-publish.yml Replace password by token Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Matthias Feurer <[email protected]> * fix-1532-_ERROR_-asyncio.exceptions.CancelledError (#1540) * Create PR * Abstract out dask client types * Fix _ issue * Extend scope of dask_client in automl.py * Add docstring to dask module * Indent result addition * Add basic tests for Dask wrappers * Update config space in meta-data generation script Co-authored-by: Eddie Bergman <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Matthias Feurer <[email protected]>
1 parent a9d49d4 commit 66b782a

File tree

107 files changed

+1077
-228
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+1077
-228
lines changed

autosklearn/askl_typing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from typing import Dict, Union
2+
3+
FEAT_TYPE_TYPE = Dict[Union[str, int], str]

autosklearn/automl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2250,7 +2250,7 @@ def _create_search_space(
22502250
) -> Tuple[ConfigurationSpace, str]:
22512251
configspace_path = os.path.join(tmp_dir, "space.json")
22522252
configuration_space = pipeline.get_configuration_space(
2253-
datamanager.info,
2253+
datamanager,
22542254
include=include,
22552255
exclude=exclude,
22562256
)

autosklearn/evaluation/abstract_evaluator.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import autosklearn.pipeline.classification
1919
import autosklearn.pipeline.regression
20+
from autosklearn.askl_typing import FEAT_TYPE_TYPE
2021
from autosklearn.automl_common.common.utils.backend import Backend
2122
from autosklearn.constants import (
2223
CLASSIFICATION_TASKS,
@@ -45,6 +46,7 @@ def __init__(
4546
self,
4647
config: Configuration,
4748
random_state: Optional[Union[int, np.random.RandomState]],
49+
feat_type: Optional[FEAT_TYPE_TYPE] = None,
4850
init_params: Optional[Dict[str, Any]] = None,
4951
dataset_properties: Dict[str, Any] = {},
5052
include: Optional[List[str]] = None,
@@ -61,6 +63,7 @@ def __init__(
6163
self.dataset_properties = dataset_properties
6264
self.include = include
6365
self.exclude = exclude
66+
self.feat_type = feat_type
6467

6568
def pre_transform(
6669
self,
@@ -108,6 +111,7 @@ def __init__(
108111
self,
109112
config: Configuration,
110113
random_state: Optional[Union[int, np.random.RandomState]],
114+
feat_type: Optional[FEAT_TYPE_TYPE] = None,
111115
init_params: Optional[Dict[str, Any]] = None,
112116
dataset_properties: Dict[str, Any] = {},
113117
include: Optional[List[str]] = None,
@@ -123,6 +127,7 @@ def __init__(
123127
self.dataset_properties = dataset_properties
124128
self.include = include
125129
self.exclude = exclude
130+
self.feat_type = feat_type
126131

127132
def pre_transform(
128133
self,
@@ -217,6 +222,7 @@ def __init__(
217222
self.queue = queue
218223

219224
self.datamanager = self.backend.load_datamanager()
225+
self.feat_type = self.datamanager.feat_type
220226
self.include = include
221227
self.exclude = exclude
222228

@@ -294,11 +300,12 @@ def __init__(
294300
_addons[key].add_component(component)
295301

296302
# Please mypy to prevent not defined attr
297-
self.model = self._get_model()
303+
self.model = self._get_model(feat_type=self.feat_type)
298304

299-
def _get_model(self) -> BaseEstimator:
305+
def _get_model(self, feat_type: Optional[FEAT_TYPE_TYPE]) -> BaseEstimator:
300306
if not isinstance(self.configuration, Configuration):
301307
model = self.model_class(
308+
feat_type=feat_type,
302309
config=self.configuration,
303310
random_state=self.seed,
304311
init_params=self._init_params,
@@ -318,6 +325,7 @@ def _get_model(self) -> BaseEstimator:
318325
"multiclass": self.task_type == MULTICLASS_CLASSIFICATION,
319326
}
320327
model = self.model_class(
328+
feat_type=feat_type,
321329
config=self.configuration,
322330
dataset_properties=dataset_properties,
323331
random_state=self.seed,

autosklearn/evaluation/test_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def __init__(
5858
self.X_test = self.datamanager.data.get("X_test")
5959
self.Y_test = self.datamanager.data.get("Y_test")
6060

61-
self.model = self._get_model()
61+
self.model = self._get_model(self.feat_type)
6262

6363
def fit_predict_and_loss(self) -> None:
6464
_fit_and_suppress_warnings(self.logger, self.model, self.X_train, self.Y_train)

autosklearn/evaluation/train_evaluator.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ def __init__(
247247
budget_type=budget_type,
248248
)
249249

250+
self.feat_type = self.backend.load_datamanager().feat_type
250251
self.resampling_strategy = resampling_strategy
251252
if resampling_strategy_args is None:
252253
self.resampling_strategy_args = {}
@@ -305,7 +306,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
305306

306307
# Test if the model allows for an iterative fit, if not,
307308
# call this method again without the iterative argument
308-
model = self._get_model()
309+
model = self._get_model(self.feat_type)
309310
if not model.estimator_supports_iterative_fit():
310311
self.fit_predict_and_loss(iterative=False)
311312
return
@@ -319,7 +320,9 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
319320
Y_test_pred = [None] * self.num_cv_folds
320321
train_splits = [None] * self.num_cv_folds
321322

322-
self.models = [self._get_model() for i in range(self.num_cv_folds)]
323+
self.models = [
324+
self._get_model(self.feat_type) for i in range(self.num_cv_folds)
325+
]
323326
iterations = [1] * self.num_cv_folds
324327
total_n_iterations = [0] * self.num_cv_folds
325328
# model.estimator_supports_iterative_fit -> true
@@ -515,7 +518,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
515518
self.Y_optimization = Y_targets
516519
self.Y_actual_train = Y_train_targets
517520

518-
self.model = self._get_model()
521+
self.model = self._get_model(self.feat_type)
519522
status = StatusType.DONOTADVANCE
520523
if any(
521524
[
@@ -679,7 +682,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
679682
self.Y_actual_train = Y_train_targets
680683

681684
if self.num_cv_folds > 1:
682-
self.model = self._get_model()
685+
self.model = self._get_model(self.feat_type)
683686
# Bad style, but necessary for unit testing that self.model is
684687
# actually a new model
685688
self._added_empty_model = True
@@ -798,7 +801,7 @@ def _partial_fit_and_predict_iterative(
798801
test_indices: List[int],
799802
add_model_to_self: bool,
800803
) -> None:
801-
model = self._get_model()
804+
model = self._get_model(self.feat_type)
802805

803806
self.indices[fold] = (train_indices, test_indices)
804807

@@ -939,7 +942,7 @@ def _partial_fit_and_predict_standard(
939942
PIPELINE_DATA_DTYPE, # test_pred
940943
TYPE_ADDITIONAL_INFO,
941944
]:
942-
model = self._get_model()
945+
model = self._get_model(self.feat_type)
943946

944947
self.indices[fold] = (train_indices, test_indices)
945948

@@ -1005,7 +1008,7 @@ def _partial_fit_and_predict_budget(
10051008
# Add this statement for mypy
10061009
assert self.budget is not None
10071010

1008-
model = self._get_model()
1011+
model = self._get_model(self.feat_type)
10091012
self.indices[fold] = (train_indices, test_indices)
10101013
self.X_targets[fold] = self.X_train[test_indices]
10111014
self.Y_targets[fold] = self.Y_train[test_indices]

autosklearn/experimental/askl2.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,10 @@ def __call__(
5151
initial_configurations = []
5252
for member in self.portfolio.values():
5353
try:
54+
hp_names = scenario.cs.get_hyperparameter_names()
55+
_member = {key: member[key] for key in member if key in hp_names}
5456
initial_configurations.append(
55-
Configuration(configuration_space=scenario.cs, values=member)
57+
Configuration(configuration_space=scenario.cs, values=_member)
5658
)
5759
except ValueError:
5860
pass
@@ -103,8 +105,10 @@ def __call__(
103105
initial_configurations = []
104106
for member in self.portfolio.values():
105107
try:
108+
hp_names = scenario.cs.get_hyperparameter_names()
109+
_member = {key: member[key] for key in member if key in hp_names}
106110
initial_configurations.append(
107-
Configuration(configuration_space=scenario.cs, values=member)
111+
Configuration(configuration_space=scenario.cs, values=_member)
108112
)
109113
except ValueError:
110114
pass

autosklearn/metalearning/input/aslib_simple.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55

66
import arff
77
import pandas as pd
8+
from ConfigSpace.configuration_space import ConfigurationSpace
89

910

1011
class AlgorithmSelectionProblem(object):
11-
def __init__(self, directory):
12+
def __init__(self, directory: str, cs: ConfigurationSpace):
1213
self.logger = logging.getLogger(__name__)
1314

1415
# Create data structures
16+
self.cs = cs
1517
self.dir_ = directory
1618
self.algorithm_runs = None
1719
self.configurations = None
@@ -143,13 +145,17 @@ def _read_configurations(self, filename):
143145
csv_reader = csv.DictReader(fh)
144146

145147
configurations = dict()
148+
hp_names = self.cs.get_hyperparameter_names()
146149
for line in csv_reader:
147150
configuration = dict()
148151
algorithm_id = line["idx"]
149152
for hp_name, value in line.items():
150153
if not value or hp_name == "idx":
151154
continue
152-
155+
if hp_name not in hp_names:
156+
# skip hyperparameter
157+
# if it is not existing in the current search space
158+
continue
153159
try:
154160
value = int(value)
155161
except Exception:

autosklearn/metalearning/metalearning/meta_base.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ def __init__(self, configuration_space, aslib_directory, logger):
4242
self.configuration_space = configuration_space
4343
self.aslib_directory = aslib_directory
4444

45-
aslib_reader = aslib_simple.AlgorithmSelectionProblem(self.aslib_directory)
45+
aslib_reader = aslib_simple.AlgorithmSelectionProblem(
46+
self.aslib_directory, self.configuration_space
47+
)
4648
self.metafeatures = aslib_reader.metafeatures
4749
self.algorithm_runs: OrderedDict[
4850
str, pd.DataFrame

0 commit comments

Comments
 (0)