
Remove references to validation set in evaluator #1517


Status: Merged (9 commits, merged on Jun 17, 2022)
2 changes: 1 addition & 1 deletion .github/workflows/regressions.yml
@@ -327,7 +327,7 @@ jobs:
         && github.event.action == 'labeled'
         && github.event.label.name == 'regression-tests'
       )
-      uses: peter-evans/find-comment@v1
+      uses: peter-evans/find-comment@v2
       id: comment_finder
       with:
         issue-number: ${{ github.event.pull_request.number }}
2 changes: 1 addition & 1 deletion .github/workflows/stale.yaml
@@ -9,7 +9,7 @@ jobs:
   stale:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/stale@v4
+      - uses: actions/stale@v5
         with:
           days-before-stale: 60
           days-before-close: 7
558 changes: 287 additions & 271 deletions autosklearn/automl.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions autosklearn/estimators.py
@@ -262,8 +262,8 @@ def __init__(
             list are:
 
             * ``'y_optimization'`` : do not save the predictions for the
-              optimization/validation set, which would later on be used to build
-              an ensemble.
+              optimization set, which would later on be used to build an ensemble.
 
             * ``model`` : do not save any model files
 
         smac_scenario_args : dict, optional (None)
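For context, a minimal usage sketch of the option this docstring describes, assuming the public `AutoSklearnClassifier` API and its `disable_evaluator_output` parameter; nothing below is taken from the diff itself:

    # Hedged sketch: skip persisting out-of-fold ("optimization") predictions.
    # Per the docstring above, these predictions are what ensemble building
    # consumes, so disabling them trades ensembling for less disk I/O.
    from autosklearn.classification import AutoSklearnClassifier

    automl = AutoSklearnClassifier(
        time_left_for_this_task=120,
        disable_evaluator_output=["y_optimization"],
    )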
24 changes: 1 addition & 23 deletions autosklearn/evaluation/__init__.py
@@ -230,14 +230,7 @@ def __init__(
         self.memory_limit = memory_limit
 
         dm = self.backend.load_datamanager()
-        if "X_valid" in dm.data and "Y_valid" in dm.data:
-            self._get_validation_loss = True
-        else:
-            self._get_validation_loss = False
-        if "X_test" in dm.data and "Y_test" in dm.data:
-            self._get_test_loss = True
-        else:
-            self._get_test_loss = False
+        self._get_test_loss = "X_test" in dm.data and "Y_test" in dm.data
 
         self.port = port
         self.pynisher_context = pynisher_context
@@ -533,21 +526,6 @@ def run(
             additional_run_info["train_learning_curve"] = train_learning_curve
             additional_run_info["learning_curve_runtime"] = learning_curve_runtime
 
-            if self._get_validation_loss:
-                validation_learning_curve = (
-                    autosklearn.evaluation.util.extract_learning_curve(
-                        info,
-                        "validation_loss",
-                    )
-                )
-                if len(validation_learning_curve) > 1:
-                    additional_run_info[
-                        "validation_learning_curve"
-                    ] = validation_learning_curve
-                    additional_run_info[
-                        "learning_curve_runtime"
-                    ] = learning_curve_runtime
-
             if self._get_test_loss:
                 test_learning_curve = (
                     autosklearn.evaluation.util.extract_learning_curve(
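The collapsed branching above reduces to a single membership test. A standalone illustration of the equivalence (toy data, not from the PR):

    # Both forms compute the same boolean; the one-liner is the new code's shape.
    data = {"X_test": [[0.0]], "Y_test": [1]}

    if "X_test" in data and "Y_test" in data:  # old, eight-line pattern
        get_test_loss = True
    else:
        get_test_loss = False

    assert get_test_loss == ("X_test" in data and "Y_test" in data)  # new one-liner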
65 changes: 14 additions & 51 deletions autosklearn/evaluation/abstract_evaluator.py
@@ -220,8 +220,6 @@ def __init__(
         self.include = include
         self.exclude = exclude
 
-        self.X_valid = self.datamanager.data.get("X_valid")
-        self.y_valid = self.datamanager.data.get("Y_valid")
         self.X_test = self.datamanager.data.get("X_test")
         self.y_test = self.datamanager.data.get("Y_test")
@@ -359,7 +357,6 @@ def finish_up(
         loss: Union[Dict[str, float], float],
         train_loss: Optional[Dict[str, float]],
         opt_pred: np.ndarray,
-        valid_pred: np.ndarray,
         test_pred: np.ndarray,
         additional_run_info: Optional[TYPE_ADDITIONAL_INFO],
         file_output: bool,
@@ -382,19 +379,12 @@
         self.duration = time.time() - self.starttime
 
         if file_output:
-            file_out_loss, additional_run_info_ = self.file_output(
-                opt_pred,
-                valid_pred,
-                test_pred,
-            )
+            file_out_loss, additional_run_info_ = self.file_output(opt_pred, test_pred)
         else:
             file_out_loss = None
             additional_run_info_ = {}
 
-        validation_loss, test_loss = self.calculate_auxiliary_losses(
-            valid_pred,
-            test_pred,
-        )
+        test_loss = self.calculate_auxiliary_losses(test_pred)
 
         if file_out_loss is not None:
             return self.duration, file_out_loss, self.seed, additional_run_info_
@@ -424,8 +414,6 @@ def finish_up(
             additional_run_info["train_loss"] = [
                 train_loss[metric.name] for metric in self.metrics
             ]
-            if validation_loss is not None:
-                additional_run_info["validation_loss"] = validation_loss
             if test_loss is not None:
                 additional_run_info["test_loss"] = test_loss
@@ -442,41 +430,22 @@
 
     def calculate_auxiliary_losses(
         self,
-        Y_valid_pred: np.ndarray,
-        Y_test_pred: np.ndarray,
-    ) -> Tuple[Optional[float | Sequence[float]], Optional[float | Sequence[float]]]:
-        if Y_valid_pred is not None:
-            if self.y_valid is not None:
-                validation_loss: Optional[Union[float, Dict[str, float]]] = self._loss(
-                    self.y_valid, Y_valid_pred
-                )
-                if len(self.metrics) == 1:
-                    validation_loss = validation_loss[self.metrics[0].name]
-            else:
-                validation_loss = None
-        else:
-            validation_loss = None
+        Y_test_pred: np.ndarray | None,
+    ) -> float | dict[str, float] | None:
+        if Y_test_pred is None or self.y_test is None:
+            return None
 
-        if Y_test_pred is not None:
-            if self.y_test is not None:
-                test_loss: Optional[Union[float, Dict[str, float]]] = self._loss(
-                    self.y_test, Y_test_pred
-                )
-                if len(self.metrics) == 1:
-                    test_loss = test_loss[self.metrics[0].name]
-            else:
-                test_loss = None
-        else:
-            test_loss = None
+        test_loss = self._loss(self.y_test, Y_test_pred)
+        if len(self.metrics) == 1:
+            test_loss = test_loss[self.metrics[0].name]
 
-        return validation_loss, test_loss
+        return test_loss
 
     def file_output(
         self,
         Y_optimization_pred: np.ndarray,
-        Y_valid_pred: np.ndarray,
         Y_test_pred: np.ndarray,
-    ) -> Tuple[Optional[float], Dict[str, Union[str, int, float, List, Dict, Tuple]]]:
+    ) -> tuple[float | None, dict[str, Any]]:
         # Abort if self.Y_optimization is None
         # self.Y_optimization can be None if we use partial-cv, then,
         # obviously no output should be saved.
@@ -496,12 +465,7 @@
             )
 
         # Abort if predictions contain NaNs
-        for y, s in [
-            # Y_train_pred deleted here. Fix unittest accordingly.
-            [Y_optimization_pred, "optimization"],
-            [Y_valid_pred, "validation"],
-            [Y_test_pred, "test"],
-        ]:
+        for y, s in [(Y_optimization_pred, "optimization"), (Y_test_pred, "test")]:
             if y is not None and not np.all(np.isfinite(y)):
                 return (
                     1.0,
@@ -553,14 +517,13 @@
             budget=self.budget,
             model=self.model if "model" not in self.disable_file_output else None,
             cv_model=models if "cv_model" not in self.disable_file_output else None,
+            # TODO: below line needs to be deleted once backend is updated
+            valid_predictions=None,
             ensemble_predictions=(
                 Y_optimization_pred
                 if "y_optimization" not in self.disable_file_output
                 else None
             ),
-            valid_predictions=(
-                Y_valid_pred if "y_valid" not in self.disable_file_output else None
-            ),
             test_predictions=(
                 Y_test_pred if "y_test" not in self.disable_file_output else None
             ),
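Pulling the rewritten helper out of the diff, a self-contained sketch of its new contract, with `self._loss` replaced by a stand-in metric dictionary so the snippet runs on its own (names and shapes assumed):

    from __future__ import annotations
    import numpy as np

    def calculate_auxiliary_losses(
        y_test: np.ndarray | None,
        y_test_pred: np.ndarray | None,
        metrics: list[str],
    ) -> float | dict[str, float] | None:
        # Mirrors the simplified method: a single early return replaces the
        # old nested None-handling for both validation and test predictions.
        if y_test_pred is None or y_test is None:
            return None
        # Stand-in for self._loss: mean absolute error, keyed per metric name.
        losses = {m: float(np.abs(y_test - y_test_pred).mean()) for m in metrics}
        return losses[metrics[0]] if len(metrics) == 1 else losses

    # With one metric the caller gets a bare float, matching the new return type.
    print(calculate_auxiliary_losses(np.array([1.0, 0.0]), np.array([0.5, 0.0]), ["mae"]))  # 0.25
    print(calculate_auxiliary_losses(None, None, ["mae"]))  # None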
2 changes: 0 additions & 2 deletions autosklearn/evaluation/test_evaluator.py
@@ -67,7 +67,6 @@ def fit_predict_and_loss(self) -> None:
             loss=loss,
             train_loss=None,
             opt_pred=Y_pred,
-            valid_pred=None,
             test_pred=None,
             file_output=False,
             final_call=True,
@@ -78,7 +77,6 @@
     def predict_and_loss(
         self, train: bool = False
     ) -> Tuple[Union[Dict[str, float], float], np.array, Any, Any]:
-
         if train:
             Y_pred = self.predict_function(
                 self.X_train, self.model, self.task_type, self.Y_train