Add grid search script and test.

cdancette · cdancette · commit d7071f7360b5 · 2020-07-01T23:38:07.000+02:00
Grid search is called like this:
python -m bootstrap.grid -o <path> --gpu 0.5 --cpu 10

This will run training on all available gpus and cpus, with 10 cpus per
training and 0.5 gpus assigned for each training (2 jobs per gpu).

The option file must contain a new top-level option, "gridsearch:",
which maps each option to modify (written as a dotted name, e.g.
"optimizer.lr") to a list containing all the values to test for
that option.

See the example file in tests/options-grid.yaml

TODO: add an analysis step at the end. This will need an API to get the best
results for a given run, which currently exists only as a script (compare.py).
diff --git a/bootstrap/grid.py b/bootstrap/grid.py
@@ -0,0 +1,86 @@
+import ray
+from ray import tune
+import os
+import argparse
+import yaml
+
+from bootstrap.run import run
+
+
+def train_func(config):
+    # change exp dir
+
+    option_path = config.pop("option_file")
+    os.chdir(config.pop("run_dir"))
+    exp_dir = config.pop("exp_dir_prefix")
+
+    override_options = {
+        "resume": "last",
+    }
+
+    for name, value in config.items():
+        override_options[name] = value
+        if type(value) == list:
+            value_str = ",".join(str(x) for x in value)
+        else:
+            value_str = str(value)
+        exp_dir += f"--{name.split('.')[-1]}_{value_str}"
+
+    override_options["exp.dir"] = exp_dir
+    run(path_opts=option_path, override_options=override_options, run_parser=False)
+
+
+def build_tune_config(option_path):
+    with open(option_path, "r") as yaml_file:
+        options = yaml.load(yaml_file)
+    config = {}
+    for key, values in options["gridsearch"].items():
+        config[key] = tune.grid_search(values)
+    config["exp_dir_prefix"] = options["exp"]["dir"]
+    config["option_file"] = option_path
+    config["run_dir"] = os.getcwd()
+    return config, config["exp_dir_prefix"]
+
+
+def grid(path_opts, cpu_per_trial=2, gpu_per_trial=0.5):
+    config, name = build_tune_config(path_opts)
+    ray.init()
+    tune.run(
+        train_func,
+        name=name,
+        # stop={"avg_inc_acc": 100},
+        config=config,
+        resources_per_trial={"cpu": cpu_per_trial, "gpu": gpu_per_trial},
+        local_dir="ray_results",
+    )
+
+    # TODO: tune analysis to get best results.
+    # For this, we need to extract the best score for each experiment.
+    # analysis = tune.run(
+    #     train_mnist, config={"lr": tune.grid_search([0.001, 0.01, 0.1])})
+    # print("Best config: ", analysis.get_best_config(metric="mean_accuracy"))
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-o", "--path_opts", required=True, help="Main file")
+    parser.add_argument(
+        "-g",
+        "--gpu",
+        type=float,
+        default=0.5,
+        help="Percentage of gpu needed for one training",
+    )
+    parser.add_argument(
+        "-c",
+        "--cpu",
+        type=float,
+        default=2,
+        help="Percentage of gpu needed for one training",
+    )
+    args = parser.parse_args()
+    grid(args.path_opts, args.cpu, args.gpu)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bootstrap/lib/options.py b/bootstrap/lib/options.py
@@ -161,7 +161,7 @@ def exit(self, status=0, message=None):
                 raise Options.MissingOptionsException()
             super().exit(status, message)
 
-    def __new__(cls, source=None, arguments_callback=None, lock=False, run_parser=True):
+    def __new__(cls, source=None, arguments_callback=None, lock=False, run_parser=True, override_options=None):
         # Options is a singleton, we will only build if it has not been built before
         if not Options.__instance:
             Options.__instance = object.__new__(Options)
@@ -178,7 +178,7 @@ def __new__(cls, source=None, arguments_callback=None, lock=False, run_parser=Tr
 
             if run_parser:
                 fullopt_parser = Options.HelpParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-                fullopt_parser.add_argument('-o', '--path_opts', type=str, required=True)
+                fullopt_parser.add_argument('-o', '--path_opts', type=str)
                 Options.__instance.add_options(fullopt_parser, options_dict)
 
                 arguments = fullopt_parser.parse_args()
@@ -201,6 +201,10 @@ def __new__(cls, source=None, arguments_callback=None, lock=False, run_parser=Tr
             else:
                 Options.__instance.options = options_dict
 
+        if override_options is not None:
+            for key, value in override_options.items():
+                Options.__instance.options[key] = value
+
         if lock:
             Options.__instance.lock()
         return Options.__instance
diff --git a/bootstrap/run.py b/bootstrap/run.py
@@ -53,9 +53,9 @@ def init_logs_options_files(exp_dir, resume=None):
     Logger(exp_dir, name=logs_name)
 
 
-def run(path_opts=None):
+def run(path_opts=None, override_options=None, run_parser=True):
     # first call to Options() load the options yaml file from --path_opts command line argument if path_opts=None
-    Options(path_opts)
+    Options(path_opts, override_options=override_options, run_parser=run_parser)
 
     # init options and exp dir for logging
     init_experiment_directory(Options()['exp']['dir'], Options()['exp']['resume'])
diff --git a/tests/options-grid.yaml b/tests/options-grid.yaml
@@ -0,0 +1,51 @@
+exp:
+  dir: logs/myproject/1_exp
+  resume: # last, best_[...], or empty (from scratch)
+dataset:
+  import: myproject.datasets.factory
+  name: myproject
+  dir: data/myproject
+  train_split: train
+  eval_split: val
+  nb_threads: 4
+  batch_size: 64
+  nb_items: 100
+model:
+  name: default
+  network:
+    import: myproject.models.networks.factory
+    name: myproject
+    dim_in: 10
+    dim_out: 1
+  criterion:
+    import: myproject.models.criterions.factory
+    name: myproject
+  metric:
+    import: myproject.models.metrics.factory
+    name: myproject
+    thresh: 0.5
+optimizer:
+  name: adam
+  lr: 0.0004
+engine:
+  name: default
+  debug: False
+  nb_epochs: 10
+  print_freq: 10
+  saving_criteria:
+  - loss:min     # save when new_best < best
+  - accuracy:max # save when new_best > best
+misc:
+  cuda: False
+  seed: 1337
+views:
+  name: plotly
+  items:
+  - logs:train_epoch.loss+logs:eval_epoch.loss
+  - logs:train_batch.loss
+  - logs:train_epoch.accuracy+logs:eval_epoch.accuracy
+
+
+gridsearch:  # maps a dotted option name to the list of values to test for it
+  "optimizer.lr": [0.1, 1.e-3]  # overrides optimizer.lr in each run
+  "misc.seed": [1337, 42]  # overrides misc.seed in each run
diff --git a/tests/test_grid.py b/tests/test_grid.py
@@ -0,0 +1,51 @@
+from os import path as osp
+import os
+import shutil
+import sys
+from bootstrap.new import new_project
+from tests.test_options import reset_options_instance
+from bootstrap.grid import main as main_grid
+
+
+def test_grid(tmpdir):
+    new_project("MyProject", tmpdir)
+    code_dir = osp.join(tmpdir, "myproject.bootstrap.pytorch")
+    path_opts = osp.join(code_dir, "myproject/options/options-grid.yaml")
+    shutil.copy("tests/options-grid.yaml", path_opts)
+    os.chdir(code_dir)
+
+    expected_exp_dirs = [
+        "logs/myproject/1_exp--lr_0.1--seed_1337",
+        "logs/myproject/1_exp--lr_0.1--seed_42",
+        "logs/myproject/1_exp--lr_0.001--seed_1337",
+        "logs/myproject/1_exp--lr_0.001--seed_42",
+    ]
+
+    # path needed to change import
+    # https://stackoverflow.com/questions/23619595/pythons-os-chdir-function-isnt-working
+    sys.path.insert(0, code_dir)
+    reset_options_instance()
+    sys.argv += ["--path_opts", path_opts]
+    sys.argv += ["--gpu-per-trial", "0.0"]
+    sys.argv += ["--cpu-per-trial", "0.5"]
+    main_grid()
+
+    fnames = [
+        "ckpt_best_accuracy_engine.pth.tar",
+        "ckpt_best_loss_optimizer.pth.tar",
+        "logs.txt",
+        "ckpt_best_accuracy_model.pth.tar",
+        "ckpt_last_engine.pth.tar",
+        "options.yaml",
+        "ckpt_best_accuracy_optimizer.pth.tar",
+        "ckpt_last_model.pth.tar",
+        "view.html",
+        "ckpt_best_loss_engine.pth.tar",
+        "ckpt_last_optimizer.pth.tar",
+        "ckpt_best_loss_model.pth.tar",
+        "logs.json",
+    ]
+    for exp_dir in expected_exp_dirs:
+        for fname in fnames:
+            file_path = osp.join(code_dir, f"{exp_dir}/{fname}")
+            assert osp.isfile(file_path)