diff --git a/LearningMachine.py b/LearningMachine.py index 4b00240..f327bd7 100644 --- a/LearningMachine.py +++ b/LearningMachine.py @@ -3,7 +3,6 @@ import torch import torch.nn as nn - import os import time import numpy as np @@ -29,7 +28,7 @@ class LearningMachine(object): - def __init__(self, phase, conf, problem, vocab_info=None, initialize=True, use_gpu=False, **kwargs): + def __init__(self, phase, conf, problem, vocab_info=None, initialize=True, use_gpu=False, automl=False, **kwargs): if initialize is True: assert vocab_info is not None self.model = Model(conf, problem, vocab_info, use_gpu) @@ -54,6 +53,7 @@ def __init__(self, phase, conf, problem, vocab_info=None, initialize=True, use_g self.problem = problem self.phase = phase self.use_gpu = use_gpu + self.automl = automl # if it is a 2-class classification problem, figure out the real positive label # CAUTION: multi-class classification @@ -335,6 +335,9 @@ def train(self, optimizer, loss_fn): del data_batches, length_batches, target_batches lr_scheduler.step() epoch += 1 + if self.automl: + import nni + nni.report_final_result(float(best_result)) def test(self, loss_fn, test_data_path=None, predict_output_path=None): if test_data_path is None: @@ -622,6 +625,9 @@ def evaluate(self, data, length, target, input_types, evaluator, if phase == 'valid': cur_result = evaluator.get_first_metric_result() + if self.automl: + import nni + nni.report_intermediate_result(cur_result) if self.evaluator.compare(cur_result, cur_best_result) == 1: logging.info( 'Cur result %f is better than previous best result %s, renew the best model now...' 
% (cur_result, "%f" % cur_best_result if cur_best_result else "None")) diff --git a/ModelConf.py b/ModelConf.py index 9b170d0..b1bb6f0 100644 --- a/ModelConf.py +++ b/ModelConf.py @@ -11,13 +11,14 @@ import torch import logging import shutil +from string import digits from losses.BaseLossConf import BaseLossConf -#import traceback from settings import LanguageTypes, ProblemTypes, TaggingSchemes, SupportedMetrics, PredictionTypes, DefaultPredictionFields, ConstantStatic from utils.common_utils import log_set, prepare_dir, md5, load_from_json, dump_to_json from utils.exceptions import ConfigurationError import numpy as np +import random class ConstantStaticItems(ConstantStatic): @staticmethod @@ -174,6 +175,21 @@ def raise_configuration_error(self, key): def load_from_file(self, conf_path): # load file self.conf = load_from_json(conf_path, debug=False) + + if self.params.automl: + parameters = nni.get_next_parameter() + for para in parameters.keys(): + it = self.conf + for path in para.split('.'): + if path[0] in digits: + path = int(path) + try: + it = it[path] + except KeyError: + raise KeyError('Cannot access {} in parameter {}. Please check parameter names in search space file.' + .format(path, para)) + it = parameters[para] + self = self.Conf.load_data(self, {'Conf' : self.conf}, key_prefix_desc='Conf') self.language = self.language.lower() self.configurate_outputs() diff --git a/Tutorial.md b/Tutorial.md index ebe88dc..05f3ab4 100644 --- a/Tutorial.md +++ b/Tutorial.md @@ -26,6 +26,7 @@ * [Extra Feature Support](#extra-feature) * [Learning Rate Decay](#lr-decay) * [Fix Embedding Weight & Limit Vocabulary Size](#fix-embedding) +* [Auto Tuning Hyperparameters](#auto-ml) * [Frequently Asked Questions](#faq) ## Installation @@ -732,6 +733,22 @@ To solve the above problems, NeuronBlocks supports *fixing embedding weight* (em ***training_params/vocabulary/max_vocabulary***. [int, optional for training, default: 800,000] The max size of corpus vocabulary. 
If corpus vocabulary size is larger than *max_vocabulary*, it will be cut according to word frequency. ***training_params/vocabulary/max_building_lines***. [int, optional for training, default: 1,000,000] The max lines NB will read from every file to build vocabulary + +## Auto Tuning Hyperparameters + +This function integrates with [NNI](https://github.com/microsoft/nni) to try hyperparameters generated by various tuning algorithms and run experiments on different training environments. You can use `pip install nni` to install the dependency. + +In an experiment, a set of hyperparameters is sampled from the search space, the space of hyperparameter configurations to search over, then a trial is conducted, and the result is collected. From these results, tuners can find parameter values within the search space that fit the task better. This process is known as Hyper Parameter Optimization (HPO). + +Before launching an AutoML experiment, it is essential to specify the search space and tuning algorithm in `search_space.json` and `config.yaml` respectively, and to modify the function `get_hyperparameters` in `exp.py`. Note that this function accepts the original architecture as its parameter, and how the model uses these hyperparameters is defined there. More details can be found [here](https://nni.readthedocs.io/en/latest/Tutorial/QuickStart.html). An example is available at `autoML-demo`. + +Here are some instructions about the files mentioned above. + +* `config.yaml`: In this file, basic experiment settings are specified, including experiment name, author name, which tuner to use, and how to start a trial. Users can also configure extra settings, such as how many GPUs are available for one trial. +* `search_space.json`: Tuners search for hyperparameters within the ranges specified in this file. Users can set the prior distribution empirically for every hyperparameter searched to speed up the HPO process. 
The name of each tuning variable must mirror the path to the corresponding entry in the model JSON file. For example, if you want to tune `architecture['training_params']['batch_size']` automatically, the variable name in `search_space.json` should be `training_params.batch_size`. In short, join the keys along the path with dots to map a search-space variable to its entry in the JSON file; a numeric component is treated as a list index (e.g. `architecture.3.conf.dropout`). +* `exp.py`: Function `get_hyperparameters` accepts the model from `model.json` as a parameter and gets new hyperparameters from NNI. Some hyperparameters in the model are then manually replaced by the new ones. + +After setting up, an experiment can be launched simply by running `python3 exp.py --config_file CONFIG_FILE --port PORT`. ## Frequently Asked Questions diff --git a/Tutorial_zh_CN.md b/Tutorial_zh_CN.md index 501af09..6950052 100644 --- a/Tutorial_zh_CN.md +++ b/Tutorial_zh_CN.md @@ -26,6 +26,7 @@ * [额外的feature](#extra-feature) * [学习率衰减](#lr-decay) * [固定embedding 和 词表大小设置](#fix-embedding) +* [自动超参数调整](#auto-ml) * [常见问题与答案](#faq) ## 安装 @@ -723,4 +724,29 @@ To solve the above problems, NeuronBlocks supports *fixing embedding weight* (em ***training_params/vocabulary/max_building_lines***. [int, optional for training, default: 1,000,000] The max lines NB will read from every file to build vocabulary +## 自动超参数调整 + +This function integrates with [NNI](https://github.com/microsoft/nni) to try multiple sets of hyperparameters generated by various tuning algorithms and run on different training environments. You can use `pip install nni` to install the dependency. + +此功能通过和 [NNI](https://github.com/microsoft/nni) 集成,以尝试通过多种自动调参算法生成超参数,并支持在多种平台上进行实验。您可以通过 `pip install nni` 来安装这个依赖。 + +In an experiment, a set of hyperparameters is sampled from the search space, the space of hyperparameter configurations to search over, then a trial is conducted, and the result is collected. From these results, tuners can find parameter values within the search space that fit the task better. 
This process is known as Hyper Parameter Optimization (HPO). + + +在实验中,首先从搜索空间中采样一组超参数,然后进行实验并收集结果。根据这些结果,自动调参算法可以在搜索空间中找到更适合当前任务的超参数值。这个过程称为超参数优化(HPO)。搜索空间指的是超参数的取值范围,自动调参算法将在这个空间内搜索合适的取值。 + +Before launching an AutoML experiment, it is essential to specify the search space and tuning algorithm in `search_space.json` and `config.yaml` respectively, and to modify the function `get_hyperparameters` in `exp.py`. Note that this function accepts the original architecture as its parameter, and how the model uses these hyperparameters is defined there. More details can be found [here](https://nni.readthedocs.io/en/latest/Tutorial/QuickStart.html). An example is available at `autoML-demo`. + +在启动自动机器学习实验之前,需要分别在 `search_space.json` 和 `config.yaml` 中指定搜索空间和调整算法,并在 `exp.py` 中修改函数 `get_hyperparameters`。请注意,该函数接受原始的模型作为其参数,并在这里定义了模型如何使用这些超参数。可以在[这里](https://nni.readthedocs.io/en/latest/Tutorial/QuickStart.html)找到详细信息。在目录 `autoML-demo` 中可以找到一个已经配置好的例子。 + +Here are some instructions about the files mentioned above. 
+ +以下是之前提到文件的一些说明。 + +* `config.yaml`:在此文件中,指定了基本的实验设置,包括实验名称,作者名称,要使用的调参算法以及开启实验的命令。用户还可以进行一些额外的设置,例如一次实验可以使用多少个GPU。 +* `search_space.json`:自动调参算法将在此文件中指定的范围内寻找超参数。用户可以根据经验为搜索的每个超参数设置先验分布来加快搜索。search space文件里定义的变量通过一定的格式和描述结构的json文件中的变量进行关联。举个例子,如果需要调整 json 文件中的 `architecture['training_params']['batch_size']`,在 search_space 中的变量名应该是 `training_params.batch_size`。通过 `.` 连接访问希望调整的变量路径来关联描述模型结构的 json 文件和搜索空间中的变量。 +* `exp.py`:函数 `get_hyperparameters` 接受来自 `model.json` 的模型作为参数,并从 NNI 获取新的超参数。模型中的某些超参数需要被手动替换为新的。 + +设置完成后,只需通过 `python3 exp.py --config_file CONFIG_FILE --port PORT` 即可启动实验。 + ## 常见问题与答案 diff --git a/autoML-demo/config.yaml b/autoML-demo/config.yaml new file mode 100644 index 0000000..b88606f --- /dev/null +++ b/autoML-demo/config.yaml @@ -0,0 +1,24 @@ +authorName: default +experimentName: lstm +trialConcurrency: 1 +maxExecDuration: 1000h +maxTrialNum: 500 +# 可选项: local, remote, pai, kubeflow +trainingServicePlatform: local +searchSpacePath: ./lstm_search_space.json +# 可选项: true, false, 默认值: false +useAnnotation: false +# 可选项: true, false, 默认值: false +multiThread: false +tuner: + builtinTunerName: TPE + classArgs: + optimize_mode: maximize + parallel_optimize: True + constant_liar_type: min +trial: + command: pip install -r requirements.txt && python train.py --conf_path model.json --automl + codeDir: . 
+ gpuNum: 1 +localConfig: + useActiveGpu: true diff --git a/autoML-demo/exp.py b/autoML-demo/exp.py new file mode 100644 index 0000000..6feb2f4 --- /dev/null +++ b/autoML-demo/exp.py @@ -0,0 +1,22 @@ +from nnicli import Experiment +import argparse +import os +import sys +import yaml +import argparse +import nni + + +def parser(): + parser = argparse.ArgumentParser() + parser.add_argument('--config_file', type=str, required=True, help='experiment config file') + parser.add_argument('--port', type=int, default=8080, help='show webUI on which port') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parser() + exp = Experiment() + # exp.stop_experiment() + exp.start_experiment(args.config_file, port=args.port) diff --git a/autoML-demo/model.json b/autoML-demo/model.json new file mode 100644 index 0000000..c436ef0 --- /dev/null +++ b/autoML-demo/model.json @@ -0,0 +1,190 @@ +{ + "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.", + "tool_version": "1.1.0", + "model_description": "This model is used for question answer matching task, and it achieved auc: 0.7854 in WikiQACorpus test set.", + "inputs": { + "use_cache": false, + "dataset_type": "classification", + "data_paths": { + "train_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv", + "valid_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv", + "test_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv", + "pre_trained_emb": "./dataset/Glove/glove.840B.300d.txt" + }, + "file_with_col_header": true, + "add_start_end_for_seq": true, + "file_header": { + "question_text": 0, + "passage_text": 1, + "label": 2 + }, + "model_inputs": { + "question": ["question_text"], + "passage": ["passage_text"] + }, + "target": ["label"] + }, + "outputs":{ + "save_base_dir": "models_wikiqa_matchatt/", + "model_name": "model.nb", + "train_log_name": "train.log", + "test_log_name": "test.log", + "predict_log_name": "predict.log", + "predict_fields": 
["prediction"], + "predict_output_name": "predict.tsv" + }, + "training_params": { + "vocabulary": { + "min_word_frequency": 1 + }, + "optimizer": { + "name": "Adam", + "params": { + "lr": 0.002 + } + }, + "lr_decay": 1, + "minimum_lr": 0.0001, + "epoch_start_lr_decay": 3, + "steps_per_validation": 50, + "use_gpu": true, + "batch_size": 128, + "batch_num_to_show_results": 100, + "max_epoch": 10, + "valid_times_per_epoch": 5, + "max_lengths": { + "query": 50, + "passage": 200 + }, + "cpu_num_workers": 4 + }, + "architecture":[ + { + "layer": "Embedding", + "conf": { + "word": { + "cols": ["question_text", "passage_text"], + "dim": 300, + "fix_weight": true, + "dropout": 0 + } + } + }, + { + "layer_id": "query_dropout", + "layer": "Dropout", + "conf": { + "dropout": 0.1 + }, + "inputs": ["question"] + }, + { + "layer_id": "passage_dropout", + "layer": "Dropout", + "conf": { + "dropout": 0.1 + }, + "inputs": ["passage"] + }, + { + "layer_id": "query_1", + "layer": "BiLSTM", + "conf": { + "hidden_dim": 128, + "dropout": 0.1, + "num_layers": 2 + }, + "inputs": ["query_dropout"] + }, + { + "layer_id": "passage_1", + "layer": "BiLSTM", + "conf": { + "hidden_dim": 128, + "dropout": 0.1, + "num_layers": 2 + }, + "inputs": ["passage_dropout"] + }, + { + "layer_id": "query_matched", + "layer": "MatchAttention", + "conf": { + }, + "inputs": ["query_dropout", "passage_dropout"] + }, + { + "layer_id": "passage_matched", + "layer": "MatchAttention", + "conf": { + }, + "inputs": ["passage_dropout", "query_dropout"] + }, + { + "layer_id": "query_combined", + "layer": "Combination", + "conf": { + "operations": ["origin"] + }, + "inputs": ["query_1","query_matched"] + }, + { + "layer_id": "passage_combined", + "layer": "Combination", + "conf": { + "operations": ["origin"] + }, + "inputs": ["passage_1","passage_matched"] + }, + { + "layer_id": "query_linear_att", + "layer": "Pooling", + "conf": { + "pool_axis": 1, + "pool_type": "max" + }, + "inputs": ["query_combined"] + }, + { + 
"layer_id": "passage_linear_att", + "layer": "Pooling", + "conf": { + "pool_axis": 1, + "pool_type": "max" + }, + "inputs": ["passage_combined"] + }, + { + "layer_id": "comb", + "layer": "Combination", + "conf": { + "operations": ["origin"] + }, + "inputs": ["query_linear_att","passage_linear_att"] + }, + { + "output_layer_flag": true, + "layer_id": "output", + "layer": "Linear", + "conf": { + "hidden_dim": [128,2], + "activation": "PReLU", + "last_hidden_activation": false + }, + "inputs": ["comb"] + } + ], + "loss": { + "losses": [ + { + "type": "CrossEntropyLoss", + "conf": { + "weight": [0.4,0.6], + "size_average": true + }, + "inputs": ["output","label"] + } + ] + }, + "metrics": ["auc","accuracy"] +} diff --git a/autoML-demo/search_space.json b/autoML-demo/search_space.json new file mode 100644 index 0000000..b1b16ef --- /dev/null +++ b/autoML-demo/search_space.json @@ -0,0 +1,8 @@ +{ + "architecture.3.conf.dropout": { "_type": "uniform", "_value": [0, 0.999] }, + "training_params.batch_size": { "_type": "choice", "_value": [32, 64, 128, 256] }, + "training_params.optimizer.params.lr": { "_type": "loguniform", "_value": [0.0001, 0.01] }, + "architecture.0.conf.word.dropout": { "_type": "uniform", "_value": [0, 0.999] }, + "architecture.2.conf.dropout": { "_type": "uniform", "_value": [0, 0.999] }, + "training_params.lr_decay": { "_type": "uniform", "_value": [0.95, 1] } +} diff --git a/exp.py b/exp.py new file mode 100644 index 0000000..6feb2f4 --- /dev/null +++ b/exp.py @@ -0,0 +1,22 @@ +from nnicli import Experiment +import argparse +import os +import sys +import yaml +import argparse +import nni + + +def parser(): + parser = argparse.ArgumentParser() + parser.add_argument('--config_file', type=str, required=True, help='experiment config file') + parser.add_argument('--port', type=int, default=8080, help='show webUI on which port') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parser() + exp = Experiment() + # 
exp.stop_experiment() + exp.start_experiment(args.config_file, port=args.port) diff --git a/train.py b/train.py index da92182..a1ad998 100644 --- a/train.py +++ b/train.py @@ -1,6 +1,8 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT license. +import torch +import torch.nn as nn from settings import ProblemTypes, version, Setting as st import os @@ -11,8 +13,6 @@ import numpy as np import copy -import torch -import torch.nn as nn from ModelConf import ModelConf from problem import Problem from utils.common_utils import dump_to_pkl, load_from_pkl, load_from_json, dump_to_json, prepare_dir, md5 @@ -22,6 +22,9 @@ from LearningMachine import LearningMachine +logger = logging.getLogger() + + class Cache: def __init__(self): self.dictionary_invalid = True @@ -285,7 +288,7 @@ def main(params): if not conf.pretrained_model_path: vocab_info, initialize = get_vocab_info(conf, problem, emb_matrix), True - lm = LearningMachine('train', conf, problem, vocab_info=vocab_info, initialize=initialize, use_gpu=conf.use_gpu) + lm = LearningMachine('train', conf, problem, vocab_info=vocab_info, initialize=initialize, use_gpu=conf.use_gpu, automl=params.automl) if conf.pretrained_model_path: logging.info('Loading the pretrained model: %s...' 
% conf.pretrained_model_path) lm.load_model(conf.pretrained_model_path) @@ -355,6 +358,7 @@ def get_vocab_info(conf, problem, emb_matrix): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Training') + parser.add_argument("--automl", default=False, action='store_true', help="whether autoML will be used (rely on NNI)") parser.add_argument("--conf_path", type=str, help="configuration path") parser.add_argument("--train_data_path", type=str) parser.add_argument("--valid_data_path", type=str) @@ -362,25 +366,28 @@ def get_vocab_info(conf, problem, emb_matrix): parser.add_argument("--pretrained_emb_path", type=str) parser.add_argument("--pretrained_emb_type", type=str, default='glove', help='glove|word2vec|fasttext') parser.add_argument("--pretrained_emb_binary_or_text", type=str, default='text', help='text|binary') - parser.add_argument("--involve_all_words_in_pretrained_emb", type=bool, default=False, help='By default, only words that show up in the training data are involved.') + parser.add_argument("--involve_all_words_in_pretrained_emb", type=bool, default=False, + help='By default, only words that show up in the training data are involved.') parser.add_argument("--pretrained_model_path", type=str, help='load pretrained model, and then finetune it.') - parser.add_argument("--cache_dir", type=str, help='where stores the built problem.pkl where there are dictionaries like word2id, id2word. CAUTION: if there is a previous model, the dictionaries would be loaded from os.path.dir(previous_model_path)/.necessary_cache/problem.pkl') + parser.add_argument("--cache_dir", type=str, + help='where stores the built problem.pkl where there are dictionaries like word2id, id2word. 
CAUTION: if there is a previous model, the dictionaries would be loaded from os.path.dir(previous_model_path)/.necessary_cache/problem.pkl') parser.add_argument("--model_save_dir", type=str, help='where to store models') - parser.add_argument("--predict_output_path", type=str, help='specify another prediction output path, instead of conf[outputs][save_base_dir] + conf[outputs][predict_output_name] defined in configuration file') - parser.add_argument("--log_dir", type=str, help='If not specified, logs would be stored in conf_bilstmlast.json/outputs/save_base_dir') + parser.add_argument("--predict_output_path", type=str, + help='specify another prediction output path, instead of conf[outputs][save_base_dir] + conf[outputs][predict_output_name] defined in configuration file') + parser.add_argument("--log_dir", type=str, + help='If not specified, logs would be stored in conf_bilstmlast.json/outputs/save_base_dir') parser.add_argument("--make_cache_only", type=bool, default=False, help='make cache without training') parser.add_argument("--max_epoch", type=int, help='maximum number of epochs') parser.add_argument("--batch_size", type=int, help='batch_size of each gpu') parser.add_argument("--learning_rate", type=float, help='learning rate') parser.add_argument("--mode", type=str, default='normal', help='normal|philly') - parser.add_argument("--force", type=bool, default=False, help='Allow overwriting if some files or directories already exist.') + parser.add_argument("--force", type=bool, default=True, + help='Allow overwriting if some files or directories already exist.') parser.add_argument("--disable_log_file", type=bool, default=False, help='If True, disable log file') parser.add_argument("--debug", type=bool, default=False) params, _ = parser.parse_known_args() - # use for debug, remember delete - # params.conf_path = 'configs_example/conf_debug_charemb.json' - + assert params.conf_path, 'Please specify a configuration path via --conf_path' if 
params.pretrained_emb_path and not os.path.isabs(params.pretrained_emb_path): params.pretrained_emb_path = os.path.join(os.getcwd(), params.pretrained_emb_path)