From bc8876aed5dd3c78beec85cd141cdf0fbcf4cb87 Mon Sep 17 00:00:00 2001 From: Nikhil Singh Date: Sun, 27 May 2018 20:14:44 +0530 Subject: [PATCH 1/4] readme changed --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 3021acc..3e4d36d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +Forked from [avisingh599](https://github.com/avisingh599/visual-qa.git) + # Deep Learning for Visual Question Answering [Click here](https://avisingh599.github.io/deeplearning/visual-qa/) to go to the accompanying blog post. From d7241c6e4cccf5e51485ca336180f42c97631041 Mon Sep 17 00:00:00 2001 From: Nikhil Singh Date: Wed, 30 May 2018 20:43:45 +0530 Subject: [PATCH 2/4] filename changed in preprocessed dir --- scripts/evaluateMLP.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 scripts/evaluateMLP.py diff --git a/scripts/evaluateMLP.py b/scripts/evaluateMLP.py old mode 100644 new mode 100755 index 483836e..413b329 --- a/scripts/evaluateMLP.py +++ b/scripts/evaluateMLP.py @@ -26,7 +26,7 @@ def main(): 'r').read().decode('utf8').splitlines() answers_val = open('../data/preprocessed/answers_val2014_all.txt', 'r').read().decode('utf8').splitlines() - images_val = open('../data/preprocessed/images_val2014.txt', + images_val = open('../data/preprocessed/images_val2014_all.txt', 'r').read().decode('utf8').splitlines() vgg_model_path = '../features/coco/vgg_feats.mat' From 0a6f248d84833b5bd7763781102e93d669af8db3 Mon Sep 17 00:00:00 2001 From: Nikhil Singh Date: Fri, 1 Jun 2018 23:49:28 +0530 Subject: [PATCH 3/4] more initial changes for first run --- scripts/trainLSTM_1.py | 5 +++-- scripts/trainLSTM_language.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) mode change 100644 => 100755 scripts/trainLSTM_1.py mode change 100644 => 100755 scripts/trainLSTM_language.py diff --git a/scripts/trainLSTM_1.py b/scripts/trainLSTM_1.py old mode 100644 new mode 100755 index 1b205e3..e35febf --- 
a/scripts/trainLSTM_1.py +++ b/scripts/trainLSTM_1.py @@ -4,7 +4,8 @@ import argparse from keras.models import Sequential -from keras.layers.core import Dense, Activation, Merge, Dropout, Reshape +from keras.layers.core import Dense, Activation, Dropout, Reshape +from keras.layers import Merge from keras.layers.recurrent import LSTM from keras.utils import np_utils, generic_utils from keras.callbacks import ModelCheckpoint, RemoteMonitor @@ -119,4 +120,4 @@ def main(): model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k)) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/trainLSTM_language.py b/scripts/trainLSTM_language.py old mode 100644 new mode 100755 index 1c46648..e7559d0 --- a/scripts/trainLSTM_language.py +++ b/scripts/trainLSTM_language.py @@ -27,7 +27,7 @@ def main(): questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines() questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines() - answers_train = open('../data/preprocessed/answers_train2014.txt', 'r').read().decode('utf8').splitlines() + answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines() images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines() max_answers = 1000 questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, max_answers) From c32665d8069c34420f6aed7b2137f568be32331f Mon Sep 17 00:00:00 2001 From: Nikhil Singh Date: Mon, 11 Jun 2018 13:51:47 +0530 Subject: [PATCH 4/4] trainLSTM_1.py running --- scripts/README.md | 0 scripts/demo_batch.py | 0 scripts/dumpText.py | 0 scripts/evaluateLSTM.py | 0 scripts/extract_features.py | 0 scripts/features.py | 0 scripts/get_started.sh | 0 scripts/own_image.py | 0 scripts/trainLSTM_1.py | 10 ++++++---- 
scripts/trainMLP.py | 16 ++++++++-------- scripts/utils.py | 0 scripts/vgg_features.prototxt | 0 12 files changed, 14 insertions(+), 12 deletions(-) mode change 100644 => 100755 scripts/README.md mode change 100644 => 100755 scripts/demo_batch.py mode change 100644 => 100755 scripts/dumpText.py mode change 100644 => 100755 scripts/evaluateLSTM.py mode change 100644 => 100755 scripts/extract_features.py mode change 100644 => 100755 scripts/features.py mode change 100644 => 100755 scripts/get_started.sh mode change 100644 => 100755 scripts/own_image.py mode change 100644 => 100755 scripts/trainMLP.py mode change 100644 => 100755 scripts/utils.py mode change 100644 => 100755 scripts/vgg_features.prototxt diff --git a/scripts/README.md b/scripts/README.md old mode 100644 new mode 100755 diff --git a/scripts/demo_batch.py b/scripts/demo_batch.py old mode 100644 new mode 100755 diff --git a/scripts/dumpText.py b/scripts/dumpText.py old mode 100644 new mode 100755 diff --git a/scripts/evaluateLSTM.py b/scripts/evaluateLSTM.py old mode 100644 new mode 100755 diff --git a/scripts/extract_features.py b/scripts/extract_features.py old mode 100644 new mode 100755 diff --git a/scripts/features.py b/scripts/features.py old mode 100644 new mode 100755 diff --git a/scripts/get_started.sh b/scripts/get_started.sh old mode 100644 new mode 100755 diff --git a/scripts/own_image.py b/scripts/own_image.py old mode 100644 new mode 100755 diff --git a/scripts/trainLSTM_1.py b/scripts/trainLSTM_1.py index e35febf..1b2a7b7 100755 --- a/scripts/trainLSTM_1.py +++ b/scripts/trainLSTM_1.py @@ -22,7 +22,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('-num_hidden_units_mlp', type=int, default=1024) - parser.add_argument('-num_hidden_units_lstm', type=int, default=512) + parser.add_argument('-num_hidden_units_lstm', type=int, default=4096) parser.add_argument('-num_hidden_layers_mlp', type=int, default=3) parser.add_argument('-num_hidden_layers_lstm', type=int, default=1) 
parser.add_argument('-dropout', type=float, default=0.5) @@ -57,13 +57,14 @@ def main(): joblib.dump(labelencoder,'../models/labelencoder.pkl') image_model = Sequential() - image_model.add(Reshape(input_shape = (img_dim,), dims=(img_dim,))) + #image_model.add(Reshape(input_shape = (img_dim,), dims=(img_dim,))) + image_model.add(Reshape((4096,), input_shape=(4096,)))#input_shape = (img_dim,), dims=(img_dim,))) language_model = Sequential() if args.num_hidden_layers_lstm == 1: - language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=False, input_shape=(max_len, word_vec_dim))) + language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=False, input_shape=(None, word_vec_dim))) else: - language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=True, input_shape=(max_len, word_vec_dim))) + language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=True, input_shape=(None, word_vec_dim))) for i in xrange(args.num_hidden_layers_lstm-2): language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=True)) language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=False)) @@ -110,6 +111,7 @@ def main(): X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps) X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures) Y_batch = get_answers_matrix(an_batch, labelencoder) + print X_q_batch.shape loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch) progbar.add(args.batch_size, values=[("train loss", loss)]) diff --git a/scripts/trainMLP.py b/scripts/trainMLP.py old mode 100644 new mode 100755 index 42c7a4d..468ba69 --- a/scripts/trainMLP.py +++ b/scripts/trainMLP.py @@ -22,13 +22,13 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('-num_hidden_units', type=int, default=1024) - parser.add_argument('-num_hidden_layers', type=int, default=3) - parser.add_argument('-dropout', type=float, 
default=0.5) + parser.add_argument('-num_hidden_layers', type=int, default=5) + parser.add_argument('-dropout', type=float, default=0.2) parser.add_argument('-activation', type=str, default='tanh') parser.add_argument('-language_only', type=bool, default= False) - parser.add_argument('-num_epochs', type=int, default=100) + parser.add_argument('-num_epochs', type=int, default=50) parser.add_argument('-model_save_interval', type=int, default=10) - parser.add_argument('-batch_size', type=int, default=128) + parser.add_argument('-batch_size', type=int, default=256) args = parser.parse_args() questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines() @@ -42,7 +42,7 @@ def main(): labelencoder = preprocessing.LabelEncoder() labelencoder.fit(answers_train) nb_classes = len(list(labelencoder.classes_)) - joblib.dump(labelencoder,'../models/labelencoder.pkl') + joblib.dump(labelencoder,'../models3/labelencoder.pkl') features_struct = scipy.io.loadmat(vgg_model_path) VGGfeatures = features_struct['feats'] @@ -76,9 +76,9 @@ def main(): json_string = model.to_json() if args.language_only: - model_file_name = '../models/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers) + model_file_name = '../models3/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers) else: - model_file_name = '../models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers) + model_file_name = '../models3/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers) open(model_file_name + '.json', 'w').write(json_string) print 'Compiling model...' 
@@ -113,4 +113,4 @@ def main(): model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k)) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/utils.py b/scripts/utils.py old mode 100644 new mode 100755 diff --git a/scripts/vgg_features.prototxt b/scripts/vgg_features.prototxt old mode 100644 new mode 100755