diff --git a/mnist/README.md b/mnist/README.md
index 548e52d801..7f9b831b88 100644
--- a/mnist/README.md
+++ b/mnist/README.md
@@ -2,6 +2,5 @@
 
 ```bash
 pip install -r requirements.txt
-python data.py
 python main.py
 ```
diff --git a/mnist/main.py b/mnist/main.py
index 1fd5c3d799..9a0db35307 100644
--- a/mnist/main.py
+++ b/mnist/main.py
@@ -1,5 +1,5 @@
 from __future__ import print_function
-import os
+import os, argparse
 import torch
 import torch.nn as nn
 import torch.optim as optim
@@ -7,20 +7,34 @@
 cuda = torch.cuda.is_available()
 
-def print_header(msg):
-    print('===>', msg)
+# Training settings
+parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
+parser.add_argument('--batchSize', type=int, default=64, help='input batch size')
+parser.add_argument('--testBatchSize', type=int, default=1000, help='input batch size for testing')
+parser.add_argument('--trainSize', type=int, default=1000, help='train dataset size (max=60000). Default: 1000')
+parser.add_argument('--nEpochs', type=int, default=2, help='number of epochs to train')
+parser.add_argument('--lr', type=float, default=0.01, help='learning rate. Default=0.01')
+parser.add_argument('--momentum', type=float, default=0.5, help='SGD momentum. Default=0.5')
+parser.add_argument('--seed', type=int, default=123, help='random seed to use. Default=123')
+opt = parser.parse_args()
+print(opt)
+
+torch.manual_seed(opt.seed)
+if cuda:
+    torch.cuda.manual_seed(opt.seed)
 
 if not os.path.exists('data/processed/training.pt'):
     import data
 
 # Data
-print_header('Loading data')
+print('===> Loading data')
 with open('data/processed/training.pt', 'rb') as f:
     training_set = torch.load(f)
 with open('data/processed/test.pt', 'rb') as f:
     test_set = torch.load(f)
 
 training_data = training_set[0].view(-1, 1, 28, 28).div(255)
+training_data = training_data[:opt.trainSize]
 training_labels = training_set[1]
 test_data = test_set[0].view(-1, 1, 28, 28).div(255)
 test_labels = test_set[1]
@@ -28,25 +42,23 @@ def print_header(msg):
 del training_set
 del test_set
 
-# Model
-print_header('Building model')
+print('===> Building model')
 class Net(nn.Container):
     def __init__(self):
-        super(Net, self).__init__(
-            conv1 = nn.Conv2d(1, 20, 5),
-            pool1 = nn.MaxPool2d(2, 2),
-            conv2 = nn.Conv2d(20, 50, 5),
-            pool2 = nn.MaxPool2d(2, 2),
-            fc1 = nn.Linear(800, 500),
-            fc2 = nn.Linear(500, 10),
-            relu = nn.ReLU(),
-            softmax = nn.LogSoftmax(),
-        )
+        super(Net, self).__init__()
+        self.conv1 = nn.Conv2d(1, 10, 5)
+        self.pool1 = nn.MaxPool2d(2, 2)
+        self.conv2 = nn.Conv2d(10, 20, 5)
+        self.pool2 = nn.MaxPool2d(2, 2)
+        self.fc1 = nn.Linear(320, 50)
+        self.fc2 = nn.Linear(50, 10)
+        self.relu = nn.ReLU()
+        self.softmax = nn.LogSoftmax()
 
     def forward(self, x):
         x = self.relu(self.pool1(self.conv1(x)))
         x = self.relu(self.pool2(self.conv2(x)))
-        x = x.view(-1, 800)
+        x = x.view(-1, 320)
         x = self.relu(self.fc1(x))
         x = self.relu(self.fc2(x))
         return self.softmax(x)
@@ -56,60 +68,59 @@ def forward(self, x):
     model.cuda()
 
 criterion = nn.NLLLoss()
-
-# Training settings
-BATCH_SIZE = 150
-TEST_BATCH_SIZE = 1000
-NUM_EPOCHS = 2
-
-optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
+optimizer = optim.SGD(model.parameters(), lr=opt.lr, momentum=opt.momentum)
 
 def train(epoch):
-    batch_data_t = torch.FloatTensor(BATCH_SIZE, 1, 28, 28)
-    batch_targets_t = torch.LongTensor(BATCH_SIZE)
+    # create buffers for mini-batch
+    batch_data = torch.FloatTensor(opt.batchSize, 1, 28, 28)
+    batch_targets = torch.LongTensor(opt.batchSize)
     if cuda:
-        batch_data_t = batch_data_t.cuda()
-        batch_targets_t = batch_targets_t.cuda()
-    batch_data = Variable(batch_data_t, requires_grad=False)
-    batch_targets = Variable(batch_targets_t, requires_grad=False)
-    for i in range(0, training_data.size(0), BATCH_SIZE):
+        batch_data, batch_targets = batch_data.cuda(), batch_targets.cuda()
+
+    # create autograd Variables over these buffers
+    batch_data, batch_targets = Variable(batch_data), Variable(batch_targets)
+
+    for i in range(0, training_data.size(0)-opt.batchSize+1, opt.batchSize):
+        start, end = i, i+opt.batchSize
         optimizer.zero_grad()
-        batch_data.data[:] = training_data[i:i+BATCH_SIZE]
-        batch_targets.data[:] = training_labels[i:i+BATCH_SIZE]
-        loss = criterion(model(batch_data), batch_targets)
+        batch_data.data[:] = training_data[start:end]
+        batch_targets.data[:] = training_labels[start:end]
+        output = model(batch_data)
+        loss = criterion(output, batch_targets)
         loss.backward()
         loss = loss.data[0]
         optimizer.step()
-        print('Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.4f}'.format(epoch,
-            i+BATCH_SIZE, training_data.size(0),
-            float(i+BATCH_SIZE)/training_data.size(0)*100, loss))
+        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.4f}'
+              .format(epoch, end, opt.trainSize, float(end)/opt.trainSize*100, loss))
 
 def test(epoch):
-    test_loss = 0
-    batch_data_t = torch.FloatTensor(TEST_BATCH_SIZE, 1, 28, 28)
-    batch_targets_t = torch.LongTensor(TEST_BATCH_SIZE)
+    # create buffers for mini-batch
+    batch_data = torch.FloatTensor(opt.testBatchSize, 1, 28, 28)
+    batch_targets = torch.LongTensor(opt.testBatchSize)
     if cuda:
-        batch_data_t = batch_data_t.cuda()
-        batch_targets_t = batch_targets_t.cuda()
-    batch_data = Variable(batch_data_t, volatile=True)
-    batch_targets = Variable(batch_targets_t, volatile=True)
+        batch_data, batch_targets = batch_data.cuda(), batch_targets.cuda()
+
+    # create autograd Variables over these buffers
+    batch_data = Variable(batch_data, volatile=True)
+    batch_targets = Variable(batch_targets, volatile=True)
+
+    test_loss = 0
     correct = 0
-    for i in range(0, test_data.size(0), TEST_BATCH_SIZE):
-        print('Testing model: {}/{}'.format(i, test_data.size(0)), end='\r')
-        batch_data.data[:] = test_data[i:i+TEST_BATCH_SIZE]
-        batch_targets.data[:] = test_labels[i:i+TEST_BATCH_SIZE]
+
+    for i in range(0, test_data.size(0), opt.testBatchSize):
+        batch_data.data[:] = test_data[i:i+opt.testBatchSize]
+        batch_targets.data[:] = test_labels[i:i+opt.testBatchSize]
         output = model(batch_data)
        test_loss += criterion(output, batch_targets)
-        pred = output.data.max(1)[1]
+        pred = output.data.max(1)[1]  # get the index of the max log-probability
         correct += pred.long().eq(batch_targets.data.long()).cpu().sum()
 
     test_loss = test_loss.data[0]
-    test_loss /= (test_data.size(0) / TEST_BATCH_SIZE)  # criterion averages over batch size
-    print('TEST SET RESULTS:' + ' ' * 20)
-    print('Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
+    test_loss /= (test_data.size(0) / opt.testBatchSize)  # criterion averages over batch size
+    print('\nTest Set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
         test_loss, correct, test_data.size(0), float(correct)/test_data.size(0)*100))
 
-for epoch in range(1, NUM_EPOCHS+1):
+for epoch in range(1, opt.nEpochs+1):
     train(epoch)
     test(epoch)
diff --git a/mnist/requirements.txt b/mnist/requirements.txt
index eb2e4049b0..9309f121ea 100644
--- a/mnist/requirements.txt
+++ b/mnist/requirements.txt
@@ -1,2 +1,3 @@
 torch
 six
+tqdm
diff --git a/word_language_model/main.py b/word_language_model/main.py
index 35fd09047b..2303d61c62 100644
--- a/word_language_model/main.py
+++ b/word_language_model/main.py
@@ -5,10 +5,7 @@
 # test set.
 ###############################################################################
 
-import argparse
-import time
-import math
-
+import argparse, time, math
 import torch
 import torch.nn as nn
 from torch.autograd import Variable
@@ -16,7 +13,7 @@
 import data
 import model
 
-parser = argparse.ArgumentParser(description='PyTorch PTB Language Model')
+parser = argparse.ArgumentParser(description='PyTorch PennTreeBank RNN/LSTM Language Model')
 
 # Data parameters
 parser.add_argument('-data' , type=str, default='./data/penn', help='Location of the data corpus' )
@@ -41,8 +38,7 @@
 # Set the random seed manually for reproducibility.
 torch.manual_seed(args.seed)
 
-# If the GPU is enabled, do some plumbing.
-
+# If the GPU is enabled, warn the user to use it.
 if torch.cuda.is_available() and not args.cuda:
     print("WARNING: You have a CUDA device, so you should probably run with -cuda")
diff --git a/word_language_model/model.py b/word_language_model/model.py
index 5520950ebe..85f3704775 100644
--- a/word_language_model/model.py
+++ b/word_language_model/model.py
@@ -7,10 +7,10 @@ class RNNModel(nn.Container):
        and a decoder. Runs one RNN step at a time.
     """
 
-    def __init__(self, rnnType, ntoken, ninp, nhid, nlayers):
+    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers):
         super(RNNModel, self).__init__(
             encoder = nn.sparse.Embedding(ntoken, ninp),
-            rnn = nn.RNNBase(rnnType, ninp, nhid, nlayers, bias=False),
+            rnn = nn.RNNBase(rnn_type, ninp, nhid, nlayers, bias=False),
             decoder = nn.Linear(nhid, ntoken),
         )
 
@@ -21,7 +21,7 @@ def __init__(self, rnnType, ntoken, ninp, nhid, nlayers):
         self.decoder.bias.data.fill_(0)
         self.decoder.weight.data.uniform_(-initrange, initrange)
 
-        self.rnnType = rnnType
+        self.rnn_type = rnn_type
         self.nhid = nhid
         self.nlayers = nlayers
 
@@ -33,7 +33,7 @@ def forward(self, input, hidden):
 
     def initHidden(self, bsz):
         weight = next(self.parameters()).data
-        if self.rnnType == 'LSTM':
+        if self.rnn_type == 'LSTM':
             return (Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()),
                     Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()))
         else:
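
A note on the new `x.view(-1, 320)` in mnist/main.py: the flattened size follows from the smaller conv stack (10 and 20 feature maps instead of 20 and 50). A quick sanity check of that arithmetic, as a standalone sketch in plain Python (not part of the patch):

```python
# Feature-map size after Net's conv stack, for a 28x28 MNIST input.
# Each 5x5 conv (no padding) shrinks a side by 4; each 2x2 max-pool halves it.
side = 28
side = (side - 5 + 1) // 2   # conv1 (5x5) then pool1 (2x2): 28 -> 24 -> 12
side = (side - 5 + 1) // 2   # conv2 (5x5) then pool2 (2x2): 12 -> 8 -> 4
assert 20 * side * side == 320  # 20 conv2 channels * 4 * 4, matches fc1's input size
```

With the hard-coded constants replaced by argparse options, the defaults above are equivalent to running `python main.py --batchSize 64 --testBatchSize 1000 --trainSize 1000 --nEpochs 2 --lr 0.01 --momentum 0.5 --seed 123`.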