From c35ce447a6929b8d1c1978e2e1469377736b18cf Mon Sep 17 00:00:00 2001 From: akshat Date: Thu, 2 Jan 2020 03:58:42 +0530 Subject: [PATCH 1/6] added .mb file for cnn made a .mb file for the convolution network implementation of the MNIST recognition task. Added the updated source code file too. --- vision/mnist/MNIST_CONV_NET/mnist conv.jl | 198 ++++++++++++++++ vision/mnist/MNIST_CONV_NET/mnist conv.md | 265 ++++++++++++++++++++++ vision/mnist/conv.jl | 126 ---------- 3 files changed, 463 insertions(+), 126 deletions(-) create mode 100644 vision/mnist/MNIST_CONV_NET/mnist conv.jl create mode 100644 vision/mnist/MNIST_CONV_NET/mnist conv.md delete mode 100644 vision/mnist/conv.jl diff --git a/vision/mnist/MNIST_CONV_NET/mnist conv.jl b/vision/mnist/MNIST_CONV_NET/mnist conv.jl new file mode 100644 index 000000000..6c8ce05a8 --- /dev/null +++ b/vision/mnist/MNIST_CONV_NET/mnist conv.jl @@ -0,0 +1,198 @@ +# # Classification of MNIST digits with a convolutional network. +# This program writes out saved model to the file "mnist_conv.bson". +# This file demonstrates basic implimentation of ~ +# - data structure +# - construction +# - pooling +# - training +# - saving +# - conditional early-exit +# - learning rate scheduling. 
+# +# +# **This model, while simple, should hit around 99% test accuracy after training for approximately 20 epochs.** +# +# For core concepts of ML and Convolution check out [towardsdatascience's guide](https://towardsdatascience.com/a-comprehensive-guide-to-convolutional-neural-networks-the-eli5-way-3bd2b1164a53) for a detailed explanation of everything +# +# ## Importing required libraries +# Can't do machine learning in flux whithout `Flux` :) +using Flux +# Import the mnist dataset +using Flux.Data.MNIST +# `Statistics` to calculate the mean which is required for finding the accuracy +using Statistics +# The uses of these will be explained later +using Flux: onehotbatch, onecold, crossentropy, throttle +using Base.Iterators: repeated, partition +# `Printf` For outputing the accuracy and other information and `BSON` forsaving the model +using Printf, BSON + +# ## Load labels and images from Flux.Data.MNIST +@info("Loading data set") +train_labels = MNIST.labels() +train_imgs = MNIST.images() + +# ## Reshaping and batching +# The `train_imgs` is **not** of the right shape. So, it can't be directly divided into smaller parts for batching. +# For this, we are creating another array of the correct shape which will be our batch, and we'll fill this array with the data from our `train_imgs`. +# The correct shape, which Flux expects the batch to be in, is **(IMG_DATA, IMG_DATA, COLOR_CHANNEL, SAMPLES)**. +# +# A good way to think about this is that every image is a *2D* rectangle. Which has a hight and width of 28. +# Flux requires us to add another dim which represents the color. So a ***BLACK AND WHITE*** image would be like a cuboid of thickness 1, hight and width of 28. +# An ***RGB*** image would be like cuboid thickness 3, made up of 3 rectangles stackted on top of each other . each rectangle representing red, blue and green channels. +# +# This type of thinking is good for beginners as it helps visualize things. 
+# +# Note that we're adding image to the last (samples) dim +# So, a batch can be visualized as multiple cubiods, each of which is made up of *3* rectangles (R - rectangle, G - rectangle,) +function make_minibatch(X, Y, idxs) + X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs)) + for i in 1:length(idxs) + + X_batch[:, :, :, i] = Float32.(X[idxs[i]]) + end + Y_batch = onehotbatch(Y[idxs], 0:9) + return (X_batch, Y_batch) +end + +# In the above, onehotencoding turns neumerical data into kind of truth tabels. +# example,`onehot(:b, [:a, :b, :c])` whill output +# ```julia +# 3-element Flux.OneHotVector: +# false +# true +# false +# ``` +# onehotbatch just one hot encodes all the data in a provided array +# and yields the encoding of every element in an output *OneHotMatrix*. + + +batch_size = 128 +mb_idxs = partition(1:length(train_imgs), batch_size) +train_set = [make_minibatch(train_imgs, train_labels, i) for i in mb_idxs] + +# We are dividing an array of numbers of the size of the imgs into the sizes of batches and saving it in `mb_idxs`. +# This **array of arrays** of numbers will be used as indexs. + +# ## Prepare test set as one giant minibatch: +test_imgs = MNIST.images(:test) +test_labels = MNIST.labels(:test) +test_set = make_minibatch(test_imgs, test_labels, 1:length(test_imgs)) + +# ## Define our model. +# We will use a simple convolutional architecture with +# three iterations of Conv -> ReLU -> MaxPool, followed by a final Dense +# layer that feeds into a softmax probability output. +# +@info("Constructing model...") +model = Chain( + + Conv((3, 3), 1=>16, pad=(1,1), relu), + MaxPool((2,2)), + + Conv((3, 3), 16=>32, pad=(1,1), relu), + MaxPool((2,2)), + + + Conv((3, 3), 32=>32, pad=(1,1), relu), + MaxPool((2,2)), + + + x -> reshape(x, :, size(x, 4)), + Dense(288, 10), + + + softmax, +) + + +# A conv layer is defined as `Conv((3, 3), 1=>16, pad=(1,1), relu)` +# +# `(3,3)` is the size of the filter that will be convolving. 
+# `1=>16` follows the `input_size=>output_size` format. +# `relu` is the name of the activation function we're gonna be using +# +# First convolution, operating upon a 28x28 image +# Second convolution, operating upon a 14x14 image +# Third convolution, operating upon a 7x7 image +# Then reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N) +# which is where we get the 288 in the `Dense`. +# +# Finally, softmax to get nice probabilities + + +# ## Generic Pre-processing +# Load model and datasets onto GPU, if enabled +train_set = gpu.(train_set) +test_set = gpu.(test_set) +model = gpu(model) + +# Make sure our model is nicely precompiled before starting our training loop +model(train_set[1][1]) + +# `loss()` calculates the crossentropy loss between our prediction `y_hat` +# (calculated from `model(x)`) and the ground truth `y`. We augment the data +# a bit, adding gaussian random noise to our image to make it more robust and avoid overfitting. +# +function loss(x, y) + + x_aug = x .+ 0.1f0*gpu(randn(eltype(x), size(x))) + + y_hat = model(x_aug) + return crossentropy(y_hat, y) +end +accuracy(x, y) = mean(onecold(model(x)) .== onecold(y)) +# Onecold does what is sounds like it might do, it turns one hotencoded vectors into the orignal data. +# +# There are two reasons for using onecold. +# One is that m(x) has its last layer as `softmax`. This means it returns a probability distribution. +# We can not equate PB distributions. So, we need to turn the probablity distribution into neumerical data. +# As it turns out, onecold works with probablity distributions too. +# This means it turns the PB into normal neumerical data which can be used with the `==` sign. + + +opt = ADAM(0.001) +# Train our model with the given training set using the ADAM optimizer and +# printing out performance against the test set as we go. 
+ +# ## Training +@info("Beginning training loop...") +best_acc = 0.0 +last_improvement = 0 +for epoch_idx in 1:100 + global best_acc, last_improvement + # Train for a single epoch + Flux.train!(loss, params(model), train_set, opt) + + # Calculate accuracy: + acc = accuracy(test_set...) + @info(@sprintf("[%d]: Test accuracy: %.4f", epoch_idx, acc)) + + # If our accuracy is good enough, quit out. + if acc >= 0.999 + @info(" -> Early-exiting: We reached our target accuracy of 99.9%") + break + end + + # If this is the best accuracy we've seen so far, save the model out + if acc >= best_acc + @info(" -> New best accuracy! Saving model out to mnist_conv.bson") + BSON.@save joinpath(dirname(@__FILE__), "mnist_conv.bson") model epoch_idx acc + best_acc = acc + last_improvement = epoch_idx + end + + # If we haven't seen improvement in 5 epochs, drop our learning rate: + if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6 + opt.eta /= 10.0 + @warn(" -> Haven't improved in a while, dropping learning rate to $(opt.eta)!") + + # After dropping learning rate, give it a few epochs to improve + last_improvement = epoch_idx + end + + if epoch_idx - last_improvement >= 10 + @warn(" -> We're calling this converged.") + break + end +end diff --git a/vision/mnist/MNIST_CONV_NET/mnist conv.md b/vision/mnist/MNIST_CONV_NET/mnist conv.md new file mode 100644 index 000000000..047460c6c --- /dev/null +++ b/vision/mnist/MNIST_CONV_NET/mnist conv.md @@ -0,0 +1,265 @@ +# Classification of MNIST digits with a convolutional network. +This program writes out saved model to the file "mnist_conv.bson". +This file demonstrates basic implimentation of ~ +- data structure +- construction +- pooling +- training +- saving +- conditional early-exit +- learning rate scheduling. 
+ + +**This model, while simple, should hit around 99% test accuracy after training for approximately 20 epochs.** + +For core concepts of ML and Convolution check out [towardsdatascience's guide](https://towardsdatascience.com/a-comprehensive-guide-to-convolutional-neural-networks-the-eli5-way-3bd2b1164a53) for a detailed explanation of everything + +## Importing required libraries +Can't do machine learning in flux whithout `Flux` :) + +```julia +using Flux +``` + +Import the mnist dataset + +```julia +using Flux.Data.MNIST +``` + +`Statistics` to calculate the mean which is required for finding the accuracy + +```julia +using Statistics +``` + +The uses of these will be explained later + +```julia +using Flux: onehotbatch, onecold, crossentropy, throttle +using Base.Iterators: repeated, partition +``` + +`Printf` For outputing the accuracy and other information and `BSON` forsaving the model + +```julia +using Printf, BSON +``` + +## Load labels and images from Flux.Data.MNIST + +```julia +@info("Loading data set") +train_labels = MNIST.labels() +train_imgs = MNIST.images() +``` + +## Reshaping and batching +The `train_imgs` is **not** of the right shape. So, it can't be directly divided into smaller parts for batching. +For this, we are creating another array of the correct shape which will be our batch, and we'll fill this array with the data from our `train_imgs`. +The correct shape, which Flux expects the batch to be in, is **(IMG_DATA, IMG_DATA, COLOR_CHANNEL, SAMPLES)**. + +A good way to think about this is that every image is a *2D* rectangle. Which has a hight and width of 28. +Flux requires us to add another dim which represents the color. So a ***BLACK AND WHITE*** image would be like a cuboid of thickness 1, hight and width of 28. +An ***RGB*** image would be like cuboid thickness 3, made up of 3 rectangles stackted on top of each other . each rectangle representing red, blue and green channels. 
+ +This type of thinking is good for beginners as it helps visualize things. + +Note that we're adding image to the last (samples) dim +So, a batch can be visualized as multiple cubiods, each of which is made up of *3* rectangles (R - rectangle, G - rectangle,) + +```julia +function make_minibatch(X, Y, idxs) + X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs)) + for i in 1:length(idxs) + + X_batch[:, :, :, i] = Float32.(X[idxs[i]]) + end + Y_batch = onehotbatch(Y[idxs], 0:9) + return (X_batch, Y_batch) +end +``` + +In the above, onehotencoding turns neumerical data into kind of truth tabels. +example,`onehot(:b, [:a, :b, :c])` whill output +```julia +3-element Flux.OneHotVector: +false + true +false +``` +onehotbatch just one hot encodes all the data in a provided array +and yields the encoding of every element in an output *OneHotMatrix*. + +```julia +batch_size = 128 +mb_idxs = partition(1:length(train_imgs), batch_size) +train_set = [make_minibatch(train_imgs, train_labels, i) for i in mb_idxs] +``` + +We are dividing an array of numbers of the size of the imgs into the sizes of batches and saving it in `mb_idxs`. +This **array of arrays** of numbers will be used as indexs. + +## Prepare test set as one giant minibatch: + +```julia +test_imgs = MNIST.images(:test) +test_labels = MNIST.labels(:test) +test_set = make_minibatch(test_imgs, test_labels, 1:length(test_imgs)) +``` + +## Define our model. +We will use a simple convolutional architecture with +three iterations of Conv -> ReLU -> MaxPool, followed by a final Dense +layer that feeds into a softmax probability output. 
+ +```julia +@info("Constructing model...") +model = Chain( + + Conv((3, 3), 1=>16, pad=(1,1), relu), + MaxPool((2,2)), + + Conv((3, 3), 16=>32, pad=(1,1), relu), + MaxPool((2,2)), + + + Conv((3, 3), 32=>32, pad=(1,1), relu), + MaxPool((2,2)), + + + x -> reshape(x, :, size(x, 4)), + Dense(288, 10), + + + softmax, +) +``` + +A conv layer is defined as `Conv((3, 3), 1=>16, pad=(1,1), relu)` + +`(3,3)` is the size of the filter that will be convolving. +`1=>16` follows the `input_size=>output_size` format. +`relu` is the name of the activation function we're gonna be using + +First convolution, operating upon a 28x28 image +Second convolution, operating upon a 14x14 image +Third convolution, operating upon a 7x7 image +Then reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N) +which is where we get the 288 in the `Dense`. + +Finally, softmax to get nice probabilities + +## Generic Pre-processing +Load model and datasets onto GPU, if enabled + +```julia +train_set = gpu.(train_set) +test_set = gpu.(test_set) +model = gpu(model) +``` + +Make sure our model is nicely precompiled before starting our training loop + +```julia +model(train_set[1][1]) +``` + +`loss()` calculates the crossentropy loss between our prediction `y_hat` +(calculated from `model(x)`) and the ground truth `y`. We augment the data +a bit, adding gaussian random noise to our image to make it more robust and avoid overfitting. + +```julia +function loss(x, y) + + x_aug = x .+ 0.1f0*gpu(randn(eltype(x), size(x))) + + y_hat = model(x_aug) + return crossentropy(y_hat, y) +end +accuracy(x, y) = mean(onecold(model(x)) .== onecold(y)) +``` + +Onecold does what is sounds like it might do, it turns one hotencoded vectors into the orignal data. + +There are two reasons for using onecold. +One is that m(x) has its last layer as `softmax`. This means it returns a probability distribution. +We can not equate PB distributions. 
So, we need to turn the probablity distribution into neumerical data. +As it turns out, onecold works with probablity distributions too. +This means it turns the PB into normal neumerical data which can be used with the `==` sign. + +```julia +opt = ADAM(0.001) +``` + +Train our model with the given training set using the ADAM optimizer and +printing out performance against the test set as we go. + +## Training + +```julia +@info("Beginning training loop...") +best_acc = 0.0 +last_improvement = 0 +for epoch_idx in 1:100 + global best_acc, last_improvement +``` + +Train for a single epoch + +```julia + Flux.train!(loss, params(model), train_set, opt) +``` + +Calculate accuracy: + +```julia + acc = accuracy(test_set...) + @info(@sprintf("[%d]: Test accuracy: %.4f", epoch_idx, acc)) +``` + +If our accuracy is good enough, quit out. + +```julia + if acc >= 0.999 + @info(" -> Early-exiting: We reached our target accuracy of 99.9%") + break + end +``` + +If this is the best accuracy we've seen so far, save the model out + +```julia + if acc >= best_acc + @info(" -> New best accuracy! 
Saving model out to mnist_conv.bson") + BSON.@save joinpath(dirname(@__FILE__), "mnist_conv.bson") model epoch_idx acc + best_acc = acc + last_improvement = epoch_idx + end +``` + +If we haven't seen improvement in 5 epochs, drop our learning rate: + +```julia + if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6 + opt.eta /= 10.0 + @warn(" -> Haven't improved in a while, dropping learning rate to $(opt.eta)!") +``` + +After dropping learning rate, give it a few epochs to improve + +```julia + last_improvement = epoch_idx + end + + if epoch_idx - last_improvement >= 10 + @warn(" -> We're calling this converged.") + break + end +end +``` + +--- + +*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* + diff --git a/vision/mnist/conv.jl b/vision/mnist/conv.jl deleted file mode 100644 index b803324a2..000000000 --- a/vision/mnist/conv.jl +++ /dev/null @@ -1,126 +0,0 @@ -# Classifies MNIST digits with a convolutional network. -# Writes out saved model to the file "mnist_conv.bson". -# Demonstrates basic model construction, training, saving, -# conditional early-exit, and learning rate scheduling. -# -# This model, while simple, should hit around 99% test -# accuracy after training for approximately 20 epochs. 
- -using Flux, Flux.Data.MNIST, Statistics -using Flux: onehotbatch, onecold, crossentropy, throttle -using Base.Iterators: repeated, partition -using Printf, BSON - -# Load labels and images from Flux.Data.MNIST -@info("Loading data set") -train_labels = MNIST.labels() -train_imgs = MNIST.images() - -# Bundle images together with labels and group into minibatchess -function make_minibatch(X, Y, idxs) - X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs)) - for i in 1:length(idxs) - X_batch[:, :, :, i] = Float32.(X[idxs[i]]) - end - Y_batch = onehotbatch(Y[idxs], 0:9) - return (X_batch, Y_batch) -end -batch_size = 128 -mb_idxs = partition(1:length(train_imgs), batch_size) -train_set = [make_minibatch(train_imgs, train_labels, i) for i in mb_idxs] - -# Prepare test set as one giant minibatch: -test_imgs = MNIST.images(:test) -test_labels = MNIST.labels(:test) -test_set = make_minibatch(test_imgs, test_labels, 1:length(test_imgs)) - -# Define our model. We will use a simple convolutional architecture with -# three iterations of Conv -> ReLU -> MaxPool, followed by a final Dense -# layer that feeds into a softmax probability output. 
-@info("Constructing model...") -model = Chain( - # First convolution, operating upon a 28x28 image - Conv((3, 3), 1=>16, pad=(1,1), relu), - MaxPool((2,2)), - - # Second convolution, operating upon a 14x14 image - Conv((3, 3), 16=>32, pad=(1,1), relu), - MaxPool((2,2)), - - # Third convolution, operating upon a 7x7 image - Conv((3, 3), 32=>32, pad=(1,1), relu), - MaxPool((2,2)), - - # Reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N) - # which is where we get the 288 in the `Dense` layer below: - x -> reshape(x, :, size(x, 4)), - Dense(288, 10), - - # Finally, softmax to get nice probabilities - softmax, -) - -# Load model and datasets onto GPU, if enabled -train_set = gpu.(train_set) -test_set = gpu.(test_set) -model = gpu(model) - -# Make sure our model is nicely precompiled before starting our training loop -model(train_set[1][1]) - -# `loss()` calculates the crossentropy loss between our prediction `y_hat` -# (calculated from `model(x)`) and the ground truth `y`. We augment the data -# a bit, adding gaussian random noise to our image to make it more robust. -function loss(x, y) - # We augment `x` a little bit here, adding in random noise - x_aug = x .+ 0.1f0*gpu(randn(eltype(x), size(x))) - - y_hat = model(x_aug) - return crossentropy(y_hat, y) -end -accuracy(x, y) = mean(onecold(model(x)) .== onecold(y)) - -# Train our model with the given training set using the ADAM optimizer and -# printing out performance against the test set as we go. -opt = ADAM(0.001) - -@info("Beginning training loop...") -best_acc = 0.0 -last_improvement = 0 -for epoch_idx in 1:100 - global best_acc, last_improvement - # Train for a single epoch - Flux.train!(loss, params(model), train_set, opt) - - # Calculate accuracy: - acc = accuracy(test_set...) - @info(@sprintf("[%d]: Test accuracy: %.4f", epoch_idx, acc)) - - # If our accuracy is good enough, quit out. 
- if acc >= 0.999 - @info(" -> Early-exiting: We reached our target accuracy of 99.9%") - break - end - - # If this is the best accuracy we've seen so far, save the model out - if acc >= best_acc - @info(" -> New best accuracy! Saving model out to mnist_conv.bson") - BSON.@save joinpath(dirname(@__FILE__), "mnist_conv.bson") model epoch_idx acc - best_acc = acc - last_improvement = epoch_idx - end - - # If we haven't seen improvement in 5 epochs, drop our learning rate: - if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6 - opt.eta /= 10.0 - @warn(" -> Haven't improved in a while, dropping learning rate to $(opt.eta)!") - - # After dropping learning rate, give it a few epochs to improve - last_improvement = epoch_idx - end - - if epoch_idx - last_improvement >= 10 - @warn(" -> We're calling this converged.") - break - end -end From 32a8540bc4273d3e430478242c3827690bb7d212 Mon Sep 17 00:00:00 2001 From: akshat Date: Thu, 2 Jan 2020 12:56:28 +0530 Subject: [PATCH 2/6] changed the arrangment of the folder --- vision/mnist/{MNIST_CONV_NET/mnist conv.jl => conv.jl} | 0 vision/mnist/{MNIST_CONV_NET/mnist conv.md => conv.md} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename vision/mnist/{MNIST_CONV_NET/mnist conv.jl => conv.jl} (100%) rename vision/mnist/{MNIST_CONV_NET/mnist conv.md => conv.md} (100%) diff --git a/vision/mnist/MNIST_CONV_NET/mnist conv.jl b/vision/mnist/conv.jl similarity index 100% rename from vision/mnist/MNIST_CONV_NET/mnist conv.jl rename to vision/mnist/conv.jl diff --git a/vision/mnist/MNIST_CONV_NET/mnist conv.md b/vision/mnist/conv.md similarity index 100% rename from vision/mnist/MNIST_CONV_NET/mnist conv.md rename to vision/mnist/conv.md From e87c0be83e01f0faea90ab4ab1b6b3c06781b3af Mon Sep 17 00:00:00 2001 From: akshat Date: Sat, 4 Jan 2020 02:50:28 +0530 Subject: [PATCH 3/6] added the conv implementation of Judge A book by its cover. 
Added the Conv net for judge a book by its cover task --- vision/judge a book by its cover/Conv.jl | 94 ++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 vision/judge a book by its cover/Conv.jl diff --git a/vision/judge a book by its cover/Conv.jl b/vision/judge a book by its cover/Conv.jl new file mode 100644 index 000000000..7db8366a2 --- /dev/null +++ b/vision/judge a book by its cover/Conv.jl @@ -0,0 +1,94 @@ +using Flux +using Flux: @epochs, onehotbatch, crossentropy, throttle +using Images +using FileIO +using CSV +using Statistics + +# NOTE: the dataset locations below are machine-specific absolute paths; adjust them before running. +train_path = "G:\\Book-Cover-Train\\" +train_csv = "F:\\book-dataset\\Task1\\book30-listing-train.csv" + +test_path = "G:\\Book-Cover-Test\\" +test_csv = "F:\\book-dataset\\Task1\\book30-listing-test.csv" + +train_imglist = readdir(train_path) +test_imglist = readdir(test_path) + +train_setsize = length(train_imglist) +test_setsize = length(test_imglist) + +batch_size = 1000 + + +# Build one batch `(X, Y)` from the samples at positions `indexs`. +# `images` holds the image file names inside directory `path`; each image is loaded and +# resized to 100x100, and `X` is allocated as a 100x100x3xN Float32 array (N = length(indexs)). +# `Y` one-hot encodes column 6 of the listing file `csv` over the classes 0:29. +# NOTE(review): assumes `images` is in the same order as the rows of `csv` - confirm. +function create_dataset(indexs; path, csv, images) + dataset = CSV.read(csv) + X = Array{Float32}(undef, 100, 100, 3, length(indexs)) + for i = 1:length(indexs) + # Index with `indexs[i]` (the sample's position in `images`), not the batch-local `i`; + # otherwise every batch would reload the first `length(indexs)` images. + img = load(joinpath(path, images[indexs[i]])) + img = channelview(imresize(img, 100, 100)) + img = Float32.(permutedims(img, (2, 3, 1))) + X[:, :, :, i] = img + end + Y = onehotbatch(dataset[indexs[1]:indexs[end], 6], 0:29) + return (X, Y) +end + +indexs = Base.Iterators.partition(1:train_setsize, batch_size) +train_set = [create_dataset( + i; + path = train_path, + csv = train_csv, + images = train_imglist, +) for i in indexs] + +test_set = create_dataset( + 1:test_setsize; + path = test_path, + csv = test_csv, + images = test_imglist, +) + +# Four Conv -> MaxPool stages, then two Dense layers and a softmax over the 30 classes. +# A 100x100x3 input reaches the first Dense layer as 6*6*512 = 18432 features. +m = Chain( + Conv((3, 3), 3 => 32, pad = (1, 1), relu), + MaxPool((2, 2)), + + Conv((3, 3), 32 => 64, pad = (1, 1), relu), + MaxPool((2, 2)), + + Conv((3, 3), 64 => 256, pad = (1, 1), relu), + MaxPool((2, 2)), + + Conv((2, 2), 256 => 512, pad = (1, 1), relu), + MaxPool((2, 2)), + + x -> reshape(x, :, size(x, 4)), + Dense(18432, 256,
relu), + Dense(256, 30), + softmax, +) + +# Cross-entropy loss and classification accuracy of `m` on a batch `(x, y)`. +loss(x, y) = crossentropy(m(x), y) +accuracy(x, y) = mean(Flux.onecold(m(x)) .== Flux.onecold(y)) + +# Training callback: show accuracy and loss on the whole test set. +function cbfunc() + ca = accuracy(test_set...) + @show(ca) + cl = loss(test_set...) + @show(cl) +end + +# Train for 5 epochs with ADAM; `throttle` limits the callback to one call per 3 seconds. +opt = ADAM() +@epochs 5 Flux.train!(loss, params(m), train_set, opt, cb = throttle(cbfunc, 3)) From 5e7d9e1233da7f1995adaf72dcb668d43e3b1542 Mon Sep 17 00:00:00 2001 From: akshat Date: Sat, 4 Jan 2020 03:00:57 +0530 Subject: [PATCH 4/6] synced the book branch and added code --- vision/mnist/conv.jl | 114 ++++--------------- vision/mnist/conv.md | 265 ------------------------------------------- 2 files changed, 21 insertions(+), 358 deletions(-) delete mode 100644 vision/mnist/conv.md diff --git a/vision/mnist/conv.jl b/vision/mnist/conv.jl index 6c8ce05a8..b803324a2 100644 --- a/vision/mnist/conv.jl +++ b/vision/mnist/conv.jl @@ -1,127 +1,65 @@ -# # Classification of MNIST digits with a convolutional network. -# This program writes out saved model to the file "mnist_conv.bson". -# This file demonstrates basic implimentation of ~ -# - data structure -# - construction -# - pooling -# - training -# - saving -# - conditional early-exit -# - learning rate scheduling. +# Classifies MNIST digits with a convolutional network. +# Writes out saved model to the file "mnist_conv.bson". +# Demonstrates basic model construction, training, saving, +# conditional early-exit, and learning rate scheduling. 
# -# -# **This model, while simple, should hit around 99% test accuracy after training for approximately 20 epochs.** -# -# For core concepts of ML and Convolution check out [towardsdatascience's guide](https://towardsdatascience.com/a-comprehensive-guide-to-convolutional-neural-networks-the-eli5-way-3bd2b1164a53) for a detailed explanation of everything -# -# ## Importing required libraries -# Can't do machine learning in flux whithout `Flux` :) -using Flux -# Import the mnist dataset -using Flux.Data.MNIST -# `Statistics` to calculate the mean which is required for finding the accuracy -using Statistics -# The uses of these will be explained later +# This model, while simple, should hit around 99% test +# accuracy after training for approximately 20 epochs. + +using Flux, Flux.Data.MNIST, Statistics using Flux: onehotbatch, onecold, crossentropy, throttle using Base.Iterators: repeated, partition -# `Printf` For outputing the accuracy and other information and `BSON` forsaving the model using Printf, BSON -# ## Load labels and images from Flux.Data.MNIST +# Load labels and images from Flux.Data.MNIST @info("Loading data set") train_labels = MNIST.labels() train_imgs = MNIST.images() -# ## Reshaping and batching -# The `train_imgs` is **not** of the right shape. So, it can't be directly divided into smaller parts for batching. -# For this, we are creating another array of the correct shape which will be our batch, and we'll fill this array with the data from our `train_imgs`. -# The correct shape, which Flux expects the batch to be in, is **(IMG_DATA, IMG_DATA, COLOR_CHANNEL, SAMPLES)**. -# -# A good way to think about this is that every image is a *2D* rectangle. Which has a hight and width of 28. -# Flux requires us to add another dim which represents the color. So a ***BLACK AND WHITE*** image would be like a cuboid of thickness 1, hight and width of 28. 
-# An ***RGB*** image would be like cuboid thickness 3, made up of 3 rectangles stackted on top of each other . each rectangle representing red, blue and green channels. -# -# This type of thinking is good for beginners as it helps visualize things. -# -# Note that we're adding image to the last (samples) dim -# So, a batch can be visualized as multiple cubiods, each of which is made up of *3* rectangles (R - rectangle, G - rectangle,) +# Bundle images together with labels and group into minibatchess function make_minibatch(X, Y, idxs) X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs)) for i in 1:length(idxs) - X_batch[:, :, :, i] = Float32.(X[idxs[i]]) end Y_batch = onehotbatch(Y[idxs], 0:9) return (X_batch, Y_batch) end - -# In the above, onehotencoding turns neumerical data into kind of truth tabels. -# example,`onehot(:b, [:a, :b, :c])` whill output -# ```julia -# 3-element Flux.OneHotVector: -# false -# true -# false -# ``` -# onehotbatch just one hot encodes all the data in a provided array -# and yields the encoding of every element in an output *OneHotMatrix*. - - batch_size = 128 mb_idxs = partition(1:length(train_imgs), batch_size) train_set = [make_minibatch(train_imgs, train_labels, i) for i in mb_idxs] -# We are dividing an array of numbers of the size of the imgs into the sizes of batches and saving it in `mb_idxs`. -# This **array of arrays** of numbers will be used as indexs. - -# ## Prepare test set as one giant minibatch: +# Prepare test set as one giant minibatch: test_imgs = MNIST.images(:test) test_labels = MNIST.labels(:test) test_set = make_minibatch(test_imgs, test_labels, 1:length(test_imgs)) -# ## Define our model. -# We will use a simple convolutional architecture with +# Define our model. We will use a simple convolutional architecture with # three iterations of Conv -> ReLU -> MaxPool, followed by a final Dense # layer that feeds into a softmax probability output. 
-# @info("Constructing model...") model = Chain( - + # First convolution, operating upon a 28x28 image Conv((3, 3), 1=>16, pad=(1,1), relu), MaxPool((2,2)), + # Second convolution, operating upon a 14x14 image Conv((3, 3), 16=>32, pad=(1,1), relu), MaxPool((2,2)), - + # Third convolution, operating upon a 7x7 image Conv((3, 3), 32=>32, pad=(1,1), relu), MaxPool((2,2)), - + # Reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N) + # which is where we get the 288 in the `Dense` layer below: x -> reshape(x, :, size(x, 4)), Dense(288, 10), - + # Finally, softmax to get nice probabilities softmax, ) - -# A conv layer is defined as `Conv((3, 3), 1=>16, pad=(1,1), relu)` -# -# `(3,3)` is the size of the filter that will be convolving. -# `1=>16` follows the `input_size=>output_size` format. -# `relu` is the name of the activation function we're gonna be using -# -# First convolution, operating upon a 28x28 image -# Second convolution, operating upon a 14x14 image -# Third convolution, operating upon a 7x7 image -# Then reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N) -# which is where we get the 288 in the `Dense`. -# -# Finally, softmax to get nice probabilities - - -# ## Generic Pre-processing # Load model and datasets onto GPU, if enabled train_set = gpu.(train_set) test_set = gpu.(test_set) @@ -132,30 +70,20 @@ model(train_set[1][1]) # `loss()` calculates the crossentropy loss between our prediction `y_hat` # (calculated from `model(x)`) and the ground truth `y`. We augment the data -# a bit, adding gaussian random noise to our image to make it more robust and avoid overfitting. -# +# a bit, adding gaussian random noise to our image to make it more robust. 
function loss(x, y) - + # We augment `x` a little bit here, adding in random noise x_aug = x .+ 0.1f0*gpu(randn(eltype(x), size(x))) y_hat = model(x_aug) return crossentropy(y_hat, y) end accuracy(x, y) = mean(onecold(model(x)) .== onecold(y)) -# Onecold does what is sounds like it might do, it turns one hotencoded vectors into the orignal data. -# -# There are two reasons for using onecold. -# One is that m(x) has its last layer as `softmax`. This means it returns a probability distribution. -# We can not equate PB distributions. So, we need to turn the probablity distribution into neumerical data. -# As it turns out, onecold works with probablity distributions too. -# This means it turns the PB into normal neumerical data which can be used with the `==` sign. - -opt = ADAM(0.001) # Train our model with the given training set using the ADAM optimizer and # printing out performance against the test set as we go. +opt = ADAM(0.001) -# ## Training @info("Beginning training loop...") best_acc = 0.0 last_improvement = 0 diff --git a/vision/mnist/conv.md b/vision/mnist/conv.md deleted file mode 100644 index 047460c6c..000000000 --- a/vision/mnist/conv.md +++ /dev/null @@ -1,265 +0,0 @@ -# Classification of MNIST digits with a convolutional network. -This program writes out saved model to the file "mnist_conv.bson". -This file demonstrates basic implimentation of ~ -- data structure -- construction -- pooling -- training -- saving -- conditional early-exit -- learning rate scheduling. 
- - -**This model, while simple, should hit around 99% test accuracy after training for approximately 20 epochs.** - -For core concepts of ML and Convolution check out [towardsdatascience's guide](https://towardsdatascience.com/a-comprehensive-guide-to-convolutional-neural-networks-the-eli5-way-3bd2b1164a53) for a detailed explanation of everything - -## Importing required libraries -Can't do machine learning in flux whithout `Flux` :) - -```julia -using Flux -``` - -Import the mnist dataset - -```julia -using Flux.Data.MNIST -``` - -`Statistics` to calculate the mean which is required for finding the accuracy - -```julia -using Statistics -``` - -The uses of these will be explained later - -```julia -using Flux: onehotbatch, onecold, crossentropy, throttle -using Base.Iterators: repeated, partition -``` - -`Printf` For outputing the accuracy and other information and `BSON` forsaving the model - -```julia -using Printf, BSON -``` - -## Load labels and images from Flux.Data.MNIST - -```julia -@info("Loading data set") -train_labels = MNIST.labels() -train_imgs = MNIST.images() -``` - -## Reshaping and batching -The `train_imgs` is **not** of the right shape. So, it can't be directly divided into smaller parts for batching. -For this, we are creating another array of the correct shape which will be our batch, and we'll fill this array with the data from our `train_imgs`. -The correct shape, which Flux expects the batch to be in, is **(IMG_DATA, IMG_DATA, COLOR_CHANNEL, SAMPLES)**. - -A good way to think about this is that every image is a *2D* rectangle. Which has a hight and width of 28. -Flux requires us to add another dim which represents the color. So a ***BLACK AND WHITE*** image would be like a cuboid of thickness 1, hight and width of 28. -An ***RGB*** image would be like cuboid thickness 3, made up of 3 rectangles stackted on top of each other . each rectangle representing red, blue and green channels. 
- -This type of thinking is good for beginners as it helps visualize things. - -Note that we're adding image to the last (samples) dim -So, a batch can be visualized as multiple cubiods, each of which is made up of *3* rectangles (R - rectangle, G - rectangle,) - -```julia -function make_minibatch(X, Y, idxs) - X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs)) - for i in 1:length(idxs) - - X_batch[:, :, :, i] = Float32.(X[idxs[i]]) - end - Y_batch = onehotbatch(Y[idxs], 0:9) - return (X_batch, Y_batch) -end -``` - -In the above, onehotencoding turns neumerical data into kind of truth tabels. -example,`onehot(:b, [:a, :b, :c])` whill output -```julia -3-element Flux.OneHotVector: -false - true -false -``` -onehotbatch just one hot encodes all the data in a provided array -and yields the encoding of every element in an output *OneHotMatrix*. - -```julia -batch_size = 128 -mb_idxs = partition(1:length(train_imgs), batch_size) -train_set = [make_minibatch(train_imgs, train_labels, i) for i in mb_idxs] -``` - -We are dividing an array of numbers of the size of the imgs into the sizes of batches and saving it in `mb_idxs`. -This **array of arrays** of numbers will be used as indexs. - -## Prepare test set as one giant minibatch: - -```julia -test_imgs = MNIST.images(:test) -test_labels = MNIST.labels(:test) -test_set = make_minibatch(test_imgs, test_labels, 1:length(test_imgs)) -``` - -## Define our model. -We will use a simple convolutional architecture with -three iterations of Conv -> ReLU -> MaxPool, followed by a final Dense -layer that feeds into a softmax probability output. 
- -```julia -@info("Constructing model...") -model = Chain( - - Conv((3, 3), 1=>16, pad=(1,1), relu), - MaxPool((2,2)), - - Conv((3, 3), 16=>32, pad=(1,1), relu), - MaxPool((2,2)), - - - Conv((3, 3), 32=>32, pad=(1,1), relu), - MaxPool((2,2)), - - - x -> reshape(x, :, size(x, 4)), - Dense(288, 10), - - - softmax, -) -``` - -A conv layer is defined as `Conv((3, 3), 1=>16, pad=(1,1), relu)` - -`(3,3)` is the size of the filter that will be convolving. -`1=>16` follows the `input_size=>output_size` format. -`relu` is the name of the activation function we're gonna be using - -First convolution, operating upon a 28x28 image -Second convolution, operating upon a 14x14 image -Third convolution, operating upon a 7x7 image -Then reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N) -which is where we get the 288 in the `Dense`. - -Finally, softmax to get nice probabilities - -## Generic Pre-processing -Load model and datasets onto GPU, if enabled - -```julia -train_set = gpu.(train_set) -test_set = gpu.(test_set) -model = gpu(model) -``` - -Make sure our model is nicely precompiled before starting our training loop - -```julia -model(train_set[1][1]) -``` - -`loss()` calculates the crossentropy loss between our prediction `y_hat` -(calculated from `model(x)`) and the ground truth `y`. We augment the data -a bit, adding gaussian random noise to our image to make it more robust and avoid overfitting. - -```julia -function loss(x, y) - - x_aug = x .+ 0.1f0*gpu(randn(eltype(x), size(x))) - - y_hat = model(x_aug) - return crossentropy(y_hat, y) -end -accuracy(x, y) = mean(onecold(model(x)) .== onecold(y)) -``` - -Onecold does what is sounds like it might do, it turns one hotencoded vectors into the orignal data. - -There are two reasons for using onecold. -One is that m(x) has its last layer as `softmax`. This means it returns a probability distribution. -We can not equate PB distributions. 
So, we need to turn the probablity distribution into neumerical data. -As it turns out, onecold works with probablity distributions too. -This means it turns the PB into normal neumerical data which can be used with the `==` sign. - -```julia -opt = ADAM(0.001) -``` - -Train our model with the given training set using the ADAM optimizer and -printing out performance against the test set as we go. - -## Training - -```julia -@info("Beginning training loop...") -best_acc = 0.0 -last_improvement = 0 -for epoch_idx in 1:100 - global best_acc, last_improvement -``` - -Train for a single epoch - -```julia - Flux.train!(loss, params(model), train_set, opt) -``` - -Calculate accuracy: - -```julia - acc = accuracy(test_set...) - @info(@sprintf("[%d]: Test accuracy: %.4f", epoch_idx, acc)) -``` - -If our accuracy is good enough, quit out. - -```julia - if acc >= 0.999 - @info(" -> Early-exiting: We reached our target accuracy of 99.9%") - break - end -``` - -If this is the best accuracy we've seen so far, save the model out - -```julia - if acc >= best_acc - @info(" -> New best accuracy! 
Saving model out to mnist_conv.bson") - BSON.@save joinpath(dirname(@__FILE__), "mnist_conv.bson") model epoch_idx acc - best_acc = acc - last_improvement = epoch_idx - end -``` - -If we haven't seen improvement in 5 epochs, drop our learning rate: - -```julia - if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6 - opt.eta /= 10.0 - @warn(" -> Haven't improved in a while, dropping learning rate to $(opt.eta)!") -``` - -After dropping learning rate, give it a few epochs to improve - -```julia - last_improvement = epoch_idx - end - - if epoch_idx - last_improvement >= 10 - @warn(" -> We're calling this converged.") - break - end -end -``` - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - From 98dc41d09456f94bacce58a152f9e866684ae4a2 Mon Sep 17 00:00:00 2001 From: akshat Date: Sat, 11 Jan 2020 02:30:10 +0530 Subject: [PATCH 5/6] updated a model, comments, batch loading --- vision/judge a book by its cover/Conv.jl | 128 ++++++++++++++--------- 1 file changed, 76 insertions(+), 52 deletions(-) diff --git a/vision/judge a book by its cover/Conv.jl b/vision/judge a book by its cover/Conv.jl index 7db8366a2..ce8988bd0 100644 --- a/vision/judge a book by its cover/Conv.jl +++ b/vision/judge a book by its cover/Conv.jl @@ -1,81 +1,105 @@ +# After a lot of debugging and asking around +# The fix was found to be to remove every conv layer. +# Previously, whilst using the conv model, the accuracy was stuck at exactly 0.8 and loss at exactly 3.33253 +# Changing the model to be that of a simple dense one seems to bring the accuracy up to what is expected. +# In the research paper, authors were able to achive a general accuracy of 24% in the model which they trained from the ground up +# In doing so they used the whole 30000 image dataset. This model uses 1/3 of that datset and around 500 testing images because of processing constraints +# The accuracy achived after 20 epochs was right at ~12%. 
Considering that it uses a third of the dataset, this is pretty decent. + using Flux -using Flux: @epochs, onehotbatch, crossentropy, throttle +using Flux: onehotbatch, crossentropy, throttle using Images using FileIO using CSV using Statistics -train_path = "G:\\Book-Cover-Train\\" -train_csv = "F:\\book-dataset\\Task1\\book30-listing-train.csv" - -test_path = "G:\\Book-Cover-Test\\" -test_csv = "F:\\book-dataset\\Task1\\book30-listing-test.csv" - -train_imglist = readdir(train_path) -test_imglist = readdir(test_path) - -train_setsize = length(train_imglist) -test_setsize = length(test_imglist) - -batch_size = 1000 +const train_path = "G:\\mlimgs\\Book-Train-FULL\\" # contains the training images +const train_csv = "F:\\book-dataset\\Task1\\book30-listing-train1.csv" # This file contains the labels (genre) for the training images + +const test_path = "G:\\mlimgs\\Book-Test-FULL\\" # contains the testing dataset +const test_csv = "F:\\book-dataset\\Task1\\book30-listing-test1.csv" # This file contains the labels (genre) for the testing images + +const train_dataset = CSV.read(train_csv) # read the csv for training labels + # The CSVs have 2 columns: the first holds the genre and the second the name of the book + # The name of the book isn't required for the functioning of the model but is included for debugging purposes + +const test_dataset = CSV.read(test_csv) # read the csv for testing labels + +# find the total number of images in sets so we can correctly divide the dataset into batches +const train_imglist = readdir(train_path) +const test_imglist = readdir(test_path) +const train_setsize = length(train_imglist) +const test_setsize = length(test_imglist) + +# Self-explanatory hyperparameters +const batch_size = 400 +const imsize = 60 +const epochs = 20 +const learning_rate = 0.0001 + +function create_batch(indexs; path, csv, dataset) + X = Array{Float32}(undef, imsize*imsize*3, length(indexs)) # every time this function is called, a new batch is created with the correct size + # 
It should be able to hold multiple flattened images (flattened because we're using a dense network) + # That's why it is shaped like (size_of_image, no_of_images) + for (p,i) in enumerate(indexs) + img = load(string(path,i,".png")) # The images are labeled like 1.png, 2.png, and so on. + img = channelview(RGB.(imresize(img, imsize, imsize))) + img = reshape(Float32.(img),(imsize*imsize*3)) # The current image has 3 layers of imsize by imsize pixels all compiled into a 3D array + # We need the image in a flat array so we reshape into a flat array for it to be eligible to be added to array X + X[:, p] = img # add the img to X. - -function create_dataset(indexs; path, csv, images) - dataset = CSV.read(csv) - X = Array{Float32}(undef, 100, 100, 3, length(indexs)) - for i = 1:length(indexs) - img = load(string(path, images[i])) - img = channelview(imresize(img, 100, 100)) - img = Float32.(permutedims(img, (2, 3, 1))) - X[:, :, :, i] = img end - Y = onehotbatch(dataset[indexs[1]:indexs[end], 6], 0:29) + Y = onehotbatch(dataset[indexs, 1], 0:29) return (X, Y) end -indexs = Base.Iterators.partition(1:train_setsize, batch_size) -train_set = [create_dataset( - i; - path = train_path, - csv = train_csv, - images = train_imglist, -) for i in indexs] +const indexs = Base.Iterators.partition(1:train_setsize, batch_size) -test_set = create_dataset( +const test_set = create_batch( 1:test_setsize; path = test_path, csv = test_csv, - images = test_imglist, + dataset = test_dataset ) -m = Chain( - Conv((3, 3), 3 => 32, pad = (1, 1), relu), - MaxPool((2, 2)), - - Conv((3, 3), 32 => 64, pad = (1, 1), relu), - MaxPool((2, 2)), - - Conv((3, 3), 64 => 256, pad = (1, 1), relu), - MaxPool((2, 2)), +@info "creating the model" +# I've tried using a conv net described in the paper but that +# yields an accuracy of 0.8 and has a lot of inconsistencies with it +# People over at #julia-bridged (after a LONG thread of conversation) told me +# to just change out the conv with a dense model.
Sure enough, it started to behave like it's supposed to - Conv((2, 2), 256 => 512, pad = (1, 1), relu), - MaxPool((2, 2)), - - x -> reshape(x, :, size(x, 4)), - Dense(18432, 256, relu), - Dense(256, 30), +m = Chain( + Dense(imsize*imsize*3, 512, relu), # we're expecting an image array + Dense(512, 64), + Dense(64, 30), softmax, ) loss(x, y) = crossentropy(m(x), y) accuracy(x, y) = mean(Flux.onecold(m(x)) .== Flux.onecold(y)) +opt = ADAM(learning_rate) + function cbfunc() ca = accuracy(test_set...) - @show(ca) + print("batch_acc: ",string(ca),"; ") cl = loss(test_set...) - @show(cl) + println("batch_loss: ",string(cl)) + +end + +for e in 1:epochs + @info "Epoch no.-> $e" + b = 1 + for i in indexs + println("Batch no. -> $b") + train_batch = [create_batch(i; path = train_path, csv = train_csv, dataset = train_dataset)] # we load every batch before training + # This way we don't have to load the whole big dataset into one array + Flux.train!(loss, params(m), train_batch , opt, cb = cbfunc) + b+=1 + end + end -opt = ADAM() -@epochs 5 Flux.train!(loss, params(m), train_set, opt, cb = throttle(cbfunc, 3)) +println("Final acc and loss : ") +cbfunc() From cea54b9ba7aa98387d1a5727a208c04f5963c006 Mon Sep 17 00:00:00 2001 From: akshat Date: Sat, 11 Jan 2020 17:28:04 +0530 Subject: [PATCH 6/6] Update Conv.jl --- vision/judge a book by its cover/Conv.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/vision/judge a book by its cover/Conv.jl b/vision/judge a book by its cover/Conv.jl index ce8988bd0..e61d88a24 100644 --- a/vision/judge a book by its cover/Conv.jl +++ b/vision/judge a book by its cover/Conv.jl @@ -1,3 +1,4 @@ +# This model can reach accuracies of up to ~22% (close to what is described in the paper) # After a lot of debugging and asking around # The fix was found to be to remove every conv layer. # Previously, whilst using the conv model, the accuracy was stuck at exactly 0.8 and loss at exactly 3.33253