From c35ce447a6929b8d1c1978e2e1469377736b18cf Mon Sep 17 00:00:00 2001
From: akshat <akshatmehrotra2004@gmail.com>
Date: Thu, 2 Jan 2020 03:58:42 +0530
Subject: [PATCH 1/6] added .md file for cnn

Made a .md file for the convolutional network implementation of the MNIST recognition task. Added the updated source code file too.
---
 vision/mnist/MNIST_CONV_NET/mnist conv.jl | 198 ++++++++++++++++
 vision/mnist/MNIST_CONV_NET/mnist conv.md | 265 ++++++++++++++++++++++
 vision/mnist/conv.jl                      | 126 ----------
 3 files changed, 463 insertions(+), 126 deletions(-)
 create mode 100644 vision/mnist/MNIST_CONV_NET/mnist conv.jl
 create mode 100644 vision/mnist/MNIST_CONV_NET/mnist conv.md
 delete mode 100644 vision/mnist/conv.jl

diff --git a/vision/mnist/MNIST_CONV_NET/mnist conv.jl b/vision/mnist/MNIST_CONV_NET/mnist conv.jl
new file mode 100644
index 000000000..6c8ce05a8
--- /dev/null
+++ b/vision/mnist/MNIST_CONV_NET/mnist conv.jl	
@@ -0,0 +1,198 @@
+# # Classification of MNIST digits with a convolutional network.
+# This program writes out saved model to the file "mnist_conv.bson".
+# This file demonstrates basic implimentation of ~
+# - data structure
+# - construction
+# - pooling
+# - training
+# - saving
+# - conditional early-exit
+# - learning rate scheduling.
+#
+#
+# **This model, while simple, should hit around 99% test accuracy after training for approximately 20 epochs.**
+#
+# For core concepts of ML and Convolution check out [towardsdatascience's guide](https://towardsdatascience.com/a-comprehensive-guide-to-convolutional-neural-networks-the-eli5-way-3bd2b1164a53) for a detailed explanation of everything
+#
+# ## Importing required libraries
+# Can't do machine learning in flux whithout `Flux` :)
+using Flux
+# Import the mnist dataset
+using Flux.Data.MNIST
+# `Statistics` to calculate the mean which is required for finding the accuracy
+using Statistics
+# The uses of these will be explained later
+using Flux: onehotbatch, onecold, crossentropy, throttle
+using Base.Iterators: repeated, partition
+# `Printf` For outputing the accuracy and other information and `BSON` forsaving the model
+using Printf, BSON
+
+# ## Load labels and images from Flux.Data.MNIST
+@info("Loading data set")
+train_labels = MNIST.labels()
+train_imgs = MNIST.images()
+
+# ## Reshaping and batching
+# The `train_imgs` is **not** of the right shape. So, it can't be directly divided into smaller parts for batching.
+# For this, we are creating another array of the correct shape which will be our batch, and we'll fill this array with the data from our `train_imgs`.
+# The correct shape, which Flux expects the batch to be in, is **(IMG_DATA, IMG_DATA, COLOR_CHANNEL, SAMPLES)**.
+#
+# A good way to think about this is that every image is a *2D* rectangle. Which has a hight and width of 28.
+# Flux requires us to add another dim which represents the color. So a ***BLACK AND WHITE*** image would be like a cuboid of thickness 1, hight and width of 28.
+# An ***RGB*** image would be like cuboid thickness 3, made up of 3 rectangles stackted on top of each other . each rectangle representing red, blue and green channels.
+#
+# This type of thinking is good for beginners as it helps visualize things.
+#
+# Note that we're adding image to the last (samples) dim
+# So, a batch can be visualized as multiple cubiods, each of which is made up of *3* rectangles (R - rectangle, G - rectangle,)
+function make_minibatch(X, Y, idxs)
+    X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs))
+    for i in 1:length(idxs)
+
+        X_batch[:, :, :, i] = Float32.(X[idxs[i]])
+    end
+    Y_batch = onehotbatch(Y[idxs], 0:9)
+    return (X_batch, Y_batch)
+end
+
+# In the above, onehotencoding turns neumerical data into kind of truth tabels.
+# example,`onehot(:b, [:a, :b, :c])` whill output
+# ```julia
+# 3-element Flux.OneHotVector:
+# false
+#  true
+# false
+# ```
+# onehotbatch just one hot encodes all the data in a provided array
+# and yields the encoding of every element in an output *OneHotMatrix*.
+
+
+batch_size = 128
+mb_idxs = partition(1:length(train_imgs), batch_size)
+train_set = [make_minibatch(train_imgs, train_labels, i) for i in mb_idxs]
+
+# We are dividing an array of numbers of the size of the imgs into the sizes of batches and saving it in `mb_idxs`.
+# This **array of arrays** of numbers will be used as indexs.
+
+# ## Prepare test set as one giant minibatch:
+test_imgs = MNIST.images(:test)
+test_labels = MNIST.labels(:test)
+test_set = make_minibatch(test_imgs, test_labels, 1:length(test_imgs))
+
+# ## Define our model.
+# We will use a simple convolutional architecture with
+# three iterations of Conv -> ReLU -> MaxPool, followed by a final Dense
+# layer that feeds into a softmax probability output.
+#
+@info("Constructing model...")
+model = Chain(
+
+    Conv((3, 3), 1=>16, pad=(1,1), relu),
+    MaxPool((2,2)),
+
+    Conv((3, 3), 16=>32, pad=(1,1), relu),
+    MaxPool((2,2)),
+
+
+    Conv((3, 3), 32=>32, pad=(1,1), relu),
+    MaxPool((2,2)),
+
+
+    x -> reshape(x, :, size(x, 4)),
+    Dense(288, 10),
+
+
+    softmax,
+)
+
+
+# A conv layer is defined as `Conv((3, 3), 1=>16, pad=(1,1), relu)`
+#
+# `(3,3)` is the size of the filter that will be convolving.
+# `1=>16` follows the `input_size=>output_size` format.
+# `relu` is the name of the activation function we're gonna be using
+#
+# First convolution, operating upon a 28x28 image
+# Second convolution, operating upon a 14x14 image
+# Third convolution, operating upon a 7x7 image
+# Then reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N)
+# which is where we get the 288 in the `Dense`.
+#
+# Finally, softmax to get nice probabilities
+
+
+# ## Generic Pre-processing
+# Load model and datasets onto GPU, if enabled
+train_set = gpu.(train_set)
+test_set = gpu.(test_set)
+model = gpu(model)
+
+# Make sure our model is nicely precompiled before starting our training loop
+model(train_set[1][1])
+
+# `loss()` calculates the crossentropy loss between our prediction `y_hat`
+# (calculated from `model(x)`) and the ground truth `y`.  We augment the data
+# a bit, adding gaussian random noise to our image to make it more robust and avoid overfitting.
+#
+function loss(x, y)
+
+    x_aug = x .+ 0.1f0*gpu(randn(eltype(x), size(x)))
+
+    y_hat = model(x_aug)
+    return crossentropy(y_hat, y)
+end
+accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))
+# Onecold does what is sounds like it might do, it turns one hotencoded vectors into the orignal data.
+#
+# There are two reasons for using onecold.
+# One is that m(x) has its last layer as `softmax`. This means it returns a probability distribution.
+# We can not equate PB distributions. So, we need to turn the probablity distribution into neumerical data.
+# As it turns out, onecold works with probablity distributions too.
+# This means it turns the PB into normal neumerical data which can be used with the `==` sign.
+
+
+opt = ADAM(0.001)
+# Train our model with the given training set using the ADAM optimizer and
+# printing out performance against the test set as we go.
+
+# ## Training
+@info("Beginning training loop...")
+best_acc = 0.0
+last_improvement = 0
+for epoch_idx in 1:100
+    global best_acc, last_improvement
+    # Train for a single epoch
+    Flux.train!(loss, params(model), train_set, opt)
+
+    # Calculate accuracy:
+    acc = accuracy(test_set...)
+    @info(@sprintf("[%d]: Test accuracy: %.4f", epoch_idx, acc))
+
+    # If our accuracy is good enough, quit out.
+    if acc >= 0.999
+        @info(" -> Early-exiting: We reached our target accuracy of 99.9%")
+        break
+    end
+
+    # If this is the best accuracy we've seen so far, save the model out
+    if acc >= best_acc
+        @info(" -> New best accuracy! Saving model out to mnist_conv.bson")
+        BSON.@save joinpath(dirname(@__FILE__), "mnist_conv.bson") model epoch_idx acc
+        best_acc = acc
+        last_improvement = epoch_idx
+    end
+
+    # If we haven't seen improvement in 5 epochs, drop our learning rate:
+    if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6
+        opt.eta /= 10.0
+        @warn(" -> Haven't improved in a while, dropping learning rate to $(opt.eta)!")
+
+        # After dropping learning rate, give it a few epochs to improve
+        last_improvement = epoch_idx
+    end
+
+    if epoch_idx - last_improvement >= 10
+        @warn(" -> We're calling this converged.")
+        break
+    end
+end
diff --git a/vision/mnist/MNIST_CONV_NET/mnist conv.md b/vision/mnist/MNIST_CONV_NET/mnist conv.md
new file mode 100644
index 000000000..047460c6c
--- /dev/null
+++ b/vision/mnist/MNIST_CONV_NET/mnist conv.md	
@@ -0,0 +1,265 @@
+# Classification of MNIST digits with a convolutional network.
+This program writes out saved model to the file "mnist_conv.bson".
+This file demonstrates basic implimentation of ~
+- data structure
+- construction
+- pooling
+- training
+- saving
+- conditional early-exit
+- learning rate scheduling.
+
+
+**This model, while simple, should hit around 99% test accuracy after training for approximately 20 epochs.**
+
+For core concepts of ML and Convolution check out [towardsdatascience's guide](https://towardsdatascience.com/a-comprehensive-guide-to-convolutional-neural-networks-the-eli5-way-3bd2b1164a53) for a detailed explanation of everything
+
+## Importing required libraries
+Can't do machine learning in flux whithout `Flux` :)
+
+```julia
+using Flux
+```
+
+Import the mnist dataset
+
+```julia
+using Flux.Data.MNIST
+```
+
+`Statistics` to calculate the mean which is required for finding the accuracy
+
+```julia
+using Statistics
+```
+
+The uses of these will be explained later
+
+```julia
+using Flux: onehotbatch, onecold, crossentropy, throttle
+using Base.Iterators: repeated, partition
+```
+
+`Printf` For outputing the accuracy and other information and `BSON` forsaving the model
+
+```julia
+using Printf, BSON
+```
+
+## Load labels and images from Flux.Data.MNIST
+
+```julia
+@info("Loading data set")
+train_labels = MNIST.labels()
+train_imgs = MNIST.images()
+```
+
+## Reshaping and batching
+The `train_imgs` is **not** of the right shape. So, it can't be directly divided into smaller parts for batching.
+For this, we are creating another array of the correct shape which will be our batch, and we'll fill this array with the data from our `train_imgs`.
+The correct shape, which Flux expects the batch to be in, is **(IMG_DATA, IMG_DATA, COLOR_CHANNEL, SAMPLES)**.
+
+A good way to think about this is that every image is a *2D* rectangle. Which has a hight and width of 28.
+Flux requires us to add another dim which represents the color. So a ***BLACK AND WHITE*** image would be like a cuboid of thickness 1, hight and width of 28.
+An ***RGB*** image would be like cuboid thickness 3, made up of 3 rectangles stackted on top of each other . each rectangle representing red, blue and green channels.
+
+This type of thinking is good for beginners as it helps visualize things.
+
+Note that we're adding image to the last (samples) dim
+So, a batch can be visualized as multiple cubiods, each of which is made up of *3* rectangles (R - rectangle, G - rectangle,)
+
+```julia
+function make_minibatch(X, Y, idxs)
+    X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs))
+    for i in 1:length(idxs)
+
+        X_batch[:, :, :, i] = Float32.(X[idxs[i]])
+    end
+    Y_batch = onehotbatch(Y[idxs], 0:9)
+    return (X_batch, Y_batch)
+end
+```
+
+In the above, onehotencoding turns neumerical data into kind of truth tabels.
+example,`onehot(:b, [:a, :b, :c])` whill output
+```julia
+3-element Flux.OneHotVector:
+false
+ true
+false
+```
+onehotbatch just one hot encodes all the data in a provided array
+and yields the encoding of every element in an output *OneHotMatrix*.
+
+```julia
+batch_size = 128
+mb_idxs = partition(1:length(train_imgs), batch_size)
+train_set = [make_minibatch(train_imgs, train_labels, i) for i in mb_idxs]
+```
+
+We are dividing an array of numbers of the size of the imgs into the sizes of batches and saving it in `mb_idxs`.
+This **array of arrays** of numbers will be used as indexs.
+
+## Prepare test set as one giant minibatch:
+
+```julia
+test_imgs = MNIST.images(:test)
+test_labels = MNIST.labels(:test)
+test_set = make_minibatch(test_imgs, test_labels, 1:length(test_imgs))
+```
+
+## Define our model.
+We will use a simple convolutional architecture with
+three iterations of Conv -> ReLU -> MaxPool, followed by a final Dense
+layer that feeds into a softmax probability output.
+
+```julia
+@info("Constructing model...")
+model = Chain(
+
+    Conv((3, 3), 1=>16, pad=(1,1), relu),
+    MaxPool((2,2)),
+
+    Conv((3, 3), 16=>32, pad=(1,1), relu),
+    MaxPool((2,2)),
+
+
+    Conv((3, 3), 32=>32, pad=(1,1), relu),
+    MaxPool((2,2)),
+
+
+    x -> reshape(x, :, size(x, 4)),
+    Dense(288, 10),
+
+
+    softmax,
+)
+```
+
+A conv layer is defined as `Conv((3, 3), 1=>16, pad=(1,1), relu)`
+
+`(3,3)` is the size of the filter that will be convolving.
+`1=>16` follows the `input_size=>output_size` format.
+`relu` is the name of the activation function we're gonna be using
+
+First convolution, operating upon a 28x28 image
+Second convolution, operating upon a 14x14 image
+Third convolution, operating upon a 7x7 image
+Then reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N)
+which is where we get the 288 in the `Dense`.
+
+Finally, softmax to get nice probabilities
+
+## Generic Pre-processing
+Load model and datasets onto GPU, if enabled
+
+```julia
+train_set = gpu.(train_set)
+test_set = gpu.(test_set)
+model = gpu(model)
+```
+
+Make sure our model is nicely precompiled before starting our training loop
+
+```julia
+model(train_set[1][1])
+```
+
+`loss()` calculates the crossentropy loss between our prediction `y_hat`
+(calculated from `model(x)`) and the ground truth `y`.  We augment the data
+a bit, adding gaussian random noise to our image to make it more robust and avoid overfitting.
+
+```julia
+function loss(x, y)
+
+    x_aug = x .+ 0.1f0*gpu(randn(eltype(x), size(x)))
+
+    y_hat = model(x_aug)
+    return crossentropy(y_hat, y)
+end
+accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))
+```
+
+Onecold does what is sounds like it might do, it turns one hotencoded vectors into the orignal data.
+
+There are two reasons for using onecold.
+One is that m(x) has its last layer as `softmax`. This means it returns a probability distribution.
+We can not equate PB distributions. So, we need to turn the probablity distribution into neumerical data.
+As it turns out, onecold works with probablity distributions too.
+This means it turns the PB into normal neumerical data which can be used with the `==` sign.
+
+```julia
+opt = ADAM(0.001)
+```
+
+Train our model with the given training set using the ADAM optimizer and
+printing out performance against the test set as we go.
+
+## Training
+
+```julia
+@info("Beginning training loop...")
+best_acc = 0.0
+last_improvement = 0
+for epoch_idx in 1:100
+    global best_acc, last_improvement
+```
+
+Train for a single epoch
+
+```julia
+    Flux.train!(loss, params(model), train_set, opt)
+```
+
+Calculate accuracy:
+
+```julia
+    acc = accuracy(test_set...)
+    @info(@sprintf("[%d]: Test accuracy: %.4f", epoch_idx, acc))
+```
+
+If our accuracy is good enough, quit out.
+
+```julia
+    if acc >= 0.999
+        @info(" -> Early-exiting: We reached our target accuracy of 99.9%")
+        break
+    end
+```
+
+If this is the best accuracy we've seen so far, save the model out
+
+```julia
+    if acc >= best_acc
+        @info(" -> New best accuracy! Saving model out to mnist_conv.bson")
+        BSON.@save joinpath(dirname(@__FILE__), "mnist_conv.bson") model epoch_idx acc
+        best_acc = acc
+        last_improvement = epoch_idx
+    end
+```
+
+If we haven't seen improvement in 5 epochs, drop our learning rate:
+
+```julia
+    if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6
+        opt.eta /= 10.0
+        @warn(" -> Haven't improved in a while, dropping learning rate to $(opt.eta)!")
+```
+
+After dropping learning rate, give it a few epochs to improve
+
+```julia
+        last_improvement = epoch_idx
+    end
+
+    if epoch_idx - last_improvement >= 10
+        @warn(" -> We're calling this converged.")
+        break
+    end
+end
+```
+
+---
+
+*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*
+
diff --git a/vision/mnist/conv.jl b/vision/mnist/conv.jl
deleted file mode 100644
index b803324a2..000000000
--- a/vision/mnist/conv.jl
+++ /dev/null
@@ -1,126 +0,0 @@
-# Classifies MNIST digits with a convolutional network.
-# Writes out saved model to the file "mnist_conv.bson".
-# Demonstrates basic model construction, training, saving,
-# conditional early-exit, and learning rate scheduling.
-#
-# This model, while simple, should hit around 99% test
-# accuracy after training for approximately 20 epochs.
-
-using Flux, Flux.Data.MNIST, Statistics
-using Flux: onehotbatch, onecold, crossentropy, throttle
-using Base.Iterators: repeated, partition
-using Printf, BSON
-
-# Load labels and images from Flux.Data.MNIST
-@info("Loading data set")
-train_labels = MNIST.labels()
-train_imgs = MNIST.images()
-
-# Bundle images together with labels and group into minibatchess
-function make_minibatch(X, Y, idxs)
-    X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs))
-    for i in 1:length(idxs)
-        X_batch[:, :, :, i] = Float32.(X[idxs[i]])
-    end
-    Y_batch = onehotbatch(Y[idxs], 0:9)
-    return (X_batch, Y_batch)
-end
-batch_size = 128
-mb_idxs = partition(1:length(train_imgs), batch_size)
-train_set = [make_minibatch(train_imgs, train_labels, i) for i in mb_idxs]
-
-# Prepare test set as one giant minibatch:
-test_imgs = MNIST.images(:test)
-test_labels = MNIST.labels(:test)
-test_set = make_minibatch(test_imgs, test_labels, 1:length(test_imgs))
-
-# Define our model.  We will use a simple convolutional architecture with
-# three iterations of Conv -> ReLU -> MaxPool, followed by a final Dense
-# layer that feeds into a softmax probability output.
-@info("Constructing model...")
-model = Chain(
-    # First convolution, operating upon a 28x28 image
-    Conv((3, 3), 1=>16, pad=(1,1), relu),
-    MaxPool((2,2)),
-
-    # Second convolution, operating upon a 14x14 image
-    Conv((3, 3), 16=>32, pad=(1,1), relu),
-    MaxPool((2,2)),
-
-    # Third convolution, operating upon a 7x7 image
-    Conv((3, 3), 32=>32, pad=(1,1), relu),
-    MaxPool((2,2)),
-
-    # Reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N)
-    # which is where we get the 288 in the `Dense` layer below:
-    x -> reshape(x, :, size(x, 4)),
-    Dense(288, 10),
-
-    # Finally, softmax to get nice probabilities
-    softmax,
-)
-
-# Load model and datasets onto GPU, if enabled
-train_set = gpu.(train_set)
-test_set = gpu.(test_set)
-model = gpu(model)
-
-# Make sure our model is nicely precompiled before starting our training loop
-model(train_set[1][1])
-
-# `loss()` calculates the crossentropy loss between our prediction `y_hat`
-# (calculated from `model(x)`) and the ground truth `y`.  We augment the data
-# a bit, adding gaussian random noise to our image to make it more robust.
-function loss(x, y)
-    # We augment `x` a little bit here, adding in random noise
-    x_aug = x .+ 0.1f0*gpu(randn(eltype(x), size(x)))
-
-    y_hat = model(x_aug)
-    return crossentropy(y_hat, y)
-end
-accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))
-
-# Train our model with the given training set using the ADAM optimizer and
-# printing out performance against the test set as we go.
-opt = ADAM(0.001)
-
-@info("Beginning training loop...")
-best_acc = 0.0
-last_improvement = 0
-for epoch_idx in 1:100
-    global best_acc, last_improvement
-    # Train for a single epoch
-    Flux.train!(loss, params(model), train_set, opt)
-
-    # Calculate accuracy:
-    acc = accuracy(test_set...)
-    @info(@sprintf("[%d]: Test accuracy: %.4f", epoch_idx, acc))
-
-    # If our accuracy is good enough, quit out.
-    if acc >= 0.999
-        @info(" -> Early-exiting: We reached our target accuracy of 99.9%")
-        break
-    end
-
-    # If this is the best accuracy we've seen so far, save the model out
-    if acc >= best_acc
-        @info(" -> New best accuracy! Saving model out to mnist_conv.bson")
-        BSON.@save joinpath(dirname(@__FILE__), "mnist_conv.bson") model epoch_idx acc
-        best_acc = acc
-        last_improvement = epoch_idx
-    end
-
-    # If we haven't seen improvement in 5 epochs, drop our learning rate:
-    if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6
-        opt.eta /= 10.0
-        @warn(" -> Haven't improved in a while, dropping learning rate to $(opt.eta)!")
-
-        # After dropping learning rate, give it a few epochs to improve
-        last_improvement = epoch_idx
-    end
-
-    if epoch_idx - last_improvement >= 10
-        @warn(" -> We're calling this converged.")
-        break
-    end
-end

From 32a8540bc4273d3e430478242c3827690bb7d212 Mon Sep 17 00:00:00 2001
From: akshat <akshatmehrotra2004@gmail.com>
Date: Thu, 2 Jan 2020 12:56:28 +0530
Subject: [PATCH 2/6] changed the arrangement of the folder

---
 vision/mnist/{MNIST_CONV_NET/mnist conv.jl => conv.jl} | 0
 vision/mnist/{MNIST_CONV_NET/mnist conv.md => conv.md} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename vision/mnist/{MNIST_CONV_NET/mnist conv.jl => conv.jl} (100%)
 rename vision/mnist/{MNIST_CONV_NET/mnist conv.md => conv.md} (100%)

diff --git a/vision/mnist/MNIST_CONV_NET/mnist conv.jl b/vision/mnist/conv.jl
similarity index 100%
rename from vision/mnist/MNIST_CONV_NET/mnist conv.jl
rename to vision/mnist/conv.jl
diff --git a/vision/mnist/MNIST_CONV_NET/mnist conv.md b/vision/mnist/conv.md
similarity index 100%
rename from vision/mnist/MNIST_CONV_NET/mnist conv.md
rename to vision/mnist/conv.md

From e87c0be83e01f0faea90ab4ab1b6b3c06781b3af Mon Sep 17 00:00:00 2001
From: akshat <akshatmehrotra2004@gmail.com>
Date: Sat, 4 Jan 2020 02:50:28 +0530
Subject: [PATCH 3/6] added the conv implementation of Judge A book by its
 cover.

Added the Conv net for judge a book by its cover task
---
 vision/judge a book by its cover/Conv.jl | 81 ++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 vision/judge a book by its cover/Conv.jl

diff --git a/vision/judge a book by its cover/Conv.jl b/vision/judge a book by its cover/Conv.jl
new file mode 100644
index 000000000..7db8366a2
--- /dev/null
+++ b/vision/judge a book by its cover/Conv.jl	
@@ -0,0 +1,81 @@
+using Flux
+using Flux: @epochs, onehotbatch, crossentropy, throttle
+using Images
+using FileIO
+using CSV
+using Statistics
+# Training data: image directory and CSV listing (machine-specific absolute paths; adjust before running).
+train_path = "G:\\Book-Cover-Train\\"
+train_csv = "F:\\book-dataset\\Task1\\book30-listing-train.csv"
+# Test data: image directory and CSV listing.
+test_path = "G:\\Book-Cover-Test\\"
+test_csv = "F:\\book-dataset\\Task1\\book30-listing-test.csv"
+# File names found in each image directory.
+train_imglist = readdir(train_path)
+test_imglist = readdir(test_path)
+# Dataset sizes are the number of directory entries.
+train_setsize = length(train_imglist)
+test_setsize = length(test_imglist)
+# Maximum number of samples per training batch.
+batch_size = 1000
+
+# Build one batch `(X, Y)` for the contiguous sample range `indexs`: `X` is a 100x100x3xN
+# Float32 array of resized images, `Y` one-hot encodes CSV column 6 over the labels 0:29.
+function create_dataset(indexs; path, csv, images)
+    dataset = CSV.read(csv)
+    X = Array{Float32}(undef, 100, 100, 3, length(indexs))
+    for (slot, idx) in enumerate(indexs)
+        # Load by dataset index `idx`, not batch slot, so images line up with the labels in Y.
+        img = channelview(imresize(load(joinpath(path, images[idx])), 100, 100))
+        X[:, :, :, slot] = Float32.(permutedims(img, (2, 3, 1)))  # move channel dim last
+    end
+    Y = onehotbatch(dataset[indexs[1]:indexs[end], 6], 0:29)
+    return (X, Y)
+end
+# Partition the training indices into ranges of at most `batch_size` and build one (X, Y) batch per range.
+indexs = Base.Iterators.partition(1:train_setsize, batch_size)
+train_set = [create_dataset(
+    i;
+    path = train_path,
+    csv = train_csv,
+    images = train_imglist,
+) for i in indexs]
+# The whole test set is built as a single batch.
+test_set = create_dataset(
+    1:test_setsize;
+    path = test_path,
+    csv = test_csv,
+    images = test_imglist,
+)
+# CNN for 100x100x3 inputs: four Conv -> MaxPool stages, flatten, two Dense layers, softmax over 30 classes.
+m = Chain(
+    Conv((3, 3), 3 => 32, pad = (1, 1), relu),
+    MaxPool((2, 2)),
+    # shape here: 50x50x32xN
+    Conv((3, 3), 32 => 64, pad = (1, 1), relu),
+    MaxPool((2, 2)),
+    # shape here: 25x25x64xN
+    Conv((3, 3), 64 => 256, pad = (1, 1), relu),
+    MaxPool((2, 2)),
+    # shape here: 12x12x256xN (pooling 25 -> 12 drops the odd row/column)
+    Conv((2, 2), 256 => 512, pad = (1, 1), relu),
+    MaxPool((2, 2)),
+    # shape here: 6x6x512xN (the padded 2x2 conv grows 12 -> 13 before pooling), i.e. 18432 features
+    x -> reshape(x, :, size(x, 4)),
+    Dense(18432, 256, relu),
+    Dense(256, 30),
+    softmax,
+)
+# Cross-entropy loss of the model output, and accuracy as the fraction of samples whose top class matches the label.
+loss(x, y) = crossentropy(m(x), y)
+accuracy(x, y) = mean(Flux.onecold(m(x)) .== Flux.onecold(y))
+# Training callback: print accuracy (`ca`) and loss (`cl`) computed on the full test set.
+function cbfunc()
+    ca = accuracy(test_set...)
+    @show(ca)
+    cl = loss(test_set...)
+    @show(cl)
+end
+# Train for 5 epochs with ADAM (default settings); `throttle` limits the callback to once every 3 seconds.
+opt = ADAM()
+@epochs 5 Flux.train!(loss, params(m), train_set, opt, cb = throttle(cbfunc, 3))

From 5e7d9e1233da7f1995adaf72dcb668d43e3b1542 Mon Sep 17 00:00:00 2001
From: akshat <akshatmehrotra2004@gmail.com>
Date: Sat, 4 Jan 2020 03:00:57 +0530
Subject: [PATCH 4/6] synced the book branch and added code

---
 vision/mnist/conv.jl | 114 ++++---------------
 vision/mnist/conv.md | 265 -------------------------------------------
 2 files changed, 21 insertions(+), 358 deletions(-)
 delete mode 100644 vision/mnist/conv.md

diff --git a/vision/mnist/conv.jl b/vision/mnist/conv.jl
index 6c8ce05a8..b803324a2 100644
--- a/vision/mnist/conv.jl
+++ b/vision/mnist/conv.jl
@@ -1,127 +1,65 @@
-# # Classification of MNIST digits with a convolutional network.
-# This program writes out saved model to the file "mnist_conv.bson".
-# This file demonstrates basic implimentation of ~
-# - data structure
-# - construction
-# - pooling
-# - training
-# - saving
-# - conditional early-exit
-# - learning rate scheduling.
+# Classifies MNIST digits with a convolutional network.
+# Writes out saved model to the file "mnist_conv.bson".
+# Demonstrates basic model construction, training, saving,
+# conditional early-exit, and learning rate scheduling.
 #
-#
-# **This model, while simple, should hit around 99% test accuracy after training for approximately 20 epochs.**
-#
-# For core concepts of ML and Convolution check out [towardsdatascience's guide](https://towardsdatascience.com/a-comprehensive-guide-to-convolutional-neural-networks-the-eli5-way-3bd2b1164a53) for a detailed explanation of everything
-#
-# ## Importing required libraries
-# Can't do machine learning in flux whithout `Flux` :)
-using Flux
-# Import the mnist dataset
-using Flux.Data.MNIST
-# `Statistics` to calculate the mean which is required for finding the accuracy
-using Statistics
-# The uses of these will be explained later
+# This model, while simple, should hit around 99% test
+# accuracy after training for approximately 20 epochs.
+
+using Flux, Flux.Data.MNIST, Statistics
 using Flux: onehotbatch, onecold, crossentropy, throttle
 using Base.Iterators: repeated, partition
-# `Printf` For outputing the accuracy and other information and `BSON` forsaving the model
 using Printf, BSON
 
-# ## Load labels and images from Flux.Data.MNIST
+# Load labels and images from Flux.Data.MNIST
 @info("Loading data set")
 train_labels = MNIST.labels()
 train_imgs = MNIST.images()
 
-# ## Reshaping and batching
-# The `train_imgs` is **not** of the right shape. So, it can't be directly divided into smaller parts for batching.
-# For this, we are creating another array of the correct shape which will be our batch, and we'll fill this array with the data from our `train_imgs`.
-# The correct shape, which Flux expects the batch to be in, is **(IMG_DATA, IMG_DATA, COLOR_CHANNEL, SAMPLES)**.
-#
-# A good way to think about this is that every image is a *2D* rectangle. Which has a hight and width of 28.
-# Flux requires us to add another dim which represents the color. So a ***BLACK AND WHITE*** image would be like a cuboid of thickness 1, hight and width of 28.
-# An ***RGB*** image would be like cuboid thickness 3, made up of 3 rectangles stackted on top of each other . each rectangle representing red, blue and green channels.
-#
-# This type of thinking is good for beginners as it helps visualize things.
-#
-# Note that we're adding image to the last (samples) dim
-# So, a batch can be visualized as multiple cubiods, each of which is made up of *3* rectangles (R - rectangle, G - rectangle,)
+# Bundle images together with labels and group into minibatchess
 function make_minibatch(X, Y, idxs)
     X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs))
     for i in 1:length(idxs)
-
         X_batch[:, :, :, i] = Float32.(X[idxs[i]])
     end
     Y_batch = onehotbatch(Y[idxs], 0:9)
     return (X_batch, Y_batch)
 end
-
-# In the above, onehotencoding turns neumerical data into kind of truth tabels.
-# example,`onehot(:b, [:a, :b, :c])` whill output
-# ```julia
-# 3-element Flux.OneHotVector:
-# false
-#  true
-# false
-# ```
-# onehotbatch just one hot encodes all the data in a provided array
-# and yields the encoding of every element in an output *OneHotMatrix*.
-
-
 batch_size = 128
 mb_idxs = partition(1:length(train_imgs), batch_size)
 train_set = [make_minibatch(train_imgs, train_labels, i) for i in mb_idxs]
 
-# We are dividing an array of numbers of the size of the imgs into the sizes of batches and saving it in `mb_idxs`.
-# This **array of arrays** of numbers will be used as indexs.
-
-# ## Prepare test set as one giant minibatch:
+# Prepare test set as one giant minibatch:
 test_imgs = MNIST.images(:test)
 test_labels = MNIST.labels(:test)
 test_set = make_minibatch(test_imgs, test_labels, 1:length(test_imgs))
 
-# ## Define our model.
-# We will use a simple convolutional architecture with
+# Define our model.  We will use a simple convolutional architecture with
 # three iterations of Conv -> ReLU -> MaxPool, followed by a final Dense
 # layer that feeds into a softmax probability output.
-#
 @info("Constructing model...")
 model = Chain(
-
+    # First convolution, operating upon a 28x28 image
     Conv((3, 3), 1=>16, pad=(1,1), relu),
     MaxPool((2,2)),
 
+    # Second convolution, operating upon a 14x14 image
     Conv((3, 3), 16=>32, pad=(1,1), relu),
     MaxPool((2,2)),
 
-
+    # Third convolution, operating upon a 7x7 image
     Conv((3, 3), 32=>32, pad=(1,1), relu),
     MaxPool((2,2)),
 
-
+    # Reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N)
+    # which is where we get the 288 in the `Dense` layer below:
     x -> reshape(x, :, size(x, 4)),
     Dense(288, 10),
 
-
+    # Finally, softmax to get nice probabilities
     softmax,
 )
 
-
-# A conv layer is defined as `Conv((3, 3), 1=>16, pad=(1,1), relu)`
-#
-# `(3,3)` is the size of the filter that will be convolving.
-# `1=>16` follows the `input_size=>output_size` format.
-# `relu` is the name of the activation function we're gonna be using
-#
-# First convolution, operating upon a 28x28 image
-# Second convolution, operating upon a 14x14 image
-# Third convolution, operating upon a 7x7 image
-# Then reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N)
-# which is where we get the 288 in the `Dense`.
-#
-# Finally, softmax to get nice probabilities
-
-
-# ## Generic Pre-processing
 # Load model and datasets onto GPU, if enabled
 train_set = gpu.(train_set)
 test_set = gpu.(test_set)
@@ -132,30 +70,20 @@ model(train_set[1][1])
 
 # `loss()` calculates the crossentropy loss between our prediction `y_hat`
 # (calculated from `model(x)`) and the ground truth `y`.  We augment the data
-# a bit, adding gaussian random noise to our image to make it more robust and avoid overfitting.
-#
+# a bit, adding gaussian random noise to our image to make it more robust.
 function loss(x, y)
-
+    # We augment `x` a little bit here, adding in random noise
     x_aug = x .+ 0.1f0*gpu(randn(eltype(x), size(x)))
 
     y_hat = model(x_aug)
     return crossentropy(y_hat, y)
 end
 accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))
-# Onecold does what is sounds like it might do, it turns one hotencoded vectors into the orignal data.
-#
-# There are two reasons for using onecold.
-# One is that m(x) has its last layer as `softmax`. This means it returns a probability distribution.
-# We can not equate PB distributions. So, we need to turn the probablity distribution into neumerical data.
-# As it turns out, onecold works with probablity distributions too.
-# This means it turns the PB into normal neumerical data which can be used with the `==` sign.
 
-
-opt = ADAM(0.001)
 # Train our model with the given training set using the ADAM optimizer and
 # printing out performance against the test set as we go.
+opt = ADAM(0.001)
 
-# ## Training
 @info("Beginning training loop...")
 best_acc = 0.0
 last_improvement = 0
diff --git a/vision/mnist/conv.md b/vision/mnist/conv.md
deleted file mode 100644
index 047460c6c..000000000
--- a/vision/mnist/conv.md
+++ /dev/null
@@ -1,265 +0,0 @@
-# Classification of MNIST digits with a convolutional network.
-This program writes out saved model to the file "mnist_conv.bson".
-This file demonstrates basic implimentation of ~
-- data structure
-- construction
-- pooling
-- training
-- saving
-- conditional early-exit
-- learning rate scheduling.
-
-
-**This model, while simple, should hit around 99% test accuracy after training for approximately 20 epochs.**
-
-For core concepts of ML and Convolution check out [towardsdatascience's guide](https://towardsdatascience.com/a-comprehensive-guide-to-convolutional-neural-networks-the-eli5-way-3bd2b1164a53) for a detailed explanation of everything
-
-## Importing required libraries
-Can't do machine learning in flux whithout `Flux` :)
-
-```julia
-using Flux
-```
-
-Import the mnist dataset
-
-```julia
-using Flux.Data.MNIST
-```
-
-`Statistics` to calculate the mean which is required for finding the accuracy
-
-```julia
-using Statistics
-```
-
-The uses of these will be explained later
-
-```julia
-using Flux: onehotbatch, onecold, crossentropy, throttle
-using Base.Iterators: repeated, partition
-```
-
-`Printf` For outputing the accuracy and other information and `BSON` forsaving the model
-
-```julia
-using Printf, BSON
-```
-
-## Load labels and images from Flux.Data.MNIST
-
-```julia
-@info("Loading data set")
-train_labels = MNIST.labels()
-train_imgs = MNIST.images()
-```
-
-## Reshaping and batching
-The `train_imgs` is **not** of the right shape. So, it can't be directly divided into smaller parts for batching.
-For this, we are creating another array of the correct shape which will be our batch, and we'll fill this array with the data from our `train_imgs`.
-The correct shape, which Flux expects the batch to be in, is **(IMG_DATA, IMG_DATA, COLOR_CHANNEL, SAMPLES)**.
-
-A good way to think about this is that every image is a *2D* rectangle. Which has a hight and width of 28.
-Flux requires us to add another dim which represents the color. So a ***BLACK AND WHITE*** image would be like a cuboid of thickness 1, hight and width of 28.
-An ***RGB*** image would be like cuboid thickness 3, made up of 3 rectangles stackted on top of each other . each rectangle representing red, blue and green channels.
-
-This type of thinking is good for beginners as it helps visualize things.
-
-Note that we're adding image to the last (samples) dim
-So, a batch can be visualized as multiple cubiods, each of which is made up of *3* rectangles (R - rectangle, G - rectangle,)
-
-```julia
-function make_minibatch(X, Y, idxs)
-    X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs))
-    for i in 1:length(idxs)
-
-        X_batch[:, :, :, i] = Float32.(X[idxs[i]])
-    end
-    Y_batch = onehotbatch(Y[idxs], 0:9)
-    return (X_batch, Y_batch)
-end
-```
-
-In the above, onehotencoding turns neumerical data into kind of truth tabels.
-example,`onehot(:b, [:a, :b, :c])` whill output
-```julia
-3-element Flux.OneHotVector:
-false
- true
-false
-```
-onehotbatch just one hot encodes all the data in a provided array
-and yields the encoding of every element in an output *OneHotMatrix*.
-
-```julia
-batch_size = 128
-mb_idxs = partition(1:length(train_imgs), batch_size)
-train_set = [make_minibatch(train_imgs, train_labels, i) for i in mb_idxs]
-```
-
-We are dividing an array of numbers of the size of the imgs into the sizes of batches and saving it in `mb_idxs`.
-This **array of arrays** of numbers will be used as indexs.
-
-## Prepare test set as one giant minibatch:
-
-```julia
-test_imgs = MNIST.images(:test)
-test_labels = MNIST.labels(:test)
-test_set = make_minibatch(test_imgs, test_labels, 1:length(test_imgs))
-```
-
-## Define our model.
-We will use a simple convolutional architecture with
-three iterations of Conv -> ReLU -> MaxPool, followed by a final Dense
-layer that feeds into a softmax probability output.
-
-```julia
-@info("Constructing model...")
-model = Chain(
-
-    Conv((3, 3), 1=>16, pad=(1,1), relu),
-    MaxPool((2,2)),
-
-    Conv((3, 3), 16=>32, pad=(1,1), relu),
-    MaxPool((2,2)),
-
-
-    Conv((3, 3), 32=>32, pad=(1,1), relu),
-    MaxPool((2,2)),
-
-
-    x -> reshape(x, :, size(x, 4)),
-    Dense(288, 10),
-
-
-    softmax,
-)
-```
-
-A conv layer is defined as `Conv((3, 3), 1=>16, pad=(1,1), relu)`
-
-`(3,3)` is the size of the filter that will be convolving.
-`1=>16` follows the `input_size=>output_size` format.
-`relu` is the name of the activation function we're gonna be using
-
-First convolution, operating upon a 28x28 image
-Second convolution, operating upon a 14x14 image
-Third convolution, operating upon a 7x7 image
-Then reshape 3d tensor into a 2d one, at this point it should be (3, 3, 32, N)
-which is where we get the 288 in the `Dense`.
-
-Finally, softmax to get nice probabilities
-
-## Generic Pre-processing
-Load model and datasets onto GPU, if enabled
-
-```julia
-train_set = gpu.(train_set)
-test_set = gpu.(test_set)
-model = gpu(model)
-```
-
-Make sure our model is nicely precompiled before starting our training loop
-
-```julia
-model(train_set[1][1])
-```
-
-`loss()` calculates the crossentropy loss between our prediction `y_hat`
-(calculated from `model(x)`) and the ground truth `y`.  We augment the data
-a bit, adding gaussian random noise to our image to make it more robust and avoid overfitting.
-
-```julia
-function loss(x, y)
-
-    x_aug = x .+ 0.1f0*gpu(randn(eltype(x), size(x)))
-
-    y_hat = model(x_aug)
-    return crossentropy(y_hat, y)
-end
-accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))
-```
-
-Onecold does what is sounds like it might do, it turns one hotencoded vectors into the orignal data.
-
-There are two reasons for using onecold.
-One is that m(x) has its last layer as `softmax`. This means it returns a probability distribution.
-We can not equate PB distributions. So, we need to turn the probablity distribution into neumerical data.
-As it turns out, onecold works with probablity distributions too.
-This means it turns the PB into normal neumerical data which can be used with the `==` sign.
-
-```julia
-opt = ADAM(0.001)
-```
-
-Train our model with the given training set using the ADAM optimizer and
-printing out performance against the test set as we go.
-
-## Training
-
-```julia
-@info("Beginning training loop...")
-best_acc = 0.0
-last_improvement = 0
-for epoch_idx in 1:100
-    global best_acc, last_improvement
-```
-
-Train for a single epoch
-
-```julia
-    Flux.train!(loss, params(model), train_set, opt)
-```
-
-Calculate accuracy:
-
-```julia
-    acc = accuracy(test_set...)
-    @info(@sprintf("[%d]: Test accuracy: %.4f", epoch_idx, acc))
-```
-
-If our accuracy is good enough, quit out.
-
-```julia
-    if acc >= 0.999
-        @info(" -> Early-exiting: We reached our target accuracy of 99.9%")
-        break
-    end
-```
-
-If this is the best accuracy we've seen so far, save the model out
-
-```julia
-    if acc >= best_acc
-        @info(" -> New best accuracy! Saving model out to mnist_conv.bson")
-        BSON.@save joinpath(dirname(@__FILE__), "mnist_conv.bson") model epoch_idx acc
-        best_acc = acc
-        last_improvement = epoch_idx
-    end
-```
-
-If we haven't seen improvement in 5 epochs, drop our learning rate:
-
-```julia
-    if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6
-        opt.eta /= 10.0
-        @warn(" -> Haven't improved in a while, dropping learning rate to $(opt.eta)!")
-```
-
-After dropping learning rate, give it a few epochs to improve
-
-```julia
-        last_improvement = epoch_idx
-    end
-
-    if epoch_idx - last_improvement >= 10
-        @warn(" -> We're calling this converged.")
-        break
-    end
-end
-```
-
----
-
-*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*
-

From 98dc41d09456f94bacce58a152f9e866684ae4a2 Mon Sep 17 00:00:00 2001
From: akshat <akshatmehrotra2004@gmail.com>
Date: Sat, 11 Jan 2020 02:30:10 +0530
Subject: [PATCH 5/6] updated a model, comments, batch loading

---
 vision/judge a book by its cover/Conv.jl | 128 ++++++++++++++---------
 1 file changed, 76 insertions(+), 52 deletions(-)

diff --git a/vision/judge a book by its cover/Conv.jl b/vision/judge a book by its cover/Conv.jl
index 7db8366a2..ce8988bd0 100644
--- a/vision/judge a book by its cover/Conv.jl	
+++ b/vision/judge a book by its cover/Conv.jl	
@@ -1,81 +1,105 @@
+# After a lot of debugging and asking around
+# The fix was found to be to remove every conv layer.
+# Previously, whilst using the conv model, the accuracy was stuck at exactly 0.8 and loss at exactly 3.33253
+# Changing the model to be that of a simple dense one seems to bring the accuracy up to what is expected.
+# In the research paper, the authors were able to achieve a general accuracy of 24% in the model which they trained from the ground up.
+# In doing so they used the whole 30000-image dataset. This model uses 1/3 of that dataset and around 500 testing images because of processing constraints.
+# The accuracy achieved after 20 epochs was right at ~12%. Considering that it uses a third of the dataset, this is pretty decent.
+
 using Flux
-using Flux: @epochs, onehotbatch, crossentropy, throttle
+using Flux: onehotbatch, crossentropy, throttle
 using Images
 using FileIO
 using CSV
 using Statistics
 
-train_path = "G:\\Book-Cover-Train\\"
-train_csv = "F:\\book-dataset\\Task1\\book30-listing-train.csv"
-
-test_path = "G:\\Book-Cover-Test\\"
-test_csv = "F:\\book-dataset\\Task1\\book30-listing-test.csv"
-
-train_imglist = readdir(train_path)
-test_imglist = readdir(test_path)
-
-train_setsize = length(train_imglist)
-test_setsize = length(test_imglist)
-
-batch_size = 1000
+const train_path = "G:\\mlimgs\\Book-Train-FULL\\" # contains the training images
+const train_csv = "F:\\book-dataset\\Task1\\book30-listing-train1.csv" # This file contains the labels(genre) for the training images
+
+const test_path = "G:\\mlimgs\\Book-Test-FULL\\" #Contains the testing dataset
+const test_csv = "F:\\book-dataset\\Task1\\book30-listing-test1.csv" # This file contains the labels(genre) for the testing images
+
+const train_dataset  = CSV.read(train_csv)  # read the csv for training labels
+                                            # The CSVs have 2 columns: the first holds the genre and the second the name of the book
+                                            # The name of the book isn't required for the functioning of the model but is included for debugging purposes
+
+const test_dataset  = CSV.read(test_csv) # read the csv for testing labels
+
+# find the total number of images in sets so we can correctly divide the dataset into batches
+const train_imglist = readdir(train_path)
+const test_imglist = readdir(test_path)
+const train_setsize = length(train_imglist)
+const test_setsize = length(test_imglist)
+
+# Self-explanatory hyperparameters
+const batch_size = 400
+const imsize = 60
+const epochs = 20
+const learning_rate = 0.0001
+
+function create_batch(indexs; path, csv, dataset)
+    X = Array{Float32}(undef, imsize*imsize*3, length(indexs))  # everytime this function is called a new batch is created with the correct size
+                                                                # It should be able to hold multiple flattened images (flattened because we're using a dense network)
+                                                                # That's why it is shaped like (size_of_image, no._of_images)
+    for (p,i) in enumerate(indexs)
+        img = load(string(path,i,".png")) # The images are labeled like 1.png, 2.png, and so on.
+        img = channelview(RGB.(imresize(img, imsize, imsize)))
+        img = reshape(Float32.(img),(imsize*imsize*3))  # The current image has 3 layers of 60 by 60 pixels all compiled into a 3D array
+                                                        # We need the image in a flat array so we reshape into a flat array for it to be eligible to be added to array X
+        X[:, p] = img # add the img to X.
 
-
-function create_dataset(indexs; path, csv, images)
-    dataset = CSV.read(csv)
-    X = Array{Float32}(undef, 100, 100, 3, length(indexs))
-    for i = 1:length(indexs)
-        img = load(string(path, images[i]))
-        img = channelview(imresize(img, 100, 100))
-        img = Float32.(permutedims(img, (2, 3, 1)))
-        X[:, :, :, i] = img
     end
-    Y = onehotbatch(dataset[indexs[1]:indexs[end], 6], 0:29)
+    Y = onehotbatch(dataset[indexs, 1], 0:29)
     return (X, Y)
 end
 
-indexs = Base.Iterators.partition(1:train_setsize, batch_size)
-train_set = [create_dataset(
-    i;
-    path = train_path,
-    csv = train_csv,
-    images = train_imglist,
-) for i in indexs]
+const indexs = Base.Iterators.partition(1:train_setsize, batch_size)
 
-test_set = create_dataset(
+const test_set = create_batch(
     1:test_setsize;
     path = test_path,
     csv = test_csv,
-    images = test_imglist,
+    dataset = test_dataset
 )
 
-m = Chain(
-    Conv((3, 3), 3 => 32, pad = (1, 1), relu),
-    MaxPool((2, 2)),
-
-    Conv((3, 3), 32 => 64, pad = (1, 1), relu),
-    MaxPool((2, 2)),
-
-    Conv((3, 3), 64 => 256, pad = (1, 1), relu),
-    MaxPool((2, 2)),
+@info "creating the model"
+# I've tried using a conv net described in the paper, but that
+# yields an accuracy of 0.8 and has a lot of inconsistencies with it.
+# People over at #julia-bridged (after a LONG thread of conversation) told me
+# to just change out the conv with a dense model. Sure enough, it started to behave like it's supposed to.
 
-    Conv((2, 2), 256 => 512, pad = (1, 1), relu),
-    MaxPool((2, 2)),
-
-    x -> reshape(x, :, size(x, 4)),
-    Dense(18432, 256, relu),
-    Dense(256, 30),
+m = Chain(
+    Dense(imsize*imsize*3, 512, relu), # we're expecting an image array
+    Dense(512, 64),
+    Dense(64, 30),
     softmax,
 )
 
 loss(x, y) = crossentropy(m(x), y)
 accuracy(x, y) = mean(Flux.onecold(m(x)) .== Flux.onecold(y))
 
+opt = ADAM(learning_rate)
+
 function cbfunc()
     ca = accuracy(test_set...)
-    @show(ca)
+    print("batch_acc: ",string(ca),"; ")
     cl = loss(test_set...)
-    @show(cl)
+    println("batch_loss: ",string(cl))
+
+end
+
+for e in 1:epochs
+    @info "Epoch no.-> $e"
+    b = 1
+    for i in indexs
+        println("Batch no. -> $b")
+        train_batch = [create_batch(i; path = train_path, csv = train_csv, dataset = train_dataset)] # we load every batch before training
+                                                                                                     # This way we don't have to load the whole big dataset into one array
+        Flux.train!(loss, params(m), train_batch , opt, cb = cbfunc)
+        b+=1
+    end
+
 end
 
-opt = ADAM()
-@epochs 5 Flux.train!(loss, params(m), train_set, opt, cb = throttle(cbfunc, 3))
+println("Final acc and loss : ")
+cbfunc()

From cea54b9ba7aa98387d1a5727a208c04f5963c006 Mon Sep 17 00:00:00 2001
From: akshat <akshatmehrotra2004@gmail.com>
Date: Sat, 11 Jan 2020 17:28:04 +0530
Subject: [PATCH 6/6] Update Conv.jl

---
 vision/judge a book by its cover/Conv.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vision/judge a book by its cover/Conv.jl b/vision/judge a book by its cover/Conv.jl
index ce8988bd0..e61d88a24 100644
--- a/vision/judge a book by its cover/Conv.jl	
+++ b/vision/judge a book by its cover/Conv.jl	
@@ -1,3 +1,4 @@
+# This model can reach accuracies of up to ~22% (close to what is described in the paper)
 # After a lot of debugging and asking around
 # The fix was found to be to remove every conv layer.
 # Previously, whilst using the conv model, the accuracy was stuck at exactly 0.8 and loss at exactly 3.33253