diff --git a/app/age_hist.py b/app/age_hist.py
new file mode 100644
index 0000000..c7aecd9
--- /dev/null
+++ b/app/age_hist.py
@@ -0,0 +1,13 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+from .load_data_3d import load_targets
+from .settings import DATA_DIRECTORY
+
+def age_hist():
+    data = load_targets()
+    data = data['Y'].tolist()
+    plt.figure()
+    plt.hist(data, bins=100)
+    plt.savefig("plots/age_hist.pdf")
+    plt.close()
diff --git a/app/cut_brain.py b/app/cut_brain.py
index 4fe6cc1..7b67416 100644
--- a/app/cut_brain.py
+++ b/app/cut_brain.py
@@ -4,14 +4,14 @@ def cut_brain(sample, pos):
     data = [] 
     for d in sample:
         if pos == "lt":
-            z_s,z_e = 60,len(d[0,0,:])
+            z_s,z_e = 60,d.shape[2]
             y_s,y_e = 0,60
         elif pos == "mt":
-            z_s,z_e = 60,len(d[0,0,:])
+            z_s,z_e = 60,d.shape[2]
             y_s,y_e = 60,110
         elif pos == "rt":
-            z_s,z_e = 50,len(d[0,0,:])
-            y_s,y_e = 110,len(d[0,:,0])
+            z_s,z_e = 50,d.shape[2]
+            y_s,y_e = 110,d.shape[1]
         elif pos == "lb":
             z_s,z_e = 0,60 
             y_s,y_e = 0,60
@@ -20,7 +20,7 @@ def cut_brain(sample, pos):
             y_s,y_e = 60,110
         elif pos == "rb":
             z_s,z_e = 0,50
-            y_s,y_e = 110,len(d[0,:,0])
+            y_s,y_e = 110,d.shape[1]
 
         area = np.array([[z_s,z_e],[y_s,y_e]])
 
diff --git a/app/cut_brain_test.py b/app/cut_brain_test.py
new file mode 100644
index 0000000..d81a754
--- /dev/null
+++ b/app/cut_brain_test.py
@@ -0,0 +1,40 @@
+import numpy as np
+
+def cut_brain(d, pos):
+    if pos == "lt":
+        z_s,z_e = 60,d.shape[2]
+        y_s,y_e = 0,60
+    elif pos == "mt":
+        z_s,z_e = 60,d.shape[2]
+        y_s,y_e = 60,110
+    elif pos == "rt":
+        z_s,z_e = 50,d.shape[2]
+        y_s,y_e = 110,d.shape[1]
+    elif pos == "lb":
+        z_s,z_e = 0,60 
+        y_s,y_e = 0,60
+    elif pos == "mb":
+        z_s,z_e = 0,60
+        y_s,y_e = 60,110
+    elif pos == "rb":
+        z_s,z_e = 0,50
+        y_s,y_e = 110,d.shape[1]
+
+    area = np.array([[z_s,z_e],[y_s,y_e]])
+
+    z = range(area[0][0],area[0][1])
+    y = range(area[1][0],area[1][1])
+    x_len = d.shape[0] 
+    y_len = len(y)
+    z_len = len(z)
+
+    d_new = np.zeros((x_len,y_len,z_len))
+    for k in range(0,x_len):
+        z_index = 0
+        for i in z:
+            y_index = 0
+            for j in y:
+                d_new[k,y_index,z_index] = d[k,j,i]
+                y_index += 1
+            z_index += 1
+    return d_new
diff --git a/app/feature.py b/app/feature.py
index 5ee2066..9f37e84 100644
--- a/app/feature.py
+++ b/app/feature.py
@@ -4,7 +4,7 @@
 
 from .normalize import normalize
 
-def feature_ratio_mean(inputs):
+def feature_ratio_mean(inputs, norm=None):
     inputs = get_flat_values(inputs)
 
     fs = []
@@ -17,38 +17,28 @@ def feature_ratio_mean(inputs):
 
         fs.append(ratio)
 
-    fs = normalize(fs)
-    return fs
+    if norm == None:
+        fs, minmax = normalize(fs)
+        return fs, minmax
+    else:
+        fs = normalize(fs, norm)
+        return fs
 
-def feature_mean(inputs):
+def feature_mean(inputs, norm=None):
     inputs = get_flat_values(inputs)
 
     fs = []
     for i in inputs:
         fs.append(np.mean(i))
 
-    fs = normalize(fs)
-    return fs
+    if norm == None:
+        fs, minmax = normalize(fs)
+        return fs, minmax
+    else:
+        fs = normalize(fs, norm)
+        return fs
 
-
-def feature_ratio(inputs):
-    inputs = get_flat_values(inputs)
-
-    fs = []
-    for i in inputs:
-
-        low = i[i < 1100]
-        high = i[i >= 1100]
-
-        ratio = np.sum(high)/np.sum(low)
-
-        fs.append(ratio)
-
-    fs = normalize(fs)
-    return fs
-
-
-def feature_max(inputs):
+def feature_max(inputs, norm=None):
     inputs = get_flat_values(inputs)
 
     fs = []
@@ -58,9 +48,12 @@ def feature_max(inputs):
         x_max = bin_edges[idx_max]
 
         fs.append(x_max)
-
-    fs = normalize(fs)
-    return fs
+    if norm == None:
+        fs, minmax = normalize(fs)
+        return fs, minmax
+    else:
+        fs = normalize(fs, norm)
+        return fs
 
 
 def get_flat_values(inputs):
diff --git a/app/feature_test.py b/app/feature_test.py
new file mode 100644
index 0000000..d759037
--- /dev/null
+++ b/app/feature_test.py
@@ -0,0 +1,75 @@
+"""Description of this file."""
+
+import numpy as np
+
+from .normalize import normalize
+from .get_number_of_files import get_number_of_files
+from .load_deviations import load_deviation
+from .cut_brain_test import cut_brain
+from .load_data_3d import load_sample_input
+
+def feature_ratio_mean(area="whole", training=True, norm=None):
+    fs = []
+    for i in range(1,get_number_of_files(training)+1):
+        data = load_data(i,training) 
+        if not area == "whole":
+            data = cut_brain(data,area)
+        data = get_flat_values(data) 
+        mean = np.mean(data)
+        low = data[data < mean]
+        high = data[data >= mean]
+
+        ratio = np.sum(high)/np.sum(low)
+
+        fs.append(ratio)
+
+    if norm == None:
+        fs, minmax = normalize(fs)
+        return fs, minmax
+    else:
+        fs = normalize(fs, norm)
+        return fs
+
+def feature_mean(area, training=True, norm=None):
+    fs = []
+    for i in range(1,get_number_of_files(training)+1):
+        data = load_data(i,training) 
+        if not area == "whole":
+            data = cut_brain(data,area)
+        data = get_flat_values(data) 
+        fs.append(np.mean(data))
+
+    if norm == None:
+        fs, minmax = normalize(fs)
+        return fs, minmax
+    else:
+        fs = normalize(fs, norm)
+        return fs
+
+def feature_max(area, training=True, norm=None):
+    fs = []
+    for i in range(1,get_number_of_files(training)+1):
+        data = load_data(i,training) 
+        if not area == "whole":
+            data = cut_brain(data,area)
+        data = get_flat_values(data) 
+        hist, bin_edges = np.histogram(data, bins='auto')
+        idx_max = hist.argmax()
+        x_max = bin_edges[idx_max]
+        fs.append(x_max)
+
+    if norm == None:
+        fs, minmax = normalize(fs)
+        return fs, minmax
+    else:
+        fs = normalize(fs, norm)
+        return fs
+
+def get_flat_values(inputs):
+    vs = inputs.flatten()
+    return vs #[vs > 100]
+
+def load_data(ID, training=True):
+   #return load_deviation(ID, training)
+   return load_sample_input(ID, training)
+
diff --git a/app/get_number_of_files.py b/app/get_number_of_files.py
new file mode 100644
index 0000000..84a7c2b
--- /dev/null
+++ b/app/get_number_of_files.py
@@ -0,0 +1,14 @@
+import os
+from .settings import DATA_DIRECTORY
+
+def get_number_of_files(training=True):
+    if training == True:
+        folder = "set_train"
+    else:
+        folder = "set_test"
+    DIR_IN = os.path.join(
+        DATA_DIRECTORY,
+        folder
+        )
+    nof = len([name for name in os.listdir(DIR_IN) if os.path.isfile(os.path.join(DIR_IN, name))])
+    return nof
diff --git a/app/heatmap.py b/app/heatmap.py
index bfaf459..edacd18 100644
--- a/app/heatmap.py
+++ b/app/heatmap.py
@@ -6,6 +6,7 @@
 import numpy as np
 
 from .load_data import load_sample_input
+from .load_deviations import load_deviation
 
 
 __author__ = "lfievet"
@@ -20,8 +21,10 @@
 
 
 def heatmap():
-    sample = 10
-    data = load_sample_input(sample)
+    sample = 20
+    #data = load_sample_input(sample)
+    
+    data = load_deviation(sample)
 
     # flat_data = data.get_data().flatten()
     # flat_data = flat_data[flat_data > 100]
@@ -38,7 +41,7 @@ def heatmap():
     # return
 
     for i in range(60, 150):
-        heat_map = data.get_data()[:, :, i, 0]
+        heat_map = data[:][:][i]  #.get_data()[:, :, i, 0]
         # print(heat_map)
         # print(heat_map.shape)
         # heatmap.reshape((176, 208))
diff --git a/app/load_data_3d.py b/app/load_data_3d.py
index e7223b6..c394efc 100644
--- a/app/load_data_3d.py
+++ b/app/load_data_3d.py
@@ -1,31 +1,32 @@
 """Loading training and testing data."""
-
 import os
+import scipy.io
 
 import nibabel as ni
 import pandas as pd
 
 from .settings import DATA_DIRECTORY
+from .get_number_of_files import get_number_of_files
 
 def load_samples_inputs(training=True):
-    DIR = os.path.join(
-        DATA_DIRECTORY,
-        "set_train"
-        )
-    files = len([name for name in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, name))])
-
+    nof = get_number_of_files(training) 
     inputs = []
-    for i in range(1, files+1):
-        inputs.append(load_sample_input(i))
-
+    for i in range(1, nof+1):
+        inputs.append(load_sample_input(i, training))
     return inputs
 
-
 def load_sample_input(id=1, training=True):
+    if training == True:
+        folder = "set_train"
+        files = "train"
+    else:
+        folder = "set_test"
+        files = "test"
+
     file_path = os.path.join(
         DATA_DIRECTORY,
-        "set_train",
-        "train_{}.nii".format(id)
+        folder,
+        "{}_{}.nii".format(files,id)
     )
     return ni.load(file_path).get_data()[:,:,:,0]
 
diff --git a/app/load_deviations.py b/app/load_deviations.py
new file mode 100644
index 0000000..fc82797
--- /dev/null
+++ b/app/load_deviations.py
@@ -0,0 +1,42 @@
+import os
+import scipy.io
+import numpy as np
+
+from .settings import DATA_DIRECTORY
+from .mean_brain import create_deviation_set 
+from .get_number_of_files import get_number_of_files
+
+def load_deviations(training=True):
+    if training == True:
+        files = "train"
+    else:
+        files = "test"
+    DIR = os.path.join(
+        DATA_DIRECTORY,
+        "set_{}_deviation".format(files),
+        )
+    if not os.path.exists(DIR):
+        create_deviation_set()
+    data = []
+    for i in range(1,get_number_of_files(training)+1):
+        data.append(load_deviation(i,training))
+    return data
+
+def load_deviation(ID,training=True):
+    if training == True:
+        files = "train"
+    else:
+        files = "test"
+    DIR = os.path.join(
+        DATA_DIRECTORY,
+        "set_{}_deviation".format(files),
+        )
+    if not os.path.exists(DIR):
+        create_deviation_set()
+    FILE = os.path.join(
+        DATA_DIRECTORY,
+        "set_{}_deviation".format(files),
+        "{}_{}.mat".format(files,ID)
+        )
+    return scipy.io.loadmat(FILE)['out']
+
diff --git a/app/load_features.py b/app/load_features.py
new file mode 100644
index 0000000..4c83c9a
--- /dev/null
+++ b/app/load_features.py
@@ -0,0 +1,64 @@
+from .load_data_3d import load_targets, load_samples_inputs
+from .load_deviations import load_deviations
+from .cut_brain import cut_brain
+from .feature import feature_mean, feature_max, feature_ratio_mean
+
+def load_features(norms=None):
+    if norms == None:
+        training = True
+    else:
+        training = False 
+
+    areas = ["lt","mt","rt","lb","mb","rb"]
+    inputs = [
+        {
+            "area": "whole",
+            "val": load_deviations(training)
+        }
+    ]
+    for a in areas:
+        inputs.append(
+            {
+                "area": a,
+                "val": cut_brain(inputs[0]["val"], a)
+            }
+            )
+    features = [
+        {
+            "name": "mean",
+            "f": feature_mean
+        },
+        {
+            "name": "ratio_mean",
+            "f": feature_ratio_mean
+        },
+        {
+            "name": "max",
+            "f": feature_max
+        },
+    ]
+    if norms == None:
+        norms = {}
+        data = load_targets()
+        print("plotting features")
+        for f in features:
+            for i in inputs:
+                feature_inputs , norms["{}_{}".format(f["name"], i["area"])] = f["f"](i["val"])
+                data["{}_{}".format(f["name"], i["area"])] = feature_inputs
+               # plt.figure()
+               # plt.scatter(
+               #     feature_inputs,
+               #     data["Y"].tolist(),
+               # )
+               # plt.savefig("plots/line_{}_{}.pdf".format(
+               #     f["name"], i["area"]
+               # ))
+               # plt.close()
+        return data, norms
+    else:
+        data = {} 
+        for f in features:
+            for i in inputs:
+                feature_inputs = f["f"](i["val"], norms["{}_{}".format(f["name"], i["area"])])
+                data["{}_{}".format(f["name"], i["area"])] = feature_inputs
+        return data
diff --git a/app/load_features_test.py b/app/load_features_test.py
new file mode 100644
index 0000000..41385c3
--- /dev/null
+++ b/app/load_features_test.py
@@ -0,0 +1,48 @@
+import matplotlib.pyplot as plt
+
+from .load_data_3d import load_targets, load_samples_inputs
+from .load_deviations import load_deviations
+from .cut_brain_test import cut_brain
+from .feature_test import feature_mean, feature_max, feature_ratio_mean
+
+def load_features(norms=None):
+    if norms == None:
+        training = True
+    else:
+        training = False 
+
+    areas = ["whole","lt","mt","rt","lb","mb","rb"]
+    features = [
+        {
+            "name": "mean",
+            "f": feature_mean
+        },
+        {
+            "name": "ratio_mean",
+            "f": feature_ratio_mean
+        },
+        {
+            "name": "max",
+            "f": feature_max
+        },
+    ]
+    if norms == None:
+        norms = {}
+        data = load_targets()
+        print("plotting features")
+        for f in features:
+            for a in areas:
+                feature_inputs , norms["{}_{}".format(f["name"], a)] = f["f"](a)
+                data["{}_{}".format(f["name"], a)] = feature_inputs
+                plt.figure()
+                plt.scatter(feature_inputs, data["Y"].tolist())
+                plt.savefig("plots/line_{}_{}.pdf".format(f["name"], a))
+                plt.close()
+        return data, norms
+    else:
+        data = {} 
+        for f in features:
+            for a in areas:
+                feature_inputs = f["f"](a, norms["{}_{}".format(f["name"], a)])
+                data["{}_{}".format(f["name"], a)] = feature_inputs
+        return data
diff --git a/app/mean_brain.py b/app/mean_brain.py
new file mode 100644
index 0000000..821d7c1
--- /dev/null
+++ b/app/mean_brain.py
@@ -0,0 +1,61 @@
+import os
+import numpy as np
+import pandas as pd
+import scipy.io
+
+from .get_number_of_files import get_number_of_files
+from .load_data_3d import load_sample_input, load_targets
+from .settings import DATA_DIRECTORY
+
+def create_deviation_set():
+    mean = mean_brain()
+
+    files = "train"
+    DIR_OUT = os.path.join(
+        DATA_DIRECTORY,
+        "set_{}_deviation".format(files) 
+        )
+    if not os.path.exists(DIR_OUT):
+        os.makedirs(DIR_OUT)
+    nof = get_number_of_files(True) 
+    for i in range(1,nof+1):
+        data = load_sample_input(i,True)
+        diff = brain_deviation(data, mean)
+        FILE_OUT = os.path.join(
+                DIR_OUT,
+                "{}_{}.mat".format(files,i)
+                )
+        scipy.io.savemat(FILE_OUT, mdict={'out': diff}, oned_as='row')
+
+    files = "test"
+    DIR_OUT = os.path.join(
+        DATA_DIRECTORY,
+        "set_{}_deviation".format(files) 
+        )
+    if not os.path.exists(DIR_OUT):
+        os.makedirs(DIR_OUT)
+    nof = get_number_of_files(False) 
+    for i in range(1,nof+1):
+        data = load_sample_input(i,False)
+        diff = brain_deviation(data, mean)
+        FILE_OUT = os.path.join(
+                DIR_OUT,
+                "{}_{}.mat".format(files,i)
+                )
+        scipy.io.savemat(FILE_OUT, mdict={'out': diff}, oned_as='row')
+
+def brain_deviation(data, mean):
+    return data-mean
+
+def mean_brain():
+    targets = load_targets()
+    targets = targets['Y'].tolist()
+    nof = get_number_of_files(True)
+    mean = np.zeros(load_sample_input(1,True).shape)
+    for i in range(0,nof):
+        data = load_sample_input(i+1,True) 
+        mean += np.divide(data, targets.count(targets[i]))
+    return np.divide(mean, len(set(targets)))
+
+
+
diff --git a/app/normalize.py b/app/normalize.py
index b5ac38a..e1b6b50 100644
--- a/app/normalize.py
+++ b/app/normalize.py
@@ -1,11 +1,11 @@
 import numpy as np
 
-def normalize(inputs):
+def normalize(inputs, minmax=None):
     data = np.array(inputs)
-    maximum = data.max()
-    minimum = data.min()
-
-    out = (data-minimum)/(maximum-minimum)
-
-    return out
+    if minmax == None:
+        minimum = data.min()
+        maximum = data.max()
+        return (data-minimum)/(maximum-minimum), [minimum,maximum]
+    else:
+        return (data-minmax[0])/(minmax[1]-minmax[0])
 
diff --git a/app/predictor_cut.py b/app/predictor_cut.py
index 9beef5e..475cd0a 100644
--- a/app/predictor_cut.py
+++ b/app/predictor_cut.py
@@ -12,36 +12,63 @@
 from sklearn.metrics import accuracy_score
 
 from .settings import CURRENT_DIRECTORY
-from .cut_brain import cut_brain
-from .feature import feature_mean, feature_max, feature_ratio_mean
-from .load_data_3d import load_targets, load_samples_inputs
 from .squared_error import squared_error
-
+from .load_features_test import load_features
+from .get_number_of_files import get_number_of_files
 
 def predict_cut(training=True):
-    cache_path = os.path.join(
+    cache_data_path = os.path.join(
+        CURRENT_DIRECTORY,
+        "..",
+        "cache",
+        "data.hdf"
+    )
+    cache_norms_path = os.path.join(
+        CURRENT_DIRECTORY,
+        "..",
+        "cache",
+        "norms.hdf"
+    )
+    cache_test_path = os.path.join(
         CURRENT_DIRECTORY,
         "..",
         "cache",
-        "trial1.hdf"
+        "test.hdf"
     )
-    if os.path.exists(cache_path):
+    if os.path.exists(cache_data_path) and os.path.exists(cache_norms_path):
         print("Loading features from cache")
-        data = pd.read_hdf(cache_path, "table")
+        data = pd.read_hdf(cache_data_path, "table")
+        norms = pd.read_hdf(cache_norms_path, "table").to_dict()
     else:
         print("Loading features")
-        data = load_features()
+        data, norms = load_features()
+        print("saving data to cache")
+        data.to_hdf(cache_data_path, "table")
+        pd.DataFrame.from_dict(norms).to_hdf(cache_norms_path, "table")
 
-    print("saving data to cache")
-    data.to_hdf(cache_path, "table")
+    if os.path.exists(cache_test_path):
+        print("Loading test features from cache")
+        test_data = pd.read_hdf(cache_test_path, "table").to_dict()
+    else:
+        print("Loading test features")
+        test_data = load_features(norms)
+        print("saving test to cache")
+        pd.DataFrame.from_dict(test_data).to_hdf(cache_test_path, "table")
 
-    print(data)
-    #feature_list = data.keys().tolist()
-    #feature_list.remove("Y")
-    feature_list = ["max_lb", "max_mb", "mean_mb", "mean_rt", "mean_mt", "mean_whole", "ratio_mean_lt",
-            "ratio_mean_mb", "ratio_mean_mt", "ratio_mean_rt", "ratio_mean_whole"]
+    feature_list = ['mean_rt', 'mean_mb', 'ratio_mean_lt', 'ratio_mean_rt', 'ratio_mean_rb', 'max_rt', 'max_rb']
     xs = data[feature_list].values.tolist()
     ys = data["Y"].values.tolist()
+    #print(data)
+    #print(test_data)
+    test_inputs = []
+    for i in range(0,get_number_of_files(training=False)):
+        tmp = []
+        for f in feature_list:
+            tmp.append(test_data[f][0][i])
+        test_inputs.append(tmp)
+
+    print(len(xs))
+    print(len(test_inputs))
     nn = KNeighborsRegressor(
         n_neighbors=3,
         weights="uniform",
@@ -52,53 +79,14 @@ def predict_cut(training=True):
     predicted = cross_val_predict(nn, xs, ys, cv=5)
     print("Squared Error:",squared_error(ys,predicted))
 
+    nn.fit(xs, ys)
 
-def load_features():
-    areas = ["lt","mt","rt","lb","mb","rb"]
-    inputs = [
-        {
-            "area": "whole",
-            "val": load_samples_inputs()
-        }
-    ]
-    for a in areas:
-        inputs.append(
-            {
-                "area": a,
-                "val": cut_brain(inputs[0]["val"], a)
-            }
-            )
-    data = load_targets()
-
-    features = [
-        {
-            "name": "mean",
-            "f": feature_mean
-        },
-        {
-            "name": "ratio_mean",
-            "f": feature_ratio_mean
-        },
-        {
-            "name": "max",
-            "f": feature_max
-        },
-    ]
-
-    print("plotting features")
-    for f in features:
-        for i in inputs:
-            feature_inputs = f["f"](i["val"])
-            data["{}_{}".format(f["name"], i["area"])] = feature_inputs
-
-            plt.figure()
-            plt.scatter(
-                feature_inputs,
-                data["Y"].tolist(),
-            )
-            plt.savefig("plots/line_{}_{}.pdf".format(
-                f["name"], i["area"]
-            ))
-            plt.close()
+    result = nn.predict(test_inputs)
+    print(result)
+    result_path = os.path.join(
+        CURRENT_DIRECTORY,
+        "..",
+        "result.csv"
+    )
+    pd.Series(result).to_csv(result_path)
 
-    return data
diff --git a/main.py b/main.py
index c2d715c..b0d9777 100644
--- a/main.py
+++ b/main.py
@@ -9,8 +9,9 @@
 from app.predictor_cut import predict_cut
 from app.predictor_cut_iterate import predict_cut_iterate
 from app.heatmap_side import heatmap_side
-
+from app.mean_brain import create_deviation_set
 from app.settings import CACHE_DIRECTORY, PLOT_DIRECTORY, ITERATE_DIRECTORY
+from app.age_hist import age_hist
 
 if __name__ == "__main__":
     if not os.path.exists(CACHE_DIRECTORY):
@@ -41,4 +42,7 @@
                 print("Additional Argument needed for this!")
             else:
                 predict_cut_iterate(num=int(args[2]))
-
+        elif args[1] == "mean_brain":
+            create_deviation_set()
+        elif args[1] == "age_hist":
+            age_hist()