# Auricular shape analysis - age prediction

In [None]:
%load_ext autoreload
%autoreload 2

import logging
import os
import time
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Extend the module search path by the directory two levels up so that the
# `projects.auricular.*` imports below resolve.
# NOTE(review): the path is relative to the current working directory, so this
# presumably expects the notebook to be run from its own folder - confirm.
sys.path.append("../..")

from projects.auricular.curvaturedescriptors import CurvatureDescriptorsParams
from projects.auricular.curvaturedescriptors import CurvatureDescriptors
from projects.auricular.curvaturedescriptors import HistogramDescriptors
from projects.auricular.analyze import ModelAnalysis
from projects.auricular.common import getSample

In [None]:
# Location of the input data, with "~" expanded to the user's home directory.
input_data = os.path.expanduser("~/data/aurikularni_plocha_ply5/")

# Materialise everything getSample yields for that directory.
sample = list(getSample(input_data))

# Settings handed to CurvatureDescriptorsParams, gathered in one mapping so
# they can be read (and tweaked) in a single place.
descriptor_settings = {
    'input_data': input_data,
    'sampling_method': 'regular',
    'dist': 1.0,
    'sampling_rate': 0.5,
    'sample_count': 5000,
    'output': '../../../output_812',
}
params = CurvatureDescriptorsParams(**descriptor_settings)

# Descriptor object configured by `params`; later cells read data through it.
cd = CurvatureDescriptors(params)

In [None]:
# Fetch the data from the CurvatureDescriptors instance and wrap it in one
# HistogramDescriptors object per value 0.5, 1.0 and 2.0; the dictionary is
# keyed by that same value.
data = cd.getData()
hist_descriptors = {
    distance: HistogramDescriptors(data, distance)
    for distance in (0.5, 1.0, 2.0)
}

## Prediction

### age x mean dne

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.svm import LinearSVR

from projects.auricular.analyze import evaluateAllModels, ModelAnalysis

In [None]:
# One row per sample: the age, its natural logarithm, and the 'sampled_dne'
# value stored under distance 1.0.
df = pd.DataFrame(
    data=[
        [age, np.log(age), float(entry['dist'][1.0]['sampled_dne'])]
        for entry in data
        # Single-element loop: parse the age once and reuse it in the row.
        for age in [float(entry['age'])]
    ],
    columns=['age', 'logAge', 'dne'],
)

# Evaluate an SVR that predicts logAge from dne alone and show the result
# as a table.
r = evaluateAllModels(df, indeps=[['dne']], dep=['logAge'], model=SVR())
pd.DataFrame(r)

### age x vector dne

In [None]:
# Bar charts of the 3-bin histogram data (descriptor set 0.5) for three
# samples: index 0, 249 and 498.
for sample_index in (0, 249, 498):
    histogram_rows = hist_descriptors[0.5].getSampleHistogramData(3)
    pd.DataFrame(histogram_rows[sample_index]).plot.bar()

In [None]:
# Analysis helper built from the raw sample data and the per-distance
# histogram descriptors; the following cells call its methods.
# NOTE(review): the meaning of the 'dist_curv' argument is defined in
# projects.auricular.analyze and is not visible here - confirm.
ma=ModelAnalysis(data, hist_descriptors, 'dist_curv')

In [None]:
# NOTE(review): 1.0 is presumably the distance key (hist_descriptors is keyed
# by 0.5 / 1.0 / 2.0) - confirm against ModelAnalysis.twoParamPlot.
ma.twoParamPlot(1.0)

In [None]:
# Run ModelAnalysis.modelForBins with an SVR model and normalize_dist disabled.
# NOTE(review): the positional arguments are presumably (bin count, index
# selection, distance key); None is passed here where the following cells
# pass explicit index lists - confirm against the method signature.
ma.modelForBins(3, None, 1.0, model=SVR(), normalize_dist=False)

In [None]:
# Same call with the method's default model and the index list [[0, 2]].
# NOTE(review): presumably [[0, 2]] selects which histogram bins are used as
# predictors - confirm.
ma.modelForBins(3, [[0, 2]], 1.0)

In [None]:
# Same call with the method's default model and the index list [[0, 1]].
# NOTE(review): presumably [[0, 1]] selects which histogram bins are used as
# predictors - confirm.
ma.modelForBins(3, [[0, 1]], 1.0)

In [None]:
# Materialise the results of binsRmse for the value 2.0 (default model) and
# plot them.
# NOTE(review): 2.0 is presumably the distance key - confirm.
ma.plotRmsePerBins(list(ma.binsRmse(2.0)))

In [None]:
# Same plot for the value 1.0, this time with an SVR model.
ma.plotRmsePerBins(list(ma.binsRmse(1.0, model=SVR())))

In [None]:
# Same plot with a LinearSVR model.
# NOTE(review): unlike the two cells above, no first argument is passed, so
# binsRmse falls back to its own default - confirm this is intended.
ma.plotRmsePerBins(list(ma.binsRmse(model=LinearSVR())))

In [None]:
# Method comparison for the value 2.0.
# NOTE(review): the argument presumably selects the hist_descriptors entry
# (keys 0.5 / 1.0 / 2.0) - confirm against ModelAnalysis.compareMethods.
ma.compareMethods(2.0)

In [None]:
# Method comparison for the value 1.0.
# NOTE(review): the argument presumably selects the hist_descriptors entry
# (keys 0.5 / 1.0 / 2.0) - confirm against ModelAnalysis.compareMethods.
ma.compareMethods(1.0)

In [None]:
# Method comparison for the value 0.5.
# NOTE(review): the argument presumably selects the hist_descriptors entry
# (keys 0.5 / 1.0 / 2.0) - confirm against ModelAnalysis.compareMethods.
ma.compareMethods(0.5)

## Neural network

In [None]:
from tensorflow import keras
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error

from projects.auricular.ann import buildModel, evaluateModel

In [None]:
# Which descriptor set to use and how many histogram bins to request.
dist, bins = 1.0, 10

# Target: natural logarithm of each sample's age, as a single-column array.
log_ages = [np.log(float(entry['age'])) for entry in data]
Y = pd.DataFrame(log_ages).values

# Features: 2-D histogram data of the selected descriptor set.
histogram_2d = ma.hist_descriptors[dist].getSampleHistogram2dData(bins, True, False)
X = pd.DataFrame(histogram_2d).values
# Alternative feature set (1-D histogram data), kept for quick switching:
#X = pd.DataFrame(hist_descriptors[dist].getSampleHistogramData(bins)).values

In [None]:
# Every run gets its own timestamped sub-directory under ./mylogs.
run_stamp = time.strftime("run_%Y_%m_%d_%H_%M_%S")
root_logdir = os.path.join(os.curdir, "mylogs")
run_logdir = os.path.join(root_logdir, run_stamp)

In [None]:
# Split the samples 80 % / 10 % / 10 % into training, validation and test sets:
# first hold out 20 %, then halve that hold-out.
# random_state=None means the split is different on every run.
X_train, X_validate_test, y_train, y_validate_test = train_test_split(
    X, Y,
    test_size=0.2,
    random_state=None)
X_validate, X_test, y_validate, y_test = train_test_split(
    X_validate_test, y_validate_test,
    test_size=0.5,
    random_state=None)

# Network sized from the feature count: one input per feature and as many
# neurons per hidden layer as there are features.
model = buildModel(n_inputs=X_train.shape[1],
                   n_hidden_layers=2,
                   n_neurons=X_train.shape[1],
                   learning_rate=0.00005)

# Early stopping watches the loss on `validation_data`, so it must be the
# dedicated validation split. Validating on the test split would let the test
# data decide when training stops and bias every score later reported on
# X_test / y_test.
# NOTE(review): `use_multiprocessing` / `workers` are documented by tf.keras as
# applying to generator / Sequence inputs only and are not accepted by Keras 3;
# they are kept unchanged here - consider dropping them when upgrading.
model.fit(X_train, y_train,
          use_multiprocessing=True,
          workers=8,
          epochs=1000,
          batch_size=10,
          callbacks=[keras.callbacks.EarlyStopping(patience=100),
                     keras.callbacks.TensorBoard(run_logdir)],
          validation_data=(X_validate, y_validate))

In [None]:
predictions = model.predict(X_test)

# The network is trained on log(age), so undo the logarithm on both the
# predictions and the targets to report the RMSE in the original age units.
predicted_age, actual_age = np.exp(predictions), np.exp(y_test)
np.sqrt(mean_squared_error(predicted_age, actual_age))

In [None]:
# Loss of the trained model on the test split. The targets are log(age), so
# the value is on the log scale rather than in the original age units.
# NOTE(review): which loss/metrics are reported depends on how buildModel
# compiles the model - not visible here.
model.evaluate(X_test, y_test)

In [None]:
# Rebuild the feature matrix from the 1-D histogram data (this overwrites the
# global X) and evaluate a model on it against the log-age targets Y.
# NOTE(review): evaluateModel returns a 3-tuple that is unpacked below as
# (rmse, predicted, predicted_indices); its evaluation scheme lives in
# projects.auricular.ann - confirm there.
X = pd.DataFrame(hist_descriptors[dist].getSampleHistogramData(bins)).values
result_curv = evaluateModel(X, Y)

In [None]:
# Same evaluation, but with the 2-D histogram data as features (this again
# overwrites the global X).
# NOTE(review): the meaning of the two boolean flags (True, False) is defined
# by getSampleHistogram2dData - confirm.
X = pd.DataFrame(ma.hist_descriptors[dist].getSampleHistogram2dData(bins, True, False)).values
result_curv_dist = evaluateModel(X, Y)

In [None]:
# Unpack one of the two evaluation results and display its RMSE.
rmse, predicted, predicted_indices = result_curv  # use result_curv_dist here to inspect the 2-D histogram run instead
rmse

In [None]:
# 10 x 10-fold CV: run the evaluation ten times on the 2-D histogram features
# and report the mean RMSE over the runs.
# The globals X, result_curv_dist, rmse, predicted and predicted_indices are
# left holding the values of the final run.
r = []
for repeat in range(10):
    features_2d = ma.hist_descriptors[dist].getSampleHistogram2dData(bins, True, False)
    X = pd.DataFrame(features_2d).values
    result_curv_dist = evaluateModel(X, Y)
    rmse, predicted, predicted_indices = result_curv_dist
    r.append(rmse)
np.mean(r)

In [None]:
# Actual vs. predicted age; both are stored as log(age), so exponentiate
# before plotting.
fig, ax = plt.subplots(figsize=(15, 15))
actual_age = np.exp(Y[predicted_indices.astype(int)])
predicted_age = np.exp(predicted)
ax.scatter(actual_age, predicted_age)
ax.set_xlabel("actual")
ax.set_ylabel("predicted")
# Identity line from 20 to 100: points on it are perfect predictions.
ax.plot([20, 100], [20, 100], color="black", linewidth=1);