# Auricular shape analysis - age prediction

In [None]:
%load_ext autoreload
%autoreload 2

import logging
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import trimesh
from PIL import Image
from scipy.ndimage import distance_transform_edt, binary_fill_holes

In [None]:
sys.path.append("/home/vajicek/src/aurikular/shapeexplorer")

from projects.auricular.projection import computeHeightmap, regularSampling, getDistanceToEdge
from projects.auricular.curvaturedescriptors import CurvatureDescriptorsParams
from projects.auricular.curvaturedescriptors import CurvatureDescriptors
from projects.auricular.curvaturedescriptors import HistogramDescriptors
from projects.auricular.common import getSample

In [None]:
# Input folder; "~" is expanded to the current user's home directory.
input_data = os.path.expanduser("~/data/aurikularni_plocha_ply4/")
# Materialise everything getSample yields for that folder.
sample = list(getSample(input_data))
# Settings handed to CurvatureDescriptors.
# NOTE(review): the meaning of dist / sampling_rate / sample_count is defined in
# projects.auricular.curvaturedescriptors and is not visible from this notebook.
# NOTE(review): `output` is a relative path, so it presumably resolves against the
# kernel's working directory - confirm.
params = CurvatureDescriptorsParams(
    input_data=input_data,
    sampling_method='regular',
    dist=1.0,
    sampling_rate=0.5,
    sample_count=5000,
    output='../../../output_499')
cd = CurvatureDescriptors(params)

In [None]:
# Records returned by the CurvatureDescriptors instance; later cells iterate over
# `data` and read each record's 'age' and 'dist' entries.
data = cd.getData()

## Prediction

### age x mean dne

In [None]:
# Drop any cached copy of the analyze module so that the import below re-executes
# it. pop() with a default is used instead of `del` because `del` raises KeyError
# when the module has not been imported yet (e.g. on a freshly started kernel).
sys.modules.pop('projects.auricular.analyze', None)
from projects.auricular.analyze import evaluateAllModels
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.svm import LinearSVR

In [None]:
# One row per record in `data`: age, its natural logarithm, and the sampled DNE
# value stored under dist 1.0.
records = [
    {
        'age': float(specimen['age']),
        'logAge': np.log(float(specimen['age'])),
        'dne': float(specimen['dist'][1.0]['sampled_dne']),
    }
    for specimen in data
]
df = pd.DataFrame(records, columns=['age', 'logAge', 'dne'])

# Evaluate log(age) against the single 'dne' predictor and show the result table.
r = evaluateAllModels(
    df,
    indeps=[['dne']],
    dep=['logAge'],
    model=LinearRegression(),
)
pd.DataFrame(r)

### age x vector dne

In [None]:
# Bar charts of the 3-bin histogram data at dist 1.0 for four individual entries
# (indices 0, 107, 207 and 307).
# NOTE(review): `hist_descriptors` is not created in any cell shown in this
# notebook (only the HistogramDescriptors class is imported) - it has to exist in
# the kernel already; confirm where it is built.
pd.DataFrame(hist_descriptors[1.0].getSampleHistogramData(3)[0]).plot.bar()
pd.DataFrame(hist_descriptors[1.0].getSampleHistogramData(3)[107]).plot.bar()
pd.DataFrame(hist_descriptors[1.0].getSampleHistogramData(3)[207]).plot.bar()
pd.DataFrame(hist_descriptors[1.0].getSampleHistogramData(3)[307]).plot.bar()

In [None]:
# Scatter of histogram bin 0 against bin 2 (3-bin histograms, dist 1.0),
# coloured by the age of the corresponding record in `data`.
x = [hist[0] for hist in hist_descriptors[1.0].getSampleHistogramData(3)]
y = [hist[2] for hist in hist_descriptors[1.0].getSampleHistogramData(3)]
age = [float(specimen['age']) for specimen in data]
scatter_df = pd.DataFrame({'x': x, 'y': y, 'age': age})
scatter_df.plot.scatter(x='x', y='y', c='age', colormap='viridis')

In [None]:
def modelForBins(bins, indeps=None, dist=2.0, model=None):
    """Evaluate a regression of log(age) on histogram bins.

    Args:
        bins: number of bins requested from
            ``hist_descriptors[dist].getSampleHistogramData``.
        indeps: list of predictor-column lists handed to ``evaluateAllModels``.
            When omitted (or empty) a single set with every bin
            ``0 .. bins - 1`` is used.
        dist: key selecting which entry of ``hist_descriptors`` to use.
        model: estimator passed to ``evaluateAllModels``. When omitted, a new
            ``LinearRegression()`` is created for this call.

    Returns:
        ``pd.DataFrame`` built from whatever ``evaluateAllModels`` returns.
    """
    if model is None:
        # Build the default per call: a default written in the signature would
        # be created once at definition time and shared by all later calls.
        model = LinearRegression()
    df = pd.DataFrame(hist_descriptors[dist].getSampleHistogramData(bins))
    df['age'] = [float(data1['age']) for data1 in data]
    df['logAge'] = np.log(df['age'])
    indeps = indeps or [list(range(bins))]
    r = evaluateAllModels(df, indeps=indeps, dep=['logAge'], model=model)
    return pd.DataFrame(r)

In [None]:
# Evaluate log(age) against all three bins of the 3-bin histogram, using the
# dist=1.0 descriptors.
modelForBins(3, [[0, 1, 2]], 1.0)

In [None]:
# Evaluate log(age) against bins 0 and 2 of the 3-bin histogram. dist is not
# passed, so modelForBins' default (2.0) applies.
# NOTE(review): the all-bins cell above uses dist=1.0 - confirm that comparing
# across different dist values is intended.
modelForBins(3, [[0,2]])

In [None]:
# Evaluate log(age) against bins 0 and 1 of the 3-bin histogram; dist is omitted,
# so modelForBins' default (2.0) is used.
modelForBins(3, [[0, 1]])

In [None]:
def plotRmsePerBins(bins_rmse_list):
    """Plot RMSE as a function of the number of histogram bins.

    Args:
        bins_rmse_list: sequence of ``(bins, rmse)`` pairs, e.g. the
            materialised output of ``binsRmse``.
    """
    bin_counts, rmse_values = zip(*bins_rmse_list)
    curve = pd.DataFrame({'rmse': rmse_values, 'bins': bin_counts})
    curve.plot(y='rmse', x='bins')
    # Put one x tick on every evaluated bin count.
    plt.xticks(curve['bins'])
    
def binsRmse(dist=1.0, model=None):
    """Yield ``(bins, rmse)`` for every histogram size from 2 to 19 bins.

    Args:
        dist: forwarded to ``modelForBins`` as its ``dist`` argument.
        model: estimator forwarded to ``modelForBins``. When omitted, a new
            ``LinearRegression()`` is created for each bin count.

    Yields:
        Tuples of the bin count and the ``'rmse'`` value at row label 0 of the
        frame returned by ``modelForBins``.
    """
    for bins in range(2, 20):
        # Build the default here rather than in the signature, where it would
        # be a single instance created at definition time and shared by every
        # call.
        estimator = LinearRegression() if model is None else model
        yield bins, modelForBins(bins, dist=dist, model=estimator)['rmse'][0]

In [None]:
# RMSE curve over 2-19 bins at dist=2.0 with binsRmse's default
# linear-regression model.
plotRmsePerBins(list(binsRmse(2.0)))

In [None]:
# RMSE curve over 2-19 bins at dist=1.0 using an SVR with default settings.
plotRmsePerBins(list(binsRmse(1.0, model=SVR())))

In [None]:
# RMSE curve over 2-19 bins using a LinearSVR with default settings; dist is
# omitted, so binsRmse's default (1.0) is used.
plotRmsePerBins(list(binsRmse(model=LinearSVR())))

In [None]:
def compareMethods(dist=1.0):
    """Plot the RMSE-per-bin-count curves of three regressors on one chart.

    Runs ``binsRmse`` with ``LinearSVR``, ``SVR`` and ``LinearRegression``
    (in that order) and draws one line per model against the bin count.

    Args:
        dist: forwarded to ``binsRmse``.
    """
    def rmse_curve(estimator):
        # Split the generated (bins, rmse) pairs into two parallel tuples.
        bin_counts, errors = zip(*binsRmse(dist=dist, model=estimator))
        return bin_counts, errors

    # Evaluation order is kept as LinearSVR -> SVR -> LinearRegression.
    _, linear_svr_rmse = rmse_curve(LinearSVR())
    bin_counts, svr_rmse = rmse_curve(SVR())
    _, linear_regression_rmse = rmse_curve(LinearRegression())

    comparison = pd.DataFrame({
        'linear regression': linear_regression_rmse,
        'linear SVR': linear_svr_rmse,
        'SVR': svr_rmse,
        'bins': bin_counts})
    comparison.plot.line(x='bins')
    # Put one x tick on every evaluated bin count.
    plt.xticks(comparison['bins'])

In [None]:
# Compare the three regressors using the dist=2.0 histogram descriptors.
compareMethods(2.0)

In [None]:
# Compare the three regressors using the dist=1.0 histogram descriptors.
compareMethods(1.0)

In [None]:
# Compare the three regressors using the dist=0.5 histogram descriptors
# (requires `hist_descriptors` to contain a 0.5 entry).
compareMethods(0.5)

## Neural network

In [None]:
import tensorflow as tf
from tensorflow import keras
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

In [None]:
# Histogram size and hist_descriptors key used by the neural-network cells.
bins = 14
dist = 1.0


def baselineModel():
    """Build and compile the baseline regression network.

    The network has one ReLU hidden layer with ``bins`` units fed by ``bins``
    inputs, followed by a single linear output unit. It is compiled with the
    Adam optimizer and mean-squared-error loss. ``bins`` is read from the
    module namespace when the function is called.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    hidden_layer = Dense(
        bins,
        input_dim=bins,
        kernel_initializer='normal',
        activation='relu',
    )
    output_layer = Dense(1, kernel_initializer='normal')

    model = keras.Sequential()
    for layer in (hidden_layer, output_layer):
        model.add(layer)
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [None]:
# Feature matrix taken from the histogram data for the chosen `dist` and `bins`.
# presumably one row per record with `bins` columns - TODO confirm against
# getSampleHistogramData.
X = pd.DataFrame(np.array(hist_descriptors[dist].getSampleHistogramData(bins))).values
# Target: natural log of each record's age, as a single-column 2-D array.
Y = pd.DataFrame([np.log(float(data1['age'])) for data1 in data]).values

# Pipeline steps; the standardisation step below is currently disabled, so the
# Keras regressor is the only step.
estimators = []
#estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baselineModel, epochs=1000, batch_size=10, verbose=0)))
pipeline = Pipeline(estimators)

In [None]:
# Single hold-out evaluation: 20 % of the rows are held out for testing.
# random_state=None means the split is not seeded, so it can differ between runs.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=None)
pipeline.fit(X_train, y_train)
predictions = pipeline.predict(X_test)
# RMSE after exponentiating both the predictions and the log-age targets.
# mean_squared_error's signature is (y_true, y_pred); the arguments are passed in
# the opposite order here, which does not change the squared error.
np.sqrt(mean_squared_error(np.exp(predictions), np.exp(y_test)))

In [None]:
# 10-fold cross-validation with shuffling: refit the pipeline on each training
# split and collect the RMSE computed after exponentiating the predictions and
# the log-age targets.
kf = KFold(n_splits=10, shuffle=True)
fold_rmse = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]
    pipeline.fit(X_train, y_train)
    predictions = pipeline.predict(X_test)
    squared_error = mean_squared_error(np.exp(predictions), np.exp(y_test))
    rmse = np.sqrt(squared_error)
    print(rmse)
    fold_rmse.append(rmse)

# Mean of the per-fold RMSE values.
print("fold_rmse = %f" % np.mean(fold_rmse))