# Auricular shape analysis - curvature descriptors

In [None]:
import logging
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import trimesh
from PIL import Image

In [None]:
# Import descriptor extraction tools.
# Drop any cached copies first so the imports below execute the modules'
# current source again. pop(..., None) is used instead of `del` so that the
# cell also runs in a fresh kernel, where these modules are not cached yet
# and `del sys.modules[...]` would raise KeyError.
sys.modules.pop('projects.auricular.curvaturedescriptors', None)
sys.modules.pop('projects.auricular.common', None)
sys.modules.pop('base.common', None)
from projects.auricular.curvaturedescriptors import CurvatureDescriptorsParams
from projects.auricular.curvaturedescriptors import CurvatureDescriptors
from projects.auricular.curvaturedescriptors import HistogramDescriptors
from projects.auricular.common import get_sample

In [None]:
# Forget the cached projection module so that its next import executes the
# module source again. pop(..., None) tolerates the module not having been
# imported yet, where `del` would raise KeyError.
sys.modules.pop('projects.auricular.projection', None)

In [None]:
# Directory holding the input data; "~" is expanded to the user's home.
input_data = os.path.expanduser("~/data/aurikularni_plocha_ply3/")
# Materialise everything get_sample yields so it can be indexed and iterated
# more than once by the cells below.
sample = list(get_sample(input_data))
# Descriptor object configured for the same input directory.
# NOTE(review): the meaning of sampling_method / dist / sampling_rate /
# sample_count is defined by CurvatureDescriptorsParams, which is not visible
# in this notebook - confirm there before changing the values.
cd = CurvatureDescriptors(CurvatureDescriptorsParams(
    input_data=input_data,
    sampling_method='regular',
    dist=1.0,
    sampling_rate=0.5,
    sample_count=5000,
    output='../../../output'))

## Compute descriptors

In [None]:
# Configure the root logger at INFO level for the calls below.
logging.basicConfig(level=logging.INFO)
# NOTE(review): presumably starts a new analysis run and then computes the
# descriptors for it; both methods are defined in CurvatureDescriptors, which
# is not visible here - confirm.
cd.newAnalysis()
cd.computeDescriptors()

## Sample age distribution

In [None]:
# Age of every entry in `sample`, converted to float.
age=[float(specimen['age']) for specimen in sample]
# Histogram of the ages with matplotlib's default binning.
plt.hist(age)
plt.show()
# Summary statistics of the ages; as the last expression of the cell this is
# what the notebook displays.
pd.DataFrame(age).describe()

## Reduced sample age distribution

In [None]:
# Load a second sample from a different input directory (".../ply2/").
reduced_sample = list(get_sample(os.path.expanduser("~/data/aurikularni_plocha_ply2/")))
# Age of every entry in the reduced sample, converted to float.
reduced_sample_age = [float(specimen['age']) for specimen in reduced_sample]
# Histogram of those ages with matplotlib's default binning.
plt.hist(reduced_sample_age)
plt.show()
# Summary statistics of the ages; displayed as the cell output.
pd.DataFrame(reduced_sample_age).describe()

## Data viewer

In [None]:
# Load the file referenced by the entry at index 3 of `sample` with trimesh
# and display it through the loaded object's show() method.
trimesh.load(sample[3]['filename']).show()

In [None]:
# Display "<basename>_sample_map.png" from the output directory for the entry
# at index 0 of `sample`.
plt.imshow(Image.open(os.path.join('../../../output', sample[0]['basename'] + '_sample_map.png'), 'r'))

In [None]:
# Display "<basename>_sample_map.png" from the output directory for the entry
# at index 307 of `sample`.
plt.imshow(Image.open(os.path.join('../../../output', sample[307]['basename'] + '_sample_map.png'), 'r'))

In [None]:
# `hist_descriptors_1` is not assigned in any visible cell, so this cell
# raised NameError. Build it here from the computed data.
# NOTE(review): the distance 1.0 is inferred from the "_1" suffix - confirm.
hist_descriptors_1 = HistogramDescriptors(cd.getData(), 1.0)
# Bar chart of the histogram data (requested with 30 bins) at index 0.
pd.DataFrame(hist_descriptors_1.getSampleHistogramData(30)[0]).plot.bar()

In [None]:
# Bar chart of the histogram data (requested with 30 bins) at index 307.
# NOTE(review): `hist_descriptors_1` is not assigned in this cell; it must
# already exist in the kernel when the cell is run.
pd.DataFrame(hist_descriptors_1.getSampleHistogramData(30)[307]).plot.bar()

## Load computed data

In [None]:
# Fetch the computed records from the descriptor object.
data = cd.getData()
# One HistogramDescriptors instance per distance value, keyed by that value
# so later cells can select it with hist_descriptors[dist].
hist_descriptors = {
    dist: HistogramDescriptors(data, dist)
    for dist in (1.0, 2.0)
}

In [None]:
# Mean of the 'curvature' values of the first record under 'dist' key 2.0.
data[0]['dist'][2.0]['curvature'].mean()

In [None]:
# The 'sampled_dne' value of the first record under 'dist' key 2.0.
data[0]['dist'][2.0]['sampled_dne']

In [None]:
# Show the complete first record of `data`.
data[0]

In [None]:
def _summary_row(record):
    """Flatten one record of `data` into a row of the summary table.

    The row holds the record's identifying fields, its age as a float, the
    'sampled_dne' value stored under 'dist' key 2.0, and the natural logarithm
    of that value.
    """
    dne = float(record['dist'][2.0]['sampled_dne'])
    return [
        record['name'],
        record['subset'],
        record['type'],
        record['side'],
        record['sex'],
        float(record['age']),
        dne,
        np.log(dne),
    ]


# One row per record; the column names match the order produced above.
df = pd.DataFrame(
    data=[_summary_row(record) for record in data],
    columns=['name', 'subset', 'type', 'side', 'sex', 'age', 'meandne', 'log(meandne)'])

# Preview the first rows as the cell output.
df.head()

In [None]:
# Histogram of the 'age' column of df.
df['age'].hist()

In [None]:
# Number of rows for each 'subset' value: only the "count" column of
# describe() is kept.
df.groupby(['subset'])['subset'].describe()[["count"]]

In [None]:
# `mcolors` was referenced without ever being imported (NameError); import it
# here so the cell is self-contained.
import matplotlib.colors as mcolors

# One matplotlib base colour per subset. zip() stops at the shorter input, so
# subsets beyond the number of base colours get no entry and are not plotted.
cols = dict(zip(df['subset'].unique(), mcolors.BASE_COLORS))

fig, ax = plt.subplots(figsize=(15, 10))
for subset, color in cols.items():
    df1 = df[df['subset'] == subset]
    ax.scatter(df1['age'], df1['log(meandne)'], c=color, label=subset)
ax.legend()
# Write each row's name next to its point. The original read from an undefined
# `sub` frame (NameError); `df` is the frame whose rows are plotted above.
# NOTE(review): if only a subset of points was meant to be labelled, filter
# `df` here.
_ = df[['age', 'log(meandne)', 'name']].apply(lambda row: ax.text(*row), axis=1)

In [None]:
# Scatter plot of the 'meandne' column against the 'age' column of df.
df.plot.scatter(x='age', y='meandne')

## Prediction

### age x mean dne

In [None]:
# Drop the cached analyze module so the import below executes its current
# source. pop(..., None) also works in a fresh kernel, where the module is not
# cached yet and `del sys.modules[...]` would raise KeyError.
sys.modules.pop('projects.auricular.analyze', None)
from projects.auricular.analyze import evaluateAllModels
from sklearn.linear_model import LinearRegression

# Rebinds `df`: one row per record with the age, its natural logarithm and the
# 'sampled_dne' value stored under 'dist' key 2.0.
df = pd.DataFrame(
    data=[[
        float(data1['age']),
        np.log(float(data1['age'])),
        float(data1['dist'][2.0]['sampled_dne'])] for data1 in data],
    columns=['age', 'logAge', 'dne'])

# Evaluate a linear regression with 'dne' as the independent variable and
# 'logAge' as the dependent one; the result is displayed as a DataFrame.
r = evaluateAllModels(df, indeps=[['dne']], dep=['logAge'], model=LinearRegression())
pd.DataFrame(r)

### age x vector dne

In [None]:
# `hist_descriptors` is a dict keyed by distance, so calling
# getSampleHistogramData on it directly raised AttributeError. Select one
# entry first and query it once instead of once per chart.
# NOTE(review): distance 1.0 is chosen because the other cells that use
# 3 bins read hist_descriptors[1.0] - confirm.
_hist3 = hist_descriptors[1.0].getSampleHistogramData(3)
# One bar chart per inspected index.
for _idx in (0, 107, 207, 307):
    pd.DataFrame(_hist3[_idx]).plot.bar()

In [None]:
# Query the 3-bin histogram data once; the original computed it twice, once
# for each coordinate.
_hist3 = hist_descriptors[1.0].getSampleHistogramData(3)
# First and third bin of every histogram become the plot coordinates.
x = [row[0] for row in _hist3]
y = [row[2] for row in _hist3]
# Ages in the same order as `data`; note this rebinds the global `age`.
age = [float(data1['age']) for data1 in data]
# Scatter of bin 0 against bin 2, coloured by age.
pd.DataFrame({'x': x, 'y': y, 'age': age}).plot.scatter(x='x', y='y', c='age', colormap='viridis')

In [None]:
def model_for_bins(bins, dist=1.0, model=None):
    """Evaluate a regressor that predicts log-age from histogram bins.

    Parameters
    ----------
    bins : int
        Passed to ``getSampleHistogramData``; columns ``0 .. bins-1`` of the
        resulting frame are used as the independent variables.
    dist : float
        Key selecting the entry of ``hist_descriptors`` to use.
    model : estimator, optional
        Passed through to ``evaluateAllModels``. Defaults to a new
        ``LinearRegression()`` created on each call; the original default was
        a single instance created at definition time and shared by all calls.

    Returns
    -------
    pandas.DataFrame
        The result of ``evaluateAllModels`` wrapped in a DataFrame.
    """
    if model is None:
        model = LinearRegression()

    df = pd.DataFrame(hist_descriptors[dist].getSampleHistogramData(bins))
    # Ages are taken from `data`, so the rows must be in the same order as
    # the histogram rows.
    df['age'] = [float(data1['age']) for data1 in data]
    df['logAge'] = np.log(df['age'])

    r = evaluateAllModels(df, indeps=[list(range(bins))], dep=['logAge'], model=model)
    return pd.DataFrame(r)

# Run the helper with 4 bins and its default distance and model; the returned
# DataFrame is the cell output.
model_for_bins(4)

In [None]:
# `hist_descriptors` is a dict keyed by distance; calling
# getSampleHistogramData on the dict itself raised AttributeError.
# NOTE(review): distance 1.0 matches the default used by model_for_bins -
# confirm it is the intended one.
df = pd.DataFrame(hist_descriptors[1.0].getSampleHistogramData(3))
df['age'] = [float(data1['age']) for data1 in data]
df['logAge'] = np.log(df['age'])

# Linear regression with columns 0 and 2 as the independent variables and
# 'logAge' as the dependent one.
pd.DataFrame(evaluateAllModels(df, indeps=[[0, 2]], dep=['logAge'], model=LinearRegression()))

In [None]:
# Evaluate a LinearRegression with columns 0, 1 and 2 of df as the independent
# variables and 'logAge' as the dependent one; shown as a DataFrame.
pd.DataFrame(evaluateAllModels(df, indeps=[[0,1,2]], dep=['logAge'], model=LinearRegression()))

In [None]:
def plot_rmse_per_bins(bins_rmse_list):
    """Plot RMSE against bin count.

    ``bins_rmse_list`` is a sequence of ``(bins, rmse)`` pairs. The pairs are
    unzipped into two columns, drawn as a line plot, and the x ticks are set
    to the bin values present in the input.
    """
    bin_counts, errors = zip(*bins_rmse_list)
    frame = pd.DataFrame({'rmse': errors, 'bins': bin_counts})
    frame.plot(x='bins', y='rmse')
    # One tick per bin count that was evaluated.
    plt.xticks(frame['bins'])
    
def bins_rmse(dist=1.0, model=None, bin_counts=range(2, 20)):
    """Yield ``(bins, rmse)`` pairs for a sweep over histogram bin counts.

    Parameters
    ----------
    dist : float
        Forwarded to ``model_for_bins``.
    model : estimator, optional
        Forwarded to ``model_for_bins``. Defaults to a new
        ``LinearRegression()`` per call; the original default was one instance
        created at definition time and shared by every call.
    bin_counts : iterable of int
        Bin counts to evaluate. Defaults to ``range(2, 20)``, the range that
        was previously hard-coded.

    Yields
    ------
    tuple
        The bin count and the value at label 0 of the ``'rmse'`` column
        returned by ``model_for_bins``.
    """
    if model is None:
        model = LinearRegression()
    for bins in bin_counts:
        yield bins, model_for_bins(bins, dist, model)['rmse'][0]

In [None]:
from sklearn.linear_model import LinearRegression
# Collect the (bins, rmse) pairs produced with bins_rmse's default arguments
# and plot them.
lr_bins_rmse_list = list(bins_rmse())
plot_rmse_per_bins(lr_bins_rmse_list)

In [None]:
from sklearn.svm import SVR
# Collect the (bins, rmse) pairs for an SVR with default settings and plot
# them.
svr_bins_rmse_list = list(bins_rmse(model=SVR()))
plot_rmse_per_bins(svr_bins_rmse_list)

In [None]:
from sklearn.svm import LinearSVR
# Collect the (bins, rmse) pairs for a LinearSVR with default settings and
# plot them.
lsvr_bins_rmse_list = list(bins_rmse(model=LinearSVR()))
plot_rmse_per_bins(lsvr_bins_rmse_list)

In [None]:
def compareMethods(dist=1.0):
    """Plot RMSE against bin count for three regressors on one chart.

    Runs ``bins_rmse`` for ``LinearSVR``, ``SVR`` and ``LinearRegression``
    with the given ``dist`` and draws one line per regressor, with the bin
    counts on the x axis.
    """
    def sweep(estimator):
        # Unzip the (bins, rmse) pairs into a bins tuple and an rmse tuple.
        return tuple(zip(*bins_rmse(dist=dist, model=estimator)))

    # Evaluation order: LinearSVR, SVR, then LinearRegression.
    _, linear_svr_rmse = sweep(LinearSVR())
    bin_counts, svr_rmse = sweep(SVR())
    _, linear_rmse = sweep(LinearRegression())

    # Column order determines the order of the plotted lines.
    table = pd.DataFrame({
        'linear regression': linear_rmse,
        'linear SVR': linear_svr_rmse,
        'SVR': svr_rmse,
        'bins': bin_counts})
    table.plot.line(x='bins')
    # One tick per evaluated bin count.
    plt.xticks(table['bins'])

In [None]:
# Draw the regressor comparison for dist=2.0 and then for dist=1.0.
compareMethods(2.0)

compareMethods(1.0)

In [None]:
# `hist_descriptors` is a dict keyed by distance, so one entry has to be
# selected before querying it (the original raised AttributeError).
# np.asarray makes the column slices below (a[:, 3]) valid whatever sequence
# type getSampleHistogramData returns.
# NOTE(review): distance 1.0 matches the default used by model_for_bins -
# confirm it is the intended one.
a = np.asarray(hist_descriptors[1.0].getSampleHistogramData(10))
# Recompute the ages from `data` so the cell does not depend on which earlier
# cell last bound the global `age`.
age = [float(data1['age']) for data1 in data]
# `dne` was never assigned in the visible cells (NameError).
# NOTE(review): taken from 'sampled_dne' under 'dist' key 2.0, the value the
# earlier cells store in their 'dne' / 'meandne' columns - confirm.
dne = [float(data1['dist'][2.0]['sampled_dne']) for data1 in data]

# Log-scaled image of the whole histogram matrix. Bins equal to zero become
# -inf under np.log.
plt.imshow(np.log(a), interpolation='none')
plt.show()
# log(dne) against age.
plt.scatter(age, np.log(dne))
plt.show()
# Log of histogram bin 3 against age.
plt.scatter(age, np.log(a[:, 3]))
plt.show()
# Log of histogram bin 8 against age.
plt.scatter(age, np.log(a[:, 8]))
plt.show()