# Analysis between Datasets

### Import Libraries

In [None]:
import umap 
import umap.plot

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

np.random.seed(0)

### Load Data

In [None]:
# Load the pickled table for each of the four datasets being compared.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
(
    df_males_2d_single,
    df_males_2d_single_far,
    df_males_2d_stereo,
    df_males_3d,
) = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        'comparison-data/2d-single/df.pkl',
        'comparison-data/2d-single-far/df.pkl',
        'comparison-data/2d-stereo/df.pkl',
        'comparison-data/3d/df.pkl',
    )
)

In [None]:
# Columns present in all four datasets.
shared_columns = (
    set(df_males_2d_single.columns.values)
    & set(df_males_3d.columns.values)
    & set(df_males_2d_stereo.columns.values)
    & set(df_males_2d_single_far.columns.values)
)

# Drop the persistence/turning features. A new list is built instead of
# calling remove() while iterating, which skips the element that follows
# every removal and can leave unwanted columns behind. Sorting gives a
# reproducible column order (set iteration order of strings varies per run).
columns = [
    col for col in sorted(shared_columns)
    if 'Persistence' not in col and 'Turning' not in col
]

# Stack the datasets and tag every row with the dataset it came from; the
# label order must match the concatenation order.
labelled_frames = [
    ('3D Dataset', df_males_3d),
    ('2D Single Camera at 2m', df_males_2d_single),
    ('2D Telecentric', df_males_2d_stereo),
    ('2D Single Camera at 7m', df_males_2d_single_far),
]
all_data = pd.concat([frame[columns] for _, frame in labelled_frames])
all_data['type'] = [
    label for label, frame in labelled_frames for _ in range(len(frame))
]

### UMAP

In [None]:
# Fit UMAP on every column except 'type', 'seq' and 'track_group'.
mapper = umap.UMAP().fit(all_data.drop(columns=['type', 'seq', 'track_group']))

# umap.plot.points creates its own figure when no axis is supplied, so a bare
# plt.figure(dpi=400) only produced an empty extra figure and the dpi was never
# applied. Create the axis explicitly and hand it to the plot call instead.
fig, ax = plt.subplots(dpi=400)
umap.plot.points(mapper, labels=all_data['type'].values, theme='fire', ax=ax)
plt.show()

In [None]:
# Draw the fitted embedding on a 10x10 inch canvas, coloured by dataset,
# with a legend and a title.
dataset_labels = all_data['type'].values
fig, ax = plt.subplots(figsize=(10, 10), dpi=300)
plot_obj = umap.plot.points(mapper, labels=dataset_labels, ax=ax, show_legend=True)
ax.set_title('UMAP Visualisation of Datasets')
plt.show()

### Correlation

In [None]:
# Column-wise correlation between every pair of datasets, restricted to the
# shared columns. DataFrame.corrwith aligns both frames on their index and
# column labels before correlating, so rows are paired by index label.
features_3d = df_males_3d[columns]
features_2d_single = df_males_2d_single[columns]
features_2d_stereo = df_males_2d_stereo[columns]
features_2d_single_far = df_males_2d_single_far[columns]

df_2d_single_vs_3d = features_2d_single.corrwith(features_3d)
df_2d_stereo_vs_3d = features_2d_stereo.corrwith(features_3d)
df_2d_stereo_vs_2d_single = features_2d_stereo.corrwith(features_2d_single)
df_2d_single_vs_far = features_2d_single.corrwith(features_2d_single_far)
df_3d_vs_far = features_3d.corrwith(features_2d_single_far)
df_2d_stereo_vs_far = features_2d_stereo.corrwith(features_2d_single_far)

In [None]:
# Write each per-feature correlation series to its own spreadsheet.
excel_exports = {
    'correlations1.xlsx': df_2d_single_vs_3d,
    'correlations2.xlsx': df_2d_stereo_vs_3d,
    'correlations3.xlsx': df_2d_stereo_vs_2d_single,
    'correlations4.xlsx': df_2d_single_vs_far,
    'correlations5.xlsx': df_3d_vs_far,
    'correlations6.xlsx': df_2d_stereo_vs_far,
}
for excel_name, pairwise_corr in excel_exports.items():
    pairwise_corr.to_excel(excel_name)

In [None]:
# Collapse each per-feature correlation series into its mean.
# The names are rebound from a series to a single number here.
(
    df_2d_single_vs_3d,
    df_2d_stereo_vs_3d,
    df_2d_stereo_vs_2d_single,
    df_2d_single_vs_far,
    df_3d_vs_far,
    df_2d_stereo_vs_far,
) = (
    pairwise_corr.mean()
    for pairwise_corr in (
        df_2d_single_vs_3d,
        df_2d_stereo_vs_3d,
        df_2d_stereo_vs_2d_single,
        df_2d_single_vs_far,
        df_3d_vs_far,
        df_2d_stereo_vs_far,
    )
)

In [None]:
# Row/column order of the dataset-by-dataset correlation matrix.
datasets = ['3D Dataset', '2D Telecentric Dataset', '2D Single Camera (at 2m)', '2D Single Camera (at 7m)']

# Mean correlation of each dataset pair, keyed by (row, column) in the upper
# triangle; the matrix is symmetric so each value is mirrored below.
upper_triangle = {
    (0, 1): df_2d_stereo_vs_3d,
    (0, 2): df_2d_single_vs_3d,
    (0, 3): df_3d_vs_far,
    (1, 2): df_2d_stereo_vs_2d_single,
    (1, 3): df_2d_stereo_vs_far,
    (2, 3): df_2d_single_vs_far,
}

# Start from the identity: every dataset correlates perfectly with itself.
correlation_matrix = np.eye(len(datasets))
for (row, col), mean_corr in upper_triangle.items():
    correlation_matrix[row, col] = mean_corr
    correlation_matrix[col, row] = mean_corr

corr_df = pd.DataFrame(correlation_matrix, index=datasets, columns=datasets)

# No mask: the full symmetric matrix is shown. A triangular boolean mask
# (e.g. built with np.triu) could be passed instead to hide the mirrored half.
mask = None

plt.figure(dpi=300)
hmap = sns.heatmap(corr_df, mask=mask, cmap='flare', annot=True)
plt.title('Correlation between Datasets')
plt.show()


### Feature Histograms

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the pickled object arrays for each population of the 2D stereo data.
# NOTE: allow_pickle=True unpickles the file contents; only load trusted files.
# NOTE(review): the stereo file is 'couple.npy' while the single-camera file
# is 'couples.npy' -- confirm both names match the files on disk.
male_2d_stereo, couple_2d_stereo, female_2d_stereo, focal_male_2d_stereo = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        '2d-stereo-data/males.npy',
        '2d-stereo-data/couple.npy',
        '2d-stereo-data/females.npy',
        '2d-stereo-data/focal-males.npy',
    )
)

# Same four populations for the 2D single-camera data.
male_2d_single, couple_2d_single, female_2d_single, focal_male_2d_single = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        '2d-single-data/males.npy',
        '2d-single-data/couples.npy',
        '2d-single-data/females.npy',
        '2d-single-data/focal-males.npy',
    )
)

In [None]:
def get_features(dataset, feature):
    """Collect every non-NaN value of one feature column across a dataset.

    Args:
        dataset: Iterable of trials; each trial is an iterable of groups and
            each group an iterable of 2D track arrays.
        feature: Column index to read from every track array.

    Returns:
        A flat list of the column's values, in traversal order, with NaNs
        removed.
    """
    return [
        value
        for trial in dataset
        for group in trial
        for track in group
        for value in track[:, feature].tolist()
        if not np.isnan(value)
    ]


def create_histogram(male_feature, couple_feature, female_feature, focal_male_feature, title):
    """Show overlaid, density-normalised histograms of one feature.

    Only the male and couple distributions are drawn.

    Args:
        male_feature: Sequence of feature values for males.
        couple_feature: Sequence of feature values for couples.
        female_feature: Accepted for interface compatibility; not plotted.
        focal_male_feature: Accepted for interface compatibility; not plotted.
        title: Title of the figure.
    """
    # Derive 40 bins from the pooled data so both histograms share bin edges.
    bins = np.histogram(np.hstack((male_feature, couple_feature)), bins=40)[1]

    fig = plt.figure(figsize=(15, 8))
    fig.set_dpi(200)
    plt.hist(male_feature, density=True, bins=bins, alpha=0.5, label='male')
    plt.hist(couple_feature, density=True, bins=bins, alpha=0.5, label='couple')

    # Label the series at the plot calls so the legend always matches what is
    # drawn; previously four labels were passed for two plotted series.
    fig.legend()
    plt.xlabel('Feature Value')
    # density=True normalises the histograms, so the axis shows a density.
    plt.ylabel('Density')
    plt.title(title)
    plt.show()

# Display names for the track feature columns, in column-index order: the
# plotting loop below uses entry i as the title for the histogram of column i.
feature_columns = [
    'Y Coord',
    'Z Coord',
    'Y Velocity',
    'Z Velocity',
    'Angular Velocity Y-Z',
    'Angular Acceleration Y-Z',
    'Angle of Flight', 
    'Centroid Distance Function',
    'Persistence Velocity',
    'Turning Velocity',
    'Radial Velocity',
    'Radial Acceleration',
    'Radial Jerk',
    'Y Acceleration',
    'Z Acceleration'
] 

# For every feature column, plot the stereo histogram and then the
# single-camera histogram. The number of columns is read from the first
# row of the first single-camera male track.
histogram_sources = (
    ('2D Stereo', (male_2d_stereo, couple_2d_stereo, female_2d_stereo, focal_male_2d_stereo)),
    ('2D Single', (male_2d_single, couple_2d_single, female_2d_single, focal_male_2d_single)),
)

for feature in range(len(male_2d_single[0][0][0][0])):
    for setup_name, populations in histogram_sources:
        create_histogram(
            *(get_features(population, feature) for population in populations),
            title=f'{setup_name}: {feature_columns[feature]}',
        )

In [None]:
### Feature Comparisons

In [None]:
import numpy as np

# Read the column labels of the first element stored in each model's
# shap_train.npy file.
# NOTE: allow_pickle=True unpickles the file contents; only load trusted files.
shap_filename = 'shap_train.npy'

path = '../results/2d stereo model performance/'
data_2d_stereo = np.load(path + shap_filename, allow_pickle=True)[0].columns

path = '../results/2d single model performance/'
data_2d_single = np.load(path + shap_filename, allow_pickle=True)[0].columns

path = '../results/2d single far model performance/'
data_2d_single_far = np.load(path + shap_filename, allow_pickle=True)[0].columns


In [None]:
# Report how many feature columns each model has.
n_2d_stereo = len(data_2d_stereo)
n_2d_single = len(data_2d_single)
n_2d_single_far = len(data_2d_single_far)
print(
    'Number of features\n2D Stereo: ', n_2d_stereo,
    '\n2D Single: ', n_2d_single,
    '\n2D single far: ', n_2d_single_far,
)

In [None]:
# Count the feature names shared between the models: first across all three,
# then for each pair.
features_2d_stereo = set(data_2d_stereo)
features_2d_single = set(data_2d_single)
features_2d_single_far = set(data_2d_single_far)

same = features_2d_stereo & features_2d_single & features_2d_single_far
print('Number of similar features across all: ', len(same))

same = features_2d_stereo & features_2d_single
print('Number of similar features across 2D stereo vs 2D single: ', len(same))

same = features_2d_stereo & features_2d_single_far
print('Number of similar features across 2D stereo vs 2D single far: ', len(same))

same = features_2d_single & features_2d_single_far
print('Number of similar features across 2D single vs 2D single far: ', len(same))

In [None]:
from collections import Counter

def feature_review(dataset):
    """Split feature labels into base names and bracketed qualifiers.

    Each label is split on '('. The text before the first '(' is the name;
    when a '(' is present, the following piece minus its last character is
    the type. Occurrence counts of both are printed.

    Args:
        dataset: Iterable of feature label strings.

    Returns:
        A (feature_names, feature_types) tuple of lists, in input order.
        Labels without '(' contribute no entry to feature_types.
    """
    feature_names, feature_types = [], []
    for label in dataset:
        base, *qualifiers = label.split('(')
        feature_names.append(base)
        if qualifiers:
            # Drop the final character of the qualifier (the closing bracket
            # for labels of the form 'name(type)').
            feature_types.append(qualifiers[0][:-1])

    for tally in (Counter(feature_names), Counter(feature_types)):
        print(tally)
    return feature_names, feature_types

# Summarise the feature names and types used by each 2D model, keeping the
# split-out lists for the comparisons that follow.
print('-- 2D STEREO --')
review_2d_stereo = feature_review(data_2d_stereo)
fname_2d_stereo, ftype_2d_stereo = review_2d_stereo

print('\n-- 2D SINGLE --')
review_2d_single = feature_review(data_2d_single)
fname_2d_single, ftype_2d_single = review_2d_single

print('\n-- 2D SINGLE FAR --')
review_2d_single_far = feature_review(data_2d_single_far)
fname_2d_single_far, ftype_2d_single_far = review_2d_single_far

In [None]:
# Share of features two models have in common, printed as a fraction
# (not multiplied by 100).
print('-- Percentage of Feature Similarity --')

same = set(data_2d_stereo).intersection(data_2d_single)
print('2D stereo vs 2D single: ', len(same) / len(data_2d_stereo))

same = set(data_2d_stereo).intersection(data_2d_single_far)
print('2D stereo vs 2D single far: ', len(same) / len(data_2d_stereo))

# NOTE(review): this ratio is taken over the single-far feature count, while
# the two above use the first-named model -- confirm this is intended.
same = set(data_2d_single).intersection(data_2d_single_far)
print('2D single vs 2D single far: ', len(same) / len(data_2d_single_far))

In [None]:
# Fraction of the distinct 2D stereo feature names (first line) and feature
# types (second line) that also occur in the 2D single model.
print('-- Percentage of Feature Similarity --')

stereo_names = set(fname_2d_stereo)
same = stereo_names.intersection(fname_2d_single)
print('2D stereo vs 2D single: ', len(same) / len(stereo_names))

stereo_types = set(ftype_2d_stereo)
same = stereo_types.intersection(ftype_2d_single)
print('2D stereo vs 2D single: ', len(same) / len(stereo_types))

In [None]:
# Fraction of the distinct 2D stereo feature names (first line) and feature
# types (second line) that also occur in the 2D single far model.
print('-- Percentage of Feature Similarity --')

# The labels previously read '2D stereo vs 2D single' (copied from the cell
# above) although the comparison is against the single far model.
same = set(fname_2d_stereo) & set(fname_2d_single_far)
print('2D stereo vs 2D single far: ', len(same) / len(set(fname_2d_stereo)))

same = set(ftype_2d_stereo) & set(ftype_2d_single_far)
print('2D stereo vs 2D single far: ', len(same) / len(set(ftype_2d_stereo)))

In [None]:
# Feature names used by the 2D stereo model but not by the 2D single model.
same = list(set(fname_2d_stereo) & set(fname_2d_single))
names = [name for name in set(fname_2d_stereo) if name not in same]
print(names)

In [None]:
# Feature names used by the 2D stereo model but not by the 2D single far model.
same = list(set(fname_2d_stereo) & set(fname_2d_single_far))
names = [name for name in set(fname_2d_stereo) if name not in same]
print(names)

In [None]:
# Feature names used by the 2D single far model but not by the 2D single model.
same = list(set(fname_2d_single) & set(fname_2d_single_far))
names = [name for name in set(fname_2d_single_far) if name not in same]
print(names)

In [None]:
# Print the bracketed qualifier of every 2D single far feature, in sorted
# label order. Labels without a '(' print an empty line.
for i in sorted(data_2d_single_far):
    parts = i.split('(')
    # Explicit check instead of a bare `except:`, which would also have hidden
    # unrelated errors (and KeyboardInterrupt) behind an empty line.
    print(parts[1][:-1] if len(parts) > 1 else '')