# Analysis between Datasets

### Import Libraries

In [None]:
import umap 
import umap.plot

import pandas as pd
import numpy as np
import seaborn as sns

import matplotlib.pyplot as plt

import joblib

# Seed NumPy's global random number generator with a fixed value.
# NOTE(review): presumably meant to make the stochastic steps in this notebook
# (e.g. the UMAP embedding) repeatable between runs — confirm.
np.random.seed(0)

### Load Data

In [None]:
# Read the four pickled frames in one pass; the order of the paths
# matches the order of the names on the left-hand side.
df_males_2d_single, df_males_2d_single_far, df_males_2d_stereo, df_males_3d = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        'E:/2d-anomaly/FILTER/2m/df.pkl',
        'E:/2d-anomaly/FILTER/15m/df.pkl',
        'E:/2d-anomaly/FILTER/telecentric/df.pkl',
        'E:/2d-anomaly/FILTER/3D/df.pkl',
    )
)

In [None]:
# Columns that every one of the four frames provides, so the frames can be
# stacked column-for-column.
shared_columns = (
    set(df_males_2d_single.columns.values)
    & set(df_males_3d.columns.values)
    & set(df_males_2d_stereo.columns.values)
    & set(df_males_2d_single_far.columns.values)
)

# Drop the 'Persistence' and 'Turning' columns by building a new list.
# Removing items from a list while iterating over it skips the element that
# follows each removal, which would let some of these columns slip through.
# Sorting gives a column order that does not depend on set iteration order.
columns = sorted(
    col for col in shared_columns
    if 'Persistence' not in col and 'Turning' not in col
)

# Stack the frames and tag every row with the dataset it came from.
labelled_frames = [
    ('3D Dataset', df_males_3d),
    ('2D Single Camera at 2m', df_males_2d_single),
    ('2D Telecentric', df_males_2d_stereo),
    ('2D Single Camera at 15m', df_males_2d_single_far),
]
all_data = pd.concat([frame[columns].assign(type=label) for label, frame in labelled_frames])

### UMAP

In [None]:
# Fit UMAP on the feature columns only: 'type' is the dataset label added in
# this notebook, and 'seq' / 'track_group' are dropped alongside it.
mapper = umap.UMAP().fit(all_data.drop(columns=['type', 'seq', 'track_group']))

# umap.plot.points creates its own figure when no axes is passed, so a bare
# plt.figure(dpi=600) beforehand would stay empty. Create the high-resolution
# axes explicitly and draw onto it.
fig, ax = plt.subplots(dpi=600)
umap.plot.points(mapper, labels=all_data['type'].values, ax=ax)
plt.show()

In [None]:
# Large square canvas for the embedding, coloured by source dataset.
fig, ax = plt.subplots(figsize=(10, 10), dpi=600)
dataset_labels = all_data['type'].values
plot_obj = umap.plot.points(mapper, ax=ax, labels=dataset_labels, show_legend=True)
# Title left off on purpose; to add one: ax.set_title('UMAP Visualisation of Datasets')
plt.show()

In [None]:
# Legend settings applied through matplotlib's global rcParams, so they also
# affect every figure drawn after this cell.
params = {'legend.fontsize': 15, 'legend.handlelength': 2}
plt.rcParams.update(params)

# Same embedding plot as before, redrawn with the updated legend settings.
fig, ax = plt.subplots(figsize=(10, 10), dpi=600)
dataset_labels = all_data['type'].values
plot_obj = umap.plot.points(mapper, ax=ax, labels=dataset_labels, show_legend=True)
# Optional extras, currently disabled:
#   ax.set_title('UMAP Visualisation of Datasets')
#   plt.legend(prop={'size': 6})
plt.show()

### Correlation

In [None]:
# Orientation tag used in the 2D file names. Options: 'Y-Z', 'X-Y', 'X-Z'
orientation = 'Y-Z'

# Segment value used in the file names. Options: 20, 30, 40, 50
segment = 50
overlap = int(segment/2)


def _load_group_results(directory, suffix):
    """Load the four per-group result files of one dataset and stack them.

    The files are named ``df_<group>_<suffix>.dat`` for the groups 'male',
    'couples', 'females' and 'focal_males', and are read with joblib.

    Parameters
    ----------
    directory : str
        Folder containing the files, including the trailing slash.
    suffix : str
        Part of the file name between the group name and the '.dat' extension.

    Returns
    -------
    pandas.DataFrame
        The four loaded frames concatenated in the order listed above.
    """
    groups = ('male', 'couples', 'females', 'focal_males')
    return pd.concat([joblib.load(f'{directory}df_{group}_{suffix}.dat') for group in groups])


dir_3d = 'E:/2d-anomaly/dfs-corr/'
dir_2d = 'E:/2d-anomaly/dfs-corr-2d/'

# 3D results: file names carry only the segment and overlap.
results_3d = _load_group_results(dir_3d, f'{segment}_{overlap}')

# Single-camera 2D results: the last file-name field is the camera distance,
# or the literal text 'None' for the 2 m set.
# NOTE(review): 3000 ... 15000 are presumably millimetres (results_3m ... results_15m) — confirm.
single_camera = f'single camera_{orientation}_{segment}_{overlap}'
results_2m = _load_group_results(dir_2d, f'{single_camera}_None')
results_3m = _load_group_results(dir_2d, f'{single_camera}_3000')
results_4m = _load_group_results(dir_2d, f'{single_camera}_4000')
results_5m = _load_group_results(dir_2d, f'{single_camera}_5000')
results_9m = _load_group_results(dir_2d, f'{single_camera}_9000')
results_15m = _load_group_results(dir_2d, f'{single_camera}_15000')

# Files tagged 'stereo' (plotted as '2D Telecentric' further down); no distance field.
results_tele = _load_group_results(dir_2d, f'stereo_{orientation}_{segment}_{overlap}_None')

In [None]:
# Frames under comparison. `type_labels` (stored in all_data['type']) and
# `dataset_names` (heatmap axis labels) follow the same order as `dataframes`.
dataframes = [results_3d, results_2m, results_3m, results_4m, results_5m, results_9m, results_15m, results_tele]
type_labels = ['3D Dataset', '2D Single Camera at 2m', '2D Single Camera at 3m', '2D Single Camera at 4m',
               '2D Single Camera at 5m', '2D Single Camera at 9m', '2D Single Camera at 15m', '2D Telecentric']
dataset_names = ['3D', '2M', '3M', '4M', '5M', '9M', '15M', 'Tele']

# Columns present in every frame, minus the identifier columns and the
# 'Persistence' / 'Turning' columns. Sorted so the order is reproducible.
common_columns = set(dataframes[0].columns)
for df in dataframes[1:]:
    common_columns.intersection_update(df.columns)

excluded_tokens = ('Persistence', 'Turning', 'seq', 'track_group')
common_columns = sorted(
    col for col in common_columns
    if not any(token in col for token in excluded_tokens)
)

all_data = pd.concat(
    [df[common_columns].assign(type=name) for df, name in zip(dataframes, type_labels)],
    ignore_index=True,
)

# Mean absolute per-column correlation for every unordered pair of datasets,
# written into a symmetric matrix whose diagonal stays at 1.
correlations = {}
datasets = dataframes
corr_matrix = np.ones((len(datasets), len(datasets)))

for i, df1 in enumerate(datasets):
    for j in range(i + 1, len(datasets)):
        df2 = datasets[j]
        # Correlate on the columns shared by *these* frames (common_columns),
        # not on a column list left over from another part of the notebook.
        # NOTE(review): corrwith pairs rows by index label, and these frames
        # come from pd.concat without ignore_index — confirm the row pairing
        # is the intended one.
        corr = df1[common_columns].corrwith(df2[common_columns])
        corr_value = np.abs(corr).mean()
        correlations[(dataset_names[i], dataset_names[j])] = corr_value
        corr_matrix[i, j] = corr_matrix[j, i] = corr_value

corr_df = pd.DataFrame(corr_matrix, columns=dataset_names, index=dataset_names)

# Plotting
plt.figure(dpi=600)
sns.heatmap(corr_df, cmap='flare', annot=True)
plt.title(f'Correlation between Datasets - {orientation} - segment: {segment}, overlap: {overlap}')
plt.show()


In [None]:
# Subset of the datasets (the 3 m and 4 m sets are left out). The full names
# double as the all_data['type'] labels and as the heatmap axis labels, and
# follow the same order as `dataframes`.
dataframes = [results_3d, results_2m, results_5m, results_9m, results_15m, results_tele]
dataset_names = [
    '3D Dataset',
    '2D Single Camera at 2m',
    '2D Single Camera at 5m',
    '2D Single Camera at 9m',
    '2D Single Camera at 15m',
    '2D Telecentric',
]

# Columns present in every frame, minus the identifier columns and the
# 'Persistence' / 'Turning' columns. Sorted so the order is reproducible.
common_columns = set(dataframes[0].columns)
for df in dataframes[1:]:
    common_columns.intersection_update(df.columns)

excluded_tokens = ('Persistence', 'Turning', 'seq', 'track_group')
common_columns = sorted(
    col for col in common_columns
    if not any(token in col for token in excluded_tokens)
)

all_data = pd.concat(
    [df[common_columns].assign(type=name) for df, name in zip(dataframes, dataset_names)],
    ignore_index=True,
)

# Mean absolute per-column correlation for every unordered pair of datasets,
# written into a symmetric matrix whose diagonal stays at 1.
correlations = {}
datasets = dataframes
corr_matrix = np.ones((len(datasets), len(datasets)))

for i, df1 in enumerate(datasets):
    for j in range(i + 1, len(datasets)):
        df2 = datasets[j]
        # Correlate on the columns shared by *these* frames (common_columns),
        # not on a column list left over from another part of the notebook.
        # NOTE(review): corrwith pairs rows by index label, and these frames
        # come from pd.concat without ignore_index — confirm the row pairing
        # is the intended one.
        corr = df1[common_columns].corrwith(df2[common_columns])
        corr_value = np.abs(corr).mean()
        correlations[(dataset_names[i], dataset_names[j])] = corr_value
        corr_matrix[i, j] = corr_matrix[j, i] = corr_value

corr_df = pd.DataFrame(corr_matrix, columns=dataset_names, index=dataset_names)

plt.figure(dpi=600)
sns.heatmap(corr_df, cmap='flare', annot=True)
# Title left off on purpose; to add one:
#   plt.title(f'Correlation between Datasets - {orientation} - segment: {segment}, overlap: {overlap}')
plt.show()


### Feature Comparisons

In [None]:
# From each folder, load 'shap_train.npy' and keep the column labels of its
# first element. Folder order matches the unpacking order below.
shap_columns = []
for path in (
    'E:/2d-anomaly/FILTER/3d/',
    'E:/2d-anomaly/FILTER/telecentric/',
    'E:/2d-anomaly/FILTER/2m/',
    'E:/2d-anomaly/FILTER/15m/',
):
    shap_train = np.load(path + 'shap_train.npy', allow_pickle=True)
    shap_columns.append(shap_train[0].columns)

data_3d, data_2d_stereo, data_2d_single, data_2d_single_far = shap_columns


In [None]:
# Report how many features each dataset has, in a single print call.
feature_sets = [
    ('Number of features\n2D Stereo: ', data_2d_stereo),
    ('\n2D Single: ', data_2d_single),
    ('\n2D single far: ', data_2d_single_far),
    ('\n3D: ', data_3d),
]
report = []
for label, features in feature_sets:
    report += [label, len(features)]
print(*report)

In [None]:
# Count the feature names shared between the three 2D feature sets
# (all three together, then each pair).
stereo_set = set(data_2d_stereo)
single_set = set(data_2d_single)
far_set = set(data_2d_single_far)

same = stereo_set & single_set & far_set
print('Number of similar features across all: ', len(same))

same = stereo_set & single_set
print('Number of similar features across 2D stereo vs 2D single: ', len(same))

same = stereo_set & far_set
print('Number of similar features across 2D stereo vs 2D single far: ', len(same))

same = single_set & far_set
print('Number of similar features across 2D single vs 2D single far: ', len(same))

In [None]:
from collections import Counter

def feature_review(dataset):
    """Split feature labels at '(' and tally the two parts.

    Each label is split on '('. The text before the first '(' is collected
    as the feature name. When the label contains a '(', the second piece with
    its last character removed is collected as the feature type. Labels
    without '(' contribute a name only.

    Prints a Counter of the names and then a Counter of the types.

    Returns
    -------
    tuple of (list, list)
        The collected names and the collected types, in input order.
    """
    pieces = [label.split('(') for label in dataset]
    feature_names = [parts[0] for parts in pieces]
    feature_types = [parts[1][:-1] for parts in pieces if len(parts) > 1]
    for tally in (Counter(feature_names), Counter(feature_types)):
        print(tally)
    return feature_names, feature_types

# Run the review on each 2D feature set under its own heading, then unpack
# the (names, types) pairs into the per-dataset variables.
reviews = {}
for heading, key, features in (
    ('-- 2D STEREO --', 'stereo', data_2d_stereo),
    ('\n-- 2D SINGLE --', 'single', data_2d_single),
    ('\n-- 2D SINGLE FAR --', 'single_far', data_2d_single_far),
):
    print(heading)
    reviews[key] = feature_review(features)

fname_2d_stereo, ftype_2d_stereo = reviews['stereo']
fname_2d_single, ftype_2d_single = reviews['single']
fname_2d_single_far, ftype_2d_single_far = reviews['single_far']

In [None]:
print('-- Percentage of Feature Similarity --')

# Shared full feature names, as a fraction of one dataset's feature count.
stereo_set = set(data_2d_stereo)

same = stereo_set.intersection(data_2d_single)
print('2D stereo vs 2D single: ', len(same) / len(data_2d_stereo))

same = stereo_set.intersection(data_2d_single_far)
print('2D stereo vs 2D single far: ', len(same) / len(data_2d_stereo))

# NOTE(review): this ratio is taken over the single-far count, whereas the two
# above use the first-named dataset — confirm that is intended.
same = set(data_2d_single).intersection(data_2d_single_far)
print('2D single vs 2D single far: ', len(same) / len(data_2d_single_far))

In [None]:
print('-- Percentage of Feature Similarity --')

# Fraction of the distinct stereo feature names also found in the single set.
stereo_names = set(fname_2d_stereo)
same = stereo_names & set(fname_2d_single)
print('2D stereo vs 2D single: ', len(same) / len(stereo_names))

# Same fraction, computed on the distinct feature types.
stereo_types = set(ftype_2d_stereo)
same = stereo_types & set(ftype_2d_single)
print('2D stereo vs 2D single: ', len(same) / len(stereo_types))

In [None]:
print('-- Percentage of Feature Similarity --')

# Fraction of the distinct stereo feature names also found in the single-far
# set. The label names the dataset actually compared (single far).
stereo_names = set(fname_2d_stereo)
same = stereo_names & set(fname_2d_single_far)
print('2D stereo vs 2D single far: ', len(same) / len(stereo_names))

# Same fraction, computed on the distinct feature types.
stereo_types = set(ftype_2d_stereo)
same = stereo_types & set(ftype_2d_single_far)
print('2D stereo vs 2D single far: ', len(same) / len(stereo_types))

In [None]:
# Feature names that occur in the stereo set but not in the single set.
stereo_names = set(fname_2d_stereo)
same = list(stereo_names & set(fname_2d_single))
names = [name for name in stereo_names if name not in same]
print(names)

In [None]:
# Feature names that occur in the stereo set but not in the single-far set.
stereo_names = set(fname_2d_stereo)
same = list(stereo_names & set(fname_2d_single_far))
names = [name for name in stereo_names if name not in same]
print(names)

In [None]:
# Feature names that occur in the single-far set but not in the single set.
far_names = set(fname_2d_single_far)
same = list(set(fname_2d_single) & far_names)
names = [name for name in far_names if name not in same]
print(names)

In [None]:
# Print the text between '(' and the final character of every single-far
# feature label, in sorted order. An empty line is printed when the label
# has no '(' (IndexError) or is not a string (AttributeError).
for feature in sorted(data_2d_single_far):
    try:
        feature_type = feature.split('(')[1][:-1]
    except (IndexError, AttributeError):
        # Catch only the expected failures so that KeyboardInterrupt and
        # unrelated errors are no longer swallowed.
        feature_type = ''
    print(feature_type)