In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Read the path-length prediction results into `data`.
# Later cells iterate it as a mapping: tree name -> list of per-run entries.
with open('data/path_length_prediction.json') as results_file:
    data = json.load(results_file)

In [None]:
# Peek at the fields of a single record (first 'appletree' entry) to see
# which keys the flattening step below has to handle.
sample_entry = data['appletree'][0]
sample_entry.keys()

In [None]:
def _flatten_entry(tree_name, entry):
    """Turn one nested result entry into a flat dict (one DataFrame row).

    Every key of ``entry`` other than 'predictions' is treated as a planning
    method and contributes '<method>_actual_length' and
    '<method>_actual_length_unoptimized'; both are None when the method's
    value is not a dict. Each item under 'predictions' contributes
    '<predictor name>_predicted_length', with ' (A)' appended when its
    parameters have a truthy 'include_approaches'.
    """
    flat = {'tree': tree_name}

    for key, payload in entry.items():
        if key != 'predictions':
            has_lengths = isinstance(payload, dict)
            flat[f'{key}_actual_length'] = (
                payload['actual_length'] if has_lengths else None
            )
            flat[f'{key}_actual_length_unoptimized'] = (
                payload['actual_length_unoptimized'] if has_lengths else None
            )
            continue

        for predicted in payload:
            params = predicted['predictor_parameters']
            column_name = params['name'] + '_predicted_length'
            if params.get('include_approaches', False):
                column_name += ' (A)'
            flat[column_name] = predicted['predicted_length']

    return flat


# One row per (tree, entry) pair, in the order the JSON provides them.
rows = [
    _flatten_entry(tree_name, entry)
    for tree_name, tree_entries in data.items()
    for entry in tree_entries
]

# Keep only rows where every column has a value (None entries become missing
# values and are dropped here).
df = pd.DataFrame(rows).dropna()

In [None]:
# Columns holding the measured (optimized) path length of each method. The
# '..._unoptimized' variants do not end with this suffix, so they stay on the
# row axis of the correlation table below.
actual_length = [name for name in df.columns if name.endswith('_actual_length')]

# Pearson correlation of every numeric column against each measured length.
# Non-numeric columns (the 'tree' name) are excluded explicitly: since
# pandas 2.0, DataFrame.corr() no longer drops them silently and raises.
c = df.select_dtypes(include='number').corr()[actual_length]

# Remove the measured lengths from the rows so only predictors / unoptimized
# lengths remain. A boolean mask keeps the original column order, so the
# x-axis order of the plots is the same on every run (a set difference is
# arbitrarily ordered).
c = c.loc[~c.index.isin(actual_length)]

# Short display names for the DataFrame columns, used as tick and legend
# labels in the plots. Keys ending in ' (A)' are the predictor columns that
# were produced with 'include_approaches' enabled.
labels = {
    # Straight-line baseline.
    'EuclideanDistancePredictor_predicted_length': 'Euclidean',

    # Measured path lengths, before and after optimization.
    'on_chull_actual_length_unoptimized': 'CHull Unopt',
    'on_cgal_chull_actual_length_unoptimized': 'CGAL Unopt',
    'on_sphere_actual_length_unoptimized': 'Sphere Unopt',
    'on_chull_actual_length': 'Chull Optimized',
    'on_cgal_chull_actual_length': 'CGAL Optimized',
    'on_sphere_actual_length': 'Sphere Optimized',

    # Predictors, each with and without approaches where both exist.
    'CGALConvexHullDistancePredictor_predicted_length': 'CGAL Pred',
    'CGALConvexHullDistancePredictor_predicted_length (A)': 'CGAL Pred (A)',
    'HelicalDistancePredictor_predicted_length': 'Helix',
    'HelicalDistancePredictor_predicted_length (A)': 'Helix (A)',
    'CuttingPlaneConvexHullDistancePredictor_predicted_length': 'CHull Pred',
    'CuttingPlaneConvexHullDistancePredictor_predicted_length (A)': 'CHull Pred (A)',
    'GreatCircleDistancePredictor_predicted_length': 'GreatCircle',
    'GreatCircleDistancePredictor_predicted_length (A)': 'GreatCircle (A)',
    'DendriticConvexHullDistancePredictor_predicted_length': 'Dendritic',
}

In [None]:


# Scatter plot of the correlation table: one series per measured path length
# (columns of `c`), one x position per prediction method (rows of `c`).
for column in c.columns:
    col = c[column]
    plt.scatter(
        x=[labels[l] for l in col.index],
        y=col.values,
        label=labels[column],
    )

# Place the legend outside the axes, anchored at the top-right corner.
plt.legend(bbox_to_anchor=(1, 1), loc="upper left")

plt.xlabel('Prediction method')
plt.ylabel('Correlation')
plt.title('Pearson Correlation between predicted length and actual')

plt.grid()

# Turn the tick labels vertical so the method names do not overlap.
# The Text property is `rotation`; `orientation` is not a Text property and
# makes plt.xticks raise.
_ = plt.xticks(rotation=90)

In [None]:
# Grouped bar chart of the correlation table: one cluster per prediction
# method (rows of `c`), one bar per measured path length (columns of `c`).

# Each cluster occupies one unit on the x-axis; dividing by (n + 1) leaves a
# gap of one bar width between neighbouring clusters.
num_groups = len(c.columns)
bar_width = 1 / (num_groups + 1)

# Left-most bar centre of every cluster.
x_positions = np.arange(len(c.index))

fig, ax = plt.subplots()

# Bar i of each cluster is centred at x + i * bar_width.
for i, column in enumerate(c.columns):
    ax.bar(
        x_positions + i * bar_width,
        c[column].values,
        width=bar_width,
        label=labels[column],
    )

# Bars are centre-aligned at x + i * w for i = 0 .. n-1, so the middle of a
# cluster is x + (n - 1) * w / 2. (Using n * w / 2 puts the tick half a bar
# too far to the right.)
ax.set_xticks(x_positions + (num_groups - 1) * bar_width / 2)

# `rotation` is the Text property for vertical labels; `orientation` is not
# a Text property and raises when passed to plt.xticks.
ax.set_xticklabels([labels[l] for l in c.index], rotation=90)

# Place the legend outside the axes, anchored at the top-right corner.
ax.legend(bbox_to_anchor=(1, 1), loc="upper left")

ax.set_xlabel('Prediction method')
ax.set_ylabel('Correlation')
ax.set_title('Pearson Correlation between predicted length and actual')

ax.grid()

# Zoom in on the 0.5 - 1.0 range; bars below 0.5 are cut off by this limit.
ax.set_ylim(0.5, 1.0)

plt.show()


In [None]:

# Optimized vs. unoptimized path length, one panel per measured-length column.
# The panel count follows `actual_length` instead of a fixed 3, so no series is
# silently dropped by zip(); at least one panel is always created because
# plt.subplots rejects ncols=0. For three series this is the same 15x5 figure.
n_panels = max(len(actual_length), 1)
plot, axes_grid = plt.subplots(
    ncols=n_panels, figsize=(5 * n_panels, 5), squeeze=False
)
# squeeze=False always returns a 2-D array; keep `axes` as the single row.
axes = axes_grid[0]

for ax, col in zip(axes, actual_length):
    unoptimized = df[col + '_unoptimized']
    optimized = df[col]

    ax.scatter(unoptimized, optimized, s=10)

    # y = x reference line: points below it got shorter after optimization.
    # It spans at least 0..25 and is extended if the data goes beyond that.
    diagonal_end = max(25, unoptimized.max(), optimized.max())
    ax.plot([0, diagonal_end], [0, diagonal_end], c='gray')

    ax.grid()
    ax.set_title(col.capitalize().replace('_', ' '))

    ax.set_xlabel('Unoptimized path length')
    ax.set_ylabel('Optimized path length')