# Library


In [16]:
#pip install plotly

In [1]:
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
import torch

In [None]:
# Pick the torch device: the GPU when CUDA is usable, the CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Load ML and PDE results

In [None]:
# Locate the inference data; results are written back into the same directory.
# NOTE: data_dir deliberately keeps its trailing slash, because output_dir may
# be concatenated directly with a file name (output_dir + 'name.npy').
root_path = "./all_data/"
data_dir = root_path + 'inference_1/'
output_dir = data_dir

# ML-predicted RFP profiles, used exactly as stored on disk
filename = os.path.join(data_dir, 'ML_outputs.npy')
print(filename)
data_array_ML = np.load(filename)

# PDE ("HQ") results: view the file as (n_samples, 3, 201) and keep index 1 of
# the 3-slot axis (presumably the RFP channel -- TODO confirm).
data_array_HQ = np.load(os.path.join(data_dir, 'PDE_outputs.npy'))
data_array_HQ = data_array_HQ.reshape([-1, 3, 201])
# Integer indexing already removes the middle axis, so the result is always
# 2-D (n_samples, 201). No squeeze here: it would collapse a single-sample
# array to 1-D and make the axis=1 reduction below fail.
data_array_HQ = data_array_HQ[:, 1, :]
# Scale every profile by its own maximum so each row peaks at 1
data_array_HQ = data_array_HQ / data_array_HQ.max(axis=1, keepdims=True)

# Get parameters
# The ML parameters come from a comma-separated text file (presumably written
# with a 6-decimal format -- TODO confirm). ndmin=2 keeps the table 2-D even
# when the file holds a single row.
params_array_ML = np.loadtxt(os.path.join(data_dir, 'ML_params.txt'), delimiter=',', dtype=np.float32, ndmin=2)
params_array_HQ = np.load(os.path.join(data_dir, 'PDE_params.npy')).astype(np.float32)
# Parameters compared between ML and PDE, in the column order used downstream
param_names = ['DC', 'aC', 'aA', 'aT', 'aL', 'dA', 'dT', 'dL', 'alpha','beta', 'Kphi', 'N0']
# Column layout of the PDE parameter file
all_param_names = ['DC', 'DN', 'DA', 'DB', 'aC','aA', 'aB', 'aT', 'aL', 'bN',
                'dA', 'dB', 'dT', 'dL', 'k1','k2', 'KN', 'KP', 'KT', 'KA', 
                'KB', 'alpha','beta', 'Cmax', 'a', 'b', 'm', 'n', 'Kphi', 'l', 
                'N0', 'G1','G2','G3','G4','G5','G6','G7','G8','G9',
                'G10','G11','G12', 'G13','G14','G15','G16','G17','G18', 'G19', 'alpha_p',
                'beta_p', 'seeding_v']
# Positions of the selected parameters inside the full PDE parameter layout
selected_param_idx = [all_param_names.index(param) for param in param_names if param in all_param_names]
# Pick the selected columns and force an (n_samples, n_selected) result.
# An explicit reshape is used instead of squeeze() so that a file with a
# single sample does not collapse to a 1-D array.
params_array_HQ = params_array_HQ[:, selected_param_idx].reshape(-1, len(selected_param_idx))

# Load the pattern-type labels stored next to the ML and PDE results
pattern_types_array_ML = np.load(
    os.path.join(data_dir, 'ML_types.npy')
)
# For the PDE labels keep only index 1 along the second axis, then drop any
# length-1 axes that remain
pattern_types_array_HQ = np.squeeze(
    np.load(os.path.join(data_dir, 'PDE_types.npy'))[:, 1]
)

# Optional shortcut (disabled): reload sorted ML arrays that were saved by an
# earlier run instead of recomputing them.
# filename = os.path.join(output_dir, f'sorted_ML_outputs.npy')
# sorted_data_array_ML = np.load(filename)
# filename = os.path.join(output_dir, f'sorted_ML_params.npy')
# sorted_params_array_ML = np.load(filename)
# filename = os.path.join(output_dir, f'sorted_ML_types.npy')
# sorted_types_array_ML = np.load(filename)

# Report the shape of every loaded array, one section per kind of data.
# (The "sorted ML" lines stay disabled together with the reload above.)
print(
    'RFP profiles -------------',
    f"ML: {data_array_ML.shape}",
    # f"sorted ML: {sorted_data_array_ML.shape}",
    f"HQ: {data_array_HQ.shape}",
    sep='\n',
)

print(
    'PDE Parameters -------------',
    f"ML: {params_array_ML.shape}",
    # f"sorted ML: {sorted_params_array_ML.shape}",
    f"HQ: {params_array_HQ.shape}",
    sep='\n',
)

print(
    'RFP types -------------',
    f"ML:  {pattern_types_array_ML.shape}",
    # f"sorted ML:  {sorted_types_array_ML.shape}",
    f"HQ:  {pattern_types_array_HQ.shape}",
    sep='\n',
)

# Wrap both parameter tables in DataFrames, one named column per parameter
data_df_ML, data_df_HQ = (
    pd.DataFrame(table, columns=param_names)
    for table in (params_array_ML, params_array_HQ)
)

In [21]:
# Convert peak number to ring number
def peak_to_ring(df_col):
    """Convert per-sample peak counts into ring counts.

    Consecutive pairs of peak counts share one ring count: 1-2 peaks -> 1
    ring, 3-4 -> 2, 5-6 -> 3, and so on, i.e. ``rings = (peaks + 1) // 2``.
    Every input element produces exactly one output element, so the result
    stays aligned with the rows the input came from. A count of 0 maps to
    0 rings (NOTE(review): confirm this is the desired label for "no peak").

    Args:
        df_col: Iterable of non-negative, integer-valued peak counts
            (Python or NumPy integers, or floats such as ``3.0``).

    Returns:
        A list of ``int`` ring counts with the same length and order as
        ``df_col``.

    Raises:
        ValueError: If a value is negative or not integer-valued, because
            no ring count can be derived from it.
    """
    new_col = []
    for value in df_col:
        peaks = int(value)
        # Reject instead of dropping: a dropped element would shift every
        # later label onto the wrong row.
        if peaks != value or peaks < 0:
            raise ValueError(
                f"Cannot convert peak count {value!r} to a ring count; "
                "expected a non-negative integer."
            )
        new_col.append((peaks + 1) // 2)
    return new_col

# Convert the PDE peak counts into ring counts.
# peak_to_ring returns a plain Python list; turn it back into an ndarray so
# that element-wise comparisons (labels == ring_count) and index-array
# lookups (labels[indices]) keep working on this variable.
pattern_types_array_HQ = np.asarray(peak_to_ring(pattern_types_array_HQ))

# Attach the pattern type of every sample to its parameter table
data_df_ML['RFP_type'] = pattern_types_array_ML
data_df_HQ['RFP_type'] = pattern_types_array_HQ

# Match parameters by row and sort


In [None]:
# Check that every HQ parameter row has an exactly equal row among the ML
# parameter rows.
# NOTE(review): this is an exact comparison; values that went through a text
# file may differ in the last digits, in which case a tolerance-based
# comparison is the more meaningful test.
all_rows_match = True

# View the ML parameters as a 2-D table so one comparison covers all rows
ml_param_rows = np.atleast_2d(params_array_ML)

for row_hq in tqdm(params_array_HQ):

    # Rows of different width can never be equal
    same_width = ml_param_rows.shape[1] == np.size(row_hq)

    # Compare this HQ row against every ML row in a single vectorized step
    if not (same_width and np.any(np.all(ml_param_rows == row_hq, axis=1))):
        all_rows_match = False
        break  # One HQ row without a match settles the answer

# Print the result
if all_rows_match:
    print("All rows in params_array_HQ have an equivalent in params_array_ML.")
else:
    print("Not all rows in params_array_HQ have an equivalent in params_array_ML.")

In [None]:
# Match and sort rows
#
# For every HQ parameter row, find the first ML parameter row that equals it
# within floating-point tolerance, then reorder the ML arrays so that ML row k
# corresponds to HQ row k.

matching_indices = []     # ML row index chosen for each matched HQ row
unmatched_hq_rows = []    # HQ row indices for which no ML row was close enough

for j in tqdm(range(0, len(params_array_HQ))):

    row_hq = params_array_HQ[j, :]

    # Same test as np.allclose(row_hq, row_ML, rtol, atol), applied to all ML
    # rows at once. The argument order is kept because the relative tolerance
    # scales with the second argument.
    is_close = np.isclose(row_hq, params_array_ML, rtol=1e-06, atol=1e-10).all(axis=1)
    candidates = np.flatnonzero(is_close)

    if candidates.size:
        # Keep only the first match
        matching_indices.append(int(candidates[0]))
    else:
        unmatched_hq_rows.append(j)

# A partial match would silently shift ML rows against HQ rows and corrupt
# every row-wise comparison made afterwards, so stop here instead.
if matching_indices and unmatched_hq_rows:
    raise ValueError(
        f"{len(unmatched_hq_rows)} of {len(params_array_HQ)} rows in params_array_HQ "
        f"have no match in params_array_ML (first unmatched HQ row indices: "
        f"{unmatched_hq_rows[:10]}); the sorted ML arrays would not be "
        "row-aligned with the HQ arrays."
    )

# Sort the rows accordingly
sorted_params_array_ML= []
sorted_data_array_ML =[]
sorted_types_array_ML =[]

if matching_indices:
    sorted_params_array_ML = params_array_ML[matching_indices]
    sorted_data_array_ML = data_array_ML[matching_indices]
    sorted_types_array_ML = pattern_types_array_ML[matching_indices]
    
    print('Shape of sorted ML profiles', sorted_data_array_ML.shape)
    print('Shape of PDE profiles', data_array_HQ.shape)
    
else:
    
    print("No matching indices found")


## Save sorted data

In [53]:
# Persist the reordered ML arrays so later runs can skip the row matching
np.save(os.path.join(output_dir, 'sorted_ML_outputs.npy'), sorted_data_array_ML)
np.save(os.path.join(output_dir, 'sorted_ML_params.npy'), sorted_params_array_ML)
np.save(os.path.join(output_dir, 'sorted_ML_types.npy'), sorted_types_array_ML)

# Also write the parameters as comma-separated text with 8 decimals
filename = os.path.join(output_dir, 'sorted_ML_params.txt')
np.savetxt(filename, sorted_params_array_ML, fmt='%0.8f', delimiter=',')

# Compute accuracy 

## Accuracy on the entire dataset

In [None]:
# Calculate R2 over all profile points.
# r2_score takes (y_true, y_pred) and is not symmetric: the PDE simulation is
# the reference, the ML output is the prediction.
R2 = r2_score(data_array_HQ.flatten(), sorted_data_array_ML.flatten())
print('R2 score -- ', str(R2) )

# Scatter of ML prediction (x) against PDE simulation (y)
fig, axs = plt.subplots(1, 1, figsize=(3, 3))
axs.scatter(sorted_data_array_ML.flatten(), data_array_HQ.flatten(), s=0.1, color='orange', alpha=0.5)
# y = x reference line spanning the range of the ML values
line_min, line_max = np.min(sorted_data_array_ML), np.max(sorted_data_array_ML)
axs.plot([line_min, line_max], [line_min, line_max], 'blue')
axs.set_aspect('equal', adjustable='box')
axs.set_xlabel('ML prediction')
axs.set_ylabel('High quality simulation')
# NOTE(review): the data is read from 'inference_1/'; confirm 'Training' is the intended title
axs.set_title('Training')
# Fixed unit axes
axs.set_xlim(0, 1)
axs.set_ylim(0, 1)
plt.tight_layout()
plt.show()

## Accuracy on each pattern type

In [None]:
# Accuracy per pattern type.
# For each ring count: find the samples that the PDE labels and the ML labels
# both assign to that type, save those samples, then score and plot the ML
# profiles against the PDE profiles.

# Work on ndarray views of the labels: with a plain list, `labels == n` is a
# single bool rather than an element-wise mask, and index arrays are rejected.
types_HQ = np.asarray(pattern_types_array_HQ)
types_ML = np.asarray(sorted_types_array_ML)

for ring_count in (1, 2, 3):
    ring_label = '1 ring' if ring_count == 1 else f'{ring_count} rings'

    # Row indices labelled with this type by the PDE (1) and by the ML (2)
    index_plot_1 = np.where(types_HQ == ring_count)[0]
    index_plot_2 = np.where(types_ML == ring_count)[0]
    intersection_indices = np.intersect1d(index_plot_1, index_plot_2)
    common_count = len(intersection_indices)

    print(f'------------- {ring_label} -------------')
    print("# of correct entries:", common_count)
    print("total # of entries:", len(index_plot_2))
    if len(index_plot_2) > 0:
        print("% of correct entries:", common_count/len(index_plot_2) *100, "%")
    else:
        # Avoid dividing by zero when the ML never predicts this type
        print("% of correct entries: n/a (no ML entries of this type)")

    # Save matching rows
    matching_HQ_outputs = data_array_HQ[intersection_indices, :]
    matching_HQ_params  = params_array_HQ[intersection_indices, :]
    matching_HQ_types   = types_HQ[intersection_indices]
    matching_ML_outputs = sorted_data_array_ML[intersection_indices, :]
    matching_ML_params  = sorted_params_array_ML[intersection_indices, :]
    matching_ML_types   = types_ML[intersection_indices]

    for tag, array in (
        ('HQ_outputs', matching_HQ_outputs),
        ('HQ_params', matching_HQ_params),
        ('HQ_types', matching_HQ_types),
        ('ML_outputs', matching_ML_outputs),
        ('ML_params', matching_ML_params),
        ('ML_types', matching_ML_types),
    ):
        # os.path.join works whether or not output_dir ends with a separator
        filename = os.path.join(output_dir, f'matching_{ring_count}_{tag}.npy')
        np.save(filename, array)

    if common_count == 0:
        # Neither R2 nor the axis range is defined without samples
        print('No matching samples -- skipping R2 and plot')
        continue

    # R2 -- r2_score takes (y_true, y_pred): PDE is the reference, ML the prediction
    R2 = r2_score(matching_HQ_outputs.flatten(), matching_ML_outputs.flatten())
    print('R2 score -- ', str(R2))
    print('# of samples -- ',  len(matching_ML_outputs))

    # Scatter of ML prediction (x) against PDE simulation (y)
    fig, axs = plt.subplots(1, 1, figsize=(3, 3))
    axs.scatter(matching_ML_outputs.flatten(), matching_HQ_outputs.flatten(), s=0.1, color='orange', alpha=0.5)
    # y = x reference line spanning the range of the ML values
    line_min, line_max = np.min(matching_ML_outputs), np.max(matching_ML_outputs)
    axs.plot([line_min, line_max], [line_min, line_max], 'blue')
    axs.set_aspect('equal', adjustable='box')
    axs.set_xlabel('ML prediction')
    axs.set_ylabel('High quality simulation')
    axs.set_title(ring_label)
    # Fixed unit axes
    axs.set_xlim(0, 1)
    axs.set_ylim(0, 1)


## Plot ML against PDE

In [None]:
# Overlay the first few ML profiles on their PDE counterparts.
# Show at most 10 examples, and never more than the arrays actually hold.
n_examples = min(10, len(sorted_data_array_ML), len(data_array_HQ))

for i in range(n_examples):
    data_ML = sorted_data_array_ML[i, :].squeeze()
    data_HQ = data_array_HQ[i, :]
    print('ML type: ', sorted_types_array_ML[i])
    print('PDE type: ',pattern_types_array_HQ[i])

    # r2_score takes (y_true, y_pred): PDE is the reference, ML the prediction
    R2 = r2_score(data_HQ, data_ML)
    print(R2)

    # Plot on the Axes object so the `ax` handle stays usable
    fig, ax = plt.subplots(1,1, figsize=(1,1))
    ax.plot(data_ML, color ='orange', label='ML')
    ax.plot(data_HQ, color ='blue', label='PDE')
    fig.legend()
    plt.show()