# Library


In [1]:
# Library
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd

# Dataset

In [None]:
# Load the validated parameter sets and their pattern-type arrays from disk.
# Files are named "matching_<n>_ML_<kind>.npy", where <n> is the ring count
# (1, 2 or 3) and <kind> is either "params" or "types".
output_dir = "./data/"

loaded_arrays = {}
for ring_count in (1, 2, 3):
    for kind in ("params", "types"):
        filename = output_dir + 'matching_' + str(ring_count) + '_ML_' + kind + '.npy'
        loaded_arrays[(ring_count, kind)] = np.load(filename)

# 1 ring
matching_1_ML_params = loaded_arrays[(1, "params")]
matching_1_ML_types = loaded_arrays[(1, "types")]

# 2 rings
matching_2_ML_params = loaded_arrays[(2, "params")]
matching_2_ML_types = loaded_arrays[(2, "types")]

# 3 rings
matching_3_ML_params = loaded_arrays[(3, "params")]
matching_3_ML_types = loaded_arrays[(3, "types")]

print('finish loading')

In [None]:
# Stack the parameter arrays of every ring count (1, then 2, then 3 rings)
params_array_cont = np.concatenate(
    [matching_1_ML_params, matching_2_ML_params, matching_3_ML_params],
    axis=0,
)

# Stack the pattern-type arrays in the same order
pattern_types_array_cont = np.concatenate(
    [matching_1_ML_types, matching_2_ML_types, matching_3_ML_types],
    axis=0,
)

# One two-element list per parameter (presumably its [lower, upper] bound --
# TODO confirm). Only the keys are used below: their order names the columns
# of the parameter arrays.
params_range_opt = dict(
    DC=[0.5e-3, 12.5e-2],
    aC=[0.1, 1],
    aA=[100, 100000],
    aT=[10, 8000],
    aL=[5, 500],
    dA=[0.001, 0.1],
    dT=[3, 300],
    dL=[0.144, 14.4],
    alpha=[1, 5],
    beta=[2, 2000],
    Kphi=[1, 10],
    N0=[200000, 5000000],
)

# One DataFrame per ring count, columns named after the parameters
parameter_names = params_range_opt.keys()
RFP_1_df = pd.DataFrame(matching_1_ML_params, columns=parameter_names)
RFP_2_df = pd.DataFrame(matching_2_ML_params, columns=parameter_names)
RFP_3_df = pd.DataFrame(matching_3_ML_params, columns=parameter_names)

# Attach the pattern type of every parameter set as an extra column
for ring_df, ring_types in (
    (RFP_1_df, matching_1_ML_types),
    (RFP_2_df, matching_2_ML_types),
    (RFP_3_df, matching_3_ML_types),
):
    ring_df['RFP_types'] = ring_types

# Non-patterning group: first 1000 rows of the 1-ring sets
df_nonpatterning = pd.concat([RFP_1_df.iloc[:1000]])

# Patterning group: first 1000 rows of the 2-ring sets, then of the 3-ring sets
df_patterning = pd.concat([RFP_2_df.iloc[:1000], RFP_3_df.iloc[:1000]])

# Both groups together (patterning rows first)
all_sets = pd.concat([df_patterning, df_nonpatterning])

# Analyze

In [None]:
# Compare the distribution of every model parameter between the parameter sets
# in df_patterning ("2+ rings") and those in df_nonpatterning ("1 ring").
# 'RFP_types' is skipped: it is presumably the pattern-type label that defines
# the two groups, so a histogram / KS test on it says nothing about parameters.
col_names = all_sets.columns.drop('RFP_types', errors='ignore')
print(col_names)

# Columns drawn with logarithmically spaced bins and a log x-axis
LOG_SCALE_COLUMNS = ['DC', 'aA', 'aT', 'aL', 'dA', 'dT', 'dL', 'beta']
# Number of histogram bins (bin edges = N_BINS + 1)
N_BINS = 20
# p-value below which the two groups are reported as differently distributed
SIGNIFICANCE_LEVEL = 0.05

# Plot
color1 = "#69b3a2"
color2 = "#404080"

for column in col_names:
    fig, ax = plt.subplots(figsize=(4, 2))

    # Bin edges are computed on the pooled data so both groups share them
    combined_data = all_sets[column]

    if column in LOG_SCALE_COLUMNS:
        # Log scale: the lower edge ignores non-positive values (log10 undefined)
        min_bin = np.log10(combined_data[combined_data > 0].min())
        max_bin = np.log10(combined_data.max())
        bins = np.logspace(min_bin, max_bin, N_BINS + 1)

        ax.set_xscale('log')
    else:
        # Linear scale
        bins = np.linspace(combined_data.min(), combined_data.max(), N_BINS + 1)

    # Compute statistical significance: with two samples, kstest performs the
    # two-sample Kolmogorov-Smirnov test (null hypothesis: same distribution)
    KS_D, p_value = stats.kstest(df_patterning[column], df_nonpatterning[column])
    if p_value < SIGNIFICANCE_LEVEL:
        print('Different -- ', p_value)
    else:
        print('Same')

    # 2+ rings -- weights of 1/N turn counts into the fraction of sets per bin
    weights_pattern = np.ones_like(df_patterning[column]) / len(df_patterning[column])
    ax.hist(df_patterning[column], bins=bins, alpha=0.5, weights=weights_pattern,
            label='2+ rings', color=color2, edgecolor='white')

    # 1 ring -- normalised the same way so groups of different size are comparable
    weights_nopattern = np.ones_like(df_nonpatterning[column]) / len(df_nonpatterning[column])
    ax.hist(df_nonpatterning[column], bins=bins, alpha=0.5, weights=weights_nopattern,
            label='1 ring', color=color1, edgecolor='white')

    ax.set_xlabel(column)
    ax.set_ylabel('Frequency')
    ax.legend()
    plt.tight_layout()
    plt.show()
    # Release the figure so a long loop does not accumulate open figures
    plt.close(fig)
