## Fit different distributions to distance data

In [None]:
import numpy as np
import scipy.stats as stats
from scipy.optimize import fmin
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from scipy.spatial.distance import pdist
from mat73 import loadmat
%matplotlib inline


In [None]:
# Read the 'distance_data' variable from the .mat file (via mat73's loadmat)
# and convert it to a float NumPy array.
nameFeat = "../data/distance_data_layer6.mat"
data_dict = loadmat(nameFeat)
data = np.array(data_dict['distance_data'], dtype=float)

# 500 equally spaced edges spanning the observed range, i.e. 499 bins.
bin_edges = np.linspace(data.min(), data.max(), 500)
# density=True: bar heights are a probability density (they integrate to 1
# over the binned range), not raw counts.
counts, _ = np.histogram(data, bins=bin_edges, density=True)
# Midpoint of each bin; same length as `counts`.
bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])

# Plot the histogram as normalized frequencies (bar heights sum to 1).
plt.figure()
# Pass the real bin widths: plt.bar defaults to width=0.8 in data units, which
# is unrelated to the bin spacing and makes bars overlap or shrink to slivers.
plt.bar(bin_centers, counts / np.sum(counts), width=np.diff(bin_edges))
# Distributions to be fitted. Every name is looked up in scipy.stats except
# 'folded_gaussian', which is fitted by a custom least-squares routine, and
# 'poisson', whose rate is set to the sample mean.
distributions = ['rayleigh', 'norm', 'poisson',
                 'weibull_min', 'expon', 'folded_gaussian', 'rice']
rmse_values = []


def folded_gaussian_pdf(x, mu, sigma):
    """Return the folded-Gaussian density N(x; mu, sigma) + N(-x; mu, sigma).

    The sign of ``sigma`` is ignored so that an optimizer trial step with a
    negative scale cannot turn the density into NaN.
    """
    sigma = abs(sigma)
    return stats.norm.pdf(x, mu, sigma) + stats.norm.pdf(-x, mu, sigma)


def objfun(param):
    """RMSE between the histogram density and a folded Gaussian.

    ``param`` is the pair (mu, sigma) being optimized.
    """
    return np.sqrt(mean_squared_error(
        counts, folded_gaussian_pdf(bin_centers, param[0], param[1])))


for dist_name in distributions:
    # NOTE: `counts` is a density (histogram built with density=True), so each
    # model is evaluated on that same density scale with no extra rescaling.
    # Previously some branches were multiplied by np.sum(counts) and others
    # were not, which made the RMSE values incomparable across distributions.
    if dist_name == 'folded_gaussian':  # Custom case for Folded Gaussian
        # Least-squares fit to the histogram, started from the sample moments.
        optimal_param = fmin(objfun, [np.mean(data), np.std(data)])
        fitVals = folded_gaussian_pdf(
            bin_centers, optimal_param[0], optimal_param[1])

    elif dist_name == 'poisson':  # Poisson distribution
        # The pmf is only defined on integers, so bin centers are rounded.
        lambda_ = np.mean(data)
        fitVals = stats.poisson.pmf(np.round(bin_centers), lambda_)

    else:
        # Maximum-likelihood fit using scipy's generic `fit`.
        dist = getattr(stats, dist_name)
        param = dist.fit(data)
        fitVals = dist.pdf(bin_centers, *param)

    # Calculate RMSE between the empirical density and the fitted model
    rmse = np.sqrt(mean_squared_error(counts, fitVals))
    rmse_values.append(rmse)

    # Rescale to sum to 1 so the curve overlays the normalized-frequency bars.
    fitVals = fitVals / np.sum(fitVals)

    # Plot fitted curves
    plt.plot(bin_centers, fitVals, label=dist_name)

# Finish the figure: legend for the fitted curves plus axis labels.
plt.legend()
plt.xlabel('Value')
plt.ylabel('Normalized frequency')

# Report the RMSE of every candidate, keyed by distribution name.
rmse_by_name = {name: rmse_values[i] for i, name in enumerate(distributions)}
print("Root Mean Square Errors:", rmse_by_name)

# The best fit is the candidate with the smallest RMSE.
best_fit_rmse_idx = np.argmin(rmse_values)
best_name = distributions[best_fit_rmse_idx]

print(f"Best-fitting distribution based on RMSE: {best_name}")
