In [None]:
import pandas as pd

In [None]:
# Read the cleaned per-country CSV exports, one DataFrame per country
benin, togo, sierra = map(
    pd.read_csv,
    (
        "../data/benin_clean.csv",
        "../data/togo_clean.csv",
        "../data/sierraleone_clean.csv",
    ),
)

In [None]:
# Tag every row with the country its frame was loaded for, so the rows stay
# identifiable once the frames are combined. Each frame is modified in place.
for frame, label in ((benin, 'Benin'), (togo, 'Togo'), (sierra, 'Sierra Leone')):
    frame['country'] = label

In [None]:
# Stack the three country frames row-wise into one DataFrame;
# ignore_index=True discards the per-file row labels and renumbers the rows.
df_all = pd.concat(
    objs=[benin, togo, sierra],
    ignore_index=True,
)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Column names compared across countries; the loop over `metrics` draws one
# box plot per entry.
metrics = ['GHI', 'DNI', 'DHI']

In [None]:
# Draw one box plot per metric, comparing its distribution across countries.
for metric in metrics:
    fig, ax = plt.subplots(figsize=(8, 5))
    # `hue` mirrors `x` so the palette colours one box per country: passing
    # `palette` without `hue` is deprecated since seaborn 0.13.
    # `dodge=False` keeps each box centred on its tick on older releases,
    # which would otherwise offset boxes once a hue variable is set.
    sns.boxplot(
        data=df_all,
        x='country',
        y=metric,
        hue='country',
        palette='Set2',
        dodge=False,
        ax=ax,
    )
    # A hue legend would only repeat the x tick labels; drop it if one was drawn.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.set_title(f'{metric} Comparison Across Countries')
    ax.set_ylabel(metric)
    ax.set_xlabel("Country")
    plt.show()

In [None]:
# Per-country mean, median and standard deviation of each irradiance column,
# rounded to two decimals for display
summary = (
    df_all
    .groupby("country")[['GHI', 'DNI', 'DHI']]
    .agg(['mean', 'median', 'std'])
    .round(2)
)
summary

In [None]:
from scipy.stats import f_oneway, kruskal

# GHI sample for each country, with missing readings removed
ghi_benin = benin['GHI'].dropna()
ghi_togo = togo['GHI'].dropna()
ghi_sierra = sierra['GHI'].dropna()

# One-way ANOVA (parametric): do the country means differ?
f_stat, p_val_anova = f_oneway(ghi_benin, ghi_togo, ghi_sierra)

# Kruskal–Wallis (non-parametric, rank-based counterpart)
h_stat, p_val_kruskal = kruskal(ghi_benin, ghi_togo, ghi_sierra)

# Report the test statistic alongside the p-value. Significant-figure
# formatting is used because round(p, 4) collapses every p-value below 5e-5
# to a bare 0.0, hiding how small it actually is.
print(f"ANOVA F-statistic: {f_stat:.4g}, p-value: {p_val_anova:.4g}")
print(f"Kruskal–Wallis H-statistic: {h_stat:.4g}, p-value: {p_val_kruskal:.4g}")

In [None]:
# Bar chart of each country's mean GHI, ordered from lowest to highest
(
    df_all
    .groupby("country")["GHI"]
    .mean()
    .sort_values()
    .plot.bar(color='skyblue', title="Average GHI by Country", ylabel="GHI")
)
plt.show()

In [None]:
# Solar score per country: the country's mean GHI, mean DNI and mean DHI added
# together (a sum of the three means, not their average), highest score first.
solar_score = (
    df_all
    .groupby('country')[['GHI', 'DNI', 'DHI']]
    .mean()
    .sum(axis=1)
    .sort_values(ascending=False)
)

# Two-column table (Country, Solar Score) for display and plotting
solar_score_df = solar_score.rename_axis('Country').reset_index(name='Solar Score')
solar_score_df

In [None]:
# Horizontal bar chart of the solar score, one bar per country.
fig, ax = plt.subplots(figsize=(8, 5))
# `hue` mirrors `y` so the palette colours one bar per country: passing
# `palette` without `hue` is deprecated since seaborn 0.13.
# `dodge=False` keeps each bar at full width on its own row on older releases.
sns.barplot(
    data=solar_score_df,
    x='Solar Score',
    y='Country',
    hue='Country',
    palette='YlOrRd',
    dodge=False,
    ax=ax,
)
# A hue legend would only repeat the y tick labels; drop it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
ax.set_title('Solar Score by Country')
ax.set_xlabel('Solar Score (Mean of GHI + DNI + DHI)')
ax.set_ylabel('Country')
fig.tight_layout()
plt.show()