In [None]:
import pandas as pd

# Read each country's cleaned CSV and stamp every row with a 'Country' label
# so records stay attributable once the frames are stacked together.
benin_df = pd.read_csv('data/benin_clean.csv').assign(Country='Benin')
sierra_leone_df = pd.read_csv('data/sierraleone_clean.csv').assign(Country='Sierra Leone')
togo_df = pd.read_csv('data/togo_clean.csv').assign(Country='Togo')

# Stack the three labelled frames row-wise; ignore_index=True discards the
# per-file indices and gives the result one fresh 0..n-1 index.
country_frames = [benin_df, sierra_leone_df, togo_df]
combined_df = pd.concat(country_frames, ignore_index=True)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One box per country showing the distribution of the 'GHI (W/m²)' column,
# drawn on an explicitly created 8x6 figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=combined_df, x='Country', y='GHI (W/m²)', ax=ax)
ax.set_title('GHI Comparison Across Countries')
plt.show()

In [None]:
# Per-country mean, median and standard deviation of the three W/m² columns.
# The result has one row per country and a two-level (column, statistic) header.
metric_cols = ['GHI (W/m²)', 'DNI (W/m²)', 'DHI (W/m²)']
summary_stats = ['mean', 'median', 'std']

grouped_metrics = combined_df.groupby('Country')[metric_cols]
summary_table = grouped_metrics.agg(summary_stats)
print(summary_table)

In [None]:
from scipy import stats

# Per-country GHI samples with missing readings dropped; both tests below
# treat these as three independent groups.
ghi_benin = benin_df['GHI (W/m²)'].dropna()
ghi_sl = sierra_leone_df['GHI (W/m²)'].dropna()
ghi_togo = togo_df['GHI (W/m²)'].dropna()

# One-way ANOVA compares the group means and assumes approximately normal,
# equal-variance groups. `stat` / `p_value` continue to hold the ANOVA result.
stat, p_value = stats.f_oneway(ghi_benin, ghi_sl, ghi_togo)

# Kruskal-Wallis is the rank-based alternative that does not assume normality,
# so it is reported alongside ANOVA as a cross-check for non-normal data.
kw_stat, kw_p_value = stats.kruskal(ghi_benin, ghi_sl, ghi_togo)

# `.4g` falls back to scientific notation for very small p-values, which a
# fixed `.4f` would print as a misleading 0.0000.
print(f"ANOVA F-statistic: {stat:.4f}")
print(f"P-value: {p_value:.4g}")
print(f"Kruskal-Wallis H-statistic: {kw_stat:.4f}")
print(f"Kruskal-Wallis p-value: {kw_p_value:.4g}")