# Week 2 Visualizations: Visualizing distributions

**Input**: ACLED events + Economics master (sector percentages)  
**Output**: JSON datasets for D3.js charts in viz-datasets/

In [59]:
import pandas as pd
import numpy as np
import json
from pathlib import Path

# For visualizations (optional)
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook display settings: never truncate columns, show up to 100 rows
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 100),
):
    pd.set_option(option_name, option_value)

# Plot defaults: seaborn white-grid theme and a (12, 6) default figure size
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## Load Data

In [60]:
# Input folders, given relative to the notebook's working directory
raw_data_path = Path('../raw-data')
processed_data_path = Path('../processed-data')

# Full paths to the two input CSV files
acled_csv = raw_data_path / 'ACLED' / 'ACLED_2025-10-29.csv'
econ_csv = processed_data_path / 'economics-countries-master.csv'

# 'utf-8-sig' strips a leading byte-order mark from the ACLED file if one is present
df_acled = pd.read_csv(acled_csv, encoding='utf-8-sig')
df_econ = pd.read_csv(econ_csv)

print(f"ACLED: {df_acled.shape[0]:,} events")
print(f"Economics: {df_econ.shape[0]:,} country-years")

# "Current" year here means the most recent year present in the ACLED data
current_year = df_acled['year'].max()
print(f"Latest year in data: {current_year}")

ACLED: 2,372,683 events
Economics: 10,936 country-years
Latest year in data: 2024


## Process & Join

In [61]:
# Keep a 10-year window ending at current_year; the window includes
# current_year itself, which is why the offset is 9 rather than 10
last_10_years_start = current_year - 9
recent_mask = df_acled['year'] >= last_10_years_start
df_acled_recent = df_acled.loc[recent_mask].copy()

print(f"Filtered to last 10 years: {last_10_years_start}-{current_year}")

# One row per (country, year): count of event ids and summed fatalities.
# 'count' only counts non-null event ids.
conflict_summary = (
    df_acled_recent
    .groupby(['country', 'year'])
    .agg(
        event_count=('event_id_cnty', 'count'),
        total_fatalities=('fatalities', 'sum'),
    )
    .reset_index()
)

# One economics row per country, taken from the end of the Year-sorted data.
# NOTE(review): GroupBy.last() returns the last non-null value of each column,
# so a country's row can combine values from different years - confirm this
# is the intended meaning of "latest".
df_econ_latest = (
    df_econ
    .sort_values('Year')
    .groupby('Country')
    .last()
    .reset_index()
)

# Left join keeps every conflict country-year, including those whose country
# name has no match in the economics table (their economics columns are NaN).
# The same per-country economics row is attached to every year of a country.
df_merged = pd.merge(
    conflict_summary,
    df_econ_latest,
    how='left',
    left_on='country',
    right_on='Country',
)

print(f"Merged: {len(df_merged):,} rows")
rows_with_population = df_merged['Population'].notna().sum()
print(f"Records with population data: {rows_with_population:,}")

Filtered to last 10 years: 2015-2024
Merged: 1,606 rows
Records with population data: 1,288


## Viz 1: Boxplot - Distribution of Sector Percentages for all countries

In [None]:
# using dataset from week 1