In [None]:
# Save bubble map data - ALL countries with conflict fatalities.
# Uses `country_conflict`, `last_10_years_start` and `current_year` from the
# ACLED aggregation cell, and `viz_datasets_path` from the Load Data cell.

# The reporting window is derived from the data, so build the label once and
# reuse it: a hard-coded "(2015-2024)" title can silently disagree with
# `date_range` whenever a newer ACLED extract is loaded.
bubble_date_range = f'{last_10_years_start}-{current_year}'
bubble_metadata = {
    'title': f'Bubble Map: Conflict Fatalities by Country ({bubble_date_range})',
    'description': 'All countries with recorded conflict fatalities',
    'source': 'ACLED',
    'date_range': bubble_date_range,
    'notes': 'Includes all countries with at least 1 fatality'
}

# Convert to int for cleaner JSON
country_conflict['event_count'] = country_conflict['event_count'].astype(int)
country_conflict['total_fatalities'] = country_conflict['total_fatalities'].astype(int)

# Rename event type columns for cleaner JSON keys
rename_cols = {
    'Battles': 'battles',
    'Explosions/Remote violence': 'explosions',
    'Protests': 'protests',
    'Riots': 'riots',
    'Strategic developments': 'strategic',
    'Violence against civilians': 'violence_civilians'
}
country_conflict = country_conflict.rename(columns=rename_cols)

# A country without any event of a given type has no count for it after the
# left merge; treat that as 0 and keep the column integer-typed.
for col in rename_cols.values():
    if col in country_conflict.columns:
        country_conflict[col] = country_conflict[col].fillna(0).astype(int)

bubble_output = {
    'metadata': bubble_metadata,
    'data': country_conflict.to_dict(orient='records')
}

output_file_bubble = viz_datasets_path / 'viz8_bubble_map_fatalities.json'
with open(output_file_bubble, 'w', encoding='utf-8') as f:
    json.dump(bubble_output, f, indent=2, ensure_ascii=False)

print(f"[OK] Saved: {output_file_bubble.name} ({output_file_bubble.stat().st_size / 1024:.1f} KB)")
print(f"Total countries: {len(country_conflict):,}")
# Only show a sample when there is one; `.iloc[0]` raises on an empty frame.
if len(country_conflict):
    print("\nSample record:")
    print(country_conflict.iloc[0].to_dict())

In [None]:
# Load ACLED data
df_acled = pd.read_csv(raw_data_path / 'ACLED' / 'ACLED_2025-10-29.csv', encoding='utf-8-sig')

# The window is the 10 most recent calendar years present in the data.
# Cast to a plain int so the year formats as e.g. "2025" (not "2025.0") even if
# the column was read as float.
current_year = int(df_acled['year'].max())
last_10_years_start = current_year - 9
df_acled_recent = df_acled[df_acled['year'] >= last_10_years_start].copy()

# Label the count with the computed window rather than a hard-coded range.
print(f"ACLED events ({last_10_years_start}-{current_year}): {len(df_acled_recent):,}")

# Aggregate by country - ALL countries, not just top 20
country_conflict = (
    df_acled_recent
    .groupby('country')
    .agg(
        event_count=('event_id_cnty', 'count'),
        total_fatalities=('fatalities', 'sum'),
    )
    .reset_index()
)

# Also get event type breakdown per country (long format: one row per pair)
event_type_counts = (
    df_acled_recent
    .groupby(['country', 'event_type'])
    .agg(count=('event_id_cnty', 'count'))
    .reset_index()
)

# Pivot to get one column per event type; a missing (country, type) pair means 0 events
event_pivot = event_type_counts.pivot(index='country', columns='event_type', values='count').fillna(0).astype(int)
event_pivot = event_pivot.reset_index()

# Merge with main country data
country_conflict = country_conflict.merge(event_pivot, on='country', how='left')

# Filter to countries with at least 1 fatality (to make bubbles meaningful)
country_conflict = country_conflict[country_conflict['total_fatalities'] > 0].copy()

print(f"Countries with fatalities: {len(country_conflict):,}")
print(f"\nFatalities range: {country_conflict['total_fatalities'].min():,} - {country_conflict['total_fatalities'].max():,}")
print(f"\nEvent type columns: {list(event_pivot.columns[1:])}")

# Cell output: the 20 countries with the most fatalities
country_conflict.sort_values('total_fatalities', ascending=False).head(20)

# Week 4 Visualizations: Maps

**Input**: ACLED events + Economics master (sector percentages)  
**Output**: JSON datasets for D3.js charts in viz-datasets/

In [46]:
import pandas as pd
import numpy as np
import json
from pathlib import Path

# For visualizations (optional)
import matplotlib.pyplot as plt
import seaborn as sns

# Pandas display limits: show every column, cap printed rows at 100
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

# Plot defaults: seaborn white-grid theme and a 12x6 default figure size
sns.set_style('whitegrid')
plt.rc('figure', figsize=(12, 6))

## Load Data

In [47]:
# Project data directories, relative to the notebook's working directory
raw_data_path = Path('..') / 'raw-data'
processed_data_path = Path('..') / 'processed-data'
viz_datasets_path = Path('..') / 'viz-datasets'

# Economics master table, lower-cased column names, collapsed to one row per country.
# NOTE(review): GroupBy.last() takes the last non-null value of EACH column, so
# a country's row can combine values from different years while `year` shows
# the most recent one - confirm this "latest available" behaviour is intended.
df_econ = (
    pd.read_csv(processed_data_path / 'economics-countries-master.csv')
    .rename(columns=str.lower)
    .sort_values('year')
    .groupby('country')
    .last()
    .reset_index()
)

# NOTE(review): after the groupby this counts countries, not country-years.
print(f"Economics: {len(df_econ):,} country-years")
df_econ.head(10)

Economics: 220 country-years


Unnamed: 0,country,year,primary_%,secondary_%,tertiary_%,tourism_%,gdp_usd,population,inflation_%,debt_%
0,Afghanistan,2023,41.89,8.9,49.21,,14266500000.0,41128771.0,2.3,
1,Albania,2023,26.58,18.51,54.91,5.56,18916380000.0,2777689.0,6.73,82.38
2,Algeria,2023,29.78,22.93,47.29,1.39,194998400000.0,44903225.0,9.27,
3,Andorra,2023,1.9,12.25,85.85,,3352031000.0,79824.0,,
4,Angola,2023,46.78,13.37,39.85,,106782800000.0,35588987.0,25.75,
5,Anguilla,2023,5.2,14.39,80.41,,,,,
6,Antigua and Barbuda,2023,5.49,18.45,76.06,11.71,1867733000.0,93763.0,7.53,
7,Argentina,2023,12.8,24.03,63.17,1.68,631133400000.0,46234830.0,,
8,Armenia,2023,15.66,19.63,64.71,,19513470000.0,2780469.0,8.64,60.08
9,Aruba,2023,4.11,8.7,87.19,21.2,3544708000.0,106445.0,4.26,


## Viz 1: Choropleth Map

In [None]:
# Build and save the choropleth dataset from `df_econ` (one row per country).
def _json_safe(value):
    """Return None for NaN/+-inf floats (not representable in strict JSON); pass other values through."""
    if isinstance(value, float) and not np.isfinite(value):
        return None
    return value


# Derived metric; NaN where population is missing, inf where it is 0.
df_econ['gdp_per_capita'] = df_econ['gdp_usd'] / df_econ['population']

# Select only the fields the map needs
df_econ_latest = df_econ[['country', 'year', 'primary_%', 'gdp_usd', 'gdp_per_capita']].copy()
df_econ_latest = df_econ_latest.rename(columns={"primary_%": "primary"})
# Keep only countries that have both a GDP figure and a primary-sector share
df_viz2 = df_econ_latest.dropna(subset=['gdp_usd', 'primary']).copy()

# Country name mappings to match GeoJSON names
df_viz2['country'] = df_viz2['country'].replace({
    "United States": "United States of America",
    "Russian Federation": "Russia",
    "Venezuela (Bolivarian Republic of)": "Venezuela",
    "Bolivia (Plurinational State of)": "Bolivia",
    "D.R. of the Congo": "Dem. Rep. Congo",
    "South Sudan": "S. Sudan",
    "Central African Republic": "Central African Rep.",
    "Iran (Islamic Republic of)": "Iran",
    # Map both the correctly decoded name and its mojibake form (UTF-8 bytes
    # read as Latin-1), so the lookup works whichever way the CSV was decoded.
    "Türkiye": "Turkey",
    "TÃ¼rkiye": "Turkey",
    "Republic of Korea": "South Korea",
    "D.P.R. of Korea": "North Korea",
    "Viet Nam": "Vietnam",
    "Lao People's DR": "Laos",
    "U.R. of Tanzania: Mainland": "Tanzania",
    "Dominican Republic": "Dominican Rep.",
    "Equatorial Guinea": "Eq. Guinea",
    "State of Palestine": "Palestine",
    "Syrian Arab Republic": "Syria",
    "Republic of Moldova": "Moldova",
    "Solomon Islands": "Solomon Is.",
    "Brunei Darussalam": "Brunei",
    "Bosnia and Herzegovina": "Bosnia and Herz.",
    "North Macedonia": "Macedonia"
})

# Add duplicate rows for territories that share data with another country
somaliland_rows = df_viz2[df_viz2['country'] == "Somalia"].assign(country="Somaliland")
cyprus_rows = df_viz2[df_viz2['country'] == "Cyprus"].assign(country="N. Cyprus")
df_viz2 = pd.concat([df_viz2, somaliland_rows, cyprus_rows], ignore_index=True)

# Describe the years actually present instead of a hard-coded label
viz_years = df_viz2['year'].dropna()
if viz_years.empty:
    viz_date_range = ''
else:
    first_year, last_year = int(viz_years.min()), int(viz_years.max())
    viz_date_range = str(first_year) if first_year == last_year else f'{first_year}-{last_year}'

# Metadata
metadata = {
    'title': 'Maps',
    'description': '',
    'source': 'World Bank',
    'date_range': viz_date_range,
    'notes': ''
}

# Create output structure for viz 1.
# json.dump would otherwise emit bare NaN/Infinity tokens (e.g. gdp_per_capita
# for a country without population data), which strict JSON parsers reject.
viz1_records = [
    {key: _json_safe(value) for key, value in record.items()}
    for record in df_viz2.to_dict(orient='records')
]
viz1_output = {
    'metadata': metadata,
    'data': viz1_records
}

output_file_viz1 = viz_datasets_path / 'viz7_maps.json'
with open(output_file_viz1, 'w', encoding='utf-8') as f:
    # allow_nan=False makes any remaining non-finite value fail loudly here
    # rather than producing a file the browser cannot parse.
    json.dump(viz1_output, f, indent=2, ensure_ascii=False, allow_nan=False)

print(f"\n[OK] Saved VIZ 1: {output_file_viz1.name} ({output_file_viz1.stat().st_size / 1024:.1f} KB)")
print(f"Total countries: {len(df_viz2):,}")

## Viz 2: Bubble Map (Proportional Symbol) - All Countries with Conflict Data