# Week 1 Visualizations: Economic Sectors & Conflict Analysis

**Input**: ACLED events + Economics master (sector percentages)  
**Output**: JSON datasets for D3.js charts in viz-datasets/

In [6]:
## Setup

In [7]:
import pandas as pd
import numpy as np
import json
from pathlib import Path

# For visualizations (optional)
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide display configuration: show every column and up to 100 rows
# whenever a DataFrame is rendered.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 100),
):
    pd.set_option(option_name, option_value)

# Plot defaults: seaborn white grid and a 12x6 figure size.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## Load Data

In [8]:
# Data directories, relative to the notebook's working directory.
raw_data_path = Path('../raw-data')
processed_data_path = Path('../processed-data')

# Input files: the raw ACLED event export and the processed economics master table.
acled_csv = raw_data_path.joinpath('ACLED', 'ACLED_2025-10-29.csv')
econ_csv = processed_data_path.joinpath('economics-countries-master.csv')

# 'utf-8-sig' strips a leading byte-order mark, if present, from the ACLED file.
df_acled = pd.read_csv(acled_csv, encoding='utf-8-sig')
df_econ = pd.read_csv(econ_csv)

# Quick size check of both inputs.
print(f"ACLED: {len(df_acled):,} events")
print(f"Economics: {len(df_econ):,} country-years")

ACLED: 2,372,683 events
Economics: 10,936 country-years


## Process & Join

In [None]:
# Keep only events from 2015 onwards; .copy() detaches the result from df_acled.
recent_mask = df_acled['year'] >= 2015
df_acled_recent = df_acled.loc[recent_mask].copy()

# One row per (country, year): number of events and summed fatalities.
conflict_summary = (
    df_acled_recent
    .groupby(['country', 'year'])
    .agg(
        event_count=('event_id_cnty', 'count'),
        total_fatalities=('fatalities', 'sum'),
    )
    .reset_index()
)

# One economics row per country. After sorting by Year, GroupBy.last() takes the
# last non-null entry of each column, so different columns of a country's row
# may originate from different years.
df_econ_latest = (
    df_econ
    .sort_values('Year')
    .groupby('Country')
    .last()
    .reset_index()
)

# Left join: every conflict row is kept, including rows whose country name has
# no match in the economics table (their economics columns are then missing).
df_merged = pd.merge(
    conflict_summary,
    df_econ_latest,
    how='left',
    left_on='country',
    right_on='Country',
)

print(f"Merged: {len(df_merged):,} rows")
print(f"Records with population data: {df_merged['Population'].notna().sum():,}")

## Viz 1: Bar Chart - Top 15 Conflict Countries, Sorted by Primary Sector %

In [None]:
# Collapse the country-year rows to one row per country: conflict counts are
# summed, while the economics columns take the first non-null value per country.
sector_columns = ['Primary_%', 'Secondary_%', 'Tertiary_%', 'Tourism_%']
aggregation_spec = {'event_count': 'sum', 'total_fatalities': 'sum'}
aggregation_spec.update({column: 'first' for column in sector_columns})
aggregation_spec['Population'] = 'first'

country_totals = df_merged.groupby('country').agg(aggregation_spec).reset_index()

# Drop countries that have no primary-sector value (no usable economics data).
country_totals = country_totals.dropna(subset=['Primary_%']).copy()

# Per-capita rates, expressed per 100,000 inhabitants.
population = country_totals['Population']
country_totals['events_per_100k'] = (country_totals['event_count'] / population) * 100000
country_totals['fatalities_per_100k'] = (country_totals['total_fatalities'] / population) * 100000

# Select the 15 countries with the most events, then order them by primary sector share.
top_by_events = country_totals.nlargest(15, 'event_count')
viz1_data = top_by_events.sort_values('Primary_%', ascending=False)

viz1_data.head(10)

### Prepare JSON

In [None]:
# Columns exported to the Viz 1 JSON file, in output order.
export_columns = [
    'country', 'event_count', 'total_fatalities',
    'events_per_100k', 'fatalities_per_100k',
    'Primary_%', 'Secondary_%', 'Tertiary_%', 'Tourism_%',
    'Population',
]
viz_data = viz1_data[export_columns].copy()

# Clean values: whole numbers for counts, two decimals for rates and shares.
viz_data['event_count'] = viz_data['event_count'].astype(int)
viz_data['total_fatalities'] = viz_data['total_fatalities'].astype(int)
viz_data['events_per_100k'] = viz_data['events_per_100k'].round(2)
viz_data['fatalities_per_100k'] = viz_data['fatalities_per_100k'].round(2)
viz_data['Primary_%'] = viz_data['Primary_%'].round(2)
viz_data['Secondary_%'] = viz_data['Secondary_%'].round(2)
viz_data['Tertiary_%'] = viz_data['Tertiary_%'].round(2)
# A missing tourism share is exported as 0.
viz_data['Tourism_%'] = viz_data['Tourism_%'].fillna(0).round(2)
viz_data['Population'] = viz_data['Population'].astype('Int64')  # Integer type that handles NaN

# The event filter is "year >= 2015" with no upper bound, so the covered period
# is read from the filtered events instead of being hard-coded.
first_year = int(df_acled_recent['year'].min())
last_year = int(df_acled_recent['year'].max())
date_range = f"{first_year}-{last_year}"

# Metadata
metadata = {
    'title': f'Top Conflict Countries by Economic Sector ({date_range})',
    'description': 'Top 15 sorted by primary sector %, showing economic structure vs conflict',
    'source': 'ACLED + World Bank',
    'date_range': date_range,
    'notes': 'Per capita rates calculated per 100,000 population'
}

viz_data

### Save JSON

In [12]:
# Output directory for all D3 datasets; parents=True also creates missing ancestors.
viz_datasets_path = Path('../viz-datasets')
viz_datasets_path.mkdir(parents=True, exist_ok=True)

# Missing values (NaN / pd.NA) and non-finite floats (e.g. a rate divided by a
# zero population) have no JSON representation: pd.NA makes json.dump raise and
# NaN/Infinity are written as bare tokens that strict parsers reject.
# Map all of them to None so they are written as JSON null.
records = [
    {
        key: None if pd.isna(value) or (isinstance(value, float) and not np.isfinite(value)) else value
        for key, value in row.items()
    }
    for row in viz_data.to_dict('records')
]

output = {
    'metadata': metadata,
    'data': records
}

output_file = viz_datasets_path / 'viz1_bar_chart_sectors_conflicts.json'
with open(output_file, 'w', encoding='utf-8') as f:
    # allow_nan=False guarantees the file is strict JSON.
    json.dump(output, f, indent=2, ensure_ascii=False, allow_nan=False)

print(f"✓ Saved: {output_file.name} ({output_file.stat().st_size / 1024:.1f} KB)")

✓ Saved: viz1_bar_chart_sectors_conflicts.json (3.3 KB)


## Viz 2: Grouped Bar Chart - Event Types Grouped by Country

In [19]:
# Count events per (country, event_type).
events_by_country = df_acled_recent.groupby(['country', 'event_type'])['event_id_cnty'].count().reset_index()

# Pivot so each event type becomes a column. Combinations that never occur are
# NaN after the pivot, which turns the columns into floats; fill them with 0
# and cast back to int so the JSON holds whole-number counts (12, not 12.0).
pivot_df = (
    events_by_country
    .pivot(index='country', columns='event_type', values='event_id_cnty')
    .fillna(0)
    .astype(int)
)

# Order countries by their total number of events and keep the 10 largest.
country_order = pivot_df.sum(axis=1).sort_values(ascending=False).index
pivot_df = pivot_df.loc[country_order].head(10)

# Reset index to make country a column
pivot_df = pivot_df.reset_index()

# The event filter has no upper bound, so read the covered years from the data.
date_range = f"{int(df_acled_recent['year'].min())}-{int(df_acled_recent['year'].max())}"

# Create the JSON structure
output_data = {
    "metadata": {
        "title": f"Event Types by Country ({date_range})",
        "description": "Distribution of ACLED event types across countries",
        "source": "ACLED",
        "date_range": date_range
    },
    "data": pivot_df.to_dict('records')
}

# Save to JSON file, using the same output directory and encoding as the other datasets.
output_file = viz_datasets_path / 'viz2_event_types.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(output_data, f, indent=2, ensure_ascii=False)

print(f"{len(pivot_df):,} rows")

# Last expression of the cell so the preview is actually rendered.
pivot_df.head()

10 rows


## Additional Visualizations

In [None]:
# TODO: add cells below for:
# - 100% stacked bar (sector composition)
# Done: heatmap (years × event types) - see "Viz 3" below

## Viz 3: Heatmap - Event Types × Years

In [None]:
# One row per (year, event_type) with the number of events and summed
# fatalities, with all three numeric columns cast to plain integers.
heatmap_data = (
    df_acled_recent
    .groupby(['year', 'event_type'])
    .agg(
        event_count=('event_id_cnty', 'count'),
        total_fatalities=('fatalities', 'sum'),
    )
    .reset_index()
    .astype({'year': int, 'event_count': int, 'total_fatalities': int})
)

# Summary of what the heatmap will cover, followed by a preview of the rows.
print(f"Years covered: {heatmap_data['year'].min()} - {heatmap_data['year'].max()}")
print(f"Event types: {sorted(heatmap_data['event_type'].unique())}")
print(f"\nTotal rows: {len(heatmap_data):,}")
print("\nSample data:")
heatmap_data.head(10)

In [None]:
# Prepare metadata and save to JSON.
# The covered period is computed once from the data and used for both the title
# and 'date_range', so the two can never disagree.
year_span = f"{heatmap_data['year'].min()}-{heatmap_data['year'].max()}"

metadata = {
    'title': f'Heatmap: Event Types by Year ({year_span})',
    'description': 'Temporal distribution of ACLED event types showing both event counts and fatalities',
    'source': 'ACLED',
    'date_range': year_span,
    'note': 'Color intensity based on event_count, fatalities included for additional context'
}

output = {
    'metadata': metadata,
    'data': heatmap_data.to_dict('records')
}

# Written next to the other datasets in viz_datasets_path.
output_file = viz_datasets_path / 'viz3_heatmap_event_types_years.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(output, f, indent=2, ensure_ascii=False)

print(f"✓ Saved: {output_file.name} ({output_file.stat().st_size / 1024:.1f} KB)")