# Week 1 Visualizations: Economic Sectors & Conflict Analysis

**Input**: ACLED events + Economics master (sector percentages)  
**Output**: JSON datasets for D3.js charts in viz-datasets/

In [40]:
## Setup

In [41]:
import pandas as pd
import numpy as np
import json
from pathlib import Path

# For visualizations (optional)
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide pandas display settings: show every column, cap rows at 100
for option_name, option_value in {
    'display.max_columns': None,
    'display.max_rows': 100,
}.items():
    pd.set_option(option_name, option_value)

# Plot defaults: seaborn 'whitegrid' style and 12x6 figures
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## Load Data

In [42]:
# Input folders, relative to the notebook location
raw_data_path = Path('../raw-data')
processed_data_path = Path('../processed-data')

acled_csv = raw_data_path / 'ACLED' / 'ACLED_2025-10-29.csv'
econ_csv = processed_data_path / 'economics-countries-master.csv'

# 'utf-8-sig' strips a leading byte-order mark from the ACLED export if one is present
df_acled = pd.read_csv(acled_csv, encoding='utf-8-sig')
df_econ = pd.read_csv(econ_csv)

print(f"ACLED: {len(df_acled):,} events")
print(f"Economics: {len(df_econ):,} country-years")

# The most recent year present in the ACLED data anchors the 10-year window used below
current_year = df_acled['year'].max()
print(f"Latest year in data: {current_year}")

ACLED: 2,372,683 events
Economics: 10,936 country-years
Latest year in data: 2024


## Process & Join

In [43]:
# Ten-year window ending at current_year (inclusive on both ends, hence the -9)
last_10_years_start = current_year - 9
in_window = df_acled['year'] >= last_10_years_start
df_acled_recent = df_acled.loc[in_window].copy()

print(f"Filtered to last 10 years: {last_10_years_start}-{current_year}")

# One row per (country, year): number of events and summed fatalities
conflict_summary = (
    df_acled_recent
    .groupby(['country', 'year'], as_index=False)
    .agg(
        event_count=('event_id_cnty', 'count'),
        total_fatalities=('fatalities', 'sum'),
    )
)

# One economics row per country, taken from the end of the Year-sorted history.
# NOTE(review): GroupBy.last() returns the last non-null value of each column,
# so columns of one country may come from different years - confirm this is intended.
df_econ_latest = (
    df_econ
    .sort_values('Year')
    .groupby('Country', as_index=False)
    .last()
)

# Left join keeps every conflict row, including countries without an economics match
df_merged = pd.merge(
    conflict_summary,
    df_econ_latest,
    how='left',
    left_on='country',
    right_on='Country',
)

print(f"Merged: {len(df_merged):,} rows")
print(f"Records with population data: {df_merged['Population'].notna().sum():,}")

Filtered to last 10 years: 2015-2024
Merged: 1,606 rows
Records with population data: 1,288


## Viz 1: Bar Chart - Top 20 Conflict Countries (by Event Count) with Economic Sector %

In [44]:
# Collapse the country-year rows into one row per country.
# Conflict counts are summed over the window; the economics columns were joined
# per country, so 'first' just picks up that country's value.
summed_columns = ['event_count', 'total_fatalities']
carried_columns = ['Primary_%', 'Secondary_%', 'Tertiary_%', 'Tourism_%', 'Population']
aggregation_plan = {
    **{column: 'sum' for column in summed_columns},
    **{column: 'first' for column in carried_columns},
}
country_totals = df_merged.groupby('country').agg(aggregation_plan).reset_index()

# Drop countries that have no sector data
country_totals = country_totals.dropna(subset=['Primary_%']).copy()

# Per-capita rates, expressed per 100000 population
population = country_totals['Population']
country_totals['events_per_100k'] = country_totals['event_count'].div(population).mul(100000)
country_totals['fatalities_per_100k'] = country_totals['total_fatalities'].div(population).mul(100000)

# The 20 countries with the most events, highest first
top_by_events = country_totals.nlargest(20, 'event_count')
viz1_data = top_by_events.sort_values('event_count', ascending=False)

viz1_data.head(10)

Unnamed: 0,country,event_count,total_fatalities,Primary_%,Secondary_%,Tertiary_%,Tourism_%,Population,events_per_100k,fatalities_per_100k
220,Ukraine,197663,137716,18.74,11.22,70.04,,38000000.0,520.165789,362.410526
96,India,170146,12189,22.1,22.86,55.04,2.6,1417173000.0,12.006013,0.860092
134,Mexico,94541,57520,9.99,28.73,61.28,8.46,127504100.0,74.147405,45.112266
232,Yemen,93875,162361,25.29,15.16,59.55,,33696610.0,278.588822,481.831795
31,Brazil,91053,47537,13.64,18.61,67.75,,215313500.0,42.28857,22.07804
143,Myanmar,78505,79808,26.55,31.96,41.49,,54179310.0,144.898497,147.303474
223,United States,73368,440,4.38,14.65,80.97,2.97,333287600.0,22.013423,0.132018
159,Pakistan,70772,19536,29.49,17.07,53.44,,235824900.0,30.010407,8.284114
0,Afghanistan,67194,202145,41.89,8.9,49.21,,41128770.0,163.374685,491.492926
99,Iraq,59606,106705,48.81,9.21,41.99,,44496120.0,133.957741,239.807415


# Prepare JSON

In [45]:
# Columns exported for the Viz 1 bar chart, in output order
viz_columns = ['country', 'event_count', 'total_fatalities',
               'events_per_100k', 'fatalities_per_100k',
               'Primary_%', 'Secondary_%', 'Tertiary_%', 'Tourism_%',
               'Population']
rounded_columns = ['events_per_100k', 'fatalities_per_100k',
                   'Primary_%', 'Secondary_%', 'Tertiary_%', 'Tourism_%']

# Clean values: missing tourism share is exported as 0, rates and shares are rounded
# to 2 decimals, counts become plain ints and Population a nullable integer
# ('Int64' keeps missing values instead of failing on NaN).
viz_data = (
    viz1_data[viz_columns]
    .assign(**{'Tourism_%': lambda frame: frame['Tourism_%'].fillna(0)})
    .round({column: 2 for column in rounded_columns})
    .astype({'event_count': int, 'total_fatalities': int, 'Population': 'Int64'})
)

# Metadata: the period is derived from the filter window computed earlier instead of
# being typed in by hand, so it cannot drift when a newer ACLED export is loaded.
date_range = f'{last_10_years_start}-{current_year}'
metadata = {
    'title': f'Top Conflict Countries by Economic Sector ({date_range})',
    'description': 'Top 20 countries sorted by number of conflict events',
    'source': 'ACLED + World Bank',
    'date_range': date_range,
    'notes': 'Per capita rates calculated per 100000 population'
}

viz_data

Unnamed: 0,country,event_count,total_fatalities,events_per_100k,fatalities_per_100k,Primary_%,Secondary_%,Tertiary_%,Tourism_%,Population
220,Ukraine,197663,137716,520.17,362.41,18.74,11.22,70.04,0.0,38000000
96,India,170146,12189,12.01,0.86,22.1,22.86,55.04,2.6,1417173173
134,Mexico,94541,57520,74.15,45.11,9.99,28.73,61.28,8.46,127504125
232,Yemen,93875,162361,278.59,481.83,25.29,15.16,59.55,0.0,33696614
31,Brazil,91053,47537,42.29,22.08,13.64,18.61,67.75,0.0,215313498
143,Myanmar,78505,79808,144.9,147.3,26.55,31.96,41.49,0.0,54179306
223,United States,73368,440,22.01,0.13,4.38,14.65,80.97,2.97,333287557
159,Pakistan,70772,19536,30.01,8.28,29.49,17.07,53.44,0.0,235824862
0,Afghanistan,67194,202145,163.37,491.49,41.89,8.9,49.21,0.0,41128771
99,Iraq,59606,106705,133.96,239.81,48.81,9.21,41.99,0.0,44496122


# Save JSON

In [46]:
# Output folder for all D3.js datasets; created (with parents) on first run
viz_datasets_path = Path('../viz-datasets')
viz_datasets_path.mkdir(parents=True, exist_ok=True)

output = {
    'metadata': metadata,
    # Serialise through DataFrame.to_json so missing values (NaN / pd.NA) become
    # JSON null. Passing to_dict('records') straight to json.dump would emit the
    # non-standard token NaN or raise TypeError on pd.NA.
    'data': json.loads(viz_data.to_json(orient='records'))
}

output_file = viz_datasets_path / 'viz1_bar_chart_sectors_conflicts.json'
with open(output_file, 'w', encoding='utf-8') as f:
    # allow_nan=False makes the dump fail loudly rather than write invalid JSON
    json.dump(output, f, indent=2, ensure_ascii=False, allow_nan=False)

print(f"✓ Saved: {output_file.name} ({output_file.stat().st_size / 1024:.1f} KB)")

✓ Saved: viz1_bar_chart_sectors_conflicts.json (6.2 KB)


## Viz 3: Grouped Bar Chart - Event Types Grouped by Country

In [47]:
# The 7 highlighted countries, in chart order (the Viz 2 cell further down uses the same list)
highlighted_countries = ['Ukraine', 'India', 'Mexico', 'United States', 'Afghanistan', 'Somalia', 'Italy']

# Raw ACLED event types needed to build the chart columns
raw_event_types = ['Battles', 'Explosions/Remote violence', 'Protests', 'Riots', 'Violence against civilians']
# Final chart columns; 'Strategic developments' is deliberately left out
viz3_columns = ['Battles', 'Explosions/Remote violence', 'Protests & Riots', 'Violence against civilians']

# Count events per (country, event_type) for the highlighted countries only and
# spread the event types into columns. Combinations with no events count as 0.
events_by_country = (
    df_acled_recent[df_acled_recent['country'].isin(highlighted_countries)]
    .groupby(['country', 'event_type'])['event_id_cnty']
    .count()
    .unstack('event_type', fill_value=0)
    # Guarantee every required column exists even if a type never occurs in the selection
    .reindex(columns=raw_event_types, fill_value=0)
)

# Keep the row order of highlighted_countries; countries without any events are skipped
present_countries = [name for name in highlighted_countries if name in events_by_country.index]
events_by_country = events_by_country.loc[present_countries]

# Combine Protests + Riots into "Protests & Riots", keep the 4 chart columns,
# and turn the country index back into a regular column
pivot_df = (
    events_by_country
    .assign(**{'Protests & Riots': events_by_country['Protests'] + events_by_country['Riots']})
    [viz3_columns]
    .astype(int)
    .reset_index()
)

# Create the JSON structure; the period comes from the filter window computed earlier
date_range = f"{last_10_years_start}-{current_year}"
output_data = {
    "metadata": {
        "title": f"Event Types by Country ({date_range})",
        "description": "Distribution of ACLED event types across the 7 highlighted countries",
        "source": "ACLED",
        "date_range": date_range,
        "note": "Protests & Riots combined. Strategic developments excluded. Same 7 countries as viz2."
    },
    "data": pivot_df.to_dict('records')
}

# Save next to the other datasets, with the same encoding settings as the other cells
output_file = viz_datasets_path / 'viz3_event_types.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(output_data, f, indent=2, ensure_ascii=False)

print(f"{len(pivot_df):,} countries included")
print(f"Countries: {pivot_df['country'].tolist()}")
print(f"\nEvent types: {viz3_columns}")
pivot_df

7 countries included
Countries: ['Ukraine', 'India', 'Mexico', 'United States', 'Afghanistan', 'Somalia', 'Italy']

Event types: ['Battles', 'Explosions/Remote violence', 'Protests & Riots', 'Violence against civilians']


event_type,country,Battles,Explosions/Remote violence,Protests & Riots,Violence against civilians
0,Ukraine,53383,126721,6436,1485
1,India,6393,1663,150906,6518
2,Mexico,9115,190,42614,38103
3,United States,24,18,69188,388
4,Afghanistan,40917,17393,1501,4738
5,Somalia,15646,6479,1511,4938
6,Italy,0,14,23893,43


## Additional Visualizations

In [48]:
# Add cells below for:
# - Heatmap (years × event types)
# - 100% stacked bar (sector composition)

## Viz 4: Heatmap - Event Types × Years

In [49]:
# One row per (year, event_type) with the number of events and summed fatalities,
# all numeric columns stored as plain ints
heatmap_data = (
    df_acled_recent
    .groupby(['year', 'event_type'], as_index=False)
    .agg(
        event_count=('event_id_cnty', 'count'),
        total_fatalities=('fatalities', 'sum'),
    )
    .astype({'year': int, 'event_count': int, 'total_fatalities': int})
)

# Quick summary of what the heatmap will cover
first_year = heatmap_data['year'].min()
last_year = heatmap_data['year'].max()
event_type_names = sorted(heatmap_data['event_type'].unique())

print(f"Years covered: {first_year} - {last_year}")
print(f"Event types: {event_type_names}")
print(f"\nTotal rows: {len(heatmap_data):,}")
print("\nSample data:")
heatmap_data.head(10)

Years covered: 2015 - 2024
Event types: ['Battles', 'Explosions/Remote violence', 'Protests', 'Riots', 'Strategic developments', 'Violence against civilians']

Total rows: 60

Sample data:


Unnamed: 0,year,event_type,event_count,total_fatalities
0,2015,Battles,7804,32841
1,2015,Explosions/Remote violence,8217,15506
2,2015,Protests,9522,255
3,2015,Riots,4391,1361
4,2015,Strategic developments,1432,82
5,2015,Violence against civilians,4800,12422
6,2016,Battles,12484,59976
7,2016,Explosions/Remote violence,17337,40042
8,2016,Protests,26229,674
9,2016,Riots,6633,1241


In [50]:
# Describe the heatmap dataset; the period is read back from the aggregated data
heatmap_years = heatmap_data['year']
metadata = dict(
    title='Heatmap: Event Types by Year (2015-2024)',
    description='Temporal distribution of ACLED event types showing both event counts and fatalities',
    source='ACLED',
    date_range=f"{heatmap_years.min()}-{heatmap_years.max()}",
    note='Color intensity based on event_count, fatalities included for additional context',
)

# Bundle metadata and one record per (year, event_type) row
output = dict(metadata=metadata, data=heatmap_data.to_dict('records'))

# Write the dataset next to the other viz files
output_file = viz_datasets_path / 'viz4_heatmap_event_types_years.json'
output_file.write_text(
    json.dumps(output, indent=2, ensure_ascii=False),
    encoding='utf-8',
)

size_kb = output_file.stat().st_size / 1024
print(f"✓ Saved: {output_file.name} ({size_kb:.1f} KB)")

✓ Saved: viz4_heatmap_event_types_years.json (8.1 KB)


## Viz 2: 100% Stacked Bar Chart - Economic Sector Composition

In [51]:
# The 7 highlighted countries, in the order they should appear in the chart
highlighted_countries = ['Ukraine', 'India', 'Mexico', 'United States', 'Afghanistan', 'Somalia', 'Italy']

# Take the rows for these countries from viz1_data (the top-20 table of Viz 1)
stacked_data = viz1_data[viz1_data['country'].isin(highlighted_countries)].copy()

# A highlighted country that is not in viz1_data would otherwise vanish silently
available_countries = set(stacked_data['country'])
missing_countries = [name for name in highlighted_countries if name not in available_countries]
if missing_countries:
    print(f'Warning: highlighted countries not found in viz1_data (skipped): {missing_countries}')

# Sort by the position of each country in highlighted_countries
stacked_data['sort_order'] = stacked_data['country'].map(highlighted_countries.index)
stacked_data = stacked_data.sort_values('sort_order').drop(columns=['sort_order'])

# Transform to one record per (country, sector).
# Tourism is shown separately, but subtracted from Tertiary to keep the total at 100%.
stacked_records = []
for row in stacked_data.to_dict('records'):
    # Missing tourism / tertiary values are treated as 0
    tourism_pct = float(row['Tourism_%']) if pd.notna(row['Tourism_%']) else 0.0
    tertiary_pct = float(row['Tertiary_%']) if pd.notna(row['Tertiary_%']) else 0.0

    sector_shares = {
        'Primary': float(row['Primary_%']),
        'Secondary': float(row['Secondary_%']),
        # Tertiary without Tourism (since Tourism is already part of Tertiary)
        'Tertiary': tertiary_pct - tourism_pct,
        'Tourism': tourism_pct,
    }
    # Every sector is rounded to 2 decimals so all four values are exported consistently
    stacked_records.extend(
        {'country': row['country'], 'sector': sector, 'percentage': round(share, 2)}
        for sector, share in sector_shares.items()
    )

# Create the output structure; the period comes from the filter window computed earlier
stacked_output = {
    'metadata': {
        'title': '100% Stacked Bar: Economic Sector Composition by Country',
        'description': 'Distribution of economic sectors (Primary, Secondary, Tertiary, Tourism) for highlighted conflict countries',
        'source': 'World Bank + ACLED',
        'date_range': f'{last_10_years_start}-{current_year}',
        'note': 'Tourism is shown separately but is part of Tertiary sector. Bars total 100%. Only showing the 7 highlighted countries from Viz 1.'
    },
    'countries': stacked_data['country'].tolist(),
    'data': stacked_records
}

# Save to JSON
output_file = viz_datasets_path / 'viz2_stacked_bar_sectors.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(stacked_output, f, indent=2, ensure_ascii=False)

print(f'✓ Saved: {output_file.name} ({output_file.stat().st_size / 1024:.1f} KB)')
print(f'Countries: {len(stacked_data)}')
print(f'Data records: {len(stacked_records)}')
print('\nFirst country sample:')
print([r for r in stacked_records if r['country'] == stacked_data.iloc[0]['country']])



✓ Saved: viz2_stacked_bar_sectors.json (3.2 KB)
Countries: 7
Data records: 28

First country sample:
[{'country': 'Ukraine', 'sector': 'Primary', 'percentage': 18.74}, {'country': 'Ukraine', 'sector': 'Secondary', 'percentage': 11.22}, {'country': 'Ukraine', 'sector': 'Tertiary', 'percentage': 70.04}, {'country': 'Ukraine', 'sector': 'Tourism', 'percentage': 0.0}]


In [52]:
# World averages of the three main sector shares
sector_cols = ['Primary_%', 'Secondary_%', 'Tertiary_%']

# Only countries that have a Primary_% value take part in the average
has_sector_data = df_econ_latest['Primary_%'].notna()
econ_valid = df_econ_latest.loc[has_sector_data].copy()

# Unweighted mean: every country counts once, regardless of size
simple_mean = econ_valid[sector_cols].mean().round(2)

print('Simple mean:', simple_mean.to_dict())

Simple mean: {'Primary_%': 19.78, 'Secondary_%': 18.74, 'Tertiary_%': 61.39}


## Viz 5: Waffle Chart - Economic Sectors by Event Type

In [53]:
# Attach each country's sector percentages to every ACLED event.
# Only Primary, Secondary and Tertiary are used here (Tourism is left out).
df_events_econ = pd.merge(
    df_acled_recent,
    df_econ_latest[['Country', 'Primary_%', 'Secondary_%', 'Tertiary_%']],
    how='left',
    left_on='country',
    right_on='Country',
)

# Keep events that have all three sector values and are not 'Strategic developments'
has_all_sectors = df_events_econ[['Primary_%', 'Secondary_%', 'Tertiary_%']].notna().all(axis=1)
is_strategic = df_events_econ['event_type'] == 'Strategic developments'
df_events_econ = df_events_econ[has_all_sectors & ~is_strategic].copy()

print(f"Events with valid economic data (excluding Strategic developments): {len(df_events_econ):,} out of {len(df_acled_recent):,}")

# Events per (event_type, country); the sector values are constant within a country,
# so 'first' just carries them along
country_event_counts = (
    df_events_econ
    .groupby(['event_type', 'country'], as_index=False)
    .agg(**{
        'event_count': ('event_id_cnty', 'count'),
        'Primary_%': ('Primary_%', 'first'),
        'Secondary_%': ('Secondary_%', 'first'),
        'Tertiary_%': ('Tertiary_%', 'first'),
    })
)

# For every event type, average the sector percentages across countries,
# weighting each country by its share of that event type's events
waffle_data = []

for event_type, event_df in country_event_counts.groupby('event_type', sort=False):
    total_events = event_df['event_count'].sum()

    # Proportion of this event type's events that happened in each country
    weight = event_df['event_count'] / total_events

    primary_weighted = (event_df['Primary_%'] * weight).sum()
    secondary_weighted = (event_df['Secondary_%'] * weight).sum()
    tertiary_weighted = (event_df['Tertiary_%'] * weight).sum()

    # Rescale so the three shares add up to 100% before rounding
    total_pct = primary_weighted + secondary_weighted + tertiary_weighted
    primary_weighted = (primary_weighted / total_pct) * 100
    secondary_weighted = (secondary_weighted / total_pct) * 100
    tertiary_weighted = (tertiary_weighted / total_pct) * 100

    waffle_data.append({
        'event_type': event_type,
        'event_count': int(total_events),
        'primary_pct': round(primary_weighted, 2),
        'secondary_pct': round(secondary_weighted, 2),
        'tertiary_pct': round(tertiary_weighted, 2)
    })

# Highest primary-sector share first
waffle_df = (
    pd.DataFrame(waffle_data)
    .sort_values('primary_pct', ascending=False)
    .reset_index(drop=True)
)

# Sanity column: sum of the rounded shares (can differ from 100 by rounding)
waffle_df['total_pct'] = waffle_df['primary_pct'] + waffle_df['secondary_pct'] + waffle_df['tertiary_pct']

print("\nEvent Types and WEIGHTED Average Economic Sector Composition (sorted by Primary %):\n")
print(waffle_df.to_string(index=False))
print(f"\nTotal event types: {len(waffle_df)}")
print("\nNote: Percentages are weighted by event frequency per country.")
print("Example: If Ukraine has 100000 Battles and Germany has 100 Battles,")
print("Ukraine's economic structure is weighted ~1000x more heavily.")
print("Strategic developments excluded.")

Events with valid economic data (excluding Strategic developments): 1,708,290 out of 2,219,670

Event Types and WEIGHTED Average Economic Sector Composition (sorted by Primary %):

                event_type  event_count  primary_pct  secondary_pct  tertiary_pct  total_pct
                   Battles       306713        29.04          15.86         55.10     100.00
Explosions/Remote violence       291287        25.77          13.52         60.71     100.00
Violence against civilians       207049        22.57          21.26         56.18     100.01
                     Riots       108648        20.28          20.74         58.99     100.01
                  Protests       794593        16.21          20.73         63.06     100.00

Total event types: 5

Note: Percentages are weighted by event frequency per country.
Example: If Ukraine has 100000 Battles and Germany has 100 Battles,
Ukraine's economic structure is weighted ~1000x more heavily.
Strategic developments excluded.


In [54]:
# Build the waffle chart JSON: one record per event type, with its sector
# breakdown nested as a list so D3.js can iterate over it directly.
# Tourism is not part of this dataset.
sector_fields = [
    ('Primary', 'primary_pct'),
    ('Secondary', 'secondary_pct'),
    ('Tertiary', 'tertiary_pct'),
]

waffle_records = [
    {
        'event_type': row['event_type'],
        'event_count': int(row['event_count']),
        'sectors': [
            {'sector': sector_name, 'percentage': float(row[column])}
            for sector_name, column in sector_fields
        ],
        'total_percentage': float(row['total_pct']),
    }
    for row in waffle_df.to_dict('records')
]

# Dataset description; the period comes from the filter window computed earlier
metadata = dict(
    title='Waffle Chart: Economic Sector Composition by Event Type',
    description='Weighted average economic sector distribution for countries experiencing each type of conflict event',
    source='ACLED + World Bank',
    date_range=f'{last_10_years_start}-{current_year}',
    note='Percentages are WEIGHTED by event frequency per country. Countries with more events of a type have proportionally more influence on the average. Tourism sector removed. NaN values excluded. Strategic developments excluded. Sorted by Primary sector percentage.',
)

waffle_output = dict(metadata=metadata, data=waffle_records)

# Write the dataset next to the other viz files
output_file = viz_datasets_path / 'viz5_waffle_sectors_by_event_type.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(waffle_output, f, indent=2, ensure_ascii=False)

size_kb = output_file.stat().st_size / 1024
print(f'Saved: {output_file.name} ({size_kb:.1f} KB)')
print(f'Event types: {len(waffle_records)}')
print('\nFirst event type sample:')
print(json.dumps(waffle_records[0], indent=2))

Saved: viz5_waffle_sectors_by_event_type.json (2.5 KB)
Event types: 5

First event type sample:
{
  "event_type": "Battles",
  "event_count": 306713,
  "sectors": [
    {
      "sector": "Primary",
      "percentage": 29.04
    },
    {
      "sector": "Secondary",
      "percentage": 15.86
    },
    {
      "sector": "Tertiary",
      "percentage": 55.1
    }
  ],
  "total_percentage": 100.0
}
