# Download ACLED Data for Ethiopia

This notebook downloads conflict event data for Ethiopia from ACLED (the Armed Conflict Location & Event Data Project), then inspects and summarizes the result.

## Setup

Make sure you have:
1. Created a `.env` file with your ACLED credentials (`ACLED_USERNAME` and `ACLED_PASSWORD`)
2. Activated the virtual environment
3. Installed all dependencies


In [None]:
import sys
from pathlib import Path

# The project root is taken to be the parent of the current working directory.
# NOTE(review): this assumes the notebook runs from a direct sub-directory of
# the project (e.g. `notebooks/`) — confirm for your launcher.
project_root = Path().resolve().parent

# The imports below are written as `from src.<module> import ...`, so Python
# must find the `src` package itself, which means its *parent* directory (the
# project root) has to be on sys.path. Having only `<root>/src` on the path is
# not enough and fails with ModuleNotFoundError on a fresh kernel.
sys.path.insert(0, str(project_root))
# Keep `<root>/src` first on the path as before, in case modules inside `src`
# import their siblings by bare name.
sys.path.insert(0, str(project_root / "src"))

from src.config import START_YEAR, END_YEAR, validate_credentials
from src.acled_client import fetch_acled_range, load_cached_data
from src.utils_logging import setup_logging

# Set up logging; the returned logger is kept for use in later cells.
logger = setup_logging()


## Validate Credentials


In [None]:
# Check the ACLED credentials before any download is attempted.
# A ValueError from validate_credentials() is reported to the reader rather
# than raised, so the notebook keeps running after printing the hint.
try:
    validate_credentials()
except ValueError as credential_error:
    print(f"✗ Error: {credential_error}")
    print("Please create a .env file with ACLED_USERNAME and ACLED_PASSWORD")
else:
    print("✓ ACLED credentials validated")


## Check for Cached Data


In [None]:
# Load whatever the cache helper returns; the code below relies on it exposing
# `.empty` and `.columns` (i.e. behaving like a pandas DataFrame).
cached_data = load_cached_data()

if cached_data.empty:
    print("No cached data found. Proceed with download.")
else:
    print(f"Found cached data with {len(cached_data)} records")
    # The date range is only reported when an `event_date` column is present.
    if 'event_date' in cached_data.columns:
        cached_dates = cached_data['event_date']
        print(f"Date range: {cached_dates.min()} to {cached_dates.max()}")
    print("\nYou can skip the download if you want to use cached data.")


## Download ACLED Data


In [None]:
# Tell the reader what is about to be requested before the fetch starts.
# Only the line that interpolates values is an f-string; the other two are
# plain literals (an `f` prefix without placeholders is a lint error).
print("Downloading ACLED data for Ethiopia")
print(f"Time window: {START_YEAR} to {END_YEAR}")
print("This may take several minutes depending on data volume...\n")

# Fetch the configured year range. `df` is the frame that the inspection and
# summary cells below operate on.
# NOTE(review): `save_individual=True` presumably also persists per-year
# results — confirm in src.acled_client.
df = fetch_acled_range(START_YEAR, END_YEAR, save_individual=True)


## Inspect Downloaded Data


In [None]:
# Quick look at the downloaded frame: size, columns, date span, a sample of
# rows and the column dtypes. Nothing is shown when the download came back empty.
if not df.empty:
    print(f"Total records: {len(df)}")
    print(f"\nColumns: {list(df.columns)}")
    # The date range is only reported when an `event_date` column is present.
    if 'event_date' in df.columns:
        print(f"\nDate range: {df['event_date'].min()} to {df['event_date'].max()}")
    # Plain literals: these headers interpolate nothing, so no `f` prefix.
    print("\nFirst few rows:")
    # `display` is provided by IPython/Jupyter and renders the frame richly.
    display(df.head())

    print("\nData types:")
    print(df.dtypes)
else:
    print("No data was downloaded.")


## Summary Statistics


In [None]:
# Headline statistics for the downloaded events. Each section prints its header
# together with its data; when the column is missing the reader is told so
# instead of being left with a dangling header and no content.
if df.empty:
    # Mirror the inspection cell: say explicitly that there is nothing to show.
    print("No data available to summarize.")
else:
    if 'year' in df.columns:
        print("Records per year:")
        print(df['year'].value_counts().sort_index())
    else:
        print("Records per year: not available ('year' column missing)")

    if 'event_type' in df.columns:
        print("\nEvent types:")
        print(df['event_type'].value_counts())
    else:
        print("\nEvent types: not available ('event_type' column missing)")

    if 'fatalities' in df.columns:
        print("\nTotal fatalities:")
        # NOTE(review): the `,.0f` format assumes `fatalities` is numeric —
        # confirm the dtype produced by fetch_acled_range.
        print(f"{df['fatalities'].sum():,.0f}")
    else:
        print("\nTotal fatalities: not available ('fatalities' column missing)")
