In [2]:
import json
import os
import warnings
from datetime import datetime
from os import listdir
from os.path import isfile, join

import pandas as pd
import requests

In [3]:
def get_data(region):
    """Load every saved JSON page for ``region`` into one DataFrame.

    Each regular file in ``data/<region>/`` is parsed as JSON, turned into a
    DataFrame, and all per-file frames are concatenated in a single call.

    Parameters
    ----------
    region : str
        One of ``"MENA"``, ``"SSA"`` or ``"CIS_CEE"``.

    Returns
    -------
    pandas.DataFrame
        Rows of all files, in sorted file-name order. The index is not
        reset, so index labels repeat across files.

    Raises
    ------
    ValueError
        If ``region`` is not one of the supported names.
    FileNotFoundError
        If the region directory is missing or contains no files.
    """
    region_dirs = {
        "MENA": "data/MENA/",
        "SSA": "data/SSA/",
        "CIS_CEE": "data/CIS_CEE/",
    }
    if region not in region_dirs:
        raise ValueError(
            f"Unknown region {region!r}; expected one of {sorted(region_dirs)}"
        )
    mypath = region_dirs[region]

    # listdir() returns names in filesystem-dependent order; sorting makes the
    # row order (and therefore the "last row") reproducible between runs.
    onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))
    if not onlyfiles:
        raise FileNotFoundError(f"No data files found in {mypath!r}")

    # Collect the per-file frames and concatenate once instead of growing a
    # DataFrame inside the loop, which copies all previous rows every time.
    frames = []
    for file_name in onlyfiles:
        with open(join(mypath, file_name), "r") as file:
            frames.append(pd.DataFrame.from_dict(json.load(file)))
    return pd.concat(frames)

In [4]:
# ACLED API credentials are read from the environment so that no secret is
# stored in the notebook. Set ACLED_EMAIL and ACLED_API_KEY before starting the
# kernel; any key that was ever saved in this cell should be regenerated.
email = os.environ.get("ACLED_EMAIL", "")
api = os.environ.get("ACLED_API_KEY", "")
if not (email and api):
    warnings.warn(
        "ACLED_EMAIL and/or ACLED_API_KEY is not set; "
        "`url` will carry empty credentials."
    )
url = f"https://api.acleddata.com/acled/read?key={api}&email={email}"


def _iso_filter(codes):
    """Return the ``iso=A:OR:iso=B:...`` query fragment for the given codes."""
    return ":OR:".join(f"iso={code}" for code in codes)


# MENA
mena_iso = [364, 376, 422, 275, 760, 368, 792, 887, 818, 788]
mena_iso_txt = _iso_filter(mena_iso)

# SSA
ssa_iso = [231, 706, 232, 204, 854, 729, 728, 566, 120, 180, 508, 686, 404, 288, 768, 466]
ssa_iso_txt = _iso_filter(ssa_iso)

# CIS CEE
cis_iso = [804, 643, 112, 688, 0, 268, 51, 31]
cis_iso_txt = _iso_filter(cis_iso)

# Region name -> iso query fragment. The region names are also the names of
# the sub-directories under data/ that hold each region's saved pages.
list_txts = {"MENA": mena_iso_txt, "SSA": ssa_iso_txt, "CIS_CEE": cis_iso_txt}
# Top up each region's local archive: request pages from the API until a page
# contains an event id that is already stored locally, saving pages on the way.
for region, iso_filter in list_txts.items():
    print(region)
    df = get_data(region)

    # df does not change while paging, so build the lookup structures once
    # instead of converting the column to a list on every membership test.
    known_ids = set(df["event_id_cnty"])
    last_known_id = df["event_id_cnty"].iloc[-1]

    page = 1
    id_list = []
    while True:
        r = requests.get(f"{url}&{iso_filter}&page={page}", timeout=120)
        r.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error body
        page += 1
        data = r.json()["data"]
        if not data:
            # No rows left on the server and still no overlap with the local
            # archive: stop instead of indexing into an empty page.
            break

        id_list = [event.get("event_id_cnty") for event in data]
        # A page is written only when it does not contain the last locally
        # stored event id.
        if last_known_id not in id_list:
            file_name = f'{data[0]["event_date"]}'
            with open(f'data/{region}/{file_name}_{page-1}.json', 'w') as f:
                json.dump(data, f)
            print(f"Data Saved: {region}/{file_name}_{page-1}")

        # Stop as soon as this page overlaps with what is already on disk.
        if not known_ids.isdisjoint(id_list):
            break

MENA
Data Saved: MENA/2025-02-21_1
Data Saved: MENA/2025-01-20_2
SSA
Data Saved: SSA/2025-02-21_1
CIS_CEE
Data Saved: CIS_CEE/2025-02-21_1


In [121]:
# Distinct values of the country column, ordered by descending row count.
df.loc[:, "country"].value_counts().index.unique()

Index(['Ukraine', 'Russia', 'Azerbaijan', 'Serbia', 'Armenia', 'Georgia',
       'Kosovo', 'Belarus'],
      dtype='object', name='country')

In [123]:
# Show the column labels of df.
df.columns

Index(['event_id_cnty', 'event_date', 'year', 'time_precision',
       'disorder_type', 'event_type', 'sub_event_type', 'actor1',
       'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2',
       'interaction', 'civilian_targeting', 'iso', 'region', 'country',
       'admin1', 'admin2', 'admin3', 'location', 'latitude', 'longitude',
       'geo_precision', 'source', 'source_scale', 'notes', 'fatalities',
       'tags', 'timestamp'],
      dtype='object')

In [119]:
# Display df with only the first row kept for each event_id_cnty value.
# The result is not assigned, so df itself is left unchanged.
df.loc[~df.duplicated(subset=["event_id_cnty"], keep="first")]

Unnamed: 0,event_id_cnty,event_date,year,time_precision,disorder_type,event_type,sub_event_type,actor1,assoc_actor_1,inter1,...,location,latitude,longitude,geo_precision,source,source_scale,notes,fatalities,tags,timestamp
0,UKR105123,2023-05-26,2023,1,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Russia (2000-),,External/Other forces,...,Kozacha Lopan,50.3319,36.1936,2,Ministry of Defence of Ukraine,Other,"On 26 May 2023, Russian forces shelled near Ko...",0,,1727205001
1,SRB2675,2023-05-26,2023,1,Demonstrations,Protests,Peaceful protest,Protesters (Serbia),Green-Left Front; People's Party (Serbia); SSP...,Protesters,...,Uzice,43.8582,19.8441,1,N1TV Online - Bosnia and Herzegovina,Regional,"On 26 May 2023, several hundred citizens prote...",0,crowd size=several hundred,1728421389
2,SRB2676,2023-05-26,2023,1,Demonstrations,Protests,Peaceful protest,Protesters (Serbia),People's Party (Serbia); POKS: Movement to Res...,Protesters,...,Topola,44.2541,20.6825,1,N1TV Online - Bosnia and Herzegovina,Regional,"On 26 May 2023, several hundred citizens prote...",0,crowd size=several hundred,1728421389
3,UKR105150,2023-05-26,2023,1,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Russia (2000-),,External/Other forces,...,Pryvillya,48.7249,37.8691,2,Ministry of Defence of Ukraine,Other,"On 26 May 2023, Russian forces shelled near Pr...",0,,1728421518
4,XKX804,2023-05-26,2023,1,Demonstrations,Riots,Violent demonstration,Rioters (Kosovo),Serbian Ethnic Group (Kosovo),Rioters,...,Zvecan,42.9108,20.8387,1,Kallxo; N1TV Online - Bosnia and Herzegovina; ...,National-Regional,"On 26 May 2023, local Kosovo Serbs from Zvecan...",0,crowd size=no report,1728421527
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,UKR186830,2024-10-03,2024,1,Political violence,Battles,Armed clash,Military Forces of Russia (2000-),,External/Other forces,...,Starytsia,50.2405,36.7931,2,Ministry of Defence of Ukraine,Other,"On 3 October 2024, Russian and Ukrainian force...",55,,1728421524
4996,UKR186831,2024-10-03,2024,1,Political violence,Explosions/Remote violence,Air/drone strike,Military Forces of Russia (2000-) Air Force,,External/Other forces,...,Bochkove,50.3244,37.1409,2,Ministry of Defence of Ukraine,Other,"On 3 October 2024, Russian forces conducted ai...",0,,1728421524
4997,UKR186832,2024-10-03,2024,1,Political violence,Battles,Armed clash,Military Forces of Russia (2000-),,External/Other forces,...,Hrekivka,49.2450,37.9205,2,Ministry of Defence of Ukraine,Other,"On 3 October 2024, Russian and Ukrainian force...",0,,1728421524
4998,UKR186833,2024-10-03,2024,1,Political violence,Battles,Armed clash,Military Forces of Russia (2000-),,External/Other forces,...,Nelipivka,48.3515,37.8370,2,Institute for the Study of War; Ministry of De...,Other,"On 3 October 2024, Russian and Ukrainian force...",0,,1728421524


In [78]:
# MENA countries and the codes passed to the API's `iso` filter:
#   Iran - 364
#   Israel - 376
#   Lebanon - 422
#   Palestine - 275
#   Syria - 760
#   Iraq - 368
#   Turkey - 792
#   Yemen - 887
#   Egypt - 818
#   Tunisia - 788
mena_iso = [364, 376, 422, 275, 760, 368, 792, 887, 818, 788]
mena_iso_txt = ":OR:".join(f"iso={code}" for code in mena_iso)

# Paging stops once the first event on a page is dated on or before start_date.
# NOTE(review): this presumes the API returns newest events first - confirm.
start_date = datetime.strptime("2023-06-01", '%Y-%m-%d')
# Placeholder later than start_date so the loop runs at least once; it is
# overwritten with the date of the first event of every fetched page.
response_date = datetime.strptime("2024-10-30", '%Y-%m-%d')

# open(..., 'w') does not create missing directories.
os.makedirs("data/MENA", exist_ok=True)

page = 1

while response_date > start_date:
    r = requests.get(f"{url}&{mena_iso_txt}&page={page}", timeout=120)
    r.raise_for_status()  # surface HTTP errors instead of parsing an error body
    page += 1
    data = r.json()["data"]
    if not data:
        # The API ran out of rows before start_date was reached; data[0]
        # below would raise IndexError on an empty page.
        break
    response_date = datetime.strptime(data[0]["event_date"], '%Y-%m-%d')

    # One file per page, named after the page's first event date and page number.
    file_name = f'{data[0]["event_date"]}'
    with open(f'data/MENA/{file_name}_{page-1}.json', 'w') as f:
        json.dump(data, f)

In [79]:
# SSA countries and the codes passed to the API's `iso` filter:
#   Ethiopia - 231
#   Somalia - 706
#   Eritrea - 232
#   Benin - 204
#   Burkina Faso - 854
#   Sudan - 729
#   South Sudan - 728
#   Nigeria - 566
#   Cameroon - 120
#   DRC - 180
#   Mozambique - 508
#   Senegal - 686
#   Kenya - 404
#   Ghana - 288
#   Togo - 768
#   Mali - 466
ssa_iso = [231, 706, 232, 204, 854, 729, 728, 566, 120, 180, 508, 686, 404, 288, 768, 466]
ssa_iso_txt = ":OR:".join(f"iso={code}" for code in ssa_iso)

# Paging stops once the first event on a page is dated on or before start_date.
# NOTE(review): this presumes the API returns newest events first - confirm.
start_date = datetime.strptime("2023-06-01", '%Y-%m-%d')
# Placeholder later than start_date so the loop runs at least once; it is
# overwritten with the date of the first event of every fetched page.
response_date = datetime.strptime("2024-10-30", '%Y-%m-%d')

# open(..., 'w') does not create missing directories.
os.makedirs("data/SSA", exist_ok=True)

page = 1

while response_date > start_date:
    r = requests.get(f"{url}&{ssa_iso_txt}&page={page}", timeout=120)
    r.raise_for_status()  # surface HTTP errors instead of parsing an error body
    page += 1
    data = r.json()["data"]
    if not data:
        # The API ran out of rows before start_date was reached; data[0]
        # below would raise IndexError on an empty page.
        break
    response_date = datetime.strptime(data[0]["event_date"], '%Y-%m-%d')

    # One file per page, named after the page's first event date and page number.
    file_name = f'{data[0]["event_date"]}'
    with open(f'data/SSA/{file_name}_{page-1}.json', 'w') as f:
        json.dump(data, f)

In [77]:
# CEE / CIS countries and the codes passed to the API's `iso` filter:
#   Ukraine - 804
#   Russia - 643
#   Belarus - 112
#   Serbia - 688
#   Kosovo - 0
#   Georgia - 268
#   Armenia - 51
#   Azerbaijan - 31
cis_iso = [804, 643, 112, 688, 0, 268, 51, 31]
cis_iso_txt = ":OR:".join(f"iso={code}" for code in cis_iso)

# Paging stops once the first event on a page is dated on or before start_date.
# NOTE(review): this presumes the API returns newest events first - confirm.
start_date = datetime.strptime("2023-06-01", '%Y-%m-%d')
# Placeholder later than start_date so the loop runs at least once; it is
# overwritten with the date of the first event of every fetched page.
response_date = datetime.strptime("2024-10-30", '%Y-%m-%d')

# open(..., 'w') does not create missing directories.
os.makedirs("data/CIS_CEE", exist_ok=True)

page = 1

while response_date > start_date:
    r = requests.get(f"{url}&{cis_iso_txt}&page={page}", timeout=120)
    r.raise_for_status()  # surface HTTP errors instead of parsing an error body
    page += 1
    data = r.json()["data"]
    if not data:
        # The API ran out of rows before start_date was reached; data[0]
        # below would raise IndexError on an empty page.
        break
    response_date = datetime.strptime(data[0]["event_date"], '%Y-%m-%d')

    # One file per page, named after the page's first event date and page number.
    file_name = f'{data[0]["event_date"]}'
    with open(f'data/CIS_CEE/{file_name}_{page-1}.json', 'w') as f:
        json.dump(data, f)

In [108]:
while not any(el in df["event_id_cnty"].tolist() for el in id_list):

hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
h

KeyboardInterrupt: 

In [81]:
from os import listdir
from os.path import isfile, join

In [84]:
# Load every saved MENA page into one DataFrame (same steps as get_data("MENA")).
mypath = "data/MENA/"
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

# Build one frame per file and concatenate once; concatenating inside the loop
# re-copies all previously loaded rows on every iteration.
frames = []
for file_name in onlyfiles:
    with open(join(mypath, file_name), "r") as file:
        frames.append(pd.DataFrame.from_dict(json.load(file)))
df = pd.concat(frames)

# Optional post-processing, currently disabled (actor_map is not defined in
# the visible part of this notebook):
# df = df.reset_index(drop=True)
# df['event_date'] = pd.to_datetime(df['event_date'])
# df['latitude'] = pd.to_numeric(df['latitude'])
# df['longitude'] = pd.to_numeric(df['longitude'])
# df['actor_group'] = df['actor1'].map(actor_map)
    

In [106]:
# Check whether at least one of these event ids occurs in df.
id_list = ["IRN23229","LBN14820"]
not set(id_list).isdisjoint(df["event_id_cnty"])

True

In [99]:
# Count the distinct event_id_cnty values in df.
df["event_id_cnty"].nunique()

115000

In [95]:
# Preview the first rows of df.
df.head()

Unnamed: 0,event_id_cnty,event_date,year,time_precision,disorder_type,event_type,sub_event_type,actor1,assoc_actor_1,inter1,...,location,latitude,longitude,geo_precision,source,source_scale,notes,fatalities,tags,timestamp
0,IRN23229,2023-05-09,2023,1,Demonstrations,Protests,Peaceful protest,Protesters (Iran),Students (Iran),Protesters,...,Tehran - District 6,35.7252,51.4029,1,Trade Unions of Iranian Student,New media,"On 9 May 2023, students at the Faculty of Soci...",0,crowd size=no report,1684803186
1,LBN14820,2023-05-09,2023,1,Demonstrations,Riots,Violent demonstration,Rioters (Lebanon),Depositors Outcry Association,Rioters,...,Beirut - Port,33.8982,35.5074,1,L'Orient Le Jour; Naharnet; The National,National-International,"On 9 May 2023, clashes erupted between demonst...",0,crowd size=no report,1684803187
2,LBN14822,2023-05-09,2023,1,Demonstrations,Riots,Violent demonstration,Rioters (Lebanon),Depositors Outcry Association; Independent Pol...,Rioters,...,Beirut - Port,33.8982,35.5074,1,Murr TV; Naharnet,National,"On 9 May 2023, clashes erupted between police ...",0,crowd size=no report,1684803187
3,TUR29741,2023-05-09,2023,1,Demonstrations,Protests,Peaceful protest,Protesters (Turkey),Yesil Sol Parti: Green Left Party; HDP: People...,Protesters,...,Cizre,37.3274,42.1785,1,ANF News,International,"On 9 May 2023, thousands gathered at public me...",0,crowd size=thousands,1684803190
4,TUR29750,2023-05-09,2023,1,Demonstrations,Protests,Peaceful protest,Protesters (Turkey),Yesil Sol Parti: Green Left Party,Protesters,...,Gaziemir,38.3239,27.1292,1,Mezopotamya Agency,National,"On 9 May 2023, Green-Left party members staged...",0,crowd size=no report,1684803190
