# Imports and data loading

In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

from statsmodels.regression.linear_model import OLS
import statsmodels.api as sm

# Food-crisis data, read straight from CSV into the main frame.
df = pd.read_csv(filepath_or_buffer="data/food_crises_interpol.csv")
# News-article topic data, loaded the same way into its own frame.
news_df = pd.read_csv(filepath_or_buffer="data/articles_topics_positivity.csv")

# Processing Data

In [3]:
# Parse the "YYYY_MM" strings in year_month into timestamps held in a new date column.
df["date"] = pd.to_datetime(df["year_month"], format="%Y_%m")
# Report how many distinct values the district column contains.
print(df["district"].unique().size)
# Key the frame on (date, county); later cells sort and join on this index.
df = df.set_index(["date", "county"])

77


In [5]:
# Snap every article date to the first day of its month so it matches the
# monthly timestamps used in df's (date, county) index. Going through a monthly
# period avoids formatting each date as a string and parsing it back.
news_df["date"] = (
    pd.to_datetime(news_df["date"], format="%Y-%m-%d")
    .dt.to_period("M")
    .dt.to_timestamp()
)

In [10]:
def create_news_features(columns, news=None):
    """Average the requested article-level columns per (date, county).

    Parameters
    ----------
    columns : iterable of str
        Names of the columns to aggregate; each must be present in ``news``.
    news : pandas.DataFrame, optional
        Article-level frame containing "date" and "county" columns plus every
        name in ``columns``. When omitted, the notebook-level ``news_df`` is
        used, which is what the original implementation always did.

    Returns
    -------
    pandas.DataFrame
        Indexed by (date, county) with one column per requested name, in the
        requested order, holding the group mean. Means that come out as NaN
        are replaced with 0.
    """
    if news is None:
        news = news_df  # fall back to the notebook-level articles frame

    # Build the groupby once and aggregate all requested columns in a single
    # pass instead of regrouping the whole frame for every column.
    features = news.groupby(["date", "county"])[list(columns)].mean()

    # A 3-step rolling mean and a 3-step shift were previously sketched here
    # but left disabled. NOTE(review): if they are revived, apply them per
    # county (e.g. via groupby(level="county")) so a window never spans rows
    # that belong to different counties.
    return features.fillna(0)

In [11]:
# Aggregate the positive/negative score columns for each news topic
# into one (date, county)-indexed feature table.
news_features = create_news_features(
    [
        'hunger_positive',
        'hunger_negative',
        'refugees_positive',
        'refugees_negative',
        'humanitarian_positive',
        'humanitarian_negative',
        'conflict_positive',
        'conflict_negative',
        'vulnerability_positive',
        'vulnerability_negative',
    ]
)

# Merge

In [13]:
# Order rows by the first index level (date); reassigning instead of mutating in place.
df = df.sort_index(level=0)

In [15]:
# Attach the aggregated news features on the shared index. A left join keeps
# every row of df, leaving NaN where no news features exist for that key.
df = df.join(
    other=news_features,
    how="left",
)

In [18]:
# Rows with no matching news features come out of the left join as NaN;
# replace those with 0. The columns to fill are taken from news_features itself
# so this cell always matches the features that were actually joined. The
# previously hard-coded names ('hunger', 'refugees', ...) do not correspond to
# the '*_positive' / '*_negative' columns built above and would raise a
# KeyError unless the base data happened to contain columns with those names.
columns_to_fill = list(news_features.columns)
df[columns_to_fill] = df[columns_to_fill].fillna(0)


In [19]:
# Display the merged frame; the notebook abbreviates long frames to their leading and trailing rows.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,country,district_code,district,centx,centy,region,year_month,year,month,ipc,...,area,cropland_pct,pop,ruggedness_mean,pasture_pct,hunger,refugees,conflict,humanitarian,vulnerability
date,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2007-01-01,Abiemnhom,South Sudan,136,Abiemnhom,29.13019,9.550939,Unity,2007_01,2007,1,,...,2408.233,1.742261,18973.21,11004.640,91.48276,0.0,0.0,0.00,0.0,0.00
2007-01-01,Akobo,South Sudan,114,Akobo,32.85308,7.799214,Jonglei,2007_01,2007,1,,...,9027.626,10.046920,147537.90,2787.772,89.71963,0.0,0.0,0.00,0.0,0.00
2007-01-01,Aweil Centre,South Sudan,128,Aweil Center,26.89562,8.417934,Northern Bahr el Ghazal,2007_01,2007,1,,...,11202.960,0.173833,48640.05,3754.860,81.76336,0.0,0.0,0.00,0.0,0.00
2007-01-01,Aweil East,South Sudan,141,Aweil East,27.61364,9.243139,Northern Bahr el Ghazal,2007_01,2007,1,,...,6400.981,26.804220,347359.50,3544.279,90.16216,0.0,0.0,0.00,0.0,0.00
2007-01-01,Aweil North,South Sudan,76,Aweil North,26.72969,9.334756,Northern Bahr el Ghazal,2007_01,2007,1,,...,6369.012,22.906570,146637.60,4808.697,93.45946,0.0,0.0,0.00,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-02-01,Wulu,South Sudan,105,Wulu,29.20889,6.200243,Lakes,2020_02,2020,2,2.0,...,11894.780,0.032678,45158.50,40366.550,65.10490,0.0,0.0,0.00,0.0,0.00
2020-02-01,Yambio,South Sudan,82,Yambio,28.54729,5.124909,Western Equatoria,2020_02,2020,2,3.0,...,8896.165,19.126490,197278.90,60628.160,60.88350,0.0,0.0,0.25,0.0,0.25
2020-02-01,Yei,South Sudan,81,Yei,30.34454,4.252148,Central Equatoria,2020_02,2020,2,3.0,...,6713.251,16.303490,288875.40,75813.310,62.85185,0.0,0.0,0.00,0.0,0.00
2020-02-01,Yirol East,South Sudan,107,Yirol East,30.80245,6.793194,Lakes,2020_02,2020,2,4.0,...,5599.893,2.482445,73693.13,18421.020,91.75000,0.0,0.0,0.00,0.0,0.00


In [20]:
# Persist the merged frame; the index is written out (pandas' default) so the
# date and county levels are kept in the CSV.
df.to_csv("data/food_crises_news.csv", index=True)