# Prerequisites

Required CSVs:

data/conflict.csv - raw conflict data

data/climate-complete-noaa.csv - raw climate data

data/clean_fews.csv - transformed food data from FEWSNET

data/clean_food.csv - transformed food data from WFP

data/clean_ipc.csv - transformed IPC data from FSNAU


For the last three CSVs, see the companion notebook on data transformation.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pystan
import scipy.special

# Model

There are three response vectors, $A_i$, $B_i$ and $C_i$, where $A_i$ is the proportion of the population in IPC Phase 2, $B_i$ is the proportion in IPC Phase 3, and $C_i$ is the proportion in IPC Phase 4 at time step $i$. We have $N$ data points and $K$ features whose coefficients we want to estimate.

$$\mu_{A_i} = \text{inv_logit}(\alpha_{A} + \sum_{X\in\{A,B,C\}}(\beta_{A,X} * \text{logit}(X_{i-1})) + \sum_k(\text{coeffs}_{A,k} * \text{feats}_{i,k}))$$

$$A_i \sim \text{Beta_prop}(\mu_{A_i}, \kappa_A)$$

$$\mu_{B_i} = \text{inv_logit}(\alpha_{B} + \sum_{X\in\{A,B,C\}}(\beta_{B,X} * \text{logit}(X_{i-1})) + \sum_k(\text{coeffs}_{B,k} * \text{feats}_{i,k}))$$

$$B_i \sim \text{Beta_prop}(\mu_{B_i}, \kappa_B)$$

$$\mu_{C_i} = \text{inv_logit}(\alpha_{C} + \sum_{X\in\{A,B,C\}}(\beta_{C,X} * \text{logit}(X_{i-1})) + \sum_k(\text{coeffs}_{C,k} * \text{feats}_{i,k}))$$

$$C_i \sim \text{Beta_prop}(\mu_{C_i}, \kappa_C)$$

Beta_prop is a reparameterisation of the Beta distribution with parameters $\mu$, which is the mean, and $\kappa$, which is a precision parameter: the variance is $\mu(1-\mu)/(1+\kappa)$, so a high $\kappa$ implies a low variance.

To transform $\mu$ and $\kappa$ back into the standard parameters $\alpha$ and $\beta$:
$$ \alpha = \mu\kappa $$
$$ \beta = (1-\mu)\kappa $$

logit is the logit function $\text{logit(p)} = \log(\frac{p}{1-p})$, and maps $(0,1)$ to $(-\infty, +\infty)$, 
and inv_logit is the inverse function $\text{inv_logit(x)} = \frac{e^x}{e^x + 1}$

All parameters and data whose names end with _2 refer to IPC Phase 2, those ending with _3 to IPC Phase 3, and those ending with _4 to IPC Phase 4.

In [2]:
# Stan program implementing the three coupled beta regressions (IPC Phases 2, 3, 4).
# - The first observation is only used as a lagged predictor: the likelihood
#   statements cover rows 2..N, each regressed on the logits of row i-1.
# - Each kappa is declared with a lower bound of 100.
# - No explicit priors are declared in the model block.
beta_model_code = '''
data {
    int<lower=0> N;
    int<lower=0> K;
    matrix[N,K] feats;
    vector[N] response_2;
    vector[N] response_3;
    vector[N] response_4;
}
transformed data {
    vector[N] logit_response_2;
    vector[N] logit_response_3;
    vector[N] logit_response_4;
    
    logit_response_2 = logit(response_2);
    logit_response_3 = logit(response_3);
    logit_response_4 = logit(response_4);
}
parameters{
    real alpha_2;
    vector[3] beta_2;
    vector[K] coeffs_2;
    real<lower = 100> kappa_2;
    
    real alpha_3;
    vector[3] beta_3;
    vector[K] coeffs_3;
    real<lower = 100> kappa_3;
    
    real alpha_4;
    vector[3] beta_4;
    vector[K] coeffs_4;
    real<lower = 100> kappa_4;
}

model{
    vector[N] mus_2;
    vector[N] mus_3;
    vector[N] mus_4;
    
    mus_2[2:N] = inv_logit(alpha_2 + beta_2[1]*logit_response_2[1:(N-1)] + beta_2[2]*logit_response_3[1:(N-1)] + beta_2[3]*logit_response_4[1:(N-1)] + feats[2:N]*coeffs_2);
    response_2[2:N] ~ beta_proportion(mus_2[2:N], kappa_2);
    
    mus_3[2:N] = inv_logit(alpha_3 + beta_3[1]*logit_response_2[1:(N-1)] + beta_3[2]*logit_response_3[1:(N-1)] + beta_3[3]*logit_response_4[1:(N-1)] + feats[2:N]*coeffs_3);
    response_3[2:N] ~ beta_proportion(mus_3[2:N], kappa_3);
    
    mus_4[2:N] = inv_logit(alpha_4 + beta_4[1]*logit_response_2[1:(N-1)] + beta_4[2]*logit_response_3[1:(N-1)] + beta_4[3]*logit_response_4[1:(N-1)] + feats[2:N]*coeffs_4);
    response_4[2:N] ~ beta_proportion(mus_4[2:N], kappa_4);
}
'''
# Compile the Stan program; the compiled model is reused by fit_model() through
# the module-level name `beta_model`.
beta_model = pystan.StanModel(model_code=beta_model_code, model_name="beta_model")

INFO:pystan:COMPILING THE C++ CODE FOR MODEL beta_model_c1f83952f869caec67ffdfc8087592cc NOW.


In [None]:
# Export models
REGIONS = ['Awdal', 'Bakool', 'Banadir', 'Bari', 'Bay', 'Galgaduud', 'Gedo', 'Hiraan', 'Lower Juba', 'Lower Shabelle', 'Middle Juba', 'Middle Shabelle', 'Mudug', 'Nugaal', 'Sanaag', 'Sool', 'Togdheer', 'Woqooyi Galbeed']

# Refitting every region is slow, so it is disabled by default; flip the
# condition to True to refit and export the posterior draws.
# NOTE(review): the [9:] slice only covers 'Lower Shabelle' onwards -- it looks
# like a leftover from resuming an interrupted run; confirm before re-enabling.
if False:
    for region in REGIONS[9:]:
        region_data = extract_features(region)
        if region_data is None:
            print("{} has too little data to fit model".format(region))
        else:
            print("{}".format(region))
            # extract_features returns the per-region container; the per-date
            # dictionary that fit_model/plot_model expect lives under 'datasets'.
            model = fit_model(region_data['datasets'], 0)
            plot_model(region_data['datasets'], model)
            model.to_dataframe().to_csv("model/{}.csv".format(region), index=False)

# Save the feature names of every region that has enough data to be modelled.
rows = []

for region in REGIONS:
    region_data = extract_features(region)
    if region_data is not None:
        # 'feature_names' is a top-level key of the returned dict; indexing the
        # first value of the dict would hit the food DataFrame instead.
        rows.append(dict(region=region, feature_names=region_data['feature_names']))
feature_names_df = pd.DataFrame(rows)
feature_names_df.to_csv("model/feature-names.csv", index=False)


In [4]:
import numpy as np
import math

# Days per month, indexed by month number (1-12). The leading 0 is a
# placeholder so that np.cumsum(...)[month-1] is the number of days that
# precede the first day of `month`.
MONTHS = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
# Same table with a 29-day February.
LEAPS  = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

def getDays(year, quarter):
    """Return the number of days in calendar quarter `quarter` (1-4) of `year`.

    A year counts as a leap year when it is divisible by 4 (the simplified
    rule used throughout this notebook), which only lengthens the first
    quarter.

    Raises:
        ValueError: if `quarter` is not 1, 2, 3 or 4.
    """
    if quarter not in (1, 2, 3, 4):
        raise ValueError("quarter must be 1, 2, 3 or 4, got {!r}".format(quarter))
    # Index 0 is an explicit placeholder so the tables are 1-indexed. The
    # original used `_`, which is only defined inside an IPython session.
    if year % 4 == 0:
        return (None, 91, 91, 92, 92)[quarter]
    return (None, 90, 91, 92, 92)[quarter]

def getDate(year, month, day):
    """Convert a calendar date into a fractional year.

    The fraction is the number of days elapsed since 1 January divided by the
    length of the year (366 when `year` is divisible by 4, otherwise 365), so
    1 January maps to exactly `year`.
    """
    is_leap = year % 4 == 0
    month_lengths = LEAPS if is_leap else MONTHS
    year_length = 366. if is_leap else 365.
    # cumsum(...)[month-1] counts the days in all months before `month`.
    days_elapsed = np.cumsum(month_lengths)[month-1] + day - 1
    return year + days_elapsed/year_length

In [None]:
import math
import copy
#data is per-region
def _append_rows(df, new_rows, sort_columns):
    """Return `df` plus `new_rows` (list of dicts), sorted by `sort_columns` with a fresh index."""
    if new_rows:
        # DataFrame.append was removed in pandas 2.0; concat is the supported
        # equivalent and also works on older versions.
        df = pd.concat([df, pd.DataFrame(new_rows)], sort=False)
    return df.sort_values(by=sort_columns).reset_index(drop=True)


def _carry_forward_monthly(price_df, end_year, end_month):
    """Repeat each item's last observed price for every month up to `end_month` of `end_year`."""
    new_rows = []
    for item in sorted(set(price_df.Item.values)):
        item_df = price_df[price_df.Item.eq(item)]
        market = item_df.Market.values[0]
        region = item_df.Region.values[0]
        last_year = max(item_df.Year.values)
        last_year_df = item_df[item_df.Year.eq(last_year)]
        last_month = max(last_year_df.Month.values)
        last_price = last_year_df[last_year_df.Month.eq(last_month)].Price.values[0]

        for new_year in range(last_year, end_year+1):
            # Monthly data: resume with the month after the last observation.
            month_start = last_month+1 if new_year == last_year else 1
            month_end = end_month+1 if new_year == end_year else 13
            for new_month in range(month_start, month_end):
                new_rows.append(dict(
                    Date = getDate(new_year, new_month, 1),
                    Region = region,
                    Market = market,
                    Item = item,
                    Price = last_price,
                    Year = new_year,
                    Month = new_month,
                    Quarter = math.ceil(new_month/3)
                ))
    return _append_rows(price_df, new_rows, ['Item', 'Date'])


def _carry_forward_daily(df, label_column, value_column, end_year, end_month, end_day):
    """Repeat the last observed `value_column` value for every day up to the end date.

    `label_column` ('Region' or 'Station') is copied from the first row of `df`.
    """
    label = df[label_column].values[0]
    last_year = max(df.Year.values)
    last_year_df = df[df.Year.eq(last_year)]
    last_month = max(last_year_df.Month.values)
    last_month_df = last_year_df[last_year_df.Month.eq(last_month)]
    last_day = max(last_month_df.Day.values)
    last_value = last_month_df[last_month_df.Day.eq(last_day)][value_column].values[0]

    new_rows = []
    for new_year in range(last_year, end_year+1):
        # Daily data: start in the month of the last observation so that the
        # remaining days of that month are filled too. (Starting at
        # last_month+1 made the `new_month == last_month` case below
        # unreachable and left a gap.)
        month_start = last_month if new_year == last_year else 1
        month_end = end_month+1 if new_year == end_year else 13
        for new_month in range(month_start, month_end):
            day_start = last_day+1 if (new_year == last_year and new_month == last_month) else 1
            if new_year == end_year and new_month == end_month:
                day_end = end_day+1
            else:
                day_end = MONTHS[new_month]+1
            if new_month == 2 and new_year % 4 == 0:
                day_end += 1
            for new_day in range(day_start, day_end):
                new_rows.append({
                    label_column: label,
                    'Date': getDate(new_year, new_month, new_day),
                    value_column: last_value,
                    'Year': new_year,
                    'Month': new_month,
                    'Day': new_day,
                    'Quarter': math.ceil(new_month/3),
                })
    return _append_rows(df, new_rows, ['Date'])


def _mean_item_prices(price_df, items, year, quarter):
    """Return the scaled (/1e4) mean price of each item in `items` for one quarter."""
    in_quarter = price_df[price_df.Year.eq(year) & price_df.Quarter.eq(quarter)]
    return [np.mean(in_quarter[in_quarter.Item.eq(item)].Price.values)/1e4 for item in items]


def predict_data(data, end_year, end_quarter):
    """Extend one region's data to the end of `end_quarter` of `end_year`.

    Every input series is extrapolated by carrying its last observed value
    forward (monthly for the two price tables, daily for conflict and
    weather). Feature vectors are then computed for each quarter after the
    last one present in data['datasets'].

    Args:
        data: per-region dict as returned by extract_features (must not be None).
        end_year: last year to extend to.
        end_quarter: last quarter (1-4) of `end_year` to extend to.

    Returns:
        A new dict with the keys 'ipc_df' (copied unchanged), 'food_df',
        'ffood_df', 'conflict_df', 'weather_df' (extended), 'datasets' (deep
        copy of the input plus one entry per new quarter, containing only
        'features') and 'feature_names'. `data` itself is not modified.

    Raises:
        ValueError: if `end_quarter` is not 1, 2, 3 or 4.
    """
    if end_quarter not in (1, 2, 3, 4):
        raise ValueError("end_quarter must be 1, 2, 3 or 4, got {!r}".format(end_quarter))
    # Last month / last day of each quarter; index 0 is a placeholder. The
    # original used `_`, which only exists inside an IPython session.
    end_month = (None, 3, 6, 9, 12)[end_quarter]
    end_day = (None, 31, 30, 30, 31)[end_quarter]

    new_data = dict()
    new_data['ipc_df'] = data['ipc_df'].copy()

    food_df = _carry_forward_monthly(data['food_df'], end_year, end_month)
    new_data['food_df'] = food_df

    ffood_df = _carry_forward_monthly(data['ffood_df'], end_year, end_month)
    new_data['ffood_df'] = ffood_df

    conflict_df = _carry_forward_daily(data['conflict_df'], 'Region', 'Fatalities', end_year, end_month, end_day)
    new_data['conflict_df'] = conflict_df

    weather_df = _carry_forward_daily(data['weather_df'], 'Station', 'Temperature', end_year, end_month, end_day)
    new_data['weather_df'] = weather_df

    # Extending never introduces new items, so these match the input's item lists.
    food_items = sorted(set(food_df.Item.values))
    ffood_items = sorted(set(ffood_df.Item.values))

    datasets = copy.deepcopy(data['datasets'])

    # Dataset keys encode the date as year*10 + quarter.
    last_date = max(datasets.keys())
    last_year = last_date//10
    last_quarter = last_date%10

    for new_year in range(last_year, end_year+1):
        quarter_start = last_quarter+1 if new_year == last_year else 1
        quarter_end = end_quarter+1 if new_year == end_year else 5
        for new_quarter in range(quarter_start, quarter_end):
            nDays = getDays(new_year, new_quarter)

            # Same feature order as extract_features: food prices, FEWS
            # prices, fatalities per day, mean temperature.
            features = _mean_item_prices(food_df, food_items, new_year, new_quarter)
            features.extend(_mean_item_prices(ffood_df, ffood_items, new_year, new_quarter))

            t_conflict_df = conflict_df[conflict_df.Year.eq(new_year) & conflict_df.Quarter.eq(new_quarter)]
            features.append(np.sum(t_conflict_df.Fatalities.values)/nDays)

            t_weather_df = weather_df[weather_df.Year.eq(new_year) & weather_df.Quarter.eq(new_quarter)]
            features.append(np.mean(t_weather_df.Temperature.values))

            datasets[new_year*10+new_quarter] = dict(features=features)
    new_data['datasets'] = datasets
    # The original copied the feature names but never stored them in the result.
    new_data['feature_names'] = data['feature_names'][:]
    return new_data
    
    
# Smoke test: load one region and extend its data through Q4 2020.
# NOTE(review): the variable is called bakool_data but the region loaded here
# is "Banadir" -- confirm which one is intended.
bakool_data = extract_features("Banadir")
# The returned dict is not assigned to anything here.
predict_data(bakool_data, 2020, 4)




                
        

In [6]:
def extract_features(region):
    """Load the cleaned CSVs for `region` and build one feature vector per IPC date.

    Reads data/clean_food.csv, data/clean_fews.csv, data/clean_conflict.csv,
    data/clean_ipc.csv and data/clean_weather.csv. All tables other than the
    WFP food table are restricted to the range of quarters covered by the
    region's food data.

    Returns:
        None when the region has no food data or 12 or fewer IPC rows (too
        little to fit a model without over-fitting). Otherwise a dict with:

        'food_df', 'ffood_df', 'conflict_df', 'ipc_df', 'weather_df'
            the filtered pandas DataFrames;
        'feature_names'
            list of feature names, in the same order as every 'features' list;
        'datasets'
            dict keyed by year*10 + quarter, one entry per IPC row, each a
            dict with 'P2', 'P3', 'P4' (the row's P2perc/P3perc/P4perc) and
            'features' (mean food prices / 1e4, mean FEWS prices / 1e4,
            fatalities per day, mean temperature).
    """
    #CONSTANTS
    WANTEDFEWSFOOD = {'Cowpeas (Red)'}
    WANTEDSTATIONS = {'EGAL INTL'}
    # A quarter needs at least this many weather rows to be used directly.
    MIN_WEATHER_ROWS = 10

    data = dict()
    feature_names = []

    food_df = pd.read_csv('data/clean_food.csv')
    food_df = food_df[food_df.Region.eq(region)]
    if food_df.empty:
        # No food data means no date range to work with; report it the same
        # way as any other region with insufficient data instead of failing
        # in min() below.
        return None
    data['food_df'] = food_df

    food_items = sorted(set(food_df.Item.values))
    for food_item in food_items:
        market = food_df[food_df.Item.eq(food_item)].Market.values[0]
        feature_names.append("{} - {}".format(food_item, market))

    #Constrained by food data dates, get the earliest and latest dates here:
    e_y = min(food_df.Year.values)
    e_q = min(food_df[food_df.Year.eq(e_y)].Quarter.values)
    l_y = max(food_df.Year.values)
    l_q = max(food_df[food_df.Year.eq(l_y)].Quarter.values)

    def within_food_range(df):
        # Keep rows dated between (e_y, e_q) and (l_y, l_q), both inclusive.
        not_before = (df.Year.eq(e_y) & df.Quarter.ge(e_q)) | df.Year.gt(e_y)
        not_after = (df.Year.eq(l_y) & df.Quarter.le(l_q)) | df.Year.lt(l_y)
        return df[not_before & not_after]

    ffood_df = pd.read_csv('data/clean_fews.csv')
    ffood_df = ffood_df[ffood_df.Item.isin(WANTEDFEWSFOOD) & ffood_df.Region.eq(region)]
    ffood_df = within_food_range(ffood_df)
    data['ffood_df'] = ffood_df

    ffood_items = sorted(set(ffood_df.Item.values))
    for ffood_item in ffood_items:
        market = ffood_df[ffood_df.Item.eq(ffood_item)].Market.values[0]
        feature_names.append("{} - {}".format(ffood_item, market))

    conflict_df = pd.read_csv("data/clean_conflict.csv")
    conflict_df = within_food_range(conflict_df[conflict_df.Region.eq(region)])
    data['conflict_df'] = conflict_df
    feature_names.append("Fatalities")

    ipc_df = pd.read_csv('data/clean_ipc.csv')
    ipc_df = within_food_range(ipc_df[ipc_df.Region.eq(region)])
    data['ipc_df'] = ipc_df

    #Extract weather data
    weather_df = pd.read_csv('data/clean_weather.csv')
    weather_df = within_food_range(weather_df[weather_df.Station.isin(WANTEDSTATIONS)])
    data['weather_df'] = weather_df
    feature_names.append("Temperature")

    if len(ipc_df) <= 12:
        #Insufficient data, trying to fit with data will give an over-fitted model
        return None

    datasets = dict()
    data['feature_names'] = feature_names

    def days_in_quarter(year, quarter):
        # Index 0 is a placeholder; the original used `_`, which is only
        # defined inside an IPython session.
        if year % 4 == 0:
            return (None, 91, 91, 92, 92)[quarter]
        return (None, 90, 91, 92, 92)[quarter]

    for (_index, row) in ipc_df.iterrows():
        year = row.Year
        quarter = row.Quarter
        nDays = days_in_quarter(year, quarter)
        dataset = dict()

        dataset['P2'] = row.P2perc
        dataset['P3'] = row.P3perc
        dataset['P4'] = row.P4perc

        features = []

        for item in food_items:
            t_item_df = food_df[food_df.Year.eq(year) & food_df.Quarter.eq(quarter) & food_df.Item.eq(item)]
            features.append(np.mean(t_item_df.Price.values)/1e4)

        for item in ffood_items:
            t_item_df = ffood_df[ffood_df.Year.eq(year) & ffood_df.Quarter.eq(quarter) & ffood_df.Item.eq(item)]
            features.append(np.mean(t_item_df.Price.values)/1e4)

        t_conflict_df = conflict_df[conflict_df.Year.eq(year) & conflict_df.Quarter.eq(quarter)]
        features.append(np.sum(t_conflict_df.Fatalities.values)/nDays)

        t_weather_df = weather_df[weather_df.Year.eq(year) & weather_df.Quarter.eq(quarter)]
        if len(t_weather_df) < MIN_WEATHER_ROWS:
            # Too few readings: borrow the same quarter from the nearest later
            # year that has enough. The search stops at l_y because weather_df
            # holds nothing beyond it; the original unbounded `while` loop
            # never terminated when no later year qualified. If none is found
            # the sparse readings are used as-is (the mean is NaN when there
            # are none at all).
            for later_year in range(year + 1, l_y + 1):
                candidate = weather_df[weather_df.Year.eq(later_year) & weather_df.Quarter.eq(quarter)]
                if len(candidate) >= MIN_WEATHER_ROWS:
                    t_weather_df = candidate
                    break
        features.append(np.mean(t_weather_df.Temperature.values))

        dataset['features'] = features
        datasets[year*10+quarter] = dataset
    data['datasets'] = datasets
    return data

In [7]:
def fit_model(datasets, holdout):
    """Fit the beta model to `datasets`, leaving out the last `holdout` dates.

    Args:
        datasets: dict keyed by date (year*10 + quarter); every value must
            provide 'features', 'P2', 'P3' and 'P4'.
        holdout: number of most recent dates to exclude from training so they
            can be used for evaluation; values <= 0 train on everything.

    Returns:
        The StanFit4Model object produced by beta_model.sampling.
    """
    # The model works on logit(response) and beta_proportion is only defined
    # on the open interval (0, 1), so keep responses away from both ends.
    # (The original only guarded the lower end, so a proportion of exactly 1
    # produced an infinite logit.)
    EPSILON = 1e-5

    def clamp(proportion):
        return min(max(proportion, EPSILON), 1 - EPSILON)

    all_dates = sorted(datasets.keys())
    train_dates = all_dates[:-holdout] if holdout > 0 else all_dates
    nFeatures = len(datasets[train_dates[0]]['features'])
    features = [datasets[date]['features'] for date in train_dates]
    response_2 = [clamp(datasets[date]['P2']) for date in train_dates]
    response_3 = [clamp(datasets[date]['P3']) for date in train_dates]
    response_4 = [clamp(datasets[date]['P4']) for date in train_dates]
    famine_model_data = dict(
        N = len(train_dates),
        K = nFeatures,
        feats = features,
        response_2 = response_2,
        response_3 = response_3,
        response_4 = response_4
    )
    result = beta_model.sampling(data=famine_model_data, iter=3000, control = dict(max_treedepth=12, adapt_delta=0.8))
    return result

In [8]:

#bakool_model = fit_model(bakool_data['datasets'],0)

In [9]:
# Fit Bakool with the last 3 dates held out of training, then plot the
# predictions against all dates (including the held-out ones).
# NOTE(review): plot_model is defined in a later cell; that cell has to be run
# first, otherwise this cell fails with the NameError recorded below.
bakool_data = extract_features("Bakool")
bakool_model = fit_model(bakool_data['datasets'],3)
plot_model(bakool_data['datasets'], bakool_model)



NameError: name 'plot_model' is not defined

In [None]:
def plot_model(datasets, model):
    """Plot predicted against actual IPC Phase 2/3/4 proportions.

    Predictions are one-step-ahead: the value for each date is computed from
    the *observed* proportions of the previous date and the features of the
    current date, using the posterior mean of every coefficient. The first
    date has no predecessor, so its "prediction" is the observed value.

    Args:
        datasets: dict keyed by date (year*10 + quarter); every value must
            provide 'features', 'P2', 'P3' and 'P4'.
        model: fitted StanFit4Model object (as returned by fit_model).
    """
    EPSILON = 1e-5

    def clamp(proportion):
        # Keep proportions strictly inside (0, 1) so the logit stays finite.
        return min(max(proportion, EPSILON), 1 - EPSILON)

    def quarter_end_date(date):
        # Fractional year at the end of the quarter encoded in `date`.
        year = date//10
        quarter = date%10
        if year % 4 == 0:
            return year+(np.cumsum([0, 91, 91, 92, 92])[quarter]/366.)
        return year+(np.cumsum([0, 90, 91, 92, 92])[quarter]/365.)

    nFeatures = len(datasets[list(datasets.keys())[0]]['features'])
    # get_posterior_mean() has one row per parameter and one column per
    # chain; average over the chains.
    all_coeffs = [sum(chain_means)/len(chain_means) for chain_means in model.get_posterior_mean()]

    # Parameters are laid out per phase as: alpha, beta[3], coeffs[K], kappa.
    stride = 5 + nFeatures
    phase_params = []
    for phase in range(3):
        base = phase*stride
        alpha = all_coeffs[base]
        betas = all_coeffs[base+1:base+4]
        coeffs = all_coeffs[base+4:base+4+nFeatures]
        phase_params.append((alpha, betas, coeffs))

    dates = sorted(datasets.keys())

    gold = [[clamp(datasets[date][key]) for date in dates] for key in ('P2', 'P3', 'P4')]
    gold_logit = [list(map(scipy.special.logit, series)) for series in gold]

    # Seed every predicted series with its own first observation. (The
    # original seeded the Phase 4 series with the Phase 3 value.)
    pred_logit = [[series[0]] for series in gold_logit]

    for (i, date) in enumerate(dates[1:]):
        features = datasets[date]['features']
        for (alpha, betas, coeffs), predictions in zip(phase_params, pred_logit):
            # gold_logit[...][i] is the observation for the date preceding `date`.
            lagged = sum(beta*series[i] for beta, series in zip(betas, gold_logit))
            predictions.append(alpha + lagged + sum(np.multiply(coeffs, features)))

    pred = [list(map(scipy.special.expit, series)) for series in pred_logit]

    float_dates = list(map(quarter_end_date, dates))
    print(float_dates)

    titles = ("IPC PHASE 2", "IPC PHASE 3", "IPC PHASE 4")
    pred_colors = ('red', 'blue', 'brown')
    for title, gold_series, pred_series, color in zip(titles, gold, pred, pred_colors):
        print(title)
        plt.plot(float_dates, gold_series, marker='o', color='gold')
        plt.plot(float_dates, pred_series, marker='+', color=color)
        plt.show()
    