# This code is for ModelC
Build a source model from the source countries' data (CCPM, i.e. confirmed cases per million, and control measures), then use it to predict CCPM for the target countries.

In [11]:
# Part 1 - Data Preprocessing
import tensorflow as tf
# Importing the libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

import os
from tqdm import tqdm
import datetime 


# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.models import Model
from keras.models import load_model,clone_model
from keras.layers import Input, Embedding, LSTM, Dense,  Lambda

from keras.backend import slice
from keras.constraints import max_norm
import warnings
# Silence all Python warnings for the rest of the notebook.
warnings.simplefilter('ignore')
# Restrict the visible CUDA devices to none (presumably to force CPU execution).
os.environ["CUDA_VISIBLE_DEVICES"]='-1'

## Locate the directory storing the data.
# All later paths ('./source/', 'target/', '../model/') are relative to it.
# The guard keeps this cell idempotent: without it, re-running the cell would
# try to enter 'data/data/' and raise FileNotFoundError.
if os.path.basename(os.getcwd()) != 'data':
    os.chdir(os.path.join(os.getcwd(), 'data'))

In [12]:
# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
# NOTE(review): this does not seed Python's `random` module (seeded separately
# before the train/validation split) nor the Keras/TensorFlow backend, so model
# weight initialisation may still differ between runs — confirm if exact
# reproducibility is required.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [13]:
def create_sequences_x(data, seq_length):
    """Stack every sliding window of ``seq_length`` consecutive rows of ``data``.

    Window ``k`` is ``data[k:k + seq_length]``; there are
    ``len(data) - seq_length + 1`` windows in total. When ``data`` is shorter
    than ``seq_length`` no window exists and an empty array is returned.
    """
    n_windows = len(data) - seq_length + 1
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    return np.array(windows)


def create_sequences_y(data, seq_length):
    """Collect the rows of ``data`` from position ``seq_length`` onwards.

    Row ``seq_length + k`` of ``data`` becomes element ``k`` of the result, so
    the output has ``len(data) - seq_length`` entries (none if ``data`` is not
    longer than ``seq_length``).
    """
    targets = [data[position] for position in range(seq_length, len(data))]
    return np.array(targets)

def MAPE(y, y_pred):
    """Compute and print the mean absolute percentage error, as a fraction.

    Parameters
    ----------
    y : array-like
        True values. Entries equal to zero produce ``inf``/``nan`` in the
        result because each error is divided by the true value.
    y_pred : array-like
        Predicted values, broadcastable against ``y``.

    Returns
    -------
    The mean over the first axis of ``abs(y - y_pred) / y`` (a scalar for 1-D
    input, one value per column for 2-D input). The value is also printed.
    """
    # Accept plain lists/tuples as well as arrays.
    y = np.asarray(y, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Vectorised reduction along the first axis instead of the builtin `sum`,
    # which iterates element by element in Python.
    mape = np.sum(np.abs(y - y_pred) / y, axis=0) / len(y)
    print('MAPE: ', mape)
    return mape

# Build the source sequences

In [14]:
from os import listdir
from os.path import isfile, join
from re import sub

# Folder (relative to the data directory) holding one .xlsx file per source country.
# NOTE(review): the source-loading loop builds its path from the literal
# './source/' rather than from `mypath`, and `mypath` is reassigned later for
# the target countries.
mypath = 'source/'
# Countries/regions whose data are pooled to train the source model; each name
# is used as the file stem of the workbook that is read.
source_countries = ['Austria','China (except Hubei)','Croatia','Germany','Hubei','Italy','Japan',
            'Lebanon','Monaco','Norway','Oman','United Arab Emirates']

In [15]:
pred_length = 7  # horizon in days: the target is the CCPM increase over this many days
seq_length = 7   # number of consecutive daily rows in one model input window

# Scalers are created once here and fitted later on the pooled sequences.
scaler_x = MinMaxScaler()  # scale data into 0-1
scaler_y = MinMaxScaler()

ccpm_col = 'confirmed cases per million'
x_parts = []
y_parts = []

for sc in source_countries:
    print(sc)
    # data preprocessing
    df = pd.read_excel('./source/'+sc+'.xlsx', index_col=0)

    # Inputs: daily CCPM increment plus the control-measure column.
    df_new_1day = df[[ccpm_col]].diff(periods=1).rename(columns={ccpm_col: 'new cases'})
    df_new_1day['control'] = df.control.values

    # Targets: CCPM increment over `pred_length` days, together with the CCPM
    # level `pred_length` days earlier (the baseline the increment is added to).
    df_new_7days = df[[ccpm_col]].diff(periods=pred_length).rename(columns={ccpm_col: 'new cases'})
    # Single whole-column assignment instead of chained indexing
    # (`frame['cum cases'][k:] = ...`), which triggers SettingWithCopyWarning
    # and does not write through under pandas copy-on-write. The first
    # `pred_length` rows have no baseline and get 0; they are dropped below
    # anyway because their 'new cases' value is NaN.
    df_new_7days['cum cases'] = df[ccpm_col].shift(pred_length).fillna(0).values

    tx_seq0 = create_sequences_x(np.array(df_new_1day.dropna()), seq_length)
    ty_seq0 = create_sequences_y(np.array(df_new_7days.dropna()), seq_length)
    # Keep only the input windows that have a matching target row.
    tx_seq0 = tx_seq0[0:len(ty_seq0)]

    x_parts.append(tx_seq0)
    y_parts.append(ty_seq0)

# Pool all countries with one concatenation instead of growing the arrays
# inside the loop.
x_seq0 = np.concatenate(x_parts, axis=0)
y_seq0 = np.concatenate(y_parts, axis=0)

Austria
China (except Hubei)
Croatia
Germany
Hubei
Italy
Japan
Lebanon
Monaco
Norway
Oman
United Arab Emirates


# scale the sequence values

In [16]:
# Inputs: flatten every value into a single column so that one min/max pair is
# learned over all entries, then restore the original window shape.
x_seq1 = x_seq0.reshape(-1, 1)
x = scaler_x.fit_transform(x_seq1).reshape(x_seq0.shape)

# Targets: only the first target column is scaled; the column axis is kept so
# the result has the same shape as y_seq0[:, 0:1].
y_seq1 = y_seq0[:, 0:1].reshape(-1, 1)
y = scaler_y.fit_transform(y_seq1).reshape(y_seq0[:, 0:1].shape)

# Train the source model

In [17]:
from random import seed
from random import sample

In [18]:
# Random train/validation split over the pooled source windows.
N_TRAIN_SAMPLES = 300  # number of windows used for training; the rest validate
seed(123)
train_idx = sample(range(len(x)), N_TRAIN_SAMPLES)
# Sorted so the validation order is explicit rather than relying on set
# iteration order.
test_idx = sorted(set(range(len(x))).difference(train_idx))


X_train = x[train_idx].copy()
y_train = np.reshape(y[train_idx], (-1))

X_test = x[test_idx].copy()
y_test = np.reshape(y[test_idx], (-1))

# Building the model: one LSTM layer with 4 units over windows of
# `seq_length` steps x 2 features, followed by a single linear output unit.
main_input = Input(shape=(seq_length,2,), dtype='float32', name='main_input')
lstm_out = LSTM(4)(main_input)
main_output = Dense(units = 1)(lstm_out)
regressor = Model(inputs=main_input, outputs=main_output)


regressor.compile(optimizer='adam', loss='mse')
regressor.fit(X_train, y_train, epochs = 50, batch_size = 32, validation_data=(X_test,y_test))

# Make sure the output folder exists before saving.
os.makedirs('../model', exist_ok=True)
# NOTE(review): the '.pkl' suffix is misleading — Keras `Model.save` does not
# write a pickle, and newer Keras releases may reject unrecognised extensions.
# The path is kept unchanged because the loading cell uses the same file name.
regressor.save('../model/ModelC-control.pkl')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.

Train on 300 samples, validate on 593 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# target countries - medium

In [19]:
from os import listdir
from os.path import isfile, join
from re import sub
# Longer candidate list of target countries, kept commented out; only the
# short list assigned below is evaluated in this notebook.
# target_countries = ['Albania','Algeria','Argentina','Armenia','Australia','Azerbaijan','Bangladesh','Belarus','Belgium','Bermuda',
#              'Bolivia','Brazil','Bulgaria','Canada','Chile','Colombia','Costa Rica','Cuba','Czech Republic','Denmark',
#              'El Salvador','Estonia','Finland','France','Ghana','Gibraltar','Greece','Honduras','Hungary','India',
#              'Indonesia','Iran','Iraq','Ireland','Israel','Jamaica','Jordan','Liberia','Luxembourg','Malaysia','Mexico',
#              'Morocco','Nepal','Netherlands','Nigeria','Pakistan','Paraguay','Peru','Philippines','Poland','Portugal',
#              'Qatar','Republic of the Congo','Romania','Russia','Rwanda','Saudi Arabia','Senegal','Sierra Leone',
#              'Singapore','Slovakia','Slovenia','South Africa','Sri Lanka','Switzerland','Thailand','Tunisia','Turkey',
#              'Ukraine','United Kingdom','United States','Venezuela']
# Target countries actually evaluated; each name is the file stem of a
# workbook read from the target folder.
target_countries = ['Australia','France','Greece','Iraq','Netherlands']

In [20]:
# Reload the source model saved by the training cell; it is cloned and
# fine-tuned separately for every target country.
pred_model = load_model('../model/ModelC-control.pkl')
# One MAPE value per target country.
ret_test = pd.DataFrame(index=target_countries, columns=['MAPE'])
# Summary statistics per evaluation loop. Five rows are reserved, but the
# evaluation loop in this notebook runs `range(1)`, so only 'loop1' is filled.
modelc_control = pd.DataFrame(index=['loop1', 'loop2', 'loop3', 'loop4','loop5'], columns=['MAPE(mean)','MAPE(std)','MAPE<0.1','MAPE<0.05'])

In [21]:
mypath = 'target/'
ccpm_col = 'confirmed cases per million'

for l in range(1):
    for tar in target_countries:
        print(tar)
        # data preprocessing
        df = pd.read_excel(mypath+tar+'.xlsx', index_col=0)

        # Inputs: daily CCPM increment plus the control-measure column.
        # The CCPM column is selected explicitly (as in the source
        # preprocessing) so the feature order is always
        # ['new cases', 'control'] and unrelated workbook columns cannot
        # change which rows survive dropna().
        df_new_1day = df[[ccpm_col]].diff(periods=1).rename(columns={ccpm_col: 'new cases'})
        df_new_1day['control'] = df.control.values

        # Targets: CCPM increment over `pred_length` days, plus the CCPM level
        # `pred_length` days earlier (the baseline for the increment).
        df_new_7days = df[[ccpm_col]].diff(periods=pred_length).rename(columns={ccpm_col: 'new cases'})
        # Single whole-column assignment instead of chained indexing, which
        # triggers SettingWithCopyWarning and does not write through under
        # pandas copy-on-write. Rows without a baseline get 0 and are dropped
        # below because their 'new cases' value is NaN.
        df_new_7days['cum cases'] = df[ccpm_col].shift(pred_length).fillna(0).values

        x_seq0 = create_sequences_x(np.array(df_new_1day.dropna()), seq_length)
        y_seq0 = create_sequences_y(np.array(df_new_7days.dropna()), seq_length)
        # Keep only the input windows that have a matching target row.
        x_seq0 = x_seq0[0:len(y_seq0)]

        # Re-use the scalers fitted on the source data (transform only).
        x_seq1 = np.reshape(x_seq0, newshape=(-1,1))
        x = np.reshape(scaler_x.transform(x_seq1), newshape=(x_seq0.shape))

        y_seq1 = np.reshape(y_seq0[:,0:1], newshape=(-1,1))
        y = np.reshape(scaler_y.transform(y_seq1), newshape=(y_seq0[:,0:1].shape))

        # Chronological split: first 80% of windows fine-tune, last 20% test.
        split_idx = int(len(x)*0.8)
        X_train = x[0:split_idx, :, 0:2].copy()
        y_train = np.reshape(y[0:split_idx], (-1))

        X_test = x[split_idx:, :, 0:2].copy()
        y_test = np.reshape(y[split_idx:], (-1))

        # Start from a copy of the source model so every country is
        # fine-tuned from the same weights; only the last layer is trainable.
        model = clone_model(pred_model)
        model.set_weights(pred_model.get_weights())
        for layer in model.layers[:-1]:
            layer.trainable = False
        model.layers[-1].trainable = True
        model.compile(optimizer='adam', loss='mse')
        # Fitting the RNN to the Training set
        model.fit(X_train, y_train, epochs = 12, batch_size = 2, verbose=0)

        # Predict the scaled increment, undo the scaling, then add the
        # baseline stored in target column 1 to compare cumulative values.
        predicted_cases = scaler_y.inverse_transform(model.predict(X_test))
        baseline = y_seq0[split_idx:,1]

        true_cases = np.reshape(scaler_y.inverse_transform(np.reshape(y_test,(-1,1))),(-1)) + baseline
        predicted_cases = np.reshape(predicted_cases,(-1)) + baseline

        mape = MAPE(true_cases, predicted_cases)
        ret_test.loc[tar, 'MAPE'] = mape

    # `ret_test` was created without data, so its column has object dtype;
    # convert to float before computing statistics.
    loop_key = 'loop'+str(l+1)
    mape_values = ret_test['MAPE'].astype(float)
    modelc_control.loc[loop_key, 'MAPE(mean)'] = mape_values.mean()
    modelc_control.loc[loop_key, 'MAPE(std)'] = mape_values.std()
    modelc_control.loc[loop_key, 'MAPE<0.1'] = int((mape_values < 0.1).sum())
    modelc_control.loc[loop_key, 'MAPE<0.05'] = int((mape_values < 0.05).sum())

Australia
MAPE:  0.041625889435366
France
MAPE:  0.028971640956362954
Greece
MAPE:  0.033116221847490315
Iraq
MAPE:  0.1004357721459402
Netherlands
MAPE:  0.03850912715152976


In [23]:
# modelc_control.to_csv('../result/modelC_control.csv')