# This code is for ModelB
Build the source model from the source countries' data (CCPM only, i.e. confirmed cases per million) and use it to predict CCPM for the target countries.

In [12]:
import tensorflow as tf
# Importing the libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

import os
from tqdm import tqdm
import datetime 

# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.models import Model
from keras.models import load_model,clone_model

from keras.layers import Input, Embedding, LSTM, Dense,  Lambda

from keras.backend import slice
from keras.constraints import max_norm
import warnings
# Silence library warnings so the cell outputs stay readable.
warnings.simplefilter('ignore')
# Hide all CUDA devices from this process.
os.environ["CUDA_VISIBLE_DEVICES"]='-1'

## Locate the directory storing the data.
# Only descend into ./data when the working directory is not already a
# 'data' folder, so re-running this cell in the same kernel does not try to
# enter ./data/data and fail.
if os.path.basename(os.path.normpath(os.getcwd())) != 'data':
    os.chdir(os.path.join(os.getcwd(), 'data'))

In [13]:
# Seed NumPy's global random generator so NumPy draws repeat between runs.
RANDOM_SEED = 42
np.random.seed(seed=RANDOM_SEED)

In [14]:
def create_sequences_x(data, seq_length):
    """Stack every full sliding window of `data` into one array.

    Window k is ``data[k:k + seq_length]``. A window is produced for every
    start index at which a complete window fits, i.e.
    ``len(data) - seq_length + 1`` windows in total (none when `data` is
    shorter than `seq_length`).

    Returns:
        np.ndarray built from the list of windows.
    """
    n_windows = len(data) - seq_length + 1
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    return np.array(windows)


def create_sequences_y(data, seq_length):
    """Collect the entries of `data` from position `seq_length` to the end.

    The first `seq_length` entries are skipped; each remaining entry
    ``data[i]`` becomes one element of the result, in order.

    Returns:
        np.ndarray built from the collected entries (empty when `data` has
        no more than `seq_length` entries).
    """
    targets = [data[pos] for pos in range(seq_length, len(data))]
    return np.array(targets)

def MAPE(y, y_pred):
    """Compute, print and return the mean absolute percentage error.

    The error is ``mean(|y - y_pred| / y)`` taken along the first axis.
    Note that the denominator is `y` itself (not ``|y|``), as in the original
    formula, so zeros in `y` produce inf/nan entries.

    Args:
        y: true values; any array-like (list, tuple, ndarray).
        y_pred: predicted values, same shape as `y`.

    Returns:
        The MAPE as a NumPy float for 1-D input (an array for N-D input).
    """
    # Coerce to float arrays so plain Python lists are accepted as well.
    y = np.asarray(y, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    mape = np.sum(np.abs(y - y_pred) / y, axis=0) / len(y)
    print('MAPE: ', mape)
    return mape

# 1. Build the source model

## 1.1 Construct source sequences

In [15]:
from os import listdir
from os.path import isfile, join
from re import sub
# Relative folder for the source-country files. NOTE: the loading loop builds
# its paths from the literal 'source/' rather than from this variable.
mypath = './source/'

# Countries/regions whose series are pooled to train the source model.
source_countries = [
    'Austria',
    'China (except Hubei)',
    'Croatia',
    'Germany',
    'Hubei',
    'Italy',
    'Japan',
    'Lebanon',
    'Monaco',
    'Norway',
    'Oman',
    'United Arab Emirates',
]

In [16]:
pred_length = 7  # horizon: the target is the CCPM increase over this many days
seq_length = 7   # number of consecutive daily increments per input window

# The scalers are only fitted after the loop (on the pooled sequences), so a
# single instance of each is enough; no need to rebuild them per country.
scaler_x = MinMaxScaler() #scale data into 0-1
scaler_y = MinMaxScaler()

# Per-country pieces, concatenated once after the loop.
x_parts = []
y_parts = []

for sc in source_countries:
    print(sc)
    # data preprocessing
    df = pd.read_excel('source/'+sc+'.xlsx', index_col=0)

    # Model input: day-over-day increase of confirmed cases per million.
    df_new_1day = df[['confirmed cases per million']].diff(periods=1)
    df_new_1day = df_new_1day.rename(columns={'confirmed cases per million':'new cases'})

    # Model target: increase over the next `pred_length` days.
    df_new_7days = df[['confirmed cases per million']].diff(periods=pred_length)
    df_new_7days = df_new_7days.rename(columns={'confirmed cases per million':'new cases'})

    # Cumulative value `pred_length` days earlier (0 where no such day exists).
    # Written as a whole-column assignment instead of the chained
    # `df['cum cases'][k:] = ...`, which is unreliable (SettingWithCopy) and a
    # silent no-op under pandas Copy-on-Write.
    df_new_7days['cum cases'] = (
        df['confirmed cases per million'].shift(pred_length, fill_value=0).values
    )

    country_x = create_sequences_x(np.array(df_new_1day.dropna()), seq_length)
    country_y = create_sequences_y(np.array(df_new_7days.dropna()), seq_length)
    if len(country_y) == 0:
        # Series too short to yield a single (window, target) pair.
        continue

    # Keep only the windows that have a matching target row.
    x_parts.append(country_x[0:len(country_y)])
    y_parts.append(country_y)

x_seq0 = np.concatenate(x_parts, axis=0)
y_seq0 = np.concatenate(y_parts, axis=0)

Austria
China (except Hubei)
Croatia
Germany
Hubei
Italy
Japan
Lebanon
Monaco
Norway
Oman
United Arab Emirates


## 1.2 Scale the sequence values

In [17]:
# Flatten every window value into one column so that a single scaler is
# fitted over all input values of all source countries.
# (Shapes are passed positionally: the `newshape=` keyword of np.reshape is
# deprecated in recent NumPy releases.)
x_seq1 = np.reshape(x_seq0, (-1,1))

scaler_x = scaler_x.fit(x_seq1)
x = scaler_x.transform(x_seq1)
x = np.reshape(x, x_seq0.shape)  # back to the original window layout

# Only column 0 of the targets (the pred_length-day increase) is scaled;
# column 1 ('cum cases') stays unscaled in y_seq0.
y_seq1 = np.reshape(y_seq0[:,0:1], (-1,1))

scaler_y = scaler_y.fit(y_seq1)
y = scaler_y.transform(y_seq1)

y = np.reshape(y, y_seq0[:,0:1].shape)

## 1.3 Source model

In [19]:
import random
from random import sample

# Reproducible random 80/20 split of the pooled source samples.
random.seed(123)
train_idx = sample(range(len(x)),int(len(x)*0.8))
# Sorted so the held-out rows come in a defined order (iteration order of a
# set is not specified).
test_idx = sorted(set(range(len(x))).difference(train_idx))


X_train = x[train_idx].copy()
y_train = np.reshape(y[train_idx], (-1))

# Held-out part of the source data (not evaluated in this cell).
X_test = x[test_idx].copy()
y_test = np.reshape(y[test_idx], (-1))

# Building the RNN: one input feature per time step -> LSTM(4) -> Dense(1).

main_input = Input(shape=(seq_length,1,), dtype='float32', name='main_input')

lstm_out = LSTM(4)(main_input)
main_output = Dense(units = 1)(lstm_out)
regressor = Model(inputs=main_input, outputs=main_output)

regressor.compile(optimizer='adam', loss='mse')


# Fitting the RNN to the Training set
regressor.fit(X_train, y_train, epochs = 9, batch_size = 16)#  10 (presumably an earlier epoch setting)

# Make sure the output folder exists so a missing ../model directory does not
# throw away the finished training run.
os.makedirs('../model', exist_ok=True)
# NOTE(review): despite the .pkl suffix this file is written by Keras'
# model.save, not pickle. Newer Keras releases reportedly require a
# .keras/.h5 suffix - if renamed, the load_model call must change too.
regressor.save('../model/ModelB_nocontrol.pkl')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


# 2. Target countries

In [20]:
from os import listdir
from os.path import isfile, join
from re import sub
# Relative folder holding the target-country files.
mypath = 'target/'

# Full candidate list, kept for reference (currently disabled):
# target_countries = ['Albania','Algeria','Argentina','Armenia','Australia','Azerbaijan','Bangladesh','Belarus','Belgium','Bermuda',
#              'Bolivia','Brazil','Bulgaria','Canada','Chile','Colombia','Costa Rica','Cuba','Czech Republic','Denmark',
#              'El Salvador','Estonia','Finland','France','Ghana','Gibraltar','Greece','Honduras','Hungary','India',
#              'Indonesia','Iran','Iraq','Ireland','Israel','Jamaica','Jordan','Liberia','Luxembourg','Malaysia','Mexico',
#              'Morocco','Nepal','Netherlands','Nigeria','Pakistan','Paraguay','Peru','Philippines','Poland','Portugal',
#              'Qatar','Republic of the Congo','Romania','Russia','Rwanda','Saudi Arabia','Senegal','Sierra Leone',
#              'Singapore','Slovakia','Slovenia','South Africa','Sri Lanka','Switzerland','Thailand','Tunisia','Turkey',
#              'Ukraine','United Kingdom','United States','Venezuela']

# Subset of target countries evaluated in this run.
target_countries = [
    'Australia',
    'France',
    'Greece',
    'Iraq',
    'Netherlands',
]

In [21]:
# Reload the source model that the training cell wrote to disk.
pred_model = load_model('../model/ModelB_nocontrol.pkl')

# One MAPE value per target country (refilled on every loop).
ret_test = pd.DataFrame(columns=['MAPE'], index=target_countries)

# One summary row per loop: loop1 ... loop5.
no_control = pd.DataFrame(
    columns=['MAPE(mean)', 'MAPE(std)', 'MAPE<0.1', 'MAPE<0.05'],
    index=['loop' + str(k) for k in range(1, 6)],
)

In [24]:
# NOTE(review): `no_control` has rows loop1..loop5 but only one loop is run
# here, so the remaining rows stay empty.
for loop_idx in range(1):
    for tar in target_countries:
        print(tar)
        # data preprocessing

        ## scale data
        df = pd.read_excel('target/'+tar+'.xlsx', index_col=0)

        # Daily CCPM increments only, exactly as for the source countries.
        # Differencing every column of the sheet would let NaNs in unrelated
        # columns drop extra rows (shifting x against y) and would rely on
        # CCPM being the first column.
        df_new_1day = df[['confirmed cases per million']].diff(periods=1)
        df_new_1day = df_new_1day.rename(columns={'confirmed cases per million':'new cases'})

        # Target: CCPM increase over `pred_length` days.
        df_new_7days = df[['confirmed cases per million']].diff(periods=pred_length)
        df_new_7days = df_new_7days.rename(columns={'confirmed cases per million':'new cases'})

        # Cumulative value `pred_length` days earlier (0 where unavailable).
        # Whole-column assignment replaces the chained
        # `df['cum cases'][k:] = ...`, which is a silent no-op under pandas
        # Copy-on-Write.
        df_new_7days['cum cases'] = (
            df['confirmed cases per million'].shift(pred_length, fill_value=0).values
        )

        x_seq0 = create_sequences_x(np.array(df_new_1day.dropna()), seq_length)
        y_seq0 = create_sequences_y(np.array(df_new_7days.dropna()), seq_length)
        x_seq0 = x_seq0[0:len(y_seq0)]  # keep only windows that have a target
        x_seq1 = np.reshape(x_seq0, (-1,1))

        # Reuse the scalers fitted on the source data (no refit on the target).
        x = scaler_x.transform(x_seq1)

        x = np.reshape(x, x_seq0.shape)
        y_seq1 = np.reshape(y_seq0[:,0:1], (-1,1))
        y = scaler_y.transform(y_seq1)

        y = np.reshape(y, y_seq0[:,0:1].shape)

        # Start every country from a fresh copy of the source model.
        model =clone_model(pred_model)
        model.set_weights(pred_model.get_weights())

        # Chronological split: first 80% of the samples for fine-tuning,
        # the remaining 20% for evaluation.
        test_idx = int(len(x)*0.8)

        X_train = x[0:test_idx, :, 0:1].copy()
        y_train = np.reshape(y[0:test_idx], (-1))

        X_test = x[test_idx:,:,0:1].copy()
        y_test = np.reshape(y[test_idx:], (-1))


        # Freeze every layer except the last one, so only the last layer is
        # adapted to the target country.
        for layer in model.layers[:-1]:
            layer.trainable=False
        for layer in model.layers[-1:]:
            layer.trainable=True
        model.compile(optimizer='adam', loss='mse')
        # Fitting the RNN to the Training set
        model.fit(X_train, y_train, epochs = 7, batch_size = 4, verbose=0)# originally 10

        # Predicting daily cases
        predicted_cases = model.predict(X_test)
        predicted_cases = scaler_y.inverse_transform(predicted_cases)

        # Add the earlier cumulative value (column 1 of y_seq0) back onto the
        # un-scaled increments, so MAPE is measured on cumulative CCPM.
        true_cases = np.reshape(scaler_y.inverse_transform(np.reshape(y_test,(-1,1))),(-1))+y_seq0[test_idx:,1]
        predicted_cases = np.reshape(predicted_cases,(-1))+y_seq0[test_idx:,1]

        mape = MAPE(true_cases, predicted_cases)
        ret_test.loc[tar, 'MAPE'] = mape

    # The MAPE column of `ret_test` is object-typed; cast to float so the
    # statistics and comparisons below are plain numeric operations.
    mape_values = ret_test['MAPE'].astype(float)
    loop_label = 'loop'+str(loop_idx+1)
    no_control.loc[loop_label, 'MAPE(mean)'] = mape_values.mean()
    no_control.loc[loop_label, 'MAPE(std)'] = mape_values.std()
    no_control.loc[loop_label, 'MAPE<0.1'] = int((mape_values < 0.1).sum())
    no_control.loc[loop_label, 'MAPE<0.05'] = int((mape_values < 0.05).sum())

Australia
MAPE:  0.045807415471918696
France
MAPE:  0.026227821083216512
Greece
MAPE:  0.03944255519134048
Iraq
MAPE:  0.10229714895687346
Netherlands
MAPE:  0.033947424745118235


In [15]:
# Persist the per-loop summary. Create the output folder first so a missing
# ../result directory does not abort the notebook at its final step.
os.makedirs('../result', exist_ok=True)
no_control.to_csv('../result/ModelB_nocontrol.csv')