# Build the pre-trained source model from the source countries' data

In [1]:
import os
import pandas as pd
from datetime import datetime
import numpy as np
import time 
import keras 
import sys
import random


from keras.layers import multiply
from keras.layers.core import *
from keras.layers.recurrent import LSTM
from keras.models import *
import keras.backend as K
from keras.layers import Concatenate
from keras.constraints import max_norm

from keras.models import load_model
from keras.optimizers import Adam

os.getcwd()  # return value is discarded; it is not the cell's last expression, so nothing is shown

# Fix the global RNGs of NumPy and of Python's `random` module with one shared seed.
# NOTE(review): only these two generators are seeded here; the Keras backend's own
# RNG is not touched by this cell.
seed = 666
np.random.seed(seed)
random.seed(seed)

Using TensorFlow backend.


In [3]:
#create the sequence slice for the model
def create_sequences(data, seq_length, next_days):
    """Slice a two-column series into sliding-window training samples.

    Parameters
    ----------
    data : array-like, indexable as ``data[row, col]`` after ``np.asarray``
        Column 0 is the signal that is windowed and summed; column 1 is a
        per-row covariate that is windowed alongside it.
    seq_length : int
        Number of consecutive rows in each input window.
    next_days : int
        Number of rows directly after the window whose column-0 values are
        summed to form the target.

    Returns
    -------
    xs : ndarray, shape (N, seq_length, 1)
        Column-0 windows.
    ys : ndarray, shape (N, 1)
        Sum of column 0 over the ``next_days`` rows following each window.
    cs : ndarray, shape (N, seq_length, 1)
        Column-1 windows aligned with ``xs``.

    ``N = len(data) - seq_length - next_days + 1``.

    Raises
    ------
    ValueError
        If ``data`` has fewer than ``seq_length + next_days - 1`` rows, so
        that the number of windows would be negative.
    """
    # Accept lists / DataFrames as well as ndarrays: the tuple indexing below
    # (data[a:b, 0]) only works on array objects.
    data = np.asarray(data)

    N = len(data) - seq_length - next_days + 1
    if N < 0:
        # np.zeros would raise a generic "negative dimensions" ValueError;
        # report the actual cause instead (same exception type).
        raise ValueError(
            'series of length %d is too short for seq_length=%d and next_days=%d'
            % (len(data), seq_length, next_days)
        )

    xs = np.zeros((N, seq_length, 1))
    ys = np.zeros((N, 1))
    cs = np.zeros((N, seq_length, 1))

    for i in range(N):
        window_end = i + seq_length
        xs[i, :, 0] = data[i:window_end, 0]
        # target = total of column 0 over the horizon right after the window
        ys[i] = data[window_end:window_end + next_days, 0].sum()
        cs[i, :, 0] = data[i:window_end, 1]
    return xs, ys, cs

In [4]:
def model_attention_applied_after_lstm(x_input,c_input,y_input,epochs=100,batch_size=32):
    """Build, compile and fit the two-input LSTM + attention model.

    The network has two branches whose outputs are added:

    * an LSTM over ``x_input`` followed by a soft attention over the time
      axis, a Dense(16) layer and a sigmoid Dense(1) output;
    * a fixed (non-trainable) term ``rate * sum_t(c_input[t] * x_input[t])``
      computed by ``control_rate``.

    Parameters
    ----------
    x_input : array, shape (samples, 7, 1)
        Input windows fed to the LSTM branch.
    c_input : array, shape (samples, 7)
        Per-step weights multiplied element-wise with the flattened
        ``x_input`` windows in the control branch.
    y_input : array, shape (samples, 1)
        Regression targets.
    epochs : int, optional
        Number of training epochs (default 100, the original hard-coded value).
    batch_size : int, optional
        Mini-batch size (default 32, the original hard-coded value).

    Returns
    -------
    keras.models.Model
        The compiled model after fitting.
    """
    INPUT_DIM =1
    TIME_STEPS=7
    lstm_units = 32

    inputs1 = Input(shape=(TIME_STEPS, INPUT_DIM,))
    inputs2 = Input(shape=(TIME_STEPS,))
    # return_sequences=True keeps one hidden vector per time step:
    # (batch, TIME_STEPS, lstm_units)
    lstm_out = LSTM(lstm_units, return_sequences=True)(inputs1)

    # Attention over the time axis.
    a = Permute((2, 1))(lstm_out)                    # (batch, lstm_units, TIME_STEPS)
    a = Reshape((lstm_units, TIME_STEPS))(a) # this line is not useful. It's just to know which dimension is what.
    a = Dense(TIME_STEPS, activation='softmax')(a)   # softmax over the last (time) axis
    # average the per-unit attention rows into one weight per time step
    a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
    a = RepeatVector(INPUT_DIM)(a)                   # (batch, INPUT_DIM, TIME_STEPS)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    # a_probs has a trailing dimension of INPUT_DIM (=1) and is multiplied
    # against every LSTM unit of the matching time step.
    output_attention_mul = multiply([lstm_out, a_probs])
    attention_mul = Flatten()(output_attention_mul)
    newcase_output = Dense(16)(attention_mul)
    newcase_output = Dense(1,activation='sigmoid')(newcase_output)

    # add the weight to the lockdown measures according to the domain knowledge
    # `steps` is bound as a default argument (not a closure variable) so the
    # vector length follows TIME_STEPS instead of a second hard-coded 7.
    def control_rate(x,rate=0.1,steps=TIME_STEPS):
        rate2= rate*K.ones((steps,1))
        # (batch, steps) . (steps, 1) -> (batch, 1): rate * sum over time
        return  K.dot(x,rate2)

    inputs1_new = Flatten()(inputs1)
    inputs2_new = multiply([inputs2, inputs1_new])
    control_output=Lambda(control_rate)(inputs2_new)

    output=keras.layers.Add()([newcase_output,control_output])

    # Keras 2 keyword names are `inputs` / `outputs`; the singular `input` /
    # `output` spellings are the deprecated Keras 1 form.
    model = Model(inputs=[inputs1 ,inputs2], outputs=output)
    model.compile(optimizer='adam', loss='mse')
    # NOTE(review): validation_data is the training set itself, so the
    # reported val_loss is not a held-out estimate.
    model.fit([x_input,c_input], y_input, epochs=epochs, batch_size=batch_size,
              validation_data=([x_input,c_input],y_input))

    return model

In [4]:
# Source countries/regions whose series are pooled to pre-train the source model.
source_countries=['Austria','China (except Hubei)','Croatia','Germany','Hubei','Italy','Japan',
            'Lebanon','Monaco','Norway','Oman','United Arab Emirates']

# Window sizes passed to create_sequences: length of each input window and
# number of following rows summed into the target.
seq_length=7
next_days=7

# NOTE(review): these two names are not read anywhere in this cell; they are
# kept because other cells may rely on them being defined.
INPUT_DIM = 2
TIME_STEPS = 7

# Per-country sample arrays are collected in lists and concatenated once after
# the loop. Each list starts with an empty array of the right shape so the
# final concatenate also works when `source_countries` is empty.
x_parts=[np.zeros((0,seq_length,1))]
c_parts=[np.zeros((0,seq_length))]
y_parts=[np.zeros((0,1))]

for country in source_countries:
    # load the data
    country_name='./data/source/'+country+'.xlsx'
    # `encoding` is not a read_excel parameter (it raises TypeError on current
    # pandas), so it is not passed.
    data_dist = pd.read_excel(country_name).dropna()
    # '截止时间' is Chinese for "cut-off time" (presumably the reporting date column).
    data_dist = data_dist.drop(['截止时间'],axis=1)
    # day-over-day increments of the cumulative series; the first row has no
    # predecessor and is dropped
    data_diff = data_dist['confirmed cases per million'].diff().dropna()
    # pair each increment with the second remaining column
    # (presumably a 0/1 lockdown indicator -- TODO confirm against the data files)
    data_diff = pd.concat([data_diff,data_dist.iloc[1:,1]],axis=1)
    data_diff = np.array(data_diff)

    x,y,c = create_sequences(data_diff, seq_length, next_days)

    # min-max normalization, computed per country
    # NOTE(review): a constant series (max == min) divides by zero here.
    x_max=x.max()
    x_min=x.min()
    x=(x-x_min)/(x_max-x_min)

    y_max=y.max()
    y_min=y.min()
    y=(y-y_min)/(y_max-y_min)

    # the model is fed the complement of column 1: 1 - c
    c=1.0-c.reshape(-1,seq_length)

    x_train=x.reshape(-1,seq_length,1)
    c_train=c.reshape(-1,seq_length)
    y_train=y.reshape(-1,1)

    x_parts.append(x_train)
    c_parts.append(c_train)
    y_parts.append(y_train)

# concat all the countries samples to train a source model
x_train_total=np.concatenate(x_parts,axis=0)
c_train_total=np.concatenate(c_parts,axis=0)
y_train_total=np.concatenate(y_parts,axis=0)

model = model_attention_applied_after_lstm(x_train_total,c_train_total,y_train_total)
# save the model (the path is reused by the cells that call load_model)
model.save('./model/source_model_ALerT-COVID.pkl')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.





Train on 903 samples, validate on 903 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


# Evaluate ALerT-COVID on the target countries (fine-tune on the first 80% of each series, validate on the remaining 20%)

In [7]:
# Target countries to evaluate. The full candidate list is kept here for
# reference; only the short list assigned to `target_cols` below is run.
# target_cols=['Albania','Algeria','Argentina','Armenia','Australia','Azerbaijan','Bangladesh','Belarus','Belgium','Bermuda',
#              'Bolivia','Brazil','Bulgaria','Canada','Chile','Colombia','Costa Rica','Cuba','Czech Republic','Denmark',
#              'El Salvador','Estonia','Finland','France','Ghana','Gibraltar','Greece','Honduras','Hungary','India',
#              'Indonesia','Iran','Iraq','Ireland','Israel','Jamaica','Jordan','Liberia','Luxembourg','Malaysia','Mexico',
#              'Morocco','Nepal','Netherlands','Nigeria','Pakistan','Paraguay','Peru','Philippines','Poland','Portugal',
#              'Qatar','Republic of the Congo','Romania','Russia','Rwanda','Saudi Arabia','Senegal','Sierra Leone',
#              'Singapore','Slovakia','Slovenia','South Africa','Sri Lanka','Switzerland','Thailand','Tunisia','Turkey',
#              'Ukraine','United Kingdom','United States','Venezuela']

target_cols=['Australia','France','Greece','Iraq','Netherlands']

# set the parameters: input window length and number of following rows summed
# into each target
seq_length=7
next_days=7


pred_total=[]
true_data_total=[]
pred_data_total=[]
# NOTE(review): filled in the loop but not included in the `result` table below.
new_case_mape_total=[]
cumulative_case_mape_total=[]

for i, target in enumerate(target_cols):
    print(i,target)
    # load the data
    country_name='./data/target/'+target+'.xlsx'
    # `encoding` is not a read_excel parameter (it raises TypeError on current
    # pandas), so it is not passed.
    data_dist = pd.read_excel(country_name).dropna()
    # '截止时间' is Chinese for "cut-off time" (presumably the reporting date column).
    data_dist = data_dist.drop(['截止时间'],axis=1)
    # day-over-day increments of the cumulative series, paired with the second
    # remaining column (presumably a 0/1 lockdown indicator -- TODO confirm)
    data_diff = data_dist['confirmed cases per million'].diff().dropna()
    data_diff = pd.concat([data_diff,data_dist.iloc[1:,1]],axis=1)
    data_diff = np.array(data_diff)

    x,y,c = create_sequences(data_diff, seq_length, next_days)

    # the model is fed the complement of column 1: 1 - c
    c=1.0-c.reshape(-1,seq_length)

    # min-max normalization
    # NOTE(review): min/max are taken over the whole series, i.e. including the
    # 20% held out for validation below.
    x_max=x.max()
    x_min=x.min()
    x=(x-x_min)/(x_max-x_min)

    y_max=y.max()
    y_min=y.min()
    y=(y-y_min)/(y_max-y_min)

    #split the target countries data, 80% of the data is used to fine tune source model,20% is used to verify the effect
    index1=int(len(x)*0.8)

    x_train=x[:index1].reshape(-1,seq_length,1)
    c_train=c[:index1].reshape(-1,seq_length)
    y_train=y[:index1].reshape(-1,1)

    x_val=x[index1:].reshape(-1,seq_length,1)
    c_val=c[index1:].reshape(-1,seq_length)
    y_val=y[index1:].reshape(-1,1)

    # load a fresh copy of the source model for every country and fine-tune it
    source_model='./model/source_model_ALerT-COVID.pkl'
    pre_model= load_model(source_model)

    # freeze every layer except the last two entries of `pre_model.layers`
    for layer in pre_model.layers[:-2]:
        layer.trainable = False

    pre_model.compile(loss='mse', optimizer=Adam(lr=1e-3))
    pre_model.fit([x_train,c_train], y_train,epochs=15, batch_size=4,
                  validation_data=([x_val,c_val],y_val),verbose = 0)

    # Run inference on the validation windows once; both the last-sample
    # figures and the MAPE values below are derived from this single result.
    pred_norm = pre_model.predict([x_val,c_val])

    # Last validation sample: undo the normalization, then add the cumulative
    # value on the row just before its target horizon. The last target spans the
    # final `next_days` increments, so that row is iloc[-(next_days+1)].
    base_last = np.array(data_dist.iloc[-(next_days+1),0])

    pred_val = pred_norm[-1,0]
    pred_val = np.array(pred_val)*(y_max-y_min)+y_min
    pred_val = base_last+pred_val

    true_val = y_val[-1,0]
    true_val = np.array(true_val)*(y_max-y_min)+y_min
    true_val = base_last+true_val

    true_data_total.append(true_val)
    pred_data_total.append(pred_val)

    ##  calculate the mape for new confirmed cases
    # NOTE(review): a zero entry in true_test makes this ratio infinite/NaN.
    true_test=y_val*(y_max-y_min)+y_min
    pred_test=pred_norm*(y_max-y_min)+y_min
    new_case_mape=np.mean(abs((pred_test-true_test)/true_test))
    new_case_mape_total.append(new_case_mape)

    ## calculate the mape for the cumulative confirmed cases
    # Validation sample k (global index index1+k) predicts the increase after
    # cumulative row index1+k+seq_length, so those rows are the matching bases.
    base_cum=np.array(data_dist.iloc[index1+seq_length:-next_days,0]).reshape(-1,1)
    true_test_raw=base_cum+true_test
    pred_test_raw=base_cum+pred_test
    cumulative_case_mape=np.mean(abs((pred_test_raw-true_test_raw)/true_test_raw))
    cumulative_case_mape_total.append(cumulative_case_mape)

#save the result
result                          = pd.DataFrame(cumulative_case_mape_total,index=target_cols,columns=['mape'])
result['true_data']             = true_data_total
result['pred_data']             = pred_data_total
# result.to_excel('result_verify.xls')
result

0 Australia
1 France
2 Greece
3 Iraq
4 Netherlands


Unnamed: 0,mape,true_data,pred_data
Australia,0.023682,282.040499,288.921578
France,0.016752,2889.763353,2910.115905
Greece,0.028023,279.668512,288.088274
Iraq,0.078635,153.620513,134.100555
Netherlands,0.023425,2699.582466,2803.307724


# Predict the confirmed cases per million (CCPM) for the next seven days

In [9]:
# Target countries to forecast. The full candidate list is kept here for
# reference; only the short list assigned to `target_cols` below is run.
# target_cols=['Albania','Algeria','Argentina','Armenia','Australia','Azerbaijan','Bangladesh','Belarus','Belgium','Bermuda',
#              'Bolivia','Brazil','Bulgaria','Canada','Chile','Colombia','Costa Rica','Cuba','Czech Republic','Denmark',
#              'El Salvador','Estonia','Finland','France','Ghana','Gibraltar','Greece','Honduras','Hungary','India',
#              'Indonesia','Iran','Iraq','Ireland','Israel','Jamaica','Jordan','Liberia','Luxembourg','Malaysia','Mexico',
#              'Morocco','Nepal','Netherlands','Nigeria','Pakistan','Paraguay','Peru','Philippines','Poland','Portugal',
#              'Qatar','Republic of the Congo','Romania','Russia','Rwanda','Saudi Arabia','Senegal','Sierra Leone',
#              'Singapore','Slovakia','Slovenia','South Africa','Sri Lanka','Switzerland','Thailand','Tunisia','Turkey',
#              'Ukraine','United Kingdom','United States','Venezuela']

target_cols=['Australia','France','Greece','Iraq','Netherlands']
## parameter configuration: input window length and number of following rows
## summed into each target
seq_length=7
next_days=7

# NOTE(review): these two names are not read anywhere in this cell; they are
# kept because other cells may rely on them being defined.
INPUT_DIM = 2
TIME_STEPS = 7


pred_total=[]
pre14_true=[]
pre7_true=[]

pred_control_total=[]
pred_nocontrol_total=[]
pred_true_total=[]


for i, target in enumerate(target_cols):
    print(i,target)

    # load the data
    country_name='./data/target/'+target+'.xlsx'
    # `encoding` is not a read_excel parameter (it raises TypeError on current
    # pandas), so it is not passed.
    data_dist = pd.read_excel(country_name).dropna()
    # '截止时间' is Chinese for "cut-off time" (presumably the reporting date column).
    data_dist = data_dist.drop(['截止时间'],axis=1)
    # day-over-day increments of the cumulative series, paired with the second
    # remaining column (presumably a 0/1 lockdown indicator -- TODO confirm)
    data_diff = data_dist['confirmed cases per million'].diff().dropna()
    data_diff = pd.concat([data_diff,data_dist.iloc[1:,1]],axis=1)
    data_diff = np.array(data_diff)

    x,y,c = create_sequences(data_diff, seq_length, next_days)
    # the model is fed the complement of column 1: 1 - c
    c=1.0-c.reshape(-1,seq_length)

    # Observed totals, recorded before y is normalized: y[-1] is the sum over
    # the final `next_days` rows, y[-(next_days+1)] the sum over the
    # `next_days` rows immediately before those.
    pre14_true.append(y[-(next_days+1),0])
    pre7_true.append(y[-1,0])

    #normalization
    x_max=x.max()
    x_min=x.min()
    x=(x-x_min)/(x_max-x_min)

    y_max=y.max()
    y_min=y.min()
    y=(y-y_min)/(y_max-y_min)

    #reshape the dimension
    x_train=x.reshape(-1,seq_length,1)
    c_train=c.reshape(-1,seq_length)
    y_train=y.reshape(-1,1)

    # load a fresh copy of the pre-trained model and fine-tune it on this country
    source_model='./model/source_model_ALerT-COVID.pkl'
    pre_model= load_model(source_model)
    # freeze every layer except the last two entries of `pre_model.layers`
    for layer in pre_model.layers[:-2]:
        layer.trainable = False
    pre_model.compile(loss='mse', optimizer=Adam(lr=1e-3))
    pre_model.fit([x_train,c_train], y_train,epochs=10, batch_size=2,verbose = 0)

    # the most recent `seq_length` increments form the forecasting window,
    # scaled with the same min/max as the training windows
    x_test=data_diff[-seq_length:,0].reshape(1,seq_length,1)
    x_test=(x_test-x_min)/(x_max-x_min)

    # simulation about keeping the lockdown measures or lifting the lockdown measures
    # (the second input is 1 - column 1, so all-zeros is the "control" scenario
    # and all-ones the "no control" scenario)
    c_test1=np.zeros((1,seq_length))
    c_test2=np.ones((1,seq_length))
    # NOTE(review): this is the window of the last *training* sample, which
    # ends `next_days` rows before the x_test window; confirm whether
    # 1 - data_diff[-seq_length:, 1] was intended instead.
    c_test3=c_train[-1,:].reshape(1,seq_length)

    pred_test1 = pre_model.predict([x_test, c_test1])
    pred_test2 = pre_model.predict([x_test, c_test2])
    pred_test3 = pre_model.predict([x_test, c_test3])

    # undo the target normalization
    pred_test1=np.array(pred_test1)*(y_max-y_min)+y_min
    pred_test2=np.array(pred_test2)*(y_max-y_min)+y_min
    pred_test3=np.array(pred_test3)*(y_max-y_min)+y_min

    pred_control_total.append(pred_test1[0,0])
    pred_nocontrol_total.append(pred_test2[0,0])
    pred_true_total.append(pred_test3[0,0])

# save result
result                      = pd.DataFrame(pre14_true,index=target_cols,columns=['pre14_data'])
result['pre7_data']         = pre7_true
result['pred_control']      = pred_control_total
result['pred_nocontrol']    = pred_nocontrol_total
result['pred_true']         = pred_true_total
# relative increase of the "no control" forecast over the "control" forecast
result['nocontrol_control'] = (result['pred_nocontrol']-result['pred_control'])/result['pred_control']
# result.to_excel('result_predict.xls')
result

0 Australia
1 France
2 Greece
3 Iraq
4 Netherlands


Unnamed: 0,pre14_data,pre7_data,pred_control,pred_nocontrol,pred_true,nocontrol_control
Australia,2.745111,3.058837,10.263621,11.233669,11.233669,0.094513
France,47.553747,94.310845,274.141876,304.985199,304.985199,0.112509
Greece,5.468647,3.741706,9.40487,10.512584,10.512584,0.117781
Iraq,25.160052,47.411283,23.626308,51.411167,23.626308,1.176014
Netherlands,69.682458,69.624098,126.92627,149.290207,142.147995,0.176196


In [10]:
# Write the `result` table to an Excel file in the current working directory.
# NOTE(review): the legacy .xls extension needs an .xls-capable writer engine — confirm it is installed.
result.to_excel('result_predict.xls')