In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
import time
from datetime import datetime
from scipy import integrate, optimize
import warnings
warnings.filterwarnings('ignore')
from scipy.integrate import odeint

In [3]:
cv  = pd.read_csv('drive/My Drive/Thesis/Data/cov_data_may.csv')
cv


Unnamed: 0,Date,Active Cases,New Deaths,New Recovered,New Cases,Total Deaths,Total Recovered,Total Cases
0,2021-05-01 00:00:00+00:00,100250,131,4344,4512,45652,1526978,1672880
1,2021-05-02 00:00:00+00:00,100760,144,3740,4394,45796,1530718,1677274
2,2021-05-03 00:00:00+00:00,100564,153,4773,4730,45949,1535491,1682004
3,2021-05-04 00:00:00+00:00,99087,188,5658,4369,46137,1541149,1686373
4,2021-05-05 00:00:00+00:00,98217,212,5943,5285,46349,1547092,1691658
5,2021-05-06 00:00:00+00:00,98277,147,5440,5647,46496,1552532,1697305
6,2021-05-07 00:00:00+00:00,98546,167,5891,6327,46663,1558423,1703632
7,2021-05-08 00:00:00+00:00,99003,179,5494,6130,46842,1563917,1709762
8,2021-05-09 00:00:00+00:00,98395,170,4360,3922,47012,1568277,1713684
9,2021-05-10 00:00:00+00:00,96742,206,6338,4891,47218,1574615,1718575


In [4]:
def rmse(predictions, targets):
    return np.sqrt(((predictions - targets) ** 2).mean())

def mse(predict,target):
    return ((predict-target)**2).mean()

def mae(predict,target):
    return (abs(target-predict)).mean()

def mape(predict,target):
    return (abs((target - predict) / target).mean()) * 100

SEIR Model

In [6]:
# SEIR MODEL - DIFFERENTIAL EQUATIONS
# The SEIR model adds an “Exposed” state for individuals that have contracted the disease but are not yet
# infectious. This uses the fact that there is an incubation stage of infection that lasts approx. 6 days.
# The model assumes people carry lifelong immunity to a disease upon recovery, which hasn't been disproven
# for COVID-19.

def comp(state, t, N, beta, gamma, sigma):
    S, E, I, R = state
    dSdt = -(beta * S * I)/N # Change in susceptible population over time
    dEdt = (beta * S * I)/N - sigma * E # Change in exposed population over time
    dIdt = sigma * E - gamma * I # Change in infectious population over time
    dRdt = gamma * I # Change in recovered population over time
    return dSdt, dEdt, dIdt, dRdt



Basic SEIR

In [8]:
total_pop = 276051328 
removed = 1572630 #recovered + dead
infected = 	100250		
exposed = 5000 #assumption
susceptible = total_pop - infected - removed - exposed 

last_day_total = 1668368 #total cases April 30, 2021

σ=1/5.2   
γ=1/2.3   
β=R0γ=2.2γ = 0.96  =>>>> 1535730.93 (MAE)

σ=1/5.2   
γ=1/2.3 
β=R = 1.4γ = 0.1 =>>>>  78241.47

σ=1/5.2   
γ=1/D = 1/14
β=R = 2.3γ  =>>>>> 42281.14

σ=1/5.2   
γ=1/D = 1/14
β=R = 1.4γ = 0.1 =>>>>  6639.98

In [20]:
real = cv['New Cases'][31:]
real

31    4824
32    5246
33    5353
34    6486
35    6594
36    5832
37    6993
Name: New Cases, dtype: int64

In [None]:
# BETA => effective_contact_rate
# GAMMA => recovery_rate
# SIGMA => incubation_period (r0*gamma)
#Recovery rate, γ = 1/D, is determined by the average duration, D, of infection

days_to_recover = 14
recovery_rate = 1/2.3
incubation_period = 5.2 # incubation parameter

days = range(0,38)

# Computation
#calc = odeint(comp, [susceptible, exposed, infected, recovered], days,
#             args=(total_pop, 1/incubation_period, effective_contact_rate, recovery_rate))
#S, E, I, R = calc.T

R0 = 2.2
sigma= 1/incubation_period
gamma= recovery_rate
beta= R0*gamma

# Computation
calc = odeint(comp, [susceptible, exposed, infected, removed], days,
             args=(total_pop, beta, gamma, sigma))
S, E, I, R = calc.T
df = pd.DataFrame({'Susceptible': S, 'Exposed': E, 'Infected': I, 'Recovered': R, 'Day': days})


In [None]:
t = df["Recovered"] + df["Infected"]
df['Total'] = t

df

Unnamed: 0,Susceptible,Exposed,Infected,Recovered,Day,Total
0,274419100.0,5000.0,100250.0,1572630.0,0,1672880.0
1,274339200.0,76452.5,72172.32,1609182.0,1,1681354.0
2,274276100.0,120285.0,62536.97,1638013.0,2,1700550.0
3,274217500.0,152620.6,62034.55,1664863.0,3,1726897.0
4,274156700.0,181212.3,66371.23,1692647.0,4,1759018.0
5,274090500.0,209919.7,73612.55,1722993.0,5,1796605.0
6,274016300.0,240809.3,82940.2,1756958.0,6,1839898.0
7,273932400.0,275143.0,94070.93,1795375.0,7,1889446.0
8,273837100.0,313844.7,106985.9,1839016.0,8,1946002.0
9,273728800.0,357725.2,121805.5,1888681.0,9,2010487.0


In [None]:
ncArr = []
for data in df['Total']:
    calc = data - last_day_total
    ncArr.append(calc)
    last_day_total = data

ncArr_train =ncArr[:31]

ncArr_test =ncArr[-7:]
ncArr_test

[1060691.349221306,
 1194704.2196931094,
 1343512.959746454,
 1508167.3720128834,
 1689628.405257471,
 1888712.9587332718,
 2106027.282844633]

In [None]:
print('MAE : ', mae( real, ncArr_test))
print('MAPE : ', mape (real, ncArr_test))

MAE :  1535730.9353584468
MAPE :  99.60664490504669


#################################################

In [None]:
# BETA => effective_contact_rate
# GAMMA => recovery_rate
# SIGMA => incubation_period (r0*gamma)

days_to_recover = 14
recovery_rate = 1/days_to_recover
incubation_period = 5.2 # incubation parameter

days = range(0,38)

R0 = 2.2
sigma= 1/incubation_period
gamma= recovery_rate
beta= R0*gamma

# Computation
calc = odeint(comp, [susceptible, exposed, infected, removed], days,
             args=(total_pop, beta, gamma, sigma))
S, E, I, R = calc.T
df = pd.DataFrame({'Susceptible': S, 'Exposed': E, 'Infected': I, 'Recovered': R, 'Day': days})

In [None]:
t = df["Recovered"] + df["Infected"]
df['Total'] = t

df

Unnamed: 0,Susceptible,Exposed,Infected,Recovered,Day,Total
0,274419100.0,5000.0,100250.0,1572630.0,0,1672880.0
1,274403800.0,17992.45727,95537.032975,1579606.0,1,1675143.0
2,274389100.0,28230.423302,93287.591256,1586337.0,2,1679624.0
3,274374600.0,36499.362544,92898.158473,1592976.0,3,1685875.0
4,274360000.0,43374.344068,93931.821616,1599642.0,4,1693573.0
5,274345200.0,49277.080301,96073.370636,1606422.0,5,1702495.0
6,274330000.0,54517.537309,99096.55513,1613387.0,6,1712484.0
7,274314200.0,59324.290689,102840.210481,1620595.0,7,1723436.0
8,274297800.0,63866.676438,107190.851375,1628093.0,8,1735284.0
9,274280700.0,68270.951642,112069.987856,1635921.0,9,1747991.0


In [None]:
ncArr = []
for data in df['Total']:
    calc = data - last_day_total
    ncArr.append(calc)
    last_day_total = data

ncArr_train =ncArr[:31]

ncArr_test =ncArr[-7:]
ncArr_test

[41091.438705872744,
 43257.36105389474,
 45536.468193122186,
 47934.58848167956,
 50457.83541962458,
 53112.6217626296,
 55905.67343191244]

In [None]:
print('MAE : ', mae( ncArr_test, real))
print('MAPE : ', mape (ncArr_test, real))

MAE :  42281.141006962265
MAPE :  720.211424133393


#######################################################

In [None]:
# BETA => effective_contact_rate
# GAMMA => recovery_rate
# SIGMA => incubation_period (r0*gamma)

days_to_recover = 14
recovery_rate = 1/days_to_recover
incubation_period = 5.2 # incubation parameter

days = range(0,38)

R0 = 1.4
sigma= 1/incubation_period
gamma= recovery_rate
beta= R0*gamma

# Computation
calc = odeint(comp, [susceptible, exposed, infected, removed], days,
             args=(total_pop, beta, gamma, sigma))
S, E, I, R = calc.T
df = pd.DataFrame({'Susceptible': S, 'Exposed': E, 'Infected': I, 'Recovered': R, 'Day': days})

In [None]:
t = df["Recovered"] + df["Infected"]
df['Total'] = t

df

Unnamed: 0,Susceptible,Exposed,Infected,Recovered,Day,Total
0,274419100.0,5000.0,100250.0,1572630.0,0,1672880.0
1,274409400.0,12933.949444,95043.633272,1579593.0,1,1674637.0
2,274400100.0,19089.526359,91493.900377,1586247.0,2,1677741.0
3,274391200.0,23908.133665,89199.907456,1592694.0,3,1681894.0
4,274382400.0,27722.508375,87859.642544,1599012.0,4,1686872.0
5,274373700.0,30783.490553,87245.621782,1605262.0,5,1692508.0
6,274365000.0,33280.198781,87186.533796,1611489.0,6,1698676.0
7,274356300.0,35355.234908,87553.406177,1617728.0,7,1705281.0
8,274347600.0,37116.143638,88249.179239,1624005.0,8,1712254.0
9,274338800.0,38644.049188,89200.848106,1630341.0,9,1719542.0


In [None]:
ncArr = []
for data in df['Total']:
    calc = data - last_day_total
    ncArr.append(calc)
    last_day_total = data

ncArr_train =ncArr[:31]

ncArr_test =ncArr[-7:]
ncArr_test

[11842.504948667949,
 12069.136625356274,
 12299.990307378117,
 12535.160730971256,
 12774.740336823277,
 13018.820021527587,
 13267.490048825508]

In [None]:
print('MAE : ', mae( real, ncArr_test))
print('MAPE : ', mape (real, ncArr_test))

MAE :  6639.977574221424
MAPE :  53.059194362035456


#######################################################

In [None]:
# BETA => effective_contact_rate
# GAMMA => recovery_rate
# SIGMA => incubation_period (r0*gamma)

days_to_recover = 14
recovery_rate = 1/2.3
incubation_period = 5.2 # incubation parameter

days = range(0,38)

R0 = 1.4
sigma= 1/incubation_period
gamma= recovery_rate
beta= R0*gamma

# Computation
calc = odeint(comp, [susceptible, exposed, infected, removed], days,
             args=(total_pop, beta, gamma, sigma))
S, E, I, R = calc.T
df = pd.DataFrame({'Susceptible': S, 'Exposed': E, 'Infected': I, 'Recovered': R, 'Day': days})

In [None]:
t = df["Recovered"] + df["Infected"]
df['Total'] = t

df

Unnamed: 0,Susceptible,Exposed,Infected,Recovered,Day,Total
0,274419100.0,5000.0,100250.0,1572630.0,0,1672880.0
1,274368800.0,49653.69417,69761.680012,1608804.0,1,1678565.0
2,274331500.0,74710.794369,55177.457667,1635563.0,2,1690740.0
3,274300400.0,89912.454832,48732.045992,1657946.0,3,1706678.0
4,274271700.0,100220.157651,46476.365146,1678537.0,4,1725013.0
5,274243700.0,108164.185747,46403.228094,1698672.0,5,1745075.0
6,274215400.0,115038.159867,47493.6871,1719054.0,6,1766548.0
7,274186200.0,121506.757222,49231.788331,1740064.0,7,1789295.0
8,274155800.0,127914.927139,51357.658383,1761920.0,8,1813277.0
9,274124000.0,134445.239215,53741.864514,1784760.0,9,1838501.0


In [None]:
ncArr = []
for data in df['Total']:
    calc = data - last_day_total
    ncArr.append(calc)
    last_day_total = data

ncArr_train =ncArr[:31]

ncArr_test =ncArr[-7:]
ncArr_test

[72658.53129674308,
 76195.4650114975,
 79899.97911323467,
 83779.54276292678,
 87841.92650059704,
 92095.1891095615,
 96547.66843466228]

In [None]:
print('MAE : ', mae( ncArr_test, real))
print('MAPE : ', mape (ncArr_test, real))

MAE :  78241.47174703183
MAPE :  1333.553427123341


##########################################################################

In [None]:
# BETA => effective_contact_rate
# GAMMA => recovery_rate
# SIGMA => incubation_period (r0*gamma)

days_to_recover = 14
recovery_rate = 0.1
incubation_period = 5.2 # incubation parameter

days = range(0,38)

R0 = 2.2
sigma= 1
gamma= recovery_rate
beta= R0*gamma

# Computation
calc = odeint(comp, [susceptible, exposed, infected, removed], days,
             args=(total_pop, beta, gamma, sigma))
S, E, I, R = calc.T
df = pd.DataFrame({'Susceptible': S, 'Exposed': E, 'Infected': I, 'Recovered': R, 'Day': days})

In [None]:
t = df["Recovered"] + df["Infected"]
df['Total'] = t

df

Unnamed: 0,Susceptible,Exposed,Infected,Recovered,Day,Total
0,274419100.0,5000.0,100250.0,1572630.0,0,1672880.0
1,274397200.0,15678.310059,101438.7,1582628.0,1,1684067.0
2,274374300.0,20371.145073,109206.8,1593129.0,2,1702335.0
3,274349300.0,23412.246475,119728.2,1604559.0,3,1724287.0
4,274321800.0,26123.96702,131929.1,1617129.0,4,1749058.0
5,274291500.0,28922.371375,145573.8,1630993.0,5,1776566.0
6,274258000.0,31952.057769,160688.0,1646293.0,6,1806981.0
7,274221100.0,35277.725916,177386.4,1663183.0,7,1840569.0
8,274180400.0,38942.125776,195821.3,1681828.0,8,1877649.0
9,274135400.0,42983.739113,216168.4,1702411.0,9,1918579.0


In [None]:
ncArr = []
for data in df['Total']:
    calc = data - last_day_total
    ncArr.append(calc)
    last_day_total = data

ncArr_train =ncArr[:31]

ncArr_test =ncArr[-7:]
ncArr_test

[351392.1622095676,
 386627.66359717306,
 425241.83874915633,
 467525.75474606175,
 513788.46478122286,
 564356.544792356,
 619573.1482920637]

In [None]:
print('MAE : ', mae( ncArr_test, real))
print('MAPE : ', mape (ncArr_test, real))

MAE :  469596.79673822876
MAPE :  7919.282612906202


################################################################

In [None]:
# BETA => effective_contact_rate
# GAMMA => recovery_rate
# SIGMA => incubation_period (r0*gamma)

days_to_recover = 14
recovery_rate = 0.1
incubation_period = 5.2 # incubation parameter

days = range(0,38)

R0 = 1.4
sigma= 1
gamma= recovery_rate
beta= R0*gamma

# Computation
calc = odeint(comp, [susceptible, exposed, infected, removed], days,
             args=(total_pop, beta, gamma, sigma))
S, E, I, R = calc.T
df = pd.DataFrame({'Susceptible': S, 'Exposed': E, 'Infected': I, 'Recovered': R, 'Day': days})

In [None]:
t = df["Recovered"] + df["Infected"]
df['Total'] = t

df

Unnamed: 0,Susceptible,Exposed,Infected,Recovered,Day,Total
0,274419100.0,5000.0,100250.0,1572630.0,0,1672880.0
1,274405300.0,10534.066221,98591.900054,1582526.0,1,1681117.0
2,274391500.0,12626.787528,100389.280747,1592459.0,2,1692848.0
3,274377300.0,13623.782748,103379.288168,1602642.0,3,1706021.0
4,274362700.0,14279.709536,106837.066003,1613150.0,4,1719987.0
5,274347600.0,14837.632517,110532.429911,1624017.0,5,1734549.0
6,274331900.0,15376.11574,114394.366836,1635262.0,6,1749656.0
7,274315800.0,15920.837458,118403.150746,1646901.0,7,1765304.0
8,274299000.0,16480.483018,122555.652848,1658947.0,8,1781503.0
9,274281700.0,17058.287331,126854.189687,1671417.0,9,1798271.0


In [None]:
last_day_total = 1668368 #total cases April 30, 2021
ncArr = []
for data in df['Total']:
    calc = data - last_day_total
    ncArr.append(calc)
    last_day_total = data

ncArr_train =ncArr[:31]

ncArr_test =ncArr[-7:]
ncArr_test

[35655.31176814623,
 36892.56252957042,
 38171.94975170866,
 39494.942821721546,
 40862.846612642985,
 42277.19824953284,
 43739.48413503403]

In [None]:
print('MAE : ', mae( ncArr_test, real))
print('MAPE : ', mape (ncArr_test, real))

MAE :  33680.89940976525
MAPE :  576.3550771728643


SEIR + LSTM ###################################################################

In [35]:
# BETA => effective_contact_rate
# GAMMA => recovery_rate
# SIGMA => incubation_period (r0*gamma)

days_to_recover = 14
recovery_rate = 1/days_to_recover
incubation_period = 5.2 # incubation parameter

days = range(0,31)

R0 = 1.4
sigma= 1/incubation_period
gamma= recovery_rate
beta= R0*gamma

# Computation
calc = odeint(comp, [susceptible, exposed, infected, removed], days,
             args=(total_pop, beta, gamma, sigma))
S, E, I, R = calc.T
df = pd.DataFrame({'Susceptible': S, 'Exposed': E, 'Infected': I, 'Recovered': R, 'Day': days})


In [36]:
t = df["Recovered"] + df["Infected"]
df['Total'] = t

df

Unnamed: 0,Susceptible,Exposed,Infected,Recovered,Day,Total
0,274373400.0,5000.0,100250.0,1572630.0,0,1672880.0
1,274363800.0,12932.479392,95043.489756,1579593.0,1,1674637.0
2,274354500.0,19086.88396,91493.380018,1586247.0,2,1677740.0
3,274345500.0,23904.524842,89198.839174,1592694.0,3,1681893.0
4,274336700.0,27718.07327,87857.898758,1599012.0,4,1686870.0
5,274328000.0,30778.322248,87243.105054,1605262.0,5,1692505.0
6,274319400.0,33274.356762,87183.167349,1611489.0,6,1698672.0
7,274310700.0,35348.754712,87549.127194,1617727.0,7,1705276.0
8,274302000.0,37109.043755,88243.93414,1624004.0,8,1712248.0
9,274293200.0,38636.335985,89194.589233,1630339.0,9,1719534.0


In [37]:
last_day_total = 1668368 #total cases April 30, 2021

In [38]:
ncArr = []
for data in df['Total']:
    calc = data - last_day_total
    ncArr.append(calc)
    last_day_total = data

ncArr_train =ncArr
ncArr_train

[4512.0,
 1756.9781865738332,
 3103.3782338702586,
 4152.270851462847,
 4977.585435188143,
 5635.053960043471,
 6166.692980903434,
 6604.182700217003,
 6971.413012586767,
 7286.402667571558,
 7562.745964951348,
 7810.702330403728,
 8038.018101827474,
 8250.545968624065,
 8452.711441037478,
 8647.864728246117,
 8838.545818153303,
 9026.684315129183,
 9213.750067934161,
 9400.866680340609,
 9588.896999804769,
 9778.507806899725,
 9970.218271656195,
 10164.43656584411,
 10361.487596732797,
 10561.633802249562,
 10765.090879288968,
 10972.039498776197,
 11182.634407968959,
 11397.01102646091,
 11615.290623645997]

In [39]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler

In [40]:
from pandas import DataFrame
new_cases = DataFrame (ncArr_train)
covid_data = new_cases.values.astype(float)
train_data = covid_data

In [41]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(-1,1))
train_data_normalized = scaler.fit_transform(train_data.reshape(-1,1))
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)
train_window = 7

def create_inout_sequences(input_data, tw):
  inout_seq = []
  L = len(input_data)
  for i in range(L-tw):
    train_seq = input_data[i:i+tw]
    train_label = input_data[i+tw:i+tw+1]
    inout_seq.append((train_seq, train_label))
  return inout_seq

train_inout_seq = create_inout_sequences(train_data_normalized, train_window)

In [42]:
#Model
class LSTM(nn.Module):
  def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
    super().__init__()
    self.hidden_layer_size = hidden_layer_size
    self.lstm = nn.LSTM(input_size, hidden_layer_size)
    self.linear = nn.Linear(hidden_layer_size, output_size)
    self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                        torch.zeros(1,1,self.hidden_layer_size))
    
  def forward(self, input_seq):
    lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), 1, -1), self.hidden_cell)
    predictions = self.linear(lstm_out.view(len(input_seq), -1))
    return predictions[-1]

model = LSTM()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [65]:
#Training
epochs = 1000

for i in range(epochs):
  for seq, labels in train_inout_seq:
    optimizer.zero_grad()
    model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                         torch.zeros(1, 1, model.hidden_layer_size))
    
    y_pred = model(seq)

    single_loss = loss_function(y_pred, labels)
    single_loss.backward()
    optimizer.step()

  if i%100==1:
    print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')


epoch:   1 loss: 0.00000231
epoch: 101 loss: 0.00000133
epoch: 201 loss: 0.00000000
epoch: 301 loss: 0.00000038
epoch: 401 loss: 0.00000000
epoch: 501 loss: 0.00000447
epoch: 601 loss: 0.00000936
epoch: 701 loss: 0.00000000
epoch: 801 loss: 0.00000006
epoch: 901 loss: 0.00002773
epoch: 999 loss: 0.0000007896


In [66]:
#Prediction
fut_pred = 7

test_inputs = train_data_normalized[-train_window:].tolist()
print(test_inputs)

model.eval()

for i in range(fut_pred):
  seq = torch.FloatTensor(test_inputs[-train_window:])
  with torch.no_grad():
    model.hidden = (torch.zeros(1, 1, model.hidden_layer_size),
                    torch.zeros(1, 1, model.hidden_layer_size))
    test_inputs.append(model(seq).item())

test_inputs[fut_pred:]
actual_predictions = scaler.inverse_transform(np.array(test_inputs[train_window:] ).reshape(-1, 1))
print(actual_predictions)

[0.7456353902816772, 0.7862399220466614, 0.8275161981582642, 0.8695007562637329, 0.912225067615509, 0.9557166695594788, 1.0]
[[11831.2575727 ]
 [12051.74611307]
 [12279.98805142]
 [12516.95586775]
 [12763.90644105]
 [13022.78590654]
 [13296.4547069 ]]


In [67]:
test_data = real.to_list()
test_data

[4824, 5246, 5353, 6486, 6594, 5832, 6993]

Prediction SEIR + LSTM

In [None]:
print('MAE : ', mae( test_data, actual_predictions))
print('MAPE : ', mape (test_data, actual_predictions))

MAE :  6585.006893260124
MAPE :  52.67710437719917


In [55]:
print('MAE : ', mae( test_data, actual_predictions))
print('MAPE : ', mape (test_data, actual_predictions))

MAE :  6627.841401338096
MAPE :  52.821395601356905


In [58]:
print('MAE : ', mae( test_data, actual_predictions))
print('MAPE : ', mape (test_data, actual_predictions))

MAE :  6630.941333497696
MAPE :  52.83641081703748


In [64]:
print('MAE : ', mae( test_data, actual_predictions))
print('MAPE : ', mape (test_data, actual_predictions))

MAE :  6610.821028606299
MAPE :  52.76119458874731


In [61]:
print('MAE : ', mae( test_data, actual_predictions))
print('MAPE : ', mape (test_data, actual_predictions))

MAE :  6611.945193665773
MAPE :  52.76354838733436


############################################

SEIR + BERT + LSTM

In [69]:
#Downloading libraries
!pip install -U sentence-transformers

Collecting sentence-transformers
  Downloading sentence-transformers-2.0.0.tar.gz (85 kB)
[K     |████████████████████████████████| 85 kB 2.6 MB/s 
[?25hCollecting transformers<5.0.0,>=4.6.0
  Downloading transformers-4.9.1-py3-none-any.whl (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 18.8 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 58.6 MB/s 
[?25hCollecting huggingface-hub
  Downloading huggingface_hub-0.0.15-py3-none-any.whl (43 kB)
[K     |████████████████████████████████| 43 kB 1.8 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 64.8 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |█████████████████████

In [70]:
pip install fasttext

Collecting fasttext
  Downloading fasttext-0.9.2.tar.gz (68 kB)
[K     |████████████████████████████████| 68 kB 4.8 MB/s  eta 0:00:01
[?25hCollecting pybind11>=2.2
  Using cached pybind11-2.7.0-py2.py3-none-any.whl (199 kB)
Building wheels for collected packages: fasttext
  Building wheel for fasttext (setup.py) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.2-cp37-cp37m-linux_x86_64.whl size=3093380 sha256=411247e1e62ed3acd2d4e12d6108d1097874cea4760c1c360ca0b581696da39e
  Stored in directory: /root/.cache/pip/wheels/4e/ca/bf/b020d2be95f7641801a6597a29c8f4f19e38f9c02a345bab9b
Successfully built fasttext
Installing collected packages: pybind11, fasttext
Successfully installed fasttext-0.9.2 pybind11-2.7.0


In [71]:
#Importing dependencies
import gensim
from gensim.models import KeyedVectors
from gensim import corpora
from gensim.corpora import Dictionary
from sklearn.feature_extraction.text import CountVectorizer
from collections import Counter
from sklearn.metrics import silhouette_score
from wordcloud import WordCloud
from gensim.models.coherencemodel import CoherenceModel
import os
from sentence_transformers import SentenceTransformer
import copy
from datetime import datetime
import warnings
from matplotlib import pyplot as plt
from wordcloud import WordCloud
import contextlib
import spacy
import re
import random
import math
from gensim.utils import simple_preprocess
import fasttext.util


#word2vec, glove, fasttext
class WordVecVectorizer(object):
    def __init__(self, model, method):
        self.model = model
        self.method = method

        if method == 'WORD2VEC':
          self.dim = 100 
                     
        elif method == 'GLOVE':
           self.dim = 5 
           
        elif method == 'FASTTEXT':           
            self.dim = 300
        else:
          self.dim = 200
       
        
    def fit(self, X, y):
        return self    
        
    def transform(self, X):
        return np.array([
            np.mean([self.model[w] for w in texts.split() if w in self.model]
                    or [np.zeros(self.dim)], axis=0)
            for texts in X              
        ])

In [72]:
def loadGloveModel(File):
    print("Loading Glove Model")
    f = open(File,'r')
    gloveModel = {}
    for line in f:
        splitLines = line.split()
        word = splitLines[0]
        wordEmbedding = np.array([float(value) for value in splitLines[1:]])
        gloveModel[word] = wordEmbedding
    print(len(gloveModel)," words loaded!")
    return gloveModel



def vectorize(method, sentences):
  if method == 'BOW':
    print('Getting vector representations for BoW ...')
    dictionary = corpora.Dictionary()
    doc_tokenized = [simple_preprocess(doc) for doc in sentences]
    BoW_corpus = [dictionary.doc2bow(doc, allow_update=True) for doc in doc_tokenized]
    vec = np.array(BoW_corpus)
    print('Getting vector representations for BoW. Done!')
    return vec

  elif method == 'BERT':
    print('Getting vector representations for BERT ...')
    model = SentenceTransformer('indobenchmark/indobert-base-p1')
    vec = np.array(model.encode(sentences, show_progress_bar=True))
    print('Getting vector representations for BERT. Done!')
    return vec

  elif method == 'WORD2VEC':
     print('Getting vector representations for WORD2VEC ...')
     model = gensim.models.KeyedVectors.load_word2vec_format('drive/My Drive/Thesis/pretrained_model/word2vec/model.bin', binary=True) 
     wtv_vect = WordVecVectorizer(model, method)
     embeddings = wtv_vect.transform(sentences)
     vec = np.vstack(embeddings)
     print('Getting vector representations for WORD2VEC. Done!')
     return vec

  elif method == 'GLOVE':
    print('Getting vector representations for GLOVE ...')
    model = loadGloveModel("drive/My Drive/Thesis/Data/gensim_glove_vectors.txt") 
    wtv_vect = WordVecVectorizer(model, method)
    embeddings = wtv_vect.transform(sentences)
    vec = np.vstack(embeddings)
    print('Getting vector representations for GLOVE. Done!')
    return vec

  elif method == 'FASTTEXT':
    print('Getting vector representations for FASTTEXT ...')
    #loading the model
    model = fasttext.load_model('drive/My Drive/Thesis/pretrained_model/fasttext/model.bin') 
    wtv_vect = WordVecVectorizer(model, method)
    embeddings = wtv_vect.transform(sentences)
    vec = np.vstack(embeddings)
    print('Getting vector representations for FASTTEXT. Done!')
    return vec       

In [80]:
#loading covid data 
new_cases = cv['New Cases']


In [102]:
train_data = ncArr_train
cv_feature = train_data

In [101]:
#loading tweet data
tw  = pd.read_csv('drive/My Drive/Thesis/Data/Tweet_TM/task1_may_june/bert_cluster_0_combined.csv')
tw_feature = tw['clean_sentences']


In [103]:
#create vector for tweets
method = 'BERT'
tweet_feature  = vectorize(method, tw_feature)




Getting vector representations for BERT ...


HBox(children=(FloatProgress(value=0.0, description='Batches', max=1.0, style=ProgressStyle(description_width=…


Getting vector representations for BERT. Done!


In [104]:
#concatenating features

features = nn.Sequential(
            nn.Linear(768, 1),
            nn.ReLU(),
        )

x1 = torch.Tensor(cv_feature)
x2 = features(torch.Tensor(tweet_feature))
x1 = x1.view(31, 1) 
x = torch.cat((x1, x2),1)

x = x.detach().numpy()
train_data = np.array([])
for x in x:
  train_data = np.append(train_data, x[0])
train_data.shape

(31,)

In [235]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(-1,1))
train_data_normalized = scaler.fit_transform(train_data.reshape(-1,1))
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)
train_window = 7

train_inout_seq = create_inout_sequences(train_data_normalized, train_window)

#MODEL
model = LSTM()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

#Training
epochs = 1000

for i in range(epochs):
  for seq, labels in train_inout_seq:
    optimizer.zero_grad()
    model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                         torch.zeros(1, 1, model.hidden_layer_size))
    
    y_pred = model(seq)

    single_loss = loss_function(y_pred, labels)
    single_loss.backward()
    optimizer.step()

  if i%100==1:
    print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

epoch:   1 loss: 0.21479741
epoch: 101 loss: 0.00021077
epoch: 201 loss: 0.00002840
epoch: 301 loss: 0.00012081
epoch: 401 loss: 0.00001702
epoch: 501 loss: 0.00000202
epoch: 601 loss: 0.00000344
epoch: 701 loss: 0.00003507
epoch: 801 loss: 0.00000399
epoch: 901 loss: 0.00000026
epoch: 999 loss: 0.0000004294


In [236]:
#Prediction
test_data = new_cases[31:]
fut_pred = 7

test_inputs = train_data_normalized[-train_window:].tolist()
print(test_inputs)

model.eval()

for i in range(fut_pred):
  seq = torch.FloatTensor(test_inputs[-train_window:])
  with torch.no_grad():
    model.hidden = (torch.zeros(1, 1, model.hidden_layer_size),
                    torch.zeros(1, 1, model.hidden_layer_size))
    test_inputs.append(model(seq).item())

test_inputs[fut_pred:]
actual_predictions = scaler.inverse_transform(np.array(test_inputs[train_window:] ).reshape(-1, 1))
print(actual_predictions)


[0.7456352114677429, 0.7862398624420166, 0.8275160789489746, 0.8695005774497986, 0.912225067615509, 0.9557164907455444, 1.0]
[[11821.75528688]
 [12023.04514165]
 [12221.18251579]
 [12413.03843271]
 [12596.23016939]
 [12767.93900271]
 [12924.44659205]]


In [125]:
cov = new_cases[31:]

print('MAE : ', mae( cov, actual_predictions.reshape(7,)))
print('MAPE : ', mape (cov, actual_predictions.reshape(7,)))

MAE :  6563.810842533419
MAPE :  52.768112224196926


In [135]:
cov = new_cases[31:]

print('MAE : ', mae( cov, actual_predictions.reshape(7,)))
print('MAPE : ', mape (cov, actual_predictions.reshape(7,)))

MAE :  6626.995771377661
MAPE :  53.00155840375462


In [210]:
cov = new_cases[31:]

print('MAE : ', mae( cov, actual_predictions.reshape(7,)))
print('MAPE : ', mape (cov, actual_predictions.reshape(7,)))

MAE :  6651.424873068948
MAPE :  53.10233711719034


In [225]:
cov = new_cases[31:]

print('MAE : ', mae( cov, actual_predictions.reshape(7,)))
print('MAPE : ', mape (cov, actual_predictions.reshape(7,)))

MAE :  6399.33303002671
MAPE :  52.06877575935707


In [237]:
cov = new_cases[31:]

print('MAE : ', mae( cov, actual_predictions.reshape(7,)))
print('MAPE : ', mape (cov, actual_predictions.reshape(7,)))

MAE :  6491.376734454922
MAPE :  52.48226063192003


6546.59  52.68

###################################

##############################################################################