In [1]:
# LSTM for international airline passengers problem with window regression framing
import numpy
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score 
import pandas as pd

In [None]:
import sys
!{sys.executable} -m pip install keras

In [None]:
import sys
!{sys.executable} -m pip install TensorFlow

In [2]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)


In [3]:
# fix random seed for reproducibility
numpy.random.seed(7)

In [4]:
# load the dataset
dataframe = read_csv("cases_keywords_complete_V2_5days - Hoja 1.csv", engine='python')
dataset = dataframe.values
dataset = dataframe.drop(["fecha","Index","casos_5"],axis=1)
dataset = dataset.astype('float32')



In [5]:
dataset.tail()

Unnamed: 0,casos,tos,dolor de garganta,fiebre,cansancio,dolor de cabeza,suma_keys_sintomas,tengo covid,sintomas covid,test covid,suma_keys_preguntas
569,31171.0,10.0,8.0,42.0,8.0,22.0,90.0,16.0,60.0,87.0,163.0
570,32330.0,,,,,,,19.0,44.0,63.0,126.0
571,34042.0,,,,,,,,,,
572,35754.0,,,,,,,,,,
573,37466.0,,,,,,,,,,


In [6]:
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)


In [7]:
# split into train and test sets
train_size = int(len(dataset) * 0.297)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
train_size

170

In [8]:
# reshape into X=t and Y=t+1
look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

trainY

array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 2.54588958e-05, 2.54588958e-05,
       2.54588958e-05, 5.09177917e-05, 0.00000000e+00, 2.54588958e-05,
       2.54588958e-05, 2.54588958e-05, 5.09177917e-05, 0.00000000e+00,
       7.63766875e-05, 0.00000000e+00, 0.00000000e+00, 5.09177917e-05,
       5.09177917e-05, 2.54588958e-05, 5.09177917e-05, 2.54588958e-05,
       2.54588958e-05, 1.01835583e-04, 2.54588958e-05, 7.63766875e-05,
       4.07342333e-04, 2.54588958e-04, 2.80047854e-04, 2.29130063e-04,
       1.52753375e-04, 2.29130063e-04, 4.07342333e-04, 2.29130063e-04,
       7.63766875e-05, 5.60095708e-04, 3.30965646e-04, 5.85554633e-04,
       6.11013500e-04, 4.83719021e-04, 8.91061383e-04, 5.85554633e-04,
       6.87390217e-04, 6.87390217e-04, 4.32801229e-04, 1.34932145e-03,
       1.50207488e-03, 1.47661590e-03, 1.85849937e-03, 2.72410177e-03,
       3.28419753e-03, 3.71699873e-03, 4.58260113e-03, 8.09592847e-03,
      

In [9]:
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))


In [10]:
trainX

array([[[2.54588958e-05, 0.00000000e+00, 2.54588958e-05]],

       [[0.00000000e+00, 2.54588958e-05, 0.00000000e+00]],

       [[2.54588958e-05, 0.00000000e+00, 0.00000000e+00]],

       [[0.00000000e+00, 0.00000000e+00, 0.00000000e+00]],

       [[0.00000000e+00, 0.00000000e+00, 0.00000000e+00]],

       [[0.00000000e+00, 0.00000000e+00, 0.00000000e+00]],

       [[0.00000000e+00, 0.00000000e+00, 0.00000000e+00]],

       [[0.00000000e+00, 0.00000000e+00, 2.54588958e-05]],

       [[0.00000000e+00, 2.54588958e-05, 2.54588958e-05]],

       [[2.54588958e-05, 2.54588958e-05, 2.54588958e-05]],

       [[2.54588958e-05, 2.54588958e-05, 5.09177917e-05]],

       [[2.54588958e-05, 5.09177917e-05, 0.00000000e+00]],

       [[5.09177917e-05, 0.00000000e+00, 2.54588958e-05]],

       [[0.00000000e+00, 2.54588958e-05, 2.54588958e-05]],

       [[2.54588958e-05, 2.54588958e-05, 2.54588958e-05]],

       [[2.54588958e-05, 2.54588958e-05, 5.09177917e-05]],

       [[2.54588958e-05, 5.09177917e-05,

In [11]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)


Epoch 1/100
166/166 - 24s - loss: 0.0018
Epoch 2/100
166/166 - 0s - loss: 0.0012
Epoch 3/100
166/166 - 0s - loss: 7.1551e-04
Epoch 4/100
166/166 - 0s - loss: 3.8659e-04
Epoch 5/100
166/166 - 0s - loss: 2.5113e-04
Epoch 6/100
166/166 - 0s - loss: 2.1404e-04
Epoch 7/100
166/166 - 0s - loss: 2.1243e-04
Epoch 8/100
166/166 - 0s - loss: 2.1450e-04
Epoch 9/100
166/166 - 0s - loss: 2.1853e-04
Epoch 10/100
166/166 - 0s - loss: 2.1196e-04
Epoch 11/100
166/166 - 0s - loss: 2.1616e-04
Epoch 12/100
166/166 - 0s - loss: 2.0088e-04
Epoch 13/100
166/166 - 0s - loss: 2.2112e-04
Epoch 14/100
166/166 - 0s - loss: 2.0617e-04
Epoch 15/100
166/166 - 0s - loss: 2.0790e-04
Epoch 16/100
166/166 - 0s - loss: 2.0299e-04
Epoch 17/100
166/166 - 0s - loss: 2.0549e-04
Epoch 18/100
166/166 - 0s - loss: 2.0133e-04
Epoch 19/100
166/166 - 0s - loss: 2.0170e-04
Epoch 20/100
166/166 - 0s - loss: 2.1269e-04
Epoch 21/100
166/166 - 0s - loss: 2.0165e-04
Epoch 22/100
166/166 - 0s - loss: 1.9745e-04
Epoch 23/100
166/166 - 0s 

<keras.callbacks.History at 0x7ffd15cc7df0>

In [33]:
# make predictions
trainPredict = model.predict(trainX)*20000

testPredict = model.predict(testX)*20000

testPredict

array([[  103.39628 ],
       [  126.051384],
       [  145.36963 ],
       [  169.7464  ],
       [  163.77173 ],
       [  177.56274 ],
       [  149.38385 ],
       [  125.42986 ],
       [  169.83975 ],
       [  195.37436 ],
       [  249.6802  ],
       [  213.90245 ],
       [  253.02193 ],
       [  214.32947 ],
       [  209.6563  ],
       [  313.76474 ],
       [  370.4864  ],
       [  431.9788  ],
       [  450.11115 ],
       [  539.3097  ],
       [  461.52838 ],
       [  439.59805 ],
       [  630.2612  ],
       [  725.8545  ],
       [  882.86566 ],
       [  865.5258  ],
       [  985.1684  ],
       [  924.47217 ],
       [  842.0195  ],
       [ 1099.3113  ],
       [ 1168.5148  ],
       [ 1219.6531  ],
       [ 1168.0442  ],
       [ 1240.9508  ],
       [ 1102.4226  ],
       [ 1016.4315  ],
       [ 1386.2744  ],
       [ 1527.9484  ],
       [ 1619.8384  ],
       [ 1585.7744  ],
       [ 1761.7688  ],
       [ 1844.9829  ],
       [ 1543.1512  ],
       [ 19

In [34]:
total_pred = np.concatenate([trainPredict, testPredict])
dataset1 = pd.DataFrame(total_pred)

dataset1

Unnamed: 0,0
0,-29.950483
1,-30.278631
2,-30.323856
3,-30.368093
4,-30.368093
...,...
561,10827.009766
562,11798.380859
563,11382.997070
564,11748.178711


In [35]:
total_cases = np.concatenate([trainY, testY])

In [36]:
mean_squared_error(total_cases, total_pred, squared=False)

4729.2715

In [None]:
r2_score(total_cases, total_pred, multioutput="variance_weighted")

In [None]:
mean_squared_error(trainY[0], totalPredict[:,0], squared=False)

trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))

In [None]:
dataset1["fecha"] = dataframe["fecha"]

In [None]:
dataset1.tail()

In [None]:
dataset = {"prediction_170": total_pred}
dataset = pd.DataFrame(dataset)

dataset

In [None]:
dataset1.to_csv(r'/Users/walterconde/Ironhack/Repositories/IronWalter/Projects/Final/LSTM_prediction_test4_170.csv', index = False)

In [None]:
dataframe["prediction_170"] = total_pred
dataframe

In [None]:
dataframe["fecha"] = dataset["fecha"]

In [None]:
185+377

In [None]:
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])


In [None]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))


In [None]:
# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict


In [None]:
# shift test predictions for plotting
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict


In [None]:
# plot baseline and predictions
plt.plot(scaler.inverse_tr
         
         ansform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()