<center> <h1>Testing The Algorithm</h1> </center>

## Importing Necessary Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import cufflinks as cf
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

## Exploratory Data Analysis

### Reading the dataset

In [2]:
df = pd.read_csv('/home/zayed/Desktop/COVID-19/time-series-19-covid-combined.csv')

In [3]:
df

Unnamed: 0,Date,Country/Region,Province/State,Confirmed,Recovered,Deaths
0,2020-01-22,Afghanistan,,0,0.0,0
1,2020-01-23,Afghanistan,,0,0.0,0
2,2020-01-24,Afghanistan,,0,0.0,0
3,2020-01-25,Afghanistan,,0,0.0,0
4,2020-01-26,Afghanistan,,0,0.0,0
...,...,...,...,...,...,...
103735,2021-01-31,Zimbabwe,,33388,26044.0,1217
103736,2021-02-01,Zimbabwe,,33548,26583.0,1234
103737,2021-02-02,Zimbabwe,,33814,26794.0,1254
103738,2021-02-03,Zimbabwe,,33964,27391.0,1269


### Extracting Bangladesh Information

In [4]:
pd.set_option('max_rows', None)

In [5]:
bangladesh = df[df['Country/Region'] == "Bangladesh"]

In [6]:
bangladesh

Unnamed: 0,Date,Country/Region,Province/State,Confirmed,Recovered,Deaths
7600,2020-01-22,Bangladesh,,0,0.0,0
7601,2020-01-23,Bangladesh,,0,0.0,0
7602,2020-01-24,Bangladesh,,0,0.0,0
7603,2020-01-25,Bangladesh,,0,0.0,0
7604,2020-01-26,Bangladesh,,0,0.0,0
7605,2020-01-27,Bangladesh,,0,0.0,0
7606,2020-01-28,Bangladesh,,0,0.0,0
7607,2020-01-29,Bangladesh,,0,0.0,0
7608,2020-01-30,Bangladesh,,0,0.0,0
7609,2020-01-31,Bangladesh,,0,0.0,0


In [7]:
bangladesh.columns

Index(['Date', 'Country/Region', 'Province/State', 'Confirmed', 'Recovered',
       'Deaths'],
      dtype='object')

### Adding a new Feature (CurrentCase)

In [8]:
bangladesh['CurrentCase'] = bangladesh['Confirmed'] - bangladesh['Recovered'] - bangladesh['Deaths']

In [9]:
bangladesh.columns

Index(['Date', 'Country/Region', 'Province/State', 'Confirmed', 'Recovered',
       'Deaths', 'CurrentCase'],
      dtype='object')

In [10]:
bangladesh

Unnamed: 0,Date,Country/Region,Province/State,Confirmed,Recovered,Deaths,CurrentCase
7600,2020-01-22,Bangladesh,,0,0.0,0,0.0
7601,2020-01-23,Bangladesh,,0,0.0,0,0.0
7602,2020-01-24,Bangladesh,,0,0.0,0,0.0
7603,2020-01-25,Bangladesh,,0,0.0,0,0.0
7604,2020-01-26,Bangladesh,,0,0.0,0,0.0
7605,2020-01-27,Bangladesh,,0,0.0,0,0.0
7606,2020-01-28,Bangladesh,,0,0.0,0,0.0
7607,2020-01-29,Bangladesh,,0,0.0,0,0.0
7608,2020-01-30,Bangladesh,,0,0.0,0,0.0
7609,2020-01-31,Bangladesh,,0,0.0,0,0.0


### Handling Missing/Null Values

In [11]:
bangladesh.isna()

Unnamed: 0,Date,Country/Region,Province/State,Confirmed,Recovered,Deaths,CurrentCase
7600,False,False,True,False,False,False,False
7601,False,False,True,False,False,False,False
7602,False,False,True,False,False,False,False
7603,False,False,True,False,False,False,False
7604,False,False,True,False,False,False,False
7605,False,False,True,False,False,False,False
7606,False,False,True,False,False,False,False
7607,False,False,True,False,False,False,False
7608,False,False,True,False,False,False,False
7609,False,False,True,False,False,False,False


In [12]:
del bangladesh['Province/State']
print("Column Removed Successfully. ")

Column Removed Successfully. 


In [13]:
bangladesh

Unnamed: 0,Date,Country/Region,Confirmed,Recovered,Deaths,CurrentCase
7600,2020-01-22,Bangladesh,0,0.0,0,0.0
7601,2020-01-23,Bangladesh,0,0.0,0,0.0
7602,2020-01-24,Bangladesh,0,0.0,0,0.0
7603,2020-01-25,Bangladesh,0,0.0,0,0.0
7604,2020-01-26,Bangladesh,0,0.0,0,0.0
7605,2020-01-27,Bangladesh,0,0.0,0,0.0
7606,2020-01-28,Bangladesh,0,0.0,0,0.0
7607,2020-01-29,Bangladesh,0,0.0,0,0.0
7608,2020-01-30,Bangladesh,0,0.0,0,0.0
7609,2020-01-31,Bangladesh,0,0.0,0,0.0


### Resetting Index

In [14]:
bangladesh.reset_index(drop=True, inplace=True)
print("Done. ")

Done. 


In [15]:
bangladesh

Unnamed: 0,Date,Country/Region,Confirmed,Recovered,Deaths,CurrentCase
0,2020-01-22,Bangladesh,0,0.0,0,0.0
1,2020-01-23,Bangladesh,0,0.0,0,0.0
2,2020-01-24,Bangladesh,0,0.0,0,0.0
3,2020-01-25,Bangladesh,0,0.0,0,0.0
4,2020-01-26,Bangladesh,0,0.0,0,0.0
5,2020-01-27,Bangladesh,0,0.0,0,0.0
6,2020-01-28,Bangladesh,0,0.0,0,0.0
7,2020-01-29,Bangladesh,0,0.0,0,0.0
8,2020-01-30,Bangladesh,0,0.0,0,0.0
9,2020-01-31,Bangladesh,0,0.0,0,0.0


### Changing Default Date Format to Python Date Format

In [16]:
bangladesh.dtypes

Date               object
Country/Region     object
Confirmed           int64
Recovered         float64
Deaths              int64
CurrentCase       float64
dtype: object

In [17]:
bangladesh["Date"] = pd.to_datetime(bangladesh["Date"])

In [18]:
bangladesh.dtypes

Date              datetime64[ns]
Country/Region            object
Confirmed                  int64
Recovered                float64
Deaths                     int64
CurrentCase              float64
dtype: object

### Making Univariate Dataset

Currently our dataframe contains <b>Multivariate Data.</b> But our algorithm by default works with Univariate Data. For this, we will split the dataframe into 4 extra dataframe: <b>confirmed</b>, <b>recovered</b>, <b>deaths</b>, <b>current_case</b>. And we will work with the new dataframes. 

In [19]:
confirmed = bangladesh[['Date',"Confirmed"]]
recovered = bangladesh[['Date', 'Recovered']]
deaths = bangladesh[['Date', 'Deaths']]
current_case = bangladesh[["Date", "CurrentCase"]]

In [20]:
confirmed

Unnamed: 0,Date,Confirmed
0,2020-01-22,0
1,2020-01-23,0
2,2020-01-24,0
3,2020-01-25,0
4,2020-01-26,0
5,2020-01-27,0
6,2020-01-28,0
7,2020-01-29,0
8,2020-01-30,0
9,2020-01-31,0


In [21]:
recovered

Unnamed: 0,Date,Recovered
0,2020-01-22,0.0
1,2020-01-23,0.0
2,2020-01-24,0.0
3,2020-01-25,0.0
4,2020-01-26,0.0
5,2020-01-27,0.0
6,2020-01-28,0.0
7,2020-01-29,0.0
8,2020-01-30,0.0
9,2020-01-31,0.0


In [22]:
deaths

Unnamed: 0,Date,Deaths
0,2020-01-22,0
1,2020-01-23,0
2,2020-01-24,0
3,2020-01-25,0
4,2020-01-26,0
5,2020-01-27,0
6,2020-01-28,0
7,2020-01-29,0
8,2020-01-30,0
9,2020-01-31,0


In [23]:
current_case

Unnamed: 0,Date,CurrentCase
0,2020-01-22,0.0
1,2020-01-23,0.0
2,2020-01-24,0.0
3,2020-01-25,0.0
4,2020-01-26,0.0
5,2020-01-27,0.0
6,2020-01-28,0.0
7,2020-01-29,0.0
8,2020-01-30,0.0
9,2020-01-31,0.0


### Visualizing Data

In [24]:
confirmed.iplot(kind='bar', x='Date', y='Confirmed')

In [25]:
deaths.iplot(kind='bar', x='Date', y='Deaths')

In [26]:
recovered.iplot(kind='bar', x='Date', y='Recovered')

In [27]:
current_case.iplot(kind='bar', x='Date', y='CurrentCase')

### Renaming Columns

We Need to rename the columns of the dataframe. First one will be <b>ds (datestamp)</b> and the other column name will be <b>y</b>. It's the rule of Facebook Prophet Algorithm. 

In [28]:
def rename_function(dataframe):
    columns = dataframe.columns
    dataframe = dataframe.rename(columns= {columns[0]: 'ds', columns[1]: 'y'})
    return dataframe

In [29]:
confirmed = rename_function(confirmed)
recovered = rename_function(recovered)
deaths = rename_function(deaths)
current_case = rename_function(current_case)

In [30]:
print('Confirmed : ',confirmed.columns)
print('Deaths : ',deaths.columns)
print('Recovered : ',recovered.columns)
print('Current Case : ',current_case.columns)

Confirmed :  Index(['ds', 'y'], dtype='object')
Deaths :  Index(['ds', 'y'], dtype='object')
Recovered :  Index(['ds', 'y'], dtype='object')
Current Case :  Index(['ds', 'y'], dtype='object')


### Split Function

Split function takes two arguements: dataframe and ratio. and return two dataframe: train & test. For example, if total rows of input dataframe is 100 and the ratio is 30, the function will return first 70 rows as train dataframe, and last 30 rows as test dataframe. 

In [31]:
def train_test_split(dataframe, ratio):
    divisor = round((ratio/100)*dataframe.shape[0])
    train = dataframe.iloc[:divisor]
    test = dataframe.iloc[divisor:]
    return train, test, divisor

In [32]:
confirmed_train, confirmed_test, divisor = train_test_split(confirmed, 72)

In [33]:
print("Number of All Rows : ", confirmed.shape[0])
print("Number of Trained Rows : ",confirmed_train.shape[0])
print("Number of Test Rows : ",confirmed_test.shape[0])

Number of All Rows :  380
Number of Trained Rows :  274
Number of Test Rows :  106


## Training Model & Forecasting

#### Step 1: Initializing the Prophet Model

In [35]:
from fbprophet import Prophet
model = Prophet(daily_seasonality=False, yearly_seasonality=False, holidays=None,  interval_width=0.95, growth='linear')

#### Step 2: Adding Seasonality (Optional)

In [36]:
 model.add_seasonality(name="Monthly", period=30.42, fourier_order=25)

<fbprophet.forecaster.Prophet at 0x7f9dbaa31450>

#### Step 3: Training the Model with Train Dataset

In [38]:
model.fit(confirmed_train)
print("Model Trained Successfully. ")

Exception: Prophet object can only be fit once. Instantiate a new object.

#### Step 4: Making future date dataframe

In [None]:
future_dates = model.make_future_dataframe(confirmed_test.shape[0])
print("Future Dates Generated. ")

In [None]:
future_dates.tail()

#### Step 5: Making Prediction

In [None]:
prediction = model.predict(future_dates)

In [None]:
prediction.tail()

#### Step 6: Filtering Important Columns

In [None]:
approximation = prediction[['ds', 'yhat' ,'yhat_lower', 'yhat_upper']]

In [None]:
approximation.tail(30)

In [None]:
bangladesh.tail()

#### Step 7: Visualizing the Prediction


In [None]:
model.plot(prediction)

### Checking Accuracy

* R-squared shows how well the data fit the regression model (the goodness of fit).
* R-squared can take any values between 0 to 1.
* Higher R-squared value means the data is good fit with the model. 

#### Creating the measurement Function

In [None]:
def check_metrics(test, prediction):
    R2_score = r2_score(test['y'], prediction['yhat'].iloc[divisor:])
    print(f"R2 Score : {R2_score}")

In [None]:
check_metrics(confirmed_test, prediction)