<a href="https://colab.research.google.com/github/xszbencex/SZFM_2021_10_GitGud_Big/blob/main/SZFM_2021_10_GitGud_Big.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

---
# **COVID19 prediction and forecasting using machine learning**
---

### **Import all needed packages/libraries**

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import numpy as np
import datetime as dt

from datetime import timedelta 
from sklearn.linear_model import LinearRegression 
from sklearn.svm import SVR 
from statsmodels.tsa.api import Holt

### **Import dataset**

In [None]:
# Location of the CSV file inside the project's GitHub repository.
url = 'https://raw.githubusercontent.com/xszbencex/SZFM_2021_10_GitGud_Big/main/Dataset/covid_19_data.csv'
# Download the file and parse it into a DataFrame.
covid = pd.read_csv(url)

In [None]:
# Display the last 20 rows of the dataset.
covid.tail(20)

In [None]:
# Display the first 20 rows of the dataset.
covid.head(20)

In [None]:
# Print the shape, the per-column null counts and the column dtypes, in that order.
dataset_summary = (
    ("Size/Shape of the dataset", covid.shape),
    ("Checking for null values", covid.isnull().sum()),
    ("Checking Data-type", covid.dtypes),
)
for caption, value in dataset_summary:
    print(caption, value)

In [None]:
# Remove the "SNo" column in place.
# The column is named through `columns=` because passing the axis positionally
# (`drop(labels, 1)`) is no longer accepted by pandas 2.0 and later.
covid.drop(columns=["SNo"], inplace=True)

In [None]:
# Count the missing values in each column.
covid.isnull().sum()

In [None]:
# Convert the ObservationDate column to pandas datetime values.
covid["ObservationDate"] = pd.to_datetime(covid["ObservationDate"])

### **Segregating weekly rises**

In [None]:
# Worldwide totals per observation date. Every later cell reads `datewise`, but no
# earlier cell creates it, so it is built here with the same aggregation that the
# per-country cells use.
datewise = covid.groupby(["ObservationDate"]).agg({"Confirmed":"sum","Recovered":"sum","Deaths":"sum"})

# `DatetimeIndex.weekofyear` no longer exists in pandas 2.x; isocalendar() is its replacement.
iso_calendar = datewise.index.isocalendar()
datewise["WeekofYear"] = iso_calendar.week

# Take the last cumulative value of every week. Grouping on (ISO year, ISO week)
# keeps the same week number of different years apart; sort=False keeps the weeks
# in the order in which they first appear in the date index.
weekly_totals = datewise.groupby(
    [iso_calendar.year, iso_calendar.week], sort=False
)[["Confirmed", "Recovered", "Deaths"]].last()

week_num = list(range(1, len(weekly_totals) + 1))  # weekly progress (1-based week counter)
weekwise_confirmed = weekly_totals["Confirmed"].tolist()  # weekly progress for different types of cases
weekwise_recovered = weekly_totals["Recovered"].tolist()
weekwise_deaths = weekly_totals["Deaths"].tolist()

plt.figure(figsize=(20,10))
plt.plot(week_num,weekwise_confirmed,linewidth=2)
plt.plot(week_num,weekwise_recovered,linewidth =2)
plt.plot(week_num,weekwise_deaths,linewidth = 2)
plt.xlabel("WeekNumber")
plt.ylabel("Number of cases")
plt.title("Weekly Progress of different types of cases")

### **Finding the weeks with the largest increases in Confirmed and Death cases**

In [None]:
# Two side-by-side bar charts: week-over-week increase of confirmed cases (left)
# and of deaths (right). The first week has no predecessor, so its increase is 0.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
weekly_panels = (
    (ax1, weekwise_confirmed, "Numberof Confirmed cases", "Weekly increase in number of Confirmed cases"),
    (ax2, weekwise_deaths, "Numberof Death cases", "Weekly increase in number of Death Cases"),
)
for panel_axis, weekly_values, y_caption, panel_title in weekly_panels:
    weekly_delta = pd.Series(weekly_values).diff().fillna(0)
    sns.barplot(x=week_num, y=weekly_delta, ax=panel_axis)
for panel_axis, _, y_caption, panel_title in weekly_panels:
    panel_axis.set_xlabel("Week Number")
    panel_axis.set_ylabel(y_caption)
    panel_axis.set_title(panel_title)
plt.show()

### **Segregating Average increasing number of cases**

In [None]:
# For each case type: (column, caption of the printed average, legend label).
daily_series_spec = (
    ("Confirmed", "Average increase in number of Confirmed cases everyday:", "Daily increase in confirmed cases"),
    ("Recovered", "Average increase in number of Recovered cases everyday:", "Daily increase in recovered cases"),
    ("Deaths", "Average increase in number of Death cases everyday:", "Daily increase in death cases"),
)

# Day-over-day difference of the cumulative totals; the first day is filled with 0.
for column, average_caption, _ in daily_series_spec:
    print(average_caption, np.round(datewise[column].diff().fillna(0).mean()))

plt.figure(figsize=(20, 10))
for column, _, legend_label in daily_series_spec:
    plt.plot(datewise[column].diff().fillna(0), label=legend_label, linewidth=2)
plt.xlabel("Timestamp")
plt.ylabel("Daily increase")
plt.title("Daily increase")
plt.legend()
plt.xticks(rotation=90)
plt.show()

### **Calculating country wise mortality rate**

In [None]:
# Per-country totals on the most recent observation date, largest outbreaks first.
latest_observation = covid["ObservationDate"].max()
countrywise = (
    covid[covid["ObservationDate"] == latest_observation]
    .groupby(["Country/Region"])
    .agg({"Confirmed":"sum","Recovered":"sum","Deaths":"sum"})
    .sort_values(["Confirmed"], ascending=False)
)
# Mortality is the share of confirmed cases that ended in death, so the
# denominator is Confirmed (it was Recovered before).
countrywise["Mortality"] = (countrywise["Deaths"] / countrywise["Confirmed"]) * 100
# The recovery percentage gets its own column, so the absolute "Recovered"
# counts are no longer overwritten by a percentage.
countrywise["Recovery"] = (countrywise["Recovered"] / countrywise["Confirmed"]) * 100

In [None]:
# Horizontal bar charts of the 15 countries with the most confirmed cases (left)
# and the most deaths (right).
fig,(ax1,ax2)=plt.subplots(1,2,figsize=(25,10))
top_15confirmed = countrywise.sort_values(["Confirmed"],ascending=False).head(15)
top_15deaths = countrywise.sort_values(["Deaths"],ascending=False).head(15)
sns.barplot(x=top_15confirmed["Confirmed"],y=top_15confirmed.index,ax=ax1)
ax1.set_title("Top 15 countries as per number of confirmed cases")
sns.barplot(x=top_15deaths["Deaths"],y=top_15deaths.index,ax=ax2)
# The deaths title belongs to the right-hand axis; it used to overwrite ax1's title.
ax2.set_title("Top 15 countries as per number of death cases")



### **Data analysis for India**



In [None]:
# Daily totals for India, followed by the active/closed counts on the last recorded day.
india_data = covid[covid["Country/Region"] == "India"]
datewise_india = india_data.groupby(["ObservationDate"]).agg(
    {"Confirmed": "sum", "Recovered": "sum", "Deaths": "sum"}
)
print(datewise_india.iloc[-1])
india_latest_confirmed, india_latest_recovered, india_latest_deaths = (
    datewise_india[column].iloc[-1] for column in ("Confirmed", "Recovered", "Deaths")
)
print("Total Active Cases", india_latest_confirmed - india_latest_recovered - india_latest_deaths)
print("Total Closed Cases", india_latest_recovered + india_latest_deaths)

### **Data analysis for US**

In [None]:

# Daily totals for the US, followed by the active/closed counts on the last recorded day.
us_data = covid[covid["Country/Region"] == "US"]
datewise_us = us_data.groupby(["ObservationDate"]).agg(
    {"Confirmed": "sum", "Recovered": "sum", "Deaths": "sum"}
)
print(datewise_us.iloc[-1])
us_latest_confirmed, us_latest_recovered, us_latest_deaths = (
    datewise_us[column].iloc[-1] for column in ("Confirmed", "Recovered", "Deaths")
)
print("Total Active Cases", us_latest_confirmed - us_latest_recovered - us_latest_deaths)
print("Total Closed Cases", us_latest_recovered + us_latest_deaths)

 ### **Segregating weekly rises for India**



In [None]:
# `DatetimeIndex.weekofyear` no longer exists in pandas 2.x; isocalendar() is its replacement.
india_iso_calendar = datewise_india.index.isocalendar()
datewise_india["WeekofYear"] = india_iso_calendar.week

# Last cumulative value of each week. Grouping on (ISO year, ISO week) keeps the
# same week number of different years apart; sort=False keeps first-appearance order.
india_weekly_totals = datewise_india.groupby(
    [india_iso_calendar.year, india_iso_calendar.week], sort=False
)[["Confirmed", "Recovered", "Deaths"]].last()

week_num_india = list(range(1, len(india_weekly_totals) + 1))
india_weekwise_confirmed = india_weekly_totals["Confirmed"].tolist()
india_weekwise_recovered = india_weekly_totals["Recovered"].tolist()
india_weekwise_deaths = india_weekly_totals["Deaths"].tolist()

plt.figure(figsize=(8,5))
plt.plot(week_num_india,india_weekwise_confirmed,linewidth=3)
plt.plot(week_num_india,india_weekwise_recovered,linewidth =3)
plt.plot(week_num_india,india_weekwise_deaths,linewidth = 3)
plt.xlabel("WeekNumber")
plt.ylabel("Number of cases")
plt.title("Weekly Progress of different types of cases")



### **Segregating country wise analysis**



In [None]:
def _rows_for_country(country_name):
    """Return the rows of `covid` whose Country/Region equals `country_name`."""
    return covid[covid["Country/Region"] == country_name]


def _totals_per_day(country_rows):
    """Sum Confirmed, Recovered and Deaths for each observation date."""
    return country_rows.groupby(["ObservationDate"]).agg(
        {"Confirmed": "sum", "Recovered": "sum", "Deaths": "sum"}
    )


china_data = _rows_for_country("Mainland China")
Italy_data = _rows_for_country("Italy")
US_data = _rows_for_country("US")
spain_data = _rows_for_country("Spain")

datewise_china = _totals_per_day(china_data)
datewise_Italy = _totals_per_day(Italy_data)
datewise_US = _totals_per_day(US_data)
datewise_Spain = _totals_per_day(spain_data)

# Highest confirmed total reached in each country.
max_ind = datewise_india["Confirmed"].max()
max_it = datewise_Italy["Confirmed"].max()
max_us = datewise_US["Confirmed"].max()
max_spain = datewise_Spain["Confirmed"].max()
max_china = datewise_china["Confirmed"].max()

# Number of recorded days with at least one confirmed case, next to the peak value.
country_report = (
    ("India", datewise_india, max_ind),
    ("Italy", datewise_Italy, max_it),
    ("US", datewise_US, max_us),
    ("Spain", datewise_Spain, max_spain),
    ("China", datewise_china, max_china),
)
for country_label, daily_totals, peak_confirmed in country_report:
    days_with_cases = daily_totals[daily_totals["Confirmed"] > 0].shape[0]
    print("It took", days_with_cases, f"days in {country_label} to reach", peak_confirmed, "Confirmed Cases")



### **Training models on the current dataset**


In [None]:
# Number of whole days elapsed since the first observation; used as the only feature.
datewise["Days Since"] = (datewise.index - datewise.index[0]).days

# Chronological split: the first 95% of the days train the models and the
# remaining 5% validate them. The validation slice previously used `[:k:]`,
# which selected the training rows again instead of the held-out tail.
ml_split_position = int(datewise.shape[0]*0.95)
train_ml = datewise.iloc[:ml_split_position]
valid_ml = datewise.iloc[ml_split_position:]
model_scores=[]

In [None]:
# `normalize=` was removed from LinearRegression in scikit-learn 1.2. For ordinary
# least squares, rescaling the single feature does not change the fitted
# predictions, so the argument is simply dropped.
lin_reg = LinearRegression()
svm = SVR(C=1,degree=5,kernel='poly',epsilon=0.001)

train_days = np.array(train_ml["Days Since"]).reshape(-1,1)
train_confirmed = np.array(train_ml["Confirmed"])

# The linear model keeps its column-vector target, so predict() still returns a
# 2-D array (the forecasting cell indexes it with [0][0]).
lin_reg.fit(train_days, train_confirmed.reshape(-1,1))
# SVR expects a 1-D target; passing a column vector only triggers a conversion warning.
svm.fit(train_days, train_confirmed)

In [None]:
# Predict the confirmed totals for the validation days with both fitted models.
valid_days = np.array(valid_ml["Days Since"]).reshape(-1, 1)
prediction_valid_lin_reg = lin_reg.predict(valid_days)
prediction_valid_svm = svm.predict(valid_days)

In [None]:
# Extrapolate both regressors over the 17 days that follow the last observation.
last_observed_date = datewise.index[-1]
last_day_number = datewise["Days Since"].max()

new_date = []
new_prediction_lr = []
new_prediction_svm = []
for day_offset in range(1, 18):
    day_feature = np.array(last_day_number + day_offset).reshape(-1, 1)
    new_date.append(last_observed_date + timedelta(days=day_offset))
    # lin_reg.predict returns a 2-D array here, svm.predict a 1-D one.
    new_prediction_lr.append(lin_reg.predict(day_feature)[0][0])
    new_prediction_svm.append(svm.predict(day_feature)[0])

# Show floats without decimals in DataFrame output.
pd.set_option("display.float_format", lambda x: '%.f' % x)
model_predictions = pd.DataFrame(
    zip(new_date, new_prediction_lr, new_prediction_svm),
    columns=["Dates", "LR", "SVR"],
)
model_predictions.head(5)

### **Time series analysis to get the exact data**

In [None]:
# Chronological 85% / 15% split for the time-series model.
holdout_start = int(datewise.shape[0] * 0.85)
model_train = datewise.iloc[:holdout_start]
valid = datewise.iloc[holdout_start:]

### **Initializing the model which includes forecasting and prediction**

In [None]:
# Fit Holt's linear-trend exponential smoothing model on the confirmed totals of the training slice.
# NOTE(review): smoothing_level=1.4 lies outside the 0-1 range normally used for a
# smoothing weight, and newer statsmodels releases call the second argument
# `smoothing_trend` rather than `smoothing_slope` - confirm both against the
# statsmodels version this notebook runs on.
holt=Holt(np.asarray(model_train["Confirmed"])).fit(smoothing_level=1.4,smoothing_slope=0.2)
# Copy the validation rows and add one forecast value per row.
y_pred = valid.copy()
y_pred["Holt"]=holt.forecast(len(valid))

In [None]:
# Forecast 17 days past the end of the data. The model was fitted on the training
# slice only, so each forecast has to step over the validation period first.
validation_length = len(valid)
holt_day_offsets = range(1, 18)
holt_new_date = [datewise.index[-1] + timedelta(days=step) for step in holt_day_offsets]
holt_new_prediction = [holt.forecast(validation_length + step)[-1] for step in holt_day_offsets]

model_predictions["Holts Linear Model Prediction"] = holt_new_prediction
model_predictions.head()

###  **Examining a single country (Pakistan)**


In [None]:
!pip install pmdarima

In [None]:
import numpy as np
import pandas as pd
import datetime
import plotly.express as px
import plotly.express as ex
import folium
import matplotlib.pyplot as plt

from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_model import ARIMA
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, add_changepoints_to_plot

import plotly.graph_objects as go
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [None]:
# Location of the Pakistan CSV file; this rebinds the `url` used for the global dataset.
url = 'https://raw.githubusercontent.com/csengetoth/covid_forecast/main/pak_data-v2.csv'

In [None]:
# Load the Pakistan dataset from the CSV URL (a local Excel variant is left disabled below).
#covid_data = pd.read_excel('pak_data-v2.xlsx')
covid_data = pd.read_csv(url)

In [None]:
# Display the first rows of the Pakistan dataset.
covid_data.head()

In [None]:
# Count the missing values in each column.
covid_data.isnull().sum()

In [None]:
# Show the dtype of each column.
covid_data.dtypes

In [None]:
# Smallest value in the Date column (the start date).
# NOTE(review): Date is not parsed to datetime in this notebook, so this is the
# earliest date only if the stored values sort chronologically - confirm.
covid_data.Date.min()

In [None]:
# Largest value in the Date column (the end date).
# NOTE(review): same caveat as for the minimum - confirm the values sort chronologically.
covid_data.Date.max()

### **Pakistan**

In [None]:
# Work on a copy so that the raw frame stays untouched.
pak_data = covid_data.copy()

In [None]:
# Running totals over the rows of pak_data, in their existing order.
# cumsum() replaces the previous row-by-row loop, which wrote through chained
# indexing (`frame[col].iloc[i] = ...`). That pattern is slow, emits
# SettingWithCopyWarning, and stops updating the frame under pandas copy-on-write.
pak_data['Total Confirmed Cases'] = pak_data['New Cases'].cumsum()
pak_data['Total Recovered'] = pak_data['Cured Cases'].cumsum()
pak_data['Total Deaths'] = pak_data['Death Cases'].cumsum()
# Active cases after each row = everything confirmed so far minus everyone who
# has died or been cured so far.
pak_data['Active Cases'] = (
    pak_data['Total Confirmed Cases']
    - pak_data['Total Deaths']
    - pak_data['Total Recovered']
)

In [None]:
# Display the first rows, now including the running-total columns.
pak_data.head()

In [None]:
# Overall counts for Pakistan.
confirmed = pak_data['New Cases'].sum()
recovered = pak_data['Cured Cases'].sum()
deaths = pak_data['Death Cases'].sum()
# Active = confirmed minus everyone who recovered or died. The previous
# expression, confirmed - (recovered - deaths), added the deaths back in.
active = confirmed - (recovered + deaths)

print(confirmed, recovered, deaths, active)

labels = ['Active Cases','Recovered Cases','Death Cases']
sizes = [active,recovered,deaths]
color= ['#66b3ff','green','red']
# Pull every wedge out by the same offset.
explode = [0.1] * len(labels)

plt.figure(figsize= (15,10))
plt.pie(sizes, labels=labels, autopct='%3.1f%%', startangle=9, explode =explode,colors = color)
# A white disc drawn over the centre turns the pie into a donut chart.
centre_circle = plt.Circle((0,0),0.60,fc='white')

fig = plt.gcf()
fig.gca().add_artist(centre_circle)
# NOTE(review): the title 'e' looks like a truncated placeholder - supply the intended title.
plt.title('e',fontsize = 24)
plt.axis('equal')
plt.tight_layout()

In [None]:
# Daily sums of new, cured and death cases.
# The columns are selected with a list: selecting several columns from a groupby
# with a bare tuple (`[...]['a','b']`) is no longer supported in pandas 2.0.
temp = pak_data.groupby(['Date'])[['New Cases','Cured Cases','Death Cases']].sum().reset_index()
# Bubble chart of daily new cases; marker size and colour both encode the count.
fig = px.scatter(temp, x="Date", y="New Cases", color="New Cases",
                 size='New Cases', hover_data=['New Cases'],
                 color_discrete_sequence = ex.colors.cyclical.IceFire)
fig.update_layout(title_text='Trend of Daily Cases in Pakistan',
                  plot_bgcolor='rgb(275, 270, 273)',width=1000, height=1000)
fig.show()

In [None]:
# Line chart of the three running totals for Pakistan.
fig = go.Figure()
totals_traces = (
    ('Total Confirmed Cases', 'blue', 'Confimned Cases'),
    ('Total Recovered', 'green', 'Recovered'),
    ('Total Deaths', 'red', 'Deaths'),
)
for total_column, trace_colour, trace_name in totals_traces:
    fig.add_trace(
        go.Scatter(
            x=pak_data['Date'],
            y=pak_data[total_column],
            mode='lines+markers',
            marker_color=trace_colour,
            name=trace_name,
        )
    )
fig.update_layout(
    title_text='Coronavirus Cases in Pakistan',
    plot_bgcolor='rgb(275, 270, 273)',
    width=1000,
    height=1000,
)
fig.show()

### **Prophet Model**

In [None]:
# Prophet forecast of the nationwide cumulative confirmed cases, 15 days ahead.
confirmed = pak_data['Total Confirmed Cases'].values.tolist()
# Prophet expects the time column to be called 'ds' and the value column 'y'.
data = pd.DataFrame({'ds': list(pak_data['Date']), 'y': confirmed})

prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=15)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(15)

# NOTE(review): the "Predicted" trace plots yhat_upper, not yhat - confirm this is intended.
fig = go.Figure()
for trace_x, trace_y, trace_colour, trace_name in (
    (pak_data['Date'], pak_data['Total Confirmed Cases'], 'green', 'Actual'),
    (prop_forecast['ds'], prop_forecast['yhat_upper'], 'Orange', 'Predicted'),
):
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers',
                             marker_color=trace_colour, name=trace_name))
fig.update_layout(
    title_text='Confirmed Cases (Predicted vs Actual) using Prophet',
    plot_bgcolor='rgb(275, 270, 273)',
    width=1000,
    height=1000,
)
fig.show()

In [None]:
# Prophet forecast of the nationwide cumulative recoveries, 15 days ahead.
recv = pak_data['Total Recovered'].values.tolist()
# Prophet expects the time column to be called 'ds' and the value column 'y'.
data = pd.DataFrame({'ds': list(pak_data['Date']), 'y': recv})

prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=15)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(15)
print(forecast)

# NOTE(review): the "Predicted" trace plots yhat_upper, not yhat - confirm this is intended.
fig = go.Figure()
for trace_x, trace_y, trace_colour, trace_name in (
    (pak_data['Date'], pak_data['Total Recovered'], 'green', 'Actual'),
    (prop_forecast['ds'], prop_forecast['yhat_upper'], 'yellow', 'Predicted'),
):
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers',
                             marker_color=trace_colour, name=trace_name))
fig.update_layout(
    title_text='Recovered Cases (Predicted vs Actual) using Prophet',
    plot_bgcolor='rgb(275, 270, 273)',
    width=1000,
    height=1000,
)
fig.show()

### **Province Predictions**

In [None]:
# Daily sums per (Date, Province). The columns are selected with a list because
# tuple-style selection on a groupby is no longer supported in pandas 2.0.
province_cases = pak_data.groupby(['Date','Province'])[['New Cases','Cured Cases','Death Cases']].sum().reset_index()

In [None]:
# Running-total column -> daily column it accumulates.
province_total_sources = {
    'Total Confirmed Cases': 'New Cases',
    'Total Recovered': 'Cured Cases',
    'Total Deaths': 'Death Cases',
}
for total_column in [*province_total_sources, 'Active Cases']:
    province_cases[total_column] = 0

province_list = province_cases['Province'].unique()
# Snapshot taken before the totals are filled in; kept because the original cell created it.
test = province_cases.copy()

# A grouped cumsum yields each province's running total in row order. It replaces
# the previous loop, which assigned through chained indexing on a filtered copy
# and merged the result back with update(). That pattern is slow, warns with
# SettingWithCopyWarning, and does not write anything under pandas copy-on-write.
for total_column, daily_column in province_total_sources.items():
    province_cases[total_column] = province_cases.groupby('Province')[daily_column].cumsum()
# Active cases = confirmed so far minus deaths and recoveries so far.
province_cases['Active Cases'] = (
    province_cases['Total Confirmed Cases']
    - province_cases['Total Deaths']
    - province_cases['Total Recovered']
)

province_cases = province_cases.astype({"New Cases":int,"Cured Cases":int,"Death Cases":int, "Total Confirmed Cases": int,"Total Recovered":int,"Total Deaths":int,"Active Cases": int})

In [None]:
# Show the dtype of each column after the integer conversion.
province_cases.dtypes

In [None]:
# Display the first rows of the per-province table.
province_cases.head()

In [None]:
# Totals per province. The columns are selected with a list because tuple-style
# selection on a groupby is no longer supported in pandas 2.0.
s_cases = pak_data.groupby('Province')[['New Cases','Cured Cases','Death Cases']].sum().reset_index()
s_cases['Active Cases'] = s_cases['New Cases'] - s_cases['Death Cases']- s_cases['Cured Cases']
# Rates are expressed per 100 reported cases, rounded to two decimals.
s_cases["Death Rate (per 100)"] = np.round(100*s_cases["Death Cases"]/s_cases["New Cases"],2)
s_cases["Cure Rate (per 100)"] = np.round(100*s_cases["Cured Cases"]/s_cases["New Cases"],2)
# Styled table sorted by case count; each numeric column gets its own blue gradient.
s_cases.sort_values('New Cases', ascending= False).fillna(0).style.background_gradient(cmap='Blues',subset=["New Cases"])\
                        .background_gradient(cmap='Blues',subset=["Death Cases"])\
                        .background_gradient(cmap='Blues',subset=["Cured Cases"])\
                        .background_gradient(cmap='Blues',subset=["Active Cases"])\
                        .background_gradient(cmap='Blues',subset=["Death Rate (per 100)"])\
                        .background_gradient(cmap='Blues',subset=["Cure Rate (per 100)"])

In [None]:
# Sindh: Prophet forecast of cumulative confirmed cases, 30 days ahead.
s = province_cases.loc[province_cases['Province'] == 'Sindh']

confirmed = s['Total Confirmed Cases'].values.tolist()
# Prophet expects the time column to be called 'ds' and the value column 'y'.
data = pd.DataFrame({'ds': list(s['Date']), 'y': confirmed})

prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

# NOTE(review): the "Predicted" trace plots yhat_upper, not yhat - confirm this is intended.
fig = go.Figure()
for trace_x, trace_y, trace_colour, trace_name in (
    (s['Date'], s['Total Confirmed Cases'], 'green', 'Actual'),
    (prop_forecast['ds'], prop_forecast['yhat_upper'], 'Orange', 'Predicted'),
):
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers',
                             marker_color=trace_colour, name=trace_name))
fig.update_layout(
    title_text='Sindh Confirmed Cases (Predicted vs Actual) using Prophet',
    plot_bgcolor='rgb(275, 270, 273)',
    width=600,
    height=600,
)
fig.show()

In [None]:
# Sindh: Prophet forecast of cumulative deaths, 30 days ahead.
# Reuses the Sindh slice `s` created by the previous cell; `confirmed` holds the death totals here.
confirmed = s['Total Deaths'].values.tolist()
# Prophet expects the time column to be called 'ds' and the value column 'y'.
data = pd.DataFrame({'ds': list(s['Date']), 'y': confirmed})

prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

# NOTE(review): the "Predicted" trace plots yhat_upper, not yhat - confirm this is intended.
fig = go.Figure()
for trace_x, trace_y, trace_colour, trace_name in (
    (s['Date'], s['Total Deaths'], 'green', 'Actual'),
    (prop_forecast['ds'], prop_forecast['yhat_upper'], 'red', 'Predicted'),
):
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers',
                             marker_color=trace_colour, name=trace_name))
fig.update_layout(
    title_text='Sindh Death Cases (Predicted vs Actual) using Prophet',
    plot_bgcolor='rgb(275, 270, 273)',
    width=600,
    height=600,
)
fig.show()

In [None]:
# Sindh: Prophet forecast of cumulative recoveries, 30 days ahead.
s = province_cases.loc[province_cases['Province'] == 'Sindh']

# `confirmed` holds the recovery totals in this cell.
confirmed = s['Total Recovered'].values.tolist()
# Prophet expects the time column to be called 'ds' and the value column 'y'.
data = pd.DataFrame({'ds': list(s['Date']), 'y': confirmed})

prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

# NOTE(review): the "Predicted" trace plots yhat_upper, not yhat - confirm this is intended.
fig = go.Figure()
for trace_x, trace_y, trace_colour, trace_name in (
    (s['Date'], s['Total Recovered'], 'green', 'Actual'),
    (prop_forecast['ds'], prop_forecast['yhat_upper'], 'yellow', 'Predicted'),
):
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers',
                             marker_color=trace_colour, name=trace_name))
fig.update_layout(
    title_text='Sindh Recovered Cases (Predicted vs Actual) using Prophet',
    plot_bgcolor='rgb(275, 270, 273)',
    width=600,
    height=600,
)
fig.show()

### **Cities Prediction**

In [None]:
# Daily sums per (Date, City). The columns are selected with a list because
# tuple-style selection on a groupby is no longer supported in pandas 2.0.
cities = pak_data.groupby(['Date', 'City'])[['New Cases','Cured Cases','Death Cases']].sum().reset_index()

In [None]:
# Running-total column -> daily column it accumulates.
city_total_sources = {
    'Total Confirmed Cases': 'New Cases',
    'Total Recovered': 'Cured Cases',
    'Total Deaths': 'Death Cases',
}

city_list = cities['City'].unique()

# A grouped cumsum yields each city's running total in row order. It replaces the
# previous loop, which assigned through chained indexing on a filtered copy and
# merged the result back with update(). That pattern is slow, warns with
# SettingWithCopyWarning, and does not write anything under pandas copy-on-write.
for total_column, daily_column in city_total_sources.items():
    cities[total_column] = cities.groupby('City')[daily_column].cumsum()
# Active cases = confirmed so far minus deaths and recoveries so far.
cities['Active Cases'] = (
    cities['Total Confirmed Cases']
    - cities['Total Deaths']
    - cities['Total Recovered']
)

cities = cities.astype({"New Cases":int,"Cured Cases":int,"Death Cases":int, "Total Confirmed Cases": int,"Total Recovered":int,"Total Deaths":int,"Active Cases": int})

In [None]:
# Totals per city, largest first. The columns are selected with a list because
# tuple-style selection on a groupby is no longer supported in pandas 2.0.
c_cases = cities.groupby('City')[['New Cases','Cured Cases','Death Cases']].sum().reset_index()
c_cases = c_cases.sort_values(['New Cases'], ascending=False)

c_cases['Active Cases'] = c_cases['New Cases'] - c_cases['Death Cases']- c_cases['Cured Cases']
# Rates are expressed per 100 reported cases, rounded to two decimals.
c_cases["Death Rate (per 100)"] = np.round(100*c_cases["Death Cases"]/c_cases["New Cases"],2)
c_cases["Cure Rate (per 100)"] = np.round(100*c_cases["Cured Cases"]/c_cases["New Cases"],2)
# Styled table sorted by case count; each numeric column gets its own blue gradient.
c_cases.sort_values('New Cases', ascending= False).fillna(0).style.background_gradient(cmap='Blues',subset=["New Cases"])\
                        .background_gradient(cmap='Blues',subset=["Death Cases"])\
                        .background_gradient(cmap='Blues',subset=["Cured Cases"])\
                        .background_gradient(cmap='Blues',subset=["Active Cases"])\
                        .background_gradient(cmap='Blues',subset=["Death Rate (per 100)"])\
                        .background_gradient(cmap='Blues',subset=["Cure Rate (per 100)"])

In [None]:
# Karachi: Prophet forecast of cumulative confirmed cases, 30 days ahead.
khi = cities.loc[cities['City'] == 'Karachi']

confirmed = khi['Total Confirmed Cases'].values.tolist()
# Prophet expects the time column to be called 'ds' and the value column 'y'.
data = pd.DataFrame({'ds': list(khi['Date']), 'y': confirmed})

prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

# NOTE(review): the "Predicted" trace plots yhat_upper, not yhat - confirm this is intended.
fig = go.Figure()
for trace_x, trace_y, trace_colour, trace_name in (
    (khi['Date'], khi['Total Confirmed Cases'], 'green', 'Actual'),
    (prop_forecast['ds'], prop_forecast['yhat_upper'], 'Orange', 'Predicted'),
):
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers',
                             marker_color=trace_colour, name=trace_name))
fig.update_layout(
    title_text='Karachi Confirmed Cases (Predicted vs Actual) using Prophet',
    plot_bgcolor='rgb(275, 270, 273)',
    width=600,
    height=600,
)
fig.show()

In [None]:
# Karachi: Prophet forecast of cumulative deaths, 30 days ahead.
# Reuses the Karachi slice `khi` from the previous cell; `confirmed` holds the death totals here.
confirmed = khi['Total Deaths'].values.tolist()
# Prophet expects the time column to be called 'ds' and the value column 'y'.
data = pd.DataFrame({'ds': list(khi['Date']), 'y': confirmed})

prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

# NOTE(review): the "Predicted" trace plots yhat_upper, not yhat - confirm this is intended.
fig = go.Figure()
for trace_x, trace_y, trace_colour, trace_name in (
    (khi['Date'], khi['Total Deaths'], 'green', 'Actual'),
    (prop_forecast['ds'], prop_forecast['yhat_upper'], 'red', 'Predicted'),
):
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers',
                             marker_color=trace_colour, name=trace_name))
fig.update_layout(
    title_text='Karachi Death Cases (Predicted vs Actual) using Prophet',
    plot_bgcolor='rgb(275, 270, 273)',
    width=600,
    height=600,
)
fig.show()

In [None]:
# Karachi: Prophet forecast of cumulative recoveries, 30 days ahead.
# Reuses the Karachi slice `khi`; `confirmed` holds the recovery totals here.
confirmed = khi['Total Recovered'].values.tolist()
# Prophet expects the time column to be called 'ds' and the value column 'y'.
data = pd.DataFrame({'ds': list(khi['Date']), 'y': confirmed})

prop = Prophet()
prop.fit(data)
future = prop.make_future_dataframe(periods=30)
prop_forecast = prop.predict(future)
forecast = prop_forecast[['ds', 'yhat']].tail(30)

# NOTE(review): the "Predicted" trace plots yhat_upper, not yhat - confirm this is intended.
fig = go.Figure()
for trace_x, trace_y, trace_colour, trace_name in (
    (khi['Date'], khi['Total Recovered'], 'green', 'Actual'),
    (prop_forecast['ds'], prop_forecast['yhat_upper'], 'yellow', 'Predicted'),
):
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines+markers',
                             marker_color=trace_colour, name=trace_name))
fig.update_layout(
    title_text='Karachi Recovered Cases (Predicted vs Actual) using Prophet',
    plot_bgcolor='rgb(275, 270, 273)',
    width=600,
    height=600,
)
fig.show()

### **Visualization and prediction for multiple countries**

### **Import the data**

In [None]:
import numpy as np 
# array handling

import matplotlib.pyplot as plt 
# drawing plots

import matplotlib.colors as mcolors 
# specifying colours either by name or in RGB format

import pandas as pd 
# data manipulation, analysis, modelling, regressions

import random 
import math

import time 
# time-related functions

from sklearn.linear_model import LinearRegression, BayesianRidge 
# model fitting; minimises the difference between the values in the dataset and the predicted values

from sklearn.model_selection import RandomizedSearchCV, train_test_split
# randomised hyper-parameter search,
# splitting arrays or matrices into random train and test subsets

from sklearn.preprocessing import PolynomialFeatures
# generating polynomial and interaction features

from sklearn.svm import SVR 
# classification, regression, outlier handling

from sklearn.metrics import mean_squared_error, mean_absolute_error
# mean squared error
# mean absolute error

import datetime 
# date/time handling

import operator 
# exports a set of efficient functions corresponding to Python's intrinsic operators

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# removed the old names, so use whichever spelling this installation provides.
poster_style = 'seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster'
plt.style.use(poster_style)
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Global time series of confirmed cases and deaths from the JHU CSSE repository.
confirmed_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
deaths_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
# The recovered time series is not loaded (line left disabled).
# recoveries_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
# Daily report files for one fixed date (10-22-2021): worldwide and US-specific.
latest_data = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/10-22-2021.csv')
us_medical_data = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/10-22-2021.csv')

In [None]:
latest_data.head() # display the first rows

In [None]:
confirmed_df.head() # display the first rows

In [None]:
us_medical_data.head() # display the first rows

In [None]:
# Column labels of the confirmed-cases table.
cols = confirmed_df.keys()

### **Get all the dates for the ongoing coronavirus pandemic**

In [None]:
# Keep the columns from the fifth label through the last one. The labels of
# confirmed_df are used to slice both frames.
# Presumably the first four columns hold location metadata and the rest are dates - verify.
confirmed = confirmed_df.loc[:, cols[4]:cols[-1]]
deaths = deaths_df.loc[:, cols[4]:cols[-1]]

In [None]:
# One entry per date column: worldwide confirmed cases, worldwide deaths and
# their ratio (deaths divided by confirmed cases).
dates = confirmed.keys()
world_cases = [confirmed[day].sum() for day in dates]
total_deaths = [deaths[day].sum() for day in dates]
mortality_rate = [
    deaths_on_day / cases_on_day
    for deaths_on_day, cases_on_day in zip(total_deaths, world_cases)
]

In [None]:
# Print the per-date ratio of deaths to confirmed cases.
print(mortality_rate)

### **Getting daily increases and moving averages**



In [None]:
def daily_increase(data):
    """Return the element-to-element increase of a cumulative sequence.

    The first entry of the result is the first value of `data` itself; every
    later entry is the difference between a value and its predecessor. An
    empty input yields an empty list.
    """
    return [
        data[position] if position == 0 else data[position] - data[position - 1]
        for position in range(len(data))
    ]

def moving_average(data, window_size):
    """Return a forward-looking moving average with the same length as `data`.

    Entry i is the mean of `data[i:i + window_size]`. Near the end of the
    sequence, where fewer than `window_size` values remain, the mean is taken
    over the values that are left.
    """
    total_length = len(data)
    averages = []
    for start in range(total_length):
        # Clamp the window end so that the final windows shrink rather than overrun.
        stop = start + window_size if start + window_size < total_length else total_length
        averages.append(np.mean(data[start:stop]))
    return averages

# Window length, in days, for every moving average below.
window = 7

# Daily changes derived from the cumulative worldwide series.
world_daily_increase = daily_increase(world_cases)
world_daily_death = daily_increase(total_deaths)

# Moving averages of the cumulative series and of the daily changes.
(
    world_confirmed_avg,
    world_daily_increase_avg,
    world_death_avg,
    world_daily_death_avg,
) = (
    moving_average(series, window)
    for series in (world_cases, world_daily_increase, total_deaths, world_daily_death)
)

In [None]:
# Turn the day counter and both cumulative series into column vectors.
day_numbers = list(range(len(dates)))
days_since_1_22 = np.array(day_numbers).reshape(-1, 1)
world_cases, total_deaths = (
    np.array(series).reshape(-1, 1) for series in (world_cases, total_deaths)
)

In [None]:
# Moving average of the cumulative confirmed cases.
print(world_confirmed_avg)

In [None]:
# Moving average of the daily increase in confirmed cases.
print(world_daily_increase_avg)

In [None]:
# Moving average of the cumulative deaths.
print(world_death_avg)

In [None]:
# Moving average of the daily deaths.
print(world_daily_death_avg)

### **Future forecasting**

In [None]:
# Number of days to forecast beyond the last observed date.
days_in_future = 10
# Day numbers for the observed period plus the forecast horizon, as a column vector.
future_forcast = np.arange(len(dates) + days_in_future).reshape(-1, 1)
# Day numbers of the observed period only. The slice is tied to len(dates)
# instead of a hard-coded -10, so it stays correct when days_in_future changes
# (including 0, where `[:-0]` would have produced an empty array).
adjusted_dates = future_forcast[:len(dates)]

### **Convert integers into datetime for better visualization**

In [None]:
# Build one 'MM/DD/YYYY' label per entry of future_forcast, counting from 22 January 2020.
start = '1/22/2020'
start_date = datetime.datetime.strptime(start, '%m/%d/%Y')
future_forcast_dates = [
    (start_date + datetime.timedelta(days=day_offset)).strftime('%m/%d/%Y')
    for day_offset in range(len(future_forcast))
]

In [None]:
# Leave the first 376 days of the series out of both the training and the test data.
days_to_skip = 376
# shuffle=False keeps the rows in chronological order, so the test set is the
# final 8% of the remaining days.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[days_to_skip:], world_cases[days_to_skip:], test_size=0.08, shuffle=False) 