## References
* [Kaggle, Coronavirus (COVID-19) Visualization & Prediction](https://www.kaggle.com/code/therealcyberlord/coronavirus-covid-19-visualization-prediction/notebook#US-Medical-Data-on-Testing) <br>

* [Kaggle, COVID-19 - Analysis, Visualization & Comparisons](https://www.kaggle.com/code/imdevskp/covid-19-analysis-visualization-comparisons#Date-vs) <br>
* [Worldometer Coronavirus](https://www.worldometers.info/coronavirus/#countries) <br>
* [Johns Hopkins Center for Systems Science and Engineering COVID-19 GitHub](https://github.com/CSSEGISandData/COVID-19) <br>
* [Johns Hopkins Coronavirus Resource Center](https://coronavirus.jhu.edu/map.html) <br>

# Libraries 

In [17]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors 
import pandas as pd
import random 
import math 
import time 
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR 
from sklearn.metrics import mean_squared_error, mean_absolute_error 
import datetime
import operator 
plt.style.use('seaborn-poster')
%matplotlib inline 
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')
import warnings 
warnings.filterwarnings('ignore')

# interactive visualization
import plotly.express as px
import plotly.graph_objs as go
# import plotly.figure_factory as ff
from plotly.subplots import make_subplots

# Data

Query Countries

In [18]:
# Regions to process. Each name is matched against 'Country/Region' first and,
# failing that, against 'Province/State'. Any '*' in a name is removed when the
# per-country CSV file name is built. For a quick single-country run, shrink
# the list (e.g. to just 'US').
countries = [
    'Taiwan*',
    'US',
    'Hong Kong',
    'Vietnam',
    'China',
    'India',
]

Import Data

In [19]:
# Global time-series tables from the Johns Hopkins CSSE GitHub repository.
# All three files live in the same directory, so only the file name differs.
csse_time_series_url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
)
confirmed_df = pd.read_csv(csse_time_series_url + 'time_series_covid19_confirmed_global.csv')
deaths_df = pd.read_csv(csse_time_series_url + 'time_series_covid19_deaths_global.csv')
recoveries_df = pd.read_csv(csse_time_series_url + 'time_series_covid19_recovered_global.csv')


In [20]:
# Preview the recovered-cases table; the notebook abbreviates the display of a
# frame this large, showing only its first/last rows and columns.
recoveries_df

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/26/22,3/27/22,3/28/22,3/29/22,3/30/22,3/31/22,4/1/22,4/2/22,4/3/22,4/4/22
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
265,,Winter Olympics 2022,39.904200,116.407400,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
266,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
267,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Process Data

In [21]:
def daily_increase(data):
    """Turn a cumulative series into per-step increments.

    The first entry is passed through unchanged (there is no earlier value to
    subtract); each later entry is its difference from the one before it.

    Returns a new list with the same length as `data`.
    """
    return [
        data[idx] if idx == 0 else data[idx] - data[idx - 1]
        for idx in range(len(data))
    ]

def moving_average(data, window_size):
    """Return the trailing moving average of `data`.

    Entry `i` of the result is the mean of the last `window_size` values up to
    and including `data[i]`. Near the start, where fewer than `window_size`
    values exist, the mean is taken over the values available so far.

    Args:
        data: sequence of numbers (list or 1-D array) supporting slicing.
        window_size: number of values per window; must be at least 1.

    Returns:
        A list with one average per element of `data`.

    Raises:
        ValueError: if `window_size` is less than 1.
    """
    if window_size < 1:
        raise ValueError('window_size must be at least 1')

    averages = []
    for i in range(len(data)):
        # The window must include data[i] itself: stopping the slice at `i`
        # (exclusive) leaves the very first window empty (mean -> NaN) and
        # makes every average lag one step behind the data.
        start = max(0, i - window_size + 1)
        averages.append(np.mean(data[start : i + 1]))
    return averages

# Window length, in data points.
# NOTE(review): presumably the `window_size` handed to moving_average(); it is
# not referenced in the cells visible here, so confirm where it is used.
window = 7


In [84]:
# Scratch check: do any column sums over the rows for China come out as NaN?
aRow = confirmed_df[confirmed_df['Country/Region'] == 'China']
china_values = np.array(aRow)  # convert once instead of on every access

# Only column positions 2, 3 and 4 are probed here, not the full date range.
# NOTE(review): presumably these are Lat, Long and the first date column - confirm.
for i in (2, 3, 4):
    column = china_values[:, i]
    val = sum(column)
    if not math.isnan(val):
        continue
    # Show the offending column followed by its (NaN) sum.
    print(column)
    print(sum(column))



array([1, 14, 6, 1, 0, 26, 2, 1, 4, 1, 0, 5, 0, 444, 4, 0, 1, 2, 0, 2, 1,
       1, 0, 0, 2, 9, 1, 5, 4, 0, 0, 0, 1, 10], dtype=object)

In [22]:
# ideally should be able to set start date and end date

# The first four columns are metadata (Province/State, Country/Region, Lat,
# Long); the date columns follow from position 4 onwards.
# NOTE(review): the `-1` stop drops the most recent date column. It is kept so
# the exported CSVs cover the same dates as before - confirm it is intended.
cols = confirmed_df.keys()
date = np.array(cols)[4:-1]


def _country_totals(table, country):
    """Return the per-date totals for `country`, summed over all matching rows.

    `country` is looked up in 'Country/Region' first and, if no row matches
    there, in 'Province/State'.

    Args:
        table: one of the time-series frames (metadata columns first, then
            one column per date).
        country: the region name to look up.

    Returns:
        A 1-D numpy array with one total per date in `date`.

    Raises:
        ValueError: if `country` matches neither column.
    """
    for searchFrom in ('Country/Region', 'Province/State'):
        rows = table[table[searchFrom] == country]
        if rows.shape[0] > 0:
            # A country can span several rows (e.g. one per province). Taking
            # only the first row would export a single province's numbers
            # instead of the national total, so sum across all of them.
            return rows.iloc[:, 4:-1].sum(axis=0).to_numpy()
    raise ValueError(
        '{!r} not found in Country/Region or Province/State'.format(country)
    )


for country in countries:

    # Get data
    totalConfirmed = _country_totals(confirmed_df, country)
    totalDeaths = _country_totals(deaths_df, country)

    # Deaths per confirmed case; defined as 0 while nothing is confirmed yet.
    totalMortality = [
        deaths / confirmed if confirmed != 0 else 0
        for deaths, confirmed in zip(totalDeaths, totalConfirmed)
    ]

    # Create DataFrame
    new_df = pd.DataFrame(
            {   'Date': date,
                'Total Confirmed': totalConfirmed,
                'Daily Confirmed': daily_increase(totalConfirmed),
                'Total Deaths': totalDeaths,
                'Daily Deaths': daily_increase(totalDeaths),
                'Total Mortality': totalMortality,
                # 'Daily Mortality': daily_increase(totalMortality),
            }
        )

    # Save DataFrame; '*' is removed from the name ('Taiwan*' -> 'Taiwan.csv')
    country = country.replace('*', '')
    new_df.to_csv(country+'.csv')

    print('Processing: {}'.format(country))
    # print(new_df.tail())




Processing: Taiwan
Processing: US
Processing: Hong Kong
Processing: Vietnam
Processing: China
Processing: India


# Visualization

Total Confirmed Cases and Daily Confirmed Cases 

In [25]:
# Load every per-country CSV and stack them into one long frame (`df2`) with a
# 'Country' column identifying the source of each row.
frames = []
for country in countries:
    # read data (file names carry no '*', e.g. 'Taiwan*' -> 'Taiwan.csv')
    country = country.replace('*', '')
    df = pd.read_csv(country+'.csv')
    # add another column: the scalar is broadcast to every row
    df['Country'] = country
    frames.append(df)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0, and
# growing a frame inside the loop re-copies all earlier rows on each iteration.
# An empty `countries` list still yields an empty frame, as before.
df2 = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [28]:
# Line chart of one metric over time, one trace per country, on a log y-axis.
col = 'Total Confirmed'
fig = px.line(
    data_frame=df2,
    x='Date',
    y=col,
    color='Country',
    title=col,
    height=600,
    log_y=True,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
# Kept as the cell's last expression so that its result is what gets displayed.
fig.update_layout(showlegend=True)

Total Deaths and Daily Deaths

Mortality