# Analysis on Covid-19 Datasets
This project is a study of several Covid-19 datasets.

# Data Description
The datasets used in this project:
1. usa_country_wise.csv
2. day_wise.csv
3. country_wise_latest.csv
4. covid_19_clean_complete.csv
5. full_grouped.csv
6. worldometer_data.csv

source: https://drive.google.com/drive/folders/16vjONxktuGs6qdjtXapnNhn7otpENRM1 

In [None]:
# Mount Google Drive in the Colab runtime; the dataset files are read from it below
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# for better interactive visualization
import plotly.graph_objects as go
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

In [None]:
# List what is stored in the shared Covid-19 dataset folder
dataset_dir = '/content/drive/Shareddrives/whs(shared drive)/Datasets/Covid-19'
files = os.listdir(dataset_dir)
files

['usa_country_wise.csv',
 'day_wise.csv',
 'country_wise_latest.csv',
 'covid_19_clean_complete.csv',
 'full_grouped.csv',
 'worldometer_data.csv']

In [None]:
# create function for data reading
def read_data(path, filename):
    """Load one CSV file from a directory into a DataFrame.

    Parameters
    ----------
    path : str or os.PathLike
        Directory that contains the file.
    filename : str
        Name of the CSV file inside ``path``.

    Returns
    -------
    pandas.DataFrame
        The parsed file, exactly as returned by ``pd.read_csv``.
    """
    # os.path.join uses the platform separator, does not double it when
    # ``path`` already ends with one, and accepts pathlib.Path directories,
    # none of which the former ``path + '/' + filename`` did.
    return pd.read_csv(os.path.join(path, filename))

In [None]:
# Read every dataset from the shared folder; the order of the names on the
# left matches the order of the file names on the right.
path='/content/drive/Shareddrives/whs(shared drive)/Datasets/Covid-19'
(
    world_data,
    day_wise,
    country_wise_latest,
    group_data,
    usa_data,
    province_data,
) = (
    read_data(path,csv_name)
    for csv_name in (
        'worldometer_data.csv',
        'day_wise.csv',
        'country_wise_latest.csv',
        'full_grouped.csv',
        'usa_country_wise.csv',
        'covid_19_clean_complete.csv',
    )
)

# Which country has the maximum Total Cases, Deaths, Recovered and Active cases?


In [None]:
# Show the column names of world_data (loaded from worldometer_data.csv)
world_data.columns

Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
       'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
       'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
       'TotalTests', 'Tests/1M pop', 'WHO Region'],
      dtype='object')

In [None]:
# One treemap per metric; every tile is a country taken from the first 20 rows of world_data
columns=['TotalCases','TotalDeaths','TotalRecovered','ActiveCases']
top_rows=world_data[0:20]
for metric in columns:
    treemap_title="TreeMap representation of different Countries w.r.t. their {}".format(metric)
    fig=px.treemap(top_rows,
                   path=['Country/Region'],
                   values=metric,
                   title=treemap_title,
                   template="plotly_dark")
    fig.show()

- From the treemaps, we observe that:
  - The USA has the highest value for all four metrics studied.
  - Brazil has the second highest.

# What is the trend of Confirmed, Deaths, Recovered and Active cases?

In [None]:
# Draw the four day_wise series against the date on one line chart
case_series=["Confirmed","Deaths","Recovered","Active"]
fig=px.line(day_wise, x="Date", y=case_series, template="plotly_dark")

# Chart title, axis titles and legend title are applied in a single call
layout_labels=dict(
    title='Covid-19 cases w.r.t. Date',
    xaxis_title="Date",
    yaxis_title="Number of Cases",
    legend_title="Type of cases",
)
fig.update_layout(**layout_labels)

fig.show()

- The plot shows an obvious increase starting from the middle of March 2020.
- The increase in the number of deaths is relatively small compared to the other categories.

# Visualize Population to Tests Done Ratio

In [None]:
# Preview the first rows of world_data to see the columns used for the ratio below
world_data.head()

Unnamed: 0,Country/Region,Continent,Population,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/1M pop,WHO Region
0,USA,North America,331198100.0,5032179,,162804.0,,2576668.0,,2292707.0,18296.0,15194.0,492.0,63139605.0,190640.0,Americas
1,Brazil,South America,212710700.0,2917562,,98644.0,,2047660.0,,771258.0,8318.0,13716.0,464.0,13206188.0,62085.0,Americas
2,India,Asia,1381345000.0,2025409,,41638.0,,1377384.0,,606387.0,8944.0,1466.0,30.0,22149351.0,16035.0,South-EastAsia
3,Russia,Europe,145940900.0,871894,,14606.0,,676357.0,,180931.0,2300.0,5974.0,100.0,29716907.0,203623.0,Europe
4,South Africa,Africa,59381570.0,538184,,9604.0,,387316.0,,141264.0,539.0,9063.0,162.0,3149807.0,53044.0,Africa


In [None]:
# Population divided by total tests, restricted to the first 20 rows of world_data
first_20=world_data.iloc[0:20]
pop_test_ratio=first_20['Population'].div(first_20['TotalTests'])

In [None]:
# Display the computed population-to-test ratios
pop_test_ratio

0       5.245489
1      16.106896
2      62.365033
3       4.911040
4      18.852446
5     122.115932
6      13.241331
7      10.866949
8      28.269105
9       6.618696
10     32.187237
11      3.877883
12      9.589865
13    107.484026
14    134.558952
15      8.514790
16     16.613857
17     56.934398
18      9.760649
19     16.353942
dtype: float64

In [None]:
# Bar chart of the population-to-test ratio, one coloured bar per country
fig=px.bar(
    world_data.iloc[0:20],
    x='Country/Region',
    y=pop_test_ratio[0:20],
    color='Country/Region',
    template="plotly_dark",
)

# Titles for the chart, both axes and the legend
ratio_layout=dict(
    title='Population to Test Ratio',
    xaxis_title="Country/Region",
    yaxis_title="Ratio",
    legend_title="Country/Region",
)
fig.update_layout(**ratio_layout)

fig.show()

- The top 3 countries with the highest population-to-test ratio are Bangladesh, Mexico and Pakistan.
- This suggests that there is a high chance of untested cases in these countries.
- Although the USA and Brazil have the highest numbers of cases, their ratios are relatively small, which implies that the chance of untested cases there is relatively low.

# Top 20 countries of Total Confirmed Cases, Total Recovered Cases, Total Deaths, Total Active Cases

In [None]:
# Preview the first rows of world_data again before building the ranking charts
world_data.head()

Unnamed: 0,Country/Region,Continent,Population,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/1M pop,WHO Region
0,USA,North America,331198100.0,5032179,,162804.0,,2576668.0,,2292707.0,18296.0,15194.0,492.0,63139605.0,190640.0,Americas
1,Brazil,South America,212710700.0,2917562,,98644.0,,2047660.0,,771258.0,8318.0,13716.0,464.0,13206188.0,62085.0,Americas
2,India,Asia,1381345000.0,2025409,,41638.0,,1377384.0,,606387.0,8944.0,1466.0,30.0,22149351.0,16035.0,South-EastAsia
3,Russia,Europe,145940900.0,871894,,14606.0,,676357.0,,180931.0,2300.0,5974.0,100.0,29716907.0,203623.0,Europe
4,South Africa,Africa,59381570.0,538184,,9604.0,,387316.0,,141264.0,539.0,9063.0,162.0,3149807.0,53044.0,Africa


### Top 20 countries of Total Confirmed cases

In [None]:
# Rank by TotalCases explicitly, as the other "Top 20" charts do for their own
# metric, instead of relying on the row order of the CSV file.
top_confirmed=world_data.sort_values(by='TotalCases',ascending=False)[0:20]
fig=px.bar(top_confirmed,y='Country/Region',x='TotalCases',color='TotalCases',text="TotalCases")
fig.update_layout(template="plotly_dark",title_text="<b>Top 20 countries of Total confirmed cases</b>")
fig.show()

### Top 20 countries of Total deaths

In [None]:
# Bar chart of the 20 rows with the largest TotalDeaths
most_deaths=world_data.sort_values(by='TotalDeaths',ascending=False).head(20)
fig=px.bar(most_deaths,x='TotalDeaths',y='Country/Region',text="TotalDeaths",color='TotalDeaths')
fig.update_layout(title_text="<b>Top 20 countries of Total deaths</b>",template="plotly_dark")
fig.show()

### Top 20 countries of Total active cases

In [None]:
# Bar chart of the 20 rows with the largest ActiveCases
fig=px.bar(world_data.sort_values(by='ActiveCases',ascending=False)[0:20], y='Country/Region',x='ActiveCases',color='ActiveCases',text='ActiveCases')
# The <b> tag is now closed, matching the titles of the confirmed and deaths charts
fig.update_layout(template="plotly_dark",title_text="<b>Top 20 countries of Total Active cases</b>")
fig.show()

### Top 20 countries of Total Recoveries

In [None]:
# Bar chart of the 20 rows with the largest TotalRecovered
fig=px.bar(world_data.sort_values(by='TotalRecovered',ascending=False)[:20],y='Country/Region',x='TotalRecovered',color='TotalRecovered',text='TotalRecovered')
# The <b> tag is now closed, matching the titles of the confirmed and deaths charts
fig.update_layout(template="plotly_dark",title_text="<b>Top 20 countries of Total Recovered</b>")
fig.show()

### Pie Chart Representation of stats of worst affected countries

### Pie Charts in Donut Shape

In [None]:
# Donut charts: how each metric is split across the first 15 rows of world_data
labels=world_data[0:15]['Country/Region'].values
cases=['TotalCases','TotalDeaths','TotalRecovered','ActiveCases']
for i in cases:
    # The slices are countries (names=labels), so the title names countries;
    # it previously claimed a breakdown by WHO Region and contained typos.
    fig=px.pie(world_data[0:15],values=i,names=labels,template="plotly_dark",hole=0.3,
               title="{} recorded for the 15 worst affected countries".format(i))
    fig.show()

### Deaths to Confirmed ratio

In [None]:
# TotalDeaths divided by TotalCases for every row of world_data, drawn as bars
deaths_to_confirmed=world_data['TotalDeaths'].div(world_data['TotalCases'])
fig = px.bar(world_data,x='Country/Region',y=deaths_to_confirmed)
confirmed_ratio_title={'text':"Death to confirmed ratio of some  worst effected countries",'xanchor':'left'}
fig.update_layout(title=confirmed_ratio_title,template="plotly_dark")
fig.show()


### Deaths to recovered ratio

In [None]:
# TotalDeaths divided by TotalRecovered for every row of world_data, drawn as bars
deaths_to_recovered=world_data['TotalDeaths'].div(world_data['TotalRecovered'])
fig = px.bar(world_data,x='Country/Region',y=deaths_to_recovered)
recovered_ratio_title={'text':"Death to recovered ratio of some  worst effected countries",'xanchor':'left'}
fig.update_layout(title=recovered_ratio_title,template="plotly_dark")
fig.show()


#### Visualize Confirmed, Active, Recovered and Deaths cases (entire statistics) of a particular country

In [None]:
# Preview the first rows of group_data (loaded from full_grouped.csv)
group_data.head()

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,WHO Region
0,2020-01-22,Afghanistan,0,0,0,0,0,0,0,Eastern Mediterranean
1,2020-01-22,Albania,0,0,0,0,0,0,0,Europe
2,2020-01-22,Algeria,0,0,0,0,0,0,0,Africa
3,2020-01-22,Andorra,0,0,0,0,0,0,0,Europe
4,2020-01-22,Angola,0,0,0,0,0,0,0,Africa


In [None]:
from plotly.subplots import make_subplots  ## for creating subplots in plotly
import plotly.graph_objects as go

In [None]:

def country_visualization(group_data,country):
    """Show four side-by-side line charts of one country's recorded cases.

    Parameters
    ----------
    group_data : pandas.DataFrame
        Table with 'Country/Region', 'Date', 'Confirmed', 'Deaths',
        'Recovered' and 'Active' columns.
    country : str
        Value compared for equality against the 'Country/Region' column.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    panels=("Confirmed", "Active", "Recovered",'Deaths')
    is_country=group_data['Country/Region']==country
    df=group_data.loc[is_country,['Date','Confirmed','Deaths','Recovered','Active']]

    fig = make_subplots(rows=1, cols=len(panels),subplot_titles=panels)
    # one trace per panel, placed left to right in the same order as the titles
    for position,series_name in enumerate(panels,start=1):
        fig.add_trace(
            go.Scatter(name=series_name,x=df['Date'],y=df[series_name]),
            row=1, col=position
        )

    fig.update_layout(height=600, width=1000, title_text="Date Vs Recorded Cases of {}".format(country),template="plotly_dark")
    fig.show()

In [None]:
# Plot the Confirmed / Active / Recovered / Deaths series for Brazil
country_visualization(group_data,'Brazil')

In [None]:
# Plot the same four series for the United States.
# NOTE(review): assumes group_data spells the country 'US' (world_data uses 'USA') — confirm
country_visualization(group_data,'US')