## Corona Data Analysis

### Importing necessary libraries

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
import os 
import glob

In [None]:
import os

# List every file found under the local data directory.
# The path is a raw string so the backslashes stay literal: in a normal string
# literal '\P' and '\C' are invalid escape sequences, which recent Python
# versions warn about.
# NOTE(review): this is a machine-specific absolute path; os.walk yields
# nothing (silently) when the directory does not exist.
for dirname, _, filenames in os.walk(r'C:\Pandas\Corona'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

### Creating new dataFrame and reading the csv file 

In [None]:
# Load the dataset from the current working directory. index_col=False tells
# pandas not to use the first CSV column as the row index.
df = pd.read_csv('covid_19_data.csv', index_col=False)

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

### Cleaning the Data 

In [None]:
# Per-column flag: True where the column contains at least one missing value.
df.isna().any()

In [None]:
# Drop, in place, every row that has a missing value in ANY column.
# NOTE(review): if a column is legitimately empty for many rows (presumably
# 'Province/State' for countries reported without provinces), this discards
# those rows entirely and skews every per-country total below — confirm.
df.dropna(inplace=True)

In [None]:
# Re-check after cleaning: every column should now report False.
df.isna().any(axis=0)

### Removing rows with negative values in the Confirmed column

In [None]:
# Rows whose 'Confirmed' count is negative (invalid data).
df[df['Confirmed'].lt(0)]

In [None]:
# Keep every row except those with a negative 'Confirmed' count.
negative_confirmed = df['Confirmed'].lt(0)
df = df.loc[~negative_confirmed]

In [None]:
# Verification: this selection should now be empty.
df[df['Confirmed'].lt(0)]

In [None]:
# Display the cleaned frame (rendered by the notebook as the cell output).
df

## Which country had the maximum number of cases ?

In [None]:
# Cast both columns in one call: integer case counts, string country names.
df = df.astype({'Confirmed': 'int32', 'Country/Region': 'string'})

In [None]:
# Sum of 'Confirmed' per country, as a one-column DataFrame indexed by
# 'Country/Region'. The column is selected BEFORE aggregating so that pandas
# does not try to sum every other column (including text/date columns) only
# to throw the result away.
m = df.groupby('Country/Region')['Confirmed'].sum().to_frame()
m

### Visualizing the number of confirmed cases 

In [None]:
# Theme and figure size must be configured BEFORE the figure is created;
# setting them after plotting only affects later figures.
sns.set_theme(style='whitegrid', rc={"figure.figsize": (10, 4)})

# One bar per country, height = summed confirmed cases. x/y are passed as
# vectors directly, so no `data=` argument is needed.
fig, ax = plt.subplots()
sns.barplot(x=m.index, y=m['Confirmed'], color='#042069', ax=ax)
plt.xticks(rotation=90)
ax.set_xlabel('Countries', size=14)
ax.set_ylabel('No. of confirmed corona cases', size=14)
plt.show()

In [None]:
# Print column names, non-null counts and dtypes of the cleaned frame.
df.info()

## Which province in the world has max number of cases?

In [None]:
# Sum of 'Confirmed' per (province, country) pair. Selecting the column before
# aggregating avoids summing unrelated (text/date) columns; as_index=False
# keeps the group keys as ordinary columns.
p = df.groupby(['Province/State', 'Country/Region'], as_index=False)['Confirmed'].sum()

# The 25 provinces/states with the largest totals, highest first.
cases = p.sort_values(by='Confirmed', ascending=False).head(25)
cases

### Plotting the 25 provinces/states with the highest number of confirmed cases

In [None]:
# Configure theme and figure size before creating the figure so they apply
# to this plot rather than only to later ones.
sns.set_theme(style='ticks', rc={"figure.figsize": (15, 4)})

# One bar per province/state in `cases`. x/y are passed as vectors, so no
# `data=` argument is needed.
fig, ax = plt.subplots()
sns.barplot(x=cases['Province/State'], y=cases['Confirmed'], palette="dark:salmon_r", ax=ax)
plt.xticks(rotation=90)
plt.show()

## How many total deaths have been confirmed until now?

In [None]:
# Sum of 'Deaths' per (country, province) pair, with the group keys kept as
# ordinary columns. The column is selected before aggregating so no other
# column is summed needlessly.
death = df.groupby(['Country/Region', 'Province/State'], as_index=False)['Deaths'].sum()
death

#### Total number of deaths till now

In [None]:
# Grand total of the 'Deaths' column over all rows of the cleaned frame.
df['Deaths'].sum()

## Which country had the highest number of deaths?

In [None]:
# `death` has one row per (country, province). Plotting it directly draws
# several overlapping bars at the same country label, so the visible bar is
# the largest province rather than the country total. Aggregate to one value
# per country first.
deaths_by_country = death.groupby('Country/Region')['Deaths'].sum()

plt.bar(
    deaths_by_country.index.astype(str),
    deaths_by_country.values,
    width=0.8,
    color=sns.dark_palette('#f7347a'),
)
plt.xticks(rotation='vertical')
plt.xlabel('Countries', size=15)
plt.ylabel('No of Deaths', size=15)
plt.show()

## How strongly is the number of confirmed cases correlated with the number of deaths?


In [None]:
from scipy.stats import pearsonr

# Pearson correlation between row-level confirmed and death counts; the
# result of the call is shown as the cell output.
pearsonr(df.Confirmed, df.Deaths)

#### There is high correlation between the no. of deaths and the number of confirmed cases of Corona around the world!

In [None]:
# Correlation between confirmed and recovered counts.
df['Confirmed'].corr(df['Recovered'])

In [None]:
# Correlation between recovered and death counts.
df['Recovered'].corr(df['Deaths'])

In [None]:
# Pairwise correlation matrix of the three count columns.
mat = df[['Confirmed', 'Deaths', 'Recovered']].corr()

# Draw the matrix as a heatmap on an explicitly created axes, so the figure
# size is applied to this plot and nothing depends on the implicit
# "current axes".
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(mat, cmap='Blues', linecolor='white', linewidths=1, ax=ax)
plt.show()

## Which country had the best recovery rate? 

In [None]:
# Cast both count columns to 64-bit integers in a single call.
df = df.astype({'Confirmed': 'int64', 'Recovered': 'int64'})

In [None]:
rec = pd.DataFrame(df[['Recovered', 'Confirmed']].value_counts(normalize=True)*100)
rec.reset_index(inplace=True)
rec.rename(columns = {0:'Rate'}, inplace=True)
rec

In [None]:
rec['Country/Region'] = df['Country/Region'].copy()
rec

In [None]:
rec = rec.groupby('Country/Region').sum()
rec.reset_index(inplace=True)
rec[['Recovered', 'Confirmed']].value_counts(normalize=True)*100
rec

In [None]:
rec['Country/Region'] = rec['Country/Region'].astype('str')

### Plotting a line chart representing the recovery rates of countries

In [None]:
# Apply the theme before the figure is created so it affects this plot.
sns.set_theme(style='darkgrid')

# Line chart of the 'Rate' column of `rec` against country name.
fig, ax = plt.subplots()
sns.lineplot(data=rec, x='Country/Region', y='Rate', ax=ax)
ax.set_xlabel('Countries')
ax.set_ylabel('Recovery rate (%)')
plt.xticks(rotation=90)
plt.show()

### Mainland China had the best recovery rate 