In [None]:

import kagglehub
# Fetch the COVID-19 report dataset through kagglehub and keep the local
# path that the download call hands back.
_DATASET_HANDLE = 'imdevskp/corona-virus-report'
imdevskp_corona_virus_report_path = kagglehub.dataset_download(_DATASET_HANDLE)

print('Data source import complete.')


# Importing the necessary packages

In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides pandas deprecation notices and numpy runtime
# warnings raised by later cells — consider narrowing the filter.
warnings.filterwarnings('ignore')


# Reading the data as a pandas DataFrame

In [None]:
# Load the per-country snapshot. The file is resolved against the directory
# returned by kagglehub.dataset_download() in the first cell rather than a
# hard-coded '/kaggle/input' path, so the notebook also runs outside Kaggle.
df = pd.read_csv(os.path.join(imdevskp_corona_virus_report_path, 'country_wise_latest.csv'))

# Taking a look at the data
df.head(20)

In [None]:
# Read the full time series with the 'Date' column parsed as datetimes.
# The file is resolved against the directory returned by
# kagglehub.dataset_download() so the cell does not depend on the
# Kaggle-only '/kaggle/input' mount.
df1 = pd.read_csv(
    os.path.join(imdevskp_corona_virus_report_path, 'covid_19_clean_complete.csv'),
    parse_dates=['Date'],
)
df1.head(10)

In [None]:

# Read the per-country daily table with the 'Date' column parsed as datetimes.
# The file is resolved against the directory returned by
# kagglehub.dataset_download() instead of a hard-coded '/kaggle/input' path.
df2 = pd.read_csv(
    os.path.join(imdevskp_corona_virus_report_path, 'full_grouped.csv'),
    parse_dates=['Date'],
)

# Only the last expression of a cell is displayed, so the former mid-cell
# `df2.head()` never rendered and has been dropped. Show the rows with the
# largest confirmed counts, limited to ten so the whole frame is not dumped.
df2.sort_values(by=['Confirmed'], ascending=False).head(10)

In [None]:
# Print a summary of the country-level frame (see pandas DataFrame.info).
df.info()

In [None]:
# Count the missing values in each column
df.isna().sum()

**We will visualize the following from this dataset.**
* Total number of Confirmed, Deaths, Recovered, and Active cases World Wide
* Total Confirmed Cases grouped by WHO Region
* Total Deaths grouped by WHO Region
* Which country has the highest number of confirmed cases
* Which country has the highest number of deaths
* Confirmed cases over time
* Death count over time
* Recovered count over time
* World map with cases over time, grouped by Country/Region


In [None]:
# Total number of Confirmed, Deaths, Recovered and Active cases around the world
status_count = ['Confirmed', 'Deaths', 'Recovered', 'Active']

# Sum each status column over all countries and convert to millions, so the
# plotted values agree with the "Count In Millions" axis label below.
status_totals_millions = df[status_count].sum() / 1_000_000

# `column_sums` keeps its original binding: the Axes returned by .plot().
column_sums = status_totals_millions.plot(kind='bar')
plt.xlabel("Status")
plt.ylabel("Count In Millions")
plt.title("Count of Population Status")

In [None]:
# Total Confirmed cases grouped by WHO region
# numeric_only=True restricts the aggregation to numeric columns. Without it,
# recent pandas versions also "sum" text columns such as 'Country/Region',
# concatenating the country names into one long string per region.
Region = df.groupby(by='WHO Region').sum(numeric_only=True)
Region.head()

In [None]:
# Scale the raw counts to millions so the bars match the y-axis label.
(Region['Confirmed'] / 1_000_000).plot(kind='bar')
plt.title('Confirmed Cases Grouped By WHO Region')
plt.ylabel('Count In Millions')

In [None]:
# Total Deaths grouped by WHO Region
Region['Deaths'].plot(kind='bar')
plt.title('Deaths Grouped By WHO Region')
# Label the y-axis so the figure can be read on its own (raw death counts).
plt.ylabel('Count')


In [None]:
# Keep only the country name and the three headline counts per country.
country_columns = ["Confirmed", "Deaths", "Recovered", "Country/Region"]
grouped = df[country_columns]
grouped.head()

In [None]:
# The title promises the countries with the highest counts, so rank the rows
# by confirmed cases and keep the 20 largest instead of plotting every
# country in file order.
top_confirmed = grouped.nlargest(20, "Confirmed")
fig = px.bar(top_confirmed, x="Country/Region", y="Confirmed", title="Countries Having Highest Confirmed Cases Count")
fig.show()

In [None]:
# The title promises the countries with the highest death counts, so rank the
# rows by deaths and keep the 20 largest instead of plotting every country
# in file order.
top_deaths = grouped.nlargest(20, "Deaths")
fig = px.bar(top_deaths, x="Country/Region", y="Deaths", title="Countries Having Highest Deaths")
fig.show()

In [None]:
# Grouping cases by date
# Select the columns with a list: indexing a GroupBy with a bare tuple of
# names is rejected by current pandas. Only the additive count columns are
# summed; 'Date' is the grouping key (restored by reset_index) and
# 'Country/Region' is text, so neither belongs in the aggregation.
date_stats = df1.groupby('Date')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
date_stats.head()


In [None]:
# Show the five most recent rows of the daily totals.
date_stats.tail(5)

In [None]:
# Line chart of the daily confirmed totals.
px.line(
    data_frame=date_stats,
    x='Date',
    y='Confirmed',
    title='World Wide Confirmed Cases',
)

In [None]:
# Line chart of the daily death totals.
px.line(
    data_frame=date_stats,
    x='Date',
    y='Deaths',
    title='World Wide Deaths',
)

In [None]:
# Line chart of the daily recovered totals.
px.line(
    data_frame=date_stats,
    x='Date',
    y='Recovered',
    title='World Wide Recovered',
)

In [None]:
# Build one row per (date, country). The raw table can hold several rows for
# the same country and date, which would give a single map location several
# conflicting colour values within one animation frame.
country_daily = (
    df1.groupby(['Date', 'Country/Region'], as_index=False)['Confirmed']
    .sum()
    .sort_values('Date')
)
country_daily = country_daily.assign(
    # log1p(x) = log(1 + x): zero-case days map to 0 instead of -inf.
    **{
        'Log Confirmed': np.log1p(country_daily['Confirmed']),
        'Day': country_daily['Date'].dt.strftime('%Y-%m-%d'),
    }
)

fig = px.choropleth(country_daily, locations="Country/Region",
                    color="Log Confirmed",
                    locationmode='country names', hover_name="Country/Region",
                    # Show the untransformed count on hover; the colour is log-scaled.
                    hover_data=["Confirmed"],
                    animation_frame="Day",
                    title='Cases over time', color_continuous_scale=px.colors.sequential.matter)

fig.show()