# 🦠 COVID-19 Global Data Tracker

This notebook tracks COVID-19 trends in Kenya, the United States, and India using real-world data from the `owid-covid-data.csv` dataset. It covers data cleaning, exploratory analysis, visualization, and a short report of the resulting insights.

In [None]:
import pandas as pd # type: ignore
import matplotlib.pyplot as plt # type: ignore
import seaborn as sns # type: ignore
import plotly.express as px # type: ignore

# Use seaborn's white grid look for every matplotlib figure in this notebook
sns.set(style='whitegrid')

# Load dataset
# The CSV is expected next to the notebook. When it is missing, print the hint
# and re-raise: continuing would leave `df` undefined (or stale from an earlier
# run), and every later cell would fail or silently use outdated data.
try:
    df = pd.read_csv('owid-covid-data.csv')
except FileNotFoundError:
    print('❌ File not found. Please place owid-covid-data.csv in this directory.')
    raise
else:
    print('✅ Data loaded successfully.')

In [None]:
# Preview the first five rows of the loaded dataset
df.head(n=5)

## Explore and Clean Data

In [None]:
# Show every available column name
print(df.columns)

# Count missing values per column and report the 20 columns with the most gaps
missing_per_column = df.isnull().sum()
most_incomplete = missing_per_column.sort_values(ascending=False)
print(most_incomplete.head(20))

In [None]:
# Filter countries
# The explicit .copy() makes the subset an independent frame, so the column
# assignments below modify it directly instead of raising
# SettingWithCopyWarning on a slice of the full dataset.
countries = ['Kenya', 'United States', 'India']
df = df[df['location'].isin(countries)].copy()

# Convert date column
df['date'] = pd.to_datetime(df['date'])

# Fill missing data
# Forward-fill within each country only, in chronological order. A frame-wide
# ffill would copy the last values of one country into the leading gaps of the
# next country in the frame. `fillna(method='ffill')` is also deprecated in
# recent pandas releases, so the dedicated ffill() is used instead.
df = df.sort_values(['location', 'date'])
fill_cols = [col for col in df.columns if col != 'location']
df[fill_cols] = df.groupby('location')[fill_cols].ffill()
df.head()

## EDA: COVID-19 Trends

In [None]:
# Line chart of total cases over time, one line per selected country
fig, ax = plt.subplots(figsize=(12, 6))
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_cases'], label=name)
ax.set_title('Total COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cases')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Deaths over time: one line per selected country
fig, ax = plt.subplots(figsize=(12, 6))
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_deaths'], label=name)
ax.set_title('Total COVID-19 Deaths Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Deaths')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Daily new cases: one line per selected country
fig, ax = plt.subplots(figsize=(12, 6))
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['new_cases'], label=name)
ax.set_title('Daily New COVID-19 Cases')
ax.set_xlabel('Date')
ax.set_ylabel('New Cases')
ax.legend()
fig.tight_layout()
plt.show()

## Death Rate

In [None]:
# Death rate = total deaths divided by total cases, computed row by row.
# Non-positive case counts are masked to NaN first: dividing by zero would
# otherwise produce inf, which dropna() below does not remove.
valid_cases = df['total_cases'].where(df['total_cases'] > 0)
df['death_rate'] = df['total_deaths'] / valid_cases
df[['location', 'date', 'death_rate']].dropna().head()

## Vaccination Progress

In [None]:
# Line chart of total vaccinations over time, one line per selected country
fig, ax = plt.subplots(figsize=(12, 6))
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_vaccinations'], label=name)
ax.set_title('Total COVID-19 Vaccinations Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Vaccinations')
ax.legend()
fig.tight_layout()
plt.show()

## Choropleth Map (Optional)

In [None]:
# Latest data
# Take each location's own most recent row. Filtering on the single global
# maximum date would drop any country whose last record is earlier than that.
latest = df.sort_values('date').groupby('location').tail(1)

# World map shaded by total cases; `iso_code` supplies the country codes
# plotly uses to place each row on the map.
fig = px.choropleth(latest,
                    locations='iso_code',
                    color='total_cases',
                    hover_name='location',
                    title='Total COVID-19 Cases by Country',
                    color_continuous_scale='Reds')
fig.show()