<a href="https://colab.research.google.com/github/yumnaehab-tech/data-science-projects/blob/main/yumnaehab-tech/data-science-projects/COVID-19%20Data%20Analysis%20-%20Yumna%20Ehab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
# Project by Yumna Ehab
# COVID-19 Data Analysis & Visualization using Python and Plotly
# This notebook explores global COVID-19 trends, showing 7-day averages of new cases and deaths across multiple countries.


In [1]:
!pip install --quiet plotly seaborn


In [3]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [4]:
# Global plotting defaults for the notebook.
# The seaborn theme is applied first because sns.set() resets matplotlib's
# rcParams; the figure size override has to come after it to take effect.
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (12, 6)})


In [19]:
# Our World in Data COVID-19 CSV, read straight from the project's GitHub repo.
data_url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"

try:
    df = pd.read_csv(data_url)
except Exception as e:
    print(" Failed to load dataset. Error:", e)
    # Re-raise so execution stops here: every later cell needs `df`, and
    # continuing would either hit a NameError or silently reuse a stale frame.
    raise
else:
    print(" Dataset loaded successfully from GitHub.")
    print("Shape:", df.shape)


 Dataset loaded successfully from GitHub.
Shape: (429435, 67)


In [14]:
# Columns the rest of the analysis relies on.
needed = [
    "location",
    "date",
    "new_cases",
    "new_deaths",
    "total_vaccinations",
    "people_vaccinated",
    "people_fully_vaccinated",
]
# Report any expected column that the loaded dataset does not provide.
missing = [name for name in needed if name not in df.columns]
if not missing:
    print("\nAll needed columns are present.")
else:
    print("\nWarning - missing columns:", missing)




In [20]:
# Narrow the frame to the columns this analysis uses and parse the dates.
if {"location", "date"}.issubset(df.columns):
    # Metric columns are kept only when the loaded dataset actually has them.
    metric_cols = [c for c in needed if c in df.columns and c not in ("location", "date")]
    df = (
        df[["location", "date"] + metric_cols]
        # .assign builds a new frame, so nothing is written into a slice of
        # the original frame (the pattern behind SettingWithCopyWarning).
        .assign(date=lambda frame: pd.to_datetime(frame["date"], errors="coerce"))
        # Drop rows whose date could not be parsed (if any).
        .dropna(subset=["date"])
    )
    print("\nBasic cleaning done. Rows left:", len(df))
else:
    print("\nSkipping cleaning because essential columns are missing.")


Basic cleaning done. Rows left: 429435


In [22]:
# Sanity check: list the columns that survived the cleaning step.
print(list(df.columns))

['location', 'date', 'new_cases', 'new_deaths', 'total_vaccinations', 'people_vaccinated', 'people_fully_vaccinated']


In [24]:
# Locations to compare in the charts.
countries = [
    "World",
    "United States",
    "India",
    "Brazil",
    "Egypt",
]
# Take an independent copy so columns added later do not touch `df`.
df_selected = df.loc[df["location"].isin(countries)].copy()


In [26]:
def add_moving_average(df, column, window=7):
    """Add a per-location rolling mean of ``column`` to ``df``.

    The result is stored in a new column named ``<column>_<window>day_avg``
    (``<column>_7day_avg`` for the default window), so the label always
    matches the window that was actually used.

    Rows are averaged in their existing order within each location; the
    window only corresponds to calendar days if each location has one row
    per day, sorted by date (not checked here).

    Args:
        df: DataFrame containing a ``location`` column and ``column``.
            It is modified in place.
        column: Name of the column to average.
        window: Number of consecutive rows per location in each average.

    Returns:
        The same ``df`` object, with the rolling-mean column added.
    """
    # min_periods=1 means the first rows of each location are averaged over
    # however many rows are available instead of yielding NaN.
    rolling_mean = df.groupby('location')[column].transform(
        lambda values: values.rolling(window, min_periods=1).mean()
    )
    df[f'{column}_{window}day_avg'] = rolling_mean
    return df

# Add the smoothed series for each metric that gets charted.
for metric in ("new_cases", "new_deaths"):
    df_selected = add_moving_average(df_selected, metric)


In [27]:
# Interactive line chart: one line per location, smoothed new cases over time.
fig = px.line(
    df_selected,
    x='date',
    y='new_cases_7day_avg',
    color='location',
    title=' 7-Day Average of COVID-19 New Cases',
    labels={'new_cases_7day_avg': 'New Cases (7-day avg)', 'date': 'Date'},
)
fig.show()
