<a href="https://colab.research.google.com/github/victortedesco/brazil-covid19-data-analysis/blob/main/covid_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%pip install mplcursors

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import requests
from io import BytesIO

# Source of the gzip-compressed CSV that the rest of the notebook analyses.
url = "https://data.brasil.io/dataset/covid19/caso_full.csv.gz"
# The request is sent with an explicit browser-like User-Agent header.
headers = {"User-Agent": "Mozilla/5.0"}
# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on a stalled connection and the cell never finishes.
request_timeout = (10, 120)

response = requests.get(url, headers=headers, timeout=request_timeout)

if response.status_code == 200:
    # The payload is held in memory, so pandas decompresses it from a buffer.
    covid_df = pd.read_csv(BytesIO(response.content), compression="gzip")
    print("DataFrame loaded sucessfully!")
    print("Number of lines:", len(covid_df))
else:
    # covid_df is NOT defined on this path; later cells that use it will fail.
    print("Error to load:", response.status_code)

DataFrame loaded sucessfully!
Number of lines: 3853648


In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact
import mplcursors

# Module-level working copy of the downloaded frame. It is read by
# get_cities_by_state and when building the list of states further down,
# so covid_df itself is never touched by that code.
df = covid_df.copy()


def get_cities_by_state(state):
    """Return the sorted, de-duplicated city names recorded for ``state``.

    Reads the module-level ``df``. Only rows whose ``place_type`` equals
    ``"city"`` and whose ``state`` equals the argument are considered;
    missing city names are dropped before de-duplication.

    Args:
        state: Value compared against the ``state`` column.

    Returns:
        A list of the matching city names in ascending order.
    """
    is_city_row = df["place_type"] == "city"
    in_state = df["state"] == state
    names = df.loc[is_city_row & in_state, "city"].dropna().unique()
    return sorted(names)


def update_cities_on_state_change(state):
    """Repopulate the city dropdown for ``state`` and reset it to "TOTAL".

    The option list is "TOTAL" followed by the cities returned by
    ``get_cities_by_state``; the module-level ``city_widget`` is updated
    in place.

    Args:
        state: State whose cities should be offered in the dropdown.
    """
    options = ["TOTAL"] + get_cities_by_state(state)
    city_widget.options = options
    # Select the "TOTAL" entry explicitly after swapping the options.
    city_widget.value = "TOTAL"


def format_number(x):
    """Render ``x`` with thousands separators and no decimal places."""
    return f"{x:,.0f}"


def plot_covid_data(state="SP", city=None, start_date=None, end_date=None):
    """Plot confirmed cases and deaths over time for a state or one city.

    Rows come from the module-level ``covid_df``. The plotted series are the
    ``last_available_confirmed`` and ``last_available_deaths`` columns, drawn
    both as lines and as hoverable scatter points.

    Args:
        state: State code matched against the ``state`` column; it is
            upper-cased before matching.
        city: City name to plot. A falsy value or ``"TOTAL"`` selects the
            rows of the state whose ``city`` is missing (presumably the
            state-level aggregate rows; confirm against the dataset).
        start_date: Optional inclusive lower bound on ``date``; any value
            accepted by ``pd.to_datetime``.
        end_date: Optional inclusive upper bound on ``date``; any value
            accepted by ``pd.to_datetime``.

    Returns:
        None. The figure is shown with ``plt.show()``; when no rows match,
        a message is printed instead and nothing is drawn.
    """
    state = state.upper()

    # Build one boolean mask on the shared frame and copy only the matching
    # rows. This avoids duplicating the whole dataset on every widget change
    # and makes the "date" assignment below act on an independent frame
    # instead of a slice (no SettingWithCopyWarning).
    mask = covid_df["state"] == state
    if city and city != "TOTAL":
        mask &= (covid_df["place_type"] == "city") & (covid_df["city"] == city)
    else:
        mask &= covid_df["city"].isna()

    selection = covid_df.loc[mask].copy()
    selection["date"] = pd.to_datetime(selection["date"])

    if start_date:
        selection = selection[selection["date"] >= pd.to_datetime(start_date)]
    if end_date:
        selection = selection[selection["date"] <= pd.to_datetime(end_date)]

    if selection.empty:
        print("No data for this selection.")
        return

    # A line plot connects points in row order, so make that order
    # chronological regardless of how the source file is sorted.
    selection = selection.sort_values("date", kind="stable")

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(
        selection["date"],
        selection["last_available_confirmed"],
        "b-",
        label="Confirmed Cases",
    )
    ax.plot(
        selection["date"],
        selection["last_available_deaths"],
        "r-",
        label="Deaths",
    )

    # The scatter points duplicate the lines; they exist so the hover cursor
    # has individual points to pick.
    confirmed_points = ax.scatter(
        selection["date"],
        selection["last_available_confirmed"],
        color="blue",
        alpha=0.5,
    )
    deaths_points = ax.scatter(
        selection["date"],
        selection["last_available_deaths"],
        color="red",
        alpha=0.5,
    )

    location = f"{city} ({state})" if city and city != "TOTAL" else state
    ax.set_title(f"COVID-19 Trends - {location}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Cases / Deaths")
    ax.legend()
    ax.tick_params(axis="x", labelrotation=45)
    ax.xaxis.set_major_locator(plt.MaxNLocator(10))
    ax.yaxis.set_major_formatter(
        plt.FuncFormatter(lambda value, pos: format_number(value))
    )

    fig.tight_layout()
    ax.grid(True)

    cursor = mplcursors.cursor([confirmed_points, deaths_points], hover=True)

    # Annotation label and source column for each hoverable artist.
    hover_info = {
        confirmed_points: ("Confirmed", "last_available_confirmed"),
        deaths_points: ("Deaths", "last_available_deaths"),
    }

    @cursor.connect("add")
    def on_add(sel):
        """Fill the hover annotation with the date and value of the point."""
        if sel.artist not in hover_info:
            return
        label, column = hover_info[sel.artist]
        # NOTE(review): newer mplcursors releases appear to expose the picked
        # point as `sel.index` rather than `sel.target.index`; confirm against
        # the installed version (the install cell does not pin one).
        row = selection.iloc[sel.target.index]
        date = row["date"].strftime("%Y-%m-%d")
        value = format_number(row[column])
        sel.annotation.set(text=f"Date: {date}\n{label}: {value}")

    plt.show()


# Dropdowns: one entry per state present in the data, and a city list that
# is filled in by update_cities_on_state_change.
states = sorted(df["state"].dropna().unique())
state_widget = widgets.Dropdown(description="State:", options=states, value="SP")
city_widget = widgets.Dropdown(description="City:")


def _refresh_cities(change):
    """Observer callback: reload the city list for the newly selected state."""
    update_cities_on_state_change(change["new"])


state_widget.observe(_refresh_cities, names="value")

# Populate the city dropdown once for the state selected by default.
update_cities_on_state_change("SP")

# The date pickers start at the full range covered by the dataset.
min_date = covid_df["date"].min()
max_date = covid_df["date"].max()

start_picker = widgets.DatePicker(
    description="Start Date:", value=pd.to_datetime(min_date)
)
end_picker = widgets.DatePicker(
    description="End Date:", value=pd.to_datetime(max_date)
)

interact(
    plot_covid_data,
    state=state_widget,
    city=city_widget,
    start_date=start_picker,
    end_date=end_picker,
)

interactive(children=(Dropdown(description='State:', index=25, options=('AC', 'AL', 'AM', 'AP', 'BA', 'CE', 'D…

<function __main__.plot_covid_data(state='SP', city=None, start_date=None, end_date=None)>