In [1]:
# @title Instalar pandas y kagglehub
!pip install --quiet pandas kagglehub


In [2]:
# @title Importar librerías y leer el CSV
import pandas as pd
from pathlib import Path
import kagglehub   # opcional si prefieres traer el archivo directo de Kaggle
import zipfile, io, requests

# ------------------------------------------------------------------
# OPCIÓN A (archivo local, p. ej. si ya subiste archive.zip a Colab)
local_zip = Path("/content/archive (1).zip")  # ajusta la ruta si cambia el nombre
if local_zip.exists():
    with zipfile.ZipFile(local_zip, "r") as zf:
        with zf.open("country_vaccinations.csv") as f:
            df = pd.read_csv(f)
else:
    # ----------------------------------------------------------------
    # OPCIÓN B (descarga con kagglehub directamente desde el dataset)
    # Requiere que tu API Token de Kaggle esté configurado
    df = kagglehub.load_dataset(
        kagglehub.KaggleDatasetAdapter.PANDAS,
        "gpreda/covid-world-vaccination-progress",
        "country_vaccinations.csv",
    )

# Vista rápida
df.head()


  df = kagglehub.load_dataset(


Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/


In [3]:
# @title Limpiar nombres de columnas y tipos
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")
df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")

# Convertir “people_fully_vaccinated”, “total_vaccinations”, etc. a numérico
num_cols = [
    "total_vaccinations",
    "people_vaccinated",
    "people_fully_vaccinated",
    "daily_vaccinations",
    "total_vaccinations_per_hundred",
    "people_vaccinated_per_hundred",
    "people_fully_vaccinated_per_hundred",
]
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors="coerce")

df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86512 entries, 0 to 86511
Data columns (total 15 columns):
 #   Column                               Non-Null Count  Dtype         
---  ------                               --------------  -----         
 0   country                              86512 non-null  object        
 1   iso_code                             86512 non-null  object        
 2   date                                 86512 non-null  datetime64[ns]
 3   total_vaccinations                   43607 non-null  float64       
 4   people_vaccinated                    41294 non-null  float64       
 5   people_fully_vaccinated              38802 non-null  float64       
 6   daily_vaccinations_raw               35362 non-null  float64       
 7   daily_vaccinations                   86213 non-null  float64       
 8   total_vaccinations_per_hundred       43607 non-null  float64       
 9   people_vaccinated_per_hundred        41294 non-null  float64       
 10  people_ful

In [4]:
# @title Definir rango de fechas a analizar
fecha_inicio = "2021-01-01"   # cambia si lo necesitas
fecha_fin    = "2021-12-31"

mask = (df["date"] >= fecha_inicio) & (df["date"] <= fecha_fin)
df_periodo = df.loc[mask].copy()

print(f"Filas en el periodo {fecha_inicio} → {fecha_fin}: {len(df_periodo):,}")


Filas en el periodo 2021-01-01 → 2021-12-31: 68,630


In [5]:
# @title Vacunación acumulada por país (última fecha disponible en el periodo)
ultimo_dia = (
    df_periodo.groupby("country")["date"].transform("max") == df_periodo["date"]
)
pais_total = (
    df_periodo.loc[ultimo_dia]
    .sort_values("people_fully_vaccinated_per_hundred", ascending=False)
    .reset_index(drop=True)
    .loc[
        :,
        [
            "country",
            "total_vaccinations",
            "people_fully_vaccinated",
            "people_fully_vaccinated_per_hundred",
        ],
    ]
)

# Guardar a CSV
pais_total.to_csv("vacunacion_por_pais_2021.csv", index=False)

pais_total.head(15)


Unnamed: 0,country,total_vaccinations,people_fully_vaccinated,people_fully_vaccinated_per_hundred
0,Pitcairn,94.0,47.0,100.0
1,Brunei,898401.0,400691.0,90.75
2,Portugal,19328231.0,9103550.0,89.53
3,Chile,44313091.0,16543067.0,86.11
4,Cuba,30874045.0,9672464.0,85.46
5,Singapore,11583542.0,4618252.0,84.68
6,Malta,1072274.0,435874.0,84.46
7,South Korea,103880993.0,42629829.0,83.09
8,Cambodia,30456720.0,13659518.0,80.6
9,Denmark,12110043.0,4600866.0,79.14


In [6]:
# @title Agregar por mes a nivel global
df_periodo["año_mes"] = df_periodo["date"].dt.to_period("M")
global_mensual = (
    df_periodo.groupby("año_mes")["daily_vaccinations"].sum().reset_index()
)
global_mensual["año_mes"] = global_mensual["año_mes"].dt.to_timestamp()

# Guardar a CSV
global_mensual.to_csv("vacunacion_global_mensual_2021.csv", index=False)

global_mensual


Unnamed: 0,año_mes,daily_vaccinations
0,2021-01-01,93522530.0
1,2021-02-01,170960500.0
2,2021-03-01,343200200.0
3,2021-04-01,535613200.0
4,2021-05-01,790504100.0
5,2021-06-01,1137931000.0
6,2021-07-01,1059879000.0
7,2021-08-01,1203581000.0
8,2021-09-01,971936000.0
9,2021-10-01,814177600.0


In [7]:
# @title Top 10 países por media de vacunaciones diarias
promedio_diario = (
    df_periodo.groupby("country")["daily_vaccinations"].mean().nlargest(10)
)
promedio_diario = promedio_diario.reset_index().rename(
    columns={"daily_vaccinations": "prom_dosis_diarias"}
)

promedio_diario.to_csv("top10_promedio_diario_2021.csv", index=False)

promedio_diario


Unnamed: 0,country,prom_dosis_diarias
0,China,7650299.0
1,India,4081357.0
2,United States,1394732.0
3,Brazil,948380.3
4,Indonesia,764999.3
5,Japan,629616.4
6,Vietnam,496766.5
7,Pakistan,462401.7
8,Germany,407716.6
9,Mexico,407367.8
