# Union de bases

In [4]:
%pip install unidecode
%pip install xlrd


Note: you may need to restart the kernel to use updated packages.
Collecting xlrd
  Downloading xlrd-2.0.2-py2.py3-none-any.whl.metadata (3.5 kB)
Downloading xlrd-2.0.2-py2.py3-none-any.whl (96 kB)
Installing collected packages: xlrd
Successfully installed xlrd-2.0.2
Note: you may need to restart the kernel to use updated packages.


In [1]:
import re, io, unicodedata, requests, pandas as pd
import numpy as np

# ---------- Utilidades ----------
def normalize_colnames(cols):
    out = []
    for c in cols:
        c = str(c)
        c = ''.join(ch for ch in unicodedata.normalize('NFKD', c) if not unicodedata.combining(ch))
        c = re.sub(r'\s+', ' ', c).strip().lower()
        c = re.sub(r'[^a-z0-9]+', '_', c)
        c = re.sub(r'_+', '_', c).strip('_')
        out.append(c)
    return out

# Sinónimos hacia nombres estándar
SYN_MAP = {
    r"^(depto|dep|departamento|dpto)$": "departamento",
    r"^(municipio|ciudad|mpio|municpio|munici?p[io])$": "municipio",  # tolera typos
    r"^(codigo_dane|cod_dane|codigo_dane_municipio|codigodane|coddane)$": "codigo_dane",
    r"^(arma_s?_?medios?|armas?_?medios?|arma|medio|armamedio|arma_emple.*)$": "arma_medio",
    r"^(fecha|fecha_hecho|fecha_del_hecho|fec_hecho)$": "fecha_hecho",
    r"^(genero|sexo)$": "genero",
    r"^(agrupa_edad_persona|grupo_edad|rango_edad|edad_grupo|grupo_etario|agrupa_edad)$": "agrupa_edad_persona",
    r"^(cantidad|total|nro_casos|num_casos|casos)$": "cantidad",
}

# Columnas finales que queremos conservar
STANDARD_ORDER = [
    "departamento","municipio","codigo_dane",
    "arma_medio","fecha_hecho","genero",
    "agrupa_edad_persona","cantidad","ANIO","DELITO"
]

def fetch_excel(url: str) -> bytes:
    r = requests.get(url, timeout=90)
    r.raise_for_status()
    return r.content

def detect_header_row(pre_df: pd.DataFrame, max_scan_rows: int = 40) -> int:
    keys = ["depart","munic","codigo","dane","arma","medio","fecha","genero","sexo","edad","agrupa","cantidad","total"]
    best_row, best_hits = 0, -1
    for i in range(min(max_scan_rows, len(pre_df))):
        row = pre_df.iloc[i].astype(str).fillna("").tolist()
        text = " | ".join(row).lower()
        hits = sum(k in text for k in keys)
        if hits > best_hits:
            best_hits = hits
            best_row = i
    return best_row

def read_excel_smart(content: bytes, url: str) -> pd.DataFrame:
    # Leer unas filas sin header para detectar
    pre = pd.read_excel(io.BytesIO(content), header=None, nrows=60, engine=None)
    hdr = detect_header_row(pre)
    try:
        df = pd.read_excel(io.BytesIO(content), header=hdr, engine=None)
    except Exception:
        ext = url.lower().split("?")[0]
        engine = "xlrd" if ext.endswith(".xls") else None
        df = pd.read_excel(io.BytesIO(content), header=hdr, engine=engine)
    df = df.loc[:, ~df.columns.astype(str).duplicated()]
    return df

# --- NUEVO: cortar pie desde 'TOTAL' (incluyéndolo) ---
def cut_footer_strict(df: pd.DataFrame) -> pd.DataFrame:
    # Texto por fila
    row_text = df.fillna("").astype(str).agg(" ".join, axis=1).str.lower()
    # 1) buscar 'TOTAL' exacto o con ':' o espacio
    m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)
    idx = np.where(m_total.values)[0]
    cut_at = int(idx[0]) if len(idx) > 0 else None
    # 2) si no hay 'TOTAL', cortar en otros triggers típicos
    if cut_at is None:
        m_trig = row_text.str.contains(r"(fuente|siedco|dijin|elaborado:|revisado:|autorizado:)")
        idx2 = np.where(m_trig.values)[0]
        if len(idx2) > 0:
            cut_at = int(idx2[0])
    if cut_at is not None:
        df = df.iloc[:cut_at, :].copy()
    # quitar filas totalmente vacías
    return df.replace(r"^\s*$", pd.NA, regex=True).dropna(how="all")

def apply_synonyms(df: pd.DataFrame) -> pd.DataFrame:
    cols = normalize_colnames(df.columns)
    df = df.copy()
    df.columns = cols
    mapped = []
    for c in df.columns:
        m = None
        for patt, tgt in SYN_MAP.items():
            if re.fullmatch(patt, c):
                m = tgt; break
        mapped.append(m if m else c)
    df.columns = mapped
    return df

def add_year_and_delito(df: pd.DataFrame, url: str, delito: str) -> pd.DataFrame:
    df = df.copy()
    yrs = re.findall(r"(20\d{2})", url)
    if yrs:
        df["ANIO"] = int(yrs[-1])
    elif "fecha_hecho" in df.columns:
        fechas = pd.to_datetime(df["fecha_hecho"], errors="coerce", dayfirst=True)
        df["ANIO"] = fechas.dt.year
    else:
        df["ANIO"] = pd.NA
    df["DELITO"] = delito.upper()
    return df

# --- NUEVO: eliminar columnas ruido y dejar solo estándar + DELITO ---
def drop_noise_columns(df: pd.DataFrame) -> pd.DataFrame:
    # eliminar 'unnamed_*'
    df = df.loc[:, ~df.columns.str.match(r"^unnamed_\d+$", case=False)].copy()
    # eliminar columnas totalmente vacías
    empty_cols = [c for c in df.columns if df[c].isna().all() or (df[c].astype(str).str.strip() == "").all()]
    df.drop(columns=empty_cols, inplace=True, errors="ignore")
    # eliminar columnas de origen llamadas 'delito' o 'delitos' (pero conservar la nuestra 'DELITO')
    drop_src = [c for c in df.columns if c.lower() in ("delito", "delitos") and c != "DELITO"]
    df.drop(columns=drop_src, inplace=True, errors="ignore")
    # ordenar y limitar a las columnas estándar existentes
    keep_existing = [c for c in STANDARD_ORDER if c in df.columns]
    return df[keep_existing]

# ---------- Todas las URLs por delito ----------
delitos = {
    "ABIGEATO": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20cabezas%20de%20ganado2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20cabezas%20de%20ganado.._0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/abigeato.xls_2.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/abigeato_6.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/abigeato_2020.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/abigeato_2019_3.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/abigeato_2018_2.xlsx",
    ],
    "AMENAZAS": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Amenazas2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/amenzas..xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/amenazas_13.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/amenazas_11.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/amenazas_2020.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/amenazas_2019_1.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/amenazas_2018_1.xlsx",
    ],
    "DELITOS SEXUALES": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Delitos%20sexuales2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos%20sexuales.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos_sexuales_11.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos_sexuales_9.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos_sexuales_2020.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos_sexuales_2019_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos_sexuales_2018_0.xlsx",
    ],
    "EXTORSION": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Extorsi%C3%B3n2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_11.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_11.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_9.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_2020_1.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_2019_2.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_2018_3.xlsx",
    ],
    "HOMICIDIOS": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidio%20intencional_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional%202022.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios2022.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional%202021.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional%202020.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional%202019.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional%202018.xlsx",
    ],
    "HOMICIDIOS EN ACCIDENTES DE TRANSITO": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidios%20en%20accidente%20de%20tr%C3%A1nsito2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios%20en%20accidente%20de%20transito....xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios_en_accidente_de_transito_11.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios_en_accidente_de_transito_9.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios_accidente_de_transito_2020_2.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios_en_accidente_de_transito_2019.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios_en_accidente_de_transito_2018_1.xlsx",
    ],
    "HURTO A PERSONAS": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20personas2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20a%20personas.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_personas_17.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_personas_9.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_personas_2020_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_personas_2019_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_personas_2018_1.xlsx",
    ],
    "HURTO A RESIDENCIAS": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20residencias2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20a%20residencias....xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_residencias_4.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_residencias_5.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_residencias_2020.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_residencias_2019_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_residencias_2018_1.xlsx",
    ],
    "HURTO DE AUTOMOTORES": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20automotores2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20automotores....xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_4.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_5.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_2020_1.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_2019_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_2018_1.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_2018_2.xlsx",
    ],
    "HURTO DE MOTOCICLETAS": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20motocicletas2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20a%20motocicletas....xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_motocicletas_7.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_motocicletas_5.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_motocicletas_2020.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_motocicletas_2019_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_motocicletas_2018_0.xlsx",
    ],
    "HURTOS A ENTIDADES COMERCIALES": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20comercio2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20a%20comercio..._0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_comercio_11.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_comercio_9.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_comercio_2020.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_comercio_2019_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_comercio_2018_1.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_motocicletas_2018_0.xlsx",
    ],
    "HURTOS A ENTIDADES FINANCIERAS": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20entidades%20Financieras2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20entidades%20financieras.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_entidades_financieras_11.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_entidades_financieras_9.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_entidades_financieras_2020.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_entidades_financieras_2019_3.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_entidades_financieras_2018_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_entidades_financieras_2018_3.xlsx",
    ],
    "LESIONES EN ACCIDENTES DE TRANSITO": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Lesiones%20en%20accidente%20de%20tr%C3%A1nsito2024_2.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones%20en%20accidente%20de%20transito....xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_7.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_5.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_2020_1.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_2019_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_2018_1.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_2018_0.xlsx",
    ],
    "LESIONES PERSONALES": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Lesiones%20personales2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones%20personales..._0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_personales_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_personales_5.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_personales_2020.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_personales_2019_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_personales_2018_0.xlsx",
    ],
    "PIRATERIA TERRESTRE": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20pirater%C3%ADa%20terrestre2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20pirateria%20terrestre_1.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_pirateria_terrestre_11.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_pirateria_terrestre_9.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/pirateria_terrestre_2020_1.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_entidades_financieras_2020_0.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/pirateria_terrestre_2019_3.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/pirateria_terrestre_2018_2.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/pirateria_terrestre_2018_1.xlsx",
    ],
    "SECUESTRO": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Secuestro2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro..._0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_11.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_9.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_2020.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_2019_3.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_2018_3.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_2018_2.xlsx",
    ],
    "TERRORISMO": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Terrorismo2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_4.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_11.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_9.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_2020_1.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_2020_0.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_2019_2.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_2018_2.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_2018_0.xlsx",
    ],
    "VIOLENCIA INTRAFAMILIAR": [
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/Violencia%20intrafamiliar2024_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia%20intrafamiliar....xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_12.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_10.xls",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_2020.xls_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_2019_0.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_2018_1.xlsx",
        "https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_2018_0.xlsx",
    ],
}

# ---------- Procesamiento ----------
frames = []
for delito, urls in delitos.items():
    for url in urls:
        try:
            print(f"Descargando: {delito} -> {url}")
            content = fetch_excel(url)
            df = read_excel_smart(content, url)
            df = cut_footer_strict(df)             # <<< CORTA pie desde TOTAL
            df = apply_synonyms(df)                # <<< Normaliza nombres
            df = add_year_and_delito(df, url, delito)  # <<< Añade ANIO y DELITO
            if "cantidad" in df.columns:
                df["cantidad"] = pd.to_numeric(df["cantidad"], errors="coerce")
            df = drop_noise_columns(df)            # <<< Quita ruido y deja solo estándar + DELITO
            frames.append(df)
        except Exception as e:
            print(f"** Error con {url}: {e}")

if frames:
    data_delitos = pd.concat(frames, ignore_index=True).dropna(how="all")
    print("\n=== RESUMEN DATASET TOTAL ===")
    print("Filas:", data_delitos.shape[0], " Columnas:", data_delitos.shape[1])
    print("\nTipos de datos:\n", data_delitos.dtypes)
    print("\nPrimeras filas:\n", data_delitos.head(5))
    # Guardar CSV
    data_delitos.to_csv("delitos_unificado.csv", index=False, encoding="utf-8-sig")
    print('\nArchivo guardado: delitos_unificado.csv')
else:
    print("No se pudo construir el dataset combinado (todas las lecturas fallaron).")


Descargando: ABIGEATO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20cabezas%20de%20ganado2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: ABIGEATO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20cabezas%20de%20ganado.._0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: ABIGEATO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/abigeato.xls_2.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: ABIGEATO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/abigeato_6.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: ABIGEATO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/abigeato_2020.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: ABIGEATO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/abigeato_2019_3.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: ABIGEATO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/abigeato_2018_2.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: AMENAZAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Amenazas2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: AMENAZAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/amenzas..xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: AMENAZAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/amenazas_13.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: AMENAZAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/amenazas_11.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: AMENAZAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/amenazas_2020.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: AMENAZAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/amenazas_2019_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: AMENAZAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/amenazas_2018_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: DELITOS SEXUALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Delitos%20sexuales2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: DELITOS SEXUALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos%20sexuales.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: DELITOS SEXUALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos_sexuales_11.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: DELITOS SEXUALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos_sexuales_9.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: DELITOS SEXUALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos_sexuales_2020.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: DELITOS SEXUALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos_sexuales_2019_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: DELITOS SEXUALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/delitos_sexuales_2018_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: EXTORSION -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Extorsi%C3%B3n2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: EXTORSION -> https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_11.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: EXTORSION -> https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_11.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: EXTORSION -> https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_9.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: EXTORSION -> https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_2020_1.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: EXTORSION -> https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_2019_2.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: EXTORSION -> https://www.policia.gov.co/sites/default/files/delitos-impacto/extorsion_2018_3.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidio%20intencional_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional%202022.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios2022.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional%202021.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional%202020.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional%202019.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidio%20Intencional%202018.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Homicidios%20en%20accidente%20de%20tr%C3%A1nsito2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios%20en%20accidente%20de%20transito....xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios_en_accidente_de_transito_11.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios_en_accidente_de_transito_9.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios_accidente_de_transito_2020_2.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios_en_accidente_de_transito_2019.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HOMICIDIOS EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/homicidios_en_accidente_de_transito_2018_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A PERSONAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20personas2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A PERSONAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20a%20personas.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A PERSONAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_personas_17.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A PERSONAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_personas_9.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A PERSONAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_personas_2020_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A PERSONAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_personas_2019_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A PERSONAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_personas_2018_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A RESIDENCIAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20residencias2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A RESIDENCIAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20a%20residencias....xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A RESIDENCIAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_residencias_4.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A RESIDENCIAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_residencias_5.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A RESIDENCIAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_residencias_2020.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A RESIDENCIAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_residencias_2019_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO A RESIDENCIAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_residencias_2018_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE AUTOMOTORES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20automotores2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE AUTOMOTORES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20automotores....xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE AUTOMOTORES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_4.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE AUTOMOTORES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_5.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE AUTOMOTORES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_2020_1.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE AUTOMOTORES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_2019_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE AUTOMOTORES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_2018_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE AUTOMOTORES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_automotores_2018_2.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE MOTOCICLETAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20motocicletas2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE MOTOCICLETAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20a%20motocicletas....xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE MOTOCICLETAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_motocicletas_7.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE MOTOCICLETAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_motocicletas_5.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE MOTOCICLETAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_motocicletas_2020.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE MOTOCICLETAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_motocicletas_2019_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTO DE MOTOCICLETAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_motocicletas_2018_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES COMERCIALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20comercio2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES COMERCIALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20a%20comercio..._0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES COMERCIALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_comercio_11.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES COMERCIALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_comercio_9.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES COMERCIALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_comercio_2020.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES COMERCIALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_comercio_2019_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES COMERCIALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_comercio_2018_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES COMERCIALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_motocicletas_2018_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES FINANCIERAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20a%20entidades%20Financieras2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES FINANCIERAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20entidades%20financieras.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES FINANCIERAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_entidades_financieras_11.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES FINANCIERAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_entidades_financieras_9.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES FINANCIERAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_entidades_financieras_2020.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES FINANCIERAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_entidades_financieras_2019_3.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES FINANCIERAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_entidades_financieras_2018_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: HURTOS A ENTIDADES FINANCIERAS -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_entidades_financieras_2018_3.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Lesiones%20en%20accidente%20de%20tr%C3%A1nsito2024_2.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones%20en%20accidente%20de%20transito....xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_7.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_5.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_2020_1.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_2019_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_2018_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES EN ACCIDENTES DE TRANSITO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_en_accidente_de_transito_2018_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES PERSONALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Lesiones%20personales2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES PERSONALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones%20personales..._0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES PERSONALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_personales_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES PERSONALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_personales_5.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES PERSONALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_personales_2020.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES PERSONALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_personales_2019_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: LESIONES PERSONALES -> https://www.policia.gov.co/sites/default/files/delitos-impacto/lesiones_personales_2018_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: PIRATERIA TERRESTRE -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Hurto%20pirater%C3%ADa%20terrestre2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: PIRATERIA TERRESTRE -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto%20pirateria%20terrestre_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: PIRATERIA TERRESTRE -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_pirateria_terrestre_11.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: PIRATERIA TERRESTRE -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_a_pirateria_terrestre_9.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: PIRATERIA TERRESTRE -> https://www.policia.gov.co/sites/default/files/delitos-impacto/pirateria_terrestre_2020_1.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: PIRATERIA TERRESTRE -> https://www.policia.gov.co/sites/default/files/delitos-impacto/hurto_entidades_financieras_2020_0.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: PIRATERIA TERRESTRE -> https://www.policia.gov.co/sites/default/files/delitos-impacto/pirateria_terrestre_2019_3.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: PIRATERIA TERRESTRE -> https://www.policia.gov.co/sites/default/files/delitos-impacto/pirateria_terrestre_2018_2.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: PIRATERIA TERRESTRE -> https://www.policia.gov.co/sites/default/files/delitos-impacto/pirateria_terrestre_2018_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: SECUESTRO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Secuestro2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: SECUESTRO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro..._0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: SECUESTRO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_11.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: SECUESTRO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_9.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: SECUESTRO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_2020.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: SECUESTRO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_2019_3.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: SECUESTRO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_2018_3.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: SECUESTRO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/secuestro_2018_2.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: TERRORISMO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Terrorismo2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: TERRORISMO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_4.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: TERRORISMO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_11.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: TERRORISMO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_9.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: TERRORISMO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_2020_1.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: TERRORISMO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_2020_0.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: TERRORISMO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_2019_2.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: TERRORISMO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_2018_2.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: TERRORISMO -> https://www.policia.gov.co/sites/default/files/delitos-impacto/terrorismo_2018_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: VIOLENCIA INTRAFAMILIAR -> https://www.policia.gov.co/sites/default/files/delitos-impacto/Violencia%20intrafamiliar2024_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: VIOLENCIA INTRAFAMILIAR -> https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia%20intrafamiliar....xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: VIOLENCIA INTRAFAMILIAR -> https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_12.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: VIOLENCIA INTRAFAMILIAR -> https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_10.xls


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: VIOLENCIA INTRAFAMILIAR -> https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_2020.xls_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: VIOLENCIA INTRAFAMILIAR -> https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_2019_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: VIOLENCIA INTRAFAMILIAR -> https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_2018_1.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)


Descargando: VIOLENCIA INTRAFAMILIAR -> https://www.policia.gov.co/sites/default/files/delitos-impacto/violencia_intrafamiliar_2018_0.xlsx


  m_total = row_text.str.contains(r"(^|\s)total(\s|:|$)", regex=True)



=== RESUMEN DATASET TOTAL ===
Filas: 3443357  Columnas: 10

Tipos de datos:
 departamento            object
municipio               object
codigo_dane             object
arma_medio              object
fecha_hecho             object
genero                  object
agrupa_edad_persona     object
cantidad               float64
ANIO                     int64
DELITO                  object
dtype: object

Primeras filas:
   departamento      municipio codigo_dane                   arma_medio  \
0    ANTIOQUIA          Amagá   5030000.0  ARMA BLANCA / CORTOPUNZANTE   
1    ANTIOQUIA        Barbosa   5079000.0  ARMA BLANCA / CORTOPUNZANTE   
2    ANTIOQUIA          Maceo   5425000.0  ARMA BLANCA / CORTOPUNZANTE   
3    ANTIOQUIA          Maceo   5425000.0  ARMA BLANCA / CORTOPUNZANTE   
4    ANTIOQUIA  Puerto Berrío   5579000.0  ARMA BLANCA / CORTOPUNZANTE   

           fecha_hecho     genero agrupa_edad_persona  cantidad  ANIO  \
0  2024-10-18 00:00:00  MASCULINO             ADULTOS       1.

# Mirar variables 

In [4]:
import pandas as pd, unicodedata, re

# Cargar
df = pd.read_csv("delitos_unificado.csv", low_memory=False)

# 1) Conteo tal cual vienen en la columna
print("=== Conteo RAW (sin normalizar) ===")
raw_counts = (df["departamento"]
              .astype(str)
              .value_counts(dropna=True))
print(raw_counts.to_string())

=== Conteo RAW (sin normalizar) ===
departamento
CUNDINAMARCA                   711064
ANTIOQUIA                      434739
VALLE                          343847
SANTANDER                      193990
ATLÁNTICO                      150581
TOLIMA                         132684
CAUCA                          118236
BOLÍVAR                        117481
META                           111660
HUILA                          110697
BOYACÁ                         107389
NARIÑO                         106554
NORTE DE SANTANDER              95576
CESAR                           88364
MAGDALENA                       76384
RISARALDA                       75609
CÓRDOBA                         73136
CALDAS                          57531
SUCRE                           50937
QUINDÍO                         49093
GUAJIRA                         42341
CASANARE                        40680
CAQUETÁ                         29692
CHOCÓ                           24884
SIN EMPLEO DE ARMAS             22477
P

In [6]:
import pandas as pd, re, unicodedata

# --- 1) Cargar ---
csv_path = "delitos_unificado.csv"
df = pd.read_csv(csv_path, low_memory=False)

# --- 2) Helpers ---
def norm(s):
    if pd.isna(s): return ""
    s = str(s).upper().strip()
    s = "".join(ch for ch in unicodedata.normalize("NFKD", s) if not unicodedata.combining(ch))
    s = re.sub(r"[^\w\s]", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

DEPT_SET = {
    "AMAZONAS","ANTIOQUIA","ARAUCA","ATLANTICO","BOLIVAR","BOYACA","CALDAS","CAQUETA","CASANARE","CAUCA",
    "CESAR","CHOCO","CORDOBA","CUNDINAMARCA","GUAINIA","GUAVIARE","HUILA","LA GUAJIRA","MAGDALENA","META",
    "NARINO","NORTE DE SANTANDER","PUTUMAYO","QUINDIO","RISARALDA","SAN ANDRES","SANTANDER","SUCRE","TOLIMA",
    "VALLE DEL CAUCA","VAUPES","VICHADA","BOGOTA","BOGOTA DC","BOGOTA D C","ARCHIPIELAGO DE SAN ANDRES"
}

ARMA_PAT = re.compile(
    r"ARMA|FUEGO|BLANCA|CORTO|PUNZ|CONTUND|ESCOPOL|VENEN|QUIMI|GAS|OBJETO|ARTEFACTO|SUSTANCIA|"
    r"SIN EMPLEO|NO REPORT|CINTA|ESPOSA", re.IGNORECASE
)

# --- 3) Swap departamento <-> arma_medio donde aplique ---
if {"departamento","arma_medio"}.issubset(df.columns):
    dep_norm = df["departamento"].map(norm)
    arma_norm = df["arma_medio"].map(norm)
    mask_swap = dep_norm.str.contains(ARMA_PAT, na=False) & arma_norm.isin(DEPT_SET)
    n_swapped = int(mask_swap.sum())
    if n_swapped:
        tmp = df.loc[mask_swap, "departamento"].copy()
        df.loc[mask_swap, "departamento"] = df.loc[mask_swap, "arma_medio"]
        df.loc[mask_swap, "arma_medio"] = tmp
    print(f"Filas corregidas por cruce departamento↔arma_medio: {n_swapped}")
else:
    print("⚠️ No se encontraron ambas columnas 'departamento' y 'arma_medio'.")

# --- 4) Estandarizar nombres de DEPARTAMENTO (con acentos) ---
CANON = {
    "AMAZONAS":"AMAZONAS","ANTIOQUIA":"ANTIOQUIA","ARAUCA":"ARAUCA","ATLANTICO":"ATLÁNTICO",
    "BOLIVAR":"BOLÍVAR","BOYACA":"BOYACÁ","CALDAS":"CALDAS","CAQUETA":"CAQUETÁ","CASANARE":"CASANARE",
    "CAUCA":"CAUCA","CESAR":"CESAR","CHOCO":"CHOCÓ","CORDOBA":"CÓRDOBA","CUNDINAMARCA":"CUNDINAMARCA",
    "GUAINIA":"GUAINÍA","GUAVIARE":"GUAVIARE","HUILA":"HUILA","LA GUAJIRA":"LA GUAJIRA","MAGDALENA":"MAGDALENA",
    "META":"META","NARINO":"NARIÑO","NORTE DE SANTANDER":"NORTE DE SANTANDER","PUTUMAYO":"PUTUMAYO",
    "QUINDIO":"QUINDÍO","RISARALDA":"RISARALDA","SAN ANDRES":"SAN ANDRÉS","SANTANDER":"SANTANDER",
    "SUCRE":"SUCRE","TOLIMA":"TOLIMA","VALLE DEL CAUCA":"VALLE DEL CAUCA","VAUPES":"VAUPÉS","VICHADA":"VICHADA",
    "BOGOTA":"BOGOTÁ D.C.","BOGOTA DC":"BOGOTÁ D.C.","BOGOTA D C":"BOGOTÁ D.C.",
    "ARCHIPIELAGO DE SAN ANDRES":"SAN ANDRÉS"
}
ALIAS = {
    "VALLE":"VALLE DEL CAUCA",
    "ATLANTICO":"ATLANTICO","BOLIVAR":"BOLIVAR","CORDOBA":"CORDOBA","QUINDIO":"QUINDIO","CHOCO":"CHOCO",
    "NARINO":"NARINO","GUAINIA":"GUAINIA","VAUPES":"VAUPES"
}

def canon_dep(val):
    k = norm(val)
    if k in ALIAS: k = ALIAS[k]
    return CANON.get(k, val if pd.notna(val) else val)

if "departamento" in df.columns:
    df["departamento"] = df["departamento"].map(canon_dep)

# --- 5) Guardar SOBRE EL MISMO ARCHIVO ---
df.to_csv(csv_path, index=False, encoding="utf-8-sig")
print(f"✅ Archivo actualizado en sitio: {csv_path}")

# (Opcional) Mostrar un pequeño resumen
if "departamento" in df.columns:
    print("\nTop 15 departamentos (después de limpiar):")
    print(df["departamento"].value_counts().head(15).to_string())


Filas corregidas por cruce departamento↔arma_medio: 27513
✅ Archivo actualizado en sitio: delitos_unificado.csv

Top 15 departamentos (después de limpiar):
departamento
CUNDINAMARCA          716993
ANTIOQUIA             439138
VALLE DEL CAUCA       343847
SANTANDER             195594
ATLÁNTICO             151849
TOLIMA                133920
CAUCA                 118982
BOLÍVAR               118740
META                  112614
HUILA                 111676
BOYACÁ                108295
NARIÑO                107388
NORTE DE SANTANDER     96302
CESAR                  89200
MAGDALENA              77151
