In [7]:
# Parameters
# Files that must be present in RAW_DIR once the archive has been extracted.
REQUIRED_FILES = ["chessData.csv", "random_evals.csv", "tactic_evals.csv"]

# Directory (relative path) that receives the downloaded archive and its contents.
RAW_DIR = "../data/raw"

# File name used for the downloaded archive inside RAW_DIR.
ZIP_NAME = "archive.zip"

# Google Drive file ID of the archive; replace with your own Drive ID if needed.
DRIVE_FILE_ID = "1ApZqHWxtuhxrNqdL-odfDxxIceoCiA2z"

In [8]:
from pathlib import Path
import zipfile
import sys
import gdown

# Resolve the raw-data directory and create it (including parents) when absent.
raw_dir = Path(RAW_DIR)
raw_dir.mkdir(exist_ok=True, parents=True)

# Location of the downloaded archive, and of each file expected after extraction.
zip_path = raw_dir.joinpath(ZIP_NAME)
required_paths = [raw_dir.joinpath(filename) for filename in REQUIRED_FILES]

def all_exist(paths):
    """Return True when every entry in ``paths`` is an existing regular file.

    Each entry must provide an ``is_file()`` method (e.g. ``pathlib.Path``).
    Checking stops at the first entry that is not a file. An empty iterable
    yields True.
    """
    for candidate in paths:
        if not candidate.is_file():
            return False
    return True

# Download and unpack the dataset only when at least one required file is
# missing; otherwise the cell is a no-op, so re-running it is cheap.
if all_exist(required_paths):
    print("✅ Archivos ya presentes en data/raw. Nada que hacer.")
else:
    # Download the ZIP unless a copy is already on disk.
    if not zip_path.exists():
        url = f"https://drive.google.com/uc?id={DRIVE_FILE_ID}"
        print(f"⬇️ Descargando {ZIP_NAME} desde Drive a {zip_path} ...")
        gdown.download(url, str(zip_path), quiet=False)

    # gdown may finish without producing the file; stop before extraction.
    if not zip_path.exists():
        raise RuntimeError("No se pudo descargar el ZIP desde Drive.")

    # The file on disk may not be a real archive (e.g. a leftover from a
    # failed or interrupted download). Because the download step is skipped
    # whenever the file exists, such a file would be reused on every run, so
    # fail with an actionable message. The exception type is the one
    # zipfile.ZipFile itself would raise for a non-ZIP file.
    if not zipfile.is_zipfile(zip_path):
        raise zipfile.BadZipFile(
            f"{zip_path} no es un ZIP válido. Bórralo y vuelve a ejecutar la celda."
        )

    # Extract the whole archive into the raw-data directory.
    print(f"📦 Descomprimiendo {zip_path} en {raw_dir} ...")
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(raw_dir)

    # Final verification. Uses is_file(), the same criterion as all_exist(),
    # so a directory named like a required file is not accepted here while
    # still counting as missing on the next run.
    missing = [p.name for p in required_paths if not p.is_file()]
    if missing:
        print("⚠️ Faltan archivos tras descomprimir:", missing, file=sys.stderr)
        raise FileNotFoundError(f"Faltan: {missing}")

    print("✅ Archivos listos en data/raw.")


⬇️ Descargando archive.zip desde Drive a ..\data\raw\archive.zip ...


Downloading...
From (original): https://drive.google.com/uc?id=1ApZqHWxtuhxrNqdL-odfDxxIceoCiA2z
From (redirected): https://drive.google.com/uc?id=1ApZqHWxtuhxrNqdL-odfDxxIceoCiA2z&confirm=t&uuid=5983bf19-b250-4349-8c55-443ff3e491d2
To: c:\Users\samir\Documents\noveno semestre\aprendizaje maquina\stocksalmon\data\raw\archive.zip
100%|██████████| 209M/209M [00:19<00:00, 10.9MB/s] 


📦 Descomprimiendo ..\data\raw\archive.zip en ..\data\raw ...
✅ Archivos listos en data/raw.
