<a href="https://colab.research.google.com/github/tunbaruzzo/proyecto-videojuegos-ml-BARUZZO/blob/main/videoGamesSales.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Videojuegos: ¿qué determina el éxito en ventas?**


In [None]:
import json
import os
import seaborn as sns
import matplotlib.pyplot as plt
from googleapiclient.discovery import build
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from wordcloud import WordCloud

# Preguntas de interés
*   ¿Qué géneros se vendieron más a lo largo del tiempo?
*   ¿Qué plataformas dominan en ventas globales y cómo cambió eso por año?
*   ¿Los juegos con cierto Rating ESRB venden más que otros?
*   ¿Existen publishers con ventas consistentemente altas?
*   ¿El año de lanzamiento influye en las ventas globales?







# Funciones para conectar y extraer datos de la API

In [None]:
#instalacion
!pip install requests python-dotenv pandas tqdm rapidfuzz

Collecting rapidfuzz
  Downloading rapidfuzz-3.14.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading rapidfuzz-3.14.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz
Successfully installed rapidfuzz-3.14.1


In [None]:
#1) Imports y config
import os, time, json, math, re, unicodedata
from typing import List, Dict, Any, Optional, Tuple
import requests
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv

# Load environment variables (python-dotenv).
load_dotenv()

# RAWG API key read from the environment; an empty string when it is not set.
RAWG_API_KEY = os.getenv("RAWG_API_KEY", "").strip()

# Root URL that every RAWG endpoint path is appended to.
BASE_URL = "https://api.rawg.io/api"

# Shared HTTP session that carries a custom User-Agent header.
SESSION = requests.Session()
SESSION.headers["User-Agent"] = "eda-ml-proyecto-gabi/1.0"


In [None]:
#2) Helper: GET con paginación y manejo básico de límites
def rawg_get(endpoint: str, params: Dict[str, Any]) -> Dict[str, Any]:
    """Send a GET request to a RAWG endpoint and return the decoded JSON body.

    Args:
        endpoint: Path relative to ``BASE_URL``; leading slashes are stripped.
        params: Query-string parameters for the request.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the response carries an error status code.
    """
    target = f"{BASE_URL}/{endpoint.lstrip('/')}"
    # The API key is attached only when one is configured.
    if RAWG_API_KEY:
        query = {**params, "key": RAWG_API_KEY}
    else:
        query = params
    response = SESSION.get(target, params=query, timeout=30)
    response.raise_for_status()
    return response.json()

def rawg_paginated(endpoint: str, params: Dict[str, Any], page_limit: int = 5, sleep_s: float = 0.8) -> List[Dict[str, Any]]:
    """Collect the ``results`` of consecutive pages of a RAWG listing.

    Pages are requested starting at 1 until either ``page_limit`` pages have
    been fetched or the response has no ``next`` link.

    Args:
        endpoint: RAWG endpoint path, forwarded to ``rawg_get``.
        params: Base query parameters; ``page`` is added for each request.
        page_limit: Maximum number of pages to fetch (kept small for demos;
            raise it to download more data).
        sleep_s: Seconds to wait between consecutive requests, to stay
            within the API rate limit.

    Returns:
        The concatenated ``results`` entries of every fetched page. Empty
        when ``page_limit`` is zero or negative.
    """
    items: List[Dict[str, Any]] = []
    for page in range(1, page_limit + 1):
        data = rawg_get(endpoint, {**params, "page": page})
        # ``results`` may be missing or null; treat both as an empty page.
        items.extend(data.get("results") or [])
        if not data.get("next"):
            break
        # Only pause when another request will actually follow.
        if page < page_limit:
            time.sleep(sleep_s)
    return items


In [None]:
#3) Funciones de extracción (juegos, géneros, plataformas)
def fetch_games(dates: str = "1980-01-01,2016-12-31",
                platforms: Optional[str] = None,
                ordering: str = "-added",
                page_size: int = 40,
                page_limit: int = 5) -> List[Dict[str, Any]]:
    """Fetch games from the RAWG ``games`` endpoint (meant for demo/API use).

    Args:
        dates: Release-date range formatted as 'YYYY-MM-DD,YYYY-MM-DD'.
        platforms: Optional comma-separated platform ids, e.g. "18,1,7".
            Left out of the query when empty or None.
        ordering: Sort key, e.g. '-metacritic', '-rating', '-added'.
        page_size: Number of results requested per page.
        page_limit: Maximum number of pages to download.

    Returns:
        The raw game records gathered by ``rawg_paginated``.
    """
    query: Dict[str, Any] = {
        "dates": dates,
        "page_size": page_size,
        "ordering": ordering,
    }
    # The platform filter is optional and only sent when provided.
    if platforms:
        query = {**query, "platforms": platforms}
    return rawg_paginated("games", params=query, page_limit=page_limit)

def fetch_genres() -> List[Dict[str, Any]]:
    """Fetch a single page of the RAWG ``genres`` listing (page size 40)."""
    query = {"page_size": 40}
    return rawg_paginated("genres", params=query, page_limit=1)

def fetch_platforms() -> List[Dict[str, Any]]:
    """Fetch up to 10 pages of the RAWG ``platforms`` listing (page size 40)."""
    query = {"page_size": 40}
    return rawg_paginated("platforms", params=query, page_limit=10)


In [None]:
#4) Normalización a DataFrame (nos quedamos con campos útiles “pre-lanzamiento”)
# Output columns, in order; also used so empty input still yields the schema.
_GAME_COLUMNS = [
    "rawg_id", "name", "released", "year", "metacritic", "ratings_count",
    "genres", "parent_platforms", "platforms", "stores",
]


def _join_nested_names(entries: Optional[List[Dict[str, Any]]], key: str) -> Optional[str]:
    """Join each ``entry[key]["name"]`` with ', '; None for a missing or empty list."""
    if not entries:
        return None
    return ", ".join(entry[key]["name"] for entry in entries)


def normalize_games(items: List[Dict[str, Any]]) -> pd.DataFrame:
    """Flatten raw RAWG game records into a DataFrame with friendly columns.

    NOTE: ``metacritic`` and ``ratings_count`` are included only as API
    reference values; they are not intended to be used for training.

    Args:
        items: Game dictionaries as returned by the RAWG ``games`` endpoint.

    Returns:
        A DataFrame with one row per game and the columns listed in
        ``_GAME_COLUMNS``. ``year`` is the first four characters of
        ``released`` (a string) or None. ``genres`` is a comma-separated
        string ("" when there are none); ``parent_platforms``, ``platforms``
        and ``stores`` are comma-separated strings or None when absent/empty.
        Empty input yields an empty DataFrame that still has these columns.
    """
    rows = []
    for g in items:
        released = g.get("released")
        rows.append({
            "rawg_id": g.get("id"),
            "name": g.get("name"),
            "released": released,
            "year": released[:4] if released else None,
            "metacritic": g.get("metacritic"),
            "ratings_count": g.get("ratings_count"),
            # ``genres`` may be present but null; treat that like an empty list.
            "genres": ", ".join(x.get("name", "") for x in (g.get("genres") or [])),
            "parent_platforms": _join_nested_names(g.get("parent_platforms"), "platform"),
            "platforms": _join_nested_names(g.get("platforms"), "platform"),
            "stores": _join_nested_names(g.get("stores"), "store"),
        })
    return pd.DataFrame(rows, columns=_GAME_COLUMNS)

def normalize_simple(items: List[Dict[str, Any]], key_fields: List[str]) -> pd.DataFrame:
    """Build a DataFrame from simple resources (genres, platforms).

    Only the keys named in ``key_fields`` are kept; a key missing from a
    resource becomes None in that row.
    """
    records = [
        {field: resource.get(field) for field in key_fields}
        for resource in items
    ]
    return pd.DataFrame(records)


In [None]:
#5) Guardado local (CSV/JSON)
def save_json(filepath: str, data: Any) -> None:
    """Write ``data`` to ``filepath`` as UTF-8 JSON.

    The output is indented by two spaces and non-ASCII characters are
    written as-is rather than escaped. An existing file is overwritten.
    """
    with open(filepath, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)

def save_csv(df: pd.DataFrame, filepath: str) -> None:
    """Write ``df`` to ``filepath`` as a UTF-8 CSV without the index column."""
    df.to_csv(path_or_buf=filepath, encoding="utf-8", index=False)


# Carga de datos

In [13]:
# Raw dataset file hosted in the project's GitHub repository.
url = (
    "https://raw.githubusercontent.com/tunbaruzzo/"
    "proyecto-videojuegos-ml-BARUZZO/refs/heads/main/Data"
)
df = pd.read_csv(url)

# Quick sanity check: (rows, columns), then a preview of the first rows.
print(df.shape)
df.head()


(16719, 16)


Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Developer,Rating
0,Wii Sports,Wii,2006.0,Sports,Nintendo,41.36,28.96,3.77,8.45,82.53,76.0,51.0,8.0,322.0,Nintendo,E
1,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,,,,,,
2,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.68,12.76,3.79,3.29,35.52,82.0,73.0,8.3,709.0,Nintendo,E
3,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.61,10.93,3.28,2.95,32.77,80.0,73.0,8.0,192.0,Nintendo,E
4,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37,,,,,,
