In [1]:
import json
from urllib.parse import quote

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# Load the database connection settings from the shared JSON configuration file.
# The encoding is given explicitly so decoding does not depend on the platform default.
with open('../../../utilities/configurations/lyft_conection.json', 'r', encoding='utf-8') as file:
    credenciales_lyft = json.load(file)

# Build the MySQL (PyMySQL driver) engine.
# USERNAME and PASSWORD are percent-encoded before being placed in the URL: characters
# such as '@', ':', '/', '#' or '%' would otherwise be read as URL delimiters and the
# connection string would be parsed incorrectly. safe='' also encodes '/', and spaces
# become '%20', which decodes the same way under both unquote and unquote_plus.
# NOTE(review): assumes the JSON stores the credentials in plain (not already
# percent-encoded) form - confirm against the config file.
# SERVER and DATABASE are interpolated unchanged, so a 'host:port' value keeps working.
engine = create_engine(
    "mysql+pymysql://"
    f"{quote(str(credenciales_lyft['USERNAME']), safe='')}:{quote(str(credenciales_lyft['PASSWORD']), safe='')}"
    f"@{credenciales_lyft['SERVER']}/{credenciales_lyft['DATABASE']}",
    connect_args={
        # Executed when each connection is opened: sets the session's network
        # read/write timeouts and its max_execution_time.
        'init_command': "SET SESSION net_read_timeout=600, net_write_timeout=600, max_execution_time=3000000"
    },
)

In [3]:
# Lower and upper bounds of the analysis window, as 'YYYY-MM-DD HH:MM:SS' strings.
# NOTE(review): no visible cell references these two values - confirm whether the
# SQL query is meant to be filtered by them.
start_date, end_date = '2022-11-01 00:00:00', '2025-11-30 23:59:59'

In [4]:
# Monthly subscription sales: one row per (year, month, subscription type) with the
# number of subscriptions sold and the total amount paid.
#
# This is a plain string rather than an f-string because nothing is interpolated;
# the f prefix would only make any future literal '{' or '}' in the SQL a hazard.
#
# ORDER BY includes the subscription type as a final key. Ordering by year and month
# alone leaves the order of rows inside a month up to the server, so the exported
# file could differ between otherwise identical runs.
#
# NOTE(review): the upper bound is hard-coded as `d.year < 2026` and there is no lower
# bound; start_date / end_date are not applied here - confirm whether the query should
# be restricted to that window.
sql_BikeSubscriptionFact = '''

SELECT
    -- 1. Dimensión Tiempo (Serie Temporal)
    d.year AS Anio,
    d.month AS Mes_Numero,
    d.month_localizedValue0 AS Mes_Nombre,

    -- 2. Dimensión Producto (Segmentación)
    t.name_localizedValue0 AS Tipo_Suscripcion,

    -- 3. Métricas de Ventas (Variables Objetivo)
    COUNT(f.id) AS Cantidad_Vendida,
    SUM(f.totalPaid) AS Ingresos_Totales_Reales

FROM
    BikeSubscriptionFact f

-- JOIN con DateDim para obtener fechas legibles usando purchaseDate_id
JOIN
    DateDim d ON f.purchaseDate_id = d.id

-- JOIN con BikeSubscriptionTypeDim para obtener el nombre de la membresía
JOIN
    BikeSubscriptionTypeDim t ON f.subscriptionType_id = t.id

WHERE
    -- Filtros de Limpieza
    f.totalPaid > 0             -- Excluir pruebas gratuitas o errores (opcional)
    AND d.year < 2026           -- Asegurar que tomamos solo historia cerrada

GROUP BY
    d.year,
    d.month,
    d.month_localizedValue0,
    t.name_localizedValue0

ORDER BY
    d.year ASC,
    d.month ASC,
    t.name_localizedValue0 ASC;

'''

# Run the aggregation on the database and load the result into a DataFrame.
df_sql_BikeSubscriptionFact = pd.read_sql(sql_BikeSubscriptionFact, engine)

In [5]:
# Preview the first five rows of the query result as a quick sanity check.
df_sql_BikeSubscriptionFact.head(n=5)

Unnamed: 0,Anio,Mes_Numero,Mes_Nombre,Tipo_Suscripcion,Cantidad_Vendida,Ingresos_Totales_Reales
0,2022,7,July,1 Day (Including tax),6,726.88
1,2022,7,July,3 Days (Including tax),1,234.0
2,2022,7,July,7 Days (Including tax),2,782.0
3,2022,7,July,Annual (Including tax),2,1042.0
4,2022,8,August,1 Day (Including tax),1522,179596.0


In [6]:
# Write the monthly aggregate to CSV; the DataFrame index is left out of the file.
df_sql_BikeSubscriptionFact.to_csv(
    path_or_buf='../data/ingresos_membresia_mensual.csv',
    index=False,
)