In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
import json

In [7]:
plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-light.mplstyle')
pd.set_option('display.max_columns', None)

In [8]:
with open('../config/lyft_conection.json', 'r') as file:
    credenciales_lyft = json.load(file)

engine = create_engine(f"mysql+pymysql://{credenciales_lyft['USERNAME']}:{credenciales_lyft['PASSWORD']}@{credenciales_lyft['SERVER']}/{credenciales_lyft['DATABASE']}")

### Consultas a base de datos

In [9]:
start_date = '2025-11-01 00:00:00'
end_date = '2025-12-31 23:59:59'

In [10]:
sql_BikeSubscriptionFact_main = f'''

SELECT
    bs.start AS fecha_inicio,
    bs.end AS fecha_fin,
    bs.member_accountNumber AS numero_cuenta_miembro,
    bs.member_birthday AS cumpleaños_miembro,
    bs.member_country AS pais_miembro,
    bs.member_gender AS genero_miembro,
    bs.member_postalCode AS codigo_postal_miembro,
    bs.purchasePrice AS precio_compra,
    bs.totalPaid AS total_pagado,
    bs.status_id AS id_estado,
    bs.subscriptionType_id AS id_tipo_suscripcion,
    bs.purchase AS compra,
    bm.bikeMemberAttributes_accountNumber AS cuenta_miembro,
    bm.bikeMemberAttributes_birthday AS cumpleaños_miembro,
    bm.bikeMemberAttributes_country AS pais_miembro,
    bm.bikeMemberAttributes_gender AS genero_miembro,
    bm.bikeMemberAttributes_postalCode AS codigo_postal_miembro,
    bm.email AS correo_electronico,
    bm.firstName AS nombre,
    bm.lastName AS apellido,
    bm.phoneNumber AS numero_telefono,
    bm.city AS ciudad,
    bs_status.localizedValue0 as estatus_suscripcion,
    bs_type.name_localizedValue1 as nombre_tipo_suscripcion
FROM BikeSubscriptionFact bs
INNER JOIN BikeMemberFact bm 
    ON bs.member_accountNumber = bm.bikeMemberAttributes_accountNumber
INNER JOIN BikeSubscriptionStatusDim bs_status 
    ON bs.status_id = bs_status.id
INNER JOIN BikeSubscriptionTypeDim bs_type 
    ON bs.subscriptionType_id = bs_type.id
WHERE 
    bs.purchase BETWEEN UNIX_TIMESTAMP(CONVERT_TZ("{start_date}", "America/Mexico_City", 'UTC'))*1000
    AND UNIX_TIMESTAMP(CONVERT_TZ("{end_date}", "America/Mexico_City", 'UTC'))*1000;

'''

df_sql_BikeSubscriptionFact_main = pd.read_sql(sql_BikeSubscriptionFact_main, engine)
df_sql_BikeSubscriptionFact_main.head()

# AND bs.subscriptionType_id = 11;

Unnamed: 0,fecha_inicio,fecha_fin,numero_cuenta_miembro,cumpleaños_miembro,pais_miembro,genero_miembro,codigo_postal_miembro,precio_compra,total_pagado,id_estado,id_tipo_suscripcion,compra,cuenta_miembro,cumpleaños_miembro.1,pais_miembro.1,genero_miembro.1,codigo_postal_miembro.1,correo_electronico,nombre,apellido,numero_telefono,ciudad,estatus_suscripcion,nombre_tipo_suscripcion
0,1762199434475,1793735434475,50806,440316000000,MX,M,6140,487.93,566.0,0,4,1762027204802,50806,440316000000,MX,M,6140,hebermanuel.perez@gmail.com,Heber Manuel,Perez Torres,5554571091,Condesa,Active,Anual (I.V.A incluido)
1,1762149608000,1793685608000,56834,614757600000,MX,F,3570,487.93,566.0,0,4,1761980404232,56834,614757600000,MX,F,3570,ksantiestebanv@gmail.com,Karla,Santiesteban Vazquez,5532757164,Portales Oriente,Active,Anual (I.V.A incluido)
2,1762016743371,1793552743371,59527,625212000000,MX,F,3020,487.93,566.0,0,4,1762016743371,59527,625212000000,MX,F,3020,ara24rm@hotmail.com,Araceli Victoria,Ramos Martinez,5587933623,Narvarte Poniente,Active,Anual (I.V.A incluido)
3,1762149599000,1793685599000,114221,567237600000,MX,F,6350,487.93,566.0,0,4,1761976807187,114221,567237600000,MX,F,6350,claus_tar@hotmail.com,Claudia Andrea,Bravo Garcia,5529549578,Buenavista,Active,Anual (I.V.A incluido)
4,1762178303756,1793714303756,114717,232178400000,MX,M,6600,487.93,566.0,0,4,1762005604493,114717,232178400000,MX,M,6600,stanakakok@gmail.com,Shinichi,Tanaka,5534344991,Juárez,Active,Anual (I.V.A incluido)


In [11]:
# filtrar por activas
df_sql_BikeSubscriptionFact_main_active = df_sql_BikeSubscriptionFact_main[df_sql_BikeSubscriptionFact_main['estatus_suscripcion'] == '']

# agrupar porsubscription_type_name y contar
gruped = df_sql_BikeSubscriptionFact_main.groupby('nombre_tipo_suscripcion').size()
gruped

nombre_tipo_suscripcion
1 Día (I.V.A incluido)             5089
3 Días (I.V.A incluido)             794
7 Días (I.V.A incluido)             229
Anual (I.V.A incluido)             8317
Anual Ecobici+ (I.V.A incluido)     122
ECOBICI vivo                          2
Ecobici HSBC                        141
dtype: int64

#### Consultas relacionales

In [12]:
sql_BikeSubscriptionFact = f'''

SELECT
    purchaseUser_id
    start,
    end,
    discountCodeRedemptionNumber,
    member_accountNumber,
    member_birthday,
    member_country,
    member_gender,
    member_postalCode
    purchasePrice,
    totalPaid,
    status_id,
    subscriptionType_id, 
    purchase
FROM BikeSubscriptionFact
WHERE 
	purchase BETWEEN UNIX_TIMESTAMP(CONVERT_TZ("{start_date}", "America/Mexico_City", 'UTC'))*1000
    AND UNIX_TIMESTAMP(CONVERT_TZ("{end_date}", "America/Mexico_City", 'UTC'))*1000 AND subscriptionType_id = 11;

'''

df_sql_BikeSubscriptionFact = pd.read_sql(sql_BikeSubscriptionFact, engine)
df_sql_BikeSubscriptionFact.head(2)

# purchaseUser_id = UserDim
# status_id = BikeSubscriptionStatusDim
# subscriptionType_id = BikeSubscriptionTypeDim

Unnamed: 0,start,end,discountCodeRedemptionNumber,member_accountNumber,member_birthday,member_country,member_gender,purchasePrice,totalPaid,status_id,subscriptionType_id,purchase
0,,1794585236512,ZC2GZKHA,2MZWTK59,500104800000,MX,M,06600,0.0,0,11,1763049236512
1,,1794701898618,,62HQ3AD5,332402400000,?,M,?,2499.0,0,11,1763165898618


In [None]:
sql_BikeMemberFact = f'''

SELECT
    id,
    bikeMemberAttributes_accountNumber,
    bikeMemberAttributes_birthday,
    bikeMemberAttributes_country,
    bikeMemberAttributes_gender,
    bikeMemberAttributes_postalCode,
    email,
    firstName,
    lastName,
    phoneNumber,
    city
FROM BikeMemberFact

'''

df_sql_BikeMemberFact = pd.read_sql(sql_BikeMemberFact, engine)
df_sql_BikeMemberFact

In [None]:
sql_BikeSubscriptionStatusDim = f'''

SELECT
    id,
    localizedValue0
FROM BikeSubscriptionStatusDim;
'''

df_sql_BikeSubscriptionStatusDim = pd.read_sql(sql_BikeSubscriptionStatusDim, engine)
df_sql_BikeSubscriptionStatusDim.head(2)

Unnamed: 0,id,localizedValue0
0,0,Active
1,2,To activate


In [None]:
sql_BikeSubscriptionTypeDim = f'''

SELECT
    id,
    name_localizedValue1
FROM BikeSubscriptionTypeDim;
'''

df_sql_BikeSubscriptionTypeDim = pd.read_sql(sql_BikeSubscriptionTypeDim, engine)
df_sql_BikeSubscriptionTypeDim

Unnamed: 0,id,name_localizedValue1
0,1,1 Día (I.V.A incluido)
1,2,3 Días (I.V.A incluido)
2,3,7 Días (I.V.A incluido)
3,4,Anual (I.V.A incluido)
4,9,Anual Ecobici+ (I.V.A incluido)
5,10,Ecobici HSBC
6,11,ECOBICI vivo
7,9223372036854775807,Inconnu
8,5,Membresía migrada
9,7,Membresía Plus TEST 01 (IVA incluido)
