In [30]:
import pandas as pd
import numpy as np
import uuid
import random
import os

from datetime import datetime, timedelta


In [58]:


def generar_fechas_aleatorias(start_date, end_date, n_dates=1):
    """
    Draw ``n_dates`` distinct random timestamps inside ``[start_date, end_date)``.

    Candidate timestamps are ``start_date`` plus a whole number of seconds, so
    every result keeps the sub-second part of ``start_date`` and no two results
    are equal.

    Parameters:
    - start_date: datetime, or a string accepted by ``datetime.fromisoformat``
    - end_date: datetime, or a string accepted by ``datetime.fromisoformat``
    - n_dates: how many timestamps to draw (defaults to 1)

    Returns:
    - List of datetimes (date + time) in ascending order

    Raises:
    - ValueError: if the range spans fewer whole seconds than ``n_dates``
    """
    # Accept ISO strings as well as datetime objects for either bound.
    inicio = datetime.fromisoformat(start_date) if isinstance(start_date, str) else start_date
    fin = datetime.fromisoformat(end_date) if isinstance(end_date, str) else end_date

    # Number of one-second slots available between the two bounds.
    total_segundos = int((fin - inicio).total_seconds())
    if n_dates > total_segundos:
        raise ValueError("Demasiadas fechas para el rango proporcionado.")

    # Sampling without replacement guarantees unique second offsets.
    desplazamientos = random.sample(range(total_segundos), n_dates)
    desplazamientos.sort()

    return [inicio + timedelta(seconds=seg) for seg in desplazamientos]

def generate_purchase():
    """
    Simulate one purchase made of 1 to 5 order lines.

    Each line picks a price at random from a fixed catalogue (the same price
    may be picked on several lines) and a quantity between 1 and 5.

    Returns:
    - ``[items, amount]``: total units bought and total price of the purchase
    """
    catalogo = [20, 49, 99, 299, 499]
    n_lineas = random.randint(1, len(catalogo))

    # Tuple elements are evaluated left to right, so each line draws its
    # price first and its quantity second.
    lineas = [
        (random.choice(catalogo), random.randint(1, 5))
        for _ in range(n_lineas)
    ]

    total_items = sum(cantidad for _, cantidad in lineas)
    total_importe = sum(precio * cantidad for precio, cantidad in lineas)
    return [total_items, total_importe]


In [50]:
# params
# Fixed seed so the random draws behind the synthetic data can be repeated.
# `random` feeds the date, purchase and campaign draws; NumPy's global state is
# what pandas `DataFrame.sample` uses when no `random_state` is passed.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

n_users = 50000          # number of synthetic users
n_visits_max = 9         # upper bound (inclusive) of visits drawn per user
n_campaigns = 6          # campaigns are numbered 1..n_campaigns
rate_convertions = 0.23  # fraction of visits sampled to receive an order

# Read the clock once so both ends of the visit window share the same instant.
run_ts = datetime.now()
end_date = run_ts.strftime("%Y-%m-%d %H:%M:%S")  # kept as a string, truncated to seconds
start_date = run_ts - timedelta(days=40)


In [51]:
# Path of the generated visits table (despite the name, this is a path, not a
# DataFrame); the orders cell reads this file back.
visits_df = os.path.join('..', 'datasets', 'visits.csv')

# `to_csv` does not create missing folders; make sure the target exists before
# spending time generating the rows.
os.makedirs(os.path.dirname(visits_df), exist_ok=True)

(
    pd.DataFrame({'user_id': range(n_users)})
    .assign(
        # how many visits each user makes: uniform draw in 1..n_visits_max
        n_visits=lambda df: df.user_id.apply(lambda _: random.randint(1, n_visits_max)),
        # deterministic per-user UUID derived from the sequential id
        uuid_user=lambda df: df.user_id.apply(
            lambda x: str(uuid.uuid5(uuid.NAMESPACE_DNS, f"user-{x}"))
        ),
        # one list of sorted, distinct timestamps per user
        ts_visits=lambda df: df.n_visits.apply(
            lambda x: generar_fechas_aleatorias(start_date=start_date, end_date=end_date, n_dates=x)
        ),
    )
    .drop(columns=['user_id', 'n_visits'])
    # one row per visit; explode repeats the user's index label on each row,
    # so rebuild a unique index before the row-aligned assignments below
    .explode('ts_visits')
    .reset_index(drop=True)
    .assign(
        # campaign attributed to the visit: uniform draw in 1..n_campaigns
        campaign=lambda df: df.uuid_user.apply(lambda _: random.randint(1, n_campaigns)),
        # deterministic id built from user, campaign and visit timestamp
        uuid_interaction=lambda df: df.apply(
            lambda row: str(
                uuid.uuid5(
                    uuid.NAMESPACE_DNS,
                    f"user-{row['uuid_user']}-{row['campaign']}-{row['ts_visits']}",
                )
            ),
            axis=1,
        ),
    )
    .to_csv(visits_df, index=False)
)

In [64]:
# Path of the generated orders table (a path, not a DataFrame).
orders_df = os.path.join('..', 'datasets', 'orders.csv')

(
    pd.read_csv(visits_df)
    # keep a random share of the visits; each kept visit gets exactly one order
    .sample(frac=rate_convertions)
    # the order may happen at any second within ten days after the visit
    .assign(
        end_date=lambda visits: pd.to_datetime(visits.ts_visits, errors='coerce') + timedelta(days=10)
    )
    .assign(
        # ts_visits is still the ISO string read from the CSV; the helper parses it
        transaction_ts=lambda visits: visits.apply(
            lambda visit: generar_fechas_aleatorias(visit['ts_visits'], visit['end_date'], 1),
            axis=1,
        )
    )
    # unwrap the single-element lists returned by the helper
    .explode("transaction_ts")
    .loc[:, ['uuid_interaction', 'transaction_ts']]
    # draw one [items, amount] pair per order ...
    .assign(
        transaction=lambda orders: orders.apply(lambda _order: generate_purchase(), axis=1)
    )
    # ... and split it into its two columns
    .assign(
        items=lambda orders: orders.transaction.apply(lambda pair: pair[0]),
        amount=lambda orders: orders.transaction.apply(lambda pair: pair[1]),
    )
    .drop(columns=['transaction'])
    .to_csv(orders_df, index=False)
)