In [31]:
import pandas as pd
import numpy as np
from prophet import Prophet
from IPython.display import clear_output

In [32]:
# Load the tab-separated sell-in file and add 'fecha': the 'periodo' value
# (rendered as text and parsed with the year+month format) as a timestamp.
df = pd.read_csv('sell-in.txt', sep='\t').assign(
    fecha=lambda ventas: pd.to_datetime(ventas['periodo'].astype(str), format='%Y%m')
)
df.head()

Unnamed: 0,periodo,customer_id,product_id,plan_precios_cuidados,cust_request_qty,cust_request_tn,tn,fecha
0,201701,10234,20524,0,2,0.053,0.053,2017-01-01
1,201701,10032,20524,0,1,0.13628,0.13628,2017-01-01
2,201701,10217,20524,0,1,0.03028,0.03028,2017-01-01
3,201701,10125,20524,0,1,0.02271,0.02271,2017-01-01
4,201701,10012,20524,0,11,1.54452,1.54452,2017-01-01


In [33]:
# Build the monthly time series for a single product
def serie_producto(id_producto, datos=None):
    """Return the monthly 'tn' series of one product from its first 2019 month to 2020-02.

    Parameters
    ----------
    id_producto
        Value compared against the 'product_id' column.
    datos : pandas.DataFrame, optional
        Frame with 'product_id', 'fecha' (datetime) and 'tn' columns.
        Defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame or None
        Columns 'fecha' (month starts from the product's first 2019 month
        through 2020-02-01) and 'tn' (sum per month). ``None`` when the
        product has no rows in 2019 (this includes unknown products).

    Notes
    -----
    Months without sales are filled by linear interpolation. With pandas'
    default settings the months after the last observation (including
    2020-01 and 2020-02) receive the last observed value.
    """
    if datos is None:
        datos = df  # notebook-level sales frame

    # Apply both filters BEFORE the emptiness check: a product that only has
    # rows outside 2019 would otherwise produce NaT bounds and make
    # pd.date_range raise.
    es_producto = datos['product_id'] == id_producto
    es_2019 = datos['fecha'].dt.year == 2019
    df_prod = datos.loc[es_producto & es_2019, ['fecha', 'tn']]
    if df_prod.empty:
        return None

    # Calendar of month starts from the first observed month up to Feb 2020
    fecha_inicial = df_prod['fecha'].min()
    fechas_productos = pd.date_range(start=fecha_inicial, end='2020-02-01', freq='MS')
    df_fechas_productos = pd.DataFrame({'fecha': fechas_productos})

    # One row per month, then align onto the full calendar (gaps become NaN)
    df_prod = df_prod.groupby('fecha').agg({'tn': 'sum'}).reset_index()
    df_ret = pd.merge(df_fechas_productos, df_prod, on='fecha', how='left')

    # Assign the result back instead of calling inplace=True on a column
    # selection, which is not guaranteed to modify df_ret.
    df_ret['tn'] = df_ret['tn'].interpolate(method='linear')
    # Safety net for anything the interpolation left as NaN
    return df_ret.fillna(0)

In [34]:
# Products to forecast, read from a tab-separated file
df_pred = pd.read_csv('productos_a_predecir.txt', delimiter='\t')

# Accumulator for the per-product forecasts
resultados = list()

In [35]:
# Start from an empty list so that re-running this cell never appends
# duplicate rows to results left over from a previous run.
resultados = []
total_productos = len(df_pred)

# The first column of df_pred holds the product ids.
for i, id_producto in enumerate(df_pred.iloc[:, 0]):
    df_prod = serie_producto(id_producto)
    # wait=True delays the clear until new output arrives, avoiding flicker.
    clear_output(wait=True)
    print(f"Progress: {i+1}/{total_productos}", end="\r")
    if df_prod is None:
        # No series available for this product: it is left out of the results.
        continue

    # Rename to the 'ds' / 'y' column names used for fitting the model.
    df_prod_prophet = df_prod.rename(columns={'fecha': 'ds', 'tn': 'y'})

    modelo = Prophet()
    modelo.fit(df_prod_prophet)

    # History plus one additional month-start period.
    future = modelo.make_future_dataframe(periods=1, freq='MS')
    forecast = modelo.predict(future)

    # Last row is the extra period; negative forecasts are clamped to 0.
    pred_final = max(forecast['yhat'].iloc[-1], 0)
    resultados.append([id_producto, pred_final])

01:51:41 - cmdstanpy - INFO - Chain [1] start processing


Progress: 780/780

01:51:41 - cmdstanpy - INFO - Chain [1] done processing


In [36]:
# Turn the accumulated [product_id, forecast] pairs into a two-column frame
df_resultados = pd.DataFrame(data=resultados, columns=['product_id', 'tn'])

# Persist the forecasts without the row index
df_resultados.to_csv(path_or_buf='pred_prophet0_1.csv', index=False)

In [47]:
# Two forecast files: the first one supplies the target total, the second
# one is the Prophet output produced earlier in this notebook.
pred12meses = pd.read_csv(filepath_or_buffer='prediccion2.csv')
predprophet = pd.read_csv(filepath_or_buffer='pred_prophet0_1.csv')

In [48]:
# Total 'tn' of each forecast and the quotient between them, one value per line
tot12, totproph = pred12meses['tn'].sum(), predprophet['tn'].sum()
ratio = tot12 / totproph
print(tot12, totproph, ratio, sep='\n')

30069.153636666666
27295.226766673997
1.1016268116658214


In [49]:
# Rescale the Prophet forecast so that its total 'tn' equals the total of
# pred12meses. The factor is derived from the current frames rather than from
# the notebook-level `ratio`, so running this cell again does not compound the
# scaling: once both totals agree the factor is 1.
factor_escala = pred12meses['tn'].sum() / predprophet['tn'].sum()
predprophet['tn'] = predprophet['tn'] * factor_escala

In [50]:
# Recompute both totals and their quotient to check the rescaling
tot12, totproph = pred12meses['tn'].sum(), predprophet['tn'].sum()
ratio = tot12 / totproph
print(tot12, totproph, ratio, sep='\n')

30069.153636666666
30069.153636666666
1.0


In [51]:
# Write the rescaled Prophet forecast to disk, omitting the row index
predprophet.to_csv(path_or_buf='corregida_ratio2.csv', index=False)

## Otra corrección: el mismo ajuste por ratio, ahora sobre `pred_prophet3.csv`

In [52]:
# Two forecast files: the first one supplies the target total, the second
# one is the Prophet forecast to be rescaled.
pred12meses = pd.read_csv(filepath_or_buffer='prediccion2.csv')
predprophet = pd.read_csv(filepath_or_buffer='pred_prophet3.csv')

In [53]:
# Total 'tn' of each forecast and the quotient between them, one value per line
tot12, totproph = pred12meses['tn'].sum(), predprophet['tn'].sum()
ratio = tot12 / totproph
print(tot12, totproph, ratio, sep='\n')

30069.153636666666
25497.82757923429
1.1792829621749938


In [54]:
# Rescale the Prophet forecast so that its total 'tn' equals the total of
# pred12meses. The factor is derived from the current frames rather than from
# the notebook-level `ratio`, so running this cell again does not compound the
# scaling: once both totals agree the factor is 1.
factor_escala = pred12meses['tn'].sum() / predprophet['tn'].sum()
predprophet['tn'] = predprophet['tn'] * factor_escala

In [55]:
# Recompute both totals and their quotient to check the rescaling
tot12, totproph = pred12meses['tn'].sum(), predprophet['tn'].sum()
ratio = tot12 / totproph
print(tot12, totproph, ratio, sep='\n')

30069.153636666666
30069.15363666667
0.9999999999999999


In [57]:
# Write the rescaled Prophet forecast to disk, omitting the row index
predprophet.to_csv(path_or_buf='corregida_ratio3.csv', index=False)