In [None]:
pip install --upgrade fosforml

In [None]:
pip install statsmodels

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

import fosforml
from fosforml.model_manager.snowflakesession import get_session
# Session handle from fosforml; used below to run SQL (`.sql`) and to inspect
# the active connection (`.connection`).
my_session = get_session()

In [None]:
# Display the `database` attribute of the session's connection.
my_session.connection.database

In [None]:
# Display the `schema` attribute of the session's connection.
my_session.connection.schema

In [None]:
# Fully qualified name of the source table; interpolated into the SELECT below.
data = "ASSORTMENT_PLANNING.CPG_BRONZE.SALES_CLEAN_WITH_CLUSTER_SEP23TOJUL24"

In [None]:
# Query every column of the source table through the session.
# `data` is a constant defined in this notebook, so interpolating it is safe here.
sf_df = my_session.sql(f"select * from {data}")

In [None]:
# Check which kind of object the session returned for the query.
type(sf_df)

In [None]:
# Materialise the query result as a local pandas DataFrame.
df = sf_df.to_pandas()

In [None]:
# Confirm the type of the converted object.
type(df)

In [None]:
# Preview the first rows of the loaded data.
df.head()

In [None]:
# Print the dtype of every column before any coercion is applied.
print(df.dtypes)

In [None]:
# Convert every column to a numeric dtype; values that cannot be parsed become NaN.
# NOTE(review): this is applied to all columns, including TRANS_DATE, which is
# parsed as a date further down - confirm the dates survive this coercion.
df = df.apply(lambda column: pd.to_numeric(column, errors='coerce'))

In [None]:
# Replace every missing value (including those produced by the numeric coercion) with 0.
df = df.fillna(value=0)

In [None]:
# Split the data into features and target variable.
# The target must be the same column that is removed from the features:
# previously SALES_UNITS was dropped from X while OUTLET_CODE (still present
# in X) was used as y, so the model was trained to predict one of its own inputs.
X = df.drop(columns=['SALES_UNITS'])
y = df['SALES_UNITS']

In [None]:
# Random 80/20 hold-out split; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Baseline random forest of 100 trees, seeded for reproducibility,
# fitted on the training portion of the split.
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

In [None]:
# Make predictions for the held-out rows with the fitted model.
y_pred = model.predict(X_test)

In [None]:
# Score the hold-out predictions with mean squared error and report the value.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')

In [None]:
# Visualize the results: actual hold-out values (x) against model predictions (y).
# The x axis carries the true target values, so it is labelled as "actual"
# to agree with the y label and the title.
plt.scatter(y_test, y_pred)
plt.xlabel('Actual Sales Units')
plt.ylabel('Predicted Sales Units')
plt.title('Actual vs Predicted Sales Units')
plt.show()

In [None]:
# List the column names available for the time-based analysis below.
df.columns

In [None]:
import pandas as pd

# Parse TRANS_DATE into datetimes, then order the rows chronologically.
# sort_values keeps the original index labels; only the row order changes.
df['TRANS_DATE'] = pd.to_datetime(df['TRANS_DATE'])
df = df.sort_values('TRANS_DATE')

In [None]:
# Derive calendar features from the transaction date.
date_parts = df['TRANS_DATE'].dt
df['MONTH'] = date_parts.month
df['YEAR'] = date_parts.year


In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Decompose SALES_UNITS into trend / seasonal / residual components.
# statsmodels rejects a multiplicative model when the series contains zero or
# negative values, and missing values were filled with 0 earlier, so fall back
# to an additive model unless every observation is strictly positive.
# NOTE(review): the series is the raw row-level column, so period=12 means
# 12 consecutive rows, not 12 calendar months - confirm this is intended.
sales_series = df['SALES_UNITS']
decompose_model = 'multiplicative' if (sales_series > 0).all() else 'additive'
result = seasonal_decompose(sales_series, model=decompose_model, period=12)
# Trailing semicolon suppresses the Figure repr so the plot is rendered once.
result.plot();


In [None]:
# Chronological hold-out split on ONE shared cutoff so that every dated row
# lands in exactly one of the two sets. The previous version used two different
# dates ('2024-08-01' / '2024-09-01'), silently dropping all of August 2024.
# NOTE(review): the source table name suggests the data may end in July 2024,
# in which case `test` is empty - confirm the cutoff against the data range.
split_date = '2024-08-01'
train = df[df['TRANS_DATE'] < split_date]
test = df[df['TRANS_DATE'] >= split_date]


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Feature columns for the time-aware model; the target is SALES_UNITS.
features = ['CLUSTER', 'MONTH', 'CATEGORY_ENCODED']
X_train, y_train = train[features], train['SALES_UNITS']
X_test, y_test = test[features], test['SALES_UNITS']

# Same forest configuration as before, refitted on the chronological training set.
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X_train, y_train)


In [None]:
# Build a 12-row frame of month-end dates and score it with the fitted model.
# NOTE(review): the start date lies inside the window used for training
# (train is everything before 2024-08-01) - confirm the intended horizon.
future_dates = pd.date_range(start='2024-01-01', periods=12, freq='M')
future_df = pd.DataFrame({'TRANS_DATE': future_dates})
future_df['MONTH'] = future_df['TRANS_DATE'].dt.month

# Assuming a single cluster / category for simplicity: broadcast the most
# frequent value to every future row. The previous `df[CLUSTER]` raised a
# NameError (unquoted column name), and assigning a df column directly would
# align on index labels and pull in whichever rows happen to be labelled 0..11
# rather than one representative value.
future_df['CLUSTER'] = df['CLUSTER'].mode().iloc[0]
future_df['CATEGORY_ENCODED'] = df['CATEGORY_ENCODED'].mode().iloc[0]

X_future = future_df[features]
future_df['PREDICTED_SALES'] = model.predict(X_future)


In [None]:
# Select the model's feature columns from the future frame and
# (re)compute the PREDICTED_SALES column from them.
X_future = future_df.loc[:, features]
future_df['PREDICTED_SALES'] = model.predict(X_future)

In [None]:
# Assuming a threshold for 'not currently selling': rows whose predicted sales
# exceed it are kept, and MUST_SELL_QTY is the amount above the threshold.
threshold = 0
# .copy() makes must_sell an independent frame, so adding a column below does
# not trigger SettingWithCopyWarning or depend on view-vs-copy behaviour.
must_sell = future_df.loc[future_df['PREDICTED_SALES'] > threshold].copy()
must_sell['MUST_SELL_QTY'] = must_sell['PREDICTED_SALES'] - threshold


In [None]:
# Print the date, prediction, must-sell quantity and category for the selected rows.
print(must_sell.loc[:, ['TRANS_DATE', 'PREDICTED_SALES', 'MUST_SELL_QTY', 'CATEGORY_ENCODED']])
