In [1]:
pip install --upgrade fosforml

Requirement already up-to-date: fosforml in /opt/conda/lib/python3.9/site-packages (1.1.8)
You should consider upgrading via the 'pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

import fosforml
from fosforml.model_manager.snowflakesession import get_session
# Open the session used by the cells below (its .connection attributes and
# .sql() method). get_session() is imported from
# fosforml.model_manager.snowflakesession and is called without arguments, so
# any connection settings come from outside this notebook.
my_session = get_session()

In [3]:
# Display the database the session's connection is currently using.
my_session.connection.database

'ASSORTMENT_PLANNING'

In [4]:
# Display the schema the session's connection is currently using.
my_session.connection.schema

'CPG_BRONZE'

In [5]:
# Fully qualified name (database.schema.table) of the table queried in the
# next cell; the database and schema match the session values shown above.
data = "ASSORTMENT_PLANNING.CPG_BRONZE.SALES_CLEAN_WITH_CLUSTER_SEP23TOJUL24"

In [6]:
# Select every column of the table named in `data` through the session.
sf_df = my_session.sql(f"select * from {data}")

In [7]:
# Check what kind of object my_session.sql() returned.
type(sf_df)

snowflake.snowpark.dataframe.DataFrame

In [8]:
# Convert the query result into a pandas DataFrame; every later cell works on
# this local `df` rather than on `sf_df`.
df=sf_df.to_pandas()

In [9]:
# Confirm the conversion produced a pandas object.
type(df)

pandas.core.frame.DataFrame

In [10]:
# Preview the first rows to see which columns are available.
df.head()

Unnamed: 0,OUTLET_CODE,PRODUCT_CODE,TRANS_DATE,UNIT_PTR,MNTH_CODE,START_DATE,SALES_VALUE,SALES_UNITS,SALES_VOLUME,SALES_PTR_VALUE,...,MONTH,DAY,DAYOFWEEK,QUARTER,IS_MONTH_START,IS_MONTH_END,FREQUENCY,CATEGORY_ENCODED,SUBCATEGORY_ENCODED,CLUSTER
0,OL81059,PRD0131,2023-12-29,145.454545,202312,2023-11-27,145.45,1,0.0003,145.454545,...,12,29,4,4,False,False,1,5,18,0
1,OL81059,PRD0131,2024-03-03,131.818182,202403,2024-02-27,131.82,1,0.0003,131.818182,...,3,3,6,1,False,False,2,5,18,0
2,OL81059,PRD0131,2024-03-17,131.818182,202403,2024-02-27,131.82,1,0.0003,131.818182,...,3,17,6,1,False,False,3,5,18,0
3,OL81059,PRD0147,2024-01-13,8.928571,202401,2024-01-02,142.86,16,0.000416,142.857143,...,1,13,5,1,False,False,1,2,0,1
4,OL81059,PRD0147,2024-01-20,8.928571,202401,2024-01-02,142.86,16,0.000416,142.857143,...,1,20,5,1,False,False,2,2,0,1


In [11]:
import pandas as pd

# Rows with zero units sold are treated as the "non-selling" records.
non_selling_products = df.loc[df['SALES_UNITS'].eq(0)]

# Mean units per CATEGORY, taken only over rows that actually sold something.
category_avg_sales = (
    df.loc[df['SALES_UNITS'].gt(0)]
    .groupby('CATEGORY')['SALES_UNITS']
    .mean()
    .reset_index()
    .rename(columns={'SALES_UNITS': 'AVG_SALES_UNITS'})
)

# Attach the category mean to each non-selling row (left join keeps every
# non-selling row) and use that mean as the suggested quantity.
non_selling_with_avg = pd.merge(
    left=non_selling_products,
    right=category_avg_sales,
    how='left',
    on='CATEGORY',
).assign(MUST_SELL_QTY=lambda merged: merged['AVG_SALES_UNITS'])

# Narrow view with just the identifying columns and the suggestion.
result = non_selling_with_avg.loc[
    :, ['OUTLET_CODE', 'PRODUCT_CODE', 'CATEGORY', 'MUST_SELL_QTY']
]

# Optional export, left disabled:
#result.to_csv('suggested_order_quantities.csv', index=False)

non_selling_with_avg.head()


Unnamed: 0,OUTLET_CODE,PRODUCT_CODE,TRANS_DATE,UNIT_PTR,MNTH_CODE,START_DATE,SALES_VALUE,SALES_UNITS,SALES_VOLUME,SALES_PTR_VALUE,...,DAYOFWEEK,QUARTER,IS_MONTH_START,IS_MONTH_END,FREQUENCY,CATEGORY_ENCODED,SUBCATEGORY_ENCODED,CLUSTER,AVG_SALES_UNITS,MUST_SELL_QTY


In [12]:
pip install statsmodels

You should consider upgrading via the 'pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


In [13]:
import pandas as pd

# `df` is the pandas frame produced by the earlier `sf_df.to_pandas()` cell.

# Identify non-selling products. This selection is made before SALES_UNITS is
# overwritten below, so it is based on the untrimmed values.
non_selling_products = df[df['SALES_UNITS'] == 0]


def _iqr_fences(series):
    """Return the (lower, upper) fences Q1 - 1.5*IQR and Q3 + 1.5*IQR of ``series``."""
    Q1 = series.quantile(0.25)
    Q3 = series.quantile(0.75)
    IQR = Q3 - Q1
    return Q1 - 1.5 * IQR, Q3 + 1.5 * IQR


def remove_outliers(series):
    """Return only the values of ``series`` that lie within its IQR fences.

    Surviving values keep their original index labels. The result can be
    shorter than the input, so it must not be passed directly to
    ``groupby(...).transform``, which expects a same-length result.
    """
    lower, upper = _iqr_fences(series)
    return series[(series >= lower) & (series <= upper)]


def _mask_outliers(series):
    """Same-length variant of ``remove_outliers``: out-of-fence values become NaN."""
    lower, upper = _iqr_fences(series)
    return series.where((series >= lower) & (series <= upper))


# Trim outliers per category. A length-preserving helper is used because
# transform() with a function that drops rows either raises a length mismatch
# or silently re-aligns to NaN, depending on the pandas version.
# The fences are computed over every row of the category, zero-unit rows included.
# NOTE(review): this overwrites df['SALES_UNITS'] in place, so re-running the
# cell trims already-trimmed data; re-create `df` before re-running.
df['SALES_UNITS'] = df.groupby('CATEGORY')['SALES_UNITS'].transform(_mask_outliers)

# Calculate average sales for each category (rows with positive units only;
# NaN rows produced by the trimming fail the > 0 test and are excluded).
category_avg_sales = df[df['SALES_UNITS'] > 0].groupby('CATEGORY')['SALES_UNITS'].mean().reset_index()
category_avg_sales.columns = ['CATEGORY', 'AVG_SALES_UNITS']

# Merge non-selling products with average sales data
non_selling_with_avg = pd.merge(non_selling_products, category_avg_sales, on='CATEGORY', how='left')

# Categories with no positive-selling rows get no match in the left join; fill
# those with the overall mean of the trimmed SALES_UNITS column. The result is
# assigned back instead of using inplace=True on a column selection, which is
# not guaranteed to update the parent frame.
overall_avg_sales = df['SALES_UNITS'].mean()
non_selling_with_avg['AVG_SALES_UNITS'] = non_selling_with_avg['AVG_SALES_UNITS'].fillna(overall_avg_sales)

# Suggest order quantity
non_selling_with_avg['MUST_SELL_QTY'] = non_selling_with_avg['AVG_SALES_UNITS']

# Select relevant columns
result = non_selling_with_avg[['OUTLET_CODE', 'PRODUCT_CODE', 'CATEGORY', 'MUST_SELL_QTY']]

# Save the result to a new CSV file
# result.to_csv('suggested_order_quantities.csv', index=False)

non_selling_with_avg.head()


Unnamed: 0,OUTLET_CODE,PRODUCT_CODE,TRANS_DATE,UNIT_PTR,MNTH_CODE,START_DATE,SALES_VALUE,SALES_UNITS,SALES_VOLUME,SALES_PTR_VALUE,...,DAYOFWEEK,QUARTER,IS_MONTH_START,IS_MONTH_END,FREQUENCY,CATEGORY_ENCODED,SUBCATEGORY_ENCODED,CLUSTER,AVG_SALES_UNITS,MUST_SELL_QTY
