# Waste Projection workflow

In [18]:
# Import Libraries
import pandas as pd
import sqlite3
import utils
import importlib
importlib.reload(utils)
from utils import calculate_fefo

# Extract

In [19]:
# Name of the local SQLite database file that this cell opens
database_name = 'hf_database.db'

# Single place to repoint the notebook at another dataset folder or snapshot date
data_dir = '/Users/fil/Documents/my_projects/hf_fefo_waste_projection/datasets'
snapshot_date = '2024_11_20'

# Open a connection to the SQLite database.
# NOTE(review): no statement in this cell reads from or writes to `conn`; the
# CSV files below are the only data source. The connection is kept so the cell
# behaves as before -- confirm whether it can be removed.
conn = sqlite3.connect(database_name)

try:
    # ---- Load demand data in a dataframe # forecast_df
    file_path = data_dir + '/forecast_df_' + snapshot_date + '.csv'
    forecast_df = pd.read_csv(file_path)

    # ---- Load Inventory Data in a dataframe # full_stock_df
    file_path = data_dir + '/full_stock_df_' + snapshot_date + '.csv'
    full_stock_df = pd.read_csv(file_path)

    # ---- Load Exclusion List in a dataframe # exclusion_df
    file_path = data_dir + '/exclusion_df_' + snapshot_date + '.csv'
    exclusion_df = pd.read_csv(file_path)
finally:
    # Close the connection even when one of the CSV reads raises
    conn.close()

# Print an output for verification (row counts of the recorded run in the trailing comments)
print("Loaded 'forecast_df' with " + str(len(forecast_df)) + " lines") # 61111 lines
print("Loaded 'full_stock_df' with " + str(len(full_stock_df)) + " lines") # 16191 lines
print("Loaded 'exclusion_df' with " + str(len(exclusion_df)) + " lines") # 49 lines

Loaded 'forecast_df' with 61111 lines
Loaded 'full_stock_df' with 16191 lines
Loaded 'exclusion_df' with 49 lines


## Clean Inventory Data by Filtering Out the Exclusion List | # merged = full stock | # cleaned = stock

In [20]:
# Left-join the inventory onto the exclusion list. The indicator column `_merge`
# records, for every inventory row, whether its key was found in the exclusion list.
exclusion_keys = ['sku_code', 'supplier_code', 'data_source']
merged_inventory_df = pd.merge(
    full_stock_df,
    exclusion_df,
    how='left',
    on=exclusion_keys,
    indicator=True,
)

# Keep only the inventory rows with no match in the exclusion list,
# then drop the helper column again.
not_excluded = merged_inventory_df['_merge'].eq('left_only')
stock_df = merged_inventory_df.loc[not_excluded].drop(columns='_merge')

# Transform

## Run the Calculation of FEFO

In [21]:
# Run the FEFO calculation on the demand forecast and the filtered stock
calc_df = calculate_fefo(forecast_df, stock_df)

# Report the size of the result as a quick sanity check
n_output_rows = len(calc_df)
print("Calculation complete: ", n_output_rows, " lines in the output", sep="")

# Preview the first five rows
display(calc_df.head())

Calculation complete: 16186 lines in the output


Unnamed: 0,sku_id,batch_id,pallet_id,expiration_date,discardment_date,remaining_qty,consumed_qty,dc,location,category,...,line_cost,type,hf_week,hf_week_out,temp_class,data_source,logical_mlor,mlor_source,snapshot_time,supplier_code
0,C_1-10344,,id_493476,2025-02-24,2025-02-20,3213.0,6787.0,FI,,C_1,...,3148.74,Ingredient SKU,2025-W08,3,a1,po,84.0,fixed_value_MLOR,NaT,s_5230
1,C_1-10344,,id_211571,2025-05-02,2025-04-28,200.0,0.0,FI,loc-2115,C_1,...,196.0,Ingredient SKU,2025-W18,3,a1,in,,,2024-11-19 23:45:04.657,
2,C_1-10344,,id_211648,2025-05-02,2025-04-28,1200.0,0.0,FI,loc-1818,C_1,...,1176.0,Ingredient SKU,2025-W18,3,a1,in,,,2024-11-19 23:45:04.657,
3,C_1-10344,,id_871980,2025-05-02,2025-04-28,5.0,0.0,FI,loc-6725,C_1,...,4.9,Ingredient SKU,2025-W18,3,a1,in,,,2024-11-19 23:45:04.657,
4,C_1-10344,,id_915969,2025-05-02,2025-04-28,8.0,0.0,FI,loc-6769,C_1,...,7.84,Ingredient SKU,2025-W18,3,a1,in,,,2024-11-19 23:45:04.657,


# Quick Visualizations | Load to DWH

In the original notebook, the _calc_df_ dataframe would be loaded into the data warehouse (DWH) for visualization in Tableau and for teams to consume and query the data. In this version of the notebook, a few aggregations and pivots are created instead, purely to illustrate what the result looks like.

## Aggregations

In [22]:
# `hf_week_out` bucket numbers used as filters further down
# (week ranges follow the labels in the mapping below)
window_1_3 = list(range(1, 4))  # buckets 1, 2, 3 -> weeks 1-24
window_1_2 = list(range(1, 3))  # buckets 1, 2    -> weeks 1-12
window_2 = list(range(2, 3))    # bucket 2        -> weeks 7-12

# Label for each `hf_week_out` bucket; the position in the list is the bucket number
hf_week_out_mapping = dict(enumerate([
    'W <0',
    'W 01-06',
    'W 07-12',
    'W 13-24',
    'W > 24',
]))

# Attach the readable label as a new column on calc_df
week_out_labels = calc_df['hf_week_out'].map(hf_week_out_mapping)
calc_df['HF Week Out Name'] = week_out_labels

### Table that shows aggregate sum of Cost by Category and Window

In [23]:
# Keep only the rows whose hf_week_out bucket is listed in window_1_3
in_window_1_3 = calc_df['hf_week_out'].isin(window_1_3)
window_1_3_df = calc_df.loc[in_window_1_3]

# One row per (category, window label), one column per DC; each cell is the
# summed line_cost, with 0 filled in where the pivot has no value.
# The result is ordered by window label first, then by category.
category_agg_window_df = (
    window_1_3_df
    .pivot_table(
        values='line_cost',
        index=['category', 'HF Week Out Name'],
        columns='dc',
        aggfunc='sum',
        fill_value=0,
    )
    .reset_index()
    .sort_values(by=['HF Week Out Name', 'category'])
)

display(category_agg_window_df.head())

dc,category,HF Week Out Name,FI,PI
0,C_1,W 01-06,18815.8531,11468.2945
3,C_2,W 01-06,17656.121,565.56
5,C_3,W 01-06,32709.2661,44109.5082
8,C_4,W 01-06,27825.4802,25876.3613
11,C_5,W 01-06,68783.3367,23312.8275


### Table that shows aggregate sum of Cost by Category, weekly

In [24]:
# Keep only the rows whose hf_week_out bucket is listed in window_1_2
in_window_1_2 = calc_df['hf_week_out'].isin(window_1_2)
window_1_2_df = calc_df.loc[in_window_1_2]

# One row per (DC, category), one column per hf_week; each cell is the
# summed line_cost, with 0 filled in where the pivot has no value.
# The result is ordered by DC first, then by category.
category_agg_weekly_df = (
    window_1_2_df
    .pivot_table(
        values='line_cost',
        index=['dc', 'category'],
        columns='hf_week',
        aggfunc='sum',
        fill_value=0,
    )
    .reset_index()
    .sort_values(by=['dc', 'category'])
)

display(category_agg_weekly_df.head())

hf_week,dc,category,2024-W47,2024-W48,2024-W49,2024-W50,2024-W51,2024-W52,2025-W01,2025-W02,2025-W03,2025-W04,2025-W05,2025-W06
0,FI,C_1,3913.0879,2929.09,4493.8052,508.08,5599.5,1372.29,0.0,889.2,0.0,1261.26,0.0,0.0
1,FI,C_2,1293.909,4644.152,8245.51,1154.2,0.0,2318.35,0.0,0.0,0.0,0.0,0.0,0.0
2,FI,C_3,3124.8279,5499.2269,5482.7496,5130.1653,8024.0522,5448.2442,11719.331,9483.2432,16932.669,4118.0211,0.0,0.0
3,FI,C_4,8632.93,6761.881,138.5376,7570.7336,3365.01,1356.388,1449.66,3.6456,0.0,991.074,0.0,0.0
4,FI,C_5,66852.4167,1930.92,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Create Top Drivers dataframe

In [25]:
# Total line_cost per (SKU, DC, HF week)
aggregated_data = (
    calc_df
    .groupby(['sku_id', 'dc', 'hf_week'])
    .agg({'line_cost': 'sum'})
    .reset_index()
)

# Keep only the combinations whose summed line_cost is above 3000
is_top_driver = aggregated_data['line_cost'].gt(3000)
top_waste = aggregated_data.loc[is_top_driver]

display(top_waste.head())

Unnamed: 0,sku_id,dc,hf_week,line_cost
0,C_1-10344,FI,2025-W08,3148.74
5,C_1-10344,PI,2025-W16,4893.14
6,C_1-10344,PI,2025-W17,4900.0
13,C_1-10415,PI,2025-W35,6646.32
15,C_1-10429,PI,2025-W35,3664.625


End of notebook.