## Libraries

In [1]:
import pandas as pd
import numpy as np  
import polars as pl


  from pandas.core import (


## Global Variables

In [3]:
# Input paths
PATH_DATAFRAME = 'lightgbm_results.xlsx'

## Global Functions

In [4]:
def polars_read_excel(file_name, sheet_name='Sheet1'):
  return pl.read_excel(source=file_name, sheet_name=sheet_name)

## Import Data

In [5]:
df=polars_read_excel(PATH_DATAFRAME, sheet_name='Sheet1')

## Data Manipulation

In [7]:
key_features = [
    'SupplierName',
    'GarmentType',
    'Material',
    'ProposedFoldingMethod',
    'ProposedLayout',
    'Size'
]

In [15]:
w_q1 = 0.2  # 25th percentile
w_q3 = 0.62 # 75th percentile

# We'll define bins: <= w_q1 (Light), > w_q1 and <= w_q3 (Medium), > w_q3 (Heavy)
df = df.with_columns(
    pl.when(pl.col("Weight") <= w_q1)
    .then(pl.lit("Light_W")) # Added _W for clarity if Units bin also uses Light/Medium/Heavy
    .when(pl.col("Weight") <= w_q3) # This implies > w_q1 due to chain
    .then(pl.lit("Medium_W"))
    .otherwise(pl.lit("Heavy_W"))
    .alias("Weight_Bin")
)
key_features.append('Weight_Bin')

In [None]:
# Bin 'ProposedUnitsPerCarton' 
try:
    uc_q33 = df.select(pl.col("ProposedUnitsPerCarton").quantile(0.33)).item()
    uc_q66 = df.select(pl.col("ProposedUnitsPerCarton").quantile(0.66)).item()

    if uc_q33 is not None and uc_q66 is not None and uc_q33 < uc_q66 : # Ensure quantiles are distinct and ordered
        df = df.with_columns(
            pl.when(pl.col("ProposedUnitsPerCarton") <= uc_q33)
            .then(pl.lit("Low_Units"))
            .when(pl.col("ProposedUnitsPerCarton") <= uc_q66)
            .then(pl.lit("Medium_Units"))
            .otherwise(pl.lit("High_Units"))
            .alias("UnitsPerCarton_Bin")
        )
        key_features.append('UnitsPerCarton_Bin')
    elif uc_q33 is not None and uc_q66 is not None and uc_q33 == uc_q66: # Case where 33rd and 66th percentile are the same
        print(f"Warning: 33rd and 66th percentiles for 'ProposedUnitsPerCarton' are the same ({uc_q33}). Creating 2 bins: Low/High.")
        df = df.with_columns(
            pl.when(pl.col("ProposedUnitsPerCarton") <= uc_q33)
            .then(pl.lit("Low_Units"))
            .otherwise(pl.lit("High_Units")) # All values > uc_q33 (or >= uc_q33 if many are equal)
            .alias("UnitsPerCarton_Bin")
        )
        key_features.append('UnitsPerCarton_Bin')
    else: 
        print(f"Warning: Could not robustly determine distinct quantiles for 'UnitsPerCarton_Bin'. uc_q33={uc_q33}, uc_q66={uc_q66}. Check 'ProposedUnitsPerCarton' distribution. Skipping UnitsPerCarton_Bin.")

except Exception as e:
    print(f"Error during 'ProposedUnitsPerCarton' binning: {e}. Skipping UnitsPerCarton_Bin.")

In [19]:
# Ensure no duplicate features if script is re-run
key_combination_features = sorted(list(set(key_features)))
print(f"Using features for combinations: {key_features}")

Using features for combinations: ['SupplierName', 'GarmentType', 'Material', 'ProposedFoldingMethod', 'ProposedLayout', 'Size', 'Weight_Bin', 'UnitsPerCarton_Bin']


## Creating Risk Index

In [None]:
# Group by the selected features and calculate aggregate metrics
risk_index_df = df.group_by(key_combination_features, maintain_order=False).agg(
    pl.col("Probability").mean().alias("Average_Probability_GOOD_State"),
    pl.col("Probability").count().alias("NumberOfPackages"),
    pl.col("Probability").std().alias("StdDev_Probability_GOOD_State"),
    pl.col("Actual").mean().alias("Actual_Observed_GOOD_Rate")
)

# Add calculated BAD state probabilities/rates
risk_index_df = risk_index_df.with_columns([
    (1 - pl.col("Average_Probability_GOOD_State")).alias("Model_Avg_Probability_BAD_State"),
    (1 - pl.col("Actual_Observed_GOOD_Rate")).alias("Actual_Observed_BAD_Rate")
])

## Results

In [31]:
# how many combinations do we have of at least 10 packages?
risk_index_df = risk_index_df.with_columns(
    pl.when(pl.col("NumberOfPackages") >= 10)
    .then(pl.lit(1))
    .otherwise(pl.lit(0))
    .alias("ValidCombination")
)

# size of risk index
risk_index_df.shape
print ("We have",risk_index_df.shape[0], "combinations with at least 10 packages")

We have 3767 combinations with at least 10 packages


In [None]:
# Filter for combinations with a minimum number of packages for reliability
min_packages_threshold = 10 # Adjust as needed

# Combinations with highest modeled probability of being in a BAD state
highest_risk_combinations = risk_index_df.filter(
    pl.col('NumberOfPackages') >= min_packages_threshold
).sort(by='Model_Avg_Probability_BAD_State', descending=True)

print(f"Top 20 combinations likely to be in a bad state")
with pl.Config(tbl_rows=20): # To control number of rows displayed
    display(highest_risk_combinations.head(20))

Top 20 combinations likely to be in a bad state


GarmentType,Material,ProposedFoldingMethod,ProposedLayout,Size,SupplierName,UnitsPerCarton_Bin,Weight_Bin,Average_Probability_GOOD_State,NumberOfPackages,StdDev_Probability_GOOD_State,Actual_Observed_GOOD_Rate,Model_Avg_Probability_BAD_State,Actual_Observed_BAD_Rate
str,str,str,str,str,str,str,str,f64,u32,f64,f64,f64,f64
"""Hoodie""","""Wool""","""Method3""","""LayoutD""","""M""","""SupplierF""","""Low_Units""","""Heavy_W""",0.249233,10,0.026476,0.6,0.750767,0.4
"""Suit""","""Wool""","""Method3""","""LayoutD""","""XL""","""SupplierD""","""Low_Units""","""Heavy_W""",0.267472,11,0.055674,0.454545,0.732528,0.545455
"""Coat""","""Wool""","""Method3""","""LayoutD""","""M""","""SupplierF""","""Low_Units""","""Heavy_W""",0.267608,11,0.054778,0.909091,0.732392,0.090909
"""Coat""","""Wool""","""Method3""","""LayoutE""","""M""","""SupplierF""","""Low_Units""","""Heavy_W""",0.273011,13,0.041331,0.538462,0.726989,0.461538
"""Coat""","""Silk""","""Method3""","""LayoutD""","""M""","""SupplierD""","""Low_Units""","""Heavy_W""",0.274087,10,0.016554,0.8,0.725913,0.2
"""Suit""","""Wool""","""Method3""","""LayoutD""","""L""","""SupplierD""","""Low_Units""","""Heavy_W""",0.278199,11,0.032047,0.636364,0.721801,0.363636
"""Sweater""","""Wool""","""Method3""","""LayoutC""","""M""","""SupplierF""","""Low_Units""","""Heavy_W""",0.278497,12,0.023538,0.666667,0.721503,0.333333
"""Jacket""","""Wool""","""Method3""","""LayoutD""","""L""","""SupplierF""","""Low_Units""","""Heavy_W""",0.28188,12,0.022931,0.666667,0.71812,0.333333
"""Skirt""","""Wool""","""Method3""","""LayoutB""","""L""","""SupplierF""","""Medium_Units""","""Medium_W""",0.283543,12,0.015749,0.583333,0.716457,0.416667
"""Suit""","""Wool""","""Method3""","""LayoutD""","""L""","""SupplierF""","""Low_Units""","""Heavy_W""",0.28602,10,0.064558,0.6,0.71398,0.4


In [None]:
# Sort by Average_Probability_GOOD_State (descending) to see lowest risk

# Combinations with highest modeled probability of being in a GOOD state
lowest_risk_combinations = risk_index_df.filter(
    pl.col('NumberOfPackages') >= min_packages_threshold
).sort(by='Average_Probability_GOOD_State', descending=True)

print(f"Top 20 combinations likely to be in a good state")
with pl.Config(tbl_rows=20):
    display(lowest_risk_combinations.head(20))

Top 20 combinations likely to be in a good state


GarmentType,Material,ProposedFoldingMethod,ProposedLayout,Size,SupplierName,UnitsPerCarton_Bin,Weight_Bin,Average_Probability_GOOD_State,NumberOfPackages,StdDev_Probability_GOOD_State,Actual_Observed_GOOD_Rate,Model_Avg_Probability_BAD_State,Actual_Observed_BAD_Rate
str,str,str,str,str,str,str,str,f64,u32,f64,f64,f64,f64
"""Sweater""","""Cotton""","""Method1""","""LayoutD""","""S""","""SupplierA""","""Low_Units""","""Heavy_W""",0.88121,26,0.027269,0.923077,0.11879,0.076923
"""Sweater""","""Cotton""","""Method2""","""LayoutD""","""S""","""SupplierA""","""Low_Units""","""Heavy_W""",0.88116,22,0.029693,0.954545,0.11884,0.045455
"""Blouse""","""Polyester""","""Method2""","""LayoutA""","""L""","""SupplierA""","""High_Units""","""Light_W""",0.87929,10,0.020248,1.0,0.12071,0.0
"""Shirt""","""Linen""","""Method3""","""LayoutA""","""S""","""SupplierA""","""High_Units""","""Light_W""",0.878447,29,0.023696,1.0,0.121553,0.0
"""Sweater""","""Polyester""","""Method2""","""LayoutC""","""XL""","""SupplierA""","""Low_Units""","""Medium_W""",0.877131,12,0.026543,1.0,0.122869,0.0
"""Blouse""","""Polyester""","""Method2""","""LayoutA""","""S""","""SupplierA""","""High_Units""","""Light_W""",0.876336,11,0.023761,0.909091,0.123664,0.090909
"""Blouse""","""Linen""","""Method3""","""LayoutA""","""M""","""SupplierA""","""Medium_Units""","""Medium_W""",0.876157,11,0.035587,0.727273,0.123843,0.272727
"""T-Shirt""","""Linen""","""Method3""","""LayoutB""","""S""","""SupplierA""","""High_Units""","""Light_W""",0.87574,16,0.02576,1.0,0.12426,0.0
"""Sweater""","""Polyester""","""Method1""","""LayoutB""","""M""","""SupplierA""","""Low_Units""","""Medium_W""",0.874945,24,0.025927,0.916667,0.125055,0.083333
"""Shirt""","""Linen""","""Method2""","""LayoutB""","""XS""","""SupplierA""","""High_Units""","""Light_W""",0.874244,10,0.020977,1.0,0.125756,0.0
