<h1><center>Supply Chain Management (Model Building) </center></h1>

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the test CSV and append a combined zone label built as
# "<zone> <WH_regional_zone>" (e.g. "North Zone 5").
df = pd.read_csv('supply_test.csv').assign(
    Regional_zone=lambda frame: frame['zone'] + ' ' + frame['WH_regional_zone']
)

# Preview the first rows
df.head()

Unnamed: 0.1,Unnamed: 0,Ware_house_ID,WH_Manager_ID,Location_type,WH_capacity_size,zone,WH_regional_zone,num_refill_req_l3m,transport_issue_l1y,Competitor_in_mkt,...,dist_from_hub,workers_num,wh_est_year,storage_issue_reported_l3m,temp_reg_mach,approved_wh_govt_certificate,wh_breakdown_l3m,govt_check_l3m,product_wg_ton,Regional_zone
0,16621,WH_116621,EID_66621,Rural,Large,North,Zone 5,5,0,3,...,156,30.0,2006.0,24,0,A,2,5,30132,North Zone 5
1,16622,WH_116622,EID_66622,Rural,Large,North,Zone 5,5,0,2,...,79,31.0,2019.0,5,1,C,2,24,6075,North Zone 5
2,16623,WH_116623,EID_66623,Rural,Small,North,Zone 6,3,0,3,...,70,41.0,2008.0,19,1,A+,5,9,24076,North Zone 6
3,16624,WH_116624,EID_66624,Rural,Mid,West,Zone 4,5,2,2,...,255,33.0,2017.0,9,1,A+,3,11,13092,West Zone 4
4,16625,WH_116625,EID_66625,Urban,Mid,North,Zone 4,6,0,4,...,205,20.0,1999.0,25,0,B,4,26,29071,North Zone 4


In [4]:
# Keep only the model features plus the target column.
# The explicit .copy() makes test_df an independent frame: later assignments
# into it (such as filling the missing certificates) then do not raise
# SettingWithCopyWarning and cannot be confused with writes into `df`.
test_df = df[['storage_issue_reported_l3m', 'wh_breakdown_l3m', 'Location_type',
              'WH_capacity_size', 'wh_owner_type', 'approved_wh_govt_certificate',
              'Regional_zone', 'product_wg_ton']].copy()

# Count of missing values per selected column
test_df.isnull().sum()

storage_issue_reported_l3m        0
wh_breakdown_l3m                  0
Location_type                     0
WH_capacity_size                  0
wh_owner_type                     0
approved_wh_govt_certificate    206
Regional_zone                     0
product_wg_ton                    0
dtype: int64

In [5]:
# Most frequent certificate grade in this frame (first entry if several tie).
test_certificate = test_df['approved_wh_govt_certificate'].mode()[0]

# Replace every missing certificate with that modal grade.
test_df['approved_wh_govt_certificate'] = (
    test_df['approved_wh_govt_certificate'].fillna(test_certificate)
)

In [6]:
# Load the training CSV; per the preview it already holds the selected
# features, the combined `Regional_zone` column and the target.
train_df = pd.read_csv(filepath_or_buffer='train_df.csv')

train_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,storage_issue_reported_l3m,wh_breakdown_l3m,Location_type,WH_capacity_size,wh_owner_type,approved_wh_govt_certificate,Regional_zone,product_wg_ton
0,0,13,5,Urban,Small,Rented,A,West Zone 6,17115
1,1,4,3,Rural,Large,Company Owned,A,North Zone 5,5074
2,2,17,6,Rural,Mid,Company Owned,A,South Zone 2,23137
3,3,17,3,Rural,Mid,Rented,A+,North Zone 3,22115
4,4,18,6,Rural,Large,Company Owned,C,North Zone 5,24071


## Training and Testing data

In [7]:
# Training features: everything except the target and the 'Unnamed: 0'
# column (presumably a row index that was saved into the CSV).
X_train = train_df.drop(columns=['product_wg_ton', 'Unnamed: 0'])

X_train.head()

Unnamed: 0,storage_issue_reported_l3m,wh_breakdown_l3m,Location_type,WH_capacity_size,wh_owner_type,approved_wh_govt_certificate,Regional_zone
0,13,5,Urban,Small,Rented,A,West Zone 6
1,4,3,Rural,Large,Company Owned,A,North Zone 5
2,17,6,Rural,Mid,Company Owned,A,South Zone 2
3,17,3,Rural,Mid,Rented,A+,North Zone 3
4,18,6,Rural,Large,Company Owned,C,North Zone 5


In [8]:
# Test features: the selected columns without the target.
X_test = test_df.drop(columns='product_wg_ton')

X_test.head()

Unnamed: 0,storage_issue_reported_l3m,wh_breakdown_l3m,Location_type,WH_capacity_size,wh_owner_type,approved_wh_govt_certificate,Regional_zone
0,24,2,Rural,Large,Company Owned,A,North Zone 5
1,5,2,Rural,Large,Company Owned,C,North Zone 5
2,19,5,Rural,Small,Rented,A+,North Zone 6
3,9,3,Rural,Mid,Rented,A+,West Zone 4
4,25,4,Urban,Mid,Rented,B,North Zone 4


In [9]:
# Training target: the `product_wg_ton` column.
y_train = train_df.loc[:, 'product_wg_ton']

y_train.head()

0    17115
1     5074
2    23137
3    22115
4    24071
Name: product_wg_ton, dtype: int64

In [10]:
# Test target: the same `product_wg_ton` column, taken from the test frame.
y_test = test_df.loc[:, 'product_wg_ton']

y_test.head()

0    30132
1     6075
2    24076
3    13092
4    29071
Name: product_wg_ton, dtype: int64

## Building Model

In [26]:
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import ipywidgets as widgets
from IPython.display import display
from math import sqrt

In [12]:
# Feature groups; each group gets its own preprocessing branch.
categorical_features = [
    'Location_type',
    'WH_capacity_size',
    'wh_owner_type',
    'approved_wh_govt_certificate',
    'Regional_zone',
]
numerical_features = ['storage_issue_reported_l3m', 'wh_breakdown_l3m']

# One-hot encode the categoricals; handle_unknown='ignore' means a category
# not seen during fit is encoded as all zeros instead of raising.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Rescale the numeric counts with MinMaxScaler (range learned at fit time).
numerical_transformer = Pipeline(steps=[
    ('scaler', MinMaxScaler()),
])

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', numerical_transformer, numerical_features),
    ],
    # Columns not listed in either group are forwarded unchanged.
    remainder='passthrough',
)

In [31]:
# Fixed seed for the stochastic tree-based models. The metric cells below each
# refit every model, so without a seed the R2 / MAE / RMSE figures would come
# from different random fits and change from run to run.
RANDOM_STATE = 42

lr = LinearRegression()
dt = DecisionTreeRegressor(random_state=RANDOM_STATE)
rf = RandomForestRegressor(random_state=RANDOM_STATE)
ridge = Ridge()
lasso = Lasso()

# (display name, estimator) pairs iterated by the evaluation cells.
regressors = [
    ('LinearRegression', lr),
    ('DecisionTreeRegressor', dt),
    ('RandomForestRegressor', rf),
    ('RidgeRegression', ridge),
    ('LassoRegression', lasso),
]

In [32]:
print('r2_score \n')

for regressor_name, regressor in regressors:
    # The shared `preprocessor` and the estimator instance are (re)fitted in
    # place here; Pipeline does not clone its steps.
    pipeline = Pipeline([('preprocessor', preprocessor), ('regressor', regressor)])
    y_pred = pipeline.fit(X_train, y_train).predict(X_test)

    # R-squared on the test set, reported as a percentage.
    accuracy = r2_score(y_test, y_pred)
    print(f'{regressor_name}: {accuracy * 100:.2f}%')

r2_score 

LinearRegression: 98.49%
DecisionTreeRegressor: 98.61%
RandomForestRegressor: 99.01%
RidgeRegression: 98.49%
LassoRegression: 98.49%


In [33]:
print('Mean Absolute Error \n')

for regressor_name, regressor in regressors:
    # Refit the preprocessing + estimator pair on the training split.
    pipeline = Pipeline([('preprocessor', preprocessor), ('regressor', regressor)])
    pipeline.fit(X_train, y_train)

    # Mean absolute error of the test-set predictions.
    y_pred = pipeline.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    print(f'{regressor_name}: {mae}')

Mean Absolute Error 

LinearRegression: 1046.7804604041978
DecisionTreeRegressor: 982.2475999465759
RandomForestRegressor: 854.5244117040827
RidgeRegression: 1046.489865370129
LassoRegression: 1045.1976421793856


In [34]:
print('Root Mean Squared Error \n')

for regressor_name, regressor in regressors:
    # Refit the preprocessing + estimator pair on the training split.
    pipeline = Pipeline([('preprocessor', preprocessor), ('regressor', regressor)])
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    # RMSE is the square root of the test-set mean squared error.
    mse = mean_squared_error(y_test, y_pred)
    print(f'{regressor_name}: {sqrt(mse)}')

Root Mean Squared Error 

LinearRegression: 1429.4813415669773
DecisionTreeRegressor: 1370.5770657348405
RandomForestRegressor: 1161.321233948175
RidgeRegression: 1429.2650983318808
LassoRegression: 1428.8912394213548


**Comparing the five regression models (Linear Regression, Decision Tree, Random Forest, Ridge, and Lasso), all performed exceptionally well on the test set, with R-squared scores between 98.49% and 99.01%. The Random Forest Regressor gave the best overall performance, with the highest R-squared (99.01%) and the lowest Mean Absolute Error (854.52) and Root Mean Squared Error (1161.32). Linear Regression, Ridge, and Lasso produced nearly identical results, and the Decision Tree fell between them and the Random Forest, so the Random Forest Regressor is the preferred choice for its consistently lower errors across all metrics.**

In [35]:
# Final model: the shared preprocessor followed by the random forest.
model = Pipeline(steps=[
              ('preprocessor', preprocessor),
              ('RandomForestRegressor', rf)])

# Fit explicitly. Without this call the pipeline only predicts because
# `preprocessor` and `rf` happen to have been fitted in place by the
# evaluation loops above; the final model should not depend on that hidden state.
model.fit(X_train, y_train)

# The original cell also rebound `pipeline` to this object; kept for compatibility.
pipeline = model

In [36]:
def prediction(model, X_train=None):
    """Show an input form and predict the product weight for one warehouse.

    Renders one widget per model feature plus a "Make Prediction" button.
    Each click collects the current widget values into a single-row
    DataFrame, passes it to ``model.predict`` and shows the result below
    the button (replacing the previous result).

    Parameters
    ----------
    model : fitted estimator or pipeline
        Must expose ``predict`` and accept a DataFrame with the columns
        ``storage_issue_reported_l3m``, ``wh_breakdown_l3m``,
        ``Location_type``, ``WH_capacity_size``, ``wh_owner_type``,
        ``approved_wh_govt_certificate`` and ``Regional_zone``.
    X_train : optional
        Not used; accepted only so existing calls keep working.

    Returns
    -------
    None
        The form and the prediction are displayed as side effects.
    """
    # Several labels are longer than the default description width and would
    # be clipped; 'initial' lets each label take the width it needs.
    wide_label = {'description_width': 'initial'}

    storage_issues = widgets.IntText(
        value=0, description='No. of Storage issues in last 3 months:', style=wide_label)
    breakdowns = widgets.IntText(
        value=0, description='No. of Breakdowns in last 3 months:', style=wide_label)

    # Dropdown has no `rows` trait (that belongs to Select), so it is not passed.
    location_type = widgets.Dropdown(
        options=['Urban', 'Rural'], description='Location Type:', style=wide_label)
    capacity_size = widgets.Dropdown(
        options=['Small', 'Large', 'Mid'], description='Capacity size:', style=wide_label)
    owner_type = widgets.Dropdown(
        options=['Rented', 'Company Owned'], description='Owner type:', style=wide_label)
    certificate = widgets.Dropdown(
        options=['A+', 'A', 'B+', 'B', 'C'], description='Govt. approved Certificate:',
        style=wide_label)
    zone = widgets.Dropdown(
        options=['West', 'North', 'South', 'East'], description=' Zone: ', style=wide_label)
    regional_zone = widgets.Dropdown(
        options=['Zone 1', 'Zone 2', 'Zone 3', 'Zone 4', 'Zone 5', 'Zone 6'],
        description='Regional Zone:', style=wide_label)

    form_fields = [
        storage_issues,
        breakdowns,
        location_type,
        capacity_size,
        owner_type,
        certificate,
        zone,
        regional_zone,
    ]

    # Text printed inside a button callback is not reliably shown in every
    # frontend; an Output widget captures it and keeps it next to the form.
    result_area = widgets.Output()

    def make_prediction(_):
        # The model expects the combined "<zone> <regional zone>" label.
        combined_zone = f"{zone.value} {regional_zone.value}"

        user_df = pd.DataFrame({
            'storage_issue_reported_l3m': [storage_issues.value],
            'wh_breakdown_l3m': [breakdowns.value],
            'Location_type': [location_type.value],
            'WH_capacity_size': [capacity_size.value],
            'wh_owner_type': [owner_type.value],
            'approved_wh_govt_certificate': [certificate.value],
            'Regional_zone': [combined_zone],
        })

        predicted = model.predict(user_df)

        # Replace the previous result instead of stacking one line per click.
        result_area.clear_output()
        with result_area:
            print(f"Weight of Product to be shipped : {predicted[0]} ton")

    button = widgets.Button(description='Make Prediction')
    button.on_click(make_prediction)

    display(widgets.VBox(form_fields))
    display(button)
    display(result_area)

In [37]:
# Render the interactive form backed by the final model.
prediction(model=model, X_train=X_train)

VBox(children=(IntText(value=0, description='No. of Storage issues in last 3 months:'), IntText(value=0, descr…

Button(description='Make Prediction', style=ButtonStyle())

Weight of Product to be shipped : 15675.9895 ton
