In [23]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

In [2]:
data = pd.read_csv("supply_chain_data.csv")
data.head()

Unnamed: 0,Product type,SKU,Price,Availability,Number of products sold,Revenue generated,Customer demographics,Stock levels,Lead times,Order quantities,...,Location,Lead time,Production volumes,Manufacturing lead time,Manufacturing costs,Inspection results,Defect rates,Transportation modes,Routes,Costs
0,haircare,SKU0,69.808006,55,802,8661.996792,Non-binary,58,7,96,...,Mumbai,29,215,29,46.279879,Pending,0.22641,Road,Route B,187.752075
1,skincare,SKU1,14.843523,95,736,7460.900065,Female,53,30,37,...,Mumbai,23,517,30,33.616769,Pending,4.854068,Road,Route B,503.065579
2,haircare,SKU2,11.319683,34,8,9577.749626,Unknown,1,10,88,...,Mumbai,12,971,27,30.688019,Pending,4.580593,Air,Route C,141.920282
3,skincare,SKU3,61.163343,68,83,7766.836426,Non-binary,23,13,59,...,Kolkata,24,937,18,35.624741,Fail,4.746649,Rail,Route A,254.776159
4,skincare,SKU4,4.805496,26,871,2686.505152,Non-binary,5,3,56,...,Delhi,5,414,3,92.065161,Fail,3.14558,Air,Route A,923.440632


In [11]:
target_column = "Number of products sold"
features = ['Price','Availability','Stock levels','Lead times','Order quantities']

In [12]:
num_folds = 5

In [13]:
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

In [26]:
mse_scores = []

In [15]:
x_train, x_test, y_train, y_test = train_test_split(data[features], data[target_column], 
                                                    test_size=0.2, random_state=42)

In [16]:
for train_index, test_index in kf.split(data):
    train_data = data.loc[train_index, features]
    train_target = data.loc[train_index, target_column]
    test_data = data.loc[test_index, features]
    test_target = data.loc[test_index, target_column]

In [17]:
train_data = lgb.Dataset(x_train, label=y_train)

In [18]:
params = {
    'objective': 'regression',
    'boosting_type': 'gbdt',
    'metric': 'mse',
    'num_leaves': 31,
    'learning_rate':0.05,
    'feature_fraction': 0.9
}

In [19]:
num_round = 100
bst = lgb.train(params, train_data, num_round)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000639 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 124
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 5
[LightGBM] [Info] Start training from score 453.200000


In [20]:
y_pred = bst.predict(x_test, num_iteration=bst.best_iteration)

In [21]:
print("Forecasted Customer Demand:", y_pred)

Forecasted Customer Demand: [602.26477999 462.50809993 470.25702294 382.77090386 325.46704061
 413.25164768 375.62754257 557.18903774 332.66809798 444.34647958
 346.02050077 409.1354491  497.41937115 360.4174601  336.97107195
 346.89585609 442.85618876 354.04827904 570.12310461 269.46575199]


In [27]:
mse = mean_squared_error(test_target,y_pred)
mse_scores.append(mse)

In [30]:
average_mse = sum(mse_scores)/num_folds

In [31]:
print("Average MSE:")
print(average_mse)

Average MSE:
13258.838709275815
