In [0]:
%run "./2.c lead_time_predict"

In [0]:
import pandas as pd
import numpy as np
from scipy.stats import norm

import os
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns


import pickle
from datetime import datetime as dt
from datetime import timedelta
import xgboost
from pandas.tseries.offsets import BDay
import math
import calendar
import datetime
from datetime import date, timedelta

%matplotlib inline

In [0]:
# Relax pandas' display limits so long / wide frames are shown in full in cell output
for display_option, display_limit in (('display.max_rows', 1000),
                                      ('display.max_columns', 500),
                                      ('display.width', 1000)):
  pd.set_option(display_option, display_limit)

In [0]:
# ---- Source data -------------------------------------------------------------
# Each Spark table is pulled fully into pandas; the table names are kept in their
# own variables so they can be swapped for a different snapshot.

# Historical inventory snapshots
h_inv_data='Historical_inventory_rawData_169_03_01'
h_inv=spark.table(h_inv_data).toPandas()

# MB51 data (used later as the consumption history)
Mb51_data='consumptionmb51_roh_zpck_28_12'
Mb51=spark.table(Mb51_data).toPandas()

# VES purchase-order data. Rows carrying the 1888-01-01 PO create date and rows with
# nothing delivered are discarded.
ves_data='model_data_vf_19_01'
ves=spark.table(ves_data).toPandas()
usable_po=(ves['PO_Create_Date']!='1888-01-01') & (ves['Delivered_Quantity']!=0)
ves=ves[usable_po]

# Older VES extract (CSV), read only to obtain the planned lead time.
# The path below is a placeholder and must be replaced before running.
ves_w_po='enter file path to VES_data_final_06_12.csv'
ves_old=pd.read_csv(ves_w_po)

In [0]:
# Names for the simulation outputs / saved-model listing.
# NOTE(review): none of these are referenced in the visible part of the notebook;
# presumably they are used further down when results are persisted - confirm.
model_forecasting='model_based_testing_90_days_op'
model_numpos='model_based_numpos_90_days_op'
prediction_forecasting='prediction_based_testing_90_days_op'
prediction_numpos='prediction_based_numpos_90_days_op'
models_saved='prediction_models_to_use'

In [0]:
# model_run_date: folder name under which order_placed() looks up the pickled classifiers
model_run_date='2022-03-25'
# Start_dates: first day of the simulation (YYYY-MM-DD); also the inventory snapshot date
# a plant/material pair must have in order to be simulated
Start_dates = '2021-12-01'

*To find the best prediction model for a plant–material combination — model_based_prediction versus prediction_model (inventory-based prediction) — and to calculate the SPS (Stockout Possibility Score), the consumption, inventory, material receipts and order placement are simulated for each material over the next 13 weeks.
*The following are required for this:
  1. Daily inventory in hand
  2. Historical MB51 consumption
  3. The reorder method of each material: inventory-based reorder point, frequency-based reorder point, or both
  4. The expected lead time, to estimate the material receipt date
  5. The expected order placement, to place orders

In [0]:
#getting required fields from inventory
# An explicit copy is taken so the column assignments below write to an independent
# frame rather than to a slice of h_inv.
h_inv_f=h_inv[['SnapshotDate','MaterialID','LocationID','BatchID','InventoryStockUnRestricted','InventoryUnitOfMeasure']].copy()

# Parse the snapshot date once (strict YYYY-MM-DD) and reuse it for both derived columns
snapshot_ts=pd.to_datetime(h_inv_f['SnapshotDate'],format="%Y-%m-%d")

# Creating Week attribute (ISO week number).
# Series.dt.week was deprecated in pandas 1.1 and removed in 2.0, so isocalendar() is
# used when available; the legacy accessor is kept as a fallback for older pandas.
if hasattr(snapshot_ts.dt,'isocalendar'):
  iso_week=snapshot_ts.dt.isocalendar().week
  # keep the legacy dtypes: int64 normally, float64 (NaN) when some dates are missing
  h_inv_f['Week']=iso_week.astype('float64' if iso_week.isna().any() else 'int64')
else:
  h_inv_f['Week']=snapshot_ts.dt.week

# SnapshotDate is stored as plain datetime.date objects
h_inv_f['SnapshotDate']=snapshot_ts.dt.date

In [0]:
# Sanity check: does any material appear with more than one unit of measure?
dat_check=h_inv_f[['MaterialID','InventoryUnitOfMeasure']].drop_duplicates()

# One row per material holding the number of distinct (non-null) units of measure
dat_check1=pd.pivot_table(dat_check,index=['MaterialID'],values='InventoryUnitOfMeasure',aggfunc='count')
uom_per_material=dat_check1['InventoryUnitOfMeasure']
print(len(uom_per_material.unique()))
uom_per_material.value_counts()

In [0]:
# Total unrestricted stock per plant / material / day, summed over all batches
daily_keys=['LocationID','MaterialID','SnapshotDate']
h_inv_f['InventoryStockUnRestricted_daily']=h_inv_f.groupby(by=daily_keys)['InventoryStockUnRestricted'].transform(np.sum)

# The batch-level columns are no longer needed; once they are removed the rows of a
# given plant / material / day are identical and collapse to a single row
h_inv_f=h_inv_f.drop(columns=['BatchID','InventoryStockUnRestricted']).drop_duplicates()

# Ordered daily series with a fresh 0..n-1 index and SnapshotDate back as datetime64
h_inv_fi=h_inv_f.sort_values(by=daily_keys).reset_index(drop=True)
h_inv_fi['SnapshotDate']=pd.to_datetime(h_inv_fi['SnapshotDate'],format="%Y-%m-%d")
print(h_inv_fi.shape)
h_inv_fi.head()

In [0]:
#Code to fill MB51 missing dates
Mb51['Posting_Date'] = pd.to_datetime(Mb51['Posting_Date'])

# First and last posting date of every plant / material pair, in order of first
# appearance. One grouped pass replaces re-filtering Mb51 twice for every pair.
posting_span = Mb51.groupby(['Plant', 'Material'], sort=False)['Posting_Date'].agg(['min', 'max']).reset_index()
# pairs without any valid posting date cannot span a calendar
posting_span = posting_span.dropna(subset=['min', 'max'])

#Populating missing dates
# The per-pair calendars are collected in a list and concatenated once:
# DataFrame.append inside a loop copies the whole frame every iteration (quadratic)
# and was removed in pandas 2.0.
calendar_frames = [
  pd.DataFrame({'Plant': plant, 'Material': matl, 'Date': pd.date_range(start=first_day, end=last_day, freq='D')})
  for plant, matl, first_day, last_day in zip(posting_span['Plant'], posting_span['Material'], posting_span['min'], posting_span['max'])
]
if calendar_frames:
  Mb51alldates = pd.concat(calendar_frames, ignore_index=True)
else:
  # no postings at all: keep an empty frame with the expected columns / key dtypes
  Mb51alldates = Mb51[['Plant', 'Material']].iloc[0:0].assign(Date=pd.to_datetime([]))

#merging all dates with MB51
# NOTE(review): if Mb51 holds several postings for the same plant / material / day, the
# merge yields several rows for that day - confirm that this is intended for the
# per-week mean / std computed from this frame.
Mb51matls = pd.merge(Mb51alldates, Mb51, left_on = ['Plant', 'Material', 'Date'], right_on = ['Plant', 'Material', 'Posting_Date'], how = 'left')
Mb51matls = Mb51matls.drop(columns=['Posting_Date'])

#replacing null values with 0 (days without a posting)
Mb51matls['Qty_in_BUoM'] = Mb51matls['Qty_in_BUoM'].fillna(0)

#creating week attribute (ISO week number)
# Series.dt.week was removed in pandas 2.0; isocalendar() is used when available and
# cast to int64 so the dtype matches what dt.week produced.
date_accessor = Mb51matls['Date'].dt
if hasattr(date_accessor, 'isocalendar'):
  Mb51matls['week'] = date_accessor.isocalendar().week.astype('int64')
else:
  Mb51matls['week'] = date_accessor.week

In [0]:
# Mean and standard deviation of the MB51 quantity per plant / material / week
inv_con1 = pd.pivot_table(Mb51matls, index=['Plant','Material','week'], values='Qty_in_BUoM', aggfunc=['mean', 'std']).reset_index()

# The pivot returns two-level column labels; glue both levels into one flat name
inv_con1.columns = [''.join((outer, str(inner))) for outer, inner in inv_con1.columns.tolist()]

# Align the column names with the inventory data
consumption_aliases = {
  'Plant': 'LocationID',
  'Material': 'MaterialID',
  'week': 'Week',
  'meanQty_in_BUoM': 'daily_mean_consumption',
  'stdQty_in_BUoM': 'std_dev_consumption',
}
inv_con = inv_con1.rename(columns=consumption_aliases)

In [0]:
#1. populating data for reorder point calculations

# Keep only VES rows whose POqty_to_AvgDelQty is present and not +inf
po_ratio=ves['POqty_to_AvgDelQty']
ves=ves[po_ratio.notna() & (po_ratio!=np.inf)]

# Total scheduled quantity per plant / material / PO create date, one row per key
po_keys=['Plant_ID','Material_No.','PO_Create_Date']
po_lines=ves[po_keys+['Purchase_Order_Scheduled_Qty']]
ves_date=(
  po_lines
  .assign(Total_ordered_QTY=po_lines.groupby(by=po_keys)['Purchase_Order_Scheduled_Qty'].transform(np.sum))
  .drop(columns=['Purchase_Order_Scheduled_Qty'])
  .drop_duplicates()
)
ves_date.head()

In [0]:
#2. Getting inventory in hand at the time of order placement

# Attach the PO create date / ordered quantity to the daily inventory rows;
# days without an order keep NaN in the order columns
inventory_with_orders=pd.merge(
  h_inv_fi,ves_date,how='left',
  left_on=['LocationID','MaterialID','SnapshotDate'],
  right_on=['Plant_ID','Material_No.','PO_Create_Date'],
).drop(columns=['Plant_ID','Material_No.'])

# Keep only the days on which an order was placed, ordered per plant / material by
# PO create date. The two successive reset_index calls leave two helper columns:
# 'index' (row label before sorting) and 'level_0' (0..n-1 position after sorting).
Rop1=(
  inventory_with_orders[inventory_with_orders['Total_ordered_QTY'].notna()]
  .sort_values(by=['LocationID','MaterialID','PO_Create_Date'])
  .reset_index()
  .reset_index()
)
Rop1.head()

In [0]:
#3. calculating the days between 2 consecutive orders
def checkPrevious(x):
  """Days between the order in row `x` and the previous row of Rop1.

  Intended for ``Rop1.apply(checkPrevious, 1)``. Reads the notebook-level frame
  ``Rop1`` and uses ``x.level_0`` as the row's position in it.

  Returns 0 for the first row, for a row whose previous row belongs to a different
  plant / material pair, and (after printing a message) when the gap cannot be
  computed. Otherwise returns the gap between the two PO create dates in days.
  """
  previous_position = x.level_0 - 1

  # The very first row has no predecessor; handle it explicitly instead of relying
  # on a failed lookup being swallowed by the exception handler
  if previous_position < 0:
    return 0

  try:
    same_pair = (x['LocationID'] == Rop1.loc[previous_position,'LocationID']) and (x['MaterialID'] == Rop1.loc[previous_position, 'MaterialID'])
    if same_pair:
      return (x.PO_Create_Date - Rop1.loc[previous_position, 'PO_Create_Date']).days
    return 0
  except Exception as err:
    # best effort: report the reason and treat the gap as 0
    print("Exception", err)
    return 0

# Day gap between each purchase order and the previous one of the same plant / material
Rop1['days']=Rop1.apply(checkPrevious,axis=1)
Rop1.head()

* Material orders follow an inventory-based reorder method, a frequency-based reorder method, or a hybrid of both.
* To classify each material by its reorder method, two shares are computed:
  1. The % of historical orders placed while the inventory in hand was within +/-20% of the historical mean/median inventory in hand at the time of order placement (inventory-based method)
  2. The % of orders placed within +/-20% of the average time between two consecutive orders (frequency-based method)

In [0]:
#creating a table with unique set of plant and material combinations from the reorder point data
# drop_duplicates() returns a new frame, so the result columns added below do not
# write into a slice of Rop1
reorder_method_check=Rop1[['LocationID','MaterialID']].drop_duplicates()

def _pct_within_band(values,centre,low=.8,high=1.2):
  """Percentage (rounded to a whole number) of `values` lying in [centre*low, centre*high]."""
  in_band=(values>=centre*low) & (values<=centre*high)
  return np.round((in_band.sum()/values.shape[0])*100,0)

#mean,median calculation and weightage of it on plant,mat combination to identify the order placement method
# The orders are grouped once; each pair's rows are then looked up from the grouping
# instead of scanning the whole of Rop1 with a boolean mask for every pair.
orders_by_pair=Rop1.groupby(['LocationID','MaterialID'],sort=False)

freqop=[]    # % of orders whose day gap is within +/-20% of the mean gap
invtop=[]    # % of orders whose stock is within +/-20% of the median stock at order time
invtop_2=[]  # % of orders whose stock is within +/-20% of the mean stock at order time
modes=[]     # mean day gap between orders (despite the name, not a mode)
medians=[]   # median stock at order time
means=[]     # mean stock at order time
for l_id,m_id in zip(reorder_method_check['LocationID'],reorder_method_check['MaterialID']):
  a=orders_by_pair.get_group((l_id,m_id))
  gaps=a['days']
  stock=a['InventoryStockUnRestricted_daily']
  # NOTE(review): 'days' is 0 for the first order of each pair, so that 0 is part of
  # the mean gap - confirm this is intended.
  c=gaps.mean()
  d=stock.median()
  e=stock.mean()
  freqop.append(_pct_within_band(gaps,c))
  invtop.append(_pct_within_band(stock,d))
  invtop_2.append(_pct_within_band(stock,e))
  modes.append(c)
  medians.append(d)
  means.append(e)

#method weightage along with values
reorder_method_check['Frequency_Based_OP']=freqop
reorder_method_check['Inventory_Based_OP_median']=invtop
reorder_method_check['Inventory_Based_OP_mean']=invtop_2
reorder_method_check['freq_val']=modes
reorder_method_check['median_val']=medians
reorder_method_check['mean_val']=means

In [0]:
# Spot-check the reorder-method statistics of a single plant / material pair
spot_check=(reorder_method_check['LocationID']==5955) & (reorder_method_check['MaterialID']==22002545)
reorder_method_check[spot_check]

In [0]:
#flagging method of reorder point based on weightage of method (threshold 50%)
def _classify_reorder_method(fpq,ibm,ibe,threshold=50):
  """Pick the reorder method for one plant / material pair.

  fpq, ibm, ibe are the order percentages of the frequency-, median-inventory- and
  mean-inventory-based methods. Returns 'hybrid' when all three are below
  `threshold`; otherwise the method with the highest percentage, with ties resolved
  in the order frequency > median > mean.

  This gives the same result as the earlier ten-branch if/elif chain for every
  ordering of the three values, and unlike that chain it always returns a value, so
  the flag list cannot end up shorter than the frame.
  """
  if (fpq<threshold) and (ibm<threshold) and (ibe<threshold):
    return 'hybrid'
  if fpq>=max(ibm,ibe):
    return 'frequency'
  if ibm>=ibe:
    return 'median'
  return 'mean'

aty=[
  _classify_reorder_method(fpq,ibm,ibe)
  for fpq,ibm,ibe in zip(reorder_method_check['Frequency_Based_OP'],reorder_method_check['Inventory_Based_OP_median'],reorder_method_check['Inventory_Based_OP_mean'])
]

#creating attribute for method flag
reorder_method_check['flag']=aty
reorder_method_check=reorder_method_check.reset_index(drop=True)

#tagging combinations of plant, material which has median_val 0 and flag median as not defined
reorder_method_check.loc[(reorder_method_check['median_val']==0) & (reorder_method_check['flag']=='median'),'flag']='not_defined'

#tagging combinations of plant, material with only one order which has freq_val 0 and flag frequency as not defined
# (evaluated on the unrounded mean gap, i.e. before freq_val is rounded below)
reorder_method_check.loc[(reorder_method_check['freq_val']==0) & (reorder_method_check['flag']=='frequency'),'flag']='not_defined'

#defining column data types: mean stock and mean gap as whole numbers
reorder_method_check['mean_val']=reorder_method_check['mean_val'].round(0).astype(np.int64)
reorder_method_check['freq_val']=reorder_method_check['freq_val'].round(0).astype(np.int64)

#checking no of materials with a defined reorder method
len(reorder_method_check[(reorder_method_check['flag']!='not_defined')]['MaterialID'].unique())

In [0]:
# Number of plant / material pairs per reorder-method flag
reorder_method_check['flag'].value_counts()

In [0]:
# Plant / material pairs whose reorder method was tagged as not defined
reorder_method_check.loc[reorder_method_check['flag']=='not_defined']

In [0]:
# Historical fulfilment: delivered quantity relative to the scheduled PO quantity,
# totalled per plant / material / vendor
Delivery_qty = pd.pivot_table(ves, index=['Plant_ID','Material_No.', 'updated_VS_ID'], values=['Purchase_Order_Scheduled_Qty','Delivered_Quantity'], aggfunc='sum').reset_index()
fulfilment = Delivery_qty['Delivered_Quantity']/Delivery_qty['Purchase_Order_Scheduled_Qty']
# over-deliveries are capped at 100 %
Delivery_qty['Fulfillment_rate'] = fulfilment.mask(fulfilment>1, 1)

In [0]:
# Share of each vendor in the total scheduled PO quantity of a plant / material pair
Order_share = Delivery_qty.drop(columns=['Delivered_Quantity', 'Fulfillment_rate'])

# Total scheduled quantity per material / plant, joined back onto the vendor rows
# (the vendor-level quantity gets the _x suffix, the total the _y suffix)
Total_order = Order_share.groupby(['Material_No.', 'Plant_ID'])['Purchase_Order_Scheduled_Qty'].sum().reset_index()
Order_share = Order_share.merge(Total_order, how = 'left', on = ['Material_No.', 'Plant_ID'])
Order_share['Vendor_share'] = Order_share['Purchase_Order_Scheduled_Qty_x']/Order_share['Purchase_Order_Scheduled_Qty_y']
Order_share = Order_share.drop(columns=['Purchase_Order_Scheduled_Qty_x', 'Purchase_Order_Scheduled_Qty_y'])

In [0]:
# Reorder quantity per vendor for every plant / material pair

# Average ordered quantity per pair (quantities rounded before averaging, mean cast to int)
avg_order_qty=pd.pivot_table(Rop1.round({'Total_ordered_QTY':0}),index=['LocationID','MaterialID'],values='Total_ordered_QTY',aggfunc='mean').reset_index()
avg_order_qty['Total_ordered_QTY']=avg_order_qty['Total_ordered_QTY'].astype(np.int64)

# Split the average order across vendors according to their historical share
RoQ_Vendor = avg_order_qty.merge(Order_share, how = 'left', left_on = ['LocationID', 'MaterialID'], right_on = ['Plant_ID', 'Material_No.'])
RoQ_Vendor['Vendor_ROQ'] = (RoQ_Vendor['Vendor_share']*RoQ_Vendor['Total_ordered_QTY']).round(0)
RoQ_Vendor = (
  RoQ_Vendor
  .drop(columns=['Material_No.', 'Plant_ID', 'Vendor_share'])
  .rename(columns={'Total_ordered_QTY':'Avg_ordered_QTY'})
)
RoQ_Vendor.head()

In [0]:
# 1. calculating average leadtime for plant, material combinations

# Lead-time base: VES rows with a present, finite POqty_to_AvgDelQty, reduced to the
# key and date columns
finite_ratio = ves['POqty_to_AvgDelQty'].notna() & (ves['POqty_to_AvgDelQty']!=np.inf)
ldt = ves.loc[finite_ratio, ['Plant_ID','Material_No.','First_GR_Date','PO_Create_Date']].copy()

# First goods-receipt timestamp is read as UTC and reduced to its calendar date
first_gr_utc = pd.to_datetime(ldt['First_GR_Date'],utc=True)
ldt['First_GR_Date'] = pd.to_datetime(first_gr_utc.dt.date)

# Lead time in days from PO creation to first goods receipt
ldt['lead_time'] = (ldt['First_GR_Date']-ldt['PO_Create_Date']).dt.days

In [0]:
# 2. Getting the Planned lead time

pldt=ves_old[['Plant','Material No.','Sum of Planned Days']].copy()
short_plan_count=int((pldt['Sum of Planned Days']<1).sum())
print('number of planned lead time records less than one',short_plan_count)

# Average planned lead time per plant / material, rounded to whole days
pldt_pivot=pd.pivot_table(pldt,index=['Plant','Material No.'],values='Sum of Planned Days',aggfunc='mean').reset_index()
pldt_pivot['Sum of Planned Days']=pldt_pivot['Sum of Planned Days'].round(0)
print('number of unique materials',len(pldt_pivot['Material No.'].unique()))

# Align the column names with the inventory data
planned_aliases={'Plant':'LocationID','Material No.':'MaterialID','Sum of Planned Days':'Planned_avg_lead_time'}
pldt_pivot=pldt_pivot.rename(columns=planned_aliases)
pldt_pivot.head(3)

In [0]:
# Average historical lead time per plant / material
ldt_cut=pd.pivot_table(ldt,index=['Plant_ID','Material_No.'],values='lead_time',aggfunc='mean').reset_index()

pair_keys=['LocationID','MaterialID']

# Vendor reorder quantities + average lead time (only pairs present in both)
vendor_plant_data=(
  RoQ_Vendor
  .merge(ldt_cut,how='inner',left_on=pair_keys,right_on=['Plant_ID','Material_No.'])
  .drop(columns=['Plant_ID','Material_No.'])
  .rename(columns={'lead_time':'Avg_lead_time'})
)
vendor_plant_data['Avg_lead_time']=vendor_plant_data['Avg_lead_time'].round(0).astype(np.int64)

# + vendor fulfilment rate (the quantity columns of Delivery_qty are not carried over)
vendor_plant_data=(
  vendor_plant_data
  .merge(Delivery_qty,how='left',left_on=pair_keys+['updated_VS_ID'],right_on=['Plant_ID','Material_No.','updated_VS_ID'])
  .drop(columns=['Plant_ID','Material_No.','Delivered_Quantity','Purchase_Order_Scheduled_Qty'])
)

# + planned lead time (pairs without one are dropped by the inner join)
vendor_plant_data=vendor_plant_data.merge(pldt_pivot,how='inner',on=pair_keys)

# + reorder-method flag and its values; pairs flagged 'not_defined' are removed
vendor_plant_data=(
  vendor_plant_data
  .merge(reorder_method_check,on=pair_keys,how='left')
  .drop(columns=['Frequency_Based_OP','Inventory_Based_OP_median','Inventory_Based_OP_mean'])
)
vendor_plant_data=vendor_plant_data[vendor_plant_data['flag']!='not_defined']

In [0]:
# Keep only the plant / material pairs that exist in both the vendor data and the
# inventory history

pair_keys=['LocationID','MaterialID']

# Unique pairs present in vendor_plant_data
Rop12=vendor_plant_data[pair_keys].drop_duplicates()

# Unique pairs that have an inventory snapshot on the simulation start date (Start_dates)
has_start_snapshot=h_inv_fi['SnapshotDate'] == Start_dates
inv_check=h_inv_fi.loc[has_start_snapshot,pair_keys].drop_duplicates()

# Shapes before and after restricting both frames to pairs with a start-date snapshot
print(Rop12.shape,vendor_plant_data.shape)
vendor_plant_data=vendor_plant_data.merge(inv_check,on=pair_keys,how='inner')
Rop12=Rop12.merge(inv_check,on=pair_keys,how='inner')
print(Rop12.shape,vendor_plant_data.shape)
print('Unique material count: ',len(Rop12['MaterialID'].unique()))

In [0]:
# def order_placed is used by model_based_prediction to determine when to place the order
def order_placed (SnapshotDate,Plants,Matls,Exp_invt,counter):
  """Predict whether an order should be placed for a plant / material on a given day.

  Builds a one-row feature vector and scores it with the classifier pickled at
  /dbfs/FileStore/models/classifiers/<model_run_date>/<Plants>/<Matls>/_class_if.pkl.

  Reads the notebook-level objects pldt_pivot (planned lead time), Mb51matls (daily
  MB51 quantities), inv_con (weekly consumption statistics) and model_run_date.

  Parameters
  ----------
  SnapshotDate : day being evaluated (anything pd.to_datetime accepts)
  Plants, Matls : plant and material identifiers
  Exp_invt : expected inventory on that day
  counter : days since the last order

  Returns
  -------
  The first element of the classifier's ``predict`` output (callers compare it with 1).

  Raises
  ------
  IndexError : no planned lead time or no MB51 history exists for the pair.
  OSError : the classifier file cannot be opened.
  """
  #getting planned lead time
  planned=pldt_pivot[(pldt_pivot['LocationID']==Plants) & (pldt_pivot['MaterialID']==Matls)]['Planned_avg_lead_time'].values
  if len(planned)==0:
    raise IndexError('no planned lead time for plant %s, material %s' % (Plants,Matls))
  lead_time=planned[0]

  op=pd.DataFrame(data=[[SnapshotDate,Plants,Matls,Exp_invt,lead_time,counter]],columns=['SnapshotDate','LocationID','MaterialID','InventoryStockUnRestricted_daily','lead_time','counter'])
  # snapshot day at midnight
  op['SnapshotDate']=pd.to_datetime(op['SnapshotDate']).dt.normalize()

  # getting planned_gr date with extended_gr date of 2,4 and 6 weeks
  # The arithmetic is done on datetime64 values, which needs no exception-driven
  # fallback; normalize() drops any fractional day of the lead time.
  op['Planned_GR_date']=(op['SnapshotDate']+pd.to_timedelta(op['lead_time'],unit='D')).dt.normalize()
  for extra_weeks in (2,4,6):
    op['Extended_GR_date_'+str(extra_weeks)]=op['Planned_GR_date']+timedelta(weeks=extra_weeks)

  # average daily MB51 quantity of the pair over its whole history (days without a
  # posting count as 0), taken as an absolute value
  Mb51mat=Mb51matls[(Mb51matls['Plant']==Plants) & (Mb51matls['Material']==Matls)]
  if Mb51mat.empty:
    raise IndexError('no MB51 history for plant %s, material %s' % (Plants,Matls))
  avg_con_w_0=abs(Mb51mat['Qty_in_BUoM'].mean())

  #creating a dataframe with all dates starting from snapshot date to extended_gr_date_6 and populating it with daily mean consumption
  weekly_consumption=inv_con[(inv_con['LocationID']==Plants) & (inv_con['MaterialID']==Matls)]
  calendar_df=pd.DataFrame({'dates':pd.date_range(start=op['SnapshotDate'].values[0], end=op['Extended_GR_date_6'].values[0])})
  calendar_df['LocationID']=Plants
  calendar_df['MaterialID']=Matls
  # ISO week number; Series.dt.week was removed in pandas 2.0, so isocalendar() is
  # used when available (cast to int64 to match the Week column of inv_con)
  date_accessor=calendar_df['dates'].dt
  if hasattr(date_accessor,'isocalendar'):
    calendar_df['Week']=date_accessor.isocalendar().week.astype('int64')
  else:
    calendar_df['Week']=date_accessor.week
  inv_consumption=pd.merge(calendar_df,weekly_consumption,on=['LocationID','MaterialID','Week'],how='left')

  planned_gr=op['Planned_GR_date'].values[0]

  def _consumption_until(window_end):
    """Sum of the daily mean consumption from the planned GR date up to (excluding) window_end."""
    in_window=(inv_consumption['dates']>=planned_gr) & (inv_consumption['dates']<window_end)
    return inv_consumption.loc[in_window,'daily_mean_consumption'].sum()

  consump_2=_consumption_until(op['Extended_GR_date_2'].values[0])
  consump_4=_consumption_until(op['Extended_GR_date_4'].values[0])
  consump_6=_consumption_until(op['Extended_GR_date_6'].values[0])

  # getting consumption ratio for 2,4 and 6 weeks along with inventory ratio
  # NOTE(review): when the average consumption is 0 these ratios become inf / NaN -
  # confirm the classifier is expected to receive such values.
  op['Expected_consumption_btw_gr_dates_2']=abs(consump_2)
  op['Expected_consumption_btw_gr_dates_4']=abs(consump_4)
  op['Expected_consumption_btw_gr_dates_6']=abs(consump_6)
  op['inventory_ratio_with_0']=abs(op['InventoryStockUnRestricted_daily'].values[0]/avg_con_w_0)
  op['Expected_consumption_ratio_2_with_0']=abs(consump_2/avg_con_w_0)
  op['Expected_consumption_ratio_4_with_0']=abs(consump_4/avg_con_w_0)
  op['Expected_consumption_ratio_6_with_0']=abs(consump_6/avg_con_w_0)

  features=['counter','inventory_ratio_with_0','Expected_consumption_ratio_2_with_0', 'Expected_consumption_ratio_4_with_0', 'Expected_consumption_ratio_6_with_0']

  #Extracting the model object from the directory where the models were saved
  file_nameds = '/dbfs/FileStore/models/classifiers/'+f"{model_run_date}"+'/'+str(Plants)+'/'+str(Matls)+'/'+ '_class_if.pkl'
  # NOTE: pickle.load can execute arbitrary code; only load model files from a trusted location.
  # The context manager closes the file handle, which the bare open() call never did.
  with open(file_nameds, "rb") as model_file:
    loaded_model_classif = pickle.load(model_file)
  reorder_points = loaded_model_classif.predict(op[features].copy())[0]
  return reorder_points

In order to find which model — model_based_prediction (ML models) or trend_based (inventory- or frequency-based) — is suitable for forecasting each plant–material combination, the following steps are followed:

  1. Simulate the forecast with model_based_prediction for the next 13 weeks.
  2. Simulate the forecast with trend_based for the next 13 weeks.
  3. Choose the best model as the one whose number of orders placed is closest to the actual number.
  4. For trend_based, check whether an inventory-based method flag (mean or median) needs to be replaced by hybrid to improve order placement.

In [0]:
###Setting up base for the 13 week forecast. 
# The inventory consumption/inflow and PO flag -> 'forecasted'
# Identifying open Purchase orders and their expected delivery -> 'pending'
# Vendors -> List of vendors for the material plant combination and their attributes
# Explanations returned together with each lead-time prediction -> 'eli5_reasons'
#
# NOTE(review): pred_output and historical_data are not defined in this part of the
# notebook; presumably they come from the notebook executed via %run at the top - confirm.
# NOTE(review): the frames are grown with DataFrame.append inside the loop, which copies
# the frame on every call and is not available from pandas 2.0 on.


forecasted=pd.DataFrame()
pending=pd.DataFrame()
vendors=pd.DataFrame()
eli5_reasons=pd.DataFrame()

Start_date = datetime.datetime.strptime(Start_dates, '%Y-%m-%d')

# columns taken from VES for the open purchase orders
open_po_columns = ['Plant_ID', 'Material_No.', 'PO_Create_Date', 'Purchase_Order_Scheduled_Qty', 'updated_VS_ID']

##### Getting the Reorder trigger attributes

for Plant,Matl in zip(Rop12['LocationID'],Rop12['MaterialID']):
  try:
  ############################## Dataframe for logging POs and expected dates of receipts#########################################
    vendors1 = vendor_plant_data[(vendor_plant_data['LocationID']==Plant) & (vendor_plant_data['MaterialID']==Matl)]
    vendors_v1 = vendors1[['LocationID', 'MaterialID', 'updated_VS_ID', 'Fulfillment_rate', 'Vendor_ROQ','Avg_lead_time','Planned_avg_lead_time']]

    # Checking if there are open purchase orders in the VES and logging the same in the 'pending' dataframe
    # (created on or before the start date and with a delivery date on or after it).
    # The query is run once; it used to be repeated unchanged inside the branch below.
    pending_v1 = ves[(ves['Plant_ID']==Plant) & (ves['Material_No.']==Matl) & (ves['PO_Create_Date'] <= Start_date) & (ves['Delivery_date'] >= Start_date)][open_po_columns]

    if (pending_v1.shape[0] >0): #if there are open pending orders
      pending_v1 = pd.merge(pending_v1, vendors_v1, on = ['updated_VS_ID'], how = 'left')
      # for an open order the quantity is the actual scheduled PO quantity
      pending_v1['Vendor_ROQ'] = pending_v1['Purchase_Order_Scheduled_Qty']
      pds=[]
      for pl_id,v_id,mt_id,po_c_d,p_qty in zip(pending_v1['LocationID'],pending_v1['updated_VS_ID'],pending_v1['MaterialID'],pending_v1['PO_Create_Date'].dt.date.astype(str),pending_v1['Vendor_ROQ']):

        lead_time_pred,tempeli5 = pred_output(historical_data, pl_id, v_id, mt_id, po_c_d, p_qty) #predicting lead time for open orders
        tempeli5['LocationID']=pl_id
        tempeli5['updated_VS_ID']=v_id
        tempeli5['MaterialID']=mt_id
        tempeli5['PO_Create_Date']=po_c_d
        eli5_reasons=eli5_reasons.append(tempeli5,ignore_index= True)
        pds.append(lead_time_pred)
      pending_v1['lead_time']=pds    

      pending_v1 = pending_v1[['LocationID', 'MaterialID', 'updated_VS_ID', 'Fulfillment_rate', 'Vendor_ROQ', 'PO_Create_Date',  'lead_time', 'Avg_lead_time', 'Planned_avg_lead_time']]

      #Getting expected delivery date and logging in pending
      try:
        pending_v1['Exp_GR_Date'] = pending_v1['PO_Create_Date'].dt.date + pd.to_timedelta(pending_v1['lead_time'], unit='D') 
      except Exception:
        # fallback when the vectorised date arithmetic is not supported: add the
        # lead time (whole days) row by row
        ags=[]
        for i,j in zip(pending_v1['PO_Create_Date'],pending_v1['lead_time']):
          ags.append(i+timedelta(int(j)))
        pending_v1['Exp_GR_Date']=ags
        pending_v1['Exp_GR_Date']=pending_v1['Exp_GR_Date'].dt.date
      pending_v1['Delivery_qty'] = pending_v1['Vendor_ROQ']*pending_v1['Fulfillment_rate']

    else: #if no pending orders, create df with dummy values
      pending_v1 = pd.DataFrame({'LocationID': [Plant], 'MaterialID': [Matl], 'updated_VS_ID': [0], 'Fulfillment_rate': [0.0], 'Vendor_ROQ': [0.0], 'PO_Create_Date': ['2000-01-01'],'lead_time': [0],'Avg_lead_time':[0],'Planned_avg_lead_time':[0]})
      pending_v1['PO_Create_Date'] =   pd.to_datetime(pending_v1['PO_Create_Date'], format="%Y-%m-%d") 
      pending_v1['Exp_GR_Date'] = pending_v1['PO_Create_Date'].dt.date + pd.to_timedelta(pending_v1['lead_time'].values[0], unit='D')
      pending_v1['Delivery_qty'] = pending_v1['Vendor_ROQ']*pending_v1['Fulfillment_rate']

    # Days since the most recent PO created strictly before the start date. The same
    # statement used to be repeated in both branches above. A pair without any earlier
    # PO makes max() fail, which skips the pair through the handler at the bottom.
    date_since_order = np.timedelta64( Start_date - max(ves[(ves['Plant_ID']==Plant) & (ves['Material_No.']==Matl) & (ves['PO_Create_Date'] < Start_date)]['PO_Create_Date']), 'D')
    date_since_order = date_since_order.astype(int)

    ############################## Dataframe for consumption, receipts and PO triggers#########################################
    #Creating only the first row of the forecasted dataframe with the following elements:
    # 1. Snapshot Date
    # 2. Material ID
    # 3. Location ID
    # 4. Unit of Measure
    # 5. Week
    # 6. Inventory stock in hand (from the inventory history)
    # 7. Average Consumption
    # 8. Std Deviation of consumption
    # 9. Material received
    # 10. Expected inventory at the end of the day = inventory in hand + consumption + material received, floored at 0
    #     NOTE(review): consumption is added as-is, so it is presumably stored as a negative quantity - confirm.
    # 11. Counter: Count of days since last order 
    # 12. Reorder flag

    forecasted_v1 = h_inv_fi[(h_inv_fi['SnapshotDate'] == Start_date) & (h_inv_fi['MaterialID'] ==Matl) & (h_inv_fi['LocationID'] == Plant)]
    forecasted_v1=pd.merge(forecasted_v1,inv_con,on=['LocationID','MaterialID','Week'],how='left')
    # Exp_GR_Date holds datetime.date objects at this point; both sides are converted to
    # datetime64 so receipts due on the start date are matched reliably (comparing a
    # date object with a Timestamp is version dependent in pandas).
    forecasted_v1['Matl_Recp'] = pending_v1.loc[pd.to_datetime(pending_v1['Exp_GR_Date']) == pd.to_datetime(Start_date) , 'Delivery_qty'].sum()
    forecasted_v1.loc[forecasted_v1['daily_mean_consumption'].isnull()==True,'daily_mean_consumption']=0
    forecasted_v1['Exp_inv'] = max((forecasted_v1['InventoryStockUnRestricted_daily'].astype(np.int64) + forecasted_v1['daily_mean_consumption'].astype(np.int64) + forecasted_v1['Matl_Recp'].astype(np.int64)).values[0],0)
    forecasted_v1['counter'] = date_since_order

    # check if order needs to be placed on start date. If yes, log PO Create date, Vendor, lead time, expected delivery date, PO Quantity, Expected delivery in Pending based on the predited order placement
    # An open order created exactly on the start date counts as a reorder on that day

    if max(pending_v1['PO_Create_Date'])==Start_date:
      forecasted_v1['Reorder']=1

    #order placement prediction
    else:
      reorder_call = order_placed(forecasted_v1['SnapshotDate'].values[0],forecasted_v1['LocationID'].values[0],forecasted_v1['MaterialID'].values[0],forecasted_v1['Exp_inv'].values[0],forecasted_v1['counter'].values[0])        

      forecasted_v1['Reorder'] = reorder_call


    if (forecasted_v1['Reorder'].values[0] == 1):
      # one new PO per vendor of the pair, created on the start date
      po_place = vendors_v1.copy()
      po_place['PO_Create_Date'] = Start_date
      po_place['PO_Create_Date'] = pd.to_datetime(po_place['PO_Create_Date'], format="%Y-%m-%d")

      pds=[]
      for pl_id,v_id,mt_id,po_c_d,p_qty in zip(po_place['LocationID'],po_place['updated_VS_ID'],po_place['MaterialID'],po_place['PO_Create_Date'].dt.date.astype(str),po_place['Vendor_ROQ']):
        lead_time_pred,tempeli5 = pred_output(historical_data, pl_id, v_id, mt_id, po_c_d, p_qty)
        tempeli5['LocationID']=pl_id
        tempeli5['updated_VS_ID']=v_id
        tempeli5['MaterialID']=mt_id
        tempeli5['PO_Create_Date']=po_c_d
        eli5_reasons=eli5_reasons.append(tempeli5,ignore_index= True)
        pds.append(lead_time_pred)
      po_place['lead_time']=pds

      try:
        po_place['Exp_GR_Date'] = po_place['PO_Create_Date'].dt.date + pd.to_timedelta(po_place['lead_time'], unit='D')
      except Exception:
        # same row-by-row fallback as for the open orders above
        ags=[]
        for i,j in zip(po_place['PO_Create_Date'],po_place['lead_time']):
          ags.append(i+timedelta(int(j)))
        po_place['Exp_GR_Date']=ags
        po_place['Exp_GR_Date']=po_place['Exp_GR_Date'].dt.date
      po_place['Delivery_qty'] = po_place['Vendor_ROQ']*po_place['Fulfillment_rate']
      pending_v1 = pending_v1.append(po_place, ignore_index= True)
    forecasted=forecasted.append(forecasted_v1,ignore_index= True)
    pending=pending.append(pending_v1,ignore_index= True)
    vendors=vendors.append(vendors_v1,ignore_index= True)
  except Exception as err:
    # best effort: the pair is left out of the simulation, but the reason is reported
    # instead of being silently discarded
    print(Plant,Matl,'- unable to build model',err)
  ####End of loop

# uniform dtypes for the simulation that follows
forecasted['InventoryStockUnRestricted_daily']=forecasted['InventoryStockUnRestricted_daily'].astype(np.int64)
forecasted['daily_mean_consumption']=forecasted['daily_mean_consumption'].astype(np.int64)
forecasted['Exp_inv']=forecasted['Exp_inv'].astype(np.int64)
pending['Delivery_qty']=pending['Delivery_qty'].astype(np.int64)
pending['Exp_GR_Date']=pd.to_datetime(pending['Exp_GR_Date'])

In [0]:
# Size and first rows of the open / newly placed purchase orders
print(pending.shape)
pending.head()

In [0]:
# Size and first rows of the start-day forecast rows (one per plant / material pair that was built)
print(forecasted.shape)
forecasted.head()

In [0]:
# Size and first rows of the explanations collected from the lead-time predictions
print(eli5_reasons.shape)
eli5_reasons.head()

In [0]:
############Forecasting consumption, order placement, material receipts and expected inventory for next 90 days
# Every plant/material of Rop12 is rolled forward one day at a time from Start_date, starting
# from the rows prepared in `forecasted`, `pending`, `vendors` and `eli5_reasons`.
# Results are collected in `forecasting`, `pendings` and `leadtime_reasons`.

forecasting=pd.DataFrame()
pendings=pd.DataFrame()
leadtime_reasons=pd.DataFrame()

Start_date = datetime.datetime.strptime(Start_dates, '%Y-%m-%d')

for Plant,Matl in zip(Rop12['LocationID'],Rop12['MaterialID']):
  try:
    forecasted1=forecasted[(forecasted['LocationID']==Plant) & (forecasted['MaterialID']==Matl)].copy()
    pending1=pending[(pending['LocationID']==Plant)&(pending['MaterialID']==Matl)].copy()
    vendors1=vendors[(vendors['LocationID']==Plant)&(vendors['MaterialID']==Matl)].copy()
    leadtime_reasons1=eli5_reasons[(eli5_reasons['LocationID']==Plant)&(eli5_reasons['MaterialID']==Matl)].copy()

    for day_offset in range(1,90):
      #Last updated date
      todate = Start_date+timedelta(day_offset)
      week_no = int(todate.isocalendar()[1])
      tempdf = forecasted1.tail(1).copy() # taking the latest entry of the forecasted
      tempdf['SnapshotDate'] = todate
      tempdf['Week'] = week_no
      # The day counter restarts at 1 on the day after a reorder, otherwise it keeps counting up
      if tempdf['Reorder'].values[0] ==1:
        tempdf['counter']=1
      else:
        tempdf['counter'] = tempdf['counter'].values[0] + 1
      tempdf['Reorder'] = 0

      # Update Consumption stats (both fall back to 0 when no row exists for this week)
      try:
        week_rows = inv_con.loc[(inv_con['MaterialID'] == Matl) & (inv_con['LocationID'] == Plant) & (inv_con['Week'] == week_no)]
        tempdf['daily_mean_consumption'] = week_rows['daily_mean_consumption'].values[0]
        tempdf['std_dev_consumption'] = week_rows['std_dev_consumption'].values[0]
      except Exception:
        tempdf['daily_mean_consumption'] = 0
        tempdf['std_dev_consumption'] = 0

      # Update Material Receipt: deliveries whose expected goods-receipt date is today
      tempdf['Matl_Recp'] = pending1.loc[pending1['Exp_GR_Date'] == pd.to_datetime(todate), 'Delivery_qty'].sum()

      # Update the Expected inventory
      # NOTE(review): consumption is added as stored - presumably it is recorded as a negative quantity; confirm
      inv_calc = max((tempdf['Exp_inv'] + tempdf['Matl_Recp'] + tempdf['daily_mean_consumption']).values[0], 0) #Non negative inventory
      tempdf['Exp_inv']= int(inv_calc)

      #   Order Placement prediction - only asked when none of the last 3 logged days had a reorder
      if (forecasted1['Reorder'].tail(3).sum()==0):
        reorder = order_placed(tempdf['SnapshotDate'].values[0],tempdf['LocationID'].values[0],tempdf['MaterialID'].values[0],tempdf['Exp_inv'].values[0],tempdf['counter'].values[0])
        tempdf['Reorder'] = reorder
      else:
        tempdf['Reorder']=0

      # Order logging in 'Pending' by suppliers, PO Create date, Expected delivery, PO Qty and expected delivery qty:
      if(tempdf['Reorder'].values[0] ==1):
        po_place = vendors1.copy()
        po_place['PO_Create_Date'] = todate
        po_place['PO_Create_Date'] = pd.to_datetime(po_place['PO_Create_Date'], format="%Y-%m-%d")
        pds=[]
        for pl_id,v_id,mt_id,po_c_d,p_qty in zip(po_place['LocationID'], po_place['updated_VS_ID'], po_place['MaterialID'], po_place['PO_Create_Date'].dt.date.astype(str), po_place['Vendor_ROQ']):
          lead_time_pred,tempeli5 = pred_output(historical_data, pl_id, v_id, mt_id, po_c_d, p_qty)
          tempeli5['LocationID']=pl_id
          tempeli5['updated_VS_ID']=v_id
          tempeli5['MaterialID']=mt_id
          tempeli5['PO_Create_Date']=po_c_d
          leadtime_reasons1=leadtime_reasons1.append(tempeli5,ignore_index= True)
          pds.append(lead_time_pred)
        po_place['lead_time']=pds
        try:
          po_place['Exp_GR_Date'] = po_place['PO_Create_Date'].dt.date + pd.to_timedelta(po_place['lead_time'], unit='D')
        except Exception:
          # Fallback: add the lead time row by row, truncated to whole days
          ags=[]
          for po_date,lead_days in zip(po_place['PO_Create_Date'],po_place['lead_time']):
            ags.append(po_date+timedelta(int(lead_days)))
          po_place['Exp_GR_Date']=ags
          po_place['Exp_GR_Date']=po_place['Exp_GR_Date'].dt.date
        # Store the receipt date as datetime64 so the daily '== pd.to_datetime(todate)' match does
        # not rely on date-vs-Timestamp equality, which is version-dependent in pandas
        po_place['Exp_GR_Date'] = pd.to_datetime(po_place['Exp_GR_Date'])
        po_place['Delivery_qty'] = po_place['Vendor_ROQ']*po_place['Fulfillment_rate']

        pending1 = pending1.append(po_place, ignore_index= True)
      forecasted1=forecasted1.append(tempdf,ignore_index=True)
    forecasting=forecasting.append(forecasted1,ignore_index=True)
    leadtime_reasons=leadtime_reasons.append(leadtime_reasons1,ignore_index=True)
    pendings=pendings.append(pending1,ignore_index=True)
  except Exception as exc:
    # Best effort: skip this plant/material but report why it failed
    print(Plant,Matl,'- unable to build model:',repr(exc))
  ####End of loop

pendings['Exp_GR_Date']=pd.to_datetime(pendings['Exp_GR_Date'])

In [0]:
print(leadtime_reasons.shape)
leadtime_reasons.head()

In [0]:
print(forecasting.shape)
forecasting.head()

In [0]:
print(pendings.shape)
pendings.head()

In [0]:
fore = spark.createDataFrame(forecasting)
fore.write.saveAsTable(model_forecasting,mode = 'overwrite')

In [0]:
# Distinct plant/material pairs that made it into the model-based forecast
Rop350 = forecasting.loc[:, ['LocationID','MaterialID']].drop_duplicates()
Rop350.shape

In [0]:
#1. Creating output for actual Vs estimated number of Purchase orders for a given plant, material combination

Start_date = '{}'.format(Start_dates)
Start_date = datetime.datetime.strptime(Start_date, '%Y-%m-%d')
window_end = Start_date + timedelta(90)

# Parse the PO dates and cut both frames to the 90-day window once, instead of per plant/material
ves_date['PO_Create_Date'] = pd.to_datetime(ves_date['PO_Create_Date'])
po_in_window = ves_date[(ves_date['PO_Create_Date'] >= Start_date) & (ves_date['PO_Create_Date'] <= window_end)]
forecast_in_window = forecasting[(forecasting['SnapshotDate']>=Start_date) & (forecasting['SnapshotDate']<=window_end)]

plants=[]
matrls=[]
acts=[]
estm=[]
for Plant,Matl in zip(Rop350['LocationID'],Rop350['MaterialID']):
  # Actual POs: rows of ves_date created inside the window for this plant/material
  PO_original = po_in_window[(po_in_window['Material_No.'] ==Matl) & (po_in_window['Plant_ID'] == Plant)]
  plants.append(Plant)
  matrls.append(Matl)
  acts.append(PO_original.shape[0])
  # Estimated POs: number of simulated days flagged with Reorder inside the window
  estm.append(forecast_in_window[(forecast_in_window['LocationID']==Plant) & (forecast_in_window['MaterialID']==Matl)]['Reorder'].sum())

num_pos=pd.DataFrame({'LocationID':plants,'MaterialID':matrls,'Actual_POs_in_this_period':acts,'Estimated_POs_in_this_period':estm})
num_pos['model']='model_based_prediction'
print(num_pos.shape)
num_pos.head()

In [0]:
mopos = spark.createDataFrame(num_pos)
mopos.write.saveAsTable(model_numpos,mode = 'overwrite')

In [0]:
###Setting up base for the 13 week forecast. 
# The inventory consumption/inflow and PO flag -> 'forecasted'
# Identifying open Purchase orders and their expected delivery -> 'pending'
# Vendors -> List of vendors for the material plant combination and their attributes

forecasted=pd.DataFrame()
pending=pd.DataFrame()
vendors=pd.DataFrame()
eli5_reasons=pd.DataFrame()

Start_date = datetime.datetime.strptime(Start_dates, '%Y-%m-%d')

for Plant,Matl in zip(Rop12['LocationID'],Rop12['MaterialID']):
  ##### Getting the Reorder trigger attributes
  # Vendor rows of this plant/material; the reorder attributes are read from the first row
  vendors1 = vendor_plant_data[(vendor_plant_data['LocationID']==Plant) & (vendor_plant_data['MaterialID']==Matl)]
  ROP_freq = vendors1['freq_val'].values[0]
  ROP_median = vendors1['median_val'].values[0]
  ROP_mean = vendors1['mean_val'].values[0]
  ROP_method = vendors1['flag'].values[0]

  # Reorder point: the mean for the 'mean' method, the median for every other method
  if (ROP_method == 'mean'):
    ROP = ROP_mean
  else:
    ROP = ROP_median

  ############################## Dataframe for logging POs and expected dates of receipts#########################################
  vendors_v1 = vendors1[['LocationID', 'MaterialID', 'updated_VS_ID', 'Fulfillment_rate', 'Vendor_ROQ','Avg_lead_time','Planned_avg_lead_time']]

  # Checking if there are open purchase orders in the VES and logging the same in the 'pending' dataframe
  # (open = created on/before Start_date with a delivery date on/after Start_date)
  pending_v1 = ves[(ves['Plant_ID']==Plant) & (ves['Material_No.']==Matl) & (ves['PO_Create_Date'] <= Start_date) & (ves['Delivery_date'] >= Start_date)][['Plant_ID', 'Material_No.', 'PO_Create_Date', 'Purchase_Order_Scheduled_Qty', 'updated_VS_ID']]

  if (pending_v1.shape[0] >0): #if there are open pending orders
    pending_v1 = pd.merge(pending_v1, vendors_v1, on = ['updated_VS_ID'], how = 'left')
    # For open orders the ordered quantity is the scheduled PO quantity, not the vendor ROQ
    pending_v1['Vendor_ROQ'] = pending_v1['Purchase_Order_Scheduled_Qty']
    pds=[]
    for pl_id,v_id,mt_id,po_c_d,p_qty in zip(pending_v1['LocationID'],pending_v1['updated_VS_ID'],pending_v1['MaterialID'],pending_v1['PO_Create_Date'].dt.date.astype(str),pending_v1['Vendor_ROQ']):
      lead_time_pred,tempeli5 = pred_output(historical_data, pl_id, v_id, mt_id, po_c_d, p_qty) #predicting lead time for open orders
      tempeli5['LocationID']=pl_id
      tempeli5['updated_VS_ID']=v_id
      tempeli5['MaterialID']=mt_id
      tempeli5['PO_Create_Date']=po_c_d
      eli5_reasons=eli5_reasons.append(tempeli5,ignore_index= True)
      pds.append(lead_time_pred)
    pending_v1['lead_time']=pds

    pending_v1 = pending_v1[['LocationID', 'MaterialID', 'updated_VS_ID', 'Fulfillment_rate', 'Vendor_ROQ', 'PO_Create_Date',  'lead_time', 'Avg_lead_time', 'Planned_avg_lead_time']].copy()

    #Getting expected delivery date and logging in pending
    try:
      pending_v1['Exp_GR_Date'] = pending_v1['PO_Create_Date'].dt.date + pd.to_timedelta(pending_v1['lead_time'], unit='D')
    except Exception:
      # Fallback: add the lead time row by row, truncated to whole days
      ags=[]
      for po_date,lead_days in zip(pending_v1['PO_Create_Date'],pending_v1['lead_time']):
        ags.append(po_date+timedelta(int(lead_days)))
      pending_v1['Exp_GR_Date']=ags
      pending_v1['Exp_GR_Date']=pending_v1['Exp_GR_Date'].dt.date
    # Keep receipt dates as datetime64 so the '== pd.to_datetime(Start_date)' match below does not
    # rely on date-vs-Timestamp equality, which is version-dependent in pandas
    pending_v1['Exp_GR_Date'] = pd.to_datetime(pending_v1['Exp_GR_Date'])
    pending_v1['Delivery_qty'] = pending_v1['Vendor_ROQ']*pending_v1['Fulfillment_rate']
    # Whole days between the most recent open PO and the start date
    date_since_order = np.timedelta64( Start_date - max(pending_v1['PO_Create_Date']), 'D')
    date_since_order = date_since_order.astype(int)

  else: #if no pending orders, create df with dummy values
    pending_v1 = pd.DataFrame({'LocationID': [Plant], 'MaterialID': [Matl], 'updated_VS_ID': [0], 'Fulfillment_rate': [0.0], 'Vendor_ROQ': [0.0], 'PO_Create_Date': ['2000-01-01'],'lead_time': [0],'Avg_lead_time':[0],'Planned_avg_lead_time':[0]})
    pending_v1['PO_Create_Date'] =   pd.to_datetime(pending_v1['PO_Create_Date'], format="%Y-%m-%d")
    pending_v1['Exp_GR_Date'] = pending_v1['PO_Create_Date'].dt.date + pd.to_timedelta(pending_v1['lead_time'].values[0], unit='D')
    pending_v1['Exp_GR_Date'] = pd.to_datetime(pending_v1['Exp_GR_Date'])
    pending_v1['Delivery_qty'] = pending_v1['Vendor_ROQ']*pending_v1['Fulfillment_rate']
    date_since_order = 0

  ############################## Dataframe for consumption, receipts and PO triggers#########################################
  #Creating only the first row of the forecasted dataframe with the following elements:
  # 1. Snapshot Date
  # 2. Material ID
  # 3. Location ID
  # 4. Unit of Measure
  # 5. Week
  # 6. Inventory stock in hand (from the inventory history)
  # 7. Average Consumption
  # 8. Std Deviation of consumption
  # 9. Material received
  # 10. Expected inventory at the end of the day = inventory in hand + daily_mean_consumption + material received, floored at 0
  # 11. Counter: Count of days since last order
  # 12. Reorder flag

  forecasted_v1 = h_inv_fi[(h_inv_fi['SnapshotDate'] == Start_date) & (h_inv_fi['MaterialID'] ==Matl) & (h_inv_fi['LocationID'] == Plant)]
  forecasted_v1=pd.merge(forecasted_v1,inv_con,on=['LocationID','MaterialID','Week'],how='left')
  forecasted_v1['Matl_Recp'] = pending_v1.loc[pending_v1['Exp_GR_Date'] == pd.to_datetime(Start_date) , 'Delivery_qty'].sum()
  forecasted_v1.loc[forecasted_v1['daily_mean_consumption'].isnull()==True,'daily_mean_consumption']=0
  # .values[0] raises IndexError when h_inv_fi has no snapshot for this plant/material on Start_date
  forecasted_v1['Exp_inv'] = max((forecasted_v1['InventoryStockUnRestricted_daily'].astype(np.int64) + forecasted_v1['daily_mean_consumption'].astype(np.int64) + forecasted_v1['Matl_Recp'].astype(np.int64)).values[0],0)

  forecasted_v1['counter'] = date_since_order
  forecasted_v1['Reorder'] = 0

  # check if order needs to be placed on start date. If yes, log PO Create date, Vendor, lead time, expected delivery date, PO Quantity, Expected delivery in Pending based on the method of reorder

  # A real PO created on the start date counts as a reorder
  if max(pending_v1['PO_Create_Date'])==Start_date:
    forecasted_v1['Reorder']=1

  # Inventory based reorder check: stock starts at/above the reorder point and is expected to end below it
  elif ((ROP_method == 'median') | (ROP_method == 'mean')):
    if ((forecasted_v1['InventoryStockUnRestricted_daily'].values[0] >= ROP) & (forecasted_v1['Exp_inv'].values[0] < ROP)):
      forecasted_v1['Reorder'] = 1

  # Frequency based reorder check
  elif (ROP_method == 'frequency'):
    if (date_since_order >= ROP_freq):
      forecasted_v1['Reorder'] = 1

  # Hybrid Reorder check
  elif (ROP_method == 'hybrid'): #(here ROP is median, ROP_freq is mode)
    if (((date_since_order >= ROP_freq) & (forecasted_v1['Exp_inv'].values[0]<ROP))  | ((forecasted_v1['InventoryStockUnRestricted_daily'].values[0] >= ROP) & (forecasted_v1['Exp_inv'].values[0] < ROP))):
      forecasted_v1['Reorder'] = 1

  if (forecasted_v1['Reorder'].values[0] == 1):
    # One simulated PO per vendor of this plant/material, created on the start date
    po_place = vendors_v1.copy()
    po_place['PO_Create_Date'] = Start_date
    po_place['PO_Create_Date'] = pd.to_datetime(po_place['PO_Create_Date'], format="%Y-%m-%d")

    pds=[]
    for pl_id,v_id,mt_id,po_c_d,p_qty in zip(po_place['LocationID'],po_place['updated_VS_ID'],po_place['MaterialID'],po_place['PO_Create_Date'].dt.date.astype(str),po_place['Vendor_ROQ']):
      lead_time_pred,tempeli5 = pred_output(historical_data, pl_id, v_id, mt_id, po_c_d, p_qty)
      tempeli5['LocationID']=pl_id
      tempeli5['updated_VS_ID']=v_id
      tempeli5['MaterialID']=mt_id
      tempeli5['PO_Create_Date']=po_c_d
      eli5_reasons=eli5_reasons.append(tempeli5,ignore_index= True)
      pds.append(lead_time_pred)
    po_place['lead_time']=pds

    try:
      po_place['Exp_GR_Date'] = po_place['PO_Create_Date'].dt.date + pd.to_timedelta(po_place['lead_time'], unit='D')
    except Exception:
      ags=[]
      for po_date,lead_days in zip(po_place['PO_Create_Date'],po_place['lead_time']):
        ags.append(po_date+timedelta(int(lead_days)))
      po_place['Exp_GR_Date']=ags
      po_place['Exp_GR_Date']=po_place['Exp_GR_Date'].dt.date
    po_place['Exp_GR_Date'] = pd.to_datetime(po_place['Exp_GR_Date'])
    po_place['Delivery_qty'] = po_place['Vendor_ROQ']*po_place['Fulfillment_rate']
    forecasted_v1['counter'] = 0
    pending_v1 = pending_v1.append(po_place, ignore_index= True)
  forecasted=forecasted.append(forecasted_v1,ignore_index= True)
  pending=pending.append(pending_v1,ignore_index= True)
  vendors=vendors.append(vendors_v1,ignore_index= True)
####End of loop

forecasted['InventoryStockUnRestricted_daily']=forecasted['InventoryStockUnRestricted_daily'].astype(np.int64)
forecasted['daily_mean_consumption']=forecasted['daily_mean_consumption'].astype(np.int64)
forecasted['Exp_inv']=forecasted['Exp_inv'].astype(np.int64)
pending['Delivery_qty']=pending['Delivery_qty'].astype(np.int64)
pending['Exp_GR_Date']=pd.to_datetime(pending['Exp_GR_Date'])

In [0]:
print(eli5_reasons.shape)
eli5_reasons.head()

In [0]:
print(forecasted.shape)
forecasted.head()

In [0]:
print(pending.shape)
pending.head()

In [0]:
############Forecasting consumption, order placement, material receipts and expected inventory for next 90 days
# Every plant/material of Rop12 is rolled forward one day at a time from Start_date. The reorder
# decision uses the plant/material's reorder method ('mean', 'median', 'frequency' or 'hybrid')
# read from vendor_plant_data.

forecasting=pd.DataFrame()
pendings=pd.DataFrame()
leadtime_reasons=pd.DataFrame()

Start_date = datetime.datetime.strptime(Start_dates, '%Y-%m-%d')
for Plant,Matl in zip(Rop12['LocationID'],Rop12['MaterialID']):

  forecasted1=forecasted[(forecasted['LocationID']==Plant) & (forecasted['MaterialID']==Matl)].copy()
  pending1=pending[(pending['LocationID']==Plant)&(pending['MaterialID']==Matl)].copy()
  vendors1=vendors[(vendors['LocationID']==Plant)&(vendors['MaterialID']==Matl)].copy()
  leadtime_reasons1=eli5_reasons[(eli5_reasons['LocationID']==Plant)&(eli5_reasons['MaterialID']==Matl)].copy()

  ##### Getting the Reorder trigger attributes (read from the first matching vendor_plant_data row)
  rop_rows = vendor_plant_data[(vendor_plant_data['LocationID'] == Plant) & (vendor_plant_data['MaterialID'] == Matl)]
  ROP_freq = rop_rows['freq_val'].values[0]
  ROP_median = rop_rows['median_val'].values[0]
  ROP_mean = rop_rows['mean_val'].values[0]
  ROP_method = rop_rows['flag'].values[0]

  # Reorder point: the mean for the 'mean' method, the median for every other method
  if (ROP_method == 'mean'):
    ROP = ROP_mean
  else:
    ROP = ROP_median

  for day_offset in range(1,90):
    #Last updated date
    todate = Start_date+timedelta(day_offset)
    week_no = int(todate.isocalendar()[1])
    tempdf = forecasted1.tail(1).copy() # taking the latest entry of the forecasted
    tempdf['SnapshotDate'] = todate
    tempdf['Week'] = week_no
    tempdf['counter'] = tempdf['counter'].values[0] + 1
    tempdf['Reorder'] = 0

    # Update Consumption stats (both fall back to 0 when no row exists for this week)
    try:
      week_rows = inv_con.loc[(inv_con['MaterialID'] == Matl) & (inv_con['LocationID'] == Plant) & (inv_con['Week'] == week_no)]
      tempdf['daily_mean_consumption'] = week_rows['daily_mean_consumption'].values[0]
      tempdf['std_dev_consumption'] = week_rows['std_dev_consumption'].values[0]
    except Exception:
      tempdf['daily_mean_consumption'] = 0
      tempdf['std_dev_consumption'] = 0

    # Update Material Receipt: deliveries whose expected goods-receipt date is today
    tempdf['Matl_Recp'] = pending1.loc[pending1['Exp_GR_Date'] == pd.to_datetime(todate), 'Delivery_qty'].sum()

    # Inventory carried over from the previous day and its projection after today's receipts and consumption
    inv_before = tempdf['Exp_inv'].values[0]
    inv_after = inv_before + tempdf['Matl_Recp'].values[0] + tempdf['daily_mean_consumption'].values[0]

    #   Order Placement Check - Mean/Median or hybrid or frequency and triggering order if needed
    if((ROP_method == 'median') | (ROP_method == 'mean')):
      # inventory crosses the reorder point today
      if ((inv_before >= ROP) & (inv_after < ROP)):
        tempdf['Reorder'] =1

    elif(ROP_method == 'frequency'):
      if (tempdf['counter'].values[0] >= ROP_freq):
        tempdf['Reorder'] =1

    elif(ROP_method == 'hybrid'):
      # either the reorder point is crossed today, or the order interval has elapsed while stock is below it
      if (((inv_before >= ROP) & (inv_after < ROP)) | ((tempdf['counter'].values[0] >= ROP_freq) & (inv_before<ROP))):
        tempdf['Reorder'] =1

    # Order logging in 'Pending' by suppliers, PO Create date, Expected delivery, PO Qty and expected delivery qty:
    if(tempdf['Reorder'].values[0] ==1):
      po_place = vendors1.copy()
      po_place['PO_Create_Date'] = todate
      po_place['PO_Create_Date'] = pd.to_datetime(po_place['PO_Create_Date'], format="%Y-%m-%d")
      pds=[]
      for pl_id,v_id,mt_id,po_c_d,p_qty in zip(po_place['LocationID'], po_place['updated_VS_ID'], po_place['MaterialID'], po_place['PO_Create_Date'].dt.date.astype(str), po_place['Vendor_ROQ']):
        lead_time_pred,tempeli5 = pred_output(historical_data, pl_id, v_id, mt_id, po_c_d, p_qty)
        tempeli5['LocationID']=pl_id
        tempeli5['updated_VS_ID']=v_id
        tempeli5['MaterialID']=mt_id
        tempeli5['PO_Create_Date']=po_c_d
        leadtime_reasons1=leadtime_reasons1.append(tempeli5,ignore_index= True)
        pds.append(lead_time_pred)
      po_place['lead_time']=pds
      try:
        po_place['Exp_GR_Date'] = po_place['PO_Create_Date'].dt.date + pd.to_timedelta(po_place['lead_time'], unit='D')
      except Exception:
        # Fallback: add the lead time row by row, truncated to whole days
        ags=[]
        for po_date,lead_days in zip(po_place['PO_Create_Date'],po_place['lead_time']):
          ags.append(po_date+timedelta(int(lead_days)))
        po_place['Exp_GR_Date']=ags
        po_place['Exp_GR_Date']=po_place['Exp_GR_Date'].dt.date
      # Store the receipt date as datetime64 so the daily '== pd.to_datetime(todate)' match does
      # not rely on date-vs-Timestamp equality, which is version-dependent in pandas
      po_place['Exp_GR_Date'] = pd.to_datetime(po_place['Exp_GR_Date'])
      po_place['Delivery_qty'] = po_place['Vendor_ROQ']*po_place['Fulfillment_rate']

      pending1 = pending1.append(po_place, ignore_index= True)
      tempdf['counter'] = 0

    # Update the Expected inventory
    inv_calc = max(inv_after, 0) #Non negative inventory
    tempdf['Exp_inv']= int(inv_calc)

    forecasted1=forecasted1.append(tempdf,ignore_index=True)
  forecasting=forecasting.append(forecasted1,ignore_index=True)
  pendings=pendings.append(pending1,ignore_index=True)
  leadtime_reasons=leadtime_reasons.append(leadtime_reasons1,ignore_index=True)
pendings['Exp_GR_Date']=pd.to_datetime(pendings['Exp_GR_Date'])

In [0]:
print(leadtime_reasons.shape)
leadtime_reasons.head()

In [0]:
print(forecasting.shape)
forecasting.head()

In [0]:
print(pendings.shape)
pendings.head()

In [0]:
fore = spark.createDataFrame(forecasting)
fore.write.saveAsTable(prediction_forecasting,mode = 'overwrite')

In [0]:
# Distinct plant/material pairs present in the trend-based forecast
Rop450 = forecasting.loc[:, ['LocationID','MaterialID']].drop_duplicates()
Rop450.shape

In [0]:
#1. Creating output for actual Vs estimated number of Purchase orders for a given plant, material combination

Start_date = '{}'.format(Start_dates)
Start_date = datetime.datetime.strptime(Start_date, '%Y-%m-%d')
window_end = Start_date + timedelta(90)

# Parse the PO dates and cut both frames to the 90-day window once, instead of per plant/material
ves_date['PO_Create_Date'] = pd.to_datetime(ves_date['PO_Create_Date'])
po_in_window = ves_date[(ves_date['PO_Create_Date'] >= Start_date) & (ves_date['PO_Create_Date'] <= window_end)]
forecast_in_window = forecasting[(forecasting['SnapshotDate']>=Start_date) & (forecasting['SnapshotDate']<=window_end)]

plants=[]
matrls=[]
acts=[]
estm=[]
for Plant,Matl in zip(Rop450['LocationID'],Rop450['MaterialID']):
  # Actual POs: rows of ves_date created inside the window for this plant/material
  PO_original = po_in_window[(po_in_window['Material_No.'] ==Matl) & (po_in_window['Plant_ID'] == Plant)]
  plants.append(Plant)
  matrls.append(Matl)
  acts.append(PO_original.shape[0])
  # Estimated POs: number of simulated days flagged with Reorder inside the window
  estm.append(forecast_in_window[(forecast_in_window['LocationID']==Plant) & (forecast_in_window['MaterialID']==Matl)]['Reorder'].sum())

num_pos1=pd.DataFrame({'LocationID':plants,'MaterialID':matrls,'Actual_POs_in_this_period':acts,'Estimated_POs_in_this_period':estm})
num_pos1['model']='trend_based'
print(num_pos1.shape)
num_pos1.head()

In [0]:
mopos = spark.createDataFrame(num_pos1)
mopos.write.saveAsTable(prediction_numpos,mode = 'overwrite')

In [0]:
#Choosing best model based on whether the number of orders placed is the closest to actual.

# Stack both candidates on a fresh 0..n-1 index so idxmin labels identify single rows
numpo=pd.concat([num_pos,num_pos1],ignore_index=True)
numpo['absolute_error']=(numpo['Estimated_POs_in_this_period']-numpo['Actual_POs_in_this_period']).abs()
Rop555=numpo[['LocationID','MaterialID']].drop_duplicates()

# For each plant/material keep the row with the smallest absolute error. On ties idxmin returns
# the first row, i.e. the 'model_based_prediction' row because num_pos is concatenated first.
best_rows=numpo.groupby(['LocationID','MaterialID'],sort=False)['absolute_error'].idxmin()
model_to_use=numpo.loc[best_rows.values]

var1=model_to_use[model_to_use['model']=='trend_based']
var2=model_to_use[model_to_use['model']=='model_based_prediction'].copy()
# Model-based winners carry no reorder-method flag
var2['flag']=''
# Trend-based winners take their reorder-method flag from reorder_method_check
var3=pd.merge(var1,reorder_method_check[['LocationID','MaterialID','flag']],on=['LocationID','MaterialID'],how='inner')
models=pd.concat([var2,var3])
print(models.shape)
models['LocationID']=models['LocationID'].astype(int)
models['MaterialID']=models['MaterialID'].astype(int)
models.head()

In [0]:
mopos1 = spark.createDataFrame(models)
mopos1.write.saveAsTable("temp_models",mode = 'overwrite')

**For the trend_based combinations whose reorder-method flag is mean or median, check whether the flag should be replaced by hybrid to improve order placement.** The following steps are followed:
  1. Forecast the trend_based plant–material combinations with flag mean or median for 6 months.
  2. If, in the month of February, more than 2 weeks have an expected inventory equal to 0, replace the flag of those plant–material combinations with hybrid.

In [0]:
# Trend-based winners whose reorder method is purely inventory based (mean or median)
is_trend_based = models['model']=='trend_based'
is_inventory_flag = models['flag'].isin(['median','mean'])
model_verification=models[is_trend_based & is_inventory_flag].copy()
print(model_verification.shape)
model_verification.head()

In [0]:
###Setting up base for the 27 week forecast. 
# The inventory consumption/inflow and PO flag -> 'forecasted'
# Identifying open Purchase orders and their expected delivery -> 'pending'
# Vendors -> List of vendors for the material plant combination and their attributes

def set_base (Plants,Matls):
  """Build the start-day state for the day-by-day forecast of the given plant/material pairs.

  Reads the notebook-level names Start_dates, vendor_plant_data, ves, h_inv_fi, inv_con,
  historical_data and pred_output.

  Parameters
  ----------
  Plants, Matls : iterables of plant IDs and material IDs, paired positionally with zip.

  Returns
  -------
  forecasted : DataFrame with the start-day inventory rows selected from h_inv_fi, extended with
      Matl_Recp, Exp_inv, counter and Reorder.
  pending : DataFrame with open POs from ves (or one dummy row when there are none) plus the
      POs simulated on the start date, with Exp_GR_Date and Delivery_qty.
  vendors : DataFrame with the vendor attributes of every plant/material.
  eli5_reasons : DataFrame collecting the second value returned by pred_output for every PO.

  Raises
  ------
  IndexError
      From the `.values[0]` lookups when vendor_plant_data has no row for a pair, or h_inv_fi
      has no snapshot for it on the start date.
  """
  forecasted=pd.DataFrame()
  pending=pd.DataFrame()
  vendors=pd.DataFrame()
  eli5_reasons=pd.DataFrame()

  Start_date = datetime.datetime.strptime(Start_dates, '%Y-%m-%d')

  for Plant,Matl in zip(Plants,Matls):
    ##### Getting the Reorder trigger attributes
    # Vendor rows of this plant/material; the reorder attributes are read from the first row
    vendors1 = vendor_plant_data[(vendor_plant_data['LocationID']==Plant) & (vendor_plant_data['MaterialID']==Matl)]
    ROP_freq = vendors1['freq_val'].values[0]
    ROP_median = vendors1['median_val'].values[0]
    ROP_mean = vendors1['mean_val'].values[0]
    ROP_method = vendors1['flag'].values[0]

    # Reorder point: the mean for the 'mean' method, the median for every other method
    if (ROP_method == 'mean'):
      ROP = ROP_mean
    else:
      ROP = ROP_median

    ############################## Dataframe for logging POs and expected dates of receipts#########################################
    vendors_v1 = vendors1[['LocationID', 'MaterialID', 'updated_VS_ID', 'Fulfillment_rate', 'Vendor_ROQ','Avg_lead_time','Planned_avg_lead_time']]

    # Checking if there are open purchase orders in the VES and logging the same in the 'pending' dataframe
    # (open = created on/before Start_date with a delivery date on/after Start_date)
    pending_v1 = ves[(ves['Plant_ID']==Plant) & (ves['Material_No.']==Matl) & (ves['PO_Create_Date'] <= Start_date) & (ves['Delivery_date'] >= Start_date)][['Plant_ID', 'Material_No.', 'PO_Create_Date', 'Purchase_Order_Scheduled_Qty', 'updated_VS_ID']]

    if (pending_v1.shape[0] >0): #if there are open pending orders
      pending_v1 = pd.merge(pending_v1, vendors_v1, on = ['updated_VS_ID'], how = 'left')
      # For open orders the ordered quantity is the scheduled PO quantity, not the vendor ROQ
      pending_v1['Vendor_ROQ'] = pending_v1['Purchase_Order_Scheduled_Qty']
      pds=[]
      for pl_id,v_id,mt_id,po_c_d,p_qty in zip(pending_v1['LocationID'],pending_v1['updated_VS_ID'],pending_v1['MaterialID'],pending_v1['PO_Create_Date'].dt.date.astype(str),pending_v1['Vendor_ROQ']):
        lead_time_pred,tempeli5 = pred_output(historical_data, pl_id, v_id, mt_id, po_c_d, p_qty) #predicting lead time for open orders
        tempeli5['LocationID']=pl_id
        tempeli5['updated_VS_ID']=v_id
        tempeli5['MaterialID']=mt_id
        tempeli5['PO_Create_Date']=po_c_d
        eli5_reasons=eli5_reasons.append(tempeli5,ignore_index= True)
        pds.append(lead_time_pred)
      pending_v1['lead_time']=pds

      pending_v1 = pending_v1[['LocationID', 'MaterialID', 'updated_VS_ID', 'Fulfillment_rate', 'Vendor_ROQ', 'PO_Create_Date',  'lead_time', 'Avg_lead_time', 'Planned_avg_lead_time']].copy()

      #Getting expected delivery date and logging in pending
      try:
        pending_v1['Exp_GR_Date'] = pending_v1['PO_Create_Date'].dt.date + pd.to_timedelta(pending_v1['lead_time'], unit='D')
      except Exception:
        # Fallback: add the lead time row by row, truncated to whole days
        ags=[]
        for po_date,lead_days in zip(pending_v1['PO_Create_Date'],pending_v1['lead_time']):
          ags.append(po_date+timedelta(int(lead_days)))
        pending_v1['Exp_GR_Date']=ags
        pending_v1['Exp_GR_Date']=pending_v1['Exp_GR_Date'].dt.date
      # Keep receipt dates as datetime64 so the '== pd.to_datetime(Start_date)' match below does not
      # rely on date-vs-Timestamp equality, which is version-dependent in pandas
      pending_v1['Exp_GR_Date'] = pd.to_datetime(pending_v1['Exp_GR_Date'])
      pending_v1['Delivery_qty'] = pending_v1['Vendor_ROQ']*pending_v1['Fulfillment_rate']
      # Whole days between the most recent open PO and the start date
      date_since_order = np.timedelta64( Start_date - max(pending_v1['PO_Create_Date']), 'D')
      date_since_order = date_since_order.astype(int)

    else: #if no pending orders, create df with dummy values
      pending_v1 = pd.DataFrame({'LocationID': [Plant], 'MaterialID': [Matl], 'updated_VS_ID': [0], 'Fulfillment_rate': [0.0], 'Vendor_ROQ': [0.0], 'PO_Create_Date': ['2000-01-01'],'lead_time': [0],'Avg_lead_time':[0],'Planned_avg_lead_time':[0]})
      pending_v1['PO_Create_Date'] =   pd.to_datetime(pending_v1['PO_Create_Date'], format="%Y-%m-%d")
      pending_v1['Exp_GR_Date'] = pending_v1['PO_Create_Date'].dt.date + pd.to_timedelta(pending_v1['lead_time'].values[0], unit='D')
      pending_v1['Exp_GR_Date'] = pd.to_datetime(pending_v1['Exp_GR_Date'])
      pending_v1['Delivery_qty'] = pending_v1['Vendor_ROQ']*pending_v1['Fulfillment_rate']
      date_since_order = 0

    ############################## Dataframe for consumption, receipts and PO triggers#########################################
    #Creating only the first row of the forecasted dataframe with the following elements:
    # 1. Snapshot Date
    # 2. Material ID
    # 3. Location ID
    # 4. Unit of Measure
    # 5. Week
    # 6. Inventory stock in hand (from the inventory history)
    # 7. Average Consumption
    # 8. Std Deviation of consumption
    # 9. Material received
    # 10. Expected inventory at the end of the day = inventory in hand + daily_mean_consumption + material received, floored at 0
    # 11. Counter: Count of days since last order
    # 12. Reorder flag

    forecasted_v1 = h_inv_fi[(h_inv_fi['SnapshotDate'] == Start_date) & (h_inv_fi['MaterialID'] ==Matl) & (h_inv_fi['LocationID'] == Plant)]
    forecasted_v1=pd.merge(forecasted_v1,inv_con,on=['LocationID','MaterialID','Week'],how='left')
    forecasted_v1['Matl_Recp'] = pending_v1.loc[pending_v1['Exp_GR_Date'] == pd.to_datetime(Start_date) , 'Delivery_qty'].sum()
    forecasted_v1.loc[forecasted_v1['daily_mean_consumption'].isnull()==True,'daily_mean_consumption']=0
    forecasted_v1['Exp_inv'] = max((forecasted_v1['InventoryStockUnRestricted_daily'].astype(np.int64) + forecasted_v1['daily_mean_consumption'].astype(np.int64) + forecasted_v1['Matl_Recp'].astype(np.int64)).values[0],0)

    forecasted_v1['counter'] = date_since_order
    forecasted_v1['Reorder'] = 0

    # check if order needs to be placed on start date. If yes, log PO Create date, Vendor, lead time, expected delivery date, PO Quantity, Expected delivery in Pending based on the method of reorder

    # A real PO created on the start date counts as a reorder. The method checks below can only
    # set the flag to 1 as well, so chaining them with elif gives the same result.
    if max(pending_v1['PO_Create_Date'])==Start_date:
      forecasted_v1['Reorder']=1

    # Inventory based reorder check: stock starts at/above the reorder point and is expected to end below it
    elif ((ROP_method == 'median') | (ROP_method == 'mean')):
      if ((forecasted_v1['InventoryStockUnRestricted_daily'].values[0] >= ROP) & (forecasted_v1['Exp_inv'].values[0] < ROP)):
        forecasted_v1['Reorder'] = 1

    # Frequency based reorder check
    elif (ROP_method == 'frequency'):
      if (date_since_order >= ROP_freq):
        forecasted_v1['Reorder'] = 1

    # Hybrid Reorder check
    elif (ROP_method == 'hybrid'): #(here ROP is median, ROP_freq is mode)
      if (((date_since_order >= ROP_freq) & (forecasted_v1['Exp_inv'].values[0]<ROP))  | ((forecasted_v1['InventoryStockUnRestricted_daily'].values[0] >= ROP) & (forecasted_v1['Exp_inv'].values[0] < ROP))):
        forecasted_v1['Reorder'] = 1

    if (forecasted_v1['Reorder'].values[0] == 1):
      # One simulated PO per vendor of this plant/material, created on the start date
      po_place = vendors_v1.copy()
      po_place['PO_Create_Date'] = Start_date
      po_place['PO_Create_Date'] = pd.to_datetime(po_place['PO_Create_Date'], format="%Y-%m-%d")

      pds=[]
      for pl_id,v_id,mt_id,po_c_d,p_qty in zip(po_place['LocationID'],po_place['updated_VS_ID'],po_place['MaterialID'],po_place['PO_Create_Date'].dt.date.astype(str),po_place['Vendor_ROQ']):
        lead_time_pred,tempeli5 = pred_output(historical_data, pl_id, v_id, mt_id, po_c_d, p_qty)
        tempeli5['LocationID']=pl_id
        tempeli5['updated_VS_ID']=v_id
        tempeli5['MaterialID']=mt_id
        tempeli5['PO_Create_Date']=po_c_d
        eli5_reasons=eli5_reasons.append(tempeli5,ignore_index= True)
        pds.append(lead_time_pred)
      po_place['lead_time']=pds

      try:
        po_place['Exp_GR_Date'] = po_place['PO_Create_Date'].dt.date + pd.to_timedelta(po_place['lead_time'], unit='D')
      except Exception:
        ags=[]
        for po_date,lead_days in zip(po_place['PO_Create_Date'],po_place['lead_time']):
          ags.append(po_date+timedelta(int(lead_days)))
        po_place['Exp_GR_Date']=ags
        po_place['Exp_GR_Date']=po_place['Exp_GR_Date'].dt.date
      po_place['Exp_GR_Date'] = pd.to_datetime(po_place['Exp_GR_Date'])
      po_place['Delivery_qty'] = po_place['Vendor_ROQ']*po_place['Fulfillment_rate']
      forecasted_v1['counter'] = 0
      pending_v1 = pending_v1.append(po_place, ignore_index= True)
    forecasted=forecasted.append(forecasted_v1,ignore_index= True)
    pending=pending.append(pending_v1,ignore_index= True)
    vendors=vendors.append(vendors_v1,ignore_index= True)
  ####End of loop

  forecasted['InventoryStockUnRestricted_daily']=forecasted['InventoryStockUnRestricted_daily'].astype(np.int64)
  forecasted['daily_mean_consumption']=forecasted['daily_mean_consumption'].astype(np.int64)
  forecasted['Exp_inv']=forecasted['Exp_inv'].astype(np.int64)
  pending['Delivery_qty']=pending['Delivery_qty'].astype(np.int64)
  pending['Exp_GR_Date']=pd.to_datetime(pending['Exp_GR_Date'])
  return forecasted,pending,vendors,eli5_reasons

In [0]:
forecasted,pending,vendors,eli5_reasons=set_base(model_verification['LocationID'],model_verification['MaterialID'])

In [0]:
print(eli5_reasons.shape)
eli5_reasons.head()

In [0]:
print(forecasted.shape)
forecasted.head()

In [0]:
print(pending.shape)
pending.head()

In [0]:
############Forecasting consumption, order placement, material receipts and expected inventory
############for day offsets 1..179 after Start_dates (range(1,180) below stops before offset 180)

def forecast (Plants,Matls):
  """Simulate daily inventory, receipts and reorders for each plant/material pair.

  For every pair the simulation starts from the pair's last row in the
  notebook-level ``forecasted`` frame and walks day offsets 1..179 after
  ``Start_dates``. Each simulated day:

  1. takes ``daily_mean_consumption`` / ``std_dev_consumption`` for the day's ISO
     week from ``inv_con`` (both 0 when the week has no row for the pair),
  2. sums ``Delivery_qty`` of the pair's pending orders whose ``Exp_GR_Date`` is
     that day into ``Matl_Recp``,
  3. decides whether to reorder from the pair's ``flag`` in ``vendor_plant_data``
     ('mean', 'median', 'frequency' or 'hybrid'; any other value never reorders),
  4. on a reorder, creates one order per row of the pair's ``vendors`` rows with a
     lead time returned by ``pred_output`` and resets ``counter`` to 0,
  5. sets ``Exp_inv`` to previous ``Exp_inv`` + ``Matl_Recp`` +
     ``daily_mean_consumption``, floored at 0 and truncated to int.

  Other notebook-level names read: ``pending``, ``eli5_reasons``,
  ``historical_data``.

  Parameters
  ----------
  Plants, Matls : iterables
      LocationID and MaterialID values, paired position by position.

  Returns
  -------
  tuple of three DataFrames
      ``forecasting`` (seed rows plus one row per simulated day and pair),
      ``pendings`` (seed orders plus simulated orders, ``Exp_GR_Date`` as
      datetime64) and ``leadtime_reasons`` (seed rows plus one entry per
      simulated order). All three are empty frames when no pair is supplied.

  Raises
  ------
  IndexError
      If a pair has no row in ``vendor_plant_data`` or no row in ``forecasted``.
  """
  def _append_rows(frame, rows):
    # Row-wise append that does not rely on DataFrame.append (removed in pandas 2.0).
    # NOTE(review): pred_output is defined in another notebook; if its explanation
    # output is not a DataFrame it is wrapped as a single row here - confirm.
    if not isinstance(rows, pd.DataFrame):
      rows = pd.DataFrame([rows])
    return pd.concat([frame, rows], ignore_index=True)

  def _combine(parts):
    # One concat at the end instead of re-copying the accumulated frame on every append.
    return pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()

  def _drops_below_rop(day, rop):
    # True when inventory is at/above the reorder point before today's movements
    # and below it once today's receipt and consumption are applied.
    inv_before = day['Exp_inv'].values[0]
    inv_after = inv_before + day['Matl_Recp'].values[0] + day['daily_mean_consumption'].values[0]
    return (inv_before >= rop) & (inv_after < rop)

  Start_date = datetime.datetime.strptime(Start_dates, '%Y-%m-%d')
  forecast_parts, pending_parts, reason_parts = [], [], []

  for Plant,Matl in zip(Plants,Matls):

    forecasted1=forecasted[(forecasted['LocationID']==Plant) & (forecasted['MaterialID']==Matl)].copy()
    pending1=pending[(pending['LocationID']==Plant)&(pending['MaterialID']==Matl)].copy()
    vendors1=vendors[(vendors['LocationID']==Plant)&(vendors['MaterialID']==Matl)].copy()
    leadtime_reasons1=eli5_reasons[(eli5_reasons['LocationID']==Plant)&(eli5_reasons['MaterialID']==Matl)].copy()

    ##### Getting the Reorder trigger attributes (one filtered lookup instead of four)
    rop_rows = vendor_plant_data.loc[(vendor_plant_data['LocationID'] == Plant) & (vendor_plant_data['MaterialID'] == Matl)]
    ROP_freq = rop_rows['freq_val'].values[0]
    ROP_median = rop_rows['median_val'].values[0]
    ROP_mean = rop_rows['mean_val'].values[0]
    ROP_method = rop_rows['flag'].values[0]
    ROP = ROP_mean if ROP_method == 'mean' else ROP_median

    # Weekly consumption rows of this pair; the filter does not change from day to day.
    consumption1 = inv_con[(inv_con['MaterialID'] == Matl) & (inv_con['LocationID'] == Plant)]

    forecast_parts.append(forecasted1)
    latest = forecasted1.tail(1) # every day starts from the latest forecasted row
    for i in range(1,180):
      todate = Start_date + timedelta(i)
      week = int(todate.isocalendar()[1])
      tempdf = latest.copy()
      tempdf['SnapshotDate'] = todate
      tempdf['Week'] = week
      tempdf['counter'] = tempdf['counter'].values[0] + 1
      tempdf['Reorder'] = 0

      # Update Consumption stats; a week without a row means no consumption that day
      week_rows = consumption1.loc[consumption1['Week'] == week]
      if len(week_rows) > 0:
        tempdf['daily_mean_consumption'] = week_rows['daily_mean_consumption'].values[0]
        tempdf['std_dev_consumption'] = week_rows['std_dev_consumption'].values[0]
      else:
        tempdf['daily_mean_consumption'] = 0
        tempdf['std_dev_consumption'] = 0

      # Update Material Receipt
      tempdf['Matl_Recp'] = pending1.loc[pending1['Exp_GR_Date'] == pd.to_datetime(todate), 'Delivery_qty'].sum()

      # Order Placement Check - Mean/Median or hybrid or frequency
      if ROP_method in ('median', 'mean'):
        place_order = _drops_below_rop(tempdf, ROP)
      elif ROP_method == 'frequency':
        place_order = tempdf['counter'].values[0] >= ROP_freq
      elif ROP_method == 'hybrid':
        place_order = _drops_below_rop(tempdf, ROP) | ((tempdf['counter'].values[0] >= ROP_freq) & (tempdf['Exp_inv'].values[0] < ROP))
      else:
        place_order = False
      tempdf['Reorder'] = int(bool(place_order))

      # Order logging in 'Pending' by suppliers, PO Create date, Expected delivery, PO Qty and expected delivery qty
      if place_order:
        po_place = vendors1.copy()
        po_place['PO_Create_Date'] = todate
        po_place['PO_Create_Date'] = pd.to_datetime(po_place['PO_Create_Date'], format="%Y-%m-%d")
        pds=[]
        for pl_id,v_id,mt_id,po_c_d,p_qty in zip(po_place['LocationID'], po_place['updated_VS_ID'], po_place['MaterialID'], po_place['PO_Create_Date'].dt.date.astype(str), po_place['Vendor_ROQ']):
          lead_time_pred,tempeli5 = pred_output(historical_data, pl_id, v_id, mt_id, po_c_d, p_qty)
          tempeli5['LocationID']=pl_id
          tempeli5['updated_VS_ID']=v_id
          tempeli5['MaterialID']=mt_id
          tempeli5['PO_Create_Date']=po_c_d
          leadtime_reasons1 = _append_rows(leadtime_reasons1, tempeli5)
          pds.append(lead_time_pred)
        po_place['lead_time']=pds

        try:
          lead_days = pd.to_timedelta(po_place['lead_time'], unit='D')
        except (TypeError, ValueError):
          # Predictions that pandas cannot read as numbers: fall back to whole days each
          lead_days = pd.to_timedelta(pd.Series([int(days) for days in po_place['lead_time']], index=po_place.index), unit='D')
        # Keep Exp_GR_Date as midnight timestamps so it shares a dtype with the seed orders;
        # date objects in this column made the receipt lookup above rely on the
        # Timestamp == datetime.date comparison that pandas deprecated.
        po_place['Exp_GR_Date'] = (po_place['PO_Create_Date'] + lead_days).dt.normalize()
        po_place['Delivery_qty'] = po_place['Vendor_ROQ']*po_place['Fulfillment_rate']

        pending1 = _append_rows(pending1, po_place)
        tempdf['counter'] = 0

      # Update the Expected inventory. Consumption is added as stored.
      # NOTE(review): this lowers inventory only if inv_con holds consumption as negative numbers - confirm.
      inv_calc = max((tempdf['Exp_inv'] + tempdf['Matl_Recp'] + tempdf['daily_mean_consumption']).values[0], 0) #Non negative inventory
      tempdf['Exp_inv']= int(inv_calc)

      forecast_parts.append(tempdf)
      latest = tempdf

    reason_parts.append(leadtime_reasons1)
    pending_parts.append(pending1)

  forecasting = _combine(forecast_parts)
  pendings = _combine(pending_parts)
  leadtime_reasons = _combine(reason_parts)
  if 'Exp_GR_Date' in pendings.columns:
    pendings['Exp_GR_Date']=pd.to_datetime(pendings['Exp_GR_Date'])
  return forecasting,pendings,leadtime_reasons

In [0]:
# Run the day-by-day simulation for every plant/material pair in model_verification.
forecasting, pendings, leadtime_reasons = forecast(
    model_verification['LocationID'],
    model_verification['MaterialID'],
)

In [0]:
# (rows, columns) followed by the first five lead-time explanation rows of the simulation
print(leadtime_reasons.shape)
leadtime_reasons.head(5)

In [0]:
# (rows, columns) followed by the first five simulated forecast rows
print(forecasting.shape)
forecasting.head(5)

In [0]:
# (rows, columns) followed by the first five orders (seed and simulated)
print(pendings.shape)
pendings.head(5)

In [0]:
# Identify the plant/material combinations whose flag is switched to 'hybrid' in the next cell:
#   1. keep forecast days on/after Start_dates with Exp_inv below 1,
#   2. count those days per (LocationID, MaterialID, Week) and keep weeks with exactly 7,
#   3. count such weeks per (LocationID, MaterialID) and keep pairs with at least 2.
sampledf = (
    forecasting[(forecasting['Exp_inv'] < 1) & (forecasting['SnapshotDate'] >= Start_dates)]
    .pivot_table(values='Exp_inv', index=['LocationID', 'MaterialID', 'Week'], aggfunc='count')
    .reset_index()
    .loc[lambda weekly: weekly['Exp_inv'] == 7]
    .pivot_table(values='Exp_inv', index=['LocationID', 'MaterialID'], aggfunc='count')
    .reset_index()
    .loc[lambda pairs: pairs['Exp_inv'] >= 2]
)
print(sampledf.shape)
sampledf.head(5)

In [0]:
# Every plant/material pair listed in sampledf gets its flag in models set to 'hybrid'.
for plant, matl in sampledf[['LocationID', 'MaterialID']].itertuples(index=False, name=None):
  models.loc[
      (models['LocationID'] == plant) & (models['MaterialID'] == matl),
      'flag',
  ] = 'hybrid'

In [0]:
# Renumber the rows 0..n-1 in place. drop=True discards the old index directly instead of
# first writing it to an 'index' column and dropping that column afterwards; the two-step
# form fails when the index has a name, is a MultiIndex, or an 'index' column already exists.
models.reset_index(drop=True, inplace=True)
print(models.shape)
models.head()

In [0]:
# Persist this run's model choices as the Spark table 'models_tobe_saved', replacing its contents.
mopos = spark.createDataFrame(models)
mopos.write.saveAsTable(
    'models_tobe_saved',
    mode='overwrite',
)

In [0]:
# Merge this run's model choices with the stored history. 'type' ranks the source
# (1 = this run, 2 = history); after sorting, keep='first' retains the row of this run
# whenever a LocationID/MaterialID pair exists in both tables.
models = spark.table('models_tobe_saved').toPandas()
models['type'] = 1
models_hist = spark.table(models_saved).toPandas()
models_hist['type'] = 2
models = (
    pd.concat([models, models_hist])
    .sort_values(by=['LocationID', 'MaterialID', 'type'])
    .drop_duplicates(subset=['LocationID', 'MaterialID'], keep='first')
    .drop(columns=['type'])
)

In [0]:
# Write the merged model choices back to the table named by models_saved, replacing its contents.
mopos = spark.createDataFrame(models)
mopos.write.saveAsTable(
    models_saved,
    mode='overwrite',
)

*************************END****************************************