<a href="https://colab.research.google.com/github/vamsikrishna00466/time_series/blob/main/Multivariate%20Time%20Series%20Updated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#importing Necessary Libraries
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

##Hyphothesis Testing Libraries 
from scipy.stats import shapiro
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.vector_ar.vecm import coint_johansen

## Algorithms for forcasting
from statsmodels.tsa.vector_ar.var_model import VAR
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split

##Matrix for undestanding the permance 
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error


## Importing Raw Data, Initial Preprocessing And Basic analysis

In [None]:
# Load the raw forecasting workbook into `df` (absolute path under
# /content/drive, presumably a mounted Google Drive in Colab).
# NOTE(review): the folder name "Forecasting " ends with a space before the
# slash -- confirm it matches the real directory name.
df=pd.read_excel('/content/drive/MyDrive/SCA Projects/Forecasting /Raw Data for Forecasting Project.xlsx')


In [None]:
##Initial preprocessing(12-12-2021) work-Function
def data_preprocess1(df):
    """Reshape the raw wide sheet into a long table with one row per part and month.

    Steps performed:
      1. Drop every column from "Unnamed: 6" through "Receipt Details".
      2. Promote the first data row to be the header and discard it.
      3. Remove the 'Total' column.
      4. Melt the remaining (month) columns into 'Month' / 'demand' pairs.
      5. Convert 'Month' to a monthly period and the numeric columns to float.

    Args:
        df: Raw DataFrame as read from the workbook. It is not modified.

    Returns:
        A new long-format DataFrame with the columns 'Part No.', 'Unit Price',
        'Category', 'Sales MOQ', 'Opening Stock', 'Month' and 'demand'.
    """
    id_columns = ["Part No.", "Unit Price", 'Category', 'Sales MOQ', 'Opening Stock']
    numeric_columns = ['Unit Price', 'Sales MOQ', 'Opening Stock', 'demand']

    # Label-based slice over the column names: both end labels are included.
    unwanted = df.columns.to_series()["Unnamed: 6":"Receipt Details"]
    df = df.drop(unwanted, axis=1)

    # The real header lives in the first row of the sheet.
    df.columns = df.iloc[0]
    # `drop=True` (the original passed the `bool` type itself, which only
    # worked because a class object happens to be truthy).
    df = df[1:].reset_index(drop=True)
    del df['Total']

    df = df.melt(id_vars=id_columns, var_name="Month", value_name="demand")
    df['Month'] = pd.to_datetime(df['Month']).dt.to_period('M')
    for column in numeric_columns:
        df[column] = df[column].astype('float')
    return df

In [None]:
# Run the initial preprocessing on the raw sheet; the long-format result is
# stored in `df1` while `df` keeps the raw data.
# NOTE(review): later cells read 'Month', 'demand' and 'Unit Price' from `df`
# (not `df1`) -- confirm which variable is meant to hold the preprocessed data.
df1 = data_preprocess1(df)

In [None]:
##Basic Analysis Function for initial preprocessed data
def basic_analysis(df):
    """Print an exploratory overview of `df` to stdout.

    The report contains, in order: the head of the frame, its shape, the
    columns with missing values, the dtypes, the object / int-float /
    period[M] column lists, and three `describe()` summaries (numeric columns
    rounded to integers, object columns, and the 'Month' column).

    Args:
        df: DataFrame to summarise; it must contain a 'Month' column.

    Returns:
        None. Everything is printed.
    """
    print('*****Initial Information about Data*****\n')
    print(df.head())
    print('\nNumber of Rows and Columns:', df.shape, '\n--------------------')

    # Keep only the columns that actually have missing entries.
    null_counts = df.isnull().sum()
    print('missing data:\n', null_counts[null_counts > 0])
    print('--------------------\nData Types:\n', df.dtypes)

    object_columns = df.select_dtypes(include=['object']).columns.to_list()
    print('--------------------\nCategorical variables:', object_columns,
          'And Number of columns:', len(object_columns))

    numeric_columns = df.select_dtypes(include=[int, float]).columns.to_list()
    print('--------------------\nNumerical variables:', numeric_columns,
          'And Number of columns:', len(numeric_columns))

    period_columns = df.select_dtypes(include=['period[M]']).columns.to_list()
    print('--------------------\nDate time variables:', period_columns)
    print('--------------------\n')

    # Each summary is evaluated lazily so output order matches evaluation order.
    summaries = (
        lambda: round(df.describe()).astype(int),
        lambda: df.describe(include=object),
        lambda: df['Month'].describe(),
    )
    for build_summary in summaries:
        print(build_summary())
        print('--------------------\n')


In [None]:
# Print the exploratory report for the preprocessed long-format data (`df1`)
basic_analysis(df1)

## Hypothesis Tests

In [None]:
## Hypothesis testing for distribution checking (normally distributed or not)
# Runs a Shapiro-Wilk test on every column of `df3` and prints the verdict.
# NOTE(review): `df3` is not assigned at module level anywhere in this file
# (only inside data_preparing_Time_series_to_supervise_ML) -- confirm where it
# is supposed to come from before running this cell.
alpha = 0.05
H0 = 'Data is normal'
Ha = 'Data is not normal'
for i in df3.columns.to_list():
    p_raw = shapiro(df3[i])[1]
    # Rounded value is for display only; the decision uses the exact p-value
    # so that e.g. 0.054 is not rounded down to 0.05 and wrongly rejected.
    p = round(p_raw, 4)
    print(f'**************{i}**************')
    if p_raw > alpha:
        print(f"{p} > {alpha}. We fail to reject Null Hypothesis. {H0}")
    else:
        print(f"{p} <= {alpha}. We reject Null Hypothesis. {Ha}")
    print('--------------\n')


In [None]:
##Stationary test
def adfuller_test(series, signif=0.05, name='', verbose=False):
    """Run an Augmented Dickey-Fuller test on `series` and print a report.

    Args:
        series: The time series to test.
        signif: Significance level the p-value is compared against.
        name: Label shown in the report header.
        verbose: Unused; kept so existing calls keep working.

    Returns:
        None. The report is printed.
    """
    r = adfuller(series, autolag='AIC')
    output = {'test_statistic': round(r[0], 4), 'pvalue': round(r[1], 4),
              'n_lags': round(r[2], 4), 'n_obs': r[3]}
    p_value = output['pvalue']  # rounded, for display only

    def adjust(val, length=6):
        return str(val).ljust(length)

    # Print Summary
    print(f'    Augmented Dickey-Fuller Test on "{name}"', "\n   ", '-'*47)
    print(f' Null Hypothesis: Data has unit root. Non-Stationary.')
    print(f' Significance Level    = {signif}')
    print(f' Test Statistic        = {output["test_statistic"]}')
    print(f' No. Lags Chosen       = {output["n_lags"]}')

    for key, val in r[4].items():
        print(f' Critical value {adjust(key)} = {round(val, 3)}')

    # Decide on the exact p-value: comparing the 4-decimal rounded value could
    # flip the verdict for p-values just above `signif`.
    if r[1] <= signif:
        print(f" => P-Value = {p_value}. Rejecting Null Hypothesis.")
        print(f" => Series is Stationary.")
    else:
        print(f" => P-Value = {p_value}. Weak evidence to reject the Null Hypothesis.")
        print(f" => Series is Non-Stationary.")

# ADF Test on each column
# `DataFrame.iteritems()` was removed in pandas 2.0; `items()` yields the same
# (column name, Series) pairs on old and new pandas alike.
for name, column in df3.items():
    adfuller_test(column, name=column.name)
    print('\n')

In [None]:
##grangercausalitytests test
maxlag = 12
test = 'ssr_chi2test'


def grangers_causation_matrix(data, variables, test='ssr_chi2test', verbose=False):
    """Build a matrix of minimum Granger-causality p-values for all variable pairs.

    Rows are the response (Y) variables and columns the predictor (X)
    variables. Each cell holds the smallest p-value of the chosen `test` over
    lags 1..maxlag, where `maxlag` is read from the module-level variable.
    A p-value below the significance level (0.05) means the null hypothesis
    "X does not cause Y" can be rejected.

    data      : pandas dataframe containing the time series variables
    variables : list containing names of the time series variables.
    test      : key of the statistic to read from each lag's result.
    verbose   : when true, print the per-lag p-values of every pair.
    """
    size = len(variables)
    matrix = pd.DataFrame(np.zeros((size, size)), columns=variables, index=variables)
    for predictor in matrix.columns:
        for response in matrix.index:
            outcome = grangercausalitytests(data[[response, predictor]],
                                            maxlag=maxlag, verbose=False)
            # Results are keyed by lag number, starting at 1.
            p_values = [round(outcome[lag][0][test][1], 4)
                        for lag in range(1, maxlag + 1)]
            if verbose:
                print(f'Y = {response}, X = {predictor}, P Values = {p_values}')
            matrix.loc[response, predictor] = np.min(p_values)
    matrix.columns = [var + '_x' for var in variables]
    matrix.index = [var + '_y' for var in variables]
    return matrix


grangers_causation_matrix(df3, variables=df3.columns)

In [None]:
##coint_johansen test
def cointegration_test(df, alpha=0.05):
    """Perform Johansen's cointegration test and print a per-column summary.

    Args:
        df: DataFrame whose columns are the series to test.
        alpha: Significance level; one of 0.10, 0.05 or 0.01, matching the
            three critical-value columns that are looked up.

    Returns:
        None. The summary is printed.

    Raises:
        KeyError: If `alpha` does not correspond to the 90/95/99% levels.
    """
    out = coint_johansen(df, -1, 5)

    # Look the critical-value column up by integer confidence level. The
    # original keyed on str(1 - alpha), which is '0.9' (not '0.90') for
    # alpha=0.10 and therefore failed for that supported level.
    column_by_level = {90: 0, 95: 1, 99: 2}
    level = int(round((1 - alpha) * 100))
    try:
        cv_column = column_by_level[level]
    except KeyError:
        raise KeyError(f'alpha must be one of 0.10, 0.05 or 0.01; got {alpha}') from None

    traces = out.lr1
    cvts = out.cvt[:, cv_column]

    def adjust(val, length=6):
        return str(val).ljust(length)

    # Summary (header now reports the confidence level actually used)
    print(f'Name   ::  Test Stat > C({level}%)    =>   Signif  \n', '--'*20)
    for col, trace, cvt in zip(df.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace, 2), 9), ">", adjust(cvt, 8), ' =>  ', trace > cvt)


cointegration_test(df3)

## SKU wise Forecasting -- VAR(Vector Auto Regression)


In [None]:
# Data preparation for SKU-wise forecasting: total demand per month and part,
# as a flat table with 'Month', 'Part No.' and 'demand' columns.
df1 = (
    df.groupby(by=['Month', 'Part No.'])['demand']
    .sum()
    .reset_index()
)

# One column per part number, one row per month
df2 = df1.pivot(values='demand', index='Month', columns='Part No.')

In [None]:
# Chronological split: first 80% of the rows for training, the rest for validation
train, valid = df2[:int(0.8 * len(df2))], df2[int(0.8 * len(df2)):]


In [None]:
#VAR Training
# Build a vector autoregression on the SKU-level training window and fit it.
# fit() is called without arguments, so lag order and trend are left to the
# library defaults.
model = VAR(endog=train)
model_fit = model.fit()

In [None]:
# make prediction on validation
# The fitted data is exposed as `endog`; the older `y` alias is no longer
# available on VARResults in current statsmodels releases.
prediction = model_fit.forecast(model_fit.endog, steps=len(valid))

In [None]:
# Wrap the forecast array in a DataFrame carrying the same column labels as df2
df_pred = pd.DataFrame(data=prediction, columns=list(df2.columns))

In [None]:
# Per-SKU validation error of the VAR forecast (MAPE and RMSE).
# Rows are collected in a list because DataFrame.append was removed in
# pandas 2.0.
error_rows = []
for i in valid.columns.to_list():
    er = round(mean_absolute_percentage_error(valid[i], df_pred[i]), 3)
    # RMSE = sqrt(MSE). The original took sqrt of an already-rooted value
    # (squared=False), which produced the fourth root of the MSE.
    rmse = round(np.sqrt(mean_squared_error(valid[i], df_pred[i])), 3)
    error_rows.append({'Part No.': i, 'mape': er, 'rmse': rmse})
error = pd.DataFrame(error_rows, columns=['Part No.', 'mape', 'rmse'])
error['mape_int'] = round(error['mape']).astype(int)

In [None]:
# Show the error table ordered by rounded MAPE, largest first (the sorted
# frame is only displayed, not stored).
error.sort_values(by='mape_int',ascending=False)


## Category-wise Sales Forecasting -- VAR


In [None]:
# Convert demand into sales value (demand x unit price), then total the sales
# per month and category.
df['sales'] = df['demand'].mul(df['Unit Price'])

df1 = (
    df.groupby(by=['Month', 'Category'])['sales']
    .sum()
    .reset_index()
)

In [None]:
# Data preparation for forecasting: one column per category, one row per
# month, with the axis names ('Month' / 'Category') cleared afterwards.
df2 = (
    df1.pivot(values='sales', index='Month', columns='Category')
    .rename_axis(None)
    .rename_axis(None, axis=1)
)

In [None]:
# Chronological split: first 80% of the rows for training, the rest for validation
train, valid = df2[:int(0.8 * len(df2))], df2[int(0.8 * len(df2)):]

In [None]:
# Fit a VAR model on the category-level training window.
# NOTE: VAR is imported again here although the first cell already imports
# it; the repeat is harmless.
from statsmodels.tsa.vector_ar.var_model import VAR

model = VAR(endog=train)
model_fit = model.fit()

In [None]:
# Display the estimation summary of the fitted VAR model
model_fit.summary()

In [None]:
# make prediction on validation
# The fitted data is exposed as `endog`; the older `y` alias is no longer
# available on VARResults in current statsmodels releases.
prediction = model_fit.forecast(model_fit.endog, steps=len(valid))

In [None]:
df_pred = pd.DataFrame(prediction, columns=df2.columns.to_list())

# Per-category validation error of the VAR forecast. Rows are collected in a
# list because DataFrame.append was removed in pandas 2.0.
# NOTE(review): the label column is still called 'Part No.' although it holds
# category names here -- rename once downstream users are checked.
error_rows = []
for i in valid.columns.to_list():
    er = round(mean_absolute_percentage_error(valid[i], df_pred[i]), 3)  # mape calculation
    # RMSE = sqrt(MSE); the original applied the square root twice.
    rmse = round(np.sqrt(mean_squared_error(valid[i], df_pred[i])), 3)  # rmse calculation
    error_rows.append({'Part No.': i, 'mape': er, 'rmse': rmse})
error = pd.DataFrame(error_rows, columns=['Part No.', 'mape', 'rmse'])

error

In [None]:
## forecasting future
# NOTE(review): `model_fit` was fitted on `train` only, so these steps start
# right after the training window, not after the last observed month.
months_to_forecast = 20
forcast_future = model_fit.forecast(model_fit.endog, steps=months_to_forecast)
# Build the frame from the new forecast (the original reused `prediction`,
# i.e. the validation forecast, so the future forecast was never plotted).
forcast_future1 = pd.DataFrame(forcast_future, columns=df2.columns.to_list())
forcast_future1.plot()

## Time series to Machine learning


In [None]:
def data_preparing_Time_series_to_supervise_ML(df, n_lags=3):
    """Turn the long demand table into a supervised-learning table of lagged sales.

    Monthly sales (demand x unit price) are totalled per category, pivoted to
    one column per category, rounded to integers, and extended with lagged
    copies of every category column named ``"<category>(t-k)"`` for
    k = 1..n_lags. Rows without a full lag history are dropped, and the month
    index is replaced by numeric 'month' and 'year' columns.

    Args:
        df: Long-format table with 'Month', 'Category', 'demand' and
            'Unit Price' columns. NOTE: a 'sales' column is added to (or
            overwritten on) this frame, as in the original implementation.
        n_lags: Number of lagged copies to create per category (default 3).

    Returns:
        Tuple ``(table, col)`` where ``table`` is the supervised-learning
        DataFrame and ``col`` is the list of category (target) column names.
    """
    #creating sales column
    df['sales'] = df['demand'] * df['Unit Price']
    monthly = df.groupby(by=['Month', 'Category'])['sales'].sum().reset_index()

    ##converting categories into columns
    wide = monthly.pivot(index='Month', columns='Category', values='sales')
    #resetting default index
    wide = wide.rename_axis(None).rename_axis(None, axis=1)
    col = wide.columns.to_list()

    table = round(wide).astype(int)

    # Lag features for every category. The original hard-coded the four
    # category names and lost 'Kit(t-2)' because that statement had been
    # merged into a comment line.
    for lag in range(1, n_lags + 1):
        for name in col:
            table[f'{name}(t-{lag})'] = table[name].shift(lag)

    table.dropna(inplace=True)

    table = table.reset_index()
    stamps = table['index']
    # Period data already offers .dt.month/.dt.year, and pd.to_datetime
    # refuses PeriodDtype input, so only convert other index types.
    if not isinstance(stamps.dtype, pd.PeriodDtype):
        stamps = pd.to_datetime(stamps)
    table['month'] = stamps.dt.month
    table['year'] = stamps.dt.year

    del table['index']
    return table, col

In [None]:
# Build the supervised-learning table (`df_ML`) and the list of target
# category columns (`col`) from `df`.
df_ML, col = data_preparing_Time_series_to_supervise_ML(df)


In [None]:
# Separate dependent (Y: the four category sales columns) and independent
# (X: every remaining column) variables.
Y = df_ML[['Kit', 'Kit-Child Part', 'Obsolete', 'Regular']]
X = df_ML.drop(columns=Y.columns)

## Category-wise Sales Forecasting -- With Linear Regression


In [None]:
def model_perfamance(X, y):
    """Fit a LinearRegression on an 80/20 split and report the test MAPE.

    Args:
        X: Feature table.
        y: Target series for one category; its ``name`` labels the printout.

    Returns:
        Tuple ``(y_test_predict, y_test)``: predictions for the held-out rows
        and the matching true values.
    """
    # NOTE(review): train_test_split shuffles rows, so the hold-out set is not
    # the most recent months -- consider a chronological split for time series.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=42)
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_test_predict = model.predict(X_test)
    label = getattr(y, 'name', None)
    print(f'MAPE Error of {label}:=--', mean_absolute_percentage_error(y_test, y_test_predict))
    return y_test_predict, y_test


print('********************Forecasting error with Linear regression********************')
# Rows are collected in a list because DataFrame.append was removed in pandas 2.0.
error_rows = []
for i in col:
    # Use X (lag and calendar features only): df_ML still contains the target
    # columns, which would leak the answer into the features.
    y_test_predict, y_test = model_perfamance(X, Y[i])
    er = mean_absolute_percentage_error(y_test, y_test_predict)  # mape calculation
    # RMSE = sqrt(MSE); the original applied the square root twice.
    rmse = round(np.sqrt(mean_squared_error(y_test, y_test_predict)), 3)  # rmse calculation
    error_rows.append({'Part No.': i, 'mape': er, 'rmse': rmse})
error = pd.DataFrame(error_rows, columns=['Part No.', 'mape', 'rmse'])

# Display the table (the original referenced an undefined `error_mape`)
error.round(3)

## Category-wise Sales Forecasting -- With Decision Tree


In [None]:
def model_perfamance(X, y):
    """Fit a depth-2 DecisionTreeRegressor on an 80/20 split and report the test MAPE.

    Args:
        X: Feature table.
        y: Target series for one category; its ``name`` labels the printout.

    Returns:
        Tuple ``(y_test_predict, y_test)``: predictions for the held-out rows
        and the matching true values.
    """
    # NOTE(review): train_test_split shuffles rows, so the hold-out set is not
    # the most recent months -- consider a chronological split for time series.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=42)
    model = DecisionTreeRegressor(max_depth=2)
    model.fit(X_train, y_train)
    y_test_predict = model.predict(X_test)
    label = getattr(y, 'name', None)
    print(f'MAPE Error of {label}:=--', mean_absolute_percentage_error(y_test, y_test_predict))
    return y_test_predict, y_test


print('********************Forecasting error with DecisionTreeRegressor********************')
# Rows are collected in a list because DataFrame.append was removed in pandas 2.0.
error_rows = []
for i in col:
    # Use X (lag and calendar features only): df_ML still contains the target
    # columns, which would leak the answer into the features.
    y_test_predict, y_test = model_perfamance(X, Y[i])
    er = mean_absolute_percentage_error(y_test, y_test_predict)  # mape calculation
    # RMSE = sqrt(MSE); the original applied the square root twice.
    rmse = round(np.sqrt(mean_squared_error(y_test, y_test_predict)), 3)  # rmse calculation
    error_rows.append({'Part No.': i, 'mape': er, 'rmse': rmse})
error = pd.DataFrame(error_rows, columns=['Part No.', 'mape', 'rmse'])

# Display the table (the original referenced an undefined `error_mape`)
error.round(3)

## Category-wise Sales Forecasting -- With Random Forest


In [None]:
def model_perfamance(X, y):
    """Fit a depth-2 RandomForestRegressor on an 80/20 split and report the test MAPE.

    Args:
        X: Feature table.
        y: Target series for one category; its ``name`` labels the printout.

    Returns:
        Tuple ``(y_test_predict, y_test)``: predictions for the held-out rows
        and the matching true values.
    """
    # NOTE(review): train_test_split shuffles rows, so the hold-out set is not
    # the most recent months -- consider a chronological split for time series.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=42)
    model = RandomForestRegressor(max_depth=2, random_state=0)
    model.fit(X_train, y_train)
    y_test_predict = model.predict(X_test)
    label = getattr(y, 'name', None)
    print(f'MAPE Error of {label}:=--', mean_absolute_percentage_error(y_test, y_test_predict))
    return y_test_predict, y_test


print('********************Forecasting error with RandomForestRegressor********************')
# Rows are collected in a list because DataFrame.append was removed in pandas 2.0.
error_rows = []
for i in col:
    # Use X (lag and calendar features only): df_ML still contains the target
    # columns, which would leak the answer into the features.
    y_test_predict, y_test = model_perfamance(X, Y[i])
    er = mean_absolute_percentage_error(y_test, y_test_predict)  # mape calculation
    # RMSE = sqrt(MSE); the original applied the square root twice.
    rmse = round(np.sqrt(mean_squared_error(y_test, y_test_predict)), 3)  # rmse calculation
    error_rows.append({'Part No.': i, 'mape': er, 'rmse': rmse})
error = pd.DataFrame(error_rows, columns=['Part No.', 'mape', 'rmse'])

# Display the table (the original referenced an undefined `error_mape`)
error.round(3)

## Category-wise Sales Forecasting -- With XGBoost


In [None]:
def model_perfamance(X, y):
    """Fit an XGBRegressor on an 80/20 split and report the test MAPE.

    Args:
        X: Feature table.
        y: Target series for one category; its ``name`` labels the printout.

    Returns:
        Tuple ``(y_test_predict, y_test)``: predictions for the held-out rows
        and the matching true values.
    """
    # NOTE(review): train_test_split shuffles rows, so the hold-out set is not
    # the most recent months -- consider a chronological split for time series.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=42)
    model = XGBRegressor(n_estimators=10, max_depth=20, verbosity=2)
    model.fit(X_train, y_train)
    y_test_predict = model.predict(X_test)
    label = getattr(y, 'name', None)
    print(f'MAPE Error of {label}:=--', mean_absolute_percentage_error(y_test, y_test_predict))
    return y_test_predict, y_test


print('********************Forecasting error with XGBRegressor********************')
# Rows are collected in a list because DataFrame.append was removed in pandas 2.0.
error_rows = []
for i in col:
    # Use X (lag and calendar features only): df_ML still contains the target
    # columns, which would leak the answer into the features.
    y_test_predict, y_test = model_perfamance(X, Y[i])
    er = mean_absolute_percentage_error(y_test, y_test_predict)  # mape calculation
    # RMSE = sqrt(MSE); the original applied the square root twice.
    rmse = round(np.sqrt(mean_squared_error(y_test, y_test_predict)), 3)  # rmse calculation
    error_rows.append({'Part No.': i, 'mape': er, 'rmse': rmse})
error = pd.DataFrame(error_rows, columns=['Part No.', 'mape', 'rmse'])

# Display the table (the original referenced an undefined `error_mape`)
error.round(3)

## Category-wise Sales Forecasting -- With AdaBoost


In [None]:
def model_perfamance(X, y):
    """Fit an AdaBoostRegressor on an 80/20 split and report the test MAPE.

    Args:
        X: Feature table.
        y: Target series for one category; its ``name`` labels the printout.

    Returns:
        Tuple ``(y_test_predict, y_test)``: predictions for the held-out rows
        and the matching true values.
    """
    # NOTE(review): train_test_split shuffles rows, so the hold-out set is not
    # the most recent months -- consider a chronological split for time series.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=42)
    # The original call was missing its closing parenthesis.
    model = AdaBoostRegressor(random_state=0, n_estimators=100)
    model.fit(X_train, y_train)
    y_test_predict = model.predict(X_test)
    label = getattr(y, 'name', None)
    print(f'MAPE Error of {label}:=--', mean_absolute_percentage_error(y_test, y_test_predict))
    return y_test_predict, y_test


print('********************Forecasting error with AdaBoostRegressor********************')
# Rows are collected in a list because DataFrame.append was removed in pandas 2.0.
error_rows = []
for i in col:
    # Use X (lag and calendar features only): df_ML still contains the target
    # columns, which would leak the answer into the features.
    y_test_predict, y_test = model_perfamance(X, Y[i])
    er = mean_absolute_percentage_error(y_test, y_test_predict)  # mape calculation
    # RMSE = sqrt(MSE); the original applied the square root twice.
    rmse = round(np.sqrt(mean_squared_error(y_test, y_test_predict)), 3)  # rmse calculation
    error_rows.append({'Part No.': i, 'mape': er, 'rmse': rmse})
error = pd.DataFrame(error_rows, columns=['Part No.', 'mape', 'rmse'])

# Display the table (the original referenced an undefined `error_mape`)
error.round(3)