**Importing Modules**

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

**Loading Data**

In [None]:
def load_data(path):
    '''
    Read a CSV file into a pandas DataFrame.

    Input path : location of the csv file (anything pd.read_csv accepts)
    Output : DataFrame with the file contents; a column named "Unnamed: 0"
             is removed when present and silently ignored when absent
    '''
    frame = pd.read_csv(path)
    # errors="ignore" keeps this a no-op for files without that column
    return frame.drop(columns=["Unnamed: 0"], errors="ignore")

**Creating target and predictor variables**

In [None]:
def target_and_predictor(data , target):
    '''
    Split a DataFrame into predictor columns and the target column.

    Input data : DataFrame holding both the features and the target
    target : name of the column to be predicted
    Output : X -> data with the target column dropped
             y -> the target column
    Raises Exception when target is not one of the columns of data
    '''
    if target in data.columns:
        predictors = data.drop(columns=[target])
        labels = data[target]
        return predictors, labels

    # reaching this point means the requested target column does not exist
    raise Exception(f"Target: {target} is not present in the data")


**Training models**

Random Forest

In [None]:
def random_forest_model(X, Y, k=10):
    '''
    Train and evaluate a random forest regressor over k random train/test splits.

    For every fold a fresh RandomForestRegressor is fitted on 80% of the data
    (standardized with a StandardScaler fitted on the training part only) and
    scored on the remaining 20%. The mean squared error and mean absolute
    error are printed for each fold, followed by the average MAE over all folds.

    Input : (X,Y) -> (Predictor, Target)
            k -> number of folds (random splits) to evaluate, default 10
    Output : The model fitted in the last fold. It was trained on standardized
             features and the scaler is not returned, so inputs must be
             standardized the same way before calling predict on it.
    Raises ValueError when k is smaller than 1
    '''
    if k < 1:
        raise ValueError("k must be a positive integer")

    fold_maes = []
    model = None
    for fold in range(k):

        #loading model
        model = RandomForestRegressor()
        scaler = StandardScaler()

        #splitting data with 20% as test data and rest as training data.
        #test_size (not train_size) gives the 80/20 split, and seeding with the
        #fold index gives every fold a different yet reproducible split
        #instead of scoring the same split k times.
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=0.2, random_state=fold
        )

        #Standardizing values using standard scaler; the scaler is fitted on
        #the training part only so no test statistics leak into training
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        #training model
        model.fit(X_train, Y_train)

        #target values predicted by model on test data
        y_predicted = model.predict(X_test)

        mse = mean_squared_error(Y_test, y_predicted)
        mae = mean_absolute_error(y_true=Y_test, y_pred=y_predicted)
        fold_maes.append(mae)

        print('Fold ' , fold, ': ')
        print('MSE: ',mse)
        print('MAE: ', mae)

    print(f"Average MAE: {(sum(fold_maes) / len(fold_maes)):.2f}")
    return model

Linear Regression

In [None]:
def linear_regression_model(X,Y):
    '''
    Train and evaluate a linear regression on predictor: X and target: Y.

    The data is split into 80% training and 20% test data with a fixed seed,
    the features are standardized with a StandardScaler fitted on the training
    part only, and the mean squared error followed by the mean absolute error
    on the test part are printed.

    Input : (X,Y) -> (Predictor, Target)
    Output : Trained model. It was fitted on standardized features and the
             scaler is not returned, so inputs must be standardized the same
             way before calling predict on it.
    '''

    scaler = StandardScaler()

    #splitting data with 20% as test data and rest as training data
    #(test_size, not train_size, so that 80% of the rows are used for training)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=3)

    #Standardizing values using standard scaler; fitted on the training part
    #only so no test statistics leak into training
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    #loading model
    model = LinearRegression()

    #training model
    model.fit(X_train,Y_train)

    #target values predicted by model on test data
    y_pred = model.predict(X_test)

    mse = mean_squared_error(Y_test, y_pred)
    print(mse)
    mae = mean_absolute_error(y_true = Y_test, y_pred = y_pred)
    print(mae)
    return model