### MODELS WITHOUT DUMMY VARIABLES
All the models in this section are fitted without dummy variables.

### 1. Linear Regression

In [None]:
#import pandas and assign it as pd
import pandas as pd
#'train_test_split' is imported from 'sklearn.model_selection'
#It splits a dataset into training and testing subsets for model evaluation
from sklearn.model_selection import train_test_split
#'os' provides operating-system utilities; here it is used to set the working directory that contains the csv file.
import os
#Import 'Ridge' and 'Lasso' from linear model for regularized linear regression 
from sklearn.linear_model import Ridge, Lasso
#Import accuracy_score from metrics for calculating accuracy score for classification models
from sklearn.metrics import accuracy_score

In [97]:
#Switch the working directory to the project folder so that the
#relative csv file name used when loading the data can be resolved.
#NOTE(review): this is an absolute, machine-specific path - adjust it before running elsewhere.
project_dir = 'F:/SUB3 - Big data for computational finance/Project'
os.chdir(project_dir)

In [98]:
#Load the credit score csv file (looked up relative to the current
#working directory) into a pandas DataFrame.
Credit_data = pd.read_csv(filepath_or_buffer='MLF_GP1_CreditScore.csv')

In [99]:
#Count the missing (NaN) cells in the whole DataFrame:
#isnull() flags every cell, the first sum() totals per column, the second sum() totals over all columns.
missing_total = Credit_data.isnull().sum().sum()
if missing_total == 0:
    print("No Missing Values present")
else:
    #Report the problem explicitly instead of staying silent,
    #so that missing data is noticed before any model is fitted.
    print(f"{missing_total} Missing Values present")

No Missing Values present


In [100]:
#Hold out 20% of the rows for testing (test_size=0.2); the remaining 80% are used for training.
#A fixed random_state makes the shuffle, and therefore the split, identical on every run.
train_credit, test_credit = train_test_split(
    Credit_data,
    test_size=0.2,
    random_state=50,
)

In [102]:
#Build the training inputs and target from the training split.
#A_train (features): every row, every column except the last two
#(the original notes name these last two columns InvGrd and Rating).
#B_train (target): every row of the second-to-last column only.
A_train, B_train = train_credit.iloc[:, :-2], train_credit.iloc[:, -2]

In [103]:
#Build the testing inputs and target from the testing split.
#A_test (features): every row, every column except the last two
#(the original notes name these last two columns InvGrd and Rating).
#B_test (target): every row of the second-to-last column only.
A_test, B_test = test_credit.iloc[:, :-2], test_credit.iloc[:, -2]


In [104]:
#Ridge regression: linear regression with an L2 penalty; alpha=0.1 sets the penalty strength.
#The penalty shrinks the coefficients, which helps to reduce overfitting.
ridge = Ridge(alpha=0.1)
#Train the model on the training features (A_train) and the training target (B_train).
ridge.fit(X=A_train, y=B_train)


Ridge(alpha=0.1)

In [105]:
#Lasso regression: linear regression with an L1 penalty; alpha=0.01 sets the penalty strength.
#The penalty shrinks the coefficients, which helps to reduce overfitting.
#NOTE(review): the earlier comment stated 0.1 while the code uses 0.01 - confirm the intended value.
lasso = Lasso(alpha=0.01)
#Train the model on the training features (A_train) and the training target (B_train).
lasso.fit(X=A_train, y=B_train)

Lasso(alpha=0.01)

In [106]:
#Predict the target for the test features (A_test) with both fitted models.
#The results are continuous values that are turned into 0/1 labels in the next step.
B_pred_ridge, B_pred_lasso = (fitted.predict(A_test) for fitted in (ridge, lasso))

In [107]:
#Convert the continuous predictions into class labels with a 0.5 cut-off:
#values greater than or equal to 0.5 become 1, everything else becomes 0.
#The comparison is done on the whole array at once and converted back to a plain list of ints.
B_pred_ridge_bin = (B_pred_ridge >= 0.5).astype(int).tolist()
B_pred_lasso_bin = (B_pred_lasso >= 0.5).astype(int).tolist()

In [108]:
#Accuracy = share of test rows whose thresholded prediction equals the true label (B_test).
accuracy_ridge = accuracy_score(y_true=B_test, y_pred=B_pred_ridge_bin)
accuracy_lasso = accuracy_score(y_true=B_test, y_pred=B_pred_lasso_bin)

In [109]:
#Print both accuracies, one per line, rounded to two decimals (.2f)
print(
    f"Accuracy of Ridge Regression Model: {accuracy_ridge:.2f}",
    f"Accuracy of Lasso Regression Model: {accuracy_lasso:.2f}",
    sep="\n",
)


Accuracy of Ridge Regression Model: 0.81
Accuracy of Lasso Regression Model: 0.81


### 2. Logistic Regression

In [110]:
#Import 'LogisticRegression' from linear model present in 'scikit-learn(library)'
from sklearn.linear_model import LogisticRegression

In [111]:
#Ridge-style logistic regression: the l2 penalty shrinks the coefficients.
#The 'liblinear' solver supports both the 'l1' and the 'l2' penalty.
#NOTE: 'ridge' and 'lasso' are rebound here, replacing the linear regression models fitted earlier.
ridge = LogisticRegression(penalty='l2', solver='liblinear')
ridge.fit(A_train, B_train)

#Lasso-style logistic regression: the l1 penalty can shrink coefficients exactly to zero.
#(Previously this model also used penalty='l2', which made it identical to the ridge model.)
lasso = LogisticRegression(penalty='l1', solver='liblinear')
lasso.fit(A_train, B_train)

#ridge.predict() takes A_test (input variables) as input
#and predicts the class label of the target variable directly
B_pred_ridge = ridge.predict(A_test)
#lasso.predict() takes A_test (input variables) as input
#and predicts the class label of the target variable directly
B_pred_lasso = lasso.predict(A_test)

#Calculates the accuracy of the predicted labels of each model against the true labels (B_test).
#No thresholding is needed because predict() already returns class labels.
accuracy_ridge = accuracy_score(B_test, B_pred_ridge)
accuracy_lasso = accuracy_score(B_test, B_pred_lasso)

#Accuracy values are printed rounded to two decimals (.2f)
print(f"Accuracy of Ridge Logistic Regression Model: {accuracy_ridge:.2f}")
print(f"Accuracy of Lasso Logistic Regression Model: {accuracy_lasso:.2f}")

Accuracy of Ridge Logistic Regression Model: 0.81
Accuracy of Lasso Logistic Regression Model: 0.81


### 3. Neural Network

In [112]:
#StandardScaler used to normalize the feature of dataset
#Scaling to have zero mean and unit variance
from sklearn.preprocessing import StandardScaler
#Sequential is used to add one layer at a time in sequence and
#Specify the input and output dimensions
from keras.models import Sequential
#Dense is a fully connected layer:
#every neuron is connected to every output of the previous layer
from keras.layers import Dense

In [113]:
#Standardize the input features to zero mean and unit variance
scaler = StandardScaler()
#fit_transform() learns the mean and standard deviation from the training data only
#and scales the training data with these statistics.
#NOTE: A_train and A_test are rebound to the scaled arrays, so any cell run after this one sees scaled data.
A_train = scaler.fit_transform(A_train)
#The test data is scaled with the statistics learned from the training data (no re-fitting).
A_test = scaler.transform(A_test)

#Creates a Sequential neural network model
#Here layers can be added one after another
model = Sequential()
#Adding dense layer with 64 neurons to the model,
#with rectified linear unit(ReLU) activation function
#input_dim is the number of input features, read from the columns of A_train
#(the previously used name X_train is not defined in this notebook and raised a NameError)
model.add(Dense(64, activation='relu', input_dim=A_train.shape[1]))
#Adding dense layer with 32 neurons to the model,
#with rectified linear unit(ReLU) activation function
model.add(Dense(32, activation='relu'))
#Adding dense layer with 16 neurons to the model,
#with rectified linear unit(ReLU) activation function
model.add(Dense(16, activation='relu'))
#Adds output layer with 1 neuron and sigmoid function, used for binary classification
model.add(Dense(1, activation='sigmoid'))

#Configure training process
#optimizer specifies the optimisation algorithm, here adam
#loss specifies the loss function, binary_crossentropy is commonly used for binary classification
#metrics lists the evaluation metrics reported during training
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#model.fit trains the model on the training data
#epochs specifies the number of times the training data is passed through the neural network
#batch_size specifies the number of samples used in each update step
#verbose controls the progress output: 0 = silent, 1 = progress bar, 2 = one line per epoch
#NOTE(review): no random seed is set for the network, so the accuracy can vary slightly between runs.
model.fit(A_train, B_train, epochs=50, batch_size=32, verbose=1)

#model.predict() returns one sigmoid output per test row, as a 2-D array with a single column
B_pred = model.predict(A_test)
#Flatten to 1-D first, then label each value as 1 if it is >= 0.5 and 0 otherwise
B_pred_bin = (B_pred.ravel() >= 0.5).astype(int).tolist()

#Calculates the accuracy of the binary predictions made by the neural network model.
accuracy = accuracy_score(B_test, B_pred_bin)

#Accuracy value is printed rounded to two decimals (.2f)
print(f"Accuracy of Neural Networks Model: {accuracy:.2f}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy of Neural Networks Model: 0.81
