In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the Kaggle input directory,
# then print one path per line in os.walk order.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# Any results you write to the current directory are saved as output.

In [None]:
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense

import pandas as pd 
from sklearn.metrics import f1_score

In [None]:
# Load the raw credit-screening records.
# header=None: the column names are assigned by hand in the next cell, which indicates the
# file carries no header row. Without it pandas would consume the first record as the header
# and that row would silently disappear from the data.
dataset = pd.read_csv('/kaggle/input/creditscreening/credit-screening.data', header=None)

dataset.head()

In [None]:
# Label the fifteen attribute columns A1..A15 and the final (target) column 'class'.
colNames = ["A" + str(n) for n in range(1, 16)]
colNames.append('class')

dataset.columns = colNames
dataset.tail()

In [None]:
# Count the values pandas already recognises as missing in each column.
# '?' placeholder strings are not counted here; they are only converted to NaN in a later cell.
dataset.isna().sum()

In [None]:
# Inspect the dtype pandas inferred for each column.
dataset.dtypes

In [None]:
# Turn every '?' placeholder into NaN so that pandas treats it as missing,
# then show how many missing values each column now has.
dataset = dataset.replace('?', np.nan)
dataset.isna().sum()

In [None]:
# Fill each missing value with the last valid value above it in the same column.
# ffill() is the supported spelling; fillna(method='pad') is deprecated in recent pandas.
# Note: a missing value in the very first row has nothing above it and stays NaN,
# which the count below would reveal.
dataset = dataset.ffill()
dataset.isna().sum()

In [None]:
# Cast the two numeric columns that were read as strings to proper numeric dtypes
# (A14 -> 64-bit integer, A2 -> 64-bit float), then show the resulting dtypes.
dataset = dataset.astype({'A14': 'int64', 'A2': 'float64'})
dataset.dtypes

In [None]:
# Convert the symbolic attribute columns to pandas' 'category' dtype in one pass
# instead of nine copy-pasted assignments.
categorical_columns = ['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13']
dataset = dataset.astype({column: 'category' for column in categorical_columns})

# DataFrame.info() prints its report itself and returns None, so it must not be wrapped in
# print() (that would emit a stray "None" line after the report).
dataset.info()

In [None]:
# Replace every categorical column by the integer code of its category.
for column in ('A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13'):
    dataset[column] = dataset[column].cat.codes

In [None]:
# Preview the first rows after the categorical columns were replaced by integer codes.
dataset.head()

### Setting the NumPy seed for repeatability

In [None]:
# Seed NumPy's global random number generator.
# NOTE(review): this seeds NumPy only; Keras weight initialisation and batch shuffling
# presumably draw from the backend's own generator, so runs may still differ — confirm and
# seed the backend as well if exact repeatability is required.
np.random.seed(1337)

In [None]:
# Persist the fully numeric table; the row index is not written to the file.
dataset.to_csv('credit-screening-all-numerics.csv', index=False)

In [None]:
from sklearn.model_selection import KFold

# 5-fold cross-validation with shuffling. Two reasons for shuffle=True:
#  * scikit-learn rejects a random_state when shuffle=False (ValueError in recent releases),
#    because the seed would have no effect;
#  * without shuffling the folds follow file order, which skews the class balance of each
#    fold if the file happens to be grouped by class.
cv = KFold(n_splits=5, random_state=42, shuffle=True)

# Features are every column except the last; the last column holds the '+'/'-' label.
X = dataset.iloc[:, :-1]

# Encode the label as 1 ('+') / 0 ('-') on a new integer Series instead of replacing in place
# on a slice of `dataset`. An unexpected label maps to NaN and makes the int cast fail loudly.
Y = dataset.iloc[:, -1].map({'+': 1, '-': 0}).astype('int64')

print(len(X.columns))

### Normalizing data

In [None]:
# Min-max scale every feature column: subtract the column minimum and divide by the
# column range (max - min).
column_min = X.min()
column_range = X.max() - column_min
X = (X - column_min) / column_range
print(X.head())

In [None]:
from keras import backend as K

In [None]:
def doTrainAndEvaluation(model, hiddenlayerCount):
    """Cross-validate `model` and return its mean macro-F1 over all folds.

    Uses the notebook-level `cv` splitter, feature frame `X` and label series `Y`.
    For every fold the model is trained for 10 epochs (batch size 20) on the training
    rows, predictions on the held-out rows are thresholded at 0.5, and the macro-averaged
    F1 score is recorded.

    Parameters
    ----------
    model : compiled Keras model
        Trained from the weights it has when passed in; see the reset note below.
    hiddenlayerCount : any
        Only used to label the printed summary.

    Returns
    -------
    The mean of the per-fold F1 scores (as computed by np.mean).
    """
    # Snapshot the starting weights so every fold trains from the same state. Without this
    # the model keeps learning across folds and later folds are evaluated on rows the model
    # has already been trained on, which inflates the score.
    initial_weights = model.get_weights()

    scores = []

    for train_index, test_index in cv.split(X):
        # An unbuilt model has no weights yet (empty list); nothing to restore in that case.
        # NOTE(review): only the layer weights are reset; optimizer state (e.g. Adam moments)
        # presumably carries over between folds — confirm whether that matters here.
        if initial_weights:
            model.set_weights(initial_weights)

        x_train, x_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

        model.fit(x_train, y_train, batch_size=20, epochs=10)
        y_pred = model.predict(x_test, batch_size=100, verbose=1)
        # Threshold the raw outputs at 0.5 and flatten the (n, 1) column to a 1-D label vector.
        y_pred = np.where(y_pred > 0.5, 1, 0).ravel()
        f1 = f1_score(y_test, y_pred, average='macro')
        scores.append(f1)

    mean_f1 = np.mean(scores, axis=0)

    print("")
    print("##########################################################################################")

    print("Hidden layer count %s Mean values for f1:" % hiddenlayerCount)
    print(mean_f1)

    return mean_f1

In [None]:
import matplotlib.pylab as plt

def plot_summary(result_dict, s = "hidden layer neuron count"):
    """Plot the F1 score of every setting in `result_dict` and print the best one.

    `result_dict` maps a setting (x-axis value) to its F1 score (y-axis value); `s` is the
    x-axis label and is also used in the printed summary line.
    """
    settings, f1_values = zip(*result_dict.items())

    plt.plot(settings, f1_values)
    plt.xlabel(s)
    plt.ylabel('F1 value')

    # Highest-scoring (setting, score) pair; on ties the first one in dict order wins.
    best_setting, best_f1 = max(result_dict.items(), key=lambda pair: pair[1])

    print("".join(("Maximum f1 val=", str(best_f1), ", ", s, "=", str(best_setting))))

### Model 1

* loss=mean_squared_error
* activation=sigmoid
* inputlayer=15
* outputlayer=1

In [None]:
# Model 1: one sigmoid hidden layer of 1..14 units, sigmoid output, mean-squared-error loss.
# Each width is cross-validated and its mean F1 stored under the width.
matrices_variation = {}

for units in range(1, 15):
    model1 = Sequential([
        Dense(units, input_dim=15, activation='sigmoid'),
        Dense(1, activation='sigmoid'),
    ])
    model1.compile(loss='mean_squared_error', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model1, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 1 sweep and print the best width.
plot_summary(matrices_variation)

### Model 2

* loss=binary_crossentropy
* activation=sigmoid
* inputlayer=15
* outputlayer=1

In [None]:
# Model 2: one sigmoid hidden layer of 1..14 units, sigmoid output, binary cross-entropy loss.
# Each width is cross-validated and its mean F1 stored under the width.
matrices_variation = {}

for units in range(1, 15):
    model2 = Sequential([
        Dense(units, input_dim=15, activation='sigmoid'),
        Dense(1, activation='sigmoid'),
    ])
    model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model2, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 2 sweep and print the best width.
plot_summary(matrices_variation)

### Model 3
* loss=binary_crossentropy
* activation=rectified linear unit (relu)
* inputlayer=15
* outputlayer=1

In [None]:
# Model 3: one relu hidden layer of 1..14 units, relu output, binary cross-entropy loss.
# NOTE(review): binary_crossentropy expects predictions in [0, 1]; a relu output can exceed 1,
# so its values are not valid probabilities — confirm this pairing is intentional.
matrices_variation = {}

for units in range(1, 15):
    model3 = Sequential([
        Dense(units, input_dim=15, activation='relu'),
        Dense(1, activation='relu'),
    ])
    model3.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model3, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 3 sweep and print the best width.
plot_summary(matrices_variation)

### Model 4
* loss=mean_squared_error
* activation=rectified linear unit (relu)
* inputlayer=15
* outputlayer=1

In [None]:
# Model 4: one relu hidden layer of 1..14 units, relu output, mean-squared-error loss.
# Each width is cross-validated and its mean F1 stored under the width.
matrices_variation = {}

for units in range(1, 15):
    model4 = Sequential([
        Dense(units, input_dim=15, activation='relu'),
        Dense(1, activation='relu'),
    ])
    model4.compile(loss='mean_squared_error', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model4, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 4 sweep and print the best width.
plot_summary(matrices_variation)

### Model 5
* loss=mean_squared_error
* activation=tanh
* inputlayer=15
* outputlayer=1

In [None]:
# Model 5: one tanh hidden layer of 1..14 units, tanh output, mean-squared-error loss.
# Each width is cross-validated and its mean F1 stored under the width.
matrices_variation = {}

for units in range(1, 15):
    model5 = Sequential([
        Dense(units, input_dim=15, activation='tanh'),
        Dense(1, activation='tanh'),
    ])
    model5.compile(loss='mean_squared_error', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model5, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 5 sweep and print the best width.
plot_summary(matrices_variation)

### Model 6
* loss=binary_crossentropy
* activation=tanh
* inputlayer=15
* outputlayer=1

In [None]:
# Model 6: one tanh hidden layer of 1..14 units, tanh output, binary cross-entropy loss.
# NOTE(review): binary_crossentropy expects predictions in [0, 1]; a tanh output can be
# negative, so its values are not valid probabilities — confirm this pairing is intentional.
matrices_variation = {}

for units in range(1, 15):
    model6 = Sequential([
        Dense(units, input_dim=15, activation='tanh'),
        Dense(1, activation='tanh'),
    ])
    model6.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model6, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 6 sweep and print the best width.
plot_summary(matrices_variation)

### Model 7
* loss=mean_squared_error
* activation=linear
* inputlayer=15
* outputlayer=1

In [None]:
# Model 7: one linear hidden layer of 1..14 units, linear output, mean-squared-error loss.
# Each width is cross-validated and its mean F1 stored under the width.
matrices_variation = {}

for units in range(1, 15):
    model7 = Sequential([
        Dense(units, input_dim=15, activation='linear'),
        Dense(1, activation='linear'),
    ])
    model7.compile(loss='mean_squared_error', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model7, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 7 sweep and print the best width.
plot_summary(matrices_variation)

### Model 7.1
* loss=binary_crossentropy
* activation=linear
* inputlayer=15
* outputlayer=1

In [None]:
# Model 7.1: one linear hidden layer of 1..14 units, linear output, binary cross-entropy loss.
# NOTE(review): binary_crossentropy expects predictions in [0, 1]; a linear output is
# unbounded, so its values are not valid probabilities — confirm this pairing is intentional.
matrices_variation = {}

for units in range(1, 15):
    model7_1 = Sequential([
        Dense(units, input_dim=15, activation='linear'),
        Dense(1, activation='linear'),
    ])
    model7_1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model7_1, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 7.1 sweep and print the best width.
plot_summary(matrices_variation)

### Model 8
* loss=mean_squared_error
* activation=sigmoid (hidden layer) and tanh (output layer)
* inputlayer=15
* outputlayer=1

In [None]:
# Model 8: one sigmoid hidden layer of 1..14 units, tanh output, mean-squared-error loss.
# Each width is cross-validated and its mean F1 stored under the width.
matrices_variation = {}

for units in range(1, 15):
    model8 = Sequential([
        Dense(units, input_dim=15, activation='sigmoid'),
        Dense(1, activation='tanh'),
    ])
    model8.compile(loss='mean_squared_error', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model8, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 8 sweep and print the best width.
plot_summary(matrices_variation)

### Model 8.1
* loss=binary_crossentropy

* activation=sigmoid (hidden layer) and tanh (output layer)
* inputlayer=15
* outputlayer=1

In [None]:
# Model 8.1: one sigmoid hidden layer of 1..14 units, tanh output, binary cross-entropy loss.
# NOTE(review): binary_crossentropy expects predictions in [0, 1]; a tanh output can be
# negative, so its values are not valid probabilities — confirm this pairing is intentional.
matrices_variation = {}

for units in range(1, 15):
    model8_1 = Sequential([
        Dense(units, input_dim=15, activation='sigmoid'),
        Dense(1, activation='tanh'),
    ])
    model8_1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model8_1, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 8.1 sweep and print the best width.
plot_summary(matrices_variation)

### Model 9
* loss=mean_squared_error
* activation=tanh (hidden layer) and sigmoid (output layer)
* inputlayer=15
* outputlayer=1

In [None]:
# Model 9: one tanh hidden layer of 1..14 units, sigmoid output, mean-squared-error loss.
# Each width is cross-validated and its mean F1 stored under the width.
matrices_variation = {}

for units in range(1, 15):
    model9 = Sequential([
        Dense(units, input_dim=15, activation='tanh'),
        Dense(1, activation='sigmoid'),
    ])
    model9.compile(loss='mean_squared_error', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model9, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 9 sweep and print the best width.
plot_summary(matrices_variation)

### Model 9.1
* loss=binary_crossentropy
* activation=tanh (hidden layer) and sigmoid (output layer)
* inputlayer=15
* outputlayer=1

In [None]:
# Model 9.1: one tanh hidden layer of 1..14 units, sigmoid output, binary cross-entropy loss.
# Each width is cross-validated and its mean F1 stored under the width.
matrices_variation = {}

for units in range(1, 15):
    model9_1 = Sequential([
        Dense(units, input_dim=15, activation='tanh'),
        Dense(1, activation='sigmoid'),
    ])
    model9_1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model9_1, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 9.1 sweep and print the best width.
plot_summary(matrices_variation)

### Model 10
* loss=mean_squared_error
* activation=sigmoid (two hidden layers of equal width, plus the output layer)
* inputlayer=15
* outputlayer=1


In [None]:
# Model 10: two sigmoid hidden layers of equal width (1..14 units each), sigmoid output,
# mean-squared-error loss. Each width is cross-validated and its mean F1 stored under the width.
matrices_variation = {}

for units in range(1, 15):
    model10 = Sequential([
        Dense(units, input_dim=15, activation='sigmoid'),
        Dense(units, activation='sigmoid'),
        Dense(1, activation='sigmoid'),
    ])
    model10.compile(loss='mean_squared_error', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model10, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 10 sweep and print the best width.
plot_summary(matrices_variation)

### Model 11
* loss=mean_squared_error
* activation=tanh (two hidden layers of equal width, plus the output layer)
* inputlayer=15
* outputlayer=1

In [None]:
# Model 11: two tanh hidden layers of equal width (1..14 units each), tanh output,
# mean-squared-error loss. Each width is cross-validated and its mean F1 stored under the width.
matrices_variation = {}

for units in range(1, 15):
    model11 = Sequential([
        Dense(units, input_dim=15, activation='tanh'),
        Dense(units, activation='tanh'),
        Dense(1, activation='tanh'),
    ])
    model11.compile(loss='mean_squared_error', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model11, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 11 sweep and print the best width.
plot_summary(matrices_variation)

### Model 12
* loss=binary_crossentropy
* activation=tanh (two hidden layers of equal width, plus the output layer)
* inputlayer=15
* outputlayer=1

In [None]:
# Model 12: two tanh hidden layers of equal width (1..14 units each), tanh output,
# binary cross-entropy loss.
# NOTE(review): binary_crossentropy expects predictions in [0, 1]; a tanh output can be
# negative, so its values are not valid probabilities — confirm this pairing is intentional.
matrices_variation = {}

for units in range(1, 15):
    model12 = Sequential([
        Dense(units, input_dim=15, activation='tanh'),
        Dense(units, activation='tanh'),
        Dense(1, activation='tanh'),
    ])
    model12.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model12, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 12 sweep and print the best width.
plot_summary(matrices_variation)

### Model 13
* loss=binary_crossentropy
* activation=sigmoid (first hidden layer) and tanh (second hidden layer and output layer)
* inputlayer=15
* outputlayer=1

In [None]:
# Model 13: a sigmoid hidden layer followed by a tanh hidden layer of the same width
# (1..14 units each), tanh output, binary cross-entropy loss.
# NOTE(review): binary_crossentropy expects predictions in [0, 1]; a tanh output can be
# negative, so its values are not valid probabilities — confirm this pairing is intentional.
matrices_variation = {}

for units in range(1, 15):
    model13 = Sequential([
        Dense(units, input_dim=15, activation='sigmoid'),
        Dense(units, activation='tanh'),
        Dense(1, activation='tanh'),
    ])
    model13.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    matrices_variation[units] = doTrainAndEvaluation(model13, units)

In [None]:
# Plot mean F1 against hidden-layer width for the Model 13 sweep and print the best width.
plot_summary(matrices_variation)

### Model 9.1 above gave the best F1 value

## Introduce regularization to Model 9.1 above

### Adding L1 regularization

In [None]:
from keras import regularizers
import numpy

# Sweep the L1 kernel-regularisation penalty from 0 up to (but excluding) 0.01 in steps of
# 0.001 on the Model 9.1 architecture: 12 tanh hidden units and a sigmoid output.
# The mean cross-validated F1 of each penalty is stored under that penalty.
matrices_variation = dict()

for i in numpy.arange(0, 0.01, 0.001):
    model = Sequential()
    model.add(Dense(12, input_dim=15, activation='tanh', kernel_regularizer=regularizers.l1(i)))
    model.add(Dense(1, activation='sigmoid'))
    # Model 9.1 is the binary_crossentropy variant; mean_squared_error would make this a
    # regularised Model 9 instead of the model this section says it is tuning.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    # The second argument only labels the printed summary with the hidden-layer width (12).
    matrices_variation[i] = doTrainAndEvaluation(model, 12)

In [None]:
# Show the raw penalty -> mean-F1 mapping, then plot it.
print(matrices_variation)
# The keys are regularisation penalties, not neuron counts, so pass an explicit axis label
# instead of relying on the default "hidden layer neuron count".
plot_summary(matrices_variation, "Regularization penalty")

### Adding L2 regularization

In [None]:
# Sweep the L2 kernel-regularisation penalty from 0 up to (but excluding) 0.01 in steps of
# 0.001 on the Model 9.1 architecture: 12 tanh hidden units and a sigmoid output.
# The mean cross-validated F1 of each penalty is stored under that penalty.
matrices_variation = dict()

for i in numpy.arange(0, 0.01, 0.001):
    model = Sequential()
    model.add(Dense(12, input_dim=15, activation='tanh', kernel_regularizer=regularizers.l2(i)))
    model.add(Dense(1, activation='sigmoid'))
    # Model 9.1 is the binary_crossentropy variant; mean_squared_error would make this a
    # regularised Model 9 instead of the model this section says it is tuning.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    # The second argument only labels the printed summary with the hidden-layer width (12).
    matrices_variation[i] = doTrainAndEvaluation(model, 12)

In [None]:
# Show the raw penalty -> mean-F1 mapping of the L2 sweep.
print(matrices_variation)
# Plot mean F1 against the penalty (x-axis labelled accordingly) and print the best penalty.
plot_summary(matrices_variation, "Regularization penalty")