<a href="https://colab.research.google.com/github/willdphan/bank-note-auth-nn/blob/main/Bank_Note_Authentication_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [227]:
import pandas as pd
import numpy as np
from copy import deepcopy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor as KNR
from sklearn.ensemble import RandomForestRegressor as RFR

# Read the bank-note CSV from the Colab sample-data folder, then display the frame.
csv_path = '/content/sample_data/BankNote_Authentication.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.62160,8.66610,-2.8073,-0.44699,0
1,4.54590,8.16740,-2.4586,-1.46210,0
2,3.86600,-2.63830,1.9242,0.10645,0
3,3.45660,9.52280,-4.0112,-3.59440,0
4,0.32924,-4.45520,4.5718,-0.98880,0
...,...,...,...,...,...
1367,0.40614,1.34920,-1.4501,-0.55949,1
1368,-1.38870,-4.87730,6.4774,0.34179,1
1369,-3.75030,-13.45860,17.5932,-2.77710,1
1370,-3.56370,-8.38270,12.3930,-1.28230,1


In [228]:
# Count the missing values in each column.
df.isna().sum()

variance    0
skewness    0
curtosis    0
entropy     0
class       0
dtype: int64

In [229]:
# Predictors are every column except the last; the target is the last column.
X, y = df.iloc[:, :-1], df.iloc[:, -1]
X, y

(      variance  skewness  curtosis  entropy
 0      3.62160   8.66610   -2.8073 -0.44699
 1      4.54590   8.16740   -2.4586 -1.46210
 2      3.86600  -2.63830    1.9242  0.10645
 3      3.45660   9.52280   -4.0112 -3.59440
 4      0.32924  -4.45520    4.5718 -0.98880
 ...        ...       ...       ...      ...
 1367   0.40614   1.34920   -1.4501 -0.55949
 1368  -1.38870  -4.87730    6.4774  0.34179
 1369  -3.75030 -13.45860   17.5932 -2.77710
 1370  -3.56370  -8.38270   12.3930 -1.28230
 1371  -2.54190  -0.65804    2.6842  1.19520
 
 [1372 rows x 4 columns],
 0       0
 1       0
 2       0
 3       0
 4       0
        ..
 1367    1
 1368    1
 1369    1
 1370    1
 1371    1
 Name: class, Length: 1372, dtype: int64)

In [230]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

# Report the shape of each partition.
print(*(part.shape for part in (X_train, y_train, X_test, y_test)))

(960, 4) (960,) (412, 4) (412,)


In [231]:
# Fit the min-max scaler on the training split only.
min_max_scaler = MinMaxScaler()
min_max_scaler.fit(X_train)

def preprocessor(X):
  """Scale `X` with the module-level `min_max_scaler`.

  The scaler is the one fitted on `X_train` earlier in the notebook; this
  function only applies it and never refits, so the same training-derived
  scaling is used for whatever data is passed in.

  Parameters
  ----------
  X : array-like accepted by `min_max_scaler.transform`.

  Returns
  -------
  The result of `min_max_scaler.transform(X)`.
  """
  # The earlier np.copy(X) was overwritten immediately and never used, so it
  # only cost an extra copy; transform is called on X directly, as before.
  return min_max_scaler.transform(X)

In [232]:
# Sanity check: display the scaled training features.
preprocessor(X_train)

array([[0.41049142, 0.8566794 , 0.30278071, 0.26001485],
       [0.21781793, 0.62477783, 0.26520602, 0.69678059],
       [0.45086951, 0.2196657 , 0.51981821, 0.79406437],
       ...,
       [0.56173284, 0.55276766, 0.1324014 , 0.77018593],
       [0.66580916, 0.50592299, 0.45046202, 0.8658303 ],
       [0.36501683, 0.92032464, 0.34161156, 0.28806621]])

In [233]:
# Wrap preprocessor in a transformer object so it can be used as a Pipeline step.
new_preprocessor = FunctionTransformer(func=preprocessor)

In [234]:
# Pipeline 1: scaling step followed by ordinary least-squares regression.
p1 = Pipeline(
    steps=[
        ("Scaler", new_preprocessor),
        ('Linear Regression', LinearRegression()),
    ]
)

In [235]:
def fit_and_print(p, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test):
  """Fit `p` on the training data and print its train/test mean absolute error.

  Parameters
  ----------
  p : object with `fit(X, y)` and `predict(X)` (the notebook passes Pipelines).
  X_train, y_train : data used to fit `p` and to compute the training error.
  X_test, y_test : data used to compute the testing error.

  The defaults are the module-level split variables. They are bound once,
  when this `def` runs, so re-running the split later has no effect here
  unless this definition is re-run as well.

  Returns
  -------
  None. The two errors are printed.

  NOTE(review): the target appears to be a 0/1 class label, so MAE on raw
  regressor output may not be the intended metric -- confirm.
  """
  p.fit(X_train, y_train)
  training = p.predict(X_train)
  testing = p.predict(X_test)
  # scikit-learn documents mean_absolute_error(y_true, y_pred): labels first,
  # predictions second. The value is the same either way, but this matches
  # the documented contract.
  print('Training Error: ' + str(mean_absolute_error(y_train, training)))
  print('Testing Error: ' + str(mean_absolute_error(y_test, testing)))


In [236]:
# Fit the linear-regression pipeline and print its training/testing error.
fit_and_print(p1)

Training Error: 0.13340497286881955
Testing Error: 0.1404870077772941


In [237]:
# Pipeline 2: same scaling step, then a k-nearest-neighbours regressor with default settings.
knr_steps = [("Scaler", new_preprocessor), ('KNR', KNR())]
p2 = Pipeline(steps=knr_steps)
fit_and_print(p2)

Training Error: 0.0012500000000000002
Testing Error: 0.0024271844660194173


In [238]:
# Pipeline 3: same scaling step, then a random-forest regressor.
# The forest is randomized, so without a seed the printed errors change on
# every run. random_state=0 (the seed already used for the train/test split)
# makes the result repeatable under Restart & Run All.
p3 = Pipeline([("Scaler", new_preprocessor),('Random Forest', RFR(random_state=0))])
fit_and_print(p3)

Training Error: 0.008260416666666668
Testing Error: 0.02179611650485437
