In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

In [4]:
df = pd.read_csv('Datasets/data_banknote_authentication.csv')
df.sample(5)

Unnamed: 0,variance,skewness,curtosis,entropy,class
537,-0.10648,-0.76771,7.7575,0.64179,0
853,-3.1366,0.42212,2.6225,-0.064238,1
286,1.3419,-4.4221,8.09,-1.7349,0
236,-1.3274,9.498,2.4408,-5.2689,0
1031,-1.8554,-9.6035,7.7764,-0.97716,1


In [5]:
X = df[['variance', 'skewness', 'curtosis', 'entropy']]
Y = df['class']

In [6]:
Y.value_counts()

0    762
1    610
Name: class, dtype: int64

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

In [8]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_train[:5]

array([[ 0.89967493,  1.18805208, -1.18171152, -0.09274226],
       [ 0.79132601,  1.52427784, -1.12635383, -1.42833711],
       [-1.4620017 , -1.74354285,  2.08102182,  0.74693093],
       [ 1.16581047, -0.98729225,  0.45528455,  0.56204943],
       [-0.67271772, -1.57668249,  1.51805865,  1.15257286]])

In [9]:
def accuracy(y_true, y_pred):
    true_sum = np.sum(y_true == y_pred)
    m = len(y_true)
    return true_sum / m

def get_precision_and_recall(y_test, y_pred):
    cm = confusion_matrix(y_test, y_pred)
    precision = cm[0][0] / (cm[0][0] + cm[0][1])
    recall = cm[0][0] / (cm[0][0] + cm[1][0])
    return precision, recall

def get_f1_score(y_t, y_p):
    p, r = get_precision_and_recall(y_t, y_p)
    F1 = 2 * (p*r)/(p+r)
    return F1

from sklearn.linear_model import LogisticRegression
model = LogisticRegression(random_state=42)
model.fit(X_train, Y_train)

y_pred = model.predict(X_test)
acc = accuracy(Y_test, y_pred)
print(f"Model accuracy is: {acc*100:.2f}%")
pre, rec = get_precision_and_recall(Y_test, y_pred)
print(f"Model P and R is: {pre} {rec}")
F1_score = get_f1_score(Y_test, y_pred)
print(f"Model F1 is: {F1_score}")

Model accuracy is: 97.96%
Model P and R is: 0.9641025641025641 1.0
Model F1 is: 0.9817232375979111


In [13]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(random_state=42)
model.fit(X_train, Y_train)
Y_pred = model.predict(X_train)
acc = accuracy(Y_train, Y_pred)
print(f"Model accuracy is: {acc*100:.2f}%")
pre, rec = get_precision_and_recall(Y_train, Y_pred)
print(f"Model P and R is: {pre} {rec}")
F1_score = get_f1_score(Y_train, Y_pred)
print(f"Model F1 score is: {F1_score}")

Model accuracy is: 98.25%
Model P and R is: 0.9717813051146384 0.9963833634719711
Model F1 score is: 0.9839285714285715
