In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
dataset =  pd.read_csv("bill_authentication.csv")
#Data were extracted from images that were taken from genuine and forged banknote-like specimens. For digitization,
#an industrial camera usually used for print inspection was used. The final images have 400x 400 pixels. 
#Due to the object lens and distance to the investigated object gray-scale pictures
#with a resolution of about 660 dpi were gained. Wavelet Transform tool were used to extract features from images.

In [3]:
# we would like to perform a classification according to the variance, skewness, curtosis and entropy
# we will determine if the banknote can be authenticated or not!
dataset.head(25)  

Unnamed: 0,Variance,Skewness,Curtosis,Entropy,Class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0
5,4.3684,9.6718,-3.9606,-3.1625,0
6,3.5912,3.0129,0.72888,0.56421,0
7,2.0922,-6.81,8.4636,-0.60216,0
8,3.2032,5.7588,-0.75345,-0.61251,0
9,1.5356,9.1772,-2.2718,-0.73535,0


In [4]:
dataset.shape

(1372, 5)

In [5]:
missing_val_count_by_column = (dataset.isnull().sum())
print(missing_val_count_by_column[missing_val_count_by_column > 0])  

Variance    1
Skewness    6
Curtosis    8
Entropy     4
dtype: int64


In [6]:
from sklearn.impute import SimpleImputer  # Imputer is for missing values
# Fill in the lines below: imputation
my_imputer = SimpleImputer()
datasetClean = pd.DataFrame(my_imputer.fit_transform(dataset))
# Fill in the lines below: imputation removed column names; put them back
datasetClean.columns = dataset.columns
# In general you can use this imputer without worrying about the missing values.

In [7]:
# Number of missing values in each column of training data
missing_val_count_by_column = (datasetClean.isnull().sum())
print(missing_val_count_by_column[missing_val_count_by_column > 0]) 

Series([], dtype: int64)


In [9]:
X = datasetClean.drop('Class', axis=1)  # these are lectures
y = datasetClean['Class'] # these are results

In [28]:
from sklearn.model_selection import train_test_split  #
 #
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, shuffle=True,random_state=27)  # 80 percent training , 20 percent testing

In [29]:
X_train

Unnamed: 0,Variance,Skewness,Curtosis,Entropy
912,1.22790,4.03090,-4.643500,-3.912500
1066,-2.62860,0.18002,1.795600,0.972820
1004,-3.71810,-8.50890,12.363000,-0.955180
126,-2.34300,12.95160,3.328500,-5.942600
184,-1.75590,11.94590,3.094600,-4.897800
...,...,...,...,...
1048,-0.84710,3.13290,-3.011200,-2.938800
1317,-1.26670,2.81830,-2.426000,-1.886200
752,0.38478,6.59890,-0.333600,-0.564660
543,-1.42170,11.65420,-0.057699,-7.102500


In [30]:
from sklearn.svm import SVC  # support vector machine - linear
svclassifier = SVC() # kernel='linear'
fitted = svclassifier.fit(X_train, y_train)

In [52]:
y_test

430     0.0
588     0.0
296     0.0
184     0.0
244     0.0
       ... 
1341    1.0
1100    1.0
620     0.0
617     0.0
602     0.0
Name: Class, Length: 275, dtype: float64

In [36]:
y_pred = fitted.predict(X_test)

In [39]:
y_pred

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.,
       1., 1., 1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1.,
       1., 0., 1., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1., 0.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.,
       0., 1., 1., 0., 1., 0., 1., 0., 0., 1., 1., 1., 1., 0., 1., 0., 0.,
       0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0., 1., 0.,
       0., 0., 1., 0., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.,
       0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1.,
       0., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 1., 0., 1., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 0., 1.,
       1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 0.,
       1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1.,
       1., 1., 0., 1., 0.

In [40]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[148   0]
 [  2 125]]
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99       148
         1.0       1.00      0.98      0.99       127

    accuracy                           0.99       275
   macro avg       0.99      0.99      0.99       275
weighted avg       0.99      0.99      0.99       275



In [49]:
from sklearn.metrics import accuracy_score
print('SVC: %.2f%% '%  (accuracy_score(y_test, y_pred)*100))

SVC: 99.27% 


In [50]:
# Try to predict the price, you can choose any row from data set and check if the prediction "0" or "1" is correct!
test =datasetClean.loc[[1000]]   # take Nth row to check your prediction result manually 
test = test.drop('Class', axis=1)  # drop target "class" column
prediction= fitted.predict(test)
prediction

array([1.])

In [51]:
#now here is your new perspective, you cant always rely on numbers yea?
# here is how you can really predict your ML algorithm. you show it a NEW data and it gives you a result!
#dont forget about this part of code, you will need to check your final predictions in your career(dont worry about exam)

data = [[1.565,2.1539,44.457,1.99532]]   # you can also create a dataframe by yourself and feed it into SVM prediction
# imagine you have a banknote which has values as above. See if it can pass the authentication or not
# 1 for the pass. 0 for not-pass
  
# Create the pandas DataFrame 
df = pd.DataFrame(data, columns = ['Variance', 'Skewness','Curtosis','Entropy']) 
df

Unnamed: 0,Variance,Skewness,Curtosis,Entropy
0,1.565,2.1539,44.457,9.99532


In [52]:
prediction= fitted.predict(df)  # you can adjust the values and see what will your model generate.
# this is a binary classification so result will be either 1 or 0 as fake or real.
prediction

array([1.])