## Linear Discriminant Analysis (LDA)

In [1]:
# Importing libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Load the wine data set and separate the features from the class label

dataset = pd.read_csv("wine.csv")

# Preview the first and the last rows of the table
for preview in (dataset.head(), dataset.tail()):
    print(preview)

# Independent variables: columns 1 through 13 (everything after the label column)
feature_columns = dataset.iloc[:, 1:14]
X = feature_columns.values
print(X)

# Dependent variable: column 0, the type of wine (1, 2 or 3)
label_column = dataset.iloc[:, 0]
y = label_column.values
print(y)


   Wine  Alcohol  Malic.acid   Ash   Acl   Mg  Phenols  Flavanoids  \
0     1    14.23        1.71  2.43  15.6  127     2.80        3.06   
1     1    13.20        1.78  2.14  11.2  100     2.65        2.76   
2     1    13.16        2.36  2.67  18.6  101     2.80        3.24   
3     1    14.37        1.95  2.50  16.8  113     3.85        3.49   
4     1    13.24        2.59  2.87  21.0  118     2.80        2.69   

   Nonflavanoid.phenols  Proanth  Color.int   Hue    OD  Proline  
0                  0.28     2.29       5.64  1.04  3.92     1065  
1                  0.26     1.28       4.38  1.05  3.40     1050  
2                  0.30     2.81       5.68  1.03  3.17     1185  
3                  0.24     2.18       7.80  0.86  3.45     1480  
4                  0.39     1.82       4.32  1.04  2.93      735  
     Wine  Alcohol  Malic.acid   Ash   Acl   Mg  Phenols  Flavanoids  \
173     3    13.71        5.65  2.45  20.5   95     1.68        0.61   
174     3    13.40        3.91  2

In [3]:
# Hold out 20% of the samples as a test set (fixed seed so the split is repeatable)

from sklearn.model_selection import train_test_split

split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

# Report the (rows, columns) of each feature matrix
for feature_matrix in (X_train, X_test):
    print(feature_matrix.shape)

(142, 13)
(36, 13)


In [4]:
# Feature scaling on independent variables

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

# Learn the per-feature mean and standard deviation from the training set only.
X_train = sc.fit_transform(X_train)
# Reuse the training statistics for the test set. Calling fit_transform here
# would re-fit the scaler on the test data, so the two sets would be scaled
# with different parameters and test-set statistics would leak into evaluation.
X_test = sc.transform(X_test)

In [6]:
# Project the scaled features onto two linear discriminants

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Fit on the training data only (LDA is supervised, so it needs the labels)
lda = LDA(n_components=2).fit(X_train, y_train)

# Apply the fitted projection to both sets
X_train, X_test = lda.transform(X_train), lda.transform(X_test)

# Fraction of variance explained by each retained component
explained_variance = lda.explained_variance_ratio_
print(explained_variance)

[0.73931691 0.26068309]


In [7]:
# Show the training samples after projection onto the two LDA components
print(X_train)

[[ 3.57315552  1.94018924]
 [ 0.85475898 -2.08182977]
 [ 0.62173655 -3.06234453]
 [ 4.80786412  2.00638739]
 [-3.8579759   0.14987256]
 [-3.59455458  1.24961706]
 [-0.53772906 -3.0852704 ]
 [ 0.04058577 -2.47312446]
 [ 0.99835348 -3.36989668]
 [-3.74095658  1.94844242]
 [ 3.76035226  0.82126218]
 [-0.15106412 -1.86820292]
 [ 3.62762899  2.05460026]
 [-3.94229781  2.80328429]
 [ 3.33429017  0.73627798]
 [ 3.90206871  1.03276135]
 [-3.55835472  0.18783108]
 [ 5.63175281  2.40524214]
 [-5.56217254  0.85694946]
 [ 0.23296188 -3.94615581]
 [ 5.03141997  3.23313754]
 [ 3.52861651  0.94605778]
 [-1.17815662 -2.17294825]
 [ 3.58320131  0.67947364]
 [ 5.21649905  2.41090952]
 [-3.01647841  1.24411621]
 [ 1.86178658 -0.47484926]
 [ 3.93816398 -0.2204059 ]
 [-1.0836235  -3.32496762]
 [ 1.8691488  -0.63362283]
 [ 3.27717205  1.51263542]
 [-0.47842302 -1.16766723]
 [-4.14433134  1.37391708]
 [ 2.45009727 -2.49336285]
 [-1.20844631 -2.30679956]
 [ 2.55631466 -0.98550214]
 [-1.6091476   0.55066705]
 

In [8]:
# Train a multinomial logistic regression on the training set

from sklearn.linear_model import LogisticRegression

logreg_options = {
    'multi_class': 'multinomial',
    'solver': 'lbfgs',
    'random_state': 0,
}
classifier = LogisticRegression(**logreg_options)

# Kept as the last expression of the cell so the fitted estimator is displayed
classifier.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='multinomial', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [9]:
# Classify the test samples and show the predictions above the true labels

y_pred = classifier.predict(X_test)
for labels in (y_pred, y_test):
    print(labels)

[1 3 2 1 2 2 1 3 2 2 3 3 1 2 3 2 1 1 2 1 2 1 1 2 2 2 2 2 2 3 1 1 2 1 1 1]
[1 3 2 1 2 2 1 3 2 2 3 3 1 2 3 2 1 1 2 1 2 1 1 2 2 2 2 2 2 3 1 1 2 1 1 1]


In [10]:
# Tabulate true classes (rows) against predicted classes (columns)

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[14  0  0]
 [ 0 16  0]
 [ 0  0  6]]


In [11]:
# Accuracy (%) = correctly classified samples / all samples.
# The diagonal of the confusion matrix holds the correct predictions, so
# derive the figure from `cm` instead of hard-coding the counts; the value
# then stays right if the data, split or model changes.
accuracy = cm.trace() / cm.sum() * 100
print(accuracy)

100.0
