# Principal Component Analysis (PCA)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
dataset = pd.read_csv('Wine.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# for comparison with no-PCA reduction
X_trainv0, X_testv0, y_trainv0, y_testv0 = train_test_split(X, y, test_size = 0.21, random_state = 10)

## Feature Scaling

In [4]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

X_trainv0 = sc.fit_transform(X_trainv0)
X_testv0 = sc.transform(X_testv0)

## Applying PCA

In [5]:
from sklearn.decomposition import PCA
pca = PCA(n_components = 2)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

## Training the Logistic Regression model on the Training set

In [6]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)

LogisticRegression(random_state=0)

In [7]:
classifier2 = LogisticRegression(random_state = 10)
classifier2.fit(X_trainv0, y_trainv0)

LogisticRegression(random_state=10)

## Making the Confusion Matrix

In [7]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[14  0  0]
 [ 1 15  0]
 [ 0  0  6]]


0.9722222222222222

In [9]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred2 = classifier2.predict(X_testv0)
cm2 = confusion_matrix(y_testv0, y_pred2)
print(cm2)
accuracy_score(y_testv0, y_pred2)

[[10  0  0]
 [ 1 16  3]
 [ 0  0  8]]


0.8947368421052632

So by applying PCA to features before logistic regression, we improve the performace from 0.89 to 0.97!