# Setup

## Import modules

In [16]:
import numpy as np
import pandas as pd

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import sklearn.datasets as datasets
from sklearn.pipeline import Pipeline

import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions

## Load and prepare data

In [6]:
# Pick the dataset to analyse (swap in one of the alternatives below to experiment)
dataS = datasets.load_wine()
# dataS = datasets.load_breast_cancer()
# dataS = datasets.load_iris()

# Input features and target labels of the selected dataset
X = dataS.data
y = dataS.target

# Distinct class labels, computed once and reused in the summary below
classes = np.unique(y)

print(f'Number of input features (d): {X.shape[1]}')
print(f'Unique target classes: {classes}')

Number of input features (d): 13
Unique target classes: [0 1 2]


In [7]:
# Show the input feature names first, then the target class names (one per line)
print(dataS.feature_names, dataS.target_names, sep='\n')

['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
['class_0' 'class_1' 'class_2']


In [8]:
# Hold out 30% of the samples for testing; the split is stratified on y and
# uses a fixed seed so that re-running the notebook reproduces it.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=43,
    stratify=y,
)


# Pipeline of scaling, LDA and logistic regression

## Pipeline setup

In [9]:
# Three-stage model: standardise the features, reduce them to 2 LDA
# components, then classify with logistic regression.
#
# `multi_class` is intentionally not passed: 'auto' is the default value, and
# the parameter is deprecated in scikit-learn >= 1.5 (passing it explicitly
# emits a FutureWarning and will eventually fail once it is removed).
# NOTE(review): recent scikit-learn releases also appear to deprecate the
# 'liblinear' solver for multiclass targets — confirm against the installed
# version before upgrading.
pipeline = Pipeline([
    ('scaling', StandardScaler()),
    ('lda', LDA(n_components=2)),
    ('clf', LogisticRegression(solver='liblinear'))
])

## Training

In [10]:
# Train every stage of the pipeline using the training split only
pipeline.fit(X_train, y=y_train)

## Predictions

In [11]:
# Predicted class labels for the held-out test split
predictions = pipeline.predict(X=X_test)

## Evaluation

In [12]:
# Score the fitted pipeline on both splits (training first, then test)
train_accuracy, test_accuracy = (
    pipeline.score(features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

In [13]:
# Report both accuracies of the LDA + logistic-regression pipeline, 3 decimals each
print(
    f'LDA - logReg - TRAIN accuracy = {train_accuracy:.3f}',
    f'LDA - logReg - Test  accuracy  = {test_accuracy:.3f}',
    sep='\n',
)

LDA - logReg - TRAIN accuracy = 1.000
LDA - logReg - Test  accuracy  = 0.963


# Pipeline of scaling, PCA and SVC

In [17]:
# Second model: standardise -> project onto 3 principal components -> SVC
pipeline2 = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=3)),
    ('svc', SVC()),
])

# Train all stages on the training split only
pipeline2.fit(X_train, y_train)

# Predicted class labels for the held-out test split
predictions = pipeline2.predict(X_test)

# Score on both splits (training first, then test)
train_accuracy, test_accuracy = (
    pipeline2.score(features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

In [18]:
# Report the accuracies of pipeline2 (scaling -> PCA -> SVC).
# The labels previously read "LDA - logReg", copied from the first pipeline's
# report cell, which mislabelled these results.
print(f'PCA - SVC - TRAIN accuracy = {train_accuracy:.3f}')
print(f'PCA - SVC - Test  accuracy  = {test_accuracy:.3f}')

LDA - logReg - TRAIN accuracy = 0.992
LDA - logReg - Test  accuracy  = 0.907
