## Important Points

* Reduced iris to a linearly separable two-class dataset by removing the last 50 data points (the third class)
* Models used: LinearSVC, SVC, SGDClassifier
* Achieved a perfect validation score with all 3, although the decision boundaries they learn would be different

In [1]:
import numpy as np
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt

# Seed NumPy's global RNG so the np.random.permutation shuffle used for the
# train/validation split is reproducible.
np.random.seed(42)
%matplotlib inline

In [2]:
"""
There are 3 classes in iris dataset and only 2 of them are linearly 
separable so using those 2 only
"""
# Pull the feature matrix and the class labels out of the bundled iris dataset
data = load_iris()
x, y = data.data, data.target

In [3]:
# Keep only the setosa and versicolor examples: the third class (label 2,
# the last 50 rows) is dropped because it is not linearly separable.
# Note: only the last expression of a cell is displayed, so the per-class
# counts computed by np.unique are not shown here.
np.unique(y, return_counts=True)
y == 2

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [4]:
# Keep only the two linearly separable classes (labels 0 and 1).
# A label mask is used instead of the positional slice [:100] so the
# selection does not rely on the rows being sorted by class.
keep = y != 2
x = x[keep]
y = y[keep]

# Sanity check: exactly the two expected classes remain.
assert np.array_equal(np.unique(y), [0, 1])

In [5]:
# Shuffle the 100 remaining examples, then split them 80 / 20 into
# training and validation sets. Shuffling first matters because the rows
# are ordered by class.

rand_ind = np.random.permutation(100)
x, y = x[rand_ind], y[rand_ind]

x_train, x_valid = x[:80], x[80:]
y_train, y_valid = y[:80], y[80:]

In [6]:
# Standardise the features
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training split only; the same fitted
# transform is then applied to both the training and validation splits.
scaler = StandardScaler().fit(x_train)
x_train, x_valid = scaler.transform(x_train), scaler.transform(x_valid)

According to the problem statement:
* Train a LinearSVC
* Train an SVC
* Train an SGDClassifier
* Try to get comparable performance from all three of them

In [7]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

# Linear SVM with hinge loss, trained on the scaled training split
lin_svc = LinearSVC(loss='hinge', random_state=42).fit(x_train, y_train)

# Accuracy on the held-out validation split
y_pred = lin_svc.predict(x_valid)
accuracy_score(y_true=y_valid, y_pred=y_pred)

1.0

In [17]:
from sklearn.svm import SVC

# SVC with a linear kernel, trained on the scaled training split
svc = SVC(kernel='linear', random_state=42).fit(x_train, y_train)

# Accuracy on the held-out validation split
y_pred = svc.predict(x_valid)
accuracy_score(y_true=y_valid, y_pred=y_pred)

1.0

In [19]:
from sklearn.linear_model import SGDClassifier

# The regularisation strength is set to alpha = 1 / (m * C), with m the
# number of training samples -- presumably to mirror an SVM with C = 3;
# confirm this matches the C used by the other two models.
C = 3
sgd = SGDClassifier(loss='hinge', alpha=1/(x_train.shape[0] * C),
                   random_state=42)

# Fit on the training split only, so the validation split stays held out
# and the score below is a genuine out-of-sample estimate.
sgd.fit(x_train, y_train)

# Score the SGD model itself (not the previously trained SVC)
y_pred = sgd.predict(x_valid)
accuracy_score(y_valid, y_pred)

1.0