## Question 1

### Logistic Regression for multiclass classification

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
dataset = pd.read_csv('Iris.csv', header=0)
dataset.sample(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
43,44,5.0,3.5,1.6,0.6,Iris-setosa
108,109,6.7,2.5,5.8,1.8,Iris-virginica
129,130,7.2,3.0,5.8,1.6,Iris-virginica
17,18,5.1,3.5,1.4,0.3,Iris-setosa
139,140,6.9,3.1,5.4,2.1,Iris-virginica


In [3]:
x = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]
x = np.hstack((np.ones((x.shape[0], 1)), x)) #[1 x]
print(x.shape)

(150, 6)


In [4]:
#since multiclass regression has different outcomes and we need to identify unique sets of target output in given dataset
y_class = y.unique()
# y_class = np.insert(y_class, 0, 10)
y_class

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [5]:
#in the given dataset, replace the Species column with the numerical value that referes to the value when decoded
Y = np.zeros((y.shape[0], len(y_class)))
print(Y.shape)
for i in range(len(Y)):
    for j in range(len(y_class)):
        if y_class[j] == y[i]:
            Y[i][j] = 1


(150, 3)


In [6]:
train_x, test_x, train_y, test_y = train_test_split(x, Y, train_size=0.8, shuffle=True)

In [7]:
print(train_x.shape)
print(test_x.shape)
print(train_y.shape)
print(test_y.shape)

(120, 6)
(30, 6)
(120, 3)
(30, 3)


In [8]:
theta = np.zeros((x.shape[1], len(y_class)))
theta.shape

(6, 3)

In [9]:
def sigmoid(x, theta):
    return 1/(1+np.exp(-np.dot(x, theta)))

In [10]:
def multiclass_logistic_regression(x, y, theta, alpha, iterations):
    m = x.shape[0]
    for _ in range(iterations):
        prec_y = sigmoid(x, theta)
        theta = theta - (alpha/m)*np.dot(x.T, prec_y - y)
    return theta

In [11]:
theta = multiclass_logistic_regression(train_x, train_y, theta, 0.0002, 70000)

In [29]:
prediction = sigmoid(test_x, theta)
for i in prediction:
    ind = np.where(i == np.amax(i))
    for j in range(len(i)):
        i[j] = 1 if ind[0][0] == j else 0
prediction

array([[0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

In [25]:
error = (prediction == test_y)
error

array([[ True, False, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True, False, False],
       [ True, False, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True, False, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True, False, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [26]:
s = len(y_class)
error_prec = 0
for i in error:
    if np.sum(i) != s:
        error_prec += 1
print(error_prec)

5


In [40]:
percentage_error = (error_prec/len(prediction))*100
percentage_error

16.666666666666664

In [41]:
print(f"Accuracy: {100-percentage_error}")

Accuracy: 83.33333333333334


## Question 2

### Support Vector Machines (SVM) for Linear and Polynomial kernel

In [30]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [31]:
dataset = pd.read_csv('Iris.csv', header=0)
dataset.drop(dataset[dataset["Species"] == "Iris-versicolor"].index, inplace = True)
dataset.sample(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
109,110,7.2,3.6,6.1,2.5,Iris-virginica
118,119,7.7,2.6,6.9,2.3,Iris-virginica
6,7,4.6,3.4,1.4,0.3,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
27,28,5.2,3.5,1.5,0.2,Iris-setosa


In [32]:
scaler = StandardScaler()

In [35]:
x = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]
x = scaler.fit_transform(x)
x = np.hstack((np.ones((x.shape[0], 1)), x)) #[1 x]
print(x.shape)

(100, 6)


In [68]:
y.unique()

array(['Iris-setosa', 'Iris-virginica'], dtype=object)

In [36]:
for i in range(len(y)):
    if y.iat[i] == "Iris-setosa":
        y.iat[i] = 1
    if y.iat[i] == "Iris-virginica":
        y.iat[i] = 2

In [39]:
train_x, test_x, train_y, test_y = train_test_split(x, y, train_size=0.8, shuffle=True)