In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

%matplotlib inline
np.random.seed(42)

In [2]:
# Load the Iris dataset that ships with scikit-learn. The returned object
# exposes the description, feature/target names, feature matrix and labels
# that the following cells read.
data = datasets.load_iris()

In [3]:
# Show the dataset's built-in description text
print(data.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [4]:
# Names of the feature columns, then names of the target classes
print(data.feature_names)
print(data.target_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
['setosa' 'versicolor' 'virginica']


In [5]:
# Pull out the feature matrix (x) and the class labels (y)
x = data.data
y = data.target
# Display both shapes to confirm there is one label per sample row
x.shape, y.shape

((150, 4), (150,))

In [6]:
# Append a constant column of ones so the bias (intercept) is learned as one
# more row of the weight matrix rather than as a separate parameter.
# The matrix is built from data.data instead of from x itself, so re-running
# this cell cannot stack a second bias column onto x.
x = np.c_[data.data, np.ones([data.data.shape[0], 1])]
print(x.shape)

(150, 5)


In [7]:
# Peek at the first five rows; the last column is the constant bias input
print(x[:5])

[[5.1 3.5 1.4 0.2 1. ]
 [4.9 3.  1.4 0.2 1. ]
 [4.7 3.2 1.3 0.2 1. ]
 [4.6 3.1 1.5 0.2 1. ]
 [5.  3.6 1.4 0.2 1. ]]


In [8]:
# Split the data into train / validation / test parts.
# The row indices are shuffled once and then cut at two explicit offsets, so
# the three parts are disjoint by construction. Validation and test each get
# 20% of the rows (rounded down); training gets whatever remains.
n_samples = x.shape[0]
indices = np.random.permutation(n_samples)
test_size = int(0.2*n_samples)
val_size = int(0.2*n_samples)
train_size = int(n_samples - test_size - val_size)

# Cutting at explicit offsets avoids the `indices[-test_size:]` pitfall:
# with test_size == 0 that slice is `indices[0:]`, i.e. the whole dataset.
train_idx, valid_idx, test_idx = np.split(indices, [train_size, train_size + val_size])
assert len(train_idx) + len(valid_idx) + len(test_idx) == n_samples

x_train, y_train = x[train_idx], y[train_idx]
x_valid, y_valid = x[valid_idx], y[valid_idx]
x_test, y_test = x[test_idx], y[test_idx]

In [9]:
# Distinct class labels present in the training split
print(np.unique(y_train))

[0 1 2]


In [10]:
# Softmax regression compares predicted class probabilities against one-hot
# targets, so fit an encoder on the training labels.
# reshape(-1, 1) turns the 1-D label vector into the single-column 2-D input
# that the encoder is given here. OneHotEncoder is imported in the notebook's
# top import cell.
ohe = OneHotEncoder()
ohe.fit(y_train.reshape(-1, 1))

OneHotEncoder(categories='auto', drop=None, dtype=<class 'numpy.float64'>,
              handle_unknown='error', sparse=True)

In [11]:
# Sanity check on one sample: print the raw label, then its one-hot row
print(y_train[0])
print(ohe.transform(y_train[0].reshape(-1, 1)).toarray())

1
[[0. 1. 0.]]


In [12]:
# One-hot encode the labels of every split with the already-fitted encoder.
# .toarray() turns each transform() result into a dense NumPy array.
y_train_ohe, y_valid_ohe, y_test_ohe = (
    ohe.transform(labels.reshape(-1, 1)).toarray()
    for labels in (y_train, y_valid, y_test)
)

In [13]:
# First one-hot row of each split, displayed as a quick visual check
y_train_ohe[0], y_valid_ohe[0], y_test_ohe[0]

(array([0., 1., 0.]), array([0., 1., 0.]), array([0., 1., 0.]))

In [14]:
# Softmax must normalise across the class axis (axis=1 for a batch of rows),
# so always double-check the axis.
def softmax(logit):
    """Turn raw class scores into probabilities that sum to 1 per sample.

    Parameters
    ----------
    logit : array-like
        Scores with samples along axis 0 and classes along axis 1. A single
        1-D score vector is also accepted and treated as one sample.

    Returns
    -------
    numpy.ndarray
        Probabilities with the same shape as ``logit``.

    Notes
    -----
    Each sample's largest score is subtracted before exponentiating. This
    leaves the result mathematically unchanged but keeps ``np.exp`` from
    overflowing to ``inf`` (and the ratio from becoming NaN) on large scores.
    """
    logit = np.asarray(logit)
    class_axis = 1 if logit.ndim > 1 else -1
    shifted = logit - np.max(logit, axis=class_axis, keepdims=True)
    # Exponentiate once and reuse it for numerator and denominator.
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=class_axis, keepdims=True)

def cross_entropy_loss(y, y_pred):
    """Mean categorical cross-entropy between targets and predictions.

    Parameters
    ----------
    y : array
        Target rows; in this notebook these are the one-hot label matrices.
    y_pred : numpy.ndarray
        Predicted probabilities with the same shape as ``y``.

    Returns
    -------
    float
        The negative log-likelihood, summed over axis 1 and averaged over
        the rows.
    """
    eps = 1e-7  # offset so a predicted probability of exactly 0 never hits log(0)
    log_probs = np.log(y_pred + eps)
    per_sample = (y * log_probs).sum(axis=1)
    return -per_sample.mean()

In [15]:
# Weight matrix shape and initialization.
# One row per input column (the bias column included) and one column per
# class. The class count is read from the one-hot targets instead of being
# hard-coded, so W always matches the shape used in the gradient step.
n_rows = x_train.shape[1]
n_cols = y_train_ohe.shape[1]

# Random normal start; reproducible because the global NumPy seed is fixed
# in the first cell.
W = np.random.randn(n_rows, n_cols)

In [16]:
# Rows = input columns (bias column included), columns = classes
print(W.shape)

(5, 3)


In [17]:
# Training hyperparameters
n_epochs = 5000  # maximum number of full-batch gradient steps
lr = 0.01        # learning rate (step size of each weight update)
# Best validation loss seen so far. Infinity guarantees that any finite first
# loss counts as an improvement, which an arbitrary large constant does not.
best_loss = np.inf

In [18]:
# Full-batch gradient descent on the softmax cross-entropy loss, with early
# stopping as soon as the validation loss stops improving.
best_W = W.copy()  # snapshot of the weights that produced best_loss
for epoch in range(n_epochs):
    # Training step: forward pass, training loss, gradient, weight update.
    logit = x_train.dot(W)
    y_pred = softmax(logit)
    # Training loss is kept for inspection only; stopping uses val_loss.
    loss = cross_entropy_loss(y_train_ohe, y_pred)
    # (y_pred - y_train_ohe).T.dot(x_train) has shape (classes, features);
    # it is transposed below to line up with W's (features, classes) layout.
    gradients = 1/(x_train.shape[0]) * (y_pred - y_train_ohe).T.dot(x_train)
    W -= lr*gradients.T
    # Validation loss, measured with the freshly updated weights.
    logit_valid = x_valid.dot(W)
    val_pred = softmax(logit_valid)
    val_loss = cross_entropy_loss(y_valid_ohe, val_pred)
    if(epoch % 1000 == 0):
        print("Iteration {0}: {1}".format(epoch, val_loss))
    # Early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        best_W[...] = W
    else:
        # This epoch's update made validation loss worse. Roll W back (in
        # place) to the best weights and report the epoch and loss that
        # belong to them, not the degraded ones.
        W[...] = best_W
        print("Early Stopping epoch:{0}, loss:{1}".format(epoch-1, best_loss))
        break
    if epoch == n_epochs-1:
        print("Final Loss:{0}".format(val_loss))


Iteration 0: 3.365655193095109
Iteration 1000: 0.4311004027331794
Iteration 2000: 0.33267583030571946
Iteration 3000: 0.2826633021623899
Iteration 4000: 0.25217246117539344
Final Loss:0.23152148052677238


In [19]:
# Model accuracy on the held-out test set
# (accuracy_score is imported in the notebook's top import cell).
test_logits = x_test.dot(W)
y_test_prob = softmax(test_logits)
# Predicted class = index of the largest probability in each row
y_test_pred = np.argmax(y_test_prob, axis=1)
# Fraction of test labels predicted correctly. The test split holds only 20%
# of the rows, so read a perfect score as encouraging rather than conclusive.
accuracy_score(y_test, y_test_pred)

1.0