In [2]:
import tensorflow as tf
import numpy as np
import random
from google.colab import drive
from sklearn.model_selection import train_test_split

In [4]:
# Attach Google Drive to the Colab runtime at /content/drive.
drive.mount('/content/drive')

# Parse the comma-separated file into a float32 array; the handle is closed
# as soon as parsing finishes.
# NOTE(review): 'iris.csv' is resolved against the current working directory,
# not the Drive mount point -- confirm that is the intended location.
with open('iris.csv', 'r') as csv_file:
  temp = np.genfromtxt(csv_file, delimiter=',', dtype='f4')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of (features, labels).

    The example order is drawn once per call with the `random` module's
    global generator. Each yielded pair is built with `tf.gather` from the
    same index slice, so rows of both outputs stay aligned. The final batch
    is shorter when `len(features)` is not a multiple of `batch_size`.
    """
    total = len(features)
    order = list(range(total))
    # Visit the examples in random order
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing past the end of the list simply returns the remaining tail
        picked = tf.constant(order[start:start + batch_size])
        batch_features = tf.gather(features, picked)
        batch_labels = tf.gather(labels, picked)
        yield batch_features, batch_labels

In [6]:
# Optimization algorithm: one stochastic-gradient-descent step
def sgd(param, grad, lr, batch_size):
  """Move `param` against `grad` in place, scaled by the learning rate.

  `param` must provide `assign_sub` (e.g. a tf.Variable). `batch_size` is
  accepted but plays no part in the update. Nothing is returned.
  """
  step = lr * grad
  param.assign_sub(step)

In [7]:
# Loss Function
def cross_entropy(z, t):
  """Mean categorical cross-entropy between predictions and targets.

  Args:
    z: predicted class probabilities, one row per example.
    t: target weights with the same shape as `z` (one-hot rows in this
      notebook); cast to the dtype of `z` before use.

  Returns:
    Scalar tensor equal to -(1/len(z)) * sum(t * log(z)).
  """
  # A Keras alternative would be tf.keras.losses.CategoricalCrossentropy()(t, z)
  z = tf.convert_to_tensor(z)
  t = tf.cast(t, z.dtype)
  # xlogy(t, z) equals t*log(z) but is defined as 0 wherever t == 0. The plain
  # product evaluates 0 * log(0) = 0 * -inf = NaN as soon as a non-target
  # probability underflows to 0, which turns the whole loss (and its
  # gradient) into NaN.
  return -(1/len(z))*tf.reduce_sum(tf.math.xlogy(t, z))

In [8]:
def model(x,w):
  """Single affine layer followed by a row-wise softmax.

  Args:
    x: 2-D array/tensor of features, one example per row.
    w: weight matrix with one more row than `x` has columns; its first row
      multiplies the prepended column of ones and so acts as the bias.

  Returns:
    Tensor of class probabilities, one row per example; each row sums to 1.
  """
  # Prepend a column of ones so the bias is learned as part of w
  u = np.hstack((np.ones((x.shape[0],1)), x))@w
  # Softmax is unchanged when a constant is subtracted from every logit in a
  # row. Shifting by the row maximum keeps exp() from overflowing to inf
  # (which would give inf/inf = NaN) for large logits. The shift is excluded
  # from differentiation because it does not affect the result.
  u = u - tf.stop_gradient(tf.reduce_max(u, axis=1, keepdims=True))
  u_exp = tf.math.exp(u)
  z = u_exp/tf.reduce_sum(u_exp,axis=1,keepdims=True)
  # The built-in equivalent is z = tf.nn.softmax(u)
  return z

In [11]:
# Data preparation

# Every column except the last is a feature; the last column is the class id
X = temp[:, :-1]
class_ids = temp[:, -1]
# One-hot output-layer encoding: column k is 1 where the class id equals k+1
labels = np.stack([class_ids == c for c in (1, 2, 3)], axis=1).astype('f4')

# K: number of samples, d: feature dimension, N: number of classes
K, d = X.shape
N = labels.shape[1]

# Shuffled 80/20 train/test partition, stratified on the labels, fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    labels,
    train_size=0.8,
    shuffle=True,
    random_state=3,
    stratify=labels,
)

In [12]:
# Hyperparameters

num_epochs = 10  # passes over the training data
batch_size = 10  # examples per minibatch
lr = 0.03        # learning rate used by the SGD update

In [13]:
# Learning

# Seed both generators used in this cell -- tf.random.normal for the initial
# weights and the `random` module that data_iter shuffles with -- so that
# re-running the cell reproduces the same loss trajectory. The value matches
# the random_state given to train_test_split.
SEED = 3
random.seed(SEED)
tf.random.set_seed(SEED)

# Weight matrix: d feature rows plus one bias row, one column per class
w = tf.Variable(tf.random.normal(shape=(d+1,N)), trainable=True)
for epoch in range(num_epochs):
  for x, y in data_iter(batch_size, X_train, y_train):
    # Forward pass, recorded on the tape so the loss can be differentiated
    with tf.GradientTape() as tape:
      batch_loss = cross_entropy(model(x, w), y)
    # Gradient of the minibatch loss with respect to w
    dw = tape.gradient(batch_loss, w)
    # Update parameters using their gradient
    sgd(w, dw, lr, batch_size)
  # After each epoch, evaluate the resubstitution (training-set) loss.
  # cross_entropy already returns a scalar, so no further reduction is needed.
  train_loss = cross_entropy(model(tf.constant(X_train), w), y_train)
  print(f'epoch {epoch + 1}, loss {float(train_loss):f}')

epoch 1, loss 1.305316
epoch 2, loss 1.043620
epoch 3, loss 0.812080
epoch 4, loss 0.718381
epoch 5, loss 0.612626
epoch 6, loss 0.569255
epoch 7, loss 0.532431
epoch 8, loss 0.511231
epoch 9, loss 0.504288
epoch 10, loss 0.476407


In [14]:
# Evaluation

# Class probabilities for the held-out samples
Z_test = model(X_test, w)
# Predicted class = position of the largest probability in each row
idx = np.argmax(Z_test, axis=1)
# One-hot encode the predictions by selecting rows of the N x N identity
y_pred = np.eye(N)[idx]
# Entry (i, j) counts test samples of true class i that were predicted as j
Cmat = y_test.T @ y_pred

print('Confusion Matrix= \n', Cmat)

Confusion Matrix= 
 [[10.  0.  0.]
 [ 0.  9.  1.]
 [ 0.  2.  8.]]
