In [1]:
"""
Student Name: ZENG Yang

Student ID: 20711899

Assignment #: 1

Student Email: yzengav@connect.ust.hk

Course Name: Machine Learning

"""
"""
           Programming Assignment 1
         

This program learns a softmax model for the Iris dataset (included).
There is a function, compute_softmax_loss, that computes the
softmax loss and the gradient. It is left empty. Your task is to write
the function.
     
"""
import numpy as np
import os
import math

# Data sets
# File names of the two Iris CSV splits loaded by get_data(). No directory
# component is given, so they are resolved relative to the working directory.
IRIS_TRAINING = "iris_training.csv"
IRIS_TEST = "iris_test.csv"

def get_data():
    """
    Read the training and test CSV files and split each into features/labels.

    Both files are parsed as comma-separated floats with the first line
    skipped as a header. Columns 0-3 of every row are returned as the feature
    matrix; column 4 is cast to int64 and returned as the label vector.

    Returns:
    - train_x, train_y, test_x, test_y (in that order).
    """
    # Parse both files up front, training split first.
    train_table, test_table = [
        np.genfromtxt(csv_name, skip_header=1, dtype=float, delimiter=',')
        for csv_name in (IRIS_TRAINING, IRIS_TEST)
    ]

    def _split(table):
        # First four columns are inputs, the fifth holds the integer class.
        return table[:, :4], table[:, 4].astype(np.int64)

    train_x, train_y = _split(train_table)
    test_x, test_y = _split(test_table)
    return train_x, train_y, test_x, test_y

def compute_softmax_loss(W, X, y, reg):
    """
    Softmax loss function.
    Inputs:
    - W: D x K array of weight, where K is the number of classes.
    - X: N x D array of training data. Each row is a D-dimensional point.
    - y: 1-d array of shape (N, ) for the training labels (integers in 0..K-1).
    - reg: weight regularization coefficient.

    Returns:
    - softmax loss: NLL/N + 0.5 * reg * (sum of squared weights),

    - dW: the gradient for W, same shape as W.

    Raises:
    - ValueError: if X contains no rows (the mean loss is undefined).
    """
    num_samples = X.shape[0]
    if num_samples == 0:
        raise ValueError("compute_softmax_loss requires at least one sample")

    # Class scores, one row per sample: N x K.
    scores = X.dot(W)

    # Shift every row so its largest score is 0 before exponentiating. The
    # softmax is invariant to this shift, but without it np.exp overflows to
    # inf for large scores and the loss/gradient turn into inf/nan.
    shifted = scores - scores.max(axis=1, keepdims=True)
    log_norm = np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    log_probs = shifted - log_norm

    # Negative log-likelihood of the true class, averaged over the batch,
    # plus the L2 penalty.
    rows = np.arange(num_samples)
    loss = -log_probs[rows, y].mean() + 0.5 * reg * np.square(W).sum()

    # d(loss)/d(scores) = softmax probabilities minus the one-hot labels.
    dscores = np.exp(log_probs)
    dscores[rows, y] -= 1.0
    dW = X.T.dot(dscores) / num_samples + reg * W

    return loss, dW

def predict(W, X):
    """
    Predict a class index for every row of X with a linear classifier.

    Inputs:
    - W: D x K array of weights. K is the number of classes.
    - X: N x D array of data. Each row is a D-dimensional point.

    Returns:
    - 1-dimensional array of length N; entry n is the column index of the
      largest score in row n of X.dot(W).
    """
    # N x K score table; the winning class is the arg-max along each row.
    class_scores = X.dot(W)
    return class_scores.argmax(axis=1)

def acc(ylabel, y_pred):
    """Return the mean of the element-wise comparison ylabel == y_pred,
    i.e. the fraction of positions where the two agree."""
    hits = (ylabel == y_pred)
    return np.mean(hits)


def train(X, y, Xtest, ytest, learning_rate=1e-3, reg=1e-5, epochs=100, batch_size=20):
    """
    Fit softmax weights with mini-batch SGD and report progress every 10 epochs.

    Inputs:
    - X: N x D array of training data. Each row is a D-dimensional point.
    - y: 1-d array of shape (N, ) of integer labels, assumed to take values
      0...K-1 where K is the number of classes.
    - Xtest, ytest: data and labels used only for the printed test accuracy.
    - learning_rate: SGD step size.
    - reg: weight regularization coefficient passed to compute_softmax_loss.
    - epochs: number of passes over the training data.
    - batch_size: samples per mini-batch; must be positive. Values larger than
      N are clamped to N (one full-batch step per epoch).

    Returns:
    - W: D x K array of learned weights.

    Raises:
    - ValueError: if batch_size is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    num_train, dim = X.shape
    num_classes = np.max(y) + 1  # assume y takes values 0...K-1 where K is number of classes

    # Guarantee at least one update per epoch; otherwise no step is taken and
    # there is no loss value to report.
    batch_size = min(batch_size, num_train)
    # Only whole batches are used: the last num_train % batch_size samples of
    # each epoch's permutation are skipped.
    num_iters_per_epoch = num_train // batch_size

    # randomly initialize W
    W = 0.001 * np.random.randn(dim, num_classes)

    # Iterate over the `epochs` argument (not a module-level global) so the
    # function honours its own parameter and works when imported on its own.
    for epoch in range(epochs):
        perm_idx = np.random.permutation(num_train)
        # perform mini-batch SGD update
        for it in range(num_iters_per_epoch):
            idx = perm_idx[it * batch_size:(it + 1) * batch_size]
            batch_x = X[idx]
            batch_y = y[idx]

            # evaluate loss and gradient
            loss, grad = compute_softmax_loss(W, batch_x, batch_y, reg)

            # update parameters
            W += -learning_rate * grad

        # evaluate and print every 10 epochs; `loss` is that of the last
        # mini-batch processed in this epoch
        if epoch % 10 == 0:
            train_acc = acc(y, predict(W, X))
            test_acc = acc(ytest, predict(W, Xtest))
            print('Epoch %4d: loss = %.2f, train_acc = %.4f, test_acc = %.4f'
                  % (epoch, loss, train_acc, test_acc))

    return W

# Hyperparameters for this training run.
max_epochs = 200      # passes over the training set
batch_size = 20       # samples per SGD step
learning_rate = 0.1   # SGD step size
reg = 0.01            # weight regularization coefficient

# Load the train/test splits, then fit the weight matrix on the training split.
train_x, train_y, test_x, test_y = get_data()
W = train(
    train_x, train_y, test_x, test_y,
    learning_rate=learning_rate,
    reg=reg,
    epochs=max_epochs,
    batch_size=batch_size,
)

# Classify two new flower samples.
def new_samples():
    """Return two hard-coded samples as a 2 x 4 float32 array (one sample per row)."""
    rows = [
        [6.4, 3.2, 4.5, 1.5],
        [5.8, 3.1, 5.0, 1.7],
    ]
    return np.asarray(rows, dtype=np.float32)
# Build the two hard-coded samples and predict a class index for each one
# using W, the weight matrix returned by train() above.
new_x = new_samples()
predictions = predict(W, new_x)

# Show the predicted class indices (one per sample row).
print("New Samples, Class Predictions:    {}\n".format(predictions))

Epoch    0: loss = 0.87, train_acc = 0.7000, test_acc = 0.5333
Epoch   10: loss = 0.39, train_acc = 0.7000, test_acc = 0.5333
Epoch   20: loss = 0.96, train_acc = 0.7083, test_acc = 0.5667
Epoch   30: loss = 0.55, train_acc = 0.8333, test_acc = 0.9333
Epoch   40: loss = 0.27, train_acc = 0.8833, test_acc = 0.7667
Epoch   50: loss = 0.30, train_acc = 0.9167, test_acc = 0.9333
Epoch   60: loss = 0.27, train_acc = 0.9750, test_acc = 0.9333
Epoch   70: loss = 0.23, train_acc = 0.9083, test_acc = 0.9333
Epoch   80: loss = 0.23, train_acc = 0.9667, test_acc = 1.0000
Epoch   90: loss = 0.26, train_acc = 0.9667, test_acc = 1.0000
Epoch  100: loss = 0.43, train_acc = 0.8833, test_acc = 0.7667
Epoch  110: loss = 0.23, train_acc = 0.9667, test_acc = 1.0000
Epoch  120: loss = 0.41, train_acc = 0.9083, test_acc = 0.8667
Epoch  130: loss = 0.26, train_acc = 0.9500, test_acc = 0.9333
Epoch  140: loss = 0.31, train_acc = 0.9750, test_acc = 0.9333
Epoch  150: loss = 0.32, train_acc = 0.9583, test_acc =

In [10]:
# Scratch check of np.dot on two 2x2 integer matrices.
# The left operand is deliberately NOT named `W`: that name holds the trained
# weight matrix at module level, and rebinding it here would silently replace
# the model with this demo matrix for any cell run afterwards.
W_demo = np.array([[2, 3], [5, 6]])
a = np.array([[2, 3], [5, 6]])
np.dot(W_demo, a)

array([[19, 24],
       [40, 51]])

In [4]:
import numpy as np
# Scratch cell: small integer arrays for trying out NumPy element-wise
# functions, reductions, indexing and matrix products.
b = np.array([[1, 2, 3], [4, 5, 6]])        # 2 x 3
z = np.array([[1, 2], [3, 4], [5, 6]])      # 3 x 2
g = np.array([2, 2, 2])

# Element-wise functions of b.
c = np.exp(b)
e = np.log(b)
f = np.square(b)

# Reductions and indexing.
d = b.sum().sum()          # total of all entries (second sum acts on a scalar)
h = b.argmax(axis=0)       # row index of the largest entry in each column
i = b[:, 0]                # first column

# Zero array with the same shape as b.
w = np.zeros(b.shape)

print(b)
print(z)
b @ z

[[1 2 3]
 [4 5 6]]
[[1 2]
 [3 4]
 [5 6]]


array([[22, 28],
       [49, 64]])