In [1]:
################################################################################
# CSE 253: Programming Assignment 1
# Code snippet by Jenny Hamer
# Winter 2019
################################################################################
# We've provided you with the dataset in CAFE.tar.gz. To uncompress, use:
# tar -xzvf CAFE.tar.gz
################################################################################
# To install PIL, refer to the instructions for your system:
# https://pillow.readthedocs.io/en/5.2.x/installation.html
################################################################################
# If you don't have NumPy installed, please use the instructions here:
# https://scipy.org/install.html
################################################################################

from os import listdir
from PIL import Image
import numpy as np


# The relative path to your CAFE-Gamma dataset
data_dir = "./CAFE/"

# Dictionary of semantic "label" to emotions
emotion_dict = {"h": "happy", "ht": "happy with teeth", "m": "maudlin",
	"s": "surprise", "f": "fear", "a": "anger", "d": "disgust", "n": "neutral"}


def load_data(data_dir="./CAFE/"):
    """ Load all PGM images stored in your data directory into a list of NumPy
    arrays with a list of corresponding labels.

    Args:
        data_dir: The relative filepath to the CAFE dataset.
    Returns:
        images: A list containing every image in CAFE as an array.
        labels: A list of the corresponding labels (filenames) for each image.
    """
    # Get the list of image file names
    all_files = listdir(data_dir)

    # Store the images as arrays and their labels in two lists
    images = []
    labels = []

    for file in all_files:
    # Load in the files as PIL images and convert to NumPy arrays
        if file.find('_ht') == -1 and file.find('_n') == -1:
            img = Image.open(data_dir + file)
            images.append(np.array(img))
            labels.append(file)

    print("Total number of images:", len(images), "and labels:", len(labels))

    return images, labels




In [157]:
def PCA(data_ori, dims_rescaled_data=2):
    """
    returns: data transformed in 2 dims/columns + regenerated original data
    pass in: data as 2D NumPy array
    """
    data = data_ori.transpose()
    import numpy as NP
    from scipy import linalg as LA
    m, n = data.shape
    # mean center the data
    data -= data.mean(axis=0)
    data /= data.std(axis=0)
    # calculate the covariance matrix
    R = NP.cov(data, rowvar=False)
    # calculate eigenvectors & eigenvalues of the covariance matrix
    # use 'eigh' rather than 'eig' since R is symmetric, 
    # the performance gain is substantial
    evals, evecs = LA.eigh(R)
    print(evals.shape)
    print(evecs.shape)
    # sort eigenvalue in decreasing order
    idx = NP.argsort(evals)[::-1]
    evecs = evecs[:,idx]
    # sort eigenvectors according to same index
    evals = evals[idx]
    # select the first n eigenvectors (n is desired dimension
    # of resca?led data array, or dims_rescaled_data)
    evecs = evecs[:, :dims_rescaled_data]
    vect = NP.dot(evecs.T, data.T).T
    # carry out the transformation on the data using eigenvectors
    # and return the re-scaled data, eigenvalues, and eigenvectors
    print(data_ori.shape)
    print(vect.shape)
    print(NP.matmul(data_ori,vect).shape)
    vect = vect - vect.mean(axis = 0)
    vect = vect / vect.std(axis = 0)
    result = NP.matmul(data.transpose(),vect)
    result = np.transpose(result.T - result.mean(axis = 1).T)
    result = np.transpose(result.T / result.std(axis = 1).T)
    return result,vect

def test_PCA(data, dims_rescaled_data=2):
    '''
    test by attempting to recover original data array from
    the eigenvectors of its covariance matrix & comparing that
    'recovered' array with the original data
    '''
    _ , _ , eigenvectors = PCA(data, dim_rescaled_data=2)
    data_recovered = NP.dot(eigenvectors, m).T
    data_recovered += data_recovered.mean(axis=0)
    assert NP.allclose(data, data_recovered)


def plot_pca(data):
    from matplotlib import pyplot as MPL
    clr1 =  '#2026B2'
    fig = MPL.figure()
    ax1 = fig.add_subplot(111)
    data_resc, data_orig = PCA(data)
    ax1.plot(data_resc[:, 0], data_resc[:, 1], '.', mfc=clr1, mec=clr1)
    MPL.show()


In [3]:
import numpy as np
images, labels = load_data(data_dir="./CAFE/")
im = np.array(images[:48], 'float64')
im_re = np.reshape(im, [len(im), -1])
pca_result,evecs = PCA(im_re, dims_rescaled_data=6)

Total number of images: 60 and labels: 60


In [4]:
evecs = evecs/evecs.std(axis=0)
evecs = evecs-evecs.mean(axis=0)
evecs_pic = np.reshape(np.transpose(evecs),[6,380,-1])

In [5]:
def display_face(img):
    """ Display the input image and optionally save as a PNG.

    Args:
    img: The NumPy array or image to display

    Returns: None
    """
    # Convert img to PIL Image object (if it's an ndarray)
    if type(img) == np.ndarray:
        print("Converting from array to PIL Image")
        im = (img - img.min())*(255/(img.max()-img.min()))
        # normalize the img into 0-255
        img = Image.fromarray(im)
    # Display the image
    img.show()

In [6]:
display_face(evecs_pic[0,:,:])
display_face(evecs_pic[1,:,:])
display_face(evecs_pic[2,:,:])
display_face(evecs_pic[3,:,:])
display_face(evecs_pic[4,:,:])
display_face(evecs_pic[5,:,:])



Converting from array to PIL Image
Converting from array to PIL Image
Converting from array to PIL Image
Converting from array to PIL Image
Converting from array to PIL Image
Converting from array to PIL Image


In [8]:
def load_happy_sad(data_dir="./CAFE/"):
    """ Load all PGM images stored in your data directory into a list of NumPy
    arrays with a list of corresponding labels.

    Args:
        data_dir: The relative filepath to the CAFE dataset.
    Returns:
        images: A list containing every image in CAFE as an array.
        labels: A list of the corresponding labels (filenames) for each image.
    """
    # Get the list of image file names
    all_files = listdir(data_dir)

    # Store the images as arrays and their labels in two lists
    images = []
    labels = []

    for file in all_files:
    # Load in the files as PIL images and convert to NumPy arrays
        if file.find('_h') != -1 or file.find('_m')!=-1 :
            img = Image.open(data_dir + file)
            images.append(np.array(img))
            labels.append(file)

    print("Total number of h_m:", len(images), "and labels:", len(labels))

    return images, labels

In [9]:
def get_sad(images, labels):
    image_sad = []
    for i in range(len(images)):
        if labels[i].find('_m') != -1:
            image_sad.append(images[i])
    return image_sad

In [10]:
sad_vector = get_sad(images, labels)
sad =np.array(np.reshape(sad_vector,[len(sad_vector),-1]),'float64')

In [11]:
def get_happy(images, labels):
    image_happy = []
    for i in range(len(images)):
        if labels[i].find('_h') != -1:
            image_happy.append(images[i])
    return image_happy

In [12]:
happy_vector = get_happy(images, labels)
print(np.array(happy_vector).shape)
happy=np.array(np.reshape(happy_vector,[len(happy_vector),-1]),'float64')

(10, 380, 240)


In [129]:
features = np.concatenate((sad[:8],happy[:8]),axis=0)
labels_1 = np.concatenate(([0]*8,[1]*8),axis=0)

In [130]:
holdout_feature = np.concatenate((sad[7:8],happy[7:8]),axis=0)
holdout_label = np.concatenate(([0],[1]),axis=0)

In [131]:
test_feature = np.concatenate((sad[8:9],happy[8:9]),axis=0)
test_label = np.concatenate(([0],[1]),axis=0)

In [160]:
features_pca, evect = PCA(features, dims_rescaled_data=15)

(16,)
(16, 16)
(16, 91200)
(91200, 15)
(16, 15)


In [161]:
features_pca

array([[-1.82948095,  2.85550848, -1.24678986,  0.47655301,  0.27357105,
         0.08044501, -0.11779909,  0.4564379 , -0.61105113, -0.51454176,
        -0.18231453, -0.35550254,  0.77393048, -0.11750038,  0.05853431],
       [-2.82609533,  1.6084112 ,  0.9745908 , -1.11935066,  0.17168092,
         0.81782256,  0.86950537, -0.44732322, -0.19798486,  0.20882141,
        -0.01740262,  0.24904784,  0.26675438,  0.03440187, -0.59287965],
       [-2.84629722,  0.80422512,  1.85801394, -0.38374653,  0.4927662 ,
        -0.1153255 , -0.79465616, -0.12295929,  0.34716095, -0.61723477,
         1.10577839,  0.03853081,  0.06268005,  0.03390454,  0.13715949],
       [-3.11235849, -0.61736027,  0.39433414,  1.62178208, -0.48828385,
        -0.09880345,  1.08101917, -0.32086095, -0.04867171,  0.0250695 ,
         0.38000871,  0.18459528,  0.34509901,  0.56382093,  0.0906099 ],
       [-2.4591408 ,  1.56250366, -0.69647371,  0.92347969, -1.14326777,
         1.20201062, -0.83511168, -0.60710734, 

In [212]:
import math
def sigmoid(scores):
    return 1 / (1 + np.exp(-scores))
def loss(t, y):
    return -(t * np.log(y) +(1 - t) * np.log(1 - y)).mean()
def accuracy(t,p):
    return 1-1.0*sum(abs(t-(p>0.5)))/len(t)
def logistic_regression(features, target, num_steps, learning_rate, add_intercept = False):
    ce_loss = []
    if add_intercept:
        intercept = np.ones((features.shape[0], 1))
        features = np.hstack((intercept, features))   
    weights = np.zeros(features.shape[1])
    for step in range(num_steps):
        scores = np.dot(features, weights)
        predictions = sigmoid(scores)
        # Update weights with gradient
        gradient = np.dot(features.T, target-predictions)
        weights += learning_rate * gradient
        # Print log-likelihood every so often
        ce_loss.append(loss(target,sigmoid(np.dot(features,weights))))
    return weights,ce_loss

In [213]:
loss_overall=[]
for i in range(10):
    [weight,ce_loss] = logistic_regression(features_pca, labels_1, 10, 0.1, add_intercept = False)
    loss_overall.append(ce_loss)

(10, 10)

In [198]:
holdout_pca = np.dot(holdout_feature,evect)
holdout_pca = holdout_pca - holdout_pca.mean()
holdout_pca = holdout_pca / holdout_pca.std()

In [164]:
holdout_pca

array([[-3.59456151,  0.82955879,  1.50045876,  0.37629237,  0.27772601,
        -0.08406059, -0.45896371,  0.04358075,  0.11146505, -0.29916364,
        -0.98522272,  0.99948702, -0.06402932, -0.05704641,  0.29104213],
       [-1.97691541,  2.04132495, -1.1899904 ,  0.40191635,  0.53067138,
        -0.13887111,  0.37930887,  0.84190681,  0.20134868,  0.08671695,
         0.31215654,  0.27197625, -0.68661055,  0.15992016, -0.12142244]])

In [165]:
holdout_result = sigmoid(np.dot(holdout_pca,weight)) 

In [166]:
holdout_result

array([0.1421452 , 0.67520279])