In [None]:
########################################################################
# VERSION 1
# filters applied: [ 'sobel', 'vertical', 'horizontal', 'contrast_low', 'contrast_mid', 'contrast_high' ]
# NOTE: the 'sobel' filter of this version is NOT a correct Sobel operator
########################################################################
# original fer2013 size: 0.3 GB
# v1 size generated:     1.9 GB
# v2 size generated:     3.8 GB
# v3 size generated:     3.8 GB

In [None]:
import numpy as np
import pandas as pd
import math
import torch
import os
from random import seed
from random import random
import seaborn as sea
import matplotlib.pyplot as plot
from tqdm.auto import tqdm

In [None]:
# file name of the source dataset; it is appended to import_dir when the CSV is loaded
dataset_filename = 'fer2013.csv'
# image sizes (used as default dimensions by the flatten / rescale helpers)
im_l = 48
im_h = 48

In [3]:
# execution environment selector: the following cells set import_dir / export_dir
# depending on this value
#can_be_computed = [ 'local', 'colab', 'kaggle' ]
actually_computed = 'local'

In [None]:
# for local
# --------------------------------------------
if (actually_computed == 'local') :
    # empty prefix: the dataset file name is used as a relative path
    import_dir = ''
    export_dir = 'output_csv/'

In [4]:
# for google colab
# --------------------------------------------
if (actually_computed == 'colab') :
    # project folder located under the Drive mount point used below
    import_dir = '/content/drive/MyDrive/Colab Notebooks/DL/progetto/'
    export_dir = import_dir + 'output_csv/'

    # Mount data from drive
    from google.colab import drive
    drive.mount('/content/drive')

In [None]:
# for kaggle
# --------------------------------------------
if (actually_computed == 'kaggle') :
    import_dir = '/kaggle/input/fer2013/'
    export_dir = '/kaggle/working/output_csv/'

    # create the export directory when it is missing; exist_ok=True replaces the
    # separate existence check (no gap between the check and the creation)
    os.makedirs(export_dir, exist_ok=True)
    # sanity check: prints True once the directory is available
    print(os.path.exists(export_dir))

In [None]:
# full path of the source CSV: environment-specific directory prefix + file name
dataset_filepath = import_dir + dataset_filename
# load the whole CSV into a DataFrame
data = pd.read_csv(dataset_filepath)

In [None]:
# quick look at the loaded DataFrame
print(data)

In [None]:
#analysis of dataset
# display names of the classes; the position in the tuple is used as the index
# of the tick label here and as the label index in print_image
emotions =  ('rage',   'disgust',  'fear',  'happiness', 'sadness',   'surprise', 'neutral')
# label column of the dataset
y = data['emotion']

# bar chart with the number of samples per label value
sx = sea.countplot(x=y)
# replace the numeric tick labels with the emotion names
plot.xticks(range(len(emotions)), emotions)
plot.xlabel("Emotions")
plot.ylabel("Count")

In [None]:
# number of samples per label value, ordered by label value
num_of_emotions = data['emotion'].value_counts().sort_index()
print(num_of_emotions)

In [None]:
def flat_image(image, im_l=im_l, im_h=im_h) :
    """Reshape `image` into a 1-D array of im_l*im_h elements.

    `image` must provide a numpy-style `reshape`; the reshape fails when the
    number of elements of `image` differs from im_l*im_h.
    """
    total_pixels = im_l * im_h
    flattened = image.reshape(total_pixels)
    return flattened

def unflat_image(image,im_l,im_h) :
    """Cut a flat pixel sequence into `im_l` consecutive rows of `im_h` elements.

    Returns a numpy array built from those rows (shape (im_l, im_h) when
    `image` holds at least im_l*im_h elements).
    """
    rows = []
    for row_idx in range(im_l) :
        start = im_h * row_idx
        # one row = the next im_h elements of the flat sequence
        rows.append(np.array(image[start:start + im_h]))
    return np.array(rows)

In [None]:
def scale_image_to_int_in_bounds(image, im_l=im_l, im_h=im_h) :
    """Min-max rescale an image to integer values in [0, 255].

    Parameters
    ----------
    image : array-like holding exactly im_l*im_h numeric values
    im_l, im_h : dimensions of the result; the returned array has shape (im_l, im_h)

    Returns
    -------
    numpy array of shape (im_l, im_h):
      - non-constant image: every pixel becomes int(255 * (pixel - min) / (max - min))
      - constant image with 0 < value <= 255: a copy of the pixels, unchanged
      - any other constant image: all zeros (black image)

    Raises
    ------
    ValueError : when the image contains NaN or infinite values (they cannot be
                 converted to an integer pixel value)
    """
    n_pixels = im_l * im_h
    # honour the requested dimensions instead of always flattening with the
    # module-level default size
    flat = np.asarray(image).reshape(n_pixels)
    if not np.all(np.isfinite(flat)) :
        raise ValueError("cannot rescale an image containing NaN or infinite values")

    tot_min = flat.min()
    tot_max = flat.max()

    if tot_min == tot_max :
        #image of just one constant color
        if 0 < tot_min <= 255 :
            #constant inside the image bounds: keep it (copy, so the input is never aliased)
            scaled = flat.copy()
        else :
            #constant outside the image bounds: black image
            scaled = np.zeros(n_pixels)
    else :
        #image not constant ==> (tot_max - tot_min) != 0
        # float arithmetic avoids overflow for small integer dtypes;
        # astype(int) truncates exactly like int() does
        as_float = flat.astype(float)
        scaled = (255 * (as_float - tot_min) / (float(tot_max) - float(tot_min))).astype(int)

    return scaled.reshape(im_l, im_h)

In [None]:
#function apply_filter :
#  applies a filter given as a lambda function
# input parameters :
#  - image :         the original image
#  - filter_lambda : lambda implementing the filter; it is called as filter_lambda(x, y, pix),
#                         where pix is the pixel value and (x, y) is the position inside the filter window, with 0<=x<filter_size[0] and 0<=y<filter_size[1]
#  - filter_size :   the size of the filter window, as a (height, width) tuple
# output :
#  - new image as a matrix with the same dimensions as the original image

def apply_filter(image, filter_lambda, filter_size=(3,3)):
    """Apply a window filter to every pixel of a 2-D image.

    For each output pixel the filter window is centred on it and the values
    filter_lambda(x1, y1, pix) are summed over the window positions that fall
    inside the image (positions outside the image contribute nothing). The
    summed image is then rescaled with scale_image_to_int_in_bounds.

    Parameters
    ----------
    image : 2-D numpy array
    filter_lambda : callable (x1, y1, pix) -> number, where (x1, y1) is the
        position inside the window (0 <= x1 < filter_size[0],
        0 <= y1 < filter_size[1]) and pix is the image value under it
    filter_size : (height, width) of the window; the window centre is at
        (height // 2, width // 2), i.e. offset 1 for the default 3x3

    Returns
    -------
    The rescaled filtered image, with the same dimensions as `image`.
    """
    n_rows = image.shape[0]
    n_cols = image.shape[1]
    # distance between the top-left corner of the window and its centre
    row_offset = filter_size[0] // 2
    col_offset = filter_size[1] // 2

    new_image = np.zeros(image.shape)
    # visit EVERY output pixel, including the last row and the last column
    for out_row in range(n_rows):
        for out_col in range(n_cols):
            total = 0.0
            for x1 in range(filter_size[0]):
                i = out_row - row_offset + x1
                # strict upper bound: index n_rows itself is already outside the image
                if i < 0 or i >= n_rows:
                    continue
                for y1 in range(filter_size[1]):
                    j = out_col - col_offset + y1
                    if 0 <= j < n_cols:
                        total += filter_lambda(x1, y1, image[i][j])
            new_image[out_row][out_col] = total
    # forward the real image dimensions instead of relying on the default size
    return scale_image_to_int_in_bounds(new_image, n_rows, n_cols)


In [None]:
#prints image
def print_image(ima, labelI, val=False, subpl="",title=""):
    """Show an image in grayscale.

    - subpl given: draw on that axes object and use `title` (prefixed by a
      space) as its title, or an empty title when `title` is "".
    - subpl left as "": draw on the current pyplot figure, titled with the
      emotion name of `labelI`, followed by the numeric label when `val` is true.
    """
    if not (subpl == ""):
        subpl.imshow(ima, cmap='gray')
        if title != "":
            caption = " " + str(title)
        else:
            caption = ""
        subpl.set_title(caption)
        return

    plot.imshow(ima, cmap='gray')
    heading = emotions[labelI]
    if val:
        heading = heading + (" " + str(labelI))
    plot.title(heading)

In [None]:
# convert every pixel string of the dataset into a 2-D np-array of doubles
# (the result is a list with one np-array per image)

def split_image_pixels(x, shape):
    """Convert an iterable of whitespace-separated pixel strings into arrays.

    Each string is split on whitespace, reshaped to `shape` and converted to
    double precision. Returns a list with one numpy array per input string;
    progress is reported through tqdm.
    """
    return [
        np.array(pixel_string.split()).reshape(shape).astype('double')
        for pixel_string in tqdm(x)
    ]

# target shape of every image array
shape = (im_h,im_l)
# list with one 2-D array per row of the 'pixels' column
original_images = split_image_pixels(data['pixels'], shape)

In [None]:
# inspect the nested structure of original_images: size and type at each level
#list: all images
print("all images:\t", len(original_images), type(original_images))
#np.array : first image
print("first image:\t", len(original_images[0]), type(original_images[0]))
#np.array : first row of the first image
print("first row:\t", len(original_images[0][0]), type(original_images[0][0]))
#single pixel value: it has no length, so a literal 0 is printed in the length column
print("first element:\t", 0, type(original_images[0][0][0]))

In [None]:
#some traditional filters
# Every lambda receives (x, y, pix): x is the row and y the column inside the
# filter window, pix is the pixel value under that position; apply_filter sums
# the returned values over the window. The weights listed below hold for a
# 3x3 window (x, y in 0..2).
filters = {
    #'original'      : (lambda x,y,pix: (pix)),
    # mean of the 'horizontal' and 'vertical' weights below, i.e. window rows
    # [1,1,0], [1,0,-1], [0,-1,-1]; this is not a gradient magnitude
    'sobel'         : (lambda x,y,pix: ((1-(x%2))*(1+(y%2))*(1-(x%3))*pix + (1-(y%2))*(1+(x%2))*(1-(y%3))*pix)/2),
    # window rows [1,0,-1], [2,0,-2], [1,0,-1] : difference between the left and right columns
    'vertical'      : (lambda x,y,pix: ((1-(y%2))*(1+(x%2))*(1-(y%3))*pix)),
    # window rows [1,2,1], [0,0,0], [-1,-2,-1] : difference between the top and bottom rows
    'horizontal'    : (lambda x,y,pix: ((1-(x%2))*(1+(y%2))*(1-(x%3))*pix)),
    #'sobel'         : (lambda x,y,pix: (((filters['horizontal'](x,y,pix) ** 2) + (filters['vertical'](x,y,pix) ** 2)) ** (1/2))),
    # position-independent terms: square root, square and cube of the pixel value
    'contrast_low'  : (lambda x,y,pix: (pix ** (1/2))),
    'contrast_mid'  : (lambda x,y,pix: (pix ** 2)),
    'contrast_high' : (lambda x,y,pix: (pix ** 3)) 
}

In [None]:
#print some examples
# pick a random image and show the result of every filter side by side
seed(None)
img_idx = int(random() * len(y))
#img_idx = 6458
print('img_idx:', img_idx)
rows = 1
cols = len(filters)
# squeeze=False always returns a 2-D (rows, cols) array of axes, so the indexing
# below also works with a single filter; no separate plot.figure() call is made
# because it would only add an empty figure to the output
fig, axs = plot.subplots(rows, cols, figsize=(25,25), squeeze=False)
filter_size = (3,3)

for cell_idx, (filter_name, filter_lambda) in enumerate(filters.items()) :
    # position of this filter in the grid, filled row by row
    i, j = divmod(cell_idx, cols)
    image = apply_filter(original_images[img_idx], filter_lambda, filter_size)
    #print(image)
    print_image(image, 0, subpl=axs[i][j], title=filter_name)

In [None]:
# show the unfiltered image selected above, titled with its emotion label
print_image(original_images[img_idx], y[img_idx])

In [None]:
def count_char(string, char=' ') :
    """Return how many elements of `string` are equal to `char` (a space by default)."""
    return sum(1 for current in string if current == char)

In [None]:
def array_to_string(array) :
    """Serialize a numpy array as space-separated integers.

    The values are cast with astype(int) before being converted to text.
    """
    as_ints = array.astype(int)
    return ' '.join(map(str, as_ints))

In [None]:
# function build_new_dataset :
#   applies every filter to every image and serializes the results as strings of space-separated pixels
# input parameters :
#  - original_images : iterable of images (2-D np-arrays)
#  - filters :         dict that maps each filter name to its lambda
#  - kernel :          size of the filter window, passed to apply_filter
# output:
#  - list with one entry per image; every entry is a list of |filters|+1 strings: the original image first, then one string per filter (in the order of the dict)

def build_new_dataset(original_images,filters=filters,kernel=(3,3)):
    """Build the augmented dataset rows.

    For every image the row holds the flattened original image followed by the
    flattened result of each filter, each serialized with array_to_string.
    `kernel` is passed to apply_filter as the window size. Returns a list of rows.
    """
    new_dataset = []
    for original_image in tqdm(original_images):
        # first column: the unfiltered image
        row = [array_to_string(flat_image(original_image, im_l, im_h))]
        # remaining columns: one per filter, in the order of the dict
        row.extend(
            array_to_string(flat_image(apply_filter(original_image, filter_lambda, kernel), im_l, im_h))
            for filter_lambda in filters.values()
        )
        new_dataset.append(row)
    return new_dataset

In [None]:
#test = [ int(x) for cur_list in original_images[6457] for x in cur_list ]
#test = np.isnan(original_images[6457])
#errors = [ (i,j) for i in range(len(test)) for j in range(i) if test[i][j] == True ]
#print(errors)

In [None]:
# number of images to process (the whole dataset); the commented value below
# restricts the run to a small subset
data_len = len(original_images)
#data_len = 50
#print(data_len)
# problem: image 6458 all zeros.... fixed!
new_dataset = build_new_dataset(original_images[:data_len])

In [None]:
# reference sizes and types, taken from the first image, at each nesting level
# level 1: the whole dataset
l1 = len(new_dataset)
t1 = type(new_dataset)
# level 2: the list of serialized images of one sample
l2 = len(new_dataset[0])
t2 = type(new_dataset[0])
# level 3: one serialized image; its size is measured as the number of separators
l3 = count_char(new_dataset[0][0])
t3 = type(new_dataset[0][0])
print(l1, "\t", t1)
print(l2, "\t", t2)
print(l3, "\t", t3)

In [None]:
#check lengths and types
# every sample and every serialized image is compared with the reference
# values (l2, t2, l3, t3); each mismatch is printed and turns `ok` to False
ok = True
for image in new_dataset :
    new_l2 = len(image)
    new_t2 = type(image)
    if new_l2 != l2 or new_t2 != t2:
        print("2:", new_l2, "\t", new_t2)
        ok = False
    for im_filter in image :
        new_l3 = count_char(im_filter)
        new_t3 = type(im_filter)
        if new_l3 != l3 or new_t3 != t3:
            print("3:", new_l3, "\t", new_t3)
            ok = False
# True only when no mismatch was found
print(ok)

In [None]:
def get_columns(filters=filters) :
    """Return the output column names: 'original' followed by every filter name."""
    return ['original'] + [filter_name for filter_name in filters]

In [None]:
# one column for the original image plus one per filter
new_columns = get_columns()
new_dataset_df = pd.DataFrame(new_dataset, columns = new_columns)

In [None]:
#test if everything ok
# parse the serialized images of a random sample back into arrays and show them
rows = 1
cols = len(new_dataset[0])
# squeeze=False always returns a 2-D (rows, cols) array of axes
fig, axs = plot.subplots(rows, cols, figsize=(25,25), squeeze=False)
filter_size = (3,3)

seed(None)
img_idx = int(random() * data_len)
print('img_idx:', img_idx)

columns = get_columns()
for cur_im, image_str in enumerate(new_dataset[img_idx]) :
    # position of this image in the grid, filled row by row
    i, j = divmod(cur_im, cols)
    image = np.fromstring(image_str, dtype=int, sep=' ')
    cur_filter = columns[cur_im]
    # use the configured image size instead of a hard-coded 48x48
    print_image(image.reshape(im_h, im_l), 0, subpl=axs[i][j], title=cur_filter)

In [None]:
# file name of the exported dataset (also the default name of the download link)
csv_filename = 'fer2013_augmented_v1.csv'

In [None]:
from IPython.display import HTML
import base64

def create_download_link(df, title = "Download CSV file", filename = csv_filename):
    """Return an HTML anchor that downloads `df` as a CSV file.

    The DataFrame is serialized without its index, base64-encoded and embedded
    in the link as a data URI; `title` is the link text and `filename` the
    name proposed for the downloaded file.
    """
    encoded_csv = base64.b64encode(df.to_csv(index=False).encode())
    link_template = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(link_template.format(payload=encoded_csv.decode(), title=title, filename=filename))

In [None]:
#write a new dataframe
if (actually_computed == 'local') :
    # local run: write the CSV file directly
    new_dataset_df.to_csv(csv_filename, index=False)
else :
    # An expression nested in an if/else block is not rendered automatically
    # (only the last top-level expression of a cell is), so the link has to be
    # displayed explicitly or it is never shown.
    from IPython.display import display
    display(create_download_link(new_dataset_df))