In [10]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from os import path as opath
import os
from PIL import Image
import errno
# Global plot styling: seaborn "whitegrid" theme, with seaborn's colour
# shorthand codes enabled for matplotlib.
sns.set(style="whitegrid", color_codes=True)


In [2]:
# Base directory (relative path) that holds train.csv and under which the
# exported image folders are written.
DATA_PATH = 'data/'

In [13]:
def convert_pixels(pix_str, shape=(48, 48)):
    """Parse a whitespace-separated string of pixel values into an image array.

    Parameters
    ----------
    pix_str : str
        Integer pixel intensities separated by whitespace, in row-major order.
    shape : tuple of int, optional
        Shape of the returned array. Defaults to ``(48, 48)``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the requested shape.

    Raises
    ------
    ValueError
        If a token is not an integer literal, or the number of values does
        not match ``shape``.
    """
    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing blanks; split(' ') would produce empty tokens there,
    # which int() rejects.
    values = [int(token) for token in pix_str.split()]
    return np.array(values, 'uint8').reshape(shape)


def load_data():
    """Read ``train.csv`` from ``DATA_PATH`` into a DataFrame.

    The ``Pixels`` column is passed through ``convert_pixels`` while the
    file is parsed, so each cell of that column holds the converter's result.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    csv_file = opath.join(DATA_PATH, 'train.csv')
    column_parsers = {'Pixels': convert_pixels}
    return pd.read_csv(csv_file, converters=column_parsers)

# Parse the training CSV once; the later cells build their splits from `data`.
data = load_data()

In [14]:
# Mapping from numeric emotion code to its label.
EMOTION_LABELS = {
    0: 'anger',
    1: 'disgust',
    2: 'fear',
    3: 'happy',
    4: 'sad',
    5: 'surprise',
    6: 'neutral',
}


def decode_label(df):
    """Return a copy of ``df`` whose ``Emotion`` codes are replaced by names.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``Emotion`` column holding the codes 0-6.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` itself is not modified. Values that are not one
        of the known codes are left unchanged.
    """
    decoded = df.copy()
    # Build the decoded column in a single pass and assign it wholesale.
    # Writing strings into the integer column through .loc relies on silent
    # upcasting, which newer pandas releases deprecate.
    decoded['Emotion'] = decoded['Emotion'].map(
        lambda code: EMOTION_LABELS.get(code, code)
    )
    return decoded

In [15]:
# Fixed seed so that re-running this cell reproduces the same split. With
# unseeded sampling every run assigns rows differently, while image files
# written by an earlier run remain on disk under their old split.
SPLIT_SEED = 42

decoded_data = decode_label(data)

# Randomly pick 700 'neutral' and 300 'happy' rows to discard.
remove_neutral_data = decoded_data[decoded_data['Emotion'] == 'neutral'].sample(700, random_state=SPLIT_SEED)
remove_happy_data = decoded_data[decoded_data['Emotion'] == 'happy'].sample(300, random_state=SPLIT_SEED)

# Combine the two label sets directly. Adding the frames together only
# produced the union as a side effect of index alignment, and performed
# element-wise arithmetic on every column to get there.
rows_to_drop = remove_happy_data.index.union(remove_neutral_data.index)
transformed_data = decoded_data.drop(rows_to_drop)

# 10% of the remaining rows become the test split; 20% of what is left after
# that becomes the validation split; everything else is training data.
test_data = transformed_data.sample(frac=0.1, random_state=SPLIT_SEED)
transformed_data = transformed_data.drop(test_data.index)
valid_data = transformed_data.sample(frac=0.2, random_state=SPLIT_SEED)
train_data = transformed_data.drop(valid_data.index)

In [19]:
def mkdir(path):
    """Create directory ``path`` (including missing parents) if it is absent.

    Calling this on a directory that already exists is a no-op.

    Raises
    ------
    OSError
        If creation fails for any reason other than the directory already
        existing, including when ``path`` exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exception:
        # EEXIST is also reported when a regular file occupies `path`; only
        # swallow the error when the existing entry really is a directory.
        if exception.errno != errno.EEXIST or not os.path.isdir(path):
            raise
            
            
def save_img_files(df, base):
    """Write every row of ``df`` to disk as a PNG, grouped by emotion.

    Files are stored as ``DATA_PATH/<base>/<Emotion>/<index label>.png``;
    the emotion directory is created on demand.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``Emotion`` column (used as the folder name) and a
        ``Pixels`` column (passed to ``Image.fromarray``).
    base : str
        Name of the sub-directory of ``DATA_PATH`` to write into.
    """
    split_dir = opath.join(DATA_PATH, base)
    for row_label, record in df.iterrows():
        emotion_dir = opath.join(split_dir, record['Emotion'])
        mkdir(emotion_dir)
        target_file = opath.join(emotion_dir, '{}.png'.format(row_label))
        picture = Image.fromarray(record['Pixels'])
        picture.save(target_file, 'png')

In [20]:
# Export each split into its own sub-folder, in the order test, valid, train.
for split_name, split_frame in (('test', test_data),
                                ('valid', valid_data),
                                ('train', train_data)):
    save_img_files(split_frame, split_name)

In [18]:
# Preview a handful of rows rather than rendering the whole split.
test_data.head(10)

Unnamed: 0,Emotion,Pixels
474,anger,"[[78, 73, 72, 65, 56, 56, 61, 63, 66, 65, 49, ..."
2778,surprise,"[[87, 91, 88, 82, 80, 80, 76, 78, 82, 86, 80, ..."
2590,neutral,"[[95, 104, 82, 54, 36, 30, 30, 32, 29, 30, 31,..."
750,neutral,"[[122, 127, 127, 123, 122, 126, 127, 148, 166,..."
690,neutral,"[[131, 141, 147, 154, 163, 163, 152, 147, 150,..."
2557,anger,"[[53, 74, 91, 102, 107, 116, 123, 134, 143, 15..."
4034,sad,"[[69, 86, 84, 67, 74, 71, 72, 56, 53, 56, 56, ..."
496,happy,"[[155, 159, 155, 154, 159, 166, 162, 172, 179,..."
2862,surprise,"[[79, 92, 100, 98, 96, 103, 110, 114, 117, 117..."
3797,anger,"[[83, 84, 78, 76, 76, 78, 76, 75, 68, 67, 69, ..."
